Source code for berhoel.odf.ods
"""Processing ods (OpenDocument spreadsheets)."""
from __future__ import annotations
import datetime
import functools
import re
from typing import TYPE_CHECKING
from . import Odf, OdfXml
if TYPE_CHECKING:
from pathlib import Path
from lxml import etree
__date__ = "2024/08/03 17:16:01 hoel"
__author__ = "Berthold Höllmann"
__copyright__ = "Copyright © 2020 by Berthold Höllmann"
__credits__ = ["Berthold Höllmann"]
__maintainer__ = "Berthold Höllmann"
__email__ = "berhoel@gmail.com"
# Identify hyperlink in formula.
IMDB_HYPERLINK = re.compile(
r"of:=HYPERLINK\(\s*"
r'"(?P<url>https?://www.imdb.com/title/tt[0-9]+)'
r'(?:/?(?:[?]ref_=(?:[a-z]{2}_)+(?:[0-9]{1,3}|[a-z]{1,3})|(?:episodes))?)?"\s*;\s*'
r'"(?P<name>.+)"\s*\)'
)
# Identify relations to other cells in formulas.
LINKED_CELL = re.compile(
r"of:=-(:?[-+]?[0-9]*\.?[0-9]*[-+*/])*\[\$?(?P<sheet>'k\&v')"
r"\.(?P<row>F)(?P<line>[0-9]+)]"
)
[docs]
class P(OdfXml):
"""Representing a paragraph in a cell."""
@property
def text(self) -> str | None:
"""Get text from paragraph.
Returns:
str: Whole paragraph text if available.
"""
res = " ".join(list(self.root.itertext()))
return res if res.strip() else None
[docs]
class Cell(OdfXml):
"""Representing a cell in a table row."""
@property
def text(self) -> str | None:
"""Return text associated with cell.
Returns:
str: Cell text if avaliable.
"""
if self.root is None:
return None
res = self.find("text:p")
return None if res is None else P(res).text
@property
def date(self) -> datetime.date | None:
"""Return date value of cell if available.
Returns:
datetime.date: Date value of cell if available.
"""
val = self.get("office:date-value")
return (
None if val is None else datetime.datetime.strptime(val, "%Y-%m-%d").date() # noqa:DTZ007
)
@property
def value(self) -> str | None:
"""Return value of cell.
Returns:
str: Value of cell if available.
"""
return self.get("office:value")
@property
def float(self) -> float | None:
"""Return float value of cell.
Returns:
float: ``float`` falue of cell if available.
"""
return None if self.value is None else float(self.value)
@property
def int(self) -> int | None:
"""Return integer value of cell.
Returns:
int: ``int`` value if cell if available.
"""
return None if self.value is None else int(self.value)
@property
def url(self) -> str | None:
"""Return URL associated with cell from hyperlink function.
Returns:
str: url from ``HYPERLINK`` formula or cell hyperlink.
"""
s_val: str = self.get("table:formula")
val = None if s_val is None else IMDB_HYPERLINK.match(s_val)
return None if val is None else val.group("url")
@property
def link(self) -> dict[str, str] | None:
"""Provide Link information.
This link information is information of values referenced from diffrent tables
in formula.
Returns:
dict: Result of ``re.match.groupdict``. Provided keys are ``sheet``, ``row``,
and ``line.``
"""
dvd_link: str = self.get("table:formula")
if dvd_link is not None:
match = LINKED_CELL.match(dvd_link)
if match is not None:
return match.groupdict()
return None
[docs]
class Row(OdfXml):
"""Representing table row."""
[docs]
def __init__(self, elem: etree._Element) -> None:
"""Initalize instance."""
super().__init__(elem)
self.content_cell: str = self._attrib_map("table:table-cell")
self.covered_cell: str = self._attrib_map("table:covered-table-cell")
self.columns_repeated: str = self._attrib_map("table:number-columns-repeated")
@functools.cached_property
def cells(self) -> list[Cell | None]:
"""Provide list of cells in row.
Empty cells are replaced by ``None``, as are cells covered by merged cells.
Returns:
List[Cell]: The cells making up the current row.
"""
res: list[Cell | None] = []
for cell in self.root:
if cell.tag == self.content_cell:
elem = Cell(cell)
res.extend(
[elem]
* (
int(elem.get("table:number-columns-repeated"))
if self.columns_repeated in cell.attrib
else 1
)
)
elif cell.tag == self.covered_cell:
elem = Cell(cell)
res.extend(
[None]
* (
int(elem.get("table:number-columns-repeated"))
if self.columns_repeated in cell.attrib
else 1
)
)
else:
msg = f"Unknown tag {cell.tag}"
raise NotImplementedError(msg)
return res
[docs]
class Table(OdfXml):
"""Representation of spreadsheet table."""
@functools.cached_property
def name(self) -> str:
"""Return name attriute of table.
Returns:
str: table name.
"""
return self.get("table:name")
@functools.cached_property
def rows(self) -> list[Row]:
"""Return rows in table.
Returns:
List[Row]: list of ``Row`` intances.
"""
return [Row(e) for e in self.findall("table:table-row")]
@functools.cached_property
def style_name(self) -> str:
"""Return table style-name.
I Know of ``ta1`` for ordinary tables, and ``ta2`` for hidden tables.
Returns:
str: style name of table.
"""
return self.get("table:style-name")
@property
def hidden(self) -> bool:
"""Return whether table is hidden.
Returns:
bool: ``True`` if table is hidden.
"""
return self.style_name == "ta2"
[docs]
class Ods(Odf):
"""Processing ODS files."""
[docs]
def __init__(self, path: str | Path) -> None:
"""Open ODF spreadsheet file."""
super().__init__(path)
self.root = self.find("office:body/office:spreadsheet")
@functools.cached_property
def tables(self) -> list[Table]:
"""Return all tables in spreadsheet.
Returns:
List[Table]: Tables in OpenOffice spreadsheet.
"""
return [Table(e) for e in self.findall("table:table")]