refactor: excel parse
This commit is contained in:
@@ -0,0 +1,251 @@
|
||||
# ruff: noqa: E402, I001
|
||||
|
||||
"""Initializes oxml sub-package.
|
||||
|
||||
This includes registering custom element classes corresponding to Open XML elements.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from docx.oxml.drawing import CT_Drawing
|
||||
from docx.oxml.parser import OxmlElement, parse_xml, register_element_cls
|
||||
from docx.oxml.shape import (
|
||||
CT_Anchor,
|
||||
CT_Blip,
|
||||
CT_BlipFillProperties,
|
||||
CT_GraphicalObject,
|
||||
CT_GraphicalObjectData,
|
||||
CT_Inline,
|
||||
CT_NonVisualDrawingProps,
|
||||
CT_Picture,
|
||||
CT_PictureNonVisual,
|
||||
CT_Point2D,
|
||||
CT_PositiveSize2D,
|
||||
CT_ShapeProperties,
|
||||
CT_Transform2D,
|
||||
)
|
||||
from docx.oxml.shared import CT_DecimalNumber, CT_OnOff, CT_String
|
||||
from docx.oxml.text.hyperlink import CT_Hyperlink
|
||||
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
|
||||
from docx.oxml.text.run import (
|
||||
CT_R,
|
||||
CT_Br,
|
||||
CT_Cr,
|
||||
CT_NoBreakHyphen,
|
||||
CT_PTab,
|
||||
CT_Text,
|
||||
)
|
||||
|
||||
# -- `OxmlElement` and `parse_xml()` are not used in this module but several downstream
# -- "extension" packages expect to find them here and there's no compelling reason
# -- not to republish them here so those keep working.
__all__ = ["OxmlElement", "parse_xml"]

# ---------------------------------------------------------------------------
# DrawingML-related elements
#
# Each `register_element_cls()` call below pairs a namespace-prefixed tag name with
# the custom element class that represents it. A single class may be registered for
# more than one tag (e.g. `CT_PositiveSize2D` for both `a:ext` and `wp:extent`).

register_element_cls("a:blip", CT_Blip)
register_element_cls("a:ext", CT_PositiveSize2D)
register_element_cls("a:graphic", CT_GraphicalObject)
register_element_cls("a:graphicData", CT_GraphicalObjectData)
register_element_cls("a:off", CT_Point2D)
register_element_cls("a:xfrm", CT_Transform2D)
register_element_cls("pic:blipFill", CT_BlipFillProperties)
register_element_cls("pic:cNvPr", CT_NonVisualDrawingProps)
register_element_cls("pic:nvPicPr", CT_PictureNonVisual)
register_element_cls("pic:pic", CT_Picture)
register_element_cls("pic:spPr", CT_ShapeProperties)
register_element_cls("w:drawing", CT_Drawing)
register_element_cls("wp:anchor", CT_Anchor)
register_element_cls("wp:docPr", CT_NonVisualDrawingProps)
register_element_cls("wp:extent", CT_PositiveSize2D)
register_element_cls("wp:inline", CT_Inline)

# ---------------------------------------------------------------------------
# hyperlink-related elements

register_element_cls("w:hyperlink", CT_Hyperlink)

# ---------------------------------------------------------------------------
# text-related elements

register_element_cls("w:br", CT_Br)
register_element_cls("w:cr", CT_Cr)
register_element_cls("w:lastRenderedPageBreak", CT_LastRenderedPageBreak)
register_element_cls("w:noBreakHyphen", CT_NoBreakHyphen)
register_element_cls("w:ptab", CT_PTab)
register_element_cls("w:r", CT_R)
register_element_cls("w:t", CT_Text)

# ---------------------------------------------------------------------------
# header/footer-related mappings

register_element_cls("w:evenAndOddHeaders", CT_OnOff)
register_element_cls("w:titlePg", CT_OnOff)

# ---------------------------------------------------------------------------
# other custom element class mappings
#
# From here on, each group imports its element classes immediately before
# registering them rather than at the top of the module; the `noqa: E402, I001`
# directive at the top of this file exists to permit that layout.
# NOTE(review): presumably the late imports avoid import cycles -- confirm before
# hoisting any of them to the top of the module.

# -- comments part --
from .comments import CT_Comments, CT_Comment

register_element_cls("w:comments", CT_Comments)
register_element_cls("w:comment", CT_Comment)

# -- core-properties part --
from .coreprops import CT_CoreProperties

register_element_cls("cp:coreProperties", CT_CoreProperties)

# -- document part --
from .document import CT_Body, CT_Document

register_element_cls("w:body", CT_Body)
register_element_cls("w:document", CT_Document)

# -- numbering --
from .numbering import CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr

register_element_cls("w:abstractNumId", CT_DecimalNumber)
register_element_cls("w:ilvl", CT_DecimalNumber)
register_element_cls("w:lvlOverride", CT_NumLvl)
register_element_cls("w:num", CT_Num)
register_element_cls("w:numId", CT_DecimalNumber)
register_element_cls("w:numPr", CT_NumPr)
register_element_cls("w:numbering", CT_Numbering)
register_element_cls("w:startOverride", CT_DecimalNumber)

# -- sections, headers and footers --
from .section import (
    CT_HdrFtr,
    CT_HdrFtrRef,
    CT_PageMar,
    CT_PageSz,
    CT_SectPr,
    CT_SectType,
)

register_element_cls("w:footerReference", CT_HdrFtrRef)
register_element_cls("w:ftr", CT_HdrFtr)
register_element_cls("w:hdr", CT_HdrFtr)
register_element_cls("w:headerReference", CT_HdrFtrRef)
register_element_cls("w:pgMar", CT_PageMar)
register_element_cls("w:pgSz", CT_PageSz)
register_element_cls("w:sectPr", CT_SectPr)
register_element_cls("w:type", CT_SectType)

# -- settings part --
from .settings import CT_Settings

register_element_cls("w:settings", CT_Settings)

# -- styles --
from .styles import CT_LatentStyles, CT_LsdException, CT_Style, CT_Styles

register_element_cls("w:basedOn", CT_String)
register_element_cls("w:latentStyles", CT_LatentStyles)
register_element_cls("w:locked", CT_OnOff)
register_element_cls("w:lsdException", CT_LsdException)
register_element_cls("w:name", CT_String)
register_element_cls("w:next", CT_String)
register_element_cls("w:qFormat", CT_OnOff)
register_element_cls("w:semiHidden", CT_OnOff)
register_element_cls("w:style", CT_Style)
register_element_cls("w:styles", CT_Styles)
register_element_cls("w:uiPriority", CT_DecimalNumber)
register_element_cls("w:unhideWhenUsed", CT_OnOff)

# -- tables --
from .table import (
    CT_Height,
    CT_Row,
    CT_Tbl,
    CT_TblGrid,
    CT_TblGridCol,
    CT_TblLayoutType,
    CT_TblPr,
    CT_TblPrEx,
    CT_TblWidth,
    CT_Tc,
    CT_TcPr,
    CT_TrPr,
    CT_VMerge,
    CT_VerticalJc,
)

register_element_cls("w:bidiVisual", CT_OnOff)
register_element_cls("w:gridAfter", CT_DecimalNumber)
register_element_cls("w:gridBefore", CT_DecimalNumber)
register_element_cls("w:gridCol", CT_TblGridCol)
register_element_cls("w:gridSpan", CT_DecimalNumber)
register_element_cls("w:tbl", CT_Tbl)
register_element_cls("w:tblGrid", CT_TblGrid)
register_element_cls("w:tblLayout", CT_TblLayoutType)
register_element_cls("w:tblPr", CT_TblPr)
register_element_cls("w:tblPrEx", CT_TblPrEx)
register_element_cls("w:tblStyle", CT_String)
register_element_cls("w:tc", CT_Tc)
register_element_cls("w:tcPr", CT_TcPr)
register_element_cls("w:tcW", CT_TblWidth)
register_element_cls("w:tr", CT_Row)
register_element_cls("w:trHeight", CT_Height)
register_element_cls("w:trPr", CT_TrPr)
register_element_cls("w:vAlign", CT_VerticalJc)
register_element_cls("w:vMerge", CT_VMerge)

# -- run (character) formatting --
from .text.font import (
    CT_Color,
    CT_Fonts,
    CT_Highlight,
    CT_HpsMeasure,
    CT_RPr,
    CT_Underline,
    CT_VerticalAlignRun,
)

register_element_cls("w:b", CT_OnOff)
register_element_cls("w:bCs", CT_OnOff)
register_element_cls("w:caps", CT_OnOff)
register_element_cls("w:color", CT_Color)
register_element_cls("w:cs", CT_OnOff)
register_element_cls("w:dstrike", CT_OnOff)
register_element_cls("w:emboss", CT_OnOff)
register_element_cls("w:highlight", CT_Highlight)
register_element_cls("w:i", CT_OnOff)
register_element_cls("w:iCs", CT_OnOff)
register_element_cls("w:imprint", CT_OnOff)
register_element_cls("w:noProof", CT_OnOff)
register_element_cls("w:oMath", CT_OnOff)
register_element_cls("w:outline", CT_OnOff)
register_element_cls("w:rFonts", CT_Fonts)
register_element_cls("w:rPr", CT_RPr)
register_element_cls("w:rStyle", CT_String)
register_element_cls("w:rtl", CT_OnOff)
register_element_cls("w:shadow", CT_OnOff)
register_element_cls("w:smallCaps", CT_OnOff)
register_element_cls("w:snapToGrid", CT_OnOff)
register_element_cls("w:specVanish", CT_OnOff)
register_element_cls("w:strike", CT_OnOff)
register_element_cls("w:sz", CT_HpsMeasure)
register_element_cls("w:u", CT_Underline)
register_element_cls("w:vanish", CT_OnOff)
register_element_cls("w:vertAlign", CT_VerticalAlignRun)
register_element_cls("w:webHidden", CT_OnOff)

# -- paragraph --
from .text.paragraph import CT_P

register_element_cls("w:p", CT_P)

# -- paragraph formatting --
from .text.parfmt import (
    CT_Ind,
    CT_Jc,
    CT_PPr,
    CT_Spacing,
    CT_TabStop,
    CT_TabStops,
)

register_element_cls("w:ind", CT_Ind)
register_element_cls("w:jc", CT_Jc)
register_element_cls("w:keepLines", CT_OnOff)
register_element_cls("w:keepNext", CT_OnOff)
register_element_cls("w:outlineLvl", CT_DecimalNumber)
register_element_cls("w:pageBreakBefore", CT_OnOff)
register_element_cls("w:pPr", CT_PPr)
register_element_cls("w:pStyle", CT_String)
register_element_cls("w:spacing", CT_Spacing)
register_element_cls("w:tab", CT_TabStop)
register_element_cls("w:tabs", CT_TabStops)
register_element_cls("w:widowControl", CT_OnOff)
||||
@@ -0,0 +1,124 @@
|
||||
"""Custom element classes related to document comments."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from typing import TYPE_CHECKING, Callable, cast
|
||||
|
||||
from docx.oxml.ns import nsdecls
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.oxml.simpletypes import ST_DateTime, ST_DecimalNumber, ST_String
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, RequiredAttribute, ZeroOrMore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.table import CT_Tbl
|
||||
from docx.oxml.text.paragraph import CT_P
|
||||
|
||||
|
||||
class CT_Comments(BaseOxmlElement):
    """`w:comments` element, the root element for the comments part.

    Simply contains a collection of `w:comment` elements, each representing a single comment. Each
    contained comment is identified by a unique `w:id` attribute, used to reference the comment
    from the document text. The offset of the comment in this collection is arbitrary; it is
    essentially a _set_ implemented as a list.
    """

    # -- type-declarations to fill in the gaps for metaclass-added methods --
    comment_lst: list[CT_Comment]

    comment = ZeroOrMore("w:comment")

    def add_comment(self) -> CT_Comment:
        """Return newly added `w:comment` child of this `w:comments`.

        The returned `w:comment` element is the minimum valid value, having a `w:id` value unique
        within the existing comments and the required `w:author` attribute present but set to the
        empty string. Its content is limited to a single run containing the necessary annotation
        reference but no text. Content is added by adding runs to this first paragraph and by
        adding additional paragraphs as needed.
        """
        next_id = self._next_available_comment_id()
        comment = cast(
            CT_Comment,
            parse_xml(
                f'<w:comment {nsdecls("w")} w:id="{next_id}" w:author="">'
                f" <w:p>"
                f" <w:pPr>"
                f' <w:pStyle w:val="CommentText"/>'
                f" </w:pPr>"
                f" <w:r>"
                f" <w:rPr>"
                f' <w:rStyle w:val="CommentReference"/>'
                f" </w:rPr>"
                f" <w:annotationRef/>"
                f" </w:r>"
                f" </w:p>"
                f"</w:comment>"
            ),
        )
        self.append(comment)
        return comment

    def get_comment_by_id(self, comment_id: int) -> CT_Comment | None:
        """Return the `w:comment` element identified by `comment_id`, or |None| if not found.

        When more than one `w:comment` carries the same id, only the first match is returned
        (the XPath expression selects position `[1]`).
        """
        comment_elms = self.xpath(f"(./w:comment[@w:id='{comment_id}'])[1]")
        return comment_elms[0] if comment_elms else None

    def _next_available_comment_id(self) -> int:
        """The next available comment id.

        According to the schema, this can be any positive integer, as big as you like, and the
        default mechanism is to use `max() + 1`. However, if that yields a value larger than will
        fit in a 32-bit signed integer, we take a more deliberate approach to use the first
        unused integer starting from 0.
        """
        # -- a set makes the fall-back immune to duplicate ids in a malformed part and gives
        # -- O(1) membership tests
        used_ids = {int(x) for x in self.xpath("./w:comment/@w:id")}

        next_id = max(used_ids, default=-1) + 1

        if next_id <= 2**31 - 1:
            return next_id

        # -- fall-back: first non-negative integer not already in use. Comparing positions in
        # -- a sorted list (the prior approach) could hand back an id that is already taken
        # -- when ids are duplicated or negative.
        candidate = 0
        while candidate in used_ids:
            candidate += 1
        return candidate
|
||||
|
||||
|
||||
class CT_Comment(BaseOxmlElement):
    """`w:comment` element, one individual comment.

    Like a table-cell, a comment is a "story": a container for paragraphs and tables. Although
    a comment is probably most often a single sentence or phrase, its content can be rich,
    including multiple rich-text paragraphs, hyperlinks, images, and tables.
    """

    # -- declarations for members the metaclass adds for the child elements below --
    add_p: Callable[[], CT_P]
    p_lst: list[CT_P]
    tbl_lst: list[CT_Tbl]
    _insert_tbl: Callable[[CT_Tbl], CT_Tbl]

    # -- XML attributes: `w:id` and `w:author` are required, the others optional --
    id: int = RequiredAttribute("w:id", ST_DecimalNumber)  # pyright: ignore[reportAssignmentType]
    author: str = RequiredAttribute("w:author", ST_String)  # pyright: ignore[reportAssignmentType]
    initials: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:initials", ST_String
    )
    date: dt.datetime | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:date", ST_DateTime
    )

    # -- child elements; neither declares any successor elements --
    p = ZeroOrMore("w:p", successors=())
    tbl = ZeroOrMore("w:tbl", successors=())

    @property
    def inner_content_elements(self) -> list[CT_P | CT_Tbl]:
        """All `w:p` and `w:tbl` elements that are direct children of this comment."""
        block_item_xpath = "./w:p | ./w:tbl"
        return self.xpath(block_item_xpath)
|
||||
@@ -0,0 +1,298 @@
|
||||
"""Custom element classes for core properties-related XML elements."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import re
|
||||
from typing import TYPE_CHECKING, Any, Callable, cast
|
||||
|
||||
from docx.oxml.ns import nsdecls, qn
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrOne
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from lxml.etree import _Element as etree_Element # pyright: ignore[reportPrivateUsage]
|
||||
|
||||
|
||||
class CT_CoreProperties(BaseOxmlElement):
    """`<cp:coreProperties>` element, the root element of the Core Properties part.

    Stored as `/docProps/core.xml`. Implements many of the Dublin Core document metadata
    elements. String elements resolve to an empty string ("") if the element is not
    present in the XML. String elements are limited in length to 255 unicode characters.

    Date/time elements are read as timezone-aware UTC `datetime` values and resolve to
    |None| when the element is absent, empty, or not parseable.
    """

    # -- type-declaration for a method added by the metaclass --
    get_or_add_revision: Callable[[], etree_Element]

    category = ZeroOrOne("cp:category", successors=())
    contentStatus = ZeroOrOne("cp:contentStatus", successors=())
    created = ZeroOrOne("dcterms:created", successors=())
    creator = ZeroOrOne("dc:creator", successors=())
    description = ZeroOrOne("dc:description", successors=())
    identifier = ZeroOrOne("dc:identifier", successors=())
    keywords = ZeroOrOne("cp:keywords", successors=())
    language = ZeroOrOne("dc:language", successors=())
    lastModifiedBy = ZeroOrOne("cp:lastModifiedBy", successors=())
    lastPrinted = ZeroOrOne("cp:lastPrinted", successors=())
    modified = ZeroOrOne("dcterms:modified", successors=())
    revision: etree_Element | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "cp:revision", successors=()
    )
    subject = ZeroOrOne("dc:subject", successors=())
    title = ZeroOrOne("dc:title", successors=())
    version = ZeroOrOne("cp:version", successors=())

    # -- XML for an empty core-properties element declaring the namespaces it needs --
    _coreProperties_tmpl = "<cp:coreProperties %s/>\n" % nsdecls("cp", "dc", "dcterms")

    # -- matches a numeric timezone offset like "-07:00" or "+05:30" --
    _offset_pattern = re.compile(r"([+-])(\d\d):(\d\d)")

    @classmethod
    def new(cls) -> CT_CoreProperties:
        """Return a new `<cp:coreProperties>` element."""
        return cast(CT_CoreProperties, parse_xml(cls._coreProperties_tmpl))

    @property
    def author_text(self) -> str:
        """The text in the `dc:creator` child element."""
        return self._text_of_element("creator")

    @author_text.setter
    def author_text(self, value: str):
        self._set_element_text("creator", value)

    @property
    def category_text(self) -> str:
        """The text in the `cp:category` child element."""
        return self._text_of_element("category")

    @category_text.setter
    def category_text(self, value: str):
        self._set_element_text("category", value)

    @property
    def comments_text(self) -> str:
        """The text in the `dc:description` child element."""
        return self._text_of_element("description")

    @comments_text.setter
    def comments_text(self, value: str):
        self._set_element_text("description", value)

    @property
    def contentStatus_text(self) -> str:
        """The text in the `cp:contentStatus` child element."""
        return self._text_of_element("contentStatus")

    @contentStatus_text.setter
    def contentStatus_text(self, value: str):
        self._set_element_text("contentStatus", value)

    @property
    def created_datetime(self) -> dt.datetime | None:
        """Date/time in the `dcterms:created` child element, |None| if absent or invalid."""
        return self._datetime_of_element("created")

    @created_datetime.setter
    def created_datetime(self, value: dt.datetime):
        self._set_element_datetime("created", value)

    @property
    def identifier_text(self) -> str:
        """The text in the `dc:identifier` child element."""
        return self._text_of_element("identifier")

    @identifier_text.setter
    def identifier_text(self, value: str):
        self._set_element_text("identifier", value)

    @property
    def keywords_text(self) -> str:
        """The text in the `cp:keywords` child element."""
        return self._text_of_element("keywords")

    @keywords_text.setter
    def keywords_text(self, value: str):
        self._set_element_text("keywords", value)

    @property
    def language_text(self) -> str:
        """The text in the `dc:language` child element."""
        return self._text_of_element("language")

    @language_text.setter
    def language_text(self, value: str):
        self._set_element_text("language", value)

    @property
    def lastModifiedBy_text(self) -> str:
        """The text in the `cp:lastModifiedBy` child element."""
        return self._text_of_element("lastModifiedBy")

    @lastModifiedBy_text.setter
    def lastModifiedBy_text(self, value: str):
        self._set_element_text("lastModifiedBy", value)

    @property
    def lastPrinted_datetime(self) -> dt.datetime | None:
        """Date/time in the `cp:lastPrinted` child element, |None| if absent or invalid."""
        return self._datetime_of_element("lastPrinted")

    @lastPrinted_datetime.setter
    def lastPrinted_datetime(self, value: dt.datetime):
        self._set_element_datetime("lastPrinted", value)

    @property
    def modified_datetime(self) -> dt.datetime | None:
        """Date/time in the `dcterms:modified` child element, |None| if absent or invalid."""
        return self._datetime_of_element("modified")

    @modified_datetime.setter
    def modified_datetime(self, value: dt.datetime):
        self._set_element_datetime("modified", value)

    @property
    def revision_number(self) -> int:
        """Integer value of revision property.

        Resolves to 0 when the `cp:revision` element is absent, when its text is not an
        integer, or when that integer is negative.
        """
        revision = self.revision
        if revision is None:
            return 0
        try:
            number = int(str(revision.text))
        except ValueError:
            # non-integer revision strings also resolve to 0
            return 0
        # as do negative integers
        return max(number, 0)

    @revision_number.setter
    def revision_number(self, value: int):
        """Set revision property to string value of integer `value`.

        Raises |ValueError| when `value` is not an int or is less than 1.
        """
        if not isinstance(value, int) or value < 1:  # pyright: ignore[reportUnnecessaryIsInstance]
            # -- an f-string (rather than `%`) so a tuple `value` still produces this error
            raise ValueError(f"revision property requires positive int, got '{value}'")
        revision = self.get_or_add_revision()
        revision.text = str(value)

    @property
    def subject_text(self) -> str:
        """The text in the `dc:subject` child element."""
        return self._text_of_element("subject")

    @subject_text.setter
    def subject_text(self, value: str):
        self._set_element_text("subject", value)

    @property
    def title_text(self) -> str:
        """The text in the `dc:title` child element."""
        return self._text_of_element("title")

    @title_text.setter
    def title_text(self, value: str):
        self._set_element_text("title", value)

    @property
    def version_text(self) -> str:
        """The text in the `cp:version` child element."""
        return self._text_of_element("version")

    @version_text.setter
    def version_text(self, value: str):
        self._set_element_text("version", value)

    def _datetime_of_element(self, property_name: str) -> dt.datetime | None:
        """Date/time value of child element `property_name`, |None| if unavailable.

        |None| is returned when the element is absent, has no text, or contains text that
        cannot be parsed as a W3CDTF date/time.
        """
        element = getattr(self, property_name)
        if element is None:
            return None
        datetime_str = element.text
        if datetime_str is None:
            # -- an empty element (e.g. `<dcterms:created/>`) has no text; slicing |None| in
            # -- the parser would raise TypeError, so treat it like any other invalid value
            return None
        try:
            return self._parse_W3CDTF_to_datetime(datetime_str)
        except ValueError:
            # invalid datetime strings are ignored
            return None

    def _get_or_add(self, prop_name: str) -> BaseOxmlElement:
        """Return element returned by "get_or_add_" method for `prop_name`."""
        get_or_add_method = getattr(self, f"get_or_add_{prop_name}")
        return get_or_add_method()

    @classmethod
    def _offset_dt(cls, dt_: dt.datetime, offset_str: str) -> dt.datetime:
        """A |datetime| instance offset from `dt_` by timezone offset in `offset_str`.

        `offset_str` is like `"-07:00"`. The offset is applied in reverse so a local time
        carrying that offset is converted to UTC. Raises |ValueError| when `offset_str`
        does not start with a sign followed by `hh:mm`.
        """
        match = cls._offset_pattern.match(offset_str)
        if match is None:
            raise ValueError("'%s' is not a valid offset string" % offset_str)
        sign, hours_str, minutes_str = match.groups()
        # -- local time is *ahead* of UTC for a "+" offset, so subtract it; add for "-" --
        sign_factor = -1 if sign == "+" else 1
        hours = int(hours_str) * sign_factor
        minutes = int(minutes_str) * sign_factor
        return dt_ + dt.timedelta(hours=hours, minutes=minutes)

    @classmethod
    def _parse_W3CDTF_to_datetime(cls, w3cdtf_str: str) -> dt.datetime:
        """Return timezone-aware UTC |datetime| parsed from `w3cdtf_str`.

        Raises |ValueError| when `w3cdtf_str` matches none of the supported forms.
        """
        # valid W3CDTF date cases:
        # yyyy e.g. "2003"
        # yyyy-mm e.g. "2003-12"
        # yyyy-mm-dd e.g. "2003-12-31"
        # UTC timezone e.g. "2003-12-31T10:14:55Z"
        # numeric timezone e.g. "2003-12-31T10:14:55-08:00"
        templates = (
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%d",
            "%Y-%m",
            "%Y",
        )
        # strptime isn't smart enough to parse literal timezone offsets like
        # "-07:30", so we have to do it ourselves
        parseable_part = w3cdtf_str[:19]
        offset_str = w3cdtf_str[19:]
        dt_ = None
        for tmpl in templates:
            try:
                dt_ = dt.datetime.strptime(parseable_part, tmpl)
            except ValueError:
                continue
            # -- first matching template wins; no point trying the shorter ones --
            break
        if dt_ is None:
            raise ValueError("could not parse W3CDTF datetime string '%s'" % w3cdtf_str)
        # -- only a six-character suffix (like "-08:00") is treated as a numeric offset;
        # -- anything else, including "Z", leaves the parsed value as-is
        if len(offset_str) == 6:
            dt_ = cls._offset_dt(dt_, offset_str)
        return dt_.replace(tzinfo=dt.timezone.utc)

    def _set_element_datetime(self, prop_name: str, value: dt.datetime) -> None:
        """Set date/time value of child element having `prop_name` to `value`.

        The value is written in the form `yyyy-mm-ddThh:mm:ssZ`. A timezone-aware `value`
        is converted to UTC first so the "Z" suffix is truthful; a naive `value` is written
        unchanged. Raises |ValueError| when `value` is not a `datetime.datetime`.
        """
        if not isinstance(value, dt.datetime):  # pyright: ignore[reportUnnecessaryIsInstance]
            tmpl = "property requires <type 'datetime.datetime'> object, got %s"
            raise ValueError(tmpl % type(value))
        if value.utcoffset() is not None:
            # -- aware datetime: normalize to UTC, otherwise local wall-clock time would be
            # -- mislabeled as UTC by the literal "Z" below
            value = value.astimezone(dt.timezone.utc)
        element = self._get_or_add(prop_name)
        element.text = value.strftime("%Y-%m-%dT%H:%M:%SZ")
        if prop_name in ("created", "modified"):
            # These two require an explicit "xsi:type="dcterms:W3CDTF""
            # attribute. The first and last line are a hack required to add
            # the xsi namespace to the root element rather than each child
            # element in which it is referenced
            self.set(qn("xsi:foo"), "bar")
            element.set(qn("xsi:type"), "dcterms:W3CDTF")
            del self.attrib[qn("xsi:foo")]

    def _set_element_text(self, prop_name: str, value: Any) -> None:
        """Set string value of `prop_name` property to `value`.

        A non-`str` `value` is converted with `str()`. Raises |ValueError| when the
        resulting string is longer than 255 characters.
        """
        if not isinstance(value, str):
            value = str(value)

        if len(value) > 255:
            tmpl = "exceeded 255 char limit for property, got:\n\n'%s'"
            raise ValueError(tmpl % value)
        element = self._get_or_add(prop_name)
        element.text = value

    def _text_of_element(self, property_name: str) -> str:
        """The text in the element matching `property_name`.

        The empty string if the element is not present or contains no text.
        """
        element = getattr(self, property_name)
        if element is None or element.text is None:
            return ""
        return element.text
|
||||
@@ -0,0 +1,88 @@
|
||||
"""Custom element classes that correspond to the document part, e.g. <w:document>."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable, List
|
||||
|
||||
from docx.oxml.section import CT_SectPr
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrMore, ZeroOrOne
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.table import CT_Tbl
|
||||
from docx.oxml.text.paragraph import CT_P
|
||||
|
||||
|
||||
class CT_Document(BaseOxmlElement):
    """``<w:document>`` element, the root element of a document.xml file."""

    body: CT_Body = ZeroOrOne("w:body")  # pyright: ignore[reportAssignmentType]

    @property
    def sectPr_lst(self) -> List[CT_SectPr]:
        """Every `w:sectPr` element reachable directly from this document element.

        A `sectPr` inside a paragraph that is itself wrapped in revision marks or some
        other intervening layer (perhaps `w:sdt` or customXml elements) is not included.

        The `w:sectPr` elements appear in document order. All but the last are
        `w:p/w:pPr/w:sectPr`; the last one is always `w:body/w:sectPr`.
        """
        paragraph_sectPrs = "./w:body/w:p/w:pPr/w:sectPr"
        body_sectPr = "./w:body/w:sectPr"
        return self.xpath(f"{paragraph_sectPrs} | {body_sectPr}")
|
||||
|
||||
|
||||
class CT_Body(BaseOxmlElement):
    """`w:body`, the container element for the main document story in `document.xml`."""

    # -- declarations for members the metaclass adds for the child elements below --
    add_p: Callable[[], CT_P]
    get_or_add_sectPr: Callable[[], CT_SectPr]
    p_lst: List[CT_P]
    tbl_lst: List[CT_Tbl]
    _insert_tbl: Callable[[CT_Tbl], CT_Tbl]

    # -- paragraphs and tables are both declared with `w:sectPr` as their successor --
    p = ZeroOrMore("w:p", successors=("w:sectPr",))
    tbl = ZeroOrMore("w:tbl", successors=("w:sectPr",))
    sectPr: CT_SectPr | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:sectPr", successors=()
    )

    def add_section_break(self) -> CT_SectPr:
        """Return the `w:sectPr` element for a new section added at end of document.

        What was the last `w:sectPr` becomes the second-to-last. The new `w:sectPr` is an
        exact clone of that prior one except that every header and footer reference is
        removed (those are therefore "inherited" from the prior section).

        A copy of the formerly-last `w:sectPr` ends up in a new `w:p` at the end of the
        document. The value returned is the sentinel `w:sectPr` for the document (as
        implemented, it `is` the prior sentinel `w:sectPr`, stripped of its header and
        footer references).
        """
        # -- the file-end sectPr governs the last section (sections[-1]) --
        sentinel = self.get_or_add_sectPr()

        # -- park an exact copy in a new trailing paragraph; that copy now defines the
        # -- second-to-last section
        trailing_p = self.add_p()
        trailing_p.set_sectPr(sentinel.clone())

        # -- strip header/footer references from the "new" last section --
        for reference in sentinel.xpath("w:headerReference|w:footerReference"):
            sentinel.remove(reference)

        # -- the sentinel now governs the newly added last section --
        return sentinel

    def clear_content(self):
        """Remove every content child element from this <w:body> element.

        A <w:sectPr> child, when present, is left in place.
        """
        non_sectPr_children = self.xpath("./*[not(self::w:sectPr)]")
        for child in non_sectPr_children:
            self.remove(child)

    @property
    def inner_content_elements(self) -> List[CT_P | CT_Tbl]:
        """All `w:p` and `w:tbl` elements that are direct children of this document-body.

        Elements appear in document order. An element nested inside a `w:ins` or other
        "wrapper" element is not included.
        """
        block_item_xpath = "./w:p | ./w:tbl"
        return self.xpath(block_item_xpath)
|
||||
@@ -0,0 +1,11 @@
|
||||
"""Custom element-classes for DrawingML-related elements like `<w:drawing>`.
|
||||
|
||||
For legacy reasons, many DrawingML-related elements are in `docx.oxml.shape`. Expect
|
||||
those to move over here as we have reason to touch them.
|
||||
"""
|
||||
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement
|
||||
|
||||
|
||||
class CT_Drawing(BaseOxmlElement):
    """`<w:drawing>` element, a container for a DrawingML object such as a picture or chart."""
|
||||
@@ -0,0 +1,10 @@
|
||||
"""Exceptions for oxml sub-package."""
|
||||
|
||||
|
||||
class XmlchemyError(Exception):
    """Generic base error for the oxml sub-package."""
|
||||
|
||||
|
||||
class InvalidXmlError(XmlchemyError):
    """Raised when invalid XML is encountered.

    One example is an attempt to access a required child element that is missing.
    """
|
||||
@@ -0,0 +1,109 @@
|
||||
"""Namespace-related objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict
|
||||
|
||||
# -- namespace prefix => namespace URI, for each prefix this package knows about --
nsmap = {
    "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
    "c": "http://schemas.openxmlformats.org/drawingml/2006/chart",
    "cp": "http://schemas.openxmlformats.org/package/2006/metadata/core-properties",
    "dc": "http://purl.org/dc/elements/1.1/",
    "dcmitype": "http://purl.org/dc/dcmitype/",
    "dcterms": "http://purl.org/dc/terms/",
    "dgm": "http://schemas.openxmlformats.org/drawingml/2006/diagram",
    "m": "http://schemas.openxmlformats.org/officeDocument/2006/math",
    "pic": "http://schemas.openxmlformats.org/drawingml/2006/picture",
    "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
    "sl": "http://schemas.openxmlformats.org/schemaLibrary/2006/main",
    "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
    "w14": "http://schemas.microsoft.com/office/word/2010/wordml",
    "wp": "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
    "xml": "http://www.w3.org/XML/1998/namespace",
    "xsi": "http://www.w3.org/2001/XMLSchema-instance",
}

# -- the inverse mapping: namespace URI => namespace prefix --
pfxmap = {uri: pfx for pfx, uri in nsmap.items()}
|
||||
|
||||
|
||||
class NamespacePrefixedTag(str):
    """A `str` subtype representing an XML tag that carries a namespace prefix.

    The string value is the prefixed tag itself, e.g. "w:p". The prefix must be a key
    of the module-level `nsmap`; a `KeyError` is raised on construction otherwise.
    """

    def __new__(cls, nstag: str):
        return super().__new__(cls, nstag)

    def __init__(self, nstag: str):
        # -- a tag without exactly one ":" fails the unpacking with ValueError --
        prefix, local_part = nstag.split(":")
        self._pfx = prefix
        self._local_part = local_part
        self._ns_uri = nsmap[prefix]

    @property
    def clark_name(self) -> str:
        """This tag in Clark notation, i.e. "{namespace-uri}local-part"."""
        return f"{{{self._ns_uri}}}{self._local_part}"

    @classmethod
    def from_clark_name(cls, clark_name: str) -> NamespacePrefixedTag:
        """Alternate constructor taking a Clark-notation name like "{uri}local"."""
        # -- drop the leading "{" then separate the URI from the local name --
        ns_uri, local_name = clark_name[1:].split("}")
        prefix = pfxmap[ns_uri]
        return cls(f"{prefix}:{local_name}")

    @property
    def local_part(self) -> str:
        """The part of this tag following the colon.

        E.g. "foobar" is returned for tag "f:foobar".
        """
        return self._local_part

    @property
    def nsmap(self) -> Dict[str, str]:
        """Single-member dict mapping the prefix of this tag to its namespace name.

        Example: `{"f": "http://foo/bar"}`. This is handy for passing to xpath calls
        and other uses.
        """
        return {self._pfx: self._ns_uri}

    @property
    def nspfx(self) -> str:
        """The namespace prefix of this tag.

        For example, "f" is returned for tag "f:foobar".
        """
        return self._pfx

    @property
    def nsuri(self) -> str:
        """The namespace URI of this tag.

        For example, "http://foo/bar" would be returned for tag "f:foobar" if the "f"
        prefix maps to "http://foo/bar" in nsmap.
        """
        return self._ns_uri
|
||||
|
||||
|
||||
def nsdecls(*prefixes: str) -> str:
    """Return a namespace-declaration string covering each prefix in `prefixes`.

    The result is a space-separated sequence of `xmlns:pfx="uri"` items, in the order
    given, suitable for adding required namespace declarations to a tree root element.
    Each prefix must be a key of `nsmap`.
    """
    declarations = (f'xmlns:{pfx}="{nsmap[pfx]}"' for pfx in prefixes)
    return " ".join(declarations)
|
||||
|
||||
|
||||
def nspfxmap(*nspfxs: str) -> Dict[str, str]:
    """Return the subset of `nsmap` limited to the prefixes given in `nspfxs`.

    Any number of namespace prefixes can be supplied, e.g. `nspfxmap("a", "r", "w")`.
    Each prefix must be a key of `nsmap`.
    """
    subset: Dict[str, str] = {}
    for prefix in nspfxs:
        subset[prefix] = nsmap[prefix]
    return subset
|
||||
|
||||
|
||||
def qn(tag: str) -> str:
    """Return the Clark-notation "qualified name" for namespace-prefixed `tag`.

    A familiar prefixed tag name like "w:p" is converted into the qualified tag name
    lxml uses. For example, `qn("w:p")` returns
    "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}p".
    """
    nspfx, local_name = tag.split(":")
    ns_uri = nsmap[nspfx]
    return f"{{{ns_uri}}}{local_name}"
|
||||
@@ -0,0 +1,109 @@
|
||||
"""Custom element classes related to the numbering part."""
|
||||
|
||||
from docx.oxml.parser import OxmlElement
|
||||
from docx.oxml.shared import CT_DecimalNumber
|
||||
from docx.oxml.simpletypes import ST_DecimalNumber
|
||||
from docx.oxml.xmlchemy import (
|
||||
BaseOxmlElement,
|
||||
OneAndOnlyOne,
|
||||
RequiredAttribute,
|
||||
ZeroOrMore,
|
||||
ZeroOrOne,
|
||||
)
|
||||
|
||||
|
||||
class CT_Num(BaseOxmlElement):
    """``<w:num>`` element, a concrete list definition instance.

    It has a required ``<w:abstractNumId>`` child that references the abstract
    numbering definition providing most of the formatting details.
    """

    abstractNumId = OneAndOnlyOne("w:abstractNumId")
    lvlOverride = ZeroOrMore("w:lvlOverride")
    numId = RequiredAttribute("w:numId", ST_DecimalNumber)

    def add_lvlOverride(self, ilvl):
        """Add a new ``<w:lvlOverride>`` child (CT_NumLvl) and return it.

        The ``ilvl`` attribute of the new element is set to `ilvl`.
        """
        lvl_override = self._add_lvlOverride(ilvl=ilvl)
        return lvl_override

    @classmethod
    def new(cls, num_id, abstractNum_id):
        """Return a new ``<w:num>`` element with its numId set to `num_id`.

        The new element contains a ``<w:abstractNumId>`` child whose val attribute is
        set to `abstractNum_id`.
        """
        num_elm = OxmlElement("w:num")
        num_elm.numId = num_id
        num_elm.append(CT_DecimalNumber.new("w:abstractNumId", abstractNum_id))
        return num_elm
|
||||
|
||||
|
||||
class CT_NumLvl(BaseOxmlElement):
    """``<w:lvlOverride>`` element.

    Identifies a level in a list definition to override with the settings it contains.
    """

    startOverride = ZeroOrOne("w:startOverride", successors=("w:lvl",))
    ilvl = RequiredAttribute("w:ilvl", ST_DecimalNumber)

    def add_startOverride(self, val):
        """Add a new ``w:startOverride`` child (CT_DecimalNumber) and return it.

        The ``val`` attribute of the new element is set to `val`.
        """
        start_override = self._add_startOverride(val=val)
        return start_override
|
||||
|
||||
|
||||
class CT_NumPr(BaseOxmlElement):
    """``<w:numPr>`` element, container for numbering properties applied to a paragraph."""

    ilvl = ZeroOrOne("w:ilvl", successors=("w:numId", "w:numberingChange", "w:ins"))
    numId = ZeroOrOne("w:numId", successors=("w:numberingChange", "w:ins"))

    # -- NOTE: the two setters below are commented out and therefore inactive; they
    # -- are retained here unchanged for reference only.

    # @ilvl.setter
    # def _set_ilvl(self, val):
    #     """
    #     Get or add a <w:ilvl> child and set its ``w:val`` attribute to `val`.
    #     """
    #     ilvl = self.get_or_add_ilvl()
    #     ilvl.val = val

    # @numId.setter
    # def numId(self, val):
    #     """
    #     Get or add a <w:numId> child and set its ``w:val`` attribute to
    #     `val`.
    #     """
    #     numId = self.get_or_add_numId()
    #     numId.val = val
|
||||
|
||||
|
||||
class CT_Numbering(BaseOxmlElement):
    """``<w:numbering>`` element, the root element of a numbering part, i.e.
    numbering.xml."""

    num = ZeroOrMore("w:num", successors=("w:numIdMacAtCleanup",))

    def add_num(self, abstractNum_id):
        """Return a newly added CT_Num (<w:num>) element.

        The new element references the abstract numbering definition identified by
        `abstractNum_id` and is assigned the first unused numId.
        """
        num = CT_Num.new(self._next_numId, abstractNum_id)
        return self._insert_num(num)

    def num_having_numId(self, numId):
        """Return the ``<w:num>`` child element whose ``numId`` attribute is `numId`.

        Raises `KeyError` when no such child is present.
        """
        matching_nums = self.xpath('./w:num[@w:numId="%d"]' % numId)
        if not matching_nums:
            # -- raised directly rather than from inside an `except IndexError` so the
            # -- traceback is not cluttered with an irrelevant chained exception --
            raise KeyError("no <w:num> element with numId %d" % numId)
        return matching_nums[0]

    @property
    def _next_numId(self):
        """The first ``numId`` unused by a ``<w:num>`` element.

        Numbering starts at 1 and any gap between the numIds of existing ``<w:num>``
        elements is filled first.
        """
        # -- a set makes each membership test O(1) instead of scanning a list --
        used_num_ids = {int(numId_str) for numId_str in self.xpath("./w:num/@w:numId")}
        # -- with n ids in use, at least one of 1..n+1 must be free, so the search
        # -- always terminates with a value --
        return next(
            candidate
            for candidate in range(1, len(used_num_ids) + 2)
            if candidate not in used_num_ids
        )
|
||||
@@ -0,0 +1,62 @@
|
||||
# pyright: reportImportCycles=false
|
||||
|
||||
"""XML parser for python-docx."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Dict, Type, cast
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from docx.oxml.ns import NamespacePrefixedTag, nsmap
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement
|
||||
|
||||
|
||||
# -- configure XML parser --
# -- the parser drops ignorable whitespace between elements and does not resolve
# -- entities; custom element classes are looked up by namespace and tag name --
oxml_parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
element_class_lookup = etree.ElementNamespaceClassLookup()
oxml_parser.set_element_class_lookup(element_class_lookup)
|
||||
|
||||
|
||||
def parse_xml(xml: str | bytes) -> "BaseOxmlElement":
    """Parse `xml` and return the root lxml element of the resulting tree.

    Parsing is done with the module's custom parser (`oxml_parser`), so elements in
    `xml` that have a registered custom element class are produced as instances of
    that class.
    """
    root = etree.fromstring(xml, oxml_parser)
    return cast("BaseOxmlElement", root)
|
||||
|
||||
|
||||
def register_element_cls(tag: str, cls: Type["BaseOxmlElement"]):
    """Register `cls` as the lxml custom element class to use for `tag`.

    An instance of `cls` is constructed whenever the oxml parser encounters an element
    with matching `tag`. `tag` is a string of the form `nspfx:tagroot`, e.g.
    `'w:document'`; its prefix must be a key of `nsmap`.
    """
    prefix, local_name = tag.split(":")
    ns_uri = nsmap[prefix]
    element_class_lookup.get_namespace(ns_uri)[local_name] = cls
|
||||
|
||||
|
||||
def OxmlElement(
    nsptag_str: str,
    attrs: Dict[str, str] | None = None,
    nsdecls: Dict[str, str] | None = None,
) -> BaseOxmlElement | etree._Element:  # pyright: ignore[reportPrivateUsage]
    """Return a "loose" lxml element having the tag specified by `nsptag_str`.

    `nsptag_str` must carry the standard namespace prefix, e.g. `a:tbl`. The new
    element is an instance of the custom element class registered for that tag name,
    when there is one.

    `attrs`, when provided, is a dict of attribute values to set on the element.

    `nsdecls`, when provided, is a dict whose keys are prefixes and whose values are
    namespace names; each is declared on the element. When omitted, a single namespace
    declaration is added, based on the prefix of `nsptag_str`.
    """
    tag = NamespacePrefixedTag(nsptag_str)
    declared_namespaces = tag.nsmap if nsdecls is None else nsdecls
    return oxml_parser.makeelement(tag.clark_name, attrib=attrs, nsmap=declared_namespaces)
|
||||
@@ -0,0 +1,537 @@
|
||||
"""Section-related custom element classes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from copy import deepcopy
|
||||
from typing import Callable, Iterator, List, Sequence, cast
|
||||
|
||||
from lxml import etree
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
from docx.enum.section import WD_HEADER_FOOTER, WD_ORIENTATION, WD_SECTION_START
|
||||
from docx.oxml.ns import nsmap
|
||||
from docx.oxml.shared import CT_OnOff
|
||||
from docx.oxml.simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure, XsdString
|
||||
from docx.oxml.table import CT_Tbl
|
||||
from docx.oxml.text.paragraph import CT_P
|
||||
from docx.oxml.xmlchemy import (
|
||||
BaseOxmlElement,
|
||||
OptionalAttribute,
|
||||
RequiredAttribute,
|
||||
ZeroOrMore,
|
||||
ZeroOrOne,
|
||||
)
|
||||
from docx.shared import Length, lazyproperty
|
||||
|
||||
# -- a block-level content element is either a paragraph (CT_P) or a table (CT_Tbl) --
BlockElement: TypeAlias = "CT_P | CT_Tbl"
|
||||
|
||||
|
||||
class CT_HdrFtr(BaseOxmlElement):
    """`w:hdr` and `w:ftr`, the root element for header and footer part respectively."""

    # -- type declarations only; no values are assigned for these names here --
    add_p: Callable[[], CT_P]
    p_lst: List[CT_P]
    tbl_lst: List[CT_Tbl]

    _insert_tbl: Callable[[CT_Tbl], CT_Tbl]

    p = ZeroOrMore("w:p", successors=())
    tbl = ZeroOrMore("w:tbl", successors=())

    @property
    def inner_content_elements(self) -> List[CT_P | CT_Tbl]:
        """All `w:p` and `w:tbl` child elements of this header or footer, as a list.

        Elements appear in document order. Only direct children are selected, so an
        element nested in a `w:ins` or other "wrapper" element is not included.
        """
        direct_blocks_xpath = "./w:p | ./w:tbl"
        return self.xpath(direct_blocks_xpath)
|
||||
|
||||
|
||||
class CT_HdrFtrRef(BaseOxmlElement):
    """`w:headerReference` and `w:footerReference` elements."""

    # -- required `w:type` attribute, a member of WD_HEADER_FOOTER --
    type_: WD_HEADER_FOOTER = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:type", WD_HEADER_FOOTER
    )
    # -- required `r:id` attribute, a string --
    rId: str = RequiredAttribute("r:id", XsdString)  # pyright: ignore[reportAssignmentType]
|
||||
|
||||
|
||||
class CT_PageMar(BaseOxmlElement):
    """``<w:pgMar>`` element, defining page margins.

    Each attribute below is optional and maps to the like-named XML attribute.
    """

    # -- `top` and `bottom` use the signed measure type; the rest use the unsigned one --
    top: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:top", ST_SignedTwipsMeasure
    )
    right: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:right", ST_TwipsMeasure
    )
    bottom: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:bottom", ST_SignedTwipsMeasure
    )
    left: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:left", ST_TwipsMeasure
    )
    header: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:header", ST_TwipsMeasure
    )
    footer: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:footer", ST_TwipsMeasure
    )
    gutter: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:gutter", ST_TwipsMeasure
    )
|
||||
|
||||
|
||||
class CT_PageSz(BaseOxmlElement):
    """``<w:pgSz>`` element, defining page dimensions and orientation."""

    # -- page width, from the optional `w:w` attribute --
    w: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:w", ST_TwipsMeasure
    )
    # -- page height, from the optional `w:h` attribute --
    h: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:h", ST_TwipsMeasure
    )
    # -- page orientation, from the optional `w:orient` attribute; declared with a
    # -- default of portrait --
    orient: WD_ORIENTATION = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:orient", WD_ORIENTATION, default=WD_ORIENTATION.PORTRAIT
    )
|
||||
|
||||
|
||||
class CT_SectPr(BaseOxmlElement):
    """`w:sectPr` element, the container element for section properties."""

    # -- type declarations only; no values are assigned for these names here --
    get_or_add_pgMar: Callable[[], CT_PageMar]
    get_or_add_pgSz: Callable[[], CT_PageSz]
    get_or_add_titlePg: Callable[[], CT_OnOff]
    get_or_add_type: Callable[[], CT_SectType]
    _add_footerReference: Callable[[], CT_HdrFtrRef]
    _add_headerReference: Callable[[], CT_HdrFtrRef]
    _remove_titlePg: Callable[[], None]
    _remove_type: Callable[[], None]

    # -- child-element sequence; each declaration below passes the tags that follow
    # -- its own element as `successors` --
    _tag_seq = (
        "w:footnotePr",
        "w:endnotePr",
        "w:type",
        "w:pgSz",
        "w:pgMar",
        "w:paperSrc",
        "w:pgBorders",
        "w:lnNumType",
        "w:pgNumType",
        "w:cols",
        "w:formProt",
        "w:vAlign",
        "w:noEndnote",
        "w:titlePg",
        "w:textDirection",
        "w:bidi",
        "w:rtlGutter",
        "w:docGrid",
        "w:printerSettings",
        "w:sectPrChange",
    )
    headerReference = ZeroOrMore("w:headerReference", successors=_tag_seq)
    footerReference = ZeroOrMore("w:footerReference", successors=_tag_seq)
    type: CT_SectType | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:type", successors=_tag_seq[3:]
    )
    pgSz: CT_PageSz | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:pgSz", successors=_tag_seq[4:]
    )
    pgMar: CT_PageMar | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:pgMar", successors=_tag_seq[5:]
    )
    titlePg: CT_OnOff | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:titlePg", successors=_tag_seq[14:]
    )
    del _tag_seq

    def add_footerReference(self, type_: WD_HEADER_FOOTER, rId: str) -> CT_HdrFtrRef:
        """Return newly added CT_HdrFtrRef element of `type_` with `rId`.

        The element tag is `w:footerReference`.
        """
        footerReference = self._add_footerReference()
        footerReference.type_ = type_
        footerReference.rId = rId
        return footerReference

    def add_headerReference(self, type_: WD_HEADER_FOOTER, rId: str) -> CT_HdrFtrRef:
        """Return newly added CT_HdrFtrRef element of `type_` with `rId`.

        The element tag is `w:headerReference`.
        """
        headerReference = self._add_headerReference()
        headerReference.type_ = type_
        headerReference.rId = rId
        return headerReference

    @property
    def bottom_margin(self) -> Length | None:
        """Value of the `w:bottom` attr of `<w:pgMar>` child element, as |Length|.

        |None| when either the element or the attribute is not present.
        """
        pgMar = self.pgMar
        return None if pgMar is None else pgMar.bottom

    @bottom_margin.setter
    def bottom_margin(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        pgMar.bottom = value if value is None or isinstance(value, Length) else Length(value)

    def clone(self) -> CT_SectPr:
        """Return a duplicate of this ``<w:sectPr>`` element tree suitable for use in
        adding a section break.

        All attributes of the root ``<w:sectPr>`` element are cleared on the copy,
        which removes its rsid* attributes. This element itself is not modified.
        """
        cloned_sectPr = deepcopy(self)
        cloned_sectPr.attrib.clear()
        return cloned_sectPr

    @property
    def footer(self) -> Length | None:
        """Distance from bottom edge of page to bottom edge of the footer.

        This is the value of the `w:footer` attribute in the `w:pgMar` child element,
        as a |Length| object, or |None| if either the element or the attribute is not
        present.
        """
        pgMar = self.pgMar
        return None if pgMar is None else pgMar.footer

    @footer.setter
    def footer(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        pgMar.footer = value if value is None or isinstance(value, Length) else Length(value)

    def get_footerReference(self, type_: WD_HEADER_FOOTER) -> CT_HdrFtrRef | None:
        """Return footerReference element of `type_` or None if not present."""
        matching_footerReferences = self.xpath(
            "./w:footerReference[@w:type='%s']" % WD_HEADER_FOOTER.to_xml(type_)
        )
        return matching_footerReferences[0] if matching_footerReferences else None

    def get_headerReference(self, type_: WD_HEADER_FOOTER) -> CT_HdrFtrRef | None:
        """Return headerReference element of `type_` or None if not present."""
        matching_headerReferences = self.xpath(
            "./w:headerReference[@w:type='%s']" % WD_HEADER_FOOTER.to_xml(type_)
        )
        return matching_headerReferences[0] if matching_headerReferences else None

    @property
    def gutter(self) -> Length | None:
        """The value of the ``w:gutter`` attribute in the ``<w:pgMar>`` child element,
        as a |Length| object, or |None| if either the element or the attribute is not
        present."""
        pgMar = self.pgMar
        return None if pgMar is None else pgMar.gutter

    @gutter.setter
    def gutter(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        pgMar.gutter = value if value is None or isinstance(value, Length) else Length(value)

    @property
    def header(self) -> Length | None:
        """Distance from top edge of page to top edge of header.

        This value comes from the `w:header` attribute on the `w:pgMar` child element.
        |None| if either the element or the attribute is not present.
        """
        pgMar = self.pgMar
        return None if pgMar is None else pgMar.header

    @header.setter
    def header(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        pgMar.header = value if value is None or isinstance(value, Length) else Length(value)

    def iter_inner_content(self) -> Iterator[CT_P | CT_Tbl]:
        """Generate all `w:p` and `w:tbl` elements in this section.

        Elements appear in document order. Elements shaded by nesting in a `w:ins` or
        other "wrapper" element will not be included.
        """
        return _SectBlockElementIterator.iter_sect_block_elements(self)

    @property
    def left_margin(self) -> Length | None:
        """The value of the ``w:left`` attribute in the ``<w:pgMar>`` child element, as
        a |Length| object, or |None| if either the element or the attribute is not
        present."""
        pgMar = self.pgMar
        return None if pgMar is None else pgMar.left

    @left_margin.setter
    def left_margin(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        pgMar.left = value if value is None or isinstance(value, Length) else Length(value)

    @property
    def orientation(self) -> WD_ORIENTATION:
        """`WD_ORIENTATION` member indicating page-orientation for this section.

        This is the value of the `orient` attribute on the `w:pgSz` child, or
        `WD_ORIENTATION.PORTRAIT` if not present.
        """
        pgSz = self.pgSz
        return WD_ORIENTATION.PORTRAIT if pgSz is None else pgSz.orient

    @orientation.setter
    def orientation(self, value: WD_ORIENTATION | None):
        pgSz = self.get_or_add_pgSz()
        # -- any falsy value, including None, is stored as portrait --
        pgSz.orient = value if value else WD_ORIENTATION.PORTRAIT

    @property
    def page_height(self) -> Length | None:
        """Value in EMU of the `h` attribute of the `w:pgSz` child element.

        |None| if not present.
        """
        pgSz = self.pgSz
        return None if pgSz is None else pgSz.h

    @page_height.setter
    def page_height(self, value: int | Length | None):
        pgSz = self.get_or_add_pgSz()
        # -- a plain int is wrapped in Length, as the margin setters do --
        pgSz.h = value if value is None or isinstance(value, Length) else Length(value)

    @property
    def page_width(self) -> Length | None:
        """Value in EMU of the ``w`` attribute of the ``<w:pgSz>`` child element.

        |None| if not present.
        """
        pgSz = self.pgSz
        return None if pgSz is None else pgSz.w

    @page_width.setter
    def page_width(self, value: int | Length | None):
        pgSz = self.get_or_add_pgSz()
        # -- a plain int is wrapped in Length, as the margin setters do --
        pgSz.w = value if value is None or isinstance(value, Length) else Length(value)

    @property
    def preceding_sectPr(self) -> CT_SectPr | None:
        """SectPr immediately preceding this one or None if this is the first."""
        # -- [1] predicate returns list of zero or one value --
        preceding_sectPrs = self.xpath("./preceding::w:sectPr[1]")
        return preceding_sectPrs[0] if len(preceding_sectPrs) > 0 else None

    def remove_footerReference(self, type_: WD_HEADER_FOOTER) -> str:
        """Return rId of w:footerReference child of `type_` after removing it.

        Raises `ValueError` when this element has no footer reference of `type_`.
        """
        footerReference = self.get_footerReference(type_)
        if footerReference is None:
            # -- should never happen, but to satisfy type-check and just in case --
            raise ValueError("CT_SectPr has no footer reference")
        rId = footerReference.rId
        self.remove(footerReference)
        return rId

    def remove_headerReference(self, type_: WD_HEADER_FOOTER) -> str:
        """Return rId of w:headerReference child of `type_` after removing it.

        Raises `ValueError` when this element has no header reference of `type_`.
        """
        headerReference = self.get_headerReference(type_)
        if headerReference is None:
            # -- should never happen, but to satisfy type-check and just in case --
            raise ValueError("CT_SectPr has no header reference")
        rId = headerReference.rId
        self.remove(headerReference)
        return rId

    @property
    def right_margin(self) -> Length | None:
        """The value of the ``w:right`` attribute in the ``<w:pgMar>`` child element, as
        a |Length| object, or |None| if either the element or the attribute is not
        present."""
        pgMar = self.pgMar
        return None if pgMar is None else pgMar.right

    @right_margin.setter
    def right_margin(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        # -- a plain int is wrapped in Length, consistent with the other margin setters --
        pgMar.right = value if value is None or isinstance(value, Length) else Length(value)

    @property
    def start_type(self) -> WD_SECTION_START:
        """The member of the ``WD_SECTION_START`` enumeration corresponding to the value
        of the ``val`` attribute of the ``<w:type>`` child element, or
        ``WD_SECTION_START.NEW_PAGE`` if not present."""
        sect_type = self.type
        if sect_type is None or sect_type.val is None:
            return WD_SECTION_START.NEW_PAGE
        return sect_type.val

    @start_type.setter
    def start_type(self, value: WD_SECTION_START | None):
        # -- new-page is what the getter reports when `w:type` is absent, so the
        # -- element is removed rather than written for that value --
        if value is None or value is WD_SECTION_START.NEW_PAGE:
            self._remove_type()
            return
        sect_type = self.get_or_add_type()
        sect_type.val = value

    @property
    def titlePg_val(self) -> bool:
        """Value of `w:titlePg/@val` or |False| if `./w:titlePg` is not present."""
        titlePg = self.titlePg
        return False if titlePg is None else titlePg.val

    @titlePg_val.setter
    def titlePg_val(self, value: bool | None):
        # -- the getter reports False when the element is absent, so it is removed
        # -- rather than written for None or False --
        if value in [None, False]:
            self._remove_titlePg()
        else:
            self.get_or_add_titlePg().val = True

    @property
    def top_margin(self) -> Length | None:
        """The value of the ``w:top`` attribute in the ``<w:pgMar>`` child element, as a
        |Length| object, or |None| if either the element or the attribute is not
        present."""
        pgMar = self.pgMar
        return None if pgMar is None else pgMar.top

    @top_margin.setter
    def top_margin(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        # -- a plain int is wrapped in Length, consistent with the other margin setters --
        pgMar.top = value if value is None or isinstance(value, Length) else Length(value)
|
||||
|
||||
|
||||
class CT_SectType(BaseOxmlElement):
    """``<w:type>`` element, defining the section start type.

    This is the element class used for the ``w:type`` child of ``<w:sectPr>``.
    """

    # -- optional `w:val` attribute, a member of WD_SECTION_START; None when absent --
    val: WD_SECTION_START | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", WD_SECTION_START
    )
|
||||
|
||||
|
||||
# == HELPERS =========================================================================
|
||||
|
||||
|
||||
class _SectBlockElementIterator:
    """Generates the block-item XML elements in a section.

    A block-item element is a `CT_P` (paragraph) or a `CT_Tbl` (table).
    """

    # -- compiled XPath objects, cached on the class so they are built once and then
    # -- shared by every instance (a new instance is created for each iteration) --
    _compiled_blocks_xpath: etree.XPath | None = None
    _compiled_count_xpath: etree.XPath | None = None

    def __init__(self, sectPr: CT_SectPr):
        self._sectPr = sectPr

    @classmethod
    def iter_sect_block_elements(cls, sectPr: CT_SectPr) -> Iterator[BlockElement]:
        """Generate each CT_P or CT_Tbl element within extents governed by `sectPr`."""
        return cls(sectPr)._iter_sect_block_elements()

    def _iter_sect_block_elements(self) -> Iterator[BlockElement]:
        """Generate each CT_P or CT_Tbl element in section."""
        # -- General strategy is to get all block (<w:p> and <w:tbl>) elements from
        # -- start of doc to and including this section, then compute the count of those
        # -- elements that came from prior sections and skip that many to leave only the
        # -- ones in this section. It's possible to express this "between here and
        # -- there" (end of prior section and end of this one) concept in XPath, but it
        # -- would be harder to follow because there are special cases (e.g. no prior
        # -- section) and the boundary expressions are fairly hairy. I also believe it
        # -- would be computationally more expensive than doing it this straightforward
        # -- albeit (theoretically) slightly wasteful way.

        sectPr, sectPrs = self._sectPr, self._sectPrs
        sectPr_idx = sectPrs.index(sectPr)

        # -- count block items belonging to prior sections --
        n_blks_to_skip = (
            0
            if sectPr_idx == 0
            else self._count_of_blocks_in_and_above_section(sectPrs[sectPr_idx - 1])
        )

        # -- and skip those in set of all blks from doc start to end of this section --
        yield from self._blocks_in_and_above_section(sectPr)[n_blks_to_skip:]

    def _blocks_in_and_above_section(self, sectPr: CT_SectPr) -> Sequence[BlockElement]:
        """All ps and tbls in section defined by `sectPr` and all prior sections."""
        cls = type(self)
        if cls._compiled_blocks_xpath is None:
            # -- assign on the class, not `self`; an instance attribute would be
            # -- discarded along with this short-lived iterator object --
            cls._compiled_blocks_xpath = etree.XPath(
                self._blocks_in_and_above_section_xpath,
                namespaces=nsmap,
                regexp=False,
            )
        xpath = cls._compiled_blocks_xpath
        # -- XPath callable results are Any (basically), so need a cast. --
        return cast(Sequence[BlockElement], xpath(sectPr))

    @lazyproperty
    def _blocks_in_and_above_section_xpath(self) -> str:
        """XPath expr for ps and tbls in context of a sectPr and all prior sectPrs."""
        # -- "p_sect" is a section with sectPr located at w:p/w:pPr/w:sectPr.
        # -- "body_sect" is a section with sectPr located at w:body/w:sectPr. The last
        # -- section in the document is a "body_sect". All others are of the "p_sect"
        # -- variety. "term" means "terminal", like the last p or tbl in the section.
        # -- "pred" means "predecessor", like a preceding p or tbl in the section.

        # -- the terminal block in a p-based sect is the p the sectPr appears in --
        p_sect_term_block = "./parent::w:pPr/parent::w:p"
        # -- the terminus of a body-based sect is the sectPr itself (not a block) --
        body_sect_term = "self::w:sectPr[parent::w:body]"
        # -- all the ps and tbls preceding (but not including) the context node --
        pred_ps_and_tbls = "preceding-sibling::*[self::w:p | self::w:tbl]"

        # -- p_sect_term_block and body_sect_term(inus) are mutually exclusive. So the
        # -- result is either the union of nodes found by the first two selectors or the
        # -- nodes found by the last selector, never both.
        return (
            # -- include the p containing a sectPr --
            f"{p_sect_term_block}"
            # -- along with all the blocks that precede it --
            f" | {p_sect_term_block}/{pred_ps_and_tbls}"
            # -- or all the preceding blocks if sectPr is body-based (last sectPr) --
            f" | {body_sect_term}/{pred_ps_and_tbls}"
        )

    def _count_of_blocks_in_and_above_section(self, sectPr: CT_SectPr) -> int:
        """Count of ps and tbls in section defined by `sectPr` and all prior sections."""
        cls = type(self)
        if cls._compiled_count_xpath is None:
            # -- cached on the class for the same reason as the blocks XPath --
            cls._compiled_count_xpath = etree.XPath(
                f"count({self._blocks_in_and_above_section_xpath})",
                namespaces=nsmap,
                regexp=False,
            )
        xpath = cls._compiled_count_xpath
        # -- numeric XPath results are always float, so need an int() conversion --
        return int(cast(float, xpath(sectPr)))

    @lazyproperty
    def _sectPrs(self) -> Sequence[CT_SectPr]:
        """All w:sectPr elements in document, in document-order."""
        return self._sectPr.xpath(
            "/w:document/w:body/w:p/w:pPr/w:sectPr | /w:document/w:body/w:sectPr",
        )
|
||||
@@ -0,0 +1,138 @@
|
||||
"""Custom element classes related to document settings."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrOne
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.shared import CT_OnOff
|
||||
|
||||
|
||||
class CT_Settings(BaseOxmlElement):
    """`w:settings` element, the root element of the settings part."""

    # -- accessor methods added for the `evenAndOddHeaders` child declared below --
    get_or_add_evenAndOddHeaders: Callable[[], CT_OnOff]
    _remove_evenAndOddHeaders: Callable[[], None]

    # -- child elements of `w:settings` in the order used to compute successors --
    _tag_seq = (
        "w:writeProtection",
        "w:view",
        "w:zoom",
        "w:removePersonalInformation",
        "w:removeDateAndTime",
        "w:doNotDisplayPageBoundaries",
        "w:displayBackgroundShape",
        "w:printPostScriptOverText",
        "w:printFractionalCharacterWidth",
        "w:printFormsData",
        "w:embedTrueTypeFonts",
        "w:embedSystemFonts",
        "w:saveSubsetFonts",
        "w:saveFormsData",
        "w:mirrorMargins",
        "w:alignBordersAndEdges",
        "w:bordersDoNotSurroundHeader",
        "w:bordersDoNotSurroundFooter",
        "w:gutterAtTop",
        "w:hideSpellingErrors",
        "w:hideGrammaticalErrors",
        "w:activeWritingStyle",
        "w:proofState",
        "w:formsDesign",
        "w:attachedTemplate",
        "w:linkStyles",
        "w:stylePaneFormatFilter",
        "w:stylePaneSortMethod",
        "w:documentType",
        "w:mailMerge",
        "w:revisionView",
        "w:trackRevisions",
        "w:doNotTrackMoves",
        "w:doNotTrackFormatting",
        "w:documentProtection",
        "w:autoFormatOverride",
        "w:styleLockTheme",
        "w:styleLockQFSet",
        "w:defaultTabStop",
        "w:autoHyphenation",
        "w:consecutiveHyphenLimit",
        "w:hyphenationZone",
        "w:doNotHyphenateCaps",
        "w:showEnvelope",
        "w:summaryLength",
        "w:clickAndTypeStyle",
        "w:defaultTableStyle",
        "w:evenAndOddHeaders",
        "w:bookFoldRevPrinting",
        "w:bookFoldPrinting",
        "w:bookFoldPrintingSheets",
        "w:drawingGridHorizontalSpacing",
        "w:drawingGridVerticalSpacing",
        "w:displayHorizontalDrawingGridEvery",
        "w:displayVerticalDrawingGridEvery",
        "w:doNotUseMarginsForDrawingGridOrigin",
        "w:drawingGridHorizontalOrigin",
        "w:drawingGridVerticalOrigin",
        "w:doNotShadeFormData",
        "w:noPunctuationKerning",
        "w:characterSpacingControl",
        "w:printTwoOnOne",
        "w:strictFirstAndLastChars",
        "w:noLineBreaksAfter",
        "w:noLineBreaksBefore",
        "w:savePreviewPicture",
        "w:doNotValidateAgainstSchema",
        "w:saveInvalidXml",
        "w:ignoreMixedContent",
        "w:alwaysShowPlaceholderText",
        "w:doNotDemarcateInvalidXml",
        "w:saveXmlDataOnly",
        "w:useXSLTWhenSaving",
        "w:saveThroughXslt",
        "w:showXMLTags",
        "w:alwaysMergeEmptyNamespace",
        "w:updateFields",
        "w:hdrShapeDefaults",
        "w:footnotePr",
        "w:endnotePr",
        "w:compat",
        "w:docVars",
        "w:rsids",
        "m:mathPr",
        "w:attachedSchema",
        "w:themeFontLang",
        "w:clrSchemeMapping",
        "w:doNotIncludeSubdocsInStats",
        "w:doNotAutoCompressPictures",
        "w:forceUpgrade",
        "w:captions",
        "w:readModeInkLockDown",
        "w:smartTagType",
        "sl:schemaLibrary",
        "w:shapeDefaults",
        "w:doNotEmbedSmartTags",
        "w:decimalSymbol",
        "w:listSeparator",
    )
    # -- "w:evenAndOddHeaders" sits at index 47, so its successors start at index 48 --
    evenAndOddHeaders: CT_OnOff | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:evenAndOddHeaders", successors=_tag_seq[48:]
    )
    del _tag_seq

    @property
    def evenAndOddHeaders_val(self) -> bool:
        """Value of `w:evenAndOddHeaders/@w:val`, or |False| when the element is absent."""
        element = self.evenAndOddHeaders
        return False if element is None else element.val

    @evenAndOddHeaders_val.setter
    def evenAndOddHeaders_val(self, value: bool | None):
        # -- both |None| and |False| are expressed by removing the element --
        if value is not None and value is not False:
            self.get_or_add_evenAndOddHeaders().val = value
            return
        self._remove_evenAndOddHeaders()
|
||||
@@ -0,0 +1,299 @@
|
||||
"""Custom element classes for shape-related elements like `<w:inline>`."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, cast
|
||||
|
||||
from docx.oxml.ns import nsdecls
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.oxml.simpletypes import (
|
||||
ST_Coordinate,
|
||||
ST_DrawingElementId,
|
||||
ST_PositiveCoordinate,
|
||||
ST_RelationshipId,
|
||||
XsdString,
|
||||
XsdToken,
|
||||
)
|
||||
from docx.oxml.xmlchemy import (
|
||||
BaseOxmlElement,
|
||||
OneAndOnlyOne,
|
||||
OptionalAttribute,
|
||||
RequiredAttribute,
|
||||
ZeroOrOne,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.shared import Length
|
||||
|
||||
|
||||
class CT_Anchor(BaseOxmlElement):
    """`<wp:anchor>` element, the container for a "floating" shape.

    Declares no child or attribute accessors of its own.
    """
|
||||
|
||||
|
||||
class CT_Blip(BaseOxmlElement):
    """`<a:blip>` element, specifies the image source and adjustments such as alpha
    and tint."""

    # -- optional `r:embed` relationship-id attribute --
    embed: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "r:embed", ST_RelationshipId
    )
    # -- optional `r:link` relationship-id attribute --
    link: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "r:link", ST_RelationshipId
    )
|
||||
|
||||
|
||||
class CT_BlipFillProperties(BaseOxmlElement):
    """`<pic:blipFill>` element, specifies picture properties."""

    # -- optional `a:blip` child, placed ahead of the listed successor elements --
    blip: CT_Blip = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "a:blip", successors=("a:srcRect", "a:tile", "a:stretch")
    )
|
||||
|
||||
|
||||
class CT_GraphicalObject(BaseOxmlElement):
    """`<a:graphic>` element, the container for a DrawingML object."""

    # -- required single `a:graphicData` child --
    graphicData: CT_GraphicalObjectData = OneAndOnlyOne(  # pyright: ignore[reportAssignmentType]
        "a:graphicData"
    )
|
||||
|
||||
|
||||
class CT_GraphicalObjectData(BaseOxmlElement):
    """`<a:graphicData>` element, the container for the XML of a DrawingML object."""

    # -- optional `pic:pic` child --
    pic: CT_Picture = ZeroOrOne("pic:pic")  # pyright: ignore[reportAssignmentType]
    # -- required `uri` attribute --
    uri: str = RequiredAttribute("uri", XsdToken)  # pyright: ignore[reportAssignmentType]
|
||||
|
||||
|
||||
class CT_Inline(BaseOxmlElement):
    """`<wp:inline>` element, the container for an inline shape."""

    extent: CT_PositiveSize2D = OneAndOnlyOne("wp:extent")  # pyright: ignore[reportAssignmentType]
    docPr: CT_NonVisualDrawingProps = OneAndOnlyOne(  # pyright: ignore[reportAssignmentType]
        "wp:docPr"
    )
    graphic: CT_GraphicalObject = OneAndOnlyOne(  # pyright: ignore[reportAssignmentType]
        "a:graphic"
    )

    @classmethod
    def new(cls, cx: Length, cy: Length, shape_id: int, pic: CT_Picture) -> CT_Inline:
        """Return a new `<wp:inline>` element built from the template XML.

        `cx` and `cy` become the extent, `shape_id` becomes the `wp:docPr` id and is
        used in its name ("Picture N"), and `pic` is inserted into `a:graphicData`.
        """
        inline = cast(CT_Inline, parse_xml(cls._inline_xml()))

        extent = inline.extent
        extent.cx = cx
        extent.cy = cy

        docPr = inline.docPr
        docPr.id = shape_id
        docPr.name = "Picture %d" % shape_id

        graphicData = inline.graphic.graphicData
        graphicData.uri = "http://schemas.openxmlformats.org/drawingml/2006/picture"
        graphicData._insert_pic(pic)
        return inline

    @classmethod
    def new_pic_inline(
        cls, shape_id: int, rId: str, filename: str, cx: Length, cy: Length
    ) -> CT_Inline:
        """Create a `wp:inline` element containing a `pic:pic` element.

        The `pic:pic` element is built from `filename`, `rId`, `cx` and `cy`.
        """
        # -- Word doesn't seem to use the pic id, but does not omit it --
        pic = CT_Picture.new(0, filename, rId, cx, cy)
        return cls.new(cx, cy, shape_id, pic)

    @classmethod
    def _inline_xml(cls):
        """XML text of the template `wp:inline` element, with placeholder values."""
        template = (
            "<wp:inline %s>\n"
            ' <wp:extent cx="914400" cy="914400"/>\n'
            ' <wp:docPr id="666" name="unnamed"/>\n'
            " <wp:cNvGraphicFramePr>\n"
            ' <a:graphicFrameLocks noChangeAspect="1"/>\n'
            " </wp:cNvGraphicFramePr>\n"
            " <a:graphic>\n"
            ' <a:graphicData uri="URI not set"/>\n'
            " </a:graphic>\n"
            "</wp:inline>"
        )
        return template % nsdecls("wp", "a", "pic", "r")
|
||||
|
||||
|
||||
class CT_NonVisualDrawingProps(BaseOxmlElement):
    """Used for the `<wp:docPr>` element, and perhaps others.

    Carries the id and name of a DrawingML drawing.
    """

    # -- both attributes are required --
    id = RequiredAttribute("id", ST_DrawingElementId)
    name = RequiredAttribute("name", XsdString)
|
||||
|
||||
|
||||
class CT_NonVisualPictureProperties(BaseOxmlElement):
    """`<pic:cNvPicPr>` element, specifies picture locking and resize behaviors.

    Declares no child or attribute accessors of its own.
    """
|
||||
|
||||
|
||||
class CT_Picture(BaseOxmlElement):
    """`<pic:pic>` element, a DrawingML picture."""

    nvPicPr: CT_PictureNonVisual = OneAndOnlyOne(  # pyright: ignore[reportAssignmentType]
        "pic:nvPicPr"
    )
    blipFill: CT_BlipFillProperties = OneAndOnlyOne(  # pyright: ignore[reportAssignmentType]
        "pic:blipFill"
    )
    spPr: CT_ShapeProperties = OneAndOnlyOne("pic:spPr")  # pyright: ignore[reportAssignmentType]

    @classmethod
    def new(cls, pic_id: int, filename: str, rId: str, cx: Length, cy: Length) -> CT_Picture:
        """Return a new minimum viable `<pic:pic>` (picture) element.

        `pic_id` and `filename` are written to `pic:cNvPr`, `rId` to the blip's embed
        attribute, and `cx`/`cy` to the shape properties.
        """
        pic = cast(CT_Picture, parse_xml(cls._pic_xml()))

        cNvPr = pic.nvPicPr.cNvPr
        cNvPr.id = pic_id
        cNvPr.name = filename

        pic.blipFill.blip.embed = rId

        spPr = pic.spPr
        spPr.cx = cx
        spPr.cy = cy
        return pic

    @classmethod
    def _pic_xml(cls):
        """XML text of the template `pic:pic` element, with placeholder values."""
        template = (
            "<pic:pic %s>\n"
            " <pic:nvPicPr>\n"
            ' <pic:cNvPr id="666" name="unnamed"/>\n'
            " <pic:cNvPicPr/>\n"
            " </pic:nvPicPr>\n"
            " <pic:blipFill>\n"
            " <a:blip/>\n"
            " <a:stretch>\n"
            " <a:fillRect/>\n"
            " </a:stretch>\n"
            " </pic:blipFill>\n"
            " <pic:spPr>\n"
            " <a:xfrm>\n"
            ' <a:off x="0" y="0"/>\n'
            ' <a:ext cx="914400" cy="914400"/>\n'
            " </a:xfrm>\n"
            ' <a:prstGeom prst="rect"/>\n'
            " </pic:spPr>\n"
            "</pic:pic>"
        )
        return template % nsdecls("pic", "a", "r")
|
||||
|
||||
|
||||
class CT_PictureNonVisual(BaseOxmlElement):
    """`<pic:nvPicPr>` element, non-visual picture properties."""

    # -- required single `pic:cNvPr` child --
    cNvPr = OneAndOnlyOne("pic:cNvPr")
|
||||
|
||||
|
||||
class CT_Point2D(BaseOxmlElement):
    """Used for the `<a:off>` element, and perhaps others.

    Describes a point by its x and y coordinates.
    """

    # -- both coordinates are required attributes --
    x = RequiredAttribute("x", ST_Coordinate)
    y = RequiredAttribute("y", ST_Coordinate)
|
||||
|
||||
|
||||
class CT_PositiveSize2D(BaseOxmlElement):
    """Used for the `<wp:extent>` element, and perhaps others later.

    Describes the size of a DrawingML drawing.
    """

    # -- width, a required attribute --
    cx: Length = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "cx", ST_PositiveCoordinate
    )
    # -- height, a required attribute --
    cy: Length = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "cy", ST_PositiveCoordinate
    )
|
||||
|
||||
|
||||
class CT_PresetGeometry2D(BaseOxmlElement):
    """`<a:prstGeom>` element, specifies a preset autoshape geometry, such as `rect`.

    Declares no child or attribute accessors of its own.
    """
|
||||
|
||||
|
||||
class CT_RelativeRect(BaseOxmlElement):
    """`<a:fillRect>` element, specifying that the picture should fill its containing
    rectangle shape.

    Declares no child or attribute accessors of its own.
    """
|
||||
|
||||
|
||||
class CT_ShapeProperties(BaseOxmlElement):
    """`<pic:spPr>` element, specifies size and shape of picture container."""

    # -- optional `a:xfrm` child, placed ahead of the listed successor elements --
    xfrm = ZeroOrOne(
        "a:xfrm",
        successors=(
            "a:custGeom",
            "a:prstGeom",
            "a:ln",
            "a:effectLst",
            "a:effectDag",
            "a:scene3d",
            "a:sp3d",
            "a:extLst",
        ),
    )

    @property
    def cx(self):
        """Shape width taken from the `a:xfrm` child, or |None| when that child is absent."""
        transform = self.xfrm
        return None if transform is None else transform.cx

    @cx.setter
    def cx(self, value):
        # -- the `a:xfrm` child is created on demand --
        self.get_or_add_xfrm().cx = value

    @property
    def cy(self):
        """Shape height taken from the `a:xfrm` child, or |None| when that child is absent."""
        transform = self.xfrm
        return None if transform is None else transform.cy

    @cy.setter
    def cy(self, value):
        # -- the `a:xfrm` child is created on demand --
        self.get_or_add_xfrm().cy = value
|
||||
|
||||
|
||||
class CT_StretchInfoProperties(BaseOxmlElement):
    """`<a:stretch>` element, specifies how the picture should fill its containing
    shape.

    Declares no child or attribute accessors of its own.
    """
|
||||
|
||||
|
||||
class CT_Transform2D(BaseOxmlElement):
    """`<a:xfrm>` element, specifies size and shape of picture container."""

    # -- optional children; `a:off` precedes `a:ext` --
    off = ZeroOrOne("a:off", successors=("a:ext",))
    ext = ZeroOrOne("a:ext", successors=())

    @property
    def cx(self):
        """Width read from the `a:ext` child, or |None| when that child is absent."""
        extent = self.ext
        return None if extent is None else extent.cx

    @cx.setter
    def cx(self, value):
        # -- the `a:ext` child is created on demand --
        self.get_or_add_ext().cx = value

    @property
    def cy(self):
        """Height read from the `a:ext` child, or |None| when that child is absent."""
        extent = self.ext
        return None if extent is None else extent.cy

    @cy.setter
    def cy(self, value):
        # -- the `a:ext` child is created on demand --
        self.get_or_add_ext().cy = value
|
||||
@@ -0,0 +1,52 @@
|
||||
"""Objects shared by modules in the docx.oxml subpackage."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import cast
|
||||
|
||||
from docx.oxml.ns import qn
|
||||
from docx.oxml.parser import OxmlElement
|
||||
from docx.oxml.simpletypes import ST_DecimalNumber, ST_OnOff, ST_String
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, RequiredAttribute
|
||||
|
||||
|
||||
class CT_DecimalNumber(BaseOxmlElement):
    """Used for `<w:numId>`, `<w:ilvl>`, `<w:abstractNumId>` and several others.

    Holds the text representation of a decimal number (e.g. 42) in its `val`
    attribute.
    """

    val: int = RequiredAttribute("w:val", ST_DecimalNumber)  # pyright: ignore[reportAssignmentType]

    @classmethod
    def new(cls, nsptagname: str, val: int):
        """Return a new element with tagname `nsptagname` and `w:val` set to `str(val)`."""
        attrs = {qn("w:val"): str(val)}
        return OxmlElement(nsptagname, attrs=attrs)
|
||||
|
||||
|
||||
class CT_OnOff(BaseOxmlElement):
    """Used for `w:b`, `w:i` elements and others.

    The `val` attribute holds a bool-ish string: xsd:boolean plus "on" and "off". It
    defaults to `True` when absent, so `<w:b>` for example means "bold is turned on".
    """

    val: bool = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", ST_OnOff, default=True
    )
|
||||
|
||||
|
||||
class CT_String(BaseOxmlElement):
    """Used for `w:pStyle` and `w:tblStyle` elements and others.

    In those cases, it contains a style name in its `val` attribute.
    """

    val: str = RequiredAttribute("w:val", ST_String)  # pyright: ignore[reportAssignmentType]

    @classmethod
    def new(cls, nsptagname: str, val: str):
        """Return a new `CT_String` element with tagname `nsptagname` and `val` set to `val`."""
        string_elm = cast(CT_String, OxmlElement(nsptagname))
        string_elm.val = val
        return string_elm
|
||||
@@ -0,0 +1,434 @@
|
||||
# pyright: reportImportCycles=false
|
||||
|
||||
"""Simple-type classes, corresponding to ST_* schema items.
|
||||
|
||||
These provide validation and format translation for values stored in XML element
|
||||
attributes. Naming generally corresponds to the simple type in the associated XML
|
||||
schema.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from typing import TYPE_CHECKING, Any, Tuple
|
||||
|
||||
from docx.exceptions import InvalidXmlError
|
||||
from docx.shared import Emu, Pt, RGBColor, Twips
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.shared import Length
|
||||
|
||||
|
||||
class BaseSimpleType:
    """Base class for simple-types.

    `from_xml()` and `to_xml()` are the entry points. Subclasses customize them by
    overriding `convert_from_xml()`, `convert_to_xml()` and `validate()`.
    """

    @classmethod
    def from_xml(cls, xml_value: str) -> Any:
        """Return the Python value that `convert_from_xml()` produces for `xml_value`."""
        return cls.convert_from_xml(xml_value)

    @classmethod
    def to_xml(cls, value: Any) -> str:
        """Validate `value`, then return the result of `convert_to_xml()` for it."""
        cls.validate(value)
        return cls.convert_to_xml(value)

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Any:
        """Parse `str_value` as an int."""
        return int(str_value)

    @classmethod
    def convert_to_xml(cls, value: Any) -> str:
        """Stub with no conversion of its own; it returns |None| on this class."""
        ...

    @classmethod
    def validate(cls, value: Any) -> None:
        """Stub that accepts every value on this class."""
        ...

    @classmethod
    def validate_int(cls, value: object):
        """Raise |TypeError| unless `value` is an instance of `int`."""
        if isinstance(value, int):
            return
        raise TypeError("value must be <type 'int'>, got %s" % type(value))

    @classmethod
    def validate_int_in_range(cls, value: int, min_inclusive: int, max_inclusive: int) -> None:
        """Raise unless `value` is an int between `min_inclusive` and `max_inclusive`.

        A non-int raises |TypeError|; an int outside the range raises |ValueError|.
        """
        cls.validate_int(value)
        if min_inclusive <= value <= max_inclusive:
            return
        raise ValueError(
            "value must be in range %d to %d inclusive, got %d"
            % (min_inclusive, max_inclusive, value)
        )

    @classmethod
    def validate_string(cls, value: Any) -> str:
        """Return `value` unchanged when it is a `str`, otherwise raise |TypeError|."""
        if isinstance(value, str):
            return value
        raise TypeError("value must be a string, got %s" % type(value))
|
||||
|
||||
|
||||
class BaseIntType(BaseSimpleType):
    """Simple-type whose Python value is an `int`."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> int:
        """Parse `str_value` with `int()`."""
        return int(str_value)

    @classmethod
    def convert_to_xml(cls, value: int) -> str:
        """Return `str(value)`."""
        return str(value)

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| unless `value` is an int."""
        cls.validate_int(value)
|
||||
|
||||
|
||||
class BaseStringType(BaseSimpleType):
    """Simple-type whose Python value is the XML string itself."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> str:
        """Return `str_value` unchanged."""
        return str_value

    @classmethod
    def convert_to_xml(cls, value: str) -> str:
        """Return `value` unchanged."""
        return value

    @classmethod
    def validate(cls, value: str):
        """Raise |TypeError| unless `value` is a `str`."""
        cls.validate_string(value)
|
||||
|
||||
|
||||
class BaseStringEnumerationType(BaseStringType):
    """String simple-type restricted to the values listed in `_members`."""

    # -- permitted values, supplied by each subclass --
    _members: Tuple[str, ...]

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| for a non-string and |ValueError| for a non-member string."""
        cls.validate_string(value)
        if value in cls._members:
            return
        raise ValueError("must be one of %s, got '%s'" % (cls._members, value))
|
||||
|
||||
|
||||
class XsdAnyUri(BaseStringType):
    """URI string, accepted without pattern validation.

    There's a regex in the spec this is supposed to meet, but the current assessment
    is that spending cycles on validating wouldn't be worth it for the number of
    programming errors it would catch.
    """
|
||||
|
||||
|
||||
class XsdBoolean(BaseSimpleType):
    """Boolean simple-type spelled "1", "0", "true" or "false" in XML."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> bool:
        """Return the bool for `str_value`; raise |InvalidXmlError| for any other spelling."""
        if str_value in ("1", "true"):
            return True
        if str_value in ("0", "false"):
            return False
        raise InvalidXmlError(
            "value must be one of '1', '0', 'true' or 'false', got '%s'" % str_value
        )

    @classmethod
    def convert_to_xml(cls, value: bool) -> str:
        """Return "1" for |True| and "0" for |False|."""
        xml_by_bool = {True: "1", False: "0"}
        return xml_by_bool[value]

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| unless `value` compares equal to |True| or |False|."""
        if value in (True, False):
            return
        raise TypeError(
            "only True or False (and possibly None) may be assigned, got '%s'" % value
        )
|
||||
|
||||
|
||||
class XsdId(BaseStringType):
    """String that must begin with a letter or underscore and cannot contain any colons.

    Those rules are not checked here; the type is not fully validated because it is
    not used in the external API.
    """
|
||||
|
||||
|
||||
class XsdInt(BaseIntType):
    """Integer limited to the range -2147483648 to 2147483647 inclusive."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int within the permitted range."""
        lower, upper = -2147483648, 2147483647
        cls.validate_int_in_range(value, lower, upper)
|
||||
|
||||
|
||||
class XsdLong(BaseIntType):
    """Integer limited to the range -9223372036854775808 to 9223372036854775807 inclusive."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int within the permitted range."""
        lower, upper = -9223372036854775808, 9223372036854775807
        cls.validate_int_in_range(value, lower, upper)
|
||||
|
||||
|
||||
class XsdString(BaseStringType):
    """String simple-type; adds nothing to the behavior of `BaseStringType`."""
|
||||
|
||||
|
||||
class XsdStringEnumeration(BaseStringEnumerationType):
    """Set of enumerated xsd:string values.

    Adds nothing to the behavior of `BaseStringEnumerationType`.
    """
|
||||
|
||||
|
||||
class XsdToken(BaseStringType):
    """Xsd:string with whitespace collapsing, e.g. multiple spaces reduced to one,
    leading and trailing space stripped.

    NOTE(review): no collapsing is implemented on this class itself; it adds nothing
    to the behavior of `BaseStringType`.
    """
|
||||
|
||||
|
||||
class XsdUnsignedInt(BaseIntType):
    """Integer limited to the range 0 to 4294967295 inclusive."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int within the permitted range."""
        upper = 4294967295
        cls.validate_int_in_range(value, 0, upper)
|
||||
|
||||
|
||||
class XsdUnsignedLong(BaseIntType):
    """Integer limited to the range 0 to 18446744073709551615 inclusive."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int within the permitted range."""
        upper = 18446744073709551615
        cls.validate_int_in_range(value, 0, upper)
|
||||
|
||||
|
||||
class ST_BrClear(XsdString):
    """String limited to "none", "left", "right" or "all"."""

    @classmethod
    def validate(cls, value: str) -> None:
        """Raise |TypeError| for a non-string and |ValueError| for any other string."""
        cls.validate_string(value)
        valid_values = ("none", "left", "right", "all")
        if value in valid_values:
            return
        raise ValueError("must be one of %s, got '%s'" % (valid_values, value))
|
||||
|
||||
|
||||
class ST_BrType(XsdString):
    """String limited to "page", "column" or "textWrapping"."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| for a non-string and |ValueError| for any other string."""
        cls.validate_string(value)
        valid_values = ("page", "column", "textWrapping")
        if value in valid_values:
            return
        raise ValueError("must be one of %s, got '%s'" % (valid_values, value))
|
||||
|
||||
|
||||
class ST_Coordinate(BaseIntType):
    """Coordinate read as a bare integer (taken as EMU) or as a number with a unit suffix."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Length:
        """Return the length for `str_value`.

        A value containing "i", "m" or "p" is handed to `ST_UniversalMeasure`; anything
        else is parsed as an integer and wrapped in `Emu`.
        """
        has_unit_letter = any(letter in str_value for letter in ("i", "m", "p"))
        if has_unit_letter:
            return ST_UniversalMeasure.convert_from_xml(str_value)
        return Emu(int(str_value))

    @classmethod
    def validate(cls, value: Any) -> None:
        """Apply the validation of `ST_CoordinateUnqualified` to `value`."""
        ST_CoordinateUnqualified.validate(value)
|
||||
|
||||
|
||||
class ST_CoordinateUnqualified(XsdLong):
    """Integer limited to the range -27273042329600 to 27273042316900 inclusive."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int within the permitted range."""
        lower, upper = -27273042329600, 27273042316900
        cls.validate_int_in_range(value, lower, upper)
|
||||
|
||||
|
||||
class ST_DateTime(BaseSimpleType):
    """Date-time simple-type converting between xsd:dateTime strings and `datetime`."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> dt.datetime:
        """Convert an xsd:dateTime string to a datetime object.

        Never raises; a value that cannot be parsed produces 1970-01-01 UTC.
        """
        utc = dt.timezone.utc
        try:
            # -- handle trailing 'Z' (Zulu/UTC), common in Word files --
            if str_value.endswith("Z"):
                try:
                    # -- optional fractional-seconds case is tried first --
                    parsed = dt.datetime.strptime(str_value, "%Y-%m-%dT%H:%M:%S.%fZ")
                except ValueError:
                    parsed = dt.datetime.strptime(str_value, "%Y-%m-%dT%H:%M:%SZ")
                return parsed.replace(tzinfo=utc)

            # -- handles explicit offsets like +00:00, -05:00, or naive datetimes --
            try:
                return dt.datetime.fromisoformat(str_value)
            except ValueError:
                pass

            # -- fall back to a naive datetime, with then without fractional seconds --
            try:
                return dt.datetime.strptime(str_value, "%Y-%m-%dT%H:%M:%S.%f")
            except ValueError:
                return dt.datetime.strptime(str_value, "%Y-%m-%dT%H:%M:%S")
        except Exception:
            # -- parse anything reasonable, but never raise, just use default epoch time --
            return dt.datetime(1970, 1, 1, tzinfo=utc)

    @classmethod
    def convert_to_xml(cls, value: dt.datetime) -> str:
        """Return `value` as a UTC xsd:dateTime string with a "Z" suffix."""
        # -- a naive datetime is made timezone-aware assuming the local timezone --
        aware = value.astimezone() if value.tzinfo is None else value
        # -- convert to UTC if not already, then format with 'Z' suffix --
        return aware.astimezone(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| unless `value` is a `datetime.datetime`."""
        if isinstance(value, dt.datetime):
            return
        raise TypeError("only a datetime.datetime object may be assigned, got '%s'" % value)
|
||||
|
||||
|
||||
class ST_DecimalNumber(XsdInt):
    """Decimal-number simple-type; adds nothing to the behavior of `XsdInt`."""
|
||||
|
||||
|
||||
class ST_DrawingElementId(XsdUnsignedInt):
    """Drawing-element id simple-type; adds nothing to the behavior of `XsdUnsignedInt`."""
|
||||
|
||||
|
||||
class ST_HexColor(BaseStringType):
    """Color simple-type read as an |RGBColor|, or as "auto"."""

    @classmethod
    def convert_from_xml(  # pyright: ignore[reportIncompatibleMethodOverride]
        cls, str_value: str
    ) -> RGBColor | str:
        """Return `ST_HexColorAuto.AUTO` for "auto", otherwise the parsed |RGBColor|."""
        is_auto = str_value == "auto"
        return ST_HexColorAuto.AUTO if is_auto else RGBColor.from_string(str_value)

    @classmethod
    def convert_to_xml(  # pyright: ignore[reportIncompatibleMethodOverride]
        cls, value: RGBColor
    ) -> str:
        """Return `value` as six hex digits, alpha numerals uppercase for consistency."""
        # -- `value` is expected to be a 3-tuple of ints in range 0-255 --
        return "%02X%02X%02X" % value

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |ValueError| unless `value` is an |RGBColor| object."""
        if isinstance(value, RGBColor):
            return
        raise ValueError(
            "rgb color value must be RGBColor object, got %s %s" % (type(value), value)
        )
|
||||
|
||||
|
||||
class ST_HexColorAuto(XsdStringEnumeration):
    """Value for the `w:color[@val="auto"]` attribute setting."""

    AUTO = "auto"

    # -- the only permitted value --
    _members = (AUTO,)
|
||||
|
||||
|
||||
class ST_HpsMeasure(XsdUnsignedLong):
    """Half-point measure, e.g. 24.0 represents 12.0 points."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Length:
        """Return the length for `str_value`.

        A value containing "m", "n" or "p" is handed to `ST_UniversalMeasure`; anything
        else is parsed as an integer count of half-points.
        """
        has_unit_letter = any(letter in str_value for letter in ("m", "n", "p"))
        if has_unit_letter:
            return ST_UniversalMeasure.convert_from_xml(str_value)
        return Pt(int(str_value) / 2.0)

    @classmethod
    def convert_to_xml(cls, value: int | Length) -> str:
        """Return `value` as a whole number of half-points, truncated by `int()`."""
        points = Emu(value).pt
        return str(int(points * 2))
|
||||
|
||||
|
||||
class ST_Merge(XsdStringEnumeration):
    """Valid values for the `<w:xMerge val="">` attribute."""

    CONTINUE = "continue"
    RESTART = "restart"

    # -- the permitted values --
    _members = (CONTINUE, RESTART)
|
||||
|
||||
|
||||
class ST_OnOff(XsdBoolean):
    """Boolean simple-type that also accepts the spellings "on" and "off"."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> bool:
        """Return the bool for `str_value`; raise |InvalidXmlError| for any other spelling."""
        if str_value in ("1", "true", "on"):
            return True
        if str_value in ("0", "false", "off"):
            return False
        raise InvalidXmlError(
            "value must be one of '1', '0', 'true', 'false', 'on', or 'o"
            "ff', got '%s'" % str_value
        )
|
||||
|
||||
|
||||
class ST_PositiveCoordinate(XsdLong):
    """Coordinate limited to the range 0 to 27273042316900 inclusive."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Length:
        """Parse `str_value` as an integer and wrap it in `Emu`."""
        emu_count = int(str_value)
        return Emu(emu_count)

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int within the permitted range."""
        upper = 27273042316900
        cls.validate_int_in_range(value, 0, upper)
|
||||
|
||||
|
||||
class ST_RelationshipId(XsdString):
    """Relationship-id simple-type; adds nothing to the behavior of `XsdString`."""
|
||||
|
||||
|
||||
class ST_SignedTwipsMeasure(XsdInt):
    """Signed measure read as a count of twips or as a number with a unit suffix."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Length:
        """Return the length for `str_value`.

        A value containing "i", "m" or "p" is handed to `ST_UniversalMeasure`; anything
        else is parsed as a float, rounded, and wrapped in `Twips`.
        """
        has_unit_letter = any(letter in str_value for letter in ("i", "m", "p"))
        if has_unit_letter:
            return ST_UniversalMeasure.convert_from_xml(str_value)
        twips_count = int(round(float(str_value)))
        return Twips(twips_count)

    @classmethod
    def convert_to_xml(cls, value: int | Length) -> str:
        """Return the `twips` value of `Emu(value)` as a string."""
        return str(Emu(value).twips)
|
||||
|
||||
|
||||
class ST_String(XsdString):
    """String simple-type; adds nothing to the behavior of `XsdString`."""
|
||||
|
||||
|
||||
class ST_TblLayoutType(XsdString):
    """String limited to "fixed" or "autofit"."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| for a non-string and |ValueError| for any other string."""
        cls.validate_string(value)
        valid_values = ("fixed", "autofit")
        if value in valid_values:
            return
        raise ValueError("must be one of %s, got '%s'" % (valid_values, value))
|
||||
|
||||
|
||||
class ST_TblWidth(XsdString):
    """String limited to "auto", "dxa", "nil" or "pct"."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| for a non-string and |ValueError| for any other string."""
        cls.validate_string(value)
        valid_values = ("auto", "dxa", "nil", "pct")
        if value in valid_values:
            return
        raise ValueError("must be one of %s, got '%s'" % (valid_values, value))
|
||||
|
||||
|
||||
class ST_TwipsMeasure(XsdUnsignedLong):
    """Measure read as a count of twips or as a number with a unit suffix."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Length:
        """Return the length for `str_value`.

        A value containing "i", "m" or "p" is handed to `ST_UniversalMeasure`; anything
        else is parsed as an integer and wrapped in `Twips`.
        """
        has_unit_letter = any(letter in str_value for letter in ("i", "m", "p"))
        if has_unit_letter:
            return ST_UniversalMeasure.convert_from_xml(str_value)
        return Twips(int(str_value))

    @classmethod
    def convert_to_xml(cls, value: int | Length) -> str:
        """Return the `twips` value of `Emu(value)` as a string."""
        return str(Emu(value).twips)
|
||||
|
||||
|
||||
class ST_UniversalMeasure(BaseSimpleType):
    """Measure written as a number followed by a two-letter unit, e.g. "1.5in"."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Emu:
        """Return `str_value` as `Emu`, rounded to the nearest whole EMU.

        The last two characters name the unit and everything before them is parsed as
        a float. An unrecognized unit raises |KeyError|.
        """
        emu_per_unit = {
            "mm": 36000,
            "cm": 360000,
            "in": 914400,
            "pt": 12700,
            "pc": 152400,
            "pi": 152400,
        }
        # -- the number is parsed before the unit is looked up --
        quantity = float(str_value[:-2])
        multiplier = emu_per_unit[str_value[-2:]]
        return Emu(int(round(quantity * multiplier)))
|
||||
|
||||
|
||||
class ST_VerticalAlignRun(XsdStringEnumeration):
    """Valid values for `w:vertAlign/@val`."""

    BASELINE = "baseline"
    SUPERSCRIPT = "superscript"
    SUBSCRIPT = "subscript"

    # -- the permitted values --
    _members = (BASELINE, SUPERSCRIPT, SUBSCRIPT)
|
||||
@@ -0,0 +1,320 @@
|
||||
"""Custom element classes related to the styles part."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from docx.enum.style import WD_STYLE_TYPE
|
||||
from docx.oxml.simpletypes import ST_DecimalNumber, ST_OnOff, ST_String
|
||||
from docx.oxml.xmlchemy import (
|
||||
BaseOxmlElement,
|
||||
OptionalAttribute,
|
||||
RequiredAttribute,
|
||||
ZeroOrMore,
|
||||
ZeroOrOne,
|
||||
)
|
||||
|
||||
|
||||
def styleId_from_name(name):
    """Return the style id corresponding to `name`.

    The lowercase names "caption" and "heading 1" through "heading 9" are special
    cases that map to a capitalized id (e.g. "Heading1"). Any other name maps to
    itself with all spaces removed.
    """
    special_case_ids = {
        "caption": "Caption",
        "heading 1": "Heading1",
        "heading 2": "Heading2",
        "heading 3": "Heading3",
        "heading 4": "Heading4",
        "heading 5": "Heading5",
        "heading 6": "Heading6",
        "heading 7": "Heading7",
        "heading 8": "Heading8",
        "heading 9": "Heading9",
    }
    default_id = name.replace(" ", "")
    return special_case_ids.get(name, default_id)
|
||||
|
||||
|
||||
class CT_LatentStyles(BaseOxmlElement):
    """`w:latentStyles` element, defining behavior defaults for latent styles.

    Contains `w:lsdException` child elements that each override those defaults for a
    named latent style.
    """

    lsdException = ZeroOrMore("w:lsdException", successors=())

    count = OptionalAttribute("w:count", ST_DecimalNumber)
    defLockedState = OptionalAttribute("w:defLockedState", ST_OnOff)
    defQFormat = OptionalAttribute("w:defQFormat", ST_OnOff)
    defSemiHidden = OptionalAttribute("w:defSemiHidden", ST_OnOff)
    defUIPriority = OptionalAttribute("w:defUIPriority", ST_DecimalNumber)
    defUnhideWhenUsed = OptionalAttribute("w:defUnhideWhenUsed", ST_OnOff)

    def bool_prop(self, attr_name):
        """Return the boolean value of the attribute named `attr_name`, or |False| if
        it is not present."""
        attr_value = getattr(self, attr_name)
        return False if attr_value is None else attr_value

    def get_by_name(self, name):
        """Return the `w:lsdException` child having `name`, or |None| if not found."""
        matches = self.xpath('w:lsdException[@w:name="%s"]' % name)
        return matches[0] if matches else None

    def set_bool_prop(self, attr_name, value):
        """Set the on/off attribute named `attr_name` to `bool(value)`."""
        on_off = bool(value)
        setattr(self, attr_name, on_off)
|
||||
|
||||
|
||||
class CT_LsdException(BaseOxmlElement):
    """``<w:lsdException>`` element.

    Defines override visibility behaviors for a named latent style.
    """

    locked = OptionalAttribute("w:locked", ST_OnOff)
    name = RequiredAttribute("w:name", ST_String)
    qFormat = OptionalAttribute("w:qFormat", ST_OnOff)
    semiHidden = OptionalAttribute("w:semiHidden", ST_OnOff)
    uiPriority = OptionalAttribute("w:uiPriority", ST_DecimalNumber)
    unhideWhenUsed = OptionalAttribute("w:unhideWhenUsed", ST_OnOff)

    def delete(self):
        """Detach this `w:lsdException` element from its parent element."""
        parent = self.getparent()
        parent.remove(self)

    def on_off_prop(self, attr_name):
        """Value of the attribute named `attr_name`, passed through unchanged.

        Unlike a boolean property, |None| is returned as-is rather than mapped to |False|.
        """
        attr_value = getattr(self, attr_name)
        return attr_value

    def set_on_off_prop(self, attr_name, value):
        """Assign `value`, unchanged, to the on/off attribute named `attr_name`."""
        setattr(self, attr_name, value)
|
||||
|
||||
|
||||
class CT_Style(BaseOxmlElement):
    """A ``<w:style>`` element, representing a style definition."""

    # -- child tags in sequence; each child declared below is given the tags that follow
    # -- its own tag in this sequence as its `successors` --
    _tag_seq = (
        "w:name",
        "w:aliases",
        "w:basedOn",
        "w:next",
        "w:link",
        "w:autoRedefine",
        "w:hidden",
        "w:uiPriority",
        "w:semiHidden",
        "w:unhideWhenUsed",
        "w:qFormat",
        "w:locked",
        "w:personal",
        "w:personalCompose",
        "w:personalReply",
        "w:rsid",
        "w:pPr",
        "w:rPr",
        "w:tblPr",
        "w:trPr",
        "w:tcPr",
        "w:tblStylePr",
    )
    name = ZeroOrOne("w:name", successors=_tag_seq[1:])
    basedOn = ZeroOrOne("w:basedOn", successors=_tag_seq[3:])
    next = ZeroOrOne("w:next", successors=_tag_seq[4:])
    uiPriority = ZeroOrOne("w:uiPriority", successors=_tag_seq[8:])
    semiHidden = ZeroOrOne("w:semiHidden", successors=_tag_seq[9:])
    unhideWhenUsed = ZeroOrOne("w:unhideWhenUsed", successors=_tag_seq[10:])
    qFormat = ZeroOrOne("w:qFormat", successors=_tag_seq[11:])
    locked = ZeroOrOne("w:locked", successors=_tag_seq[12:])
    pPr = ZeroOrOne("w:pPr", successors=_tag_seq[17:])
    rPr = ZeroOrOne("w:rPr", successors=_tag_seq[18:])
    # -- only needed while declaring the children above; not left behind as a class attribute --
    del _tag_seq

    type: WD_STYLE_TYPE | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:type", WD_STYLE_TYPE
    )
    styleId: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:styleId", ST_String
    )
    default = OptionalAttribute("w:default", ST_OnOff)
    customStyle = OptionalAttribute("w:customStyle", ST_OnOff)

    @property
    def basedOn_val(self):
        """Value of `w:basedOn/@w:val` or |None| if not present."""
        basedOn = self.basedOn
        return None if basedOn is None else basedOn.val

    @basedOn_val.setter
    def basedOn_val(self, value):
        """Set `w:basedOn/@w:val` to `value`; |None| removes the `w:basedOn` child."""
        if value is None:
            self._remove_basedOn()
            return
        self.get_or_add_basedOn().val = value

    @property
    def base_style(self):
        """Sibling CT_Style element this style is based on.

        |None| if this style has no `w:basedOn` child or the parent element has no style
        with that style id.
        """
        basedOn = self.basedOn
        if basedOn is None:
            return None
        # -- `.get_by_id()` on the parent already gives |None| when nothing matches --
        return self.getparent().get_by_id(basedOn.val)

    def delete(self):
        """Remove this `w:style` element from its parent `w:styles` element."""
        self.getparent().remove(self)

    @property
    def locked_val(self):
        """Value of `w:locked/@w:val` or |False| if not present."""
        locked = self.locked
        return False if locked is None else locked.val

    @locked_val.setter
    def locked_val(self, value):
        """Replace any `w:locked` child; a new one is added only when `value` is truthy."""
        self._remove_locked()
        if bool(value) is True:
            self._add_locked().val = value

    @property
    def name_val(self):
        """Value of `w:name/@w:val` or |None| if not present."""
        name = self.name
        return None if name is None else name.val

    @name_val.setter
    def name_val(self, value):
        """Replace any `w:name` child; a new one is added unless `value` is |None|."""
        self._remove_name()
        if value is not None:
            self._add_name().val = value

    @property
    def next_style(self):
        """Sibling CT_Style element identified by the value of `w:next/@w:val`.

        |None| if there is no `w:next` child or no style with that style id is found.
        """
        next_elm = self.next
        if next_elm is None:
            return None
        return self.getparent().get_by_id(next_elm.val)  # None if not found

    @property
    def qFormat_val(self):
        """Value of `w:qFormat/@w:val` or |False| if not present."""
        qFormat = self.qFormat
        return False if qFormat is None else qFormat.val

    @qFormat_val.setter
    def qFormat_val(self, value):
        """Replace any `w:qFormat` child; a bare one is added only when `value` is truthy."""
        self._remove_qFormat()
        if bool(value):
            self._add_qFormat()

    @property
    def semiHidden_val(self):
        """Value of `w:semiHidden/@w:val` or |False| if not present."""
        semiHidden = self.semiHidden
        return False if semiHidden is None else semiHidden.val

    @semiHidden_val.setter
    def semiHidden_val(self, value):
        """Replace any `w:semiHidden` child; a new one is added only when `value` is truthy."""
        self._remove_semiHidden()
        if bool(value) is True:
            self._add_semiHidden().val = value

    @property
    def uiPriority_val(self):
        """Value of `w:uiPriority/@w:val` or |None| if not present."""
        uiPriority = self.uiPriority
        return None if uiPriority is None else uiPriority.val

    @uiPriority_val.setter
    def uiPriority_val(self, value):
        """Replace any `w:uiPriority` child; a new one is added unless `value` is |None|."""
        self._remove_uiPriority()
        if value is not None:
            self._add_uiPriority().val = value

    @property
    def unhideWhenUsed_val(self):
        """Value of `w:unhideWhenUsed/@w:val` or |False| if not present."""
        unhideWhenUsed = self.unhideWhenUsed
        return False if unhideWhenUsed is None else unhideWhenUsed.val

    @unhideWhenUsed_val.setter
    def unhideWhenUsed_val(self, value):
        """Replace any `w:unhideWhenUsed` child; a new one is added only when `value` is truthy."""
        self._remove_unhideWhenUsed()
        if bool(value) is True:
            self._add_unhideWhenUsed().val = value
|
||||
|
||||
|
||||
class CT_Styles(BaseOxmlElement):
    """``<w:styles>`` element, the root element of a styles part, i.e. styles.xml."""

    _tag_seq = ("w:docDefaults", "w:latentStyles", "w:style")
    latentStyles = ZeroOrOne("w:latentStyles", successors=_tag_seq[2:])
    style = ZeroOrMore("w:style", successors=())
    del _tag_seq

    def add_style_of_type(self, name, style_type, builtin):
        """Return a newly added `w:style` element having `name` and `style_type`.

        `w:style/@customStyle` is left unset for a built-in style and set to |True|
        otherwise. The style id is derived from `name` by `styleId_from_name()`.
        """
        new_style = self.add_style()
        new_style.type = style_type
        new_style.customStyle = None if builtin else True
        new_style.styleId = styleId_from_name(name)
        new_style.name_val = name
        return new_style

    def default_for(self, style_type):
        """Return the last `w:style` of `style_type` marked default, |None| if there is none."""
        last_default = None
        # -- spec calls for last default in document order, so later matches win --
        for candidate in self._iter_styles():
            if candidate.type == style_type and candidate.default:
                last_default = candidate
        return last_default

    def get_by_id(self, styleId: str) -> CT_Style | None:
        """`w:style` child where @styleId = `styleId`.

        |None| if not found.
        """
        matches = self.xpath(f'w:style[@w:styleId="{styleId}"]')
        return matches[0] if matches else None

    def get_by_name(self, name: str) -> CT_Style | None:
        """`w:style` child with `w:name` grandchild having value `name`.

        |None| if not found.
        """
        matches = self.xpath('w:style[w:name/@w:val="%s"]' % name)
        return matches[0] if matches else None

    def _iter_styles(self):
        """Generate each of the `w:style` child elements in document order."""
        return (style_elm for style_elm in self.xpath("w:style"))
|
||||
@@ -0,0 +1,977 @@
|
||||
"""Custom element classes for tables."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable, cast
|
||||
|
||||
from docx.enum.table import WD_CELL_VERTICAL_ALIGNMENT, WD_ROW_HEIGHT_RULE, WD_TABLE_DIRECTION
|
||||
from docx.exceptions import InvalidSpanError
|
||||
from docx.oxml.ns import nsdecls, qn
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.oxml.shared import CT_DecimalNumber
|
||||
from docx.oxml.simpletypes import (
|
||||
ST_Merge,
|
||||
ST_TblLayoutType,
|
||||
ST_TblWidth,
|
||||
ST_TwipsMeasure,
|
||||
XsdInt,
|
||||
)
|
||||
from docx.oxml.text.paragraph import CT_P
|
||||
from docx.oxml.xmlchemy import (
|
||||
BaseOxmlElement,
|
||||
OneAndOnlyOne,
|
||||
OneOrMore,
|
||||
OptionalAttribute,
|
||||
RequiredAttribute,
|
||||
ZeroOrMore,
|
||||
ZeroOrOne,
|
||||
)
|
||||
from docx.shared import Emu, Length, Twips
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.enum.table import WD_TABLE_ALIGNMENT
|
||||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||
from docx.oxml.shared import CT_OnOff, CT_String
|
||||
from docx.oxml.text.parfmt import CT_Jc
|
||||
|
||||
|
||||
class CT_Height(BaseOxmlElement):
    """Used for `w:trHeight` to specify a row height and row height rule."""

    # -- optional `w:val` attribute, declared with the `ST_TwipsMeasure` simple-type --
    val: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", ST_TwipsMeasure
    )
    # -- optional `w:hRule` attribute, declared with the `WD_ROW_HEIGHT_RULE` enumeration --
    hRule: WD_ROW_HEIGHT_RULE | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:hRule", WD_ROW_HEIGHT_RULE
    )
|
||||
|
||||
|
||||
class CT_Row(BaseOxmlElement):
    """``<w:tr>`` element."""

    add_tc: Callable[[], CT_Tc]
    get_or_add_trPr: Callable[[], CT_TrPr]
    _add_trPr: Callable[[], CT_TrPr]

    tc_lst: list[CT_Tc]
    # -- custom inserter below --
    tblPrEx: CT_TblPrEx | None = ZeroOrOne("w:tblPrEx")  # pyright: ignore[reportAssignmentType]
    # -- custom inserter below --
    trPr: CT_TrPr | None = ZeroOrOne("w:trPr")  # pyright: ignore[reportAssignmentType]
    tc = ZeroOrMore("w:tc")

    @property
    def grid_after(self) -> int:
        """Count of unpopulated layout-grid cells at the end of this row, 0 without `w:trPr`."""
        trPr = self.trPr
        return 0 if trPr is None else trPr.grid_after

    @property
    def grid_before(self) -> int:
        """Count of unpopulated layout-grid cells at the start of this row, 0 without `w:trPr`."""
        trPr = self.trPr
        return 0 if trPr is None else trPr.grid_before

    def tc_at_grid_offset(self, grid_offset: int) -> CT_Tc:
        """The `tc` element in this tr that starts exactly at `grid_offset`.

        Raises ValueError when this `w:tr` contains no `w:tc` starting at `grid_offset`.
        """
        # -- grid position at which the next cell starts; omitted leading cells count too --
        cell_start = self.grid_before

        for tc in self.tc_lst:
            # -- already beyond the requested offset, so no cell starts exactly there --
            if cell_start > grid_offset:
                break
            if cell_start == grid_offset:
                return tc
            # -- advance by the number of layout-grid cells this cell occupies --
            cell_start += tc.grid_span

        raise ValueError(f"no `tc` element at grid_offset={grid_offset}")

    @property
    def tr_idx(self) -> int:
        """Index of this `w:tr` element within its parent `w:tbl` element."""
        parent_tbl = cast(CT_Tbl, self.getparent())
        return parent_tbl.tr_lst.index(self)

    @property
    def trHeight_hRule(self) -> WD_ROW_HEIGHT_RULE | None:
        """The value of `./w:trPr/w:trHeight/@w:hRule`, or |None| if not present."""
        trPr = self.trPr
        return None if trPr is None else trPr.trHeight_hRule

    @trHeight_hRule.setter
    def trHeight_hRule(self, value: WD_ROW_HEIGHT_RULE | None):
        """Assign `value` through the `w:trPr` child, adding that child when absent."""
        self.get_or_add_trPr().trHeight_hRule = value

    @property
    def trHeight_val(self):
        """Return the value of `w:trPr/w:trHeight@w:val`, or |None| if not present."""
        trPr = self.trPr
        return None if trPr is None else trPr.trHeight_val

    @trHeight_val.setter
    def trHeight_val(self, value: Length | None):
        """Assign `value` through the `w:trPr` child, adding that child when absent."""
        self.get_or_add_trPr().trHeight_val = value

    def _insert_tblPrEx(self, tblPrEx: CT_TblPrEx):
        """Insert `tblPrEx` as the first child of this row."""
        self.insert(0, tblPrEx)

    def _insert_trPr(self, trPr: CT_TrPr):
        """Insert `trPr` right after a `w:tblPrEx` child when present, else as first child."""
        preceding_tblPrEx = self.tblPrEx
        if preceding_tblPrEx is None:
            self.insert(0, trPr)
            return
        preceding_tblPrEx.addnext(trPr)

    def _new_tc(self):
        """Return a new `w:tc` element as produced by `CT_Tc.new()`."""
        return CT_Tc.new()
|
||||
|
||||
|
||||
class CT_Tbl(BaseOxmlElement):
    """``<w:tbl>`` element."""

    add_tr: Callable[[], CT_Row]
    tr_lst: list[CT_Row]

    tblPr: CT_TblPr = OneAndOnlyOne("w:tblPr")  # pyright: ignore[reportAssignmentType]
    tblGrid: CT_TblGrid = OneAndOnlyOne("w:tblGrid")  # pyright: ignore[reportAssignmentType]
    tr = ZeroOrMore("w:tr")

    @property
    def bidiVisual_val(self) -> bool | None:
        """Value of `./w:tblPr/w:bidiVisual/@w:val` or |None| if not present.

        Controls whether table cells are displayed right-to-left or left-to-right.
        """
        bidiVisual = self.tblPr.bidiVisual
        return None if bidiVisual is None else bidiVisual.val

    @bidiVisual_val.setter
    def bidiVisual_val(self, value: WD_TABLE_DIRECTION | None):
        """Store `bool(value)` on `w:bidiVisual`; |None| removes that element."""
        tblPr = self.tblPr
        if value is None:
            tblPr._remove_bidiVisual()  # pyright: ignore[reportPrivateUsage]
            return
        tblPr.get_or_add_bidiVisual().val = bool(value)

    @property
    def col_count(self):
        """The number of grid columns in this table."""
        gridCols = self.tblGrid.gridCol_lst
        return len(gridCols)

    def iter_tcs(self):
        """Generate each of the `w:tc` elements in this table, left to right and top to
        bottom.

        All cells of the first row come first, then all cells of the second row, and so on.
        """
        for tr in self.tr_lst:
            yield from tr.tc_lst

    @classmethod
    def new_tbl(cls, rows: int, cols: int, width: Length) -> CT_Tbl:
        """Return a new `w:tbl` element having `rows` rows and `cols` columns.

        `width` is distributed evenly between the columns.
        """
        tbl_xml = cls._tbl_xml(rows, cols, width)
        return cast(CT_Tbl, parse_xml(tbl_xml))

    @property
    def tblStyle_val(self) -> str | None:
        """`w:tblPr/w:tblStyle/@w:val` (a table style id) or |None| if not present."""
        tblStyle = self.tblPr.tblStyle
        return None if tblStyle is None else tblStyle.val

    @tblStyle_val.setter
    def tblStyle_val(self, styleId: str | None) -> None:
        """Set the value of `w:tblPr/w:tblStyle/@w:val` (a table style id) to `styleId`.

        If `styleId` is None, remove the `w:tblStyle` element.
        """
        tblPr = self.tblPr
        # -- any existing element is dropped first, then re-added only for a real value --
        tblPr._remove_tblStyle()  # pyright: ignore[reportPrivateUsage]
        if styleId is not None:
            tblPr._add_tblStyle().val = styleId  # pyright: ignore[reportPrivateUsage]

    @classmethod
    def _tbl_xml(cls, rows: int, cols: int, width: Length) -> str:
        """XML text for a `w:tbl` element having `rows` rows and `cols` columns."""
        # -- an equal share of `width` for each column; zero when there are no columns --
        col_width = Emu(width // cols) if cols > 0 else Emu(0)
        xml_parts = [
            f"<w:tbl {nsdecls('w')}>\n",
            " <w:tblPr>\n",
            ' <w:tblW w:type="auto" w:w="0"/>\n',
            ' <w:tblLook w:firstColumn="1" w:firstRow="1"\n',
            ' w:lastColumn="0" w:lastRow="0" w:noHBand="0"\n',
            ' w:noVBand="1" w:val="04A0"/>\n',
            " </w:tblPr>\n",
            cls._tblGrid_xml(cols, col_width),
            cls._trs_xml(rows, cols, col_width),
            "</w:tbl>\n",
        ]
        return "".join(xml_parts)

    @classmethod
    def _tblGrid_xml(cls, col_count: int, col_width: Length) -> str:
        """XML text for a `w:tblGrid` element with `col_count` `w:gridCol` children."""
        gridCols_xml = "".join(
            ' <w:gridCol w:w="%d"/>\n' % col_width.twips for _ in range(col_count)
        )
        return " <w:tblGrid>\n" + gridCols_xml + " </w:tblGrid>\n"

    @classmethod
    def _trs_xml(cls, row_count: int, col_count: int, col_width: Length) -> str:
        """XML text for `row_count` identical `w:tr` elements of `col_count` cells each."""
        tr_xml = f" <w:tr>\n{cls._tcs_xml(col_count, col_width)} </w:tr>\n"
        return tr_xml * row_count

    @classmethod
    def _tcs_xml(cls, col_count: int, col_width: Length) -> str:
        """XML text for `col_count` identical `w:tc` elements, each `col_width` wide."""
        tc_xml = (
            f" <w:tc>\n"
            f" <w:tcPr>\n"
            f' <w:tcW w:type="dxa" w:w="{col_width.twips}"/>\n'
            f" </w:tcPr>\n"
            f" <w:p/>\n"
            f" </w:tc>\n"
        )
        return tc_xml * col_count
|
||||
|
||||
|
||||
class CT_TblGrid(BaseOxmlElement):
    """`w:tblGrid` element.

    Child of `w:tbl`, holds `w:gridCol` elements that define column count, width, etc.
    """

    add_gridCol: Callable[[], CT_TblGridCol]
    gridCol_lst: list[CT_TblGridCol]

    # -- zero or more `w:gridCol` children, declared with `w:tblGridChange` as successor --
    gridCol = ZeroOrMore("w:gridCol", successors=("w:tblGridChange",))
|
||||
|
||||
|
||||
class CT_TblGridCol(BaseOxmlElement):
    """`w:gridCol` element, child of `w:tblGrid`, defines a table column."""

    w: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:w", ST_TwipsMeasure
    )

    @property
    def gridCol_idx(self) -> int:
        """Position of this `w:gridCol` among the `w:gridCol` children of its parent."""
        parent_tblGrid = cast(CT_TblGrid, self.getparent())
        sibling_gridCols = parent_tblGrid.gridCol_lst
        return sibling_gridCols.index(self)
|
||||
|
||||
|
||||
class CT_TblLayoutType(BaseOxmlElement):
    """`w:tblLayout` element.

    Specifies whether column widths are fixed or can be automatically adjusted based on
    content.
    """

    # -- optional `w:type` attribute, declared with the `ST_TblLayoutType` simple-type --
    type: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:type", ST_TblLayoutType
    )
|
||||
|
||||
|
||||
class CT_TblPr(BaseOxmlElement):
    """``<w:tblPr>`` element, child of ``<w:tbl>``.

    Holds child elements that define table properties such as style and borders.
    """

    get_or_add_bidiVisual: Callable[[], CT_OnOff]
    get_or_add_jc: Callable[[], CT_Jc]
    get_or_add_tblLayout: Callable[[], CT_TblLayoutType]
    _add_tblStyle: Callable[[], CT_String]
    _remove_bidiVisual: Callable[[], None]
    _remove_jc: Callable[[], None]
    _remove_tblStyle: Callable[[], None]

    # -- child tags in sequence; each child declared below is given the tags that follow
    # -- its own tag in this sequence as its `successors` --
    _tag_seq = (
        "w:tblStyle",
        "w:tblpPr",
        "w:tblOverlap",
        "w:bidiVisual",
        "w:tblStyleRowBandSize",
        "w:tblStyleColBandSize",
        "w:tblW",
        "w:jc",
        "w:tblCellSpacing",
        "w:tblInd",
        "w:tblBorders",
        "w:shd",
        "w:tblLayout",
        "w:tblCellMar",
        "w:tblLook",
        "w:tblCaption",
        "w:tblDescription",
        "w:tblPrChange",
    )
    tblStyle: CT_String | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:tblStyle", successors=_tag_seq[1:]
    )
    bidiVisual: CT_OnOff | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:bidiVisual", successors=_tag_seq[4:]
    )
    jc: CT_Jc | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:jc", successors=_tag_seq[8:]
    )
    tblLayout: CT_TblLayoutType | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:tblLayout", successors=_tag_seq[13:]
    )
    del _tag_seq

    @property
    def alignment(self) -> WD_TABLE_ALIGNMENT | None:
        """Horizontal alignment of table, |None| if `./w:jc` is not present."""
        jc = self.jc
        return None if jc is None else cast("WD_TABLE_ALIGNMENT | None", jc.val)

    @alignment.setter
    def alignment(self, value: WD_TABLE_ALIGNMENT | None):
        """Replace any `w:jc` child; a new one carrying `value` is added unless it is |None|."""
        self._remove_jc()
        if value is not None:
            self.get_or_add_jc().val = cast("WD_ALIGN_PARAGRAPH", value)

    @property
    def autofit(self) -> bool:
        """|False| when there is a `w:tblLayout` child with `@w:type="fixed"`.

        Otherwise |True|.
        """
        tblLayout = self.tblLayout
        if tblLayout is None:
            return True
        return tblLayout.type != "fixed"

    @autofit.setter
    def autofit(self, value: bool):
        """Set `w:tblLayout/@w:type` to "autofit" for a truthy `value`, else "fixed"."""
        layout_type = "autofit" if value else "fixed"
        self.get_or_add_tblLayout().type = layout_type

    @property
    def style(self):
        """Value of the ``val`` attribute of the ``<w:tblStyle>`` child, |None| if not present."""
        tblStyle = self.tblStyle
        return None if tblStyle is None else tblStyle.val

    @style.setter
    def style(self, value: str | None):
        """Replace any `w:tblStyle` child; a new one carrying `value` is added unless it is |None|."""
        self._remove_tblStyle()
        if value is not None:
            self._add_tblStyle().val = value
|
||||
|
||||
|
||||
class CT_TblPrEx(BaseOxmlElement):
    """`w:tblPrEx` element, exceptions to table-properties.

    Applied at a lower level, like a `w:tr`, to modify the appearance. Possibly used when
    two tables are merged. For more see:
    http://officeopenxml.com/WPtablePropertyExceptions.php
    """
|
||||
|
||||
|
||||
class CT_TblWidth(BaseOxmlElement):
    """Used for `w:tblW` and `w:tcW` and others, specifies a table-related width."""

    # the type for `w` attr is actually ST_MeasurementOrPercent, but using
    # XsdInt for now because only dxa (twips) values are being used. It's not
    # entirely clear what the semantics are for other values like -01.4mm
    w: int = RequiredAttribute("w:w", XsdInt)  # pyright: ignore[reportAssignmentType]
    type = RequiredAttribute("w:type", ST_TblWidth)

    @property
    def width(self) -> Length | None:
        """EMU length indicated by the combined `w:w` and `w:type` attrs.

        |None| for any `w:type` other than "dxa".
        """
        return Twips(self.w) if self.type == "dxa" else None

    @width.setter
    def width(self, value: Length):
        """Record `value` as a "dxa" width, converted to twips."""
        self.type = "dxa"
        width_in_twips = Emu(value).twips
        self.w = width_in_twips
|
||||
|
||||
|
||||
class CT_Tc(BaseOxmlElement):
|
||||
"""`w:tc` table cell element."""
|
||||
|
||||
add_p: Callable[[], CT_P]
|
||||
get_or_add_tcPr: Callable[[], CT_TcPr]
|
||||
p_lst: list[CT_P]
|
||||
tbl_lst: list[CT_Tbl]
|
||||
_insert_tbl: Callable[[CT_Tbl], CT_Tbl]
|
||||
_new_p: Callable[[], CT_P]
|
||||
|
||||
# -- tcPr has many successors, `._insert_tcPr()` is overridden below --
|
||||
tcPr: CT_TcPr | None = ZeroOrOne("w:tcPr") # pyright: ignore[reportAssignmentType]
|
||||
p = OneOrMore("w:p")
|
||||
tbl = OneOrMore("w:tbl")
|
||||
|
||||
@property
def bottom(self) -> int:
    """The row index that marks the bottom extent of the vertical span of this cell.

    This is one greater than the index of the bottom-most row of the span, like the stop
    value of a slice over the cell's rows.
    """
    # -- a cell with no vMerge ends in its own row --
    if self.vMerge is None:
        return self._tr_idx + 1
    tc_below = self._tc_below
    # -- the span also ends here when nothing below continues it --
    if tc_below is None or tc_below.vMerge != ST_Merge.CONTINUE:
        return self._tr_idx + 1
    return tc_below.bottom
|
||||
|
||||
def clear_content(self):
    """Remove all content elements, preserving `w:tcPr` element if present.

    Note that this leaves the `w:tc` element in an invalid state because it doesn't
    contain at least one block-level element. It's up to the caller to add a `w:p`
    child element as the last content element.
    """
    # -- every direct child except `w:tcPr` is selected for removal --
    removable_children = self.xpath("./*[not(self::w:tcPr)]")
    for child in removable_children:
        self.remove(child)
|
||||
|
||||
@property
def grid_offset(self) -> int:
    """Starting offset of `tc` in the layout-grid columns of its table.

    A cell in the leftmost grid-column has offset 0.
    """
    # -- start from the row's `grid_before`, then add the span of each preceding cell --
    offset = self._tr.grid_before
    for preceding_tc in self.xpath("./preceding-sibling::w:tc"):
        offset += preceding_tc.grid_span
    return offset
|
||||
|
||||
@property
def grid_span(self) -> int:
    """The integer number of columns this cell spans.

    Read from the `w:tcPr` child; 1 when this cell has no `w:tcPr` child.
    """
    tcPr = self.tcPr
    if tcPr is None:
        return 1
    return tcPr.grid_span

@grid_span.setter
def grid_span(self, value: int):
    """Assign `value` through the `w:tcPr` child, adding that child when absent."""
    self.get_or_add_tcPr().grid_span = value
|
||||
|
||||
@property
def inner_content_elements(self) -> list[CT_P | CT_Tbl]:
    """All `w:p` and `w:tbl` elements that are direct children of this cell.

    Elements appear in document order. Elements shaded by nesting in a `w:ins` or
    other "wrapper" element will not be included.
    """
    # -- `./` restricts the match to direct children of this `w:tc` --
    return self.xpath("./w:p | ./w:tbl")
|
||||
|
||||
def iter_block_items(self):
    """Generate each block-level content element of this cell, in the order they appear.

    A child counts as block-level content when it is a `w:p`, `w:tbl` or `w:sdt` element.
    """
    wanted_tags = (qn("w:p"), qn("w:tbl"), qn("w:sdt"))
    yield from (child for child in self if child.tag in wanted_tags)
|
||||
|
||||
@property
def left(self) -> int:
    """The grid column index at which this ``<w:tc>`` element appears.

    Same value as `.grid_offset`.
    """
    return self.grid_offset
|
||||
|
||||
def merge(self, other_tc: CT_Tc) -> CT_Tc:
    """Return top-left `w:tc` element of a new span.

    Span is formed by merging the rectangular region defined by using this tc
    element and `other_tc` as diagonal corners.
    """
    top_row_idx, left_col_idx, row_count, col_count = self._span_dimensions(other_tc)
    # -- the cell at the top-left corner of the region is the one that grows --
    top_tr = self._tbl.tr_lst[top_row_idx]
    top_left_tc = top_tr.tc_at_grid_offset(left_col_idx)
    top_left_tc._grow_to(col_count, row_count)
    return top_left_tc
|
||||
|
||||
@classmethod
def new(cls) -> CT_Tc:
    """A new `w:tc` element, containing an empty paragraph as the required EG_BlockLevelElt."""
    tc_xml = "<w:tc %s><w:p/></w:tc>" % nsdecls("w")
    return cast(CT_Tc, parse_xml(tc_xml))
|
||||
|
||||
@property
def right(self) -> int:
    """The grid column index that marks the right-side extent of the horizontal span
    of this cell.

    This is one greater than the index of the right-most column of the span, similar
    to how a slice of the cell's columns would be specified.
    """
    # -- starting grid column plus the number of grid columns this cell spans --
    return self.grid_offset + self.grid_span
|
||||
|
||||
@property
def top(self) -> int:
    """The top-most row index in the vertical span of this cell."""
    # -- a vMerge value other than "restart" defers to the cell above for the top row --
    if self.vMerge is not None and self.vMerge != ST_Merge.RESTART:
        return self._tc_above.top
    return self._tr_idx
|
||||
|
||||
@property
def vMerge(self) -> str | None:
    """The `vMerge_val` of the `w:tcPr` child, |None| when there is no `w:tcPr` child."""
    tcPr = self.tcPr
    return None if tcPr is None else tcPr.vMerge_val

@vMerge.setter
def vMerge(self, value: str | None):
    """Assign `value` through the `w:tcPr` child, adding that child when absent."""
    self.get_or_add_tcPr().vMerge_val = value
|
||||
|
||||
@property
def width(self) -> Length | None:
    """The `width` of the `w:tcPr` child, |None| when there is no `w:tcPr` child."""
    tcPr = self.tcPr
    return None if tcPr is None else tcPr.width

@width.setter
def width(self, value: Length):
    """Assign `value` through the `w:tcPr` child, adding that child when absent."""
    self.get_or_add_tcPr().width = value
|
||||
|
||||
def _add_width_of(self, other_tc: CT_Tc):
    """Add the width of `other_tc` to this cell.

    Does nothing if either this tc or `other_tc` does not have a specified width, that
    is, when either `.width` is |None|. A width of zero is a specified width and takes
    part in the sum.
    """
    # -- each width is read once; every read of `.width` goes back to the `w:tcPr` child --
    own_width = self.width
    other_width = other_tc.width
    # -- compare to |None| rather than testing truthiness so a zero width is not skipped --
    if own_width is None or other_width is None:
        return
    self.width = Length(own_width + other_width)
|
||||
|
||||
def _grow_to(self, width: int, height: int, top_tc: CT_Tc | None = None):
    """Grow this cell to `width` grid columns and `height` rows.

    This is accomplished by expanding horizontal spans and creating continuation
    cells to form vertical spans.
    """
    # -- the first call carries no `top_tc`, making this cell the top cell of the span --
    top_tc = self if top_tc is None else top_tc

    if top_tc is not self:
        # -- a cell below the top cell continues the vertical merge --
        vMerge = ST_Merge.CONTINUE
    elif height == 1:
        # -- the top cell of a one-row span takes no vMerge at all --
        vMerge = None
    else:
        vMerge = ST_Merge.RESTART

    self._span_to_width(width, top_tc, vMerge)
    if height > 1:
        tc_below = self._tc_below
        assert tc_below is not None
        tc_below._grow_to(width, height - 1, top_tc)
|
||||
|
||||
def _insert_tcPr(self, tcPr: CT_TcPr) -> CT_TcPr:
    """Override default `._insert_tcPr()`.

    Inserts `tcPr` as the first child of this cell and returns it.
    """
    # -- `tcPr`` has a large number of successors, but always comes first if it appears,
    # -- so just using insert(0, ...) rather than spelling out successors.
    self.insert(0, tcPr)
    return tcPr
|
||||
|
||||
@property
def _is_empty(self) -> bool:
    """True if this cell contains only a single empty `w:p` element.

    "Empty" here means the paragraph has no runs in its `r_lst`.
    """
    block_items = list(self.iter_block_items())
    if len(block_items) > 1:
        return False
    # -- cell must include at least one block item but can be a `w:tbl`, `w:sdt`,
    # -- `w:customXml` or a `w:p`; indexing raises IndexError when there is none --
    sole_item = block_items[0]
    if not isinstance(sole_item, CT_P):
        return False
    return len(sole_item.r_lst) == 0
|
||||
|
||||
def _move_content_to(self, other_tc: CT_Tc):
    """Append the content of this cell to `other_tc`.

    Leaves this cell with a single empty ``<w:p>`` element. Does nothing when `other_tc`
    is this cell or this cell is empty.
    """
    if other_tc is self or self._is_empty:
        return
    # -- an empty last paragraph in the target is dropped before content arrives --
    other_tc._remove_trailing_empty_p()
    # -- appending moves each element from self to other_tc --
    for moved_elm in self.iter_block_items():
        other_tc.append(moved_elm)
    # -- add back the required minimum single empty <w:p> element --
    replacement_p = self._new_p()
    self.append(replacement_p)
|
||||
|
||||
def _new_tbl(self) -> None:
    """Always raises |NotImplementedError|, pointing the caller to `CT_Tbl.new_tbl()`."""
    message = "use CT_Tbl.new_tbl() to add a new table, specifying rows and columns"
    raise NotImplementedError(message)
|
||||
|
||||
@property
def _next_tc(self) -> CT_Tc | None:
    """The `w:tc` element immediately following this one in this row.

    |None| if this is the last `w:tc` element in the row.
    """
    later_tcs = self.xpath("./following-sibling::w:tc")
    if not later_tcs:
        return None
    return later_tcs[0]
|
||||
|
||||
def _remove(self):
    """Remove this `w:tc` element from the XML tree.

    Asserts that this element has a parent.
    """
    parent_element = self.getparent()
    assert parent_element is not None
    parent_element.remove(self)
|
||||
|
||||
def _remove_trailing_empty_p(self):
    """Remove last content element from this cell if it's an empty `w:p` element.

    A `w:p` counts as empty when it has no runs in its `r_lst`.
    """
    # -- indexing raises IndexError when this cell has no block items at all --
    last_block_item = list(self.iter_block_items())[-1]
    if isinstance(last_block_item, CT_P) and len(last_block_item.r_lst) == 0:
        self.remove(last_block_item)
|
||||
|
||||
def _span_dimensions(self, other_tc: CT_Tc) -> tuple[int, int, int, int]:
    """Return a (top, left, height, width) 4-tuple specifying the extents of the
    merged cell formed by using this tc and `other_tc` as opposite corner
    extents.

    Raises |InvalidSpanError| when one of the shape checks below finds that the two
    cells do not bound a rectangular region.
    """

    def raise_on_inverted_L(a: CT_Tc, b: CT_Tc):
        # -- same top edge but different bottom edges --
        if a.top == b.top and a.bottom != b.bottom:
            raise InvalidSpanError("requested span not rectangular")
        # -- same left edge but different right edges --
        if a.left == b.left and a.right != b.right:
            raise InvalidSpanError("requested span not rectangular")

    def raise_on_tee_shaped(a: CT_Tc, b: CT_Tc):
        # -- the cell that starts higher also ends lower than the other cell --
        top_most, other = (a, b) if a.top < b.top else (b, a)
        if top_most.top < other.top and top_most.bottom > other.bottom:
            raise InvalidSpanError("requested span not rectangular")

        # -- the cell that starts further left also ends further right than the other --
        left_most, other = (a, b) if a.left < b.left else (b, a)
        if left_most.left < other.left and left_most.right > other.right:
            raise InvalidSpanError("requested span not rectangular")

    raise_on_inverted_L(self, other_tc)
    raise_on_tee_shaped(self, other_tc)

    # -- bounding box of the two cells; `bottom` and `right` are exclusive extents --
    top = min(self.top, other_tc.top)
    left = min(self.left, other_tc.left)
    bottom = max(self.bottom, other_tc.bottom)
    right = max(self.right, other_tc.right)

    return top, left, bottom - top, right - left
|
||||
|
||||
def _span_to_width(self, grid_width: int, top_tc: CT_Tc, vMerge: str | None):
    """Widen this cell to `grid_width` grid columns by absorbing cells to its right.

    The content of this cell is first moved to `top_tc`. Then, while this cell's
    `grid_span` is below `grid_width`, the next `w:tc` in the row is swallowed via
    `_swallow_next_tc()`, which moves that cell's content to `top_tc` and removes it.

    |InvalidSpanError| propagates from `_swallow_next_tc()` when `grid_width` cannot be
    reached exactly, i.e. the row runs out of cells or the next cell would push the span
    past `grid_width`.

    Finally `vMerge` is assigned to this cell's `vMerge` attribute; earlier documentation
    states that |None| removes the `w:vMerge` element if present.
    """
    self._move_content_to(top_tc)

    # -- each pass absorbs exactly one following cell --
    while grid_width > self.grid_span:
        self._swallow_next_tc(grid_width, top_tc)

    self.vMerge = vMerge
|
||||
|
||||
def _swallow_next_tc(self, grid_width: int, top_tc: CT_Tc):
    """Absorb the following `w:tc` in this row into this cell, then delete it.

    The absorbed cell's content is moved to `top_tc`, its width is added to this cell
    through `_add_width_of()`, and its grid-span is added to this cell's grid-span.

    Raises |InvalidSpanError| when there is no following `w:tc` in the row, or when the
    combined grid-span would exceed `grid_width`.
    """
    neighbor = self._next_tc

    # -- validate before mutating anything --
    if neighbor is None:
        raise InvalidSpanError("not enough grid columns")
    if self.grid_span + neighbor.grid_span > grid_width:
        raise InvalidSpanError("span is not rectangular")

    neighbor._move_content_to(top_tc)
    self._add_width_of(neighbor)
    self.grid_span += neighbor.grid_span
    neighbor._remove()
|
||||
|
||||
@property
def _tbl(self) -> CT_Tbl:
    """The nearest `w:tbl` ancestor of this `w:tc` element.

    Raises |IndexError| when the XPath query finds no `w:tbl` ancestor.
    """
    enclosing_tbls = self.xpath("./ancestor::w:tbl[position()=1]")
    return cast(CT_Tbl, enclosing_tbls[0])
|
||||
|
||||
@property
def _tc_above(self) -> CT_Tc:
    """The `w:tc` element at this cell's grid offset in the row above.

    Any error raised by `_tr_above` (e.g. for a cell in the top row) propagates.
    """
    row_above = self._tr_above
    return row_above.tc_at_grid_offset(self.grid_offset)
|
||||
|
||||
@property
def _tc_below(self) -> CT_Tc | None:
    """The `w:tc` element at this cell's grid offset in the row below.

    |None| when there is no row below this cell's row.
    """
    row_below = self._tr_below
    return None if row_below is None else row_below.tc_at_grid_offset(self.grid_offset)
|
||||
|
||||
@property
def _tr(self) -> CT_Row:
    """The nearest `w:tr` ancestor of this `w:tc` element.

    Raises |IndexError| when the XPath query finds no `w:tr` ancestor.
    """
    enclosing_trs = self.xpath("./ancestor::w:tr[position()=1]")
    return cast(CT_Row, enclosing_trs[0])
|
||||
|
||||
@property
def _tr_above(self) -> CT_Row:
    """The `w:tr` sibling immediately preceding the row this cell appears in.

    Raises |ValueError| when the enclosing row has no preceding `w:tr` sibling, i.e. this
    cell is in the top-most row.
    """
    preceding_rows = self.xpath("./ancestor::w:tr[position()=1]/preceding-sibling::w:tr[1]")
    if preceding_rows:
        return preceding_rows[0]
    raise ValueError("no tr above topmost tr in w:tbl")
|
||||
|
||||
@property
def _tr_below(self) -> CT_Row | None:
    """The `w:tr` that follows this cell's row in the table's `tr_lst`.

    |None| when this cell's row is the last entry of that list.
    """
    rows = self._tbl.tr_lst
    below_idx = rows.index(self._tr) + 1
    # -- an index past the end of the list means this is the bottom row --
    if below_idx >= len(rows):
        return None
    return rows[below_idx]
|
||||
|
||||
@property
def _tr_idx(self) -> int:
    """Position of this cell's `w:tr` within the `tr_lst` of its table."""
    rows = self._tbl.tr_lst
    return rows.index(self._tr)
|
||||
|
||||
|
||||
class CT_TcPr(BaseOxmlElement):
    """``<w:tcPr>`` element, holding the properties of a table cell."""

    # -- child-element helpers, declared here for type-checkers; presumably generated from
    # -- the `ZeroOrOne` declarations below --
    get_or_add_gridSpan: Callable[[], CT_DecimalNumber]
    get_or_add_tcW: Callable[[], CT_TblWidth]
    get_or_add_vAlign: Callable[[], CT_VerticalJc]
    _add_vMerge: Callable[[], CT_VMerge]
    _remove_gridSpan: Callable[[], None]
    _remove_vAlign: Callable[[], None]
    _remove_vMerge: Callable[[], None]

    # -- child tags in sequence; each declaration below passes the tags that follow its own
    # -- as `successors` --
    _tag_seq = (
        "w:cnfStyle",
        "w:tcW",
        "w:gridSpan",
        "w:hMerge",
        "w:vMerge",
        "w:tcBorders",
        "w:shd",
        "w:noWrap",
        "w:tcMar",
        "w:textDirection",
        "w:tcFitText",
        "w:vAlign",
        "w:hideMark",
        "w:headers",
        "w:cellIns",
        "w:cellDel",
        "w:cellMerge",
        "w:tcPrChange",
    )
    tcW: CT_TblWidth | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:tcW", successors=_tag_seq[2:]
    )
    gridSpan: CT_DecimalNumber | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:gridSpan", successors=_tag_seq[3:]
    )
    vMerge: CT_VMerge | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:vMerge", successors=_tag_seq[5:]
    )
    vAlign: CT_VerticalJc | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:vAlign", successors=_tag_seq[12:]
    )
    # -- only needed while the class body executes; keep it off the class --
    del _tag_seq

    @property
    def grid_span(self) -> int:
        """Number of grid columns this cell spans.

        Read from `./w:gridSpan/@val`; 1 when there is no `w:gridSpan` child.
        """
        span_elm = self.gridSpan
        if span_elm is None:
            return 1
        return span_elm.val

    @grid_span.setter
    def grid_span(self, value: int):
        # -- always start clean; a span of 1 (or less) is expressed by omitting the element --
        self._remove_gridSpan()
        if value > 1:
            self.get_or_add_gridSpan().val = value

    @property
    def vAlign_val(self):
        """The `w:val` attribute of the `w:vAlign` child, or |None| without that child."""
        valign_elm = self.vAlign
        return None if valign_elm is None else valign_elm.val

    @vAlign_val.setter
    def vAlign_val(self, value: WD_CELL_VERTICAL_ALIGNMENT | None):
        # -- |None| removes the element rather than writing an attribute --
        if value is None:
            self._remove_vAlign()
        else:
            self.get_or_add_vAlign().val = value

    @property
    def vMerge_val(self):
        """The `./w:vMerge/@val` attribute value, or |None| without a `w:vMerge` child."""
        vmerge_elm = self.vMerge
        return None if vmerge_elm is None else vmerge_elm.val

    @vMerge_val.setter
    def vMerge_val(self, value: str | None):
        # -- replace rather than update: drop any existing element, then re-add if needed --
        self._remove_vMerge()
        if value is None:
            return
        self._add_vMerge().val = value

    @property
    def width(self) -> Length | None:
        """Width reported by the `w:tcW` child, or |None| without that child.

        Earlier documentation describes the value as an EMU length that is also |None|
        when the width type is not 'dxa'; that is decided by `tcW.width`.
        """
        tcw_elm = self.tcW
        return None if tcw_elm is None else tcw_elm.width

    @width.setter
    def width(self, value: Length):
        self.get_or_add_tcW().width = value
|
||||
|
||||
|
||||
class CT_TrPr(BaseOxmlElement):
    """``<w:trPr>`` element, holding the properties of a table row."""

    # -- declared for type-checkers; presumably generated from the `trHeight` declaration --
    get_or_add_trHeight: Callable[[], CT_Height]

    # -- child tags in sequence; each declaration below passes the tags that follow its own
    # -- as `successors` --
    _tag_seq = (
        "w:cnfStyle",
        "w:divId",
        "w:gridBefore",
        "w:gridAfter",
        "w:wBefore",
        "w:wAfter",
        "w:cantSplit",
        "w:trHeight",
        "w:tblHeader",
        "w:tblCellSpacing",
        "w:jc",
        "w:hidden",
        "w:ins",
        "w:del",
        "w:trPrChange",
    )
    gridAfter: CT_DecimalNumber | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:gridAfter", successors=_tag_seq[4:]
    )
    gridBefore: CT_DecimalNumber | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:gridBefore", successors=_tag_seq[3:]
    )
    trHeight: CT_Height | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:trHeight", successors=_tag_seq[8:]
    )
    # -- only needed while the class body executes; keep it off the class --
    del _tag_seq

    @property
    def grid_after(self) -> int:
        """Count of unpopulated layout-grid cells at the end of this row.

        Read from the `w:gridAfter` child; 0 when that child is absent.
        """
        after_elm = self.gridAfter
        if after_elm is None:
            return 0
        return after_elm.val

    @property
    def grid_before(self) -> int:
        """Count of unpopulated layout-grid cells at the start of this row.

        Read from the `w:gridBefore` child; 0 when that child is absent.
        """
        before_elm = self.gridBefore
        if before_elm is None:
            return 0
        return before_elm.val

    @property
    def trHeight_hRule(self) -> WD_ROW_HEIGHT_RULE | None:
        """The `hRule` value of the `w:trHeight` child, or |None| without that child."""
        height_elm = self.trHeight
        if height_elm is None:
            return None
        return height_elm.hRule

    @trHeight_hRule.setter
    def trHeight_hRule(self, value: WD_ROW_HEIGHT_RULE | None):
        # -- don't create a `w:trHeight` element just to record "no value" --
        if value is None and self.trHeight is None:
            return
        self.get_or_add_trHeight().hRule = value

    @property
    def trHeight_val(self):
        """The `val` value of the `w:trHeight` child, or |None| without that child."""
        height_elm = self.trHeight
        if height_elm is None:
            return None
        return height_elm.val

    @trHeight_val.setter
    def trHeight_val(self, value: Length | None):
        # -- don't create a `w:trHeight` element just to record "no value" --
        if value is None and self.trHeight is None:
            return
        self.get_or_add_trHeight().val = value
|
||||
|
||||
|
||||
class CT_VerticalJc(BaseOxmlElement):
    """`w:vAlign` element, giving the vertical alignment of a table cell."""

    # -- required `w:val` attribute, converted via `WD_CELL_VERTICAL_ALIGNMENT` --
    val: WD_CELL_VERTICAL_ALIGNMENT = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val",
        WD_CELL_VERTICAL_ALIGNMENT,
    )
|
||||
|
||||
|
||||
class CT_VMerge(BaseOxmlElement):
    """``<w:vMerge>`` element, describing how a cell takes part in a vertical merge."""

    # -- optional `w:val` attribute; declared with `ST_Merge.CONTINUE` as its default --
    val: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val",
        ST_Merge,
        default=ST_Merge.CONTINUE,
    )
|
||||
@@ -0,0 +1,331 @@
|
||||
# pyright: reportAssignmentType=false
|
||||
|
||||
"""Custom element classes related to run properties (font)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
from docx.enum.dml import MSO_THEME_COLOR
|
||||
from docx.enum.text import WD_COLOR_INDEX, WD_UNDERLINE
|
||||
from docx.oxml.ns import nsdecls
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.oxml.simpletypes import (
|
||||
ST_HexColor,
|
||||
ST_HpsMeasure,
|
||||
ST_String,
|
||||
ST_VerticalAlignRun,
|
||||
)
|
||||
from docx.oxml.xmlchemy import (
|
||||
BaseOxmlElement,
|
||||
OptionalAttribute,
|
||||
RequiredAttribute,
|
||||
ZeroOrOne,
|
||||
)
|
||||
from docx.shared import RGBColor
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.shared import CT_OnOff, CT_String
|
||||
from docx.shared import Length
|
||||
|
||||
|
||||
class CT_Color(BaseOxmlElement):
    """`w:color` element, giving the color of a font and possibly of other objects."""

    # -- required `w:val` attribute, handled as `ST_HexColor` --
    val: RGBColor | str = RequiredAttribute("w:val", ST_HexColor)
    # -- optional `w:themeColor` attribute, a member of `MSO_THEME_COLOR` when present --
    themeColor: MSO_THEME_COLOR | None = OptionalAttribute("w:themeColor", MSO_THEME_COLOR)
|
||||
|
||||
|
||||
class CT_Fonts(BaseOxmlElement):
    """`<w:rFonts>` element.

    Carries the typeface name for the various language types. Only the `w:ascii` and
    `w:hAnsi` attributes are exposed here.
    """

    # -- both attributes are optional strings --
    ascii: str | None = OptionalAttribute("w:ascii", ST_String)
    hAnsi: str | None = OptionalAttribute("w:hAnsi", ST_String)
|
||||
|
||||
|
||||
class CT_Highlight(BaseOxmlElement):
    """`w:highlight` element, giving the highlighting/background color of a font."""

    # -- required `w:val` attribute, a member of `WD_COLOR_INDEX` --
    val: WD_COLOR_INDEX = RequiredAttribute("w:val", WD_COLOR_INDEX)
|
||||
|
||||
|
||||
class CT_HpsMeasure(BaseOxmlElement):
    """Element type for `<w:sz>` and others, giving a font size in half-points."""

    # -- required `w:val` attribute, converted via `ST_HpsMeasure` --
    val: Length = RequiredAttribute("w:val", ST_HpsMeasure)
|
||||
|
||||
|
||||
class CT_RPr(BaseOxmlElement):
    """`<w:rPr>` element, holding the formatting properties of a run."""

    # -- child-element helpers, declared here for type-checkers; presumably generated from
    # -- the `ZeroOrOne` declarations below --
    get_or_add_color: Callable[[], CT_Color]
    get_or_add_highlight: Callable[[], CT_Highlight]
    get_or_add_rFonts: Callable[[], CT_Fonts]
    get_or_add_sz: Callable[[], CT_HpsMeasure]
    get_or_add_vertAlign: Callable[[], CT_VerticalAlignRun]
    _add_rStyle: Callable[..., CT_String]
    _add_u: Callable[[], CT_Underline]
    _remove_color: Callable[[], None]
    _remove_highlight: Callable[[], None]
    _remove_rFonts: Callable[[], None]
    _remove_rStyle: Callable[[], None]
    _remove_sz: Callable[[], None]
    _remove_u: Callable[[], None]
    _remove_vertAlign: Callable[[], None]

    # -- child tags in sequence; each declaration below passes the tags that follow its own
    # -- as `successors` --
    _tag_seq = (
        "w:rStyle",
        "w:rFonts",
        "w:b",
        "w:bCs",
        "w:i",
        "w:iCs",
        "w:caps",
        "w:smallCaps",
        "w:strike",
        "w:dstrike",
        "w:outline",
        "w:shadow",
        "w:emboss",
        "w:imprint",
        "w:noProof",
        "w:snapToGrid",
        "w:vanish",
        "w:webHidden",
        "w:color",
        "w:spacing",
        "w:w",
        "w:kern",
        "w:position",
        "w:sz",
        "w:szCs",
        "w:highlight",
        "w:u",
        "w:effect",
        "w:bdr",
        "w:shd",
        "w:fitText",
        "w:vertAlign",
        "w:rtl",
        "w:cs",
        "w:em",
        "w:lang",
        "w:eastAsianLayout",
        "w:specVanish",
        "w:oMath",
    )
    rStyle: CT_String | None = ZeroOrOne("w:rStyle", successors=_tag_seq[1:])
    rFonts: CT_Fonts | None = ZeroOrOne("w:rFonts", successors=_tag_seq[2:])
    b: CT_OnOff | None = ZeroOrOne("w:b", successors=_tag_seq[3:])
    bCs = ZeroOrOne("w:bCs", successors=_tag_seq[4:])
    i = ZeroOrOne("w:i", successors=_tag_seq[5:])
    iCs = ZeroOrOne("w:iCs", successors=_tag_seq[6:])
    caps = ZeroOrOne("w:caps", successors=_tag_seq[7:])
    smallCaps = ZeroOrOne("w:smallCaps", successors=_tag_seq[8:])
    strike = ZeroOrOne("w:strike", successors=_tag_seq[9:])
    dstrike = ZeroOrOne("w:dstrike", successors=_tag_seq[10:])
    outline = ZeroOrOne("w:outline", successors=_tag_seq[11:])
    shadow = ZeroOrOne("w:shadow", successors=_tag_seq[12:])
    emboss = ZeroOrOne("w:emboss", successors=_tag_seq[13:])
    imprint = ZeroOrOne("w:imprint", successors=_tag_seq[14:])
    noProof = ZeroOrOne("w:noProof", successors=_tag_seq[15:])
    snapToGrid = ZeroOrOne("w:snapToGrid", successors=_tag_seq[16:])
    vanish = ZeroOrOne("w:vanish", successors=_tag_seq[17:])
    webHidden = ZeroOrOne("w:webHidden", successors=_tag_seq[18:])
    color: CT_Color | None = ZeroOrOne("w:color", successors=_tag_seq[19:])
    sz: CT_HpsMeasure | None = ZeroOrOne("w:sz", successors=_tag_seq[24:])
    highlight: CT_Highlight | None = ZeroOrOne("w:highlight", successors=_tag_seq[26:])
    u: CT_Underline | None = ZeroOrOne("w:u", successors=_tag_seq[27:])
    vertAlign: CT_VerticalAlignRun | None = ZeroOrOne("w:vertAlign", successors=_tag_seq[32:])
    rtl = ZeroOrOne("w:rtl", successors=_tag_seq[33:])
    cs = ZeroOrOne("w:cs", successors=_tag_seq[34:])
    specVanish = ZeroOrOne("w:specVanish", successors=_tag_seq[38:])
    oMath = ZeroOrOne("w:oMath", successors=_tag_seq[39:])
    # -- only needed while the class body executes; keep it off the class --
    del _tag_seq

    def _new_color(self):
        """Override metaclass method so a new `w:color` starts with `w:val` of RGB black."""
        color_xml = f'<w:color {nsdecls("w")} w:val="000000"/>'
        return parse_xml(color_xml)

    @property
    def highlight_val(self) -> WD_COLOR_INDEX | None:
        """The `./w:highlight/@val` value, i.e. the font's highlight color.

        |None| when there is no `w:highlight` child (text is not highlighted).
        """
        highlight_elm = self.highlight
        return None if highlight_elm is None else highlight_elm.val

    @highlight_val.setter
    def highlight_val(self, value: WD_COLOR_INDEX | None) -> None:
        # -- |None| removes the element rather than writing an attribute --
        if value is None:
            self._remove_highlight()
        else:
            self.get_or_add_highlight().val = value

    @property
    def rFonts_ascii(self) -> str | None:
        """The `w:rFonts/@w:ascii` value, or |None| without a `w:rFonts` child.

        This is the assigned typeface name. `w:rFonts` can carry other special-case
        typeface names too; this accessor serves callers needing only the common one.
        """
        fonts_elm = self.rFonts
        return None if fonts_elm is None else fonts_elm.ascii

    @rFonts_ascii.setter
    def rFonts_ascii(self, value: str | None) -> None:
        # -- note: |None| removes the whole `w:rFonts` element, not just `w:ascii` --
        if value is None:
            self._remove_rFonts()
        else:
            self.get_or_add_rFonts().ascii = value

    @property
    def rFonts_hAnsi(self) -> str | None:
        """The `w:rFonts/@w:hAnsi` value, or |None| without a `w:rFonts` child."""
        fonts_elm = self.rFonts
        return None if fonts_elm is None else fonts_elm.hAnsi

    @rFonts_hAnsi.setter
    def rFonts_hAnsi(self, value: str | None):
        # -- don't create a `w:rFonts` element just to record "no value" --
        if value is None and self.rFonts is None:
            return
        self.get_or_add_rFonts().hAnsi = value

    @property
    def style(self) -> str | None:
        """The string in `./w:rStyle/@val`, or None without a `w:rStyle` child."""
        style_elm = self.rStyle
        return None if style_elm is None else style_elm.val

    @style.setter
    def style(self, style: str | None) -> None:
        """Write `style` to `./w:rStyle/@val`, creating `w:rStyle` when it is missing.

        Passing |None| removes the `w:rStyle` element if present.
        """
        if style is None:
            self._remove_rStyle()
            return
        style_elm = self.rStyle
        if style_elm is None:
            self._add_rStyle(val=style)
        else:
            style_elm.val = style

    @property
    def subscript(self) -> bool | None:
        """|True| when `./w:vertAlign/@w:val` is "subscript".

        |False| for any other value and |None| without a `w:vertAlign` child.
        """
        align_elm = self.vertAlign
        return None if align_elm is None else align_elm.val == ST_VerticalAlignRun.SUBSCRIPT

    @subscript.setter
    def subscript(self, value: bool | None) -> None:
        if value is None:
            self._remove_vertAlign()
            return
        if bool(value) is True:
            self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUBSCRIPT
            return
        # -- falsy value: only clear the element when it currently means subscript --
        align_elm = self.vertAlign
        if align_elm is not None and align_elm.val == ST_VerticalAlignRun.SUBSCRIPT:
            self._remove_vertAlign()

    @property
    def superscript(self) -> bool | None:
        """|True| when `w:vertAlign/@w:val` is 'superscript'.

        |False| for any other value and |None| without a `w:vertAlign` child.
        """
        align_elm = self.vertAlign
        return None if align_elm is None else align_elm.val == ST_VerticalAlignRun.SUPERSCRIPT

    @superscript.setter
    def superscript(self, value: bool | None):
        if value is None:
            self._remove_vertAlign()
            return
        if bool(value) is True:
            self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUPERSCRIPT
            return
        # -- falsy value: only clear the element when it currently means superscript --
        align_elm = self.vertAlign
        if align_elm is not None and align_elm.val == ST_VerticalAlignRun.SUPERSCRIPT:
            self._remove_vertAlign()

    @property
    def sz_val(self) -> Length | None:
        """The `w:sz/@w:val` value, or |None| without a `w:sz` child."""
        sz_elm = self.sz
        return None if sz_elm is None else sz_elm.val

    @sz_val.setter
    def sz_val(self, value: Length | None):
        # -- |None| removes the element rather than writing an attribute --
        if value is None:
            self._remove_sz()
        else:
            self.get_or_add_sz().val = value

    @property
    def u_val(self) -> WD_UNDERLINE | None:
        """The `w:u/@val` value, or None without a `w:u` child.

        NOTE(review): earlier documentation states that `WD_UNDERLINE.SINGLE` and
        `WD_UNDERLINE.NONE` surface as `True` and `False`; this method does not perform
        that mapping itself -- confirm where it happens.
        """
        u_elm = self.u
        return None if u_elm is None else u_elm.val

    @u_val.setter
    def u_val(self, value: WD_UNDERLINE | None):
        # -- replace rather than update: drop any existing element, then re-add if needed --
        self._remove_u()
        if value is None:
            return
        self._add_u().val = value

    def _get_bool_val(self, name: str) -> bool | None:
        """The `val` of the boolean child stored under attribute `name`.

        `name` is the attribute name on this class, e.g. "b", "i", or "smallCaps".
        |None| when that child element is not present.
        """
        child = getattr(self, name)
        return None if child is None else child.val

    def _set_bool_val(self, name: str, value: bool | None):
        """Set the boolean child under attribute `name` to `value`; |None| removes it."""
        if value is None:
            remover = getattr(self, f"_remove_{name}")
            remover()
            return
        getter = getattr(self, f"get_or_add_{name}")
        getter().val = value
|
||||
|
||||
|
||||
class CT_Underline(BaseOxmlElement):
    """`<w:u>` element, giving the underlining style of a run."""

    # -- optional `w:val` attribute, a member of `WD_UNDERLINE` when present --
    val: WD_UNDERLINE | None = OptionalAttribute("w:val", WD_UNDERLINE)
|
||||
|
||||
|
||||
class CT_VerticalAlignRun(BaseOxmlElement):
    """`<w:vertAlign>` element, marking a run as subscript or superscript."""

    # -- required `w:val` attribute, handled as `ST_VerticalAlignRun` --
    val: str = RequiredAttribute("w:val", ST_VerticalAlignRun)
|
||||
@@ -0,0 +1,45 @@
|
||||
"""Custom element classes related to hyperlinks (CT_Hyperlink)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, List
|
||||
|
||||
from docx.oxml.simpletypes import ST_OnOff, ST_String, XsdString
|
||||
from docx.oxml.text.run import CT_R
|
||||
from docx.oxml.xmlchemy import (
|
||||
BaseOxmlElement,
|
||||
OptionalAttribute,
|
||||
ZeroOrMore,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
|
||||
|
||||
|
||||
class CT_Hyperlink(BaseOxmlElement):
    """`<w:hyperlink>` element, holding the text and the address of a hyperlink."""

    # -- declared for type-checkers; presumably generated from the `r` declaration below --
    r_lst: List[CT_R]

    # -- optional `r:id` attribute --
    rId: str | None = OptionalAttribute("r:id", XsdString)  # pyright: ignore[reportAssignmentType]
    # -- optional `w:anchor` attribute --
    anchor: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:anchor",
        ST_String,
    )
    # -- optional `w:history` attribute; declared with True as its default --
    history: bool = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:history",
        ST_OnOff,
        default=True,
    )

    r = ZeroOrMore("w:r")

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """Every `w:lastRenderedPageBreak` found in a `w:r` child of this hyperlink."""
        page_breaks = self.xpath("./w:r/w:lastRenderedPageBreak")
        return page_breaks

    @property
    def text(self) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """The text of this hyperlink.

        The hyperlink text is stored in one or more `w:r` children; the text of each is
        concatenated in document order.
        """
        run_texts = [run.text for run in self.xpath("w:r")]
        return "".join(run_texts)
|
||||
@@ -0,0 +1,278 @@
|
||||
"""Custom element class for rendered page-break (CT_LastRenderedPageBreak)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement
|
||||
from docx.shared import lazyproperty
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.text.hyperlink import CT_Hyperlink
|
||||
from docx.oxml.text.paragraph import CT_P
|
||||
|
||||
|
||||
class CT_LastRenderedPageBreak(BaseOxmlElement):
|
||||
"""`<w:lastRenderedPageBreak>` element, indicating page break inserted by renderer.
|
||||
|
||||
A rendered page-break is one inserted by the renderer when it runs out of room on a
|
||||
page. It is an empty element (no attrs or children) and is a child of CT_R, peer to
|
||||
CT_Text.
|
||||
|
||||
NOTE: this complex-type name does not exist in the schema, where
|
||||
`w:lastRenderedPageBreak` maps to `CT_Empty`. This name was added to give it
|
||||
distinguished behavior. CT_Empty is used for many elements.
|
||||
"""
|
||||
|
||||
@property
def following_fragment_p(self) -> CT_P:
    """A "loose" `CT_P` containing only the paragraph content following this break.

    Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
    page-break in its paragraph.

    The fragment comes from `_following_frag_in_hlink` when the break sits inside a
    hyperlink and from `_following_frag_in_run` otherwise; both work on a deepcopy of the
    enclosing `w:p`, so the document itself is not changed.

    NOTE: the returned `w:p` can itself contain further `w:lastRenderedPageBreak` elements
    (when the paragraph contained more than one). While this is rare, the caller should
    treat it like any other paragraph and split it again if necessary in a following step
    or recursion.
    """
    first_lrpb = self._first_lrpb_in_p(self._enclosing_p)
    is_first = self == first_lrpb
    if not is_first:
        raise ValueError("only defined on first rendered page-break in paragraph")

    # -- splitting approach is different when break is inside a hyperlink --
    if self._is_in_hyperlink:
        return self._following_frag_in_hlink
    return self._following_frag_in_run
|
||||
|
||||
@property
def follows_all_content(self) -> bool:
    """True when a rendered page-break is the last "content" in the paragraph.

    This is an uncommon case that may only occur in contrived documents or hand-edited
    XML, but the spec does not preclude it.

    NOTE(review): the query runs against the enclosing paragraph and does not check that
    the matched `w:lastRenderedPageBreak` is this element.
    """
    # -- a page-break inside a hyperlink never meets these criteria (for our purposes at
    # -- least) because it is considered "atomic" and always associated with the page it
    # -- starts on.
    if self._is_in_hyperlink:
        return False

    enclosing_p = self._enclosing_p
    trailing_lrpb_xpath = (
        # -- in the last run of the paragraph --
        "(./w:r)[last()]"
        # -- all page-breaks --
        "/w:lastRenderedPageBreak"
        # -- that are not followed by any content-bearing elements --
        f"[not(following-sibling::*[{self._run_inner_content_xpath}])]"
    )
    # -- XPath will match zero-or-one w:lastRenderedPageBreak element --
    return bool(enclosing_p.xpath(trailing_lrpb_xpath))
|
||||
|
||||
@property
def precedes_all_content(self) -> bool:
    """True when a rendered page-break comes before all content in the paragraph.

    This is a common case; it occurs whenever the page breaks on an even paragraph
    boundary.

    NOTE(review): the query runs against the enclosing paragraph and does not check that
    the matched `w:lastRenderedPageBreak` is this element.
    """
    # -- a page-break inside a hyperlink never meets these criteria because there is
    # -- always part of the hyperlink text before the page-break.
    if self._is_in_hyperlink:
        return False

    enclosing_p = self._enclosing_p
    leading_lrpb_xpath = (
        # -- in the first run of the paragraph --
        "./w:r[1]"
        # -- all page-breaks --
        "/w:lastRenderedPageBreak"
        # -- that are not preceded by any content-bearing elements --
        f"[not(preceding-sibling::*[{self._run_inner_content_xpath}])]"
    )
    # -- XPath will match zero-or-one w:lastRenderedPageBreak element --
    return bool(enclosing_p.xpath(leading_lrpb_xpath))
|
||||
|
||||
@property
def preceding_fragment_p(self) -> CT_P:
    """A "loose" `CT_P` containing only the paragraph content before this break.

    Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
    page-break in its paragraph.

    The fragment comes from `_preceding_frag_in_hlink` when the break sits inside a
    hyperlink and from `_preceding_frag_in_run` otherwise.
    """
    first_lrpb = self._first_lrpb_in_p(self._enclosing_p)
    is_first = self == first_lrpb
    if not is_first:
        raise ValueError("only defined on first rendered page-break in paragraph")

    # -- splitting approach is different when break is inside a hyperlink --
    if self._is_in_hyperlink:
        return self._preceding_frag_in_hlink
    return self._preceding_frag_in_run
|
||||
|
||||
def _enclosing_hyperlink(self, lrpb: CT_LastRenderedPageBreak) -> CT_Hyperlink:
    """The `w:hyperlink` grandparent of `lrpb`.

    The lookup is made on the `lrpb` argument, not on `self`. Raises `IndexError` when
    `lrpb` has no `w:r` parent inside a `w:hyperlink` (e.g. its grandparent is a `w:p`),
    so only call this for a page-break known to be in a hyperlink.
    """
    hyperlinks = lrpb.xpath("./parent::w:r/parent::w:hyperlink")
    return hyperlinks[0]
|
||||
|
||||
@property
def _enclosing_p(self) -> CT_P:
    """The nearest `w:p` ancestor of this `w:lastRenderedPageBreak`.

    Raises `IndexError` when there is no `w:p` ancestor.
    """
    ancestor_ps = self.xpath("./ancestor::w:p[1]")
    return ancestor_ps[0]
|
||||
|
||||
def _first_lrpb_in_p(self, p: CT_P) -> CT_LastRenderedPageBreak:
    """The first `w:lastRenderedPageBreak` element found in `p`.

    Both breaks in direct `w:r` children and breaks in `w:r` children of a `w:hyperlink`
    are considered. Raises `ValueError` if `p` contains no rendered page-break.
    """
    found = p.xpath("./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak")
    if found:
        return found[0]
    raise ValueError("no rendered page-breaks in paragraph element")
|
||||
|
||||
@lazyproperty
def _following_frag_in_hlink(self) -> CT_P:
    """The `CT_P` fragment following this break when the break is inside a hyperlink.

    This is a *partial-function*: it raises `ValueError` when this page-break is not
    inside a hyperlink. The whole hyperlink is left out of the result because it is
    treated as belonging to the preceding fragment.
    """
    if not self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break in a hyperlink")

    # -- mutate a deep copy so the original paragraph stays untouched --
    p_clone = copy.deepcopy(self._enclosing_p)

    # -- the counterpart of this page-break inside the clone (not `self`) --
    cloned_lrpb = self._first_lrpb_in_p(p_clone)

    # -- the `w:hyperlink` that contains the cloned page-break --
    hlink = cloned_lrpb._enclosing_hyperlink(cloned_lrpb)

    # -- drop everything in the paragraph before the hyperlink, except `w:pPr` --
    for earlier in hlink.xpath("./preceding-sibling::*[not(self::w:pPr)]"):
        p_clone.remove(earlier)

    # -- the hyperlink itself goes with the preceding fragment, so remove it too --
    hlink.getparent().remove(hlink)

    # -- what remains of the clone is the following fragment --
    return p_clone
|
||||
|
||||
@lazyproperty
def _following_frag_in_run(self) -> CT_P:
    """The `CT_P` fragment following this break when the break is not in a hyperlink.

    This is a *partial-function*: it raises `ValueError` when this page-break is inside a
    hyperlink.
    """
    if self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break not in a hyperlink")

    # -- mutate a deep copy so the original paragraph stays untouched --
    p_clone = copy.deepcopy(self._enclosing_p)

    # -- the counterpart of this page-break inside the clone (not `self`) --
    cloned_lrpb = self._first_lrpb_in_p(p_clone)

    # -- the `w:r` that contains the cloned page-break --
    run = cloned_lrpb.xpath("./parent::w:r")[0]

    # -- drop everything in the paragraph before that run, except `w:pPr` --
    for earlier in run.xpath("./preceding-sibling::*[not(self::w:pPr)]"):
        p_clone.remove(earlier)

    # -- inside the run, drop everything before the page-break except `w:rPr`, then drop
    # -- the page-break itself --
    for earlier in cloned_lrpb.xpath("./preceding-sibling::*[not(self::w:rPr)]"):
        run.remove(earlier)
    run.remove(cloned_lrpb)

    return p_clone
|
||||
|
||||
@lazyproperty
def _is_in_hyperlink(self) -> bool:
    """True when the run holding this page-break is a child of a `w:hyperlink`."""
    hyperlink_grandparents = self.xpath("./parent::w:r/parent::w:hyperlink")
    return len(hyperlink_grandparents) > 0
|
||||
|
||||
@lazyproperty
def _preceding_frag_in_hlink(self) -> CT_P:
    """Paragraph fragment preceding this break, for a break located in a hyperlink.

    The fragment is an edited deep copy of the enclosing `w:p`; the paragraph this
    break belongs to is not modified. The copy keeps everything up to and including
    the whole hyperlink, minus the page-break element itself.

    This is a *partial-function*: it raises `ValueError` when this page-break is not
    inside a hyperlink.
    """
    if not self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break in a hyperlink")

    # -- every removal below happens on a deep copy, never on the source `w:p` --
    p_clone = copy.deepcopy(self._enclosing_p)

    # -- `self` is not part of the copy, so look the break up again inside it --
    cloned_lrpb = self._first_lrpb_in_p(p_clone)

    # -- the `w:hyperlink` wrapping the break's run, in the copy --
    cloned_hlink = cloned_lrpb._enclosing_hyperlink(cloned_lrpb)

    # -- drop all paragraph content that comes after the hyperlink --
    trailing = cloned_hlink.xpath("./following-sibling::*")
    for element in trailing:
        p_clone.remove(element)

    # -- take the page-break out of its run inside the hyperlink --
    cloned_lrpb.getparent().remove(cloned_lrpb)

    # -- the complete hyperlink stays in this fragment, so it is never "split" --
    return p_clone
|
||||
|
||||
@lazyproperty
def _preceding_frag_in_run(self) -> CT_P:
    """Paragraph fragment preceding this break, for a break not in a hyperlink.

    The fragment is an edited deep copy of the enclosing `w:p`; the paragraph this
    break belongs to is not modified. The copy keeps everything ahead of the break's
    run plus the part of that run that comes before the break.

    This is a *partial-function*: it raises `ValueError` when this page-break is
    inside a hyperlink.
    """
    if self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break not in a hyperlink")

    # -- every removal below happens on a deep copy, never on the source `w:p` --
    p_clone = copy.deepcopy(self._enclosing_p)

    # -- `self` is not part of the copy, so look the break up again inside it --
    cloned_lrpb = self._first_lrpb_in_p(p_clone)

    # -- the `w:r` that directly contains the break, in the copy --
    run = cloned_lrpb.xpath("./parent::w:r")[0]

    # -- drop all paragraph content that comes after that run --
    for element in run.xpath("./following-sibling::*"):
        p_clone.remove(element)

    # -- inside the run, drop everything after the break and then the break itself --
    for element in cloned_lrpb.xpath("./following-sibling::*"):
        run.remove(element)
    run.remove(cloned_lrpb)

    return p_clone
|
||||
|
||||
@lazyproperty
def _run_inner_content_xpath(self) -> str:
    """XPath union expression matching any of the run inner-content elements."""
    alternatives = (
        "self::w:br",
        "self::w:cr",
        "self::w:drawing",
        "self::w:noBreakHyphen",
        "self::w:ptab",
        "self::w:t",
        "self::w:tab",
    )
    # -- XPath spells a union with `|`; the spacing is part of the returned string --
    return " | ".join(alternatives)
|
||||
@@ -0,0 +1,106 @@
|
||||
# pyright: reportPrivateUsage=false
|
||||
|
||||
"""Custom element classes related to paragraphs (CT_P)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable, List, cast
|
||||
|
||||
from docx.oxml.parser import OxmlElement
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrMore, ZeroOrOne
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
||||
from docx.oxml.section import CT_SectPr
|
||||
from docx.oxml.text.hyperlink import CT_Hyperlink
|
||||
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
|
||||
from docx.oxml.text.parfmt import CT_PPr
|
||||
from docx.oxml.text.run import CT_R
|
||||
|
||||
|
||||
class CT_P(BaseOxmlElement):
    """`<w:p>` element, holding the properties and inner content of a paragraph."""

    # -- type declarations only; none of these names is assigned in this class body --
    add_r: Callable[[], CT_R]
    get_or_add_pPr: Callable[[], CT_PPr]
    hyperlink_lst: List[CT_Hyperlink]
    r_lst: List[CT_R]

    pPr: CT_PPr | None = ZeroOrOne("w:pPr")  # pyright: ignore[reportAssignmentType]
    hyperlink = ZeroOrMore("w:hyperlink")
    r = ZeroOrMore("w:r")

    def add_p_before(self) -> CT_P:
        """Insert a new `<w:p>` element immediately ahead of this one and return it."""
        preceding_p = cast(CT_P, OxmlElement("w:p"))
        self.addprevious(preceding_p)
        return preceding_p

    @property
    def alignment(self) -> WD_PARAGRAPH_ALIGNMENT | None:
        """Value of the `<w:jc>` grandchild element, |None| when it is not present."""
        pPr = self.pPr
        return None if pPr is None else pPr.jc_val

    @alignment.setter
    def alignment(self, value: WD_PARAGRAPH_ALIGNMENT):
        # -- a `w:pPr` child is added first when the paragraph has none --
        self.get_or_add_pPr().jc_val = value

    def clear_content(self):
        """Remove every child element other than a `<w:pPr>` element."""
        removable = self.xpath("./*[not(self::w:pPr)]")
        for element in removable:
            self.remove(element)

    @property
    def inner_content_elements(self) -> List[CT_R | CT_Hyperlink]:
        """The `w:r` and `w:hyperlink` children of this `w:p`, in document order."""
        return self.xpath("./w:r | ./w:hyperlink")

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """Every `w:lastRenderedPageBreak` element located in this paragraph.

        A rendered page-break usually sits in a run that is a direct child of the
        paragraph, but its run can also be nested in a hyperlink. Both locations are
        searched.
        """
        return self.xpath(
            "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak"
        )

    def set_sectPr(self, sectPr: CT_SectPr):
        """Make `sectPr` the `w:sectPr` grandchild, replacing any existing one."""
        pPr = self.get_or_add_pPr()
        # -- remove-then-insert so the replacement happens unconditionally --
        pPr._remove_sectPr()
        pPr._insert_sectPr(sectPr)

    @property
    def style(self) -> str | None:
        """Paragraph style string read from the `./w:pPr/w:pStyle` grandchild.

        |None| when the paragraph has no `w:pPr` child or that child reports no style.
        """
        pPr = self.pPr
        return None if pPr is None else pPr.style

    @style.setter
    def style(self, style: str | None):
        # -- a `w:pPr` child is added first when the paragraph has none --
        self.get_or_add_pPr().style = style

    @property
    def text(self):  # pyright: ignore[reportIncompatibleMethodOverride]
        """Text of this paragraph.

        The text of each `w:r` and `w:hyperlink` child is concatenated in document
        order.
        """
        inner_content = self.xpath("w:r | w:hyperlink")
        return "".join(e.text for e in inner_content)

    def _insert_pPr(self, pPr: CT_PPr) -> CT_PPr:
        # -- `w:pPr` always goes in as the first child --
        self.insert(0, pPr)
        return pPr
|
||||
@@ -0,0 +1,392 @@
|
||||
"""Custom element classes related to paragraph properties (CT_PPr)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
from docx.enum.text import (
|
||||
WD_ALIGN_PARAGRAPH,
|
||||
WD_LINE_SPACING,
|
||||
WD_TAB_ALIGNMENT,
|
||||
WD_TAB_LEADER,
|
||||
)
|
||||
from docx.oxml.shared import CT_DecimalNumber
|
||||
from docx.oxml.simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure
|
||||
from docx.oxml.xmlchemy import (
|
||||
BaseOxmlElement,
|
||||
OneOrMore,
|
||||
OptionalAttribute,
|
||||
RequiredAttribute,
|
||||
ZeroOrOne,
|
||||
)
|
||||
from docx.shared import Length
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.section import CT_SectPr
|
||||
from docx.oxml.shared import CT_String
|
||||
|
||||
|
||||
class CT_Ind(BaseOxmlElement):
    """``<w:ind>`` element, specifying paragraph indentation.

    All four attributes are optional. `left` and `right` are declared with the signed
    twips type, `firstLine` and `hanging` with the plain twips type.
    """

    # -- `w:left` attribute, declared with the signed measure type --
    left: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:left", ST_SignedTwipsMeasure
    )
    # -- `w:right` attribute, declared with the signed measure type --
    right: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:right", ST_SignedTwipsMeasure
    )
    # -- `w:firstLine` attribute; `CT_PPr.first_line_indent` returns it unchanged --
    firstLine: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:firstLine", ST_TwipsMeasure
    )
    # -- `w:hanging` attribute; `CT_PPr.first_line_indent` reports it negated --
    hanging: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:hanging", ST_TwipsMeasure
    )
|
||||
|
||||
|
||||
class CT_Jc(BaseOxmlElement):
    """``<w:jc>`` element, specifying paragraph justification."""

    # -- required `w:val` attribute, typed as a `WD_ALIGN_PARAGRAPH` member --
    val: WD_ALIGN_PARAGRAPH = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", WD_ALIGN_PARAGRAPH
    )
|
||||
|
||||
|
||||
class CT_PPr(BaseOxmlElement):
    """``<w:pPr>`` element, containing the properties for a paragraph."""

    # -- type declarations only; none of these names is assigned in this class body --
    get_or_add_ind: Callable[[], CT_Ind]
    get_or_add_pStyle: Callable[[], CT_String]
    get_or_add_sectPr: Callable[[], CT_SectPr]
    _insert_sectPr: Callable[[CT_SectPr], None]
    _remove_pStyle: Callable[[], None]
    _remove_sectPr: Callable[[], None]

    # -- child tags in sequence; each declaration below passes the slice of tags that
    # -- come after its own tag as `successors`
    _tag_seq = (
        "w:pStyle",
        "w:keepNext",
        "w:keepLines",
        "w:pageBreakBefore",
        "w:framePr",
        "w:widowControl",
        "w:numPr",
        "w:suppressLineNumbers",
        "w:pBdr",
        "w:shd",
        "w:tabs",
        "w:suppressAutoHyphens",
        "w:kinsoku",
        "w:wordWrap",
        "w:overflowPunct",
        "w:topLinePunct",
        "w:autoSpaceDE",
        "w:autoSpaceDN",
        "w:bidi",
        "w:adjustRightInd",
        "w:snapToGrid",
        "w:spacing",
        "w:ind",
        "w:contextualSpacing",
        "w:mirrorIndents",
        "w:suppressOverlap",
        "w:jc",
        "w:textDirection",
        "w:textAlignment",
        "w:textboxTightWrap",
        "w:outlineLvl",
        "w:divId",
        "w:cnfStyle",
        "w:rPr",
        "w:sectPr",
        "w:pPrChange",
    )
    pStyle: CT_String | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:pStyle", successors=_tag_seq[1:]
    )
    keepNext = ZeroOrOne("w:keepNext", successors=_tag_seq[2:])
    keepLines = ZeroOrOne("w:keepLines", successors=_tag_seq[3:])
    pageBreakBefore = ZeroOrOne("w:pageBreakBefore", successors=_tag_seq[4:])
    widowControl = ZeroOrOne("w:widowControl", successors=_tag_seq[6:])
    numPr = ZeroOrOne("w:numPr", successors=_tag_seq[7:])
    tabs = ZeroOrOne("w:tabs", successors=_tag_seq[11:])
    spacing = ZeroOrOne("w:spacing", successors=_tag_seq[22:])
    ind: CT_Ind | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:ind", successors=_tag_seq[23:]
    )
    jc = ZeroOrOne("w:jc", successors=_tag_seq[27:])
    outlineLvl: CT_DecimalNumber = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:outlineLvl", successors=_tag_seq[31:]
    )
    sectPr = ZeroOrOne("w:sectPr", successors=_tag_seq[35:])
    # -- only needed while the class body executes; not kept as a class attribute --
    del _tag_seq

    @property
    def first_line_indent(self) -> Length | None:
        """|Length| derived from `w:ind/@w:hanging` and `w:ind/@w:firstLine`.

        A `w:hanging` value takes precedence and is reported negated. Otherwise the
        `w:firstLine` value is returned, which is |None| when that attribute is absent.
        |None| is also returned when there is no `w:ind` child.
        """
        ind = self.ind
        if ind is None:
            return None
        hanging = ind.hanging
        if hanging is None:
            # -- may itself be None when `w:firstLine` is not present --
            return ind.firstLine
        return Length(-hanging)

    @first_line_indent.setter
    def first_line_indent(self, value: Length | None):
        # -- nothing to clear and nothing to set; don't add an empty `w:ind` --
        if self.ind is None and value is None:
            return
        ind = self.get_or_add_ind()
        # -- clear both attributes; at most one of them is set again below --
        ind.firstLine = None
        ind.hanging = None
        if value is None:
            return
        if value < 0:
            # -- a negative first-line indent is stored as a positive hanging indent --
            ind.hanging = -value
            return
        ind.firstLine = value

    @property
    def ind_left(self) -> Length | None:
        """Value of `w:ind/@w:left`, |None| when it is not present."""
        ind = self.ind
        return None if ind is None else ind.left

    @ind_left.setter
    def ind_left(self, value: Length | None):
        # -- removing a value that isn't there; don't add an empty `w:ind` --
        if value is None and self.ind is None:
            return
        self.get_or_add_ind().left = value

    @property
    def ind_right(self) -> Length | None:
        """Value of `w:ind/@w:right`, |None| when it is not present."""
        ind = self.ind
        return None if ind is None else ind.right

    @ind_right.setter
    def ind_right(self, value: Length | None):
        # -- removing a value that isn't there; don't add an empty `w:ind` --
        if value is None and self.ind is None:
            return
        self.get_or_add_ind().right = value

    @property
    def jc_val(self) -> WD_ALIGN_PARAGRAPH | None:
        """Value of the `<w:jc>` child element, |None| when it is not present."""
        jc = self.jc
        return None if jc is None else jc.val

    @jc_val.setter
    def jc_val(self, value):
        # -- |None| removes the `w:jc` child altogether --
        if value is None:
            self._remove_jc()
        else:
            self.get_or_add_jc().val = value

    @property
    def keepLines_val(self):
        """Value of `keepLines/@val`, |None| when the child is not present."""
        keepLines = self.keepLines
        return None if keepLines is None else keepLines.val

    @keepLines_val.setter
    def keepLines_val(self, value):
        # -- |None| removes the `w:keepLines` child altogether --
        if value is None:
            self._remove_keepLines()
            return
        self.get_or_add_keepLines().val = value

    @property
    def keepNext_val(self):
        """Value of `keepNext/@val`, |None| when the child is not present."""
        keepNext = self.keepNext
        return None if keepNext is None else keepNext.val

    @keepNext_val.setter
    def keepNext_val(self, value):
        # -- |None| removes the `w:keepNext` child altogether --
        if value is None:
            self._remove_keepNext()
            return
        self.get_or_add_keepNext().val = value

    @property
    def pageBreakBefore_val(self):
        """Value of `pageBreakBefore/@val`, |None| when the child is not present."""
        pageBreakBefore = self.pageBreakBefore
        return None if pageBreakBefore is None else pageBreakBefore.val

    @pageBreakBefore_val.setter
    def pageBreakBefore_val(self, value):
        # -- |None| removes the `w:pageBreakBefore` child altogether --
        if value is None:
            self._remove_pageBreakBefore()
            return
        self.get_or_add_pageBreakBefore().val = value

    @property
    def spacing_after(self):
        """Value of `w:spacing/@w:after`, |None| when it is not present."""
        spacing = self.spacing
        return None if spacing is None else spacing.after

    @spacing_after.setter
    def spacing_after(self, value):
        # -- removing a value that isn't there; don't add an empty `w:spacing` --
        if value is None and self.spacing is None:
            return
        spacing = self.get_or_add_spacing()
        spacing.after = value

    @property
    def spacing_before(self):
        """Value of `w:spacing/@w:before`, |None| when it is not present."""
        spacing = self.spacing
        return None if spacing is None else spacing.before

    @spacing_before.setter
    def spacing_before(self, value):
        # -- removing a value that isn't there; don't add an empty `w:spacing` --
        if value is None and self.spacing is None:
            return
        spacing = self.get_or_add_spacing()
        spacing.before = value

    @property
    def spacing_line(self):
        """Value of `w:spacing/@w:line`, |None| when it is not present."""
        spacing = self.spacing
        return None if spacing is None else spacing.line

    @spacing_line.setter
    def spacing_line(self, value):
        # -- removing a value that isn't there; don't add an empty `w:spacing` --
        if value is None and self.spacing is None:
            return
        spacing = self.get_or_add_spacing()
        spacing.line = value

    @property
    def spacing_lineRule(self):
        """Value of `w:spacing/@w:lineRule` as a member of the :ref:`WdLineSpacing`
        enumeration.

        Only the `MULTIPLE`, `EXACTLY`, and `AT_LEAST` members are used. Working out
        whether `SINGLE`, `DOUBLE`, or `MULTIPLE` applies from the value of
        `w:spacing/@w:line` is left to the client, should that behavior be desired.

        `MULTIPLE` is reported when `w:lineRule` is absent but `w:line` is present.
        |None| is returned when there is no `w:spacing` child.
        """
        spacing = self.spacing
        if spacing is None:
            return None
        lineRule = spacing.lineRule
        if lineRule is not None:
            return lineRule
        return None if spacing.line is None else WD_LINE_SPACING.MULTIPLE

    @spacing_lineRule.setter
    def spacing_lineRule(self, value):
        # -- removing a value that isn't there; don't add an empty `w:spacing` --
        if value is None and self.spacing is None:
            return
        spacing = self.get_or_add_spacing()
        spacing.lineRule = value

    @property
    def style(self) -> str | None:
        """String in `./w:pStyle/@val`, None when that child is not present."""
        pStyle = self.pStyle
        return None if pStyle is None else pStyle.val

    @style.setter
    def style(self, style: str | None):
        """Set `./w:pStyle/@val` to `style`, adding the `w:pStyle` child if needed.

        A `style` of |None| removes the `./w:pStyle` child instead.
        """
        if style is None:
            self._remove_pStyle()
        else:
            self.get_or_add_pStyle().val = style

    @property
    def widowControl_val(self):
        """Value of `widowControl/@val`, |None| when the child is not present."""
        widowControl = self.widowControl
        return None if widowControl is None else widowControl.val

    @widowControl_val.setter
    def widowControl_val(self, value):
        # -- |None| removes the `w:widowControl` child altogether --
        if value is None:
            self._remove_widowControl()
            return
        self.get_or_add_widowControl().val = value
|
||||
|
||||
|
||||
class CT_Spacing(BaseOxmlElement):
    """``<w:spacing>`` element, specifying paragraph spacing attributes such as space
    before and line spacing.

    All four attributes are optional.
    """

    # -- `w:after` and `w:before` are declared with the plain twips measure type --
    after = OptionalAttribute("w:after", ST_TwipsMeasure)
    before = OptionalAttribute("w:before", ST_TwipsMeasure)
    # -- `w:line` is declared with the signed twips measure type --
    line = OptionalAttribute("w:line", ST_SignedTwipsMeasure)
    # -- `w:lineRule` is read and written as a `WD_LINE_SPACING` member --
    lineRule = OptionalAttribute("w:lineRule", WD_LINE_SPACING)
|
||||
|
||||
|
||||
class CT_TabStop(BaseOxmlElement):
    """`<w:tab>` element, representing an individual tab stop.

    Overloaded to use for a tab-character in a run, which also uses the w:tab tag but
    only needs a __str__ method.
    """

    # -- required `w:val` attribute, the alignment of the tab stop --
    val: WD_TAB_ALIGNMENT = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", WD_TAB_ALIGNMENT
    )
    # -- optional `w:leader` attribute, declared with a default of `SPACES` --
    leader: WD_TAB_LEADER | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:leader", WD_TAB_LEADER, default=WD_TAB_LEADER.SPACES
    )
    # -- required `w:pos` attribute, declared with the signed twips measure type --
    pos: Length = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:pos", ST_SignedTwipsMeasure
    )

    def __str__(self) -> str:
        """Text equivalent of a `w:tab` element appearing in a run.

        Always a single tab character. Allows text of run inner-content to be accessed
        consistently across all text inner-content.
        """
        return "\t"
|
||||
|
||||
|
||||
class CT_TabStops(BaseOxmlElement):
    """``<w:tabs>`` element, container for a sorted sequence of tab stops."""

    tab = OneOrMore("w:tab", successors=())

    def insert_tab_in_order(self, pos, align, leader):
        """Add a new `w:tab` child at the place its `pos` calls for and return it.

        The new element goes directly ahead of the first existing tab stop whose
        position is greater than `pos`, or at the end when there is no such tab stop.
        """
        new_tab = self._new_tab()
        new_tab.pos, new_tab.val, new_tab.leader = pos, align, leader

        # -- first existing tab stop positioned beyond the new one, if any --
        successor = next((tab for tab in self.tab_lst if new_tab.pos < tab.pos), None)
        if successor is None:
            self.append(new_tab)
        else:
            successor.addprevious(new_tab)
        return new_tab
|
||||
@@ -0,0 +1,307 @@
|
||||
"""Custom element classes related to text runs (CT_R)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable, Iterator, List, cast
|
||||
|
||||
from docx.oxml.drawing import CT_Drawing
|
||||
from docx.oxml.ns import qn
|
||||
from docx.oxml.parser import OxmlElement
|
||||
from docx.oxml.simpletypes import ST_BrClear, ST_BrType
|
||||
from docx.oxml.text.font import CT_RPr
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrMore, ZeroOrOne
|
||||
from docx.shared import TextAccumulator
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.shape import CT_Anchor, CT_Inline
|
||||
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
|
||||
from docx.oxml.text.parfmt import CT_TabStop
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
# Run-level elements
|
||||
|
||||
|
||||
class CT_R(BaseOxmlElement):
    """`<w:r>` element, containing the properties and text for a run."""

    # -- type declarations only; none of these names is assigned in this class body --
    add_br: Callable[[], CT_Br]
    add_tab: Callable[[], CT_TabStop]
    get_or_add_rPr: Callable[[], CT_RPr]
    _add_drawing: Callable[[], CT_Drawing]
    _add_t: Callable[..., CT_Text]

    rPr: CT_RPr | None = ZeroOrOne("w:rPr")  # pyright: ignore[reportAssignmentType]
    br = ZeroOrMore("w:br")
    cr = ZeroOrMore("w:cr")
    drawing = ZeroOrMore("w:drawing")
    t = ZeroOrMore("w:t")
    tab = ZeroOrMore("w:tab")

    def add_t(self, text: str) -> CT_Text:
        """Add a new `<w:t>` element containing `text` and return it."""
        t = self._add_t(text=text)
        # -- leading or trailing whitespace needs `xml:space="preserve"` --
        if text.strip() != text:
            t.set(qn("xml:space"), "preserve")
        return t

    def add_drawing(self, inline_or_anchor: CT_Inline | CT_Anchor) -> CT_Drawing:
        """Add a new `CT_Drawing` (`w:drawing`) child element and return it.

        `inline_or_anchor` is appended to the new `w:drawing` element as its child.
        """
        drawing = self._add_drawing()
        drawing.append(inline_or_anchor)
        return drawing

    def clear_content(self) -> None:
        """Remove every child element other than a `w:rPr` element."""
        removable = self.xpath("./*[not(self::w:rPr)]")
        for element in removable:
            self.remove(element)

    @property
    def inner_content_items(self) -> List[str | CT_Drawing | CT_LastRenderedPageBreak]:
        """Text of run, possibly punctuated by drawings and rendered page-breaks.

        Text-like children are accumulated as their `str()` value; each `w:drawing`
        or `w:lastRenderedPageBreak` child flushes the accumulated text and is then
        added as the element itself.
        """
        # -- imported here rather than at module level; NOTE(review): presumably to
        # -- avoid an import cycle with the pagebreak module
        from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak

        accum = TextAccumulator()
        items: List[str | CT_Drawing | CT_LastRenderedPageBreak] = []

        inner_content = self.xpath(
            "w:br | w:cr | w:drawing | w:lastRenderedPageBreak"
            " | w:noBreakHyphen | w:ptab | w:t | w:tab"
        )
        for e in inner_content:
            if not isinstance(e, (CT_Drawing, CT_LastRenderedPageBreak)):
                accum.push(str(e))
                continue
            # -- text gathered so far comes before the non-text item --
            items.extend(accum.pop())
            items.append(e)

        # -- don't forget the "tail" string --
        items.extend(accum.pop())
        return items

    def insert_comment_range_end_and_reference_below(self, comment_id: int) -> None:
        """Insert a `w:commentRangeEnd` and `w:commentReference` element after this run.

        The `w:commentRangeEnd` element becomes the immediate next sibling of this
        `w:r` and is itself followed by a new `w:r` holding the `w:commentReference`.
        """
        reference_run = self._new_comment_reference_run(comment_id)
        range_end = OxmlElement("w:commentRangeEnd", attrs={qn("w:id"): str(comment_id)})
        # -- each `addnext()` places its argument directly after this run, so the
        # -- element that must end up second is added first
        self.addnext(reference_run)
        self.addnext(range_end)

    def insert_comment_range_start_above(self, comment_id: int) -> None:
        """Insert a `w:commentRangeStart` element with `comment_id` before this run."""
        range_start = OxmlElement("w:commentRangeStart", attrs={qn("w:id"): str(comment_id)})
        self.addprevious(range_start)

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """All `w:lastRenderedPageBreak` children of this run."""
        return self.xpath("./w:lastRenderedPageBreak")

    @property
    def style(self) -> str | None:
        """Character style string read from the `w:rStyle` grandchild.

        |None| when the run has no `w:rPr` child or that child reports no style.
        """
        rPr = self.rPr
        return None if rPr is None else rPr.style

    @style.setter
    def style(self, style: str | None):
        """Set the character style of this `w:r` element to `style`.

        A `w:rPr` child is added first when the run has none. `style` may be None.
        """
        self.get_or_add_rPr().style = style

    @property
    def text(self) -> str:
        """Text of this run.

        The `str()` value of each text-like child (such as `w:t` or `w:tab`) is
        concatenated in document order.
        """
        text_like = self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab")
        return "".join(str(e) for e in text_like)

    @text.setter
    def text(self, text: str):  # pyright: ignore[reportIncompatibleMethodOverride]
        # -- existing content is replaced, but `w:rPr` is kept --
        self.clear_content()
        _RunContentAppender.append_to_run_from_text(self, text)

    def _insert_rPr(self, rPr: CT_RPr) -> CT_RPr:
        # -- `w:rPr` always goes in as the first child --
        self.insert(0, rPr)
        return rPr

    def _new_comment_reference_run(self, comment_id: int) -> CT_R:
        """Return a new `w:r` element with `w:commentReference` referencing `comment_id`.

        Should look like this:

            <w:r>
              <w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
              <w:commentReference w:id="0"/>
            </w:r>

        """
        reference_run = cast(CT_R, OxmlElement("w:r"))
        reference_run.get_or_add_rPr().style = "CommentReference"
        reference = OxmlElement("w:commentReference", attrs={qn("w:id"): str(comment_id)})
        reference_run.append(reference)
        return reference_run
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
# Run inner-content elements
|
||||
|
||||
|
||||
class CT_Br(BaseOxmlElement):
    """`<w:br>` element, indicating a line, page, or column break in a run."""

    # -- optional `w:type` attribute, declared with a default of "textWrapping" --
    type: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:type", ST_BrType, default="textWrapping"
    )
    clear: str | None = OptionalAttribute("w:clear", ST_BrClear)  # pyright: ignore

    def __str__(self) -> str:
        """Text equivalent of this element, which depends on the break type.

        A break of type "textWrapping" (a line break) is rendered as a single newline
        character. Any other break type, such as a column or page break, is rendered
        as the empty string.

        This lets the text of run inner-content be read the same way for every kind
        of text-like run inner-content element.
        """
        if self.type == "textWrapping":
            return "\n"
        return ""
|
||||
|
||||
|
||||
class CT_Cr(BaseOxmlElement):
    """`<w:cr>` element, representing a carriage-return (0x0D) character within a run.

    In Word, this represents a "soft carriage-return" in the sense that it does not end
    the paragraph the way pressing Enter (aka. Return) on the keyboard does. Here the
    text equivalent is considered to be a newline (line-feed) character since in
    plain-text that's the closest Python equivalent.

    NOTE: this complex-type name does not exist in the schema, where `w:cr` maps to
    `CT_Empty`. This name was added to give it distinguished behavior. CT_Empty is used
    for many elements.
    """

    def __str__(self) -> str:
        """Text equivalent of this element, a single newline (line-feed) character."""
        return "\n"
|
||||
|
||||
|
||||
class CT_NoBreakHyphen(BaseOxmlElement):
    """`<w:noBreakHyphen>` element, a hyphen ineligible for a line-wrap position.

    This maps to a plain-text dash ("-").

    NOTE: this complex-type name does not exist in the schema, where `w:noBreakHyphen`
    maps to `CT_Empty`. This name was added to give it behavior distinguished from the
    many other elements represented in the schema by CT_Empty.
    """

    def __str__(self) -> str:
        """Text equivalent of this element, a single dash character ("-").

        The value is constant; no attribute of the element is consulted.
        """
        return "-"
|
||||
|
||||
|
||||
class CT_PTab(BaseOxmlElement):
    """`<w:ptab>` element, representing an absolute-position tab character within a run.

    This character advances the rendering position to the specified position regardless
    of any tab-stops, perhaps for layout of a table-of-contents (TOC) or similar.
    """

    def __str__(self) -> str:
        """Text equivalent of this element, a single tab character.

        This allows the text of run inner-content to be accessed in a consistent way
        for all run inner-content text elements.
        """
        return "\t"
|
||||
|
||||
|
||||
# -- CT_Tab functionality is provided by CT_TabStop which also uses `w:tab` tag. That
|
||||
# -- element class provides the __str__() method for this empty element, unconditionally
|
||||
# -- returning "\t".
|
||||
|
||||
|
||||
class CT_Text(BaseOxmlElement):
    """`<w:t>` element, containing a sequence of characters within a run."""

    def __str__(self) -> str:
        """Text held by this element, the empty string when it has no content.

        This lets this run inner-content element be asked for its text the same way
        the other run-content elements are. In particular the result is never None,
        which is what the element's `.text` reports when there is no content.
        """
        content = self.text
        return content if content else ""
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
# Utility
|
||||
|
||||
|
||||
class _RunContentAppender:
    r"""Translates a Python string into run content elements appended in a `w:r` element.

    Contiguous sequences of regular characters are appended in a single `<w:t>` element.
    Each tab character ('\t') causes a `<w:tab/>` element to be appended. Likewise each
    newline or carriage-return character ('\n', '\r') causes a `<w:br/>` element to be
    appended. Characters are handled one at a time, so a '\r\n' pair produces two
    `<w:br/>` elements.
    """

    def __init__(self, r: CT_R):
        self._r = r
        # -- characters of the pending `<w:t>` element, not yet written to the run --
        self._bfr: List[str] = []

    @classmethod
    def append_to_run_from_text(cls, r: CT_R, text: str):
        """Append inner-content elements for `text` to `r` element."""
        appender = cls(r)
        appender.add_text(text)

    def add_text(self, text: str):
        """Append inner-content elements for `text` to the `w:r` element."""
        for char in text:
            self.add_char(char)
        # -- write out any trailing run of regular characters --
        self.flush()

    def add_char(self, char: str):
        """Process next character of input through finite state machine (FSM).

        There are two possible states, buffer pending and not pending, but those are
        hidden behind the `.flush()` method which must be called at the end of text to
        ensure any pending `<w:t>` element is written.

        An empty string is not a character and is ignored.
        """
        # -- `"" in "\r\n"` is True, so without this guard an empty string would be
        # -- mistaken for a line-break character and add a spurious `<w:br/>`
        if not char:
            return
        if char == "\t":
            self.flush()
            self._r.add_tab()
        elif char in "\r\n":
            self.flush()
            self._r.add_br()
        else:
            self._bfr.append(char)

    def flush(self):
        """Write any buffered characters to the run as one `<w:t>` and empty the buffer."""
        text = "".join(self._bfr)
        # -- an empty buffer adds no `<w:t>` element --
        if text:
            self._r.add_t(text)
        self._bfr.clear()
|
||||
@@ -0,0 +1,696 @@
|
||||
# pyright: reportImportCycles=false
|
||||
|
||||
"""Enabling declarative definition of lxml custom element classes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import TYPE_CHECKING, Any, Callable, Sequence, Type, TypeVar
|
||||
|
||||
from lxml import etree
|
||||
from lxml.etree import ElementBase, _Element # pyright: ignore[reportPrivateUsage]
|
||||
|
||||
from docx.oxml.exceptions import InvalidXmlError
|
||||
from docx.oxml.ns import NamespacePrefixedTag, nsmap, qn
|
||||
from docx.shared import lazyproperty
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.enum.base import BaseXmlEnum
|
||||
from docx.oxml.simpletypes import BaseSimpleType
|
||||
|
||||
|
||||
def serialize_for_reading(element: ElementBase):
    """Return `element` as pretty-printed XML text wrapped in an `XmlString`.

    The text is produced by `etree.tostring()` using `encoding="unicode"` and
    `pretty_print=True`. It is intended for human reading and test comparison and
    carries no XML declaration.
    """
    return XmlString(etree.tostring(element, encoding="unicode", pretty_print=True))
|
||||
|
||||
|
||||
class XmlString(str):
    """`str` subclass providing an XML-aware equality comparison, useful for tests.

    Two values compare equal when they have the same number of lines and every pair of
    corresponding lines is equivalent. A line that looks like a single XML element tag
    is compared piecewise so the order of its attributes does not matter; any other line
    (e.g. a continuation line of multi-line text content) must match exactly.

    Because `__eq__()` is overridden without defining `__hash__()`, instances of this
    class are not hashable.
    """

    # -- An element line is split into four groups, for example:
    # --
    # --     '    <w:xyz xmlns:a="http://ns/decl/a" attr_name="val">text</w:xyz>'
    # --
    # --   front: leading spaces, "<" or "</", and the tag name      '    <w:xyz'
    # --   attrs: everything between the tag name and the bracket    ' xmlns:a=... attr_name="val"'
    # --   close: ">" or "/>"                                        '>'
    # --   text:  optional text plus closing tag on the same line    'text</w:xyz>'
    _xml_elm_line_patt = re.compile(r"( *</?[\w:]+)(.*?)(/?>)([^<]*</[\w:]+>)?$")

    def __eq__(self, other: object) -> bool:
        """True when `other` is a `str` that is line-by-line XML-equivalent to this one."""
        if not isinstance(other, str):
            return False
        lines = self.splitlines()
        lines_other = other.splitlines()
        if len(lines) != len(lines_other):
            return False
        return all(
            self._eq_elm_strs(line, line_other) for line, line_other in zip(lines, lines_other)
        )

    def __ne__(self, other: object) -> bool:
        """Logical inverse of `.__eq__()`."""
        return not self.__eq__(other)

    def _attr_seq(self, attrs: str) -> list[str]:
        """Return a sorted list of the whitespace-separated items in `attrs`.

        Sorting makes the result independent of the order in which the attributes
        appeared. Splitting on whitespace already discards leading and trailing space.
        """
        return sorted(attrs.split())

    def _eq_elm_strs(self, line: str, line_2: str):
        """Return True if the element in `line_2` is XML equivalent to the element in
        `line`.

        When either line is not recognized as an element line the two are compared as
        plain text.
        """
        patt = self._xml_elm_line_patt
        if patt.match(line) is None or patt.match(line_2) is None:
            # -- `_parse_line()` maps every unrecognized line to the same empty 4-tuple,
            # -- so comparing parsed parts would report any two such lines as equal;
            # -- compare them verbatim instead --
            return line == line_2

        front, attrs, close, text = self._parse_line(line)
        front_2, attrs_2, close_2, text_2 = self._parse_line(line_2)
        return (
            front == front_2
            and self._attr_seq(attrs) == self._attr_seq(attrs_2)
            and close == close_2
            and text == text_2
        )

    @classmethod
    def _parse_line(cls, line: str) -> tuple[str, str, str, str]:
        """(front, attrs, close, text) 4-tuple result of parsing XML element `line`.

        Returns four empty strings when `line` does not match the element pattern. The
        `text` item is |None| when the line matches but has no text-and-closing-tag
        portion, because that regular-expression group is optional.
        """
        match = cls._xml_elm_line_patt.match(line)
        if match is None:
            return "", "", "", ""
        front, attrs, close, text = [match.group(n) for n in range(1, 5)]
        return front, attrs, close, text
|
||||
|
||||
|
||||
# -- module-private generic type variable, available for use in annotations --
_T = TypeVar("_T")
|
||||
|
||||
|
||||
class MetaOxmlElement(type):
    """Metaclass for BaseOxmlElement.

    When a class using this metaclass is created, each attribute in its class body that
    is an instance of one of the declarative definition classes (child-element or
    attribute declarations) is asked to install its members on the new class by way of
    its `.populate_class_members()` method.
    """

    def __init__(cls, clsname: str, bases: tuple[type, ...], namespace: dict[str, Any]):
        # -- the declaration types that know how to add members to an element class --
        declaration_types = (
            OneAndOnlyOne,
            OneOrMore,
            OptionalAttribute,
            RequiredAttribute,
            ZeroOrMore,
            ZeroOrOne,
            ZeroOrOneChoice,
        )
        for attr_name, declared in namespace.items():
            if not isinstance(declared, declaration_types):
                continue
            # -- the class-body name becomes the property name for the declaration --
            declared.populate_class_members(cls, attr_name)
|
||||
|
||||
|
||||
class BaseAttribute:
    """Base class for OptionalAttribute and RequiredAttribute.

    Holds the attribute name and its simple type, and installs a read/write property on
    the element class. The `_getter` and `_setter` properties defined here are
    placeholders; they are overridden by the subclasses.
    """

    def __init__(self, attr_name: str, simple_type: Type[BaseXmlEnum] | Type[BaseSimpleType]):
        super().__init__()
        self._attr_name = attr_name
        self._simple_type = simple_type

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Add the appropriate methods to `element_cls`.

        Records the target class and property name on this object, then installs the
        attribute property.
        """
        self._element_cls = element_cls
        self._prop_name = prop_name
        self._add_attr_property()

    def _add_attr_property(self):
        """Add a read/write `.{prop_name}` property to the element class.

        The property is built from this object's `_getter` and `_setter` callables.
        """
        # -- assign unconditionally to overwrite element name definition --
        setattr(
            self._element_cls,
            self._prop_name,
            property(self._getter, self._setter, None),
        )

    @property
    def _clark_name(self):
        """Attribute name used for lookup on the element.

        A name containing a colon is converted with `qn()`; any other name is used
        as-is.
        """
        return qn(self._attr_name) if ":" in self._attr_name else self._attr_name

    @property
    def _getter(self) -> Callable[[BaseOxmlElement], Any | None]: ...

    @property
    def _setter(
        self,
    ) -> Callable[[BaseOxmlElement, Any | None], None]: ...
|
||||
|
||||
|
||||
class OptionalAttribute(BaseAttribute):
    """Defines an optional attribute on a custom element class.

    Reading the property returns the configured default when the attribute is absent
    from the element. Assigning |None| or a value equal to the default removes the
    attribute, after which reads return the default again.
    """

    def __init__(
        self,
        attr_name: str,
        simple_type: Type[BaseXmlEnum] | Type[BaseSimpleType],
        default: BaseXmlEnum | BaseSimpleType | str | bool | None = None,
    ):
        super().__init__(attr_name, simple_type)
        self._default = default

    @property
    def _docstring(self):
        """String to use as `__doc__` attribute of attribute property."""
        return (
            f"{self._simple_type.__name__} type-converted value of"
            f" ``{self._attr_name}`` attribute, or |None| (or specified default"
            f" value) if not present. Assigning the default value causes the"
            f" attribute to be removed from the element."
        )

    @property
    def _getter(
        self,
    ) -> Callable[[BaseOxmlElement], Any | None]:
        """Function suitable for `__get__()` method on attribute property descriptor."""

        def get_attr_value(
            obj: BaseOxmlElement,
        ) -> Any | None:
            raw = obj.get(self._clark_name)
            # -- an absent attribute reads as the default, no conversion involved --
            return self._default if raw is None else self._simple_type.from_xml(raw)

        get_attr_value.__doc__ = self._docstring
        return get_attr_value

    @property
    def _setter(self) -> Callable[[BaseOxmlElement, Any], None]:
        """Function suitable for `__set__()` method on attribute property descriptor."""

        def set_attr_value(obj: BaseOxmlElement, value: Any | None):
            clark_name = self._clark_name
            # -- |None| and the default both mean "no attribute"; the simple type is
            # -- only consulted for other values --
            removing = value is None or value == self._default
            xml_value = None if removing else self._simple_type.to_xml(value)

            # -- a |None| result from `to_xml()` also removes the attribute --
            if xml_value is None:
                if clark_name in obj.attrib:
                    del obj.attrib[clark_name]
                return

            obj.set(clark_name, xml_value)

        return set_attr_value
|
||||
|
||||
|
||||
class RequiredAttribute(BaseAttribute):
    """Defines a required attribute on a custom element class.

    A required attribute has no default value. Reading it when the attribute is absent
    from the element raises |InvalidXmlError|. Assignment never removes the attribute;
    a value the simple type converts to |None| raises |ValueError|.
    """

    @property
    def _docstring(self):
        """String to use as the ``__doc__`` attribute of the property for this
        attribute."""
        return "%s type-converted value of ``%s`` attribute." % (
            self._simple_type.__name__,
            self._attr_name,
        )

    @property
    def _getter(self) -> Callable[[BaseOxmlElement], Any]:
        """Function object suitable for "get" side of attr property descriptor."""

        def get_attr_value(obj: BaseOxmlElement) -> Any | None:
            raw = obj.get(self._clark_name)
            if raw is not None:
                return self._simple_type.from_xml(raw)
            raise InvalidXmlError(
                "required '%s' attribute not present on element %s" % (self._attr_name, obj.tag)
            )

        get_attr_value.__doc__ = self._docstring
        return get_attr_value

    @property
    def _setter(self) -> Callable[[BaseOxmlElement, Any], None]:
        """Function object suitable for "set" side of attribute property descriptor."""

        def set_attr_value(obj: BaseOxmlElement, value: Any):
            xml_value = self._simple_type.to_xml(value)
            # -- there is no "absent" state for a required attribute --
            if xml_value is None:
                raise ValueError(f"cannot assign {value} to this required attribute")
            obj.set(self._clark_name, xml_value)

        return set_attr_value
|
||||
|
||||
|
||||
class _BaseChildElement:
    """Base class for the child-element declaration classes.

    Subclasses correspond to the different cardinalities a child element can have, such
    as ZeroOrOne and ZeroOrMore. This class supplies the building blocks each of them
    combines: helpers that add getter properties and `_new_x()`, `_insert_x()`,
    `_add_x()`, and `add_x()` methods to the element class.
    """

    def __init__(self, nsptagname: str, successors: tuple[str, ...] = ()):
        super().__init__()
        self._nsptagname = nsptagname
        self._successors = successors

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Baseline behavior for adding the appropriate methods to `element_cls`.

        Only records the target class and property name; subclasses extend this to add
        the members they need.
        """
        self._element_cls = element_cls
        self._prop_name = prop_name

    def _add_adder(self):
        """Add an ``_add_x()`` method to the element class for this child element."""

        def _add_child(obj: BaseOxmlElement, **attrs: Any):
            # -- create the loose child, apply keyword args as attributes on it, then
            # -- place it among the parent's children --
            child = getattr(obj, self._new_method_name)()
            for name, value in attrs.items():
                setattr(child, name, value)
            getattr(obj, self._insert_method_name)(child)
            return child

        _add_child.__doc__ = (
            "Add a new ``<%s>`` child element unconditionally, "
            "inserted in the correct sequence." % self._nsptagname
        )
        self._add_to_class(self._add_method_name, _add_child)

    def _add_creator(self):
        """Add a ``_new_{prop_name}()`` method to the element class that creates a new,
        empty element of the correct type, having no attributes."""
        new_child = self._creator
        new_child.__doc__ = (
            'Return a "loose", newly created ``<%s>`` element having no '
            "attributes, text, or children." % self._nsptagname
        )
        self._add_to_class(self._new_method_name, new_child)

    def _add_getter(self):
        """Add a read-only ``{prop_name}`` property to the element class for this child
        element."""
        # -- assign unconditionally to overwrite element name definition --
        setattr(self._element_cls, self._prop_name, property(self._getter, None, None))

    def _add_inserter(self):
        """Add an ``_insert_x()`` method to the element class for this child element."""

        def _insert_child(obj: BaseOxmlElement, child: BaseOxmlElement):
            # -- the successor tag names determine where the child is placed --
            obj.insert_element_before(child, *self._successors)
            return child

        _insert_child.__doc__ = (
            "Return the passed ``<%s>`` element after inserting it as a "
            "child in the correct sequence." % self._nsptagname
        )
        self._add_to_class(self._insert_method_name, _insert_child)

    def _add_list_getter(self):
        """Add a read-only ``{prop_name}_lst`` property to the element class to retrieve
        a list of child elements matching this type."""
        setattr(
            self._element_cls,
            "%s_lst" % self._prop_name,
            property(self._list_getter, None, None),
        )

    @lazyproperty
    def _add_method_name(self):
        """Name of the private adder method, like "_add_x"."""
        return "_add_%s" % self._prop_name

    def _add_public_adder(self):
        """Add a public ``add_x()`` method to the parent element class."""

        def add_child(obj: BaseOxmlElement):
            # -- delegates to the private adder, passing no attribute values --
            return getattr(obj, self._add_method_name)()

        add_child.__doc__ = (
            "Add a new ``<%s>`` child element unconditionally, "
            "inserted in the correct sequence." % self._nsptagname
        )
        self._add_to_class(self._public_add_method_name, add_child)

    def _add_to_class(self, name: str, method: Callable[..., Any]):
        """Add `method` to the target class as `name`, unless `name` is already defined
        on the class."""
        if not hasattr(self._element_cls, name):
            setattr(self._element_cls, name, method)

    @property
    def _creator(self) -> Callable[[BaseOxmlElement], BaseOxmlElement]:
        """Callable that creates an empty element of the right type, with no attrs."""
        # -- imported here rather than at module level --
        from docx.oxml.parser import OxmlElement

        def new_child_element(obj: BaseOxmlElement):
            return OxmlElement(self._nsptagname)

        return new_child_element

    @property
    def _getter(self):
        """Function object suitable for the "get" side of the property descriptor.

        This default getter returns the child element with matching tag name or |None|
        if not present.
        """

        def get_child_element(obj: BaseOxmlElement):
            return obj.find(qn(self._nsptagname))

        get_child_element.__doc__ = (
            "``<%s>`` child element or |None| if not present." % self._nsptagname
        )
        return get_child_element

    @lazyproperty
    def _insert_method_name(self):
        """Name of the inserter method, like "_insert_x"."""
        return "_insert_%s" % self._prop_name

    @property
    def _list_getter(self):
        """Function object suitable for the "get" side of a list property
        descriptor."""

        def get_child_element_list(obj: BaseOxmlElement):
            return obj.findall(qn(self._nsptagname))

        get_child_element_list.__doc__ = (
            "A list containing each of the ``<%s>`` child elements, in the "
            "order they appear." % self._nsptagname
        )
        return get_child_element_list

    @lazyproperty
    def _public_add_method_name(self):
        """Name of the public adder method, like "add_x".

        add_childElement() is public API for a repeating element, allowing new elements
        to be added to the sequence. May be overridden to provide a friendlier API to
        clients having domain appropriate parameter names for required attributes.
        """
        return "add_%s" % self._prop_name

    @lazyproperty
    def _remove_method_name(self):
        """Name of the remover method, like "_remove_x"."""
        return "_remove_%s" % self._prop_name

    @lazyproperty
    def _new_method_name(self):
        """Name of the creator method, like "_new_x"."""
        return "_new_%s" % self._prop_name
|
||||
|
||||
|
||||
class Choice(_BaseChildElement):
    """Defines a child element belonging to a group, only one of which may appear as a child.

    Unlike the other declaration classes, the property name is not supplied from
    outside; it is derived from the tag name. The group's property name and successors
    are passed in by the caller of `.populate_class_members()`.
    """

    @property
    def nsptagname(self):
        """Namespace-prefixed tag name of this choice, as given on construction."""
        return self._nsptagname

    def populate_class_members(  # pyright: ignore[reportIncompatibleMethodOverride]
        self,
        element_cls: MetaOxmlElement,
        group_prop_name: str,
        successors: tuple[str, ...],
    ) -> None:
        """Add the appropriate methods to `element_cls`."""
        self._element_cls = element_cls
        self._group_prop_name = group_prop_name
        self._successors = successors

        self._add_getter()
        self._add_creator()
        self._add_inserter()
        self._add_adder()
        self._add_get_or_change_to_method()

    def _add_get_or_change_to_method(self):
        """Add a ``get_or_change_to_x()`` method to the element class for this child
        element."""

        def get_or_change_to_child(obj: BaseOxmlElement):
            existing = getattr(obj, self._prop_name)
            if existing is None:
                # -- clear whichever group member is present before adding this one --
                getattr(obj, self._remove_group_method_name)()
                return getattr(obj, self._add_method_name)()
            return existing

        get_or_change_to_child.__doc__ = (
            "Return the ``<%s>`` child, replacing any other group element if found."
        ) % self._nsptagname
        self._add_to_class(self._get_or_change_to_method_name, get_or_change_to_child)

    @property
    def _prop_name(self):
        """Property name computed from tag name, e.g. a:schemeClr -> schemeClr."""
        # -- the part after the first colon, or the whole name when there is no colon --
        _, colon, local_part = self._nsptagname.partition(":")
        return local_part if colon else self._nsptagname

    @lazyproperty
    def _get_or_change_to_method_name(self):
        """Name of the method added for this choice, like "get_or_change_to_x"."""
        return "get_or_change_to_%s" % self._prop_name

    @lazyproperty
    def _remove_group_method_name(self):
        """Name of the group-remover method, formed from the group property name."""
        return "_remove_%s" % self._group_prop_name
|
||||
|
||||
|
||||
class OneAndOnlyOne(_BaseChildElement):
    """Defines a required child element for MetaOxmlElement.

    Only a getter property is added to the element class; reading it raises
    |InvalidXmlError| when the child is not present.
    """

    def __init__(self, nsptagname: str):
        # -- no successors are specified for this kind of child --
        super().__init__(nsptagname, ())

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Add the appropriate methods to `element_cls`."""
        super().populate_class_members(element_cls, prop_name)
        self._add_getter()

    @property
    def _getter(self):
        """Function object suitable for the "get" side of the property descriptor."""

        def get_child_element(obj: BaseOxmlElement):
            found = obj.find(qn(self._nsptagname))
            if found is not None:
                return found
            raise InvalidXmlError(
                "required ``<%s>`` child element not present" % self._nsptagname
            )

        get_child_element.__doc__ = "Required ``<%s>`` child element." % self._nsptagname
        return get_child_element
|
||||
|
||||
|
||||
class OneOrMore(_BaseChildElement):
    """Defines a repeating child element for MetaOxmlElement that must appear at least
    once."""

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Add the appropriate methods to `element_cls`.

        Adds the `{prop_name}_lst` property and the creator, inserter, private adder and
        public adder methods, then deletes the `prop_name` attribute from the class.
        """
        super().populate_class_members(element_cls, prop_name)
        for add_member in (
            self._add_list_getter,
            self._add_creator,
            self._add_inserter,
            self._add_adder,
            self._add_public_adder,
        ):
            add_member()
        # -- the declaration itself is removed; access is by `{prop_name}_lst` --
        delattr(element_cls, prop_name)
|
||||
|
||||
|
||||
class ZeroOrMore(_BaseChildElement):
    """Defines an optional repeating child element for MetaOxmlElement."""

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Add the appropriate methods to `element_cls`.

        Adds the `{prop_name}_lst` property and the creator, inserter, private adder and
        public adder methods, then deletes the `prop_name` attribute from the class.
        """
        super().populate_class_members(element_cls, prop_name)
        for add_member in (
            self._add_list_getter,
            self._add_creator,
            self._add_inserter,
            self._add_adder,
            self._add_public_adder,
        ):
            add_member()
        # -- the declaration itself is removed; access is by `{prop_name}_lst` --
        delattr(element_cls, prop_name)
|
||||
|
||||
|
||||
class ZeroOrOne(_BaseChildElement):
    """Defines an optional child element for MetaOxmlElement.

    In addition to the getter property and the creator, inserter and adder methods, a
    ``get_or_add_x()`` method and a ``_remove_x()`` method are added to the element
    class.
    """

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Add the appropriate methods to `element_cls`."""
        super().populate_class_members(element_cls, prop_name)
        for add_member in (
            self._add_getter,
            self._add_creator,
            self._add_inserter,
            self._add_adder,
            self._add_get_or_adder,
            self._add_remover,
        ):
            add_member()

    def _add_get_or_adder(self):
        """Add a ``get_or_add_x()`` method to the element class for this child
        element."""

        def get_or_add_child(obj: BaseOxmlElement):
            existing = getattr(obj, self._prop_name)
            if existing is not None:
                return existing
            # -- not present, so add one using the private adder --
            return getattr(obj, self._add_method_name)()

        get_or_add_child.__doc__ = (
            "Return the ``<%s>`` child element, newly added if not present."
        ) % self._nsptagname
        self._add_to_class(self._get_or_add_method_name, get_or_add_child)

    def _add_remover(self):
        """Add a ``_remove_x()`` method to the element class for this child element."""

        def _remove_child(obj: BaseOxmlElement):
            obj.remove_all(self._nsptagname)

        _remove_child.__doc__ = ("Remove all ``<%s>`` child elements.") % self._nsptagname
        self._add_to_class(self._remove_method_name, _remove_child)

    @lazyproperty
    def _get_or_add_method_name(self):
        """Name of the get-or-add method, like "get_or_add_x"."""
        return "get_or_add_%s" % self._prop_name
|
||||
|
||||
|
||||
class ZeroOrOneChoice(_BaseChildElement):
    """Corresponds to an ``EG_*`` element group where at most one of its members may
    appear as a child.

    The group has no tag name of its own; it is defined by its sequence of `Choice`
    members, all of which share the group's successors.
    """

    def __init__(self, choices: Sequence[Choice], successors: tuple[str, ...] = ()):
        self._choices = choices
        self._successors = successors

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Add the appropriate methods to `element_cls`.

        Adds the group getter property, then has each member choice add its own members,
        then adds the group remover method.
        """
        super().populate_class_members(element_cls, prop_name)
        self._add_choice_getter()
        for member in self._choices:
            member.populate_class_members(element_cls, self._prop_name, self._successors)
        self._add_group_remover()

    def _add_choice_getter(self):
        """Add a read-only ``{prop_name}`` property to the element class that returns
        the present member of this group, or |None| if none are present."""
        # assign unconditionally to overwrite element name definition
        setattr(
            self._element_cls,
            self._prop_name,
            property(self._choice_getter, None, None),
        )

    def _add_group_remover(self):
        """Add a ``_remove_eg_x()`` method to the element class for this choice
        group."""

        def _remove_choice_group(obj: BaseOxmlElement):
            # -- children are removed one member tag name at a time --
            for member_tagname in self._member_nsptagnames:
                obj.remove_all(member_tagname)

        _remove_choice_group.__doc__ = "Remove the current choice group child element if present."
        self._add_to_class(self._remove_choice_group_method_name, _remove_choice_group)

    @property
    def _choice_getter(self):
        """Function object suitable for the "get" side of the property descriptor."""

        def get_group_member_element(obj: BaseOxmlElement):
            return obj.first_child_found_in(*self._member_nsptagnames)

        get_group_member_element.__doc__ = (
            "Return the child element belonging to this element group, or "
            "|None| if no member child is present."
        )
        return get_group_member_element

    @lazyproperty
    def _member_nsptagnames(self):
        """List of namespace-prefixed tagnames, one for each of the member elements of
        this choice group, in the order the choices were given."""
        return [member.nsptagname for member in self._choices]

    @lazyproperty
    def _remove_choice_group_method_name(self):
        """Name of the group remover method, formed from the group property name."""
        return "_remove_%s" % self._prop_name
|
||||
|
||||
|
||||
# -- lxml typing isn't quite right here, just ignore this error on _Element --
|
||||
class BaseOxmlElement(etree.ElementBase, metaclass=MetaOxmlElement):
    """Effective base class for all custom element classes.

    Gathers behavior shared by every custom element class: child lookup, ordered
    insertion and removal by namespace-prefixed tag name, a readable XML rendering, and
    an `.xpath()` that supplies the standard namespace mapping.
    """

    def __repr__(self):
        class_name = self.__class__.__name__
        return "<%s '<%s>' at 0x%0x>" % (class_name, self._nsptag, id(self))

    def first_child_found_in(self, *tagnames: str) -> _Element | None:
        """First child with tag in `tagnames`, or None if not found.

        The tag names are tried in the order given and the search stops at the first
        one that has a matching child.
        """
        # -- lazily evaluated so no lookup happens after the first hit --
        matches = (self.find(qn(tagname)) for tagname in tagnames)
        return next((child for child in matches if child is not None), None)

    def insert_element_before(self, elm: ElementBase, *tagnames: str):
        """Insert `elm` before the child located by `tagnames` and return `elm`.

        The reference child is the one returned by `.first_child_found_in()`. When there
        is no such child, `elm` is appended as the last child instead.
        """
        successor = self.first_child_found_in(*tagnames)
        if successor is None:
            self.append(elm)
        else:
            successor.addprevious(elm)
        return elm

    def remove_all(self, *tagnames: str) -> None:
        """Remove child elements with tagname (e.g. "a:p") in `tagnames`."""
        for tagname in tagnames:
            for child in self.findall(qn(tagname)):
                self.remove(child)

    @property
    def xml(self) -> str:
        """XML string for this element, suitable for testing purposes.

        Pretty printed for readability and without an XML declaration at the top.
        """
        return serialize_for_reading(self)

    def xpath(self, xpath_str: str) -> Any:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Override of `lxml` _Element.xpath() method.

        Provides standard Open XML namespace mapping (`nsmap`) in centralized location.
        """
        return super().xpath(xpath_str, namespaces=nsmap)

    @property
    def _nsptag(self) -> str:
        """This element's tag converted from Clark notation by `NamespacePrefixedTag`."""
        return NamespacePrefixedTag.from_clark_name(self.tag)
|
||||
Reference in New Issue
Block a user