refactor: excel parse
This commit is contained in:
@@ -0,0 +1,428 @@
|
||||
"""Font-related proxy objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from docx.dml.color import ColorFormat
|
||||
from docx.enum.text import WD_UNDERLINE
|
||||
from docx.shared import ElementProxy, Emu
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.enum.text import WD_COLOR_INDEX
|
||||
from docx.oxml.text.run import CT_R
|
||||
from docx.shared import Length
|
||||
|
||||
|
||||
class Font(ElementProxy):
    """Proxy for the parent of a `<w:rPr>` element.

    Gives access to character-level properties such as typeface name, font size,
    bold, and subscript.
    """

    def __init__(self, r: CT_R, parent: Any | None = None):
        super().__init__(r, parent)
        # -- the same element is kept under both the generic and the run-specific name --
        self._element = self._r = r

    @property
    def all_caps(self) -> bool | None:
        """Read/write.

        Text in this font is shown in capital letters.
        """
        return self._get_bool_prop("caps")

    @all_caps.setter
    def all_caps(self, value: bool | None) -> None:
        self._set_bool_prop("caps", value)

    @property
    def bold(self) -> bool | None:
        """Read/write.

        Text in this font is shown in bold.
        """
        return self._get_bool_prop("b")

    @bold.setter
    def bold(self, value: bool | None) -> None:
        self._set_bool_prop("b", value)

    @property
    def color(self):
        """|ColorFormat| object used to read and change the text color of this font."""
        color_format = ColorFormat(self._element)
        return color_format

    @property
    def complex_script(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the characters in the run be treated as complex script, whatever
        their Unicode values are.
        """
        return self._get_bool_prop("cs")

    @complex_script.setter
    def complex_script(self, value: bool | None) -> None:
        self._set_bool_prop("cs", value)

    @property
    def cs_bold(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the complex-script characters in the run display in bold typeface.
        """
        return self._get_bool_prop("bCs")

    @cs_bold.setter
    def cs_bold(self, value: bool | None) -> None:
        self._set_bool_prop("bCs", value)

    @property
    def cs_italic(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the complex-script characters in the run display in italic
        typeface.
        """
        return self._get_bool_prop("iCs")

    @cs_italic.setter
    def cs_italic(self, value: bool | None) -> None:
        self._set_bool_prop("iCs", value)

    @property
    def double_strike(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the text in the run appear with double strikethrough.
        """
        return self._get_bool_prop("dstrike")

    @double_strike.setter
    def double_strike(self, value: bool | None) -> None:
        self._set_bool_prop("dstrike", value)

    @property
    def emboss(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the text in the run look raised off the page in relief.
        """
        return self._get_bool_prop("emboss")

    @emboss.setter
    def emboss(self, value: bool | None) -> None:
        self._set_bool_prop("emboss", value)

    @property
    def hidden(self) -> bool | None:
        """Read/write tri-state value.

        |True| hides the text in the run from display, unless application settings
        force hidden text to be shown.
        """
        return self._get_bool_prop("vanish")

    @hidden.setter
    def hidden(self, value: bool | None) -> None:
        self._set_bool_prop("vanish", value)

    @property
    def highlight_color(self) -> WD_COLOR_INDEX | None:
        """Highlight color applied to the text, or |None| when it is not highlighted."""
        rPr = self._element.rPr
        return None if rPr is None else rPr.highlight_val

    @highlight_color.setter
    def highlight_color(self, value: WD_COLOR_INDEX | None):
        # -- `w:rPr` is created on demand so the value always has a home --
        self._element.get_or_add_rPr().highlight_val = value

    @property
    def italic(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the text of the run appear in italics. |None| means the effective
        value is inherited from the style hierarchy.
        """
        return self._get_bool_prop("i")

    @italic.setter
    def italic(self, value: bool | None) -> None:
        self._set_bool_prop("i", value)

    @property
    def imprint(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the text in the run look as if pressed into the page.
        """
        return self._get_bool_prop("imprint")

    @imprint.setter
    def imprint(self, value: bool | None) -> None:
        self._set_bool_prop("imprint", value)

    @property
    def math(self) -> bool | None:
        """Read/write tri-state value.

        |True| specifies that this run contains WML that should be handled as though
        it was Office Open XML Math.
        """
        return self._get_bool_prop("oMath")

    @math.setter
    def math(self, value: bool | None) -> None:
        self._set_bool_prop("oMath", value)

    @property
    def name(self) -> str | None:
        """Typeface name of this |Font|.

        The controlled text appears in the named font when a matching font is found.
        |None| means the typeface is inherited from the style hierarchy.
        """
        rPr = self._element.rPr
        return None if rPr is None else rPr.rFonts_ascii

    @name.setter
    def name(self, value: str | None) -> None:
        props = self._element.get_or_add_rPr()
        # -- both the ascii and hAnsi font attributes receive the same typeface --
        props.rFonts_ascii = value
        props.rFonts_hAnsi = value

    @property
    def no_proof(self) -> bool | None:
        """Read/write tri-state value.

        |True| specifies that the contents of this run should report no errors when
        the document is scanned for spelling and grammar.
        """
        return self._get_bool_prop("noProof")

    @no_proof.setter
    def no_proof(self, value: bool | None) -> None:
        self._set_bool_prop("noProof", value)

    @property
    def outline(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the characters in the run look outlined, by drawing a one pixel
        wide border around the inside and outside borders of each character glyph.
        """
        return self._get_bool_prop("outline")

    @outline.setter
    def outline(self, value: bool | None) -> None:
        self._set_bool_prop("outline", value)

    @property
    def rtl(self) -> bool | None:
        """Read/write tri-state value.

        |True| gives the text in the run right-to-left characteristics.
        """
        return self._get_bool_prop("rtl")

    @rtl.setter
    def rtl(self, value: bool | None) -> None:
        self._set_bool_prop("rtl", value)

    @property
    def shadow(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes each character of the text in the run appear to cast a shadow.
        """
        return self._get_bool_prop("shadow")

    @shadow.setter
    def shadow(self, value: bool | None) -> None:
        self._set_bool_prop("shadow", value)

    @property
    def size(self) -> Length | None:
        """Height of the font in English Metric Units (EMU).

        |None| means the font size is inherited from the style hierarchy. |Length| is
        an |int| subclass with properties for convenient conversion to points and other
        length units. Point values are conveniently specified with
        :class:`docx.shared.Pt`::

            >>> font.size = Pt(24)
            >>> font.size
            304800
            >>> font.size.pt
            24.0

        """
        rPr = self._element.rPr
        return None if rPr is None else rPr.sz_val

    @size.setter
    def size(self, emu: int | Length | None) -> None:
        props = self._element.get_or_add_rPr()
        if emu is None:
            props.sz_val = None
        else:
            # -- plain ints are wrapped as `Emu` before being stored --
            props.sz_val = Emu(emu)

    @property
    def small_caps(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the lowercase characters in the run appear as capital letters two
        points smaller than the font size specified for the run.
        """
        return self._get_bool_prop("smallCaps")

    @small_caps.setter
    def small_caps(self, value: bool | None) -> None:
        self._set_bool_prop("smallCaps", value)

    @property
    def snap_to_grid(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the run use the document-grid characters-per-line settings defined
        in the docGrid element when the characters in this run are laid out.
        """
        return self._get_bool_prop("snapToGrid")

    @snap_to_grid.setter
    def snap_to_grid(self, value: bool | None) -> None:
        self._set_bool_prop("snapToGrid", value)

    @property
    def spec_vanish(self) -> bool | None:
        """Read/write tri-state value.

        |True| specifies that the run shall always behave as if it is hidden, even when
        hidden text is being displayed in the current document. This property has a
        very narrow, specialized use related to the table of contents; see the spec
        (§17.3.2.36) for details.
        """
        return self._get_bool_prop("specVanish")

    @spec_vanish.setter
    def spec_vanish(self, value: bool | None) -> None:
        self._set_bool_prop("specVanish", value)

    @property
    def strike(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the text in the run appear with a single horizontal line through
        the center of the line.
        """
        return self._get_bool_prop("strike")

    @strike.setter
    def strike(self, value: bool | None) -> None:
        self._set_bool_prop("strike", value)

    @property
    def subscript(self) -> bool | None:
        """Boolean telling whether the characters in this |Font| appear as subscript.

        |None| means the subscript/superscript value is inherited from the style
        hierarchy.
        """
        rPr = self._element.rPr
        return None if rPr is None else rPr.subscript

    @subscript.setter
    def subscript(self, value: bool | None) -> None:
        self._element.get_or_add_rPr().subscript = value

    @property
    def superscript(self) -> bool | None:
        """Boolean telling whether the characters in this |Font| appear as superscript.

        |None| means the subscript/superscript value is inherited from the style
        hierarchy.
        """
        rPr = self._element.rPr
        return None if rPr is None else rPr.superscript

    @superscript.setter
    def superscript(self, value: bool | None) -> None:
        self._element.get_or_add_rPr().superscript = value

    @property
    def underline(self) -> bool | WD_UNDERLINE | None:
        """Underline style of this |Font|.

        One of |None|, |True|, |False|, or a member of :ref:`WdUnderline`.

        |None| means the underline value is inherited from the style hierarchy, |False|
        means no underline, and |True| means single underline. Members of
        :ref:`WdUnderline` specify other underline styles such as double, wavy, and
        dotted.
        """
        rPr = self._element.rPr
        if rPr is None:
            return None
        val = rPr.u_val
        # -- the three "simple" enum members are reported as None/True/False --
        if val == WD_UNDERLINE.INHERITED:
            return None
        if val == WD_UNDERLINE.SINGLE:
            return True
        if val == WD_UNDERLINE.NONE:
            return False
        return val

    @underline.setter
    def underline(self, value: bool | WD_UNDERLINE | None) -> None:
        props = self._element.get_or_add_rPr()
        # -- The explicit bool mappings are not strictly required; without them things
        # -- still work, but only because True == 1 and False == 0 happen to match the
        # -- mapping for WD_UNDERLINE.SINGLE and WD_UNDERLINE.NONE respectively.
        if value is True:
            val = WD_UNDERLINE.SINGLE
        elif value is False:
            val = WD_UNDERLINE.NONE
        else:
            val = value
        props.u_val = val

    @property
    def web_hidden(self) -> bool | None:
        """Read/write tri-state value.

        |True| specifies that the contents of this run shall be hidden when the
        document is displayed in web page view.
        """
        return self._get_bool_prop("webHidden")

    @web_hidden.setter
    def web_hidden(self, value: bool | None) -> None:
        self._set_bool_prop("webHidden", value)

    def _get_bool_prop(self, name: str) -> bool | None:
        """Value of the boolean child of `w:rPr` named `name`.

        |None| is returned when there is no `w:rPr` element at all.
        """
        rPr = self._element.rPr
        return (
            None if rPr is None else rPr._get_bool_val(name)  # pyright: ignore[reportPrivateUsage]
        )

    def _set_bool_prop(self, name: str, value: bool | None):
        """Store `value` in the boolean child `name` of `w:rPr`, adding `w:rPr` if needed."""
        props = self._element.get_or_add_rPr()
        props._set_bool_val(name, value)  # pyright: ignore[reportPrivateUsage]
|
||||
@@ -0,0 +1,121 @@
|
||||
"""Hyperlink-related proxy objects for python-docx, Hyperlink in particular.
|
||||
|
||||
A hyperlink occurs in a paragraph, at the same level as a Run, and a hyperlink itself
|
||||
contains runs, which is where the visible text of the hyperlink is stored. So it's kind
|
||||
of in-between, less than a paragraph and more than a run. So it gets its own module.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.shared import Parented
|
||||
from docx.text.run import Run
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import docx.types as t
|
||||
from docx.oxml.text.hyperlink import CT_Hyperlink
|
||||
|
||||
|
||||
class Hyperlink(Parented):
    """Proxy object for a `<w:hyperlink>` element.

    A hyperlink is a child of a paragraph, a peer of Run. Its visible text is stored
    in the runs the hyperlink itself contains.
    """

    def __init__(self, hyperlink: CT_Hyperlink, parent: t.ProvidesStoryPart):
        super().__init__(parent)
        self._parent = parent
        # -- same element, available under both its generic and its specific name --
        self._element = hyperlink
        self._hyperlink = hyperlink

    @property
    def address(self) -> str:
        """The "URL" of the hyperlink (which need not be a web link).

        Often a web link like "https://google.com", but the address can take many
        forms, including "internal links" to bookmarked locations in the document.
        For an internal "jump", for example from the table-of-contents (TOC) to a
        heading, the address is blank and the bookmark reference (like
        "_Toc147925734") is found in the `.fragment` property.
        """
        rel_id = self._hyperlink.rId
        if not rel_id:
            # -- no relationship id, so there is no external target to report --
            return ""
        return self._parent.part.rels[rel_id].target_ref

    @property
    def contains_page_break(self) -> bool:
        """True when the text of this hyperlink is split across page boundaries.

        This is not unusual; it happens for example when multi-word hyperlink text
        falls in the last line of a page. A hyperlink could in theory contain more
        than one page break, though that would be extremely uncommon in practice, so
        read this value as "one-or-more" rendered page breaks are present.
        """
        page_breaks = self._hyperlink.lastRenderedPageBreaks
        return bool(page_breaks)

    @property
    def fragment(self) -> str:
        """Reference like `#glossary` at the end of a URL, naming a sub-resource.

        The fragment-separator character ("#") is not part of this value.

        HTML calls this a "named anchor" and the MS API calls it an "anchor", but an
        "anchor" element (`<a>`) is a whole hyperlink in HTML, so the more precise
        RFC 3986 name "URI fragment" is used here to avoid confusion.

        Fragments also refer to bookmarks within the same document; then the
        `.address` value will be blank ("") and this property holds a value like
        "_Toc147925734".

        To reliably get a complete web URL, join this with the `.address` value using
        "#" when both are present. The `.url` property does that for you.

        Depending on how the URL was inserted, Word sometimes stores a fragment in
        this property (an XML attribute) and sometimes with the address, so an empty
        value here does not prove that no fragment is present.
        """
        anchor = self._hyperlink.anchor
        return anchor if anchor else ""

    @property
    def runs(self) -> list[Run]:
        """List of |Run| instances in this hyperlink.

        Together they make up the visible text of the hyperlink. That text is
        typically held in a single run, but it will be split into multiple runs if
        for example part of the hyperlink is bold or the text was changed after the
        document was saved.
        """
        parent = self._parent
        runs: list[Run] = []
        for r in self._hyperlink.r_lst:
            runs.append(Run(r, parent))
        return runs

    @property
    def text(self) -> str:
        """Text of every run in the hyperlink, concatenated into one string.

        Tabs and line breaks in the XML become ``\\t`` and ``\\n`` characters
        respectively. Rendered page-breaks can occur inside a hyperlink but they do
        not show up in this text.
        """
        return self._hyperlink.text

    @property
    def url(self) -> str:
        """Convenience property for getting web URLs out of hyperlinks that have one.

        The value is the empty string ("") when there is no address portion, so its
        truthiness also tells external URIs apart from internal "jump" hyperlinks
        like those in a table-of-contents.

        The value may also be a link to a file; check for a protocol prefix like
        `https://` if only web-urls are wanted.

        With both an address and a fragment present, the two are joined with the
        fragment-separator hash ("#"). Otherwise the value equals that of the
        `.address` property.
        """
        address = self.address
        fragment = self.fragment
        if not address:
            return ""
        if fragment:
            return f"{address}#{fragment}"
        return address
|
||||
@@ -0,0 +1,104 @@
|
||||
"""Proxy objects related to rendered page-breaks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
|
||||
from docx.shared import Parented
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import docx.types as t
|
||||
from docx.text.paragraph import Paragraph
|
||||
|
||||
|
||||
class RenderedPageBreak(Parented):
    """A page-break Word inserted during page-layout for print or display purposes.

    Usually this is not a "hard" page-break placed by the document author; Word simply
    ran out of room on one page and had to start another. Where these fall can change
    with the printer, page-size, margins, etc. Edits move them too, but only once Word
    loads and saves the document.

    `python-docx` never inserts these because it has no rendering function. They are
    generally only useful for text-extraction from existing documents, when
    `python-docx` is used purely as a document "reader".

    NOTE: a rendered page-break can fall inside a hyperlink. Picture a multi-word
    hyperlink like "excellent Wikipedia article on LLMs" sitting near the end of the
    last line on a page, with the page breaking between "Wikipedia" and "article". In
    such a "page-breaks-in-hyperlink" case, THESE METHODS WILL "MOVE" THE PAGE-BREAK
    to just after the hyperlink, so the whole hyperlink shows up in the paragraph
    returned by `.preceding_paragraph_fragment`. That puts the "tail" text of the
    hyperlink on the "wrong" page, but avoids producing two hyperlinks that each carry
    part of the text and point to the same address.
    """

    def __init__(
        self,
        lastRenderedPageBreak: CT_LastRenderedPageBreak,
        parent: t.ProvidesStoryPart,
    ):
        super().__init__(parent)
        # -- one element, reachable under a generic and a specific attribute name --
        self._element = self._lastRenderedPageBreak = lastRenderedPageBreak

    @property
    def preceding_paragraph_fragment(self) -> Paragraph | None:
        """A "loose" paragraph holding the content that precedes this page-break.

        Meant to be used together with `.following_paragraph_fragment`.

        The value is `None` when nothing precedes this page-break. That is common; it
        happens whenever a page breaks on an even paragraph boundary. Returning `None`
        then avoids "inserting" a paragraph that does not exist into the content
        stream. Content can include DrawingML items like images or charts.

        The returned paragraph *is divorced from the document body*; changes made to
        it are not reflected in the document. It is a familiar container
        (`Paragraph`) for interrogating the content that precedes this page-break in
        the paragraph in which it occurred.

        Holds the entire hyperlink when this break occurs within a hyperlink.
        """
        lrpb = self._lastRenderedPageBreak
        if lrpb.precedes_all_content:
            return None

        # -- deferred import; presumably avoids a circular import with the paragraph
        # -- module -- TODO confirm
        from docx.text.paragraph import Paragraph

        fragment_p = lrpb.preceding_fragment_p
        return Paragraph(fragment_p, self._parent)

    @property
    def following_paragraph_fragment(self) -> Paragraph | None:
        """A "loose" paragraph holding the content that follows this page-break.

        HAS POTENTIALLY SURPRISING BEHAVIORS, so read carefully to be sure it does
        what you want. The main audience is text-extraction use-cases where tying text
        precisely to the page it appears on matters.

        Meant to be used together with `.preceding_paragraph_fragment`.

        The value is `None` when nothing follows this page-break. That is unlikely in
        practice because Word puts even-paragraph-boundary page-breaks on the
        paragraph *following* the page-break. It is possible though and must be
        checked for. Returning `None` then avoids "inserting" an extra, non-existent
        paragraph into the content stream. Content can include DrawingML items like
        images or charts, not just text.

        The returned paragraph *is divorced from the document body*; changes made to
        it are not reflected in the document. It is a container (`Paragraph`) with
        familiar properties and methods for characterizing the paragraph content that
        follows a mid-paragraph page-break.

        Holds no portion of the hyperlink when this break occurs within a hyperlink.
        """
        lrpb = self._lastRenderedPageBreak
        if lrpb.follows_all_content:
            return None

        # -- deferred import; presumably avoids a circular import with the paragraph
        # -- module -- TODO confirm
        from docx.text.paragraph import Paragraph

        fragment_p = lrpb.following_fragment_p
        return Paragraph(fragment_p, self._parent)
|
||||
@@ -0,0 +1,173 @@
|
||||
"""Paragraph-related proxy types."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Iterator, List, cast
|
||||
|
||||
from docx.enum.style import WD_STYLE_TYPE
|
||||
from docx.oxml.text.run import CT_R
|
||||
from docx.shared import StoryChild
|
||||
from docx.styles.style import ParagraphStyle
|
||||
from docx.text.hyperlink import Hyperlink
|
||||
from docx.text.pagebreak import RenderedPageBreak
|
||||
from docx.text.parfmt import ParagraphFormat
|
||||
from docx.text.run import Run
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import docx.types as t
|
||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
||||
from docx.oxml.text.paragraph import CT_P
|
||||
from docx.styles.style import CharacterStyle
|
||||
|
||||
|
||||
class Paragraph(StoryChild):
|
||||
"""Proxy object wrapping a `<w:p>` element."""
|
||||
|
||||
def __init__(self, p: CT_P, parent: t.ProvidesStoryPart):
|
||||
super(Paragraph, self).__init__(parent)
|
||||
self._p = self._element = p
|
||||
|
||||
def add_run(self, text: str | None = None, style: str | CharacterStyle | None = None) -> Run:
|
||||
"""Append run containing `text` and having character-style `style`.
|
||||
|
||||
`text` can contain tab (``\\t``) characters, which are converted to the
|
||||
appropriate XML form for a tab. `text` can also include newline (``\\n``) or
|
||||
carriage return (``\\r``) characters, each of which is converted to a line
|
||||
break. When `text` is `None`, the new run is empty.
|
||||
"""
|
||||
r = self._p.add_r()
|
||||
run = Run(r, self)
|
||||
if text:
|
||||
run.text = text
|
||||
if style:
|
||||
run.style = style
|
||||
return run
|
||||
|
||||
@property
|
||||
def alignment(self) -> WD_PARAGRAPH_ALIGNMENT | None:
|
||||
"""A member of the :ref:`WdParagraphAlignment` enumeration specifying the
|
||||
justification setting for this paragraph.
|
||||
|
||||
A value of |None| indicates the paragraph has no directly-applied alignment
|
||||
value and will inherit its alignment value from its style hierarchy. Assigning
|
||||
|None| to this property removes any directly-applied alignment value.
|
||||
"""
|
||||
return self._p.alignment
|
||||
|
||||
@alignment.setter
|
||||
def alignment(self, value: WD_PARAGRAPH_ALIGNMENT):
|
||||
self._p.alignment = value
|
||||
|
||||
def clear(self):
|
||||
"""Return this same paragraph after removing all its content.
|
||||
|
||||
Paragraph-level formatting, such as style, is preserved.
|
||||
"""
|
||||
self._p.clear_content()
|
||||
return self
|
||||
|
||||
@property
|
||||
def contains_page_break(self) -> bool:
|
||||
"""`True` when one or more rendered page-breaks occur in this paragraph."""
|
||||
return bool(self._p.lastRenderedPageBreaks)
|
||||
|
||||
@property
|
||||
def hyperlinks(self) -> List[Hyperlink]:
|
||||
"""A |Hyperlink| instance for each hyperlink in this paragraph."""
|
||||
return [Hyperlink(hyperlink, self) for hyperlink in self._p.hyperlink_lst]
|
||||
|
||||
def insert_paragraph_before(
|
||||
self, text: str | None = None, style: str | ParagraphStyle | None = None
|
||||
) -> Paragraph:
|
||||
"""Return a newly created paragraph, inserted directly before this paragraph.
|
||||
|
||||
If `text` is supplied, the new paragraph contains that text in a single run. If
|
||||
`style` is provided, that style is assigned to the new paragraph.
|
||||
"""
|
||||
paragraph = self._insert_paragraph_before()
|
||||
if text:
|
||||
paragraph.add_run(text)
|
||||
if style is not None:
|
||||
paragraph.style = style
|
||||
return paragraph
|
||||
|
||||
def iter_inner_content(self) -> Iterator[Run | Hyperlink]:
|
||||
"""Generate the runs and hyperlinks in this paragraph, in the order they appear.
|
||||
|
||||
The content in a paragraph consists of both runs and hyperlinks. This method
|
||||
allows accessing each of those separately, in document order, for when the
|
||||
precise position of the hyperlink within the paragraph text is important. Note
|
||||
that a hyperlink itself contains runs.
|
||||
"""
|
||||
for r_or_hlink in self._p.inner_content_elements:
|
||||
yield (
|
||||
Run(r_or_hlink, self)
|
||||
if isinstance(r_or_hlink, CT_R)
|
||||
else Hyperlink(r_or_hlink, self)
|
||||
)
|
||||
|
||||
@property
|
||||
def paragraph_format(self):
|
||||
"""The |ParagraphFormat| object providing access to the formatting properties
|
||||
for this paragraph, such as line spacing and indentation."""
|
||||
return ParagraphFormat(self._element)
|
||||
|
||||
@property
|
||||
def rendered_page_breaks(self) -> List[RenderedPageBreak]:
|
||||
"""All rendered page-breaks in this paragraph.
|
||||
|
||||
Most often an empty list, sometimes contains one page-break, but can contain
|
||||
more than one is rare or contrived cases.
|
||||
"""
|
||||
return [RenderedPageBreak(lrpb, self) for lrpb in self._p.lastRenderedPageBreaks]
|
||||
|
||||
@property
|
||||
def runs(self) -> List[Run]:
|
||||
"""Sequence of |Run| instances corresponding to the <w:r> elements in this
|
||||
paragraph."""
|
||||
return [Run(r, self) for r in self._p.r_lst]
|
||||
|
||||
@property
|
||||
def style(self) -> ParagraphStyle | None:
|
||||
"""Read/Write.
|
||||
|
||||
|_ParagraphStyle| object representing the style assigned to this paragraph. If
|
||||
no explicit style is assigned to this paragraph, its value is the default
|
||||
paragraph style for the document. A paragraph style name can be assigned in lieu
|
||||
of a paragraph style object. Assigning |None| removes any applied style, making
|
||||
its effective value the default paragraph style for the document.
|
||||
"""
|
||||
style_id = self._p.style
|
||||
style = self.part.get_style(style_id, WD_STYLE_TYPE.PARAGRAPH)
|
||||
return cast(ParagraphStyle, style)
|
||||
|
||||
@style.setter
|
||||
def style(self, style_or_name: str | ParagraphStyle | None):
|
||||
style_id = self.part.get_style_id(style_or_name, WD_STYLE_TYPE.PARAGRAPH)
|
||||
self._p.style = style_id
|
||||
|
||||
@property
|
||||
def text(self) -> str:
|
||||
"""The textual content of this paragraph.
|
||||
|
||||
The text includes the visible-text portion of any hyperlinks in the paragraph.
|
||||
Tabs and line breaks in the XML are mapped to ``\\t`` and ``\\n`` characters
|
||||
respectively.
|
||||
|
||||
Assigning text to this property causes all existing paragraph content to be
|
||||
replaced with a single run containing the assigned text. A ``\\t`` character in
|
||||
the text is mapped to a ``<w:tab/>`` element and each ``\\n`` or ``\\r``
|
||||
character is mapped to a line break. Paragraph-level formatting, such as style,
|
||||
is preserved. All run-level formatting, such as bold or italic, is removed.
|
||||
"""
|
||||
return self._p.text
|
||||
|
||||
@text.setter
|
||||
def text(self, text: str | None):
|
||||
self.clear()
|
||||
self.add_run(text)
|
||||
|
||||
def _insert_paragraph_before(self):
    """Create a new paragraph immediately before this one and return its proxy.

    The new |Paragraph| is given the same parent as this paragraph.
    """
    new_p = self._p.add_p_before()
    return Paragraph(new_p, self._parent)
|
||||
@@ -0,0 +1,286 @@
|
||||
"""Paragraph-related proxy types."""
|
||||
|
||||
from docx.enum.text import WD_LINE_SPACING
|
||||
from docx.shared import ElementProxy, Emu, Length, Pt, Twips, lazyproperty
|
||||
from docx.text.tabstops import TabStops
|
||||
|
||||
|
||||
class ParagraphFormat(ElementProxy):
    """Proxy giving access to paragraph-level formatting.

    Covers justification, indentation, line spacing, space before and after, and
    pagination controls such as widow/orphan control.

    The plain property getters return |None| when the wrapped element has no `pPr`
    child. The setters create that child on demand with `get_or_add_pPr()`.
    """

    @property
    def alignment(self):
        """Read/write. Member of :ref:`WdParagraphAlignment` giving the justification
        setting for this paragraph.

        |None| indicates paragraph alignment is inherited from the style hierarchy.
        """
        pPr = self._element.pPr
        return None if pPr is None else pPr.jc_val

    @alignment.setter
    def alignment(self, value):
        self._element.get_or_add_pPr().jc_val = value

    @property
    def first_line_indent(self):
        """Read/write. |Length| giving the indentation of the first line relative to
        the rest of the paragraph.

        A positive value indents the first line; a negative value produces a hanging
        indent. |None| indicates first-line indentation is inherited from the style
        hierarchy.
        """
        pPr = self._element.pPr
        return None if pPr is None else pPr.first_line_indent

    @first_line_indent.setter
    def first_line_indent(self, value):
        self._element.get_or_add_pPr().first_line_indent = value

    @property
    def keep_together(self):
        """Read/write. |True| if the paragraph should be kept "in one piece" rather
        than being broken across a page boundary when the document is rendered.

        |None| indicates the effective value is inherited from the style hierarchy.
        """
        pPr = self._element.pPr
        return None if pPr is None else pPr.keepLines_val

    @keep_together.setter
    def keep_together(self, value):
        pPr = self._element.get_or_add_pPr()
        pPr.keepLines_val = value

    @property
    def keep_with_next(self):
        """Read/write. |True| if the paragraph should stay on the same page as the
        paragraph that follows it when the document is rendered.

        This can be used, for example, to keep a section heading on the same page as
        its first paragraph. |None| indicates the effective value is inherited from
        the style hierarchy.
        """
        pPr = self._element.pPr
        return None if pPr is None else pPr.keepNext_val

    @keep_with_next.setter
    def keep_with_next(self, value):
        pPr = self._element.get_or_add_pPr()
        pPr.keepNext_val = value

    @property
    def left_indent(self):
        """Read/write. |Length| giving the space between the left margin and the left
        side of the paragraph.

        |None| indicates the left indent is inherited from the style hierarchy. An
        |Inches| value object is a convenient way to apply indentation in inches.
        """
        pPr = self._element.pPr
        return None if pPr is None else pPr.ind_left

    @left_indent.setter
    def left_indent(self, value):
        self._element.get_or_add_pPr().ind_left = value

    @property
    def line_spacing(self):
        """Read/write. |float| or |Length| giving the distance between baselines of
        successive lines in the paragraph.

        |None| indicates line spacing is inherited from the style hierarchy. A float
        such as ``2.0`` or ``1.75`` means spacing is a multiple of the line height. A
        |Length| such as ``Pt(12)`` means spacing is a fixed height; |Pt| is a
        convenient way to express it in points. Assigning |None| resets line spacing
        so it is inherited from the style hierarchy again.
        """
        pPr = self._element.pPr
        return (
            None
            if pPr is None
            else self._line_spacing(pPr.spacing_line, pPr.spacing_lineRule)
        )

    @line_spacing.setter
    def line_spacing(self, value):
        pPr = self._element.get_or_add_pPr()

        # -- None clears both the distance and its interpretation rule --
        if value is None:
            pPr.spacing_line = None
            pPr.spacing_lineRule = None
            return

        # -- a Length is an absolute height; an existing AT_LEAST rule is kept,
        # -- any other rule becomes EXACTLY
        if isinstance(value, Length):
            pPr.spacing_line = value
            if pPr.spacing_lineRule != WD_LINE_SPACING.AT_LEAST:
                pPr.spacing_lineRule = WD_LINE_SPACING.EXACTLY
            return

        # -- anything else is a number of lines, one line being Twips(240) --
        pPr.spacing_line = Emu(value * Twips(240))
        pPr.spacing_lineRule = WD_LINE_SPACING.MULTIPLE

    @property
    def line_spacing_rule(self):
        """Read/write. Member of :ref:`WdLineSpacing` indicating how the value of
        :attr:`line_spacing` is to be interpreted.

        Assigning :attr:`SINGLE`, :attr:`DOUBLE`, or :attr:`ONE_POINT_FIVE` also
        updates :attr:`line_spacing` to produce the corresponding line spacing.
        """
        pPr = self._element.pPr
        return (
            None
            if pPr is None
            else self._line_spacing_rule(pPr.spacing_line, pPr.spacing_lineRule)
        )

    @line_spacing_rule.setter
    def line_spacing_rule(self, value):
        pPr = self._element.get_or_add_pPr()

        # -- the three "named" spacings are stored as a MULTIPLE rule plus a
        # -- specific distance, so they set both attributes
        named_spacings = (
            (WD_LINE_SPACING.SINGLE, 240),
            (WD_LINE_SPACING.ONE_POINT_FIVE, 360),
            (WD_LINE_SPACING.DOUBLE, 480),
        )
        for named_rule, twips in named_spacings:
            if value == named_rule:
                pPr.spacing_line = Twips(twips)
                pPr.spacing_lineRule = WD_LINE_SPACING.MULTIPLE
                return

        # -- any other value is stored as-is and leaves the distance untouched --
        pPr.spacing_lineRule = value

    @property
    def page_break_before(self):
        """Read/write. |True| if the paragraph should appear at the top of the page
        following the prior paragraph.

        |None| indicates the effective value is inherited from the style hierarchy.
        """
        pPr = self._element.pPr
        return None if pPr is None else pPr.pageBreakBefore_val

    @page_break_before.setter
    def page_break_before(self, value):
        pPr = self._element.get_or_add_pPr()
        pPr.pageBreakBefore_val = value

    @property
    def right_indent(self):
        """Read/write. |Length| giving the space between the right margin and the
        right side of the paragraph.

        |None| indicates the right indent is inherited from the style hierarchy. A
        |Cm| value object is a convenient way to apply indentation in centimeters.
        """
        pPr = self._element.pPr
        return None if pPr is None else pPr.ind_right

    @right_indent.setter
    def right_indent(self, value):
        self._element.get_or_add_pPr().ind_right = value

    @property
    def space_after(self):
        """Read/write. |Length| giving the spacing between this paragraph and the
        subsequent paragraph.

        |None| indicates the value is inherited from the style hierarchy. |Length|
        objects provide convenience properties, such as :attr:`~.Length.pt` and
        :attr:`~.Length.inches`, for conversion to various length units.
        """
        pPr = self._element.pPr
        return None if pPr is None else pPr.spacing_after

    @space_after.setter
    def space_after(self, value):
        pPr = self._element.get_or_add_pPr()
        pPr.spacing_after = value

    @property
    def space_before(self):
        """Read/write. |Length| giving the spacing between this paragraph and the
        prior paragraph.

        |None| indicates the value is inherited from the style hierarchy. |Length|
        objects provide convenience properties, such as :attr:`~.Length.pt` and
        :attr:`~.Length.cm`, for conversion to various length units.
        """
        pPr = self._element.pPr
        return None if pPr is None else pPr.spacing_before

    @space_before.setter
    def space_before(self, value):
        pPr = self._element.get_or_add_pPr()
        pPr.spacing_before = value

    @lazyproperty
    def tab_stops(self):
        """|TabStops| object giving access to the tab stops defined for this
        paragraph format.

        Unlike the other getters, this one adds a `pPr` child when none is present.
        """
        return TabStops(self._element.get_or_add_pPr())

    @property
    def widow_control(self):
        """Read/write. |True| if the first and last lines of the paragraph remain on
        the same page as the rest of the paragraph when Word repaginates the document.

        |None| indicates the effective value is inherited from the style hierarchy.
        """
        pPr = self._element.pPr
        return None if pPr is None else pPr.widowControl_val

    @widow_control.setter
    def widow_control(self, value):
        pPr = self._element.get_or_add_pPr()
        pPr.widowControl_val = value

    @staticmethod
    def _line_spacing(spacing_line, spacing_lineRule):
        """Return the line spacing computed from `spacing_line` and `spacing_lineRule`.

        The result is |None| when `spacing_line` is |None|. It is a |float| number of
        lines when `spacing_lineRule` is ``WD_LINE_SPACING.MULTIPLE``; otherwise
        `spacing_line` is returned unchanged as the absolute line height.
        """
        if spacing_line is not None and spacing_lineRule == WD_LINE_SPACING.MULTIPLE:
            # -- convert the stored distance to a count of Pt(12) line heights --
            return spacing_line / Pt(12)
        return spacing_line

    @staticmethod
    def _line_spacing_rule(line, lineRule):
        """Return the line spacing rule computed from `line` and `lineRule`.

        One of the special :ref:`WdLineSpacing` members is returned when the rule is
        ``MULTIPLE`` and the distance matches single, 1.5-line, or double spacing.
        In every other case `lineRule` is returned unchanged.
        """
        if lineRule != WD_LINE_SPACING.MULTIPLE:
            return lineRule

        named_spacings = (
            (240, WD_LINE_SPACING.SINGLE),
            (360, WD_LINE_SPACING.ONE_POINT_FIVE),
            (480, WD_LINE_SPACING.DOUBLE),
        )
        for twips, named_rule in named_spacings:
            if line == Twips(twips):
                return named_rule
        return lineRule
|
||||
@@ -0,0 +1,257 @@
|
||||
"""Run-related proxy objects for python-docx, Run in particular."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, TYPE_CHECKING, Iterator, cast
|
||||
|
||||
from docx.drawing import Drawing
|
||||
from docx.enum.style import WD_STYLE_TYPE
|
||||
from docx.enum.text import WD_BREAK
|
||||
from docx.oxml.drawing import CT_Drawing
|
||||
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
|
||||
from docx.shape import InlineShape
|
||||
from docx.shared import StoryChild
|
||||
from docx.styles.style import CharacterStyle
|
||||
from docx.text.font import Font
|
||||
from docx.text.pagebreak import RenderedPageBreak
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import docx.types as t
|
||||
from docx.enum.text import WD_UNDERLINE
|
||||
from docx.oxml.text.run import CT_R, CT_Text
|
||||
from docx.shared import Length
|
||||
|
||||
|
||||
class Run(StoryChild):
    """Proxy object wrapping a `<w:r>` element.

    Several properties on Run are tri-state, taking |True|, |False|, or |None|.
    |True| and |False| mean on and off respectively. |None| means the property is not
    specified directly on the run, so its effective value comes from the style
    hierarchy.
    """

    def __init__(self, r: CT_R, parent: t.ProvidesStoryPart):
        super().__init__(parent)
        # -- the same `w:r` element is reachable under all three names --
        self._r = r
        self._element = r
        self.element = r

    def add_break(self, break_type: WD_BREAK = WD_BREAK.LINE):
        """Add a break element of `break_type` to this run.

        `break_type` can be `WD_BREAK.LINE`, `WD_BREAK.PAGE`, or `WD_BREAK.COLUMN`,
        where `WD_BREAK` is imported from `docx.enum.text`. The three
        `WD_BREAK.LINE_CLEAR_*` members are accepted as well. It defaults to
        `WD_BREAK.LINE`. Any other value raises `KeyError` before a break is added.
        """
        # -- (type, clear) attribute values for each supported break-type; None
        # -- means the attribute is left unset on the new `w:br` element
        br_attrs_by_break_type = {
            WD_BREAK.LINE: (None, None),
            WD_BREAK.PAGE: ("page", None),
            WD_BREAK.COLUMN: ("column", None),
            WD_BREAK.LINE_CLEAR_LEFT: ("textWrapping", "left"),
            WD_BREAK.LINE_CLEAR_RIGHT: ("textWrapping", "right"),
            WD_BREAK.LINE_CLEAR_ALL: ("textWrapping", "all"),
        }
        type_, clear = br_attrs_by_break_type[break_type]

        br = self._r.add_br()
        if type_ is not None:
            br.type = type_
        if clear is not None:
            br.clear = clear

    def add_picture(
        self,
        image_path_or_stream: str | IO[bytes],
        width: int | Length | None = None,
        height: int | Length | None = None,
    ) -> InlineShape:
        """Add the image at `image_path_or_stream` to the end of this run and return
        the |InlineShape| for it.

        `image_path_or_stream` can be a path (a string) or a file-like object
        containing a binary image.

        When neither `width` nor `height` is given, the picture appears at its native
        size. When only one is given, it determines a scaling factor that is applied
        to the other dimension as well, preserving the aspect ratio. Native size is
        computed from the dots-per-inch (dpi) value in the image file, with 72 dpi
        used when the file specifies none, as is often the case.
        """
        inline = self.part.new_pic_inline(image_path_or_stream, width, height)
        self._r.add_drawing(inline)
        return InlineShape(inline)

    def add_tab(self) -> None:
        """Append a ``<w:tab/>`` element to this run; Word interprets it as a tab
        character."""
        self._r.add_tab()

    def add_text(self, text: str):
        """Append a new ``<w:t>`` child element containing `text` to this run and
        return a |_Text| object wrapping it.

        Assigning to the :attr:`Run.text` property is a possibly more friendly
        alternative.
        """
        return _Text(self._r.add_t(text))

    @property
    def bold(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the text of the run appear in bold face. |False| makes it appear
        non-bold unconditionally. |None| means the bold setting is inherited from the
        style hierarchy.
        """
        return self.font.bold

    @bold.setter
    def bold(self, value: bool | None):
        self.font.bold = value

    def clear(self):
        """Remove all content from this run and return the run itself.

        Run formatting is preserved.
        """
        self._r.clear_content()
        return self

    @property
    def contains_page_break(self) -> bool:
        """`True` when one or more rendered page-breaks occur in this run.

        "Hard" page-breaks inserted by the author are not counted. A hard page-break
        gives rise to a rendered page-break in the right position, so counting both
        would "double-count" that page-break.

        More than one rendered page-break in a single run would be very rare, but it
        is possible.
        """
        return bool(self._r.lastRenderedPageBreaks)

    @property
    def font(self) -> Font:
        """|Font| object giving access to the character formatting properties of this
        run, such as font name and size."""
        return Font(self._element)

    @property
    def italic(self) -> bool | None:
        """Read/write tri-state value.

        |True| makes the text of the run appear in italics. |False| makes it appear
        non-italic unconditionally. |None| means the italic setting is inherited from
        the style hierarchy.
        """
        return self.font.italic

    @italic.setter
    def italic(self, value: bool | None):
        self.font.italic = value

    def iter_inner_content(self) -> Iterator[str | Drawing | RenderedPageBreak]:
        """Generate the content-items in this run, in the order they appear.

        NOTE: only content-types currently supported by `python-docx` are generated.
        In this version that is text and rendered page-breaks. Drawing is included
        but currently only provides access to its XML element (CT_Drawing) on its
        `._drawing` attribute. `Drawing` attributes and methods may be expanded in
        future releases.

        Many element-types can appear inside a run, and most of those with a clear
        plain-text equivalent (w:br, w:cr, w:noBreakHyphen, w:t, w:tab) are merged:
        any contiguous range of them is generated as a single `str`. Rendered
        page-break and drawing elements are generated individually. Any other
        elements are ignored.
        """
        for item in self._r.inner_content_items:
            if isinstance(item, str):
                yield item
                continue
            if isinstance(item, CT_LastRenderedPageBreak):
                yield RenderedPageBreak(item, self)
                continue
            if isinstance(item, CT_Drawing):  # pyright: ignore[reportUnnecessaryIsInstance]
                yield Drawing(item, self)

    def mark_comment_range(self, last_run: Run, comment_id: int) -> None:
        """Mark the runs from this run to `last_run` (inclusive) as belonging to a comment.

        `comment_id` identifies the comment that references this range.
        """
        first_r, last_r = self._r, last_run._r

        # -- `w:commentRangeStart` with `comment_id` goes before this (first) run --
        first_r.insert_comment_range_start_above(comment_id)

        # -- `w:commentRangeEnd` and the `w:commentReference` run with `comment_id`
        # -- go after `last_run`
        last_r.insert_comment_range_end_and_reference_below(comment_id)

    @property
    def style(self) -> CharacterStyle:
        """Read/write. |CharacterStyle| object for the character style applied to
        this run.

        When the run has no directly-applied character style, the default character
        style for the document (often `Default Character Font`) is returned.
        Assigning |None| removes any directly-applied character style.
        """
        applied_style_id = self._r.style
        resolved = self.part.get_style(applied_style_id, WD_STYLE_TYPE.CHARACTER)
        return cast(CharacterStyle, resolved)

    @style.setter
    def style(self, style_or_name: str | CharacterStyle | None):
        self._r.style = self.part.get_style_id(style_or_name, WD_STYLE_TYPE.CHARACTER)

    @property
    def text(self) -> str:
        """Read/write. Text equivalent of the content of this run, as one string.

        Each `<w:t>` element contributes the characters it contains. A `<w:tab/>`
        element contributes a `\\t` character. A `<w:cr/>` or `<w:br>` element each
        contribute a `\\n` character. A `<w:br>` element can indicate a page break or
        column break as well as a line break; only line-break `<w:br>` elements
        become a `\\n` character and the others are ignored. All other content child
        elements, such as `<w:drawing>`, are ignored.

        Assigning text has the reverse effect: each `\\t` character becomes a
        `<w:tab/>` element and each `\\n` or `\\r` character becomes a `<w:cr/>`
        element. Any existing run content is replaced. Run formatting is preserved.
        """
        run_element = self._r
        return run_element.text

    @text.setter
    def text(self, text: str):
        self._r.text = text

    @property
    def underline(self) -> bool | WD_UNDERLINE | None:
        """Read/write. The underline style for this |Run|.

        The value is one of |None|, |True|, |False|, or a member of
        :ref:`WdUnderline`.

        |None| indicates the run has no directly-applied underline value and so
        inherits the underline value of its containing paragraph. Assigning |None|
        removes any directly-applied underline value.

        |False| indicates a directly-applied setting of no underline, overriding any
        inherited value.

        |True| indicates single underline.

        The members of :ref:`WdUnderline` specify other underline styles such as
        double, wavy, and dotted.
        """
        return self.font.underline

    @underline.setter
    def underline(self, value: bool | WD_UNDERLINE | None):
        self.font.underline = value
|
||||
|
||||
|
||||
class _Text:
    """Proxy object wrapping a `<w:t>` element."""

    def __init__(self, t_elm: CT_Text):
        super().__init__()
        # -- the wrapped `w:t` element --
        self._t = t_elm
|
||||
@@ -0,0 +1,123 @@
|
||||
"""Tabstop-related proxy types."""
|
||||
|
||||
from docx.enum.text import WD_TAB_ALIGNMENT, WD_TAB_LEADER
|
||||
from docx.shared import ElementProxy
|
||||
|
||||
|
||||
class TabStops(ElementProxy):
    """A sequence of |TabStop| objects providing access to the tab stops of a paragraph
    or paragraph style.

    Supports iteration, indexed access, del, and len(). It is accessed using the
    :attr:`~.ParagraphFormat.tab_stops` property of ParagraphFormat; it is not intended
    to be constructed directly.
    """

    def __init__(self, element):
        super().__init__(element, None)
        # -- `element` is the `pPr` element whose `tabs` child holds the tab stops --
        self._pPr = element

    def __delitem__(self, idx):
        """Remove the tab at offset `idx` in this sequence.

        Raises `IndexError` when there are no tab stops or `idx` is out of range.
        The `tabs` container element is removed too once its last tab is deleted.
        """
        tabs = self._pPr.tabs
        try:
            tabs.remove(tabs[idx])
        except (AttributeError, IndexError):
            # -- AttributeError arises when `tabs` is None (no tab stops defined).
            # -- That is an implementation detail, so it is suppressed rather than
            # -- chained onto the IndexError the caller sees.
            raise IndexError("tab index out of range") from None

        # -- an explicit length test is used instead of relying on the truthiness
        # -- of the element
        if len(tabs) == 0:
            self._pPr.remove(tabs)

    def __getitem__(self, idx):
        """Enables list-style access by index.

        Raises `IndexError` when no tab stops are defined or `idx` is out of range.
        """
        tabs = self._pPr.tabs
        if tabs is None:
            raise IndexError("TabStops object is empty")
        tab = tabs.tab_lst[idx]
        return TabStop(tab)

    def __iter__(self):
        """Generate a TabStop object for each of the w:tab elements, in XML document
        order."""
        tabs = self._pPr.tabs
        if tabs is not None:
            for tab in tabs.tab_lst:
                yield TabStop(tab)

    def __len__(self):
        """Number of tab stops in this sequence; 0 when none are defined."""
        tabs = self._pPr.tabs
        if tabs is None:
            return 0
        return len(tabs.tab_lst)

    def add_tab_stop(self, position, alignment=WD_TAB_ALIGNMENT.LEFT, leader=WD_TAB_LEADER.SPACES):
        """Add a new tab stop at `position`, a |Length| object specifying the location
        of the tab stop relative to the paragraph edge.

        A negative `position` value is valid and appears in hanging indentation. Tab
        alignment defaults to left, but may be specified by passing a member of the
        :ref:`WdTabAlignment` enumeration as `alignment`. An optional leader character
        can be specified by passing a member of the :ref:`WdTabLeader` enumeration as
        `leader`.

        Returns the |TabStop| object for the newly added tab stop.
        """
        tabs = self._pPr.get_or_add_tabs()
        tab = tabs.insert_tab_in_order(position, alignment, leader)
        return TabStop(tab)

    def clear_all(self):
        """Remove all custom tab stops."""
        self._pPr._remove_tabs()
|
||||
|
||||
|
||||
class TabStop(ElementProxy):
    """A single tab stop belonging to a paragraph or style.

    Obtained using list semantics on the |TabStops| object that contains it.
    """

    def __init__(self, element):
        super().__init__(element, None)
        # -- `element` is the tab element this proxy wraps --
        self._tab = element

    @property
    def alignment(self):
        """Read/write. Member of :ref:`WdTabAlignment` giving the alignment setting
        for this tab stop."""
        tab = self._tab
        return tab.val

    @alignment.setter
    def alignment(self, value):
        self._tab.val = value

    @property
    def leader(self):
        """Read/write. Member of :ref:`WdTabLeader` giving the repeating character
        used as a "leader", filling in the space spanned by this tab.

        Assigning |None| produces the same result as assigning
        `WD_TAB_LEADER.SPACES`.
        """
        tab = self._tab
        return tab.leader

    @leader.setter
    def leader(self, value):
        self._tab.leader = value

    @property
    def position(self):
        """Read/write. |Length| object giving the distance of this tab stop from the
        inside edge of the paragraph.

        May be positive or negative.
        """
        tab = self._tab
        return tab.pos

    @position.setter
    def position(self, value):
        # -- A replacement tab carrying the same alignment and leader is inserted
        # -- in order at the new position and becomes the wrapped element; the old
        # -- tab element is then removed from the parent.
        old_tab = self._tab
        parent_tabs = old_tab.getparent()
        self._tab = parent_tabs.insert_tab_in_order(value, old_tab.val, old_tab.leader)
        parent_tabs.remove(old_tab)
|
||||
Reference in New Issue
Block a user