refactor: excel parse
This commit is contained in:
@@ -0,0 +1,51 @@
|
||||
"""Contains comments added to the document."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import TYPE_CHECKING, cast
|
||||
|
||||
from typing_extensions import Self
|
||||
|
||||
from docx.comments import Comments
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
from docx.opc.packuri import PackURI
|
||||
from docx.oxml.comments import CT_Comments
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.package import Package
|
||||
from docx.parts.story import StoryPart
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.comments import CT_Comments
|
||||
from docx.package import Package
|
||||
|
||||
|
||||
class CommentsPart(StoryPart):
    """Package part that stores the comments added to the document."""

    def __init__(
        self, partname: PackURI, content_type: str, element: CT_Comments, package: Package
    ):
        super().__init__(partname, content_type, element, package)
        # -- retain the root element under a typed name for use by `.comments` --
        self._comments = element

    @property
    def comments(self) -> Comments:
        """|Comments| proxy wrapping the `w:comments` root element of this part.

        A new proxy object is constructed on every access.
        """
        return Comments(self._comments, self)

    @classmethod
    def default(cls, package: Package) -> Self:
        """Return a new comments part parsed from the `default-comments.xml` template.

        The part is named `/word/comments.xml` and is created for `package`. It is not
        related to any other part by this method.
        """
        return cls(
            PackURI("/word/comments.xml"),
            CT.WML_COMMENTS,
            cast("CT_Comments", parse_xml(cls._default_comments_xml())),
            package,
        )

    @classmethod
    def _default_comments_xml(cls) -> bytes:
        """Return the raw bytes of the default comments-part XML template.

        The template is read from `../templates/default-comments.xml`, resolved relative to
        the directory containing this module.
        """
        template_path = os.path.join(
            os.path.dirname(__file__), "..", "templates", "default-comments.xml"
        )
        with open(template_path, "rb") as template:
            return template.read()
|
||||
@@ -0,0 +1,169 @@
|
||||
"""|DocumentPart| and closely related objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, TYPE_CHECKING, cast
|
||||
|
||||
from docx.document import Document
|
||||
from docx.opc.constants import RELATIONSHIP_TYPE as RT
|
||||
from docx.parts.comments import CommentsPart
|
||||
from docx.parts.hdrftr import FooterPart, HeaderPart
|
||||
from docx.parts.numbering import NumberingPart
|
||||
from docx.parts.settings import SettingsPart
|
||||
from docx.parts.story import StoryPart
|
||||
from docx.parts.styles import StylesPart
|
||||
from docx.shape import InlineShapes
|
||||
from docx.shared import lazyproperty
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.comments import Comments
|
||||
from docx.enum.style import WD_STYLE_TYPE
|
||||
from docx.opc.coreprops import CoreProperties
|
||||
from docx.settings import Settings
|
||||
from docx.styles.style import BaseStyle
|
||||
|
||||
|
||||
class DocumentPart(StoryPart):
    """Main document part of a WordprocessingML (WML) package, aka a .docx file.

    Brokers access to other parts such as the image, core-properties and styles parts. It
    also serves as a convenient delegate when an object deep in the document needs a
    service that involves a remote ancestor; many content objects reach this part through
    their inherited `Parented.part` property for exactly that purpose.
    """

    def add_footer_part(self):
        """Create a footer part, relate it to this part and return `(footer_part, rId)`."""
        new_footer = FooterPart.new(self.package)
        return new_footer, self.relate_to(new_footer, RT.FOOTER)

    def add_header_part(self):
        """Create a header part, relate it to this part and return `(header_part, rId)`."""
        new_header = HeaderPart.new(self.package)
        return new_header, self.relate_to(new_header, RT.HEADER)

    @property
    def comments(self) -> Comments:
        """|Comments| object giving access to the comments added to this document."""
        comments_part = self._comments_part
        return comments_part.comments

    @property
    def core_properties(self) -> CoreProperties:
        """|CoreProperties| object for this document, obtained from the package."""
        package = self.package
        return package.core_properties

    @property
    def document(self):
        """A |Document| object providing access to the content of this document.

        A new |Document| proxy is constructed on each access.
        """
        return Document(self._element, self)

    def drop_header_part(self, rId: str) -> None:
        """Drop the relationship identified by `rId`, removing the related header part."""
        self.drop_rel(rId)

    def footer_part(self, rId: str):
        """Return the |FooterPart| this part is related to by `rId`."""
        parts_by_rId = self.related_parts
        return parts_by_rId[rId]

    def get_style(self, style_id: str | None, style_type: WD_STYLE_TYPE) -> BaseStyle:
        """Return the style of `style_type` in this document that matches `style_id`.

        The default style for `style_type` is returned when `style_id` is |None| or does
        not match a defined style of `style_type`.
        """
        styles = self.styles
        return styles.get_by_id(style_id, style_type)

    def get_style_id(self, style_or_name, style_type):
        """Return the style_id (|str|) for `style_or_name` among styles of `style_type`.

        |None| is returned when `style_or_name` is |None| or resolves to the default style
        for `style_type`. Raises if `style_or_name` is a style of the wrong type or names a
        style that is not present in the document.
        """
        styles = self.styles
        return styles.get_style_id(style_or_name, style_type)

    def header_part(self, rId: str):
        """Return the |HeaderPart| this part is related to by `rId`."""
        parts_by_rId = self.related_parts
        return parts_by_rId[rId]

    @lazyproperty
    def inline_shapes(self):
        """The |InlineShapes| instance containing the inline shapes in the document."""
        body = self._element.body
        return InlineShapes(body, self)

    @lazyproperty
    def numbering_part(self) -> NumberingPart:
        """|NumberingPart| giving access to the numbering definitions of this document.

        When no numbering part is related to this part, one is requested from
        `NumberingPart.new()` and related to this part.
        """
        # NOTE(review): `NumberingPart.new()` as it appears in this change set raises
        # NotImplementedError, so the creation path below cannot currently succeed.
        try:
            existing = self.part_related_by(RT.NUMBERING)
        except KeyError:
            created = NumberingPart.new()
            self.relate_to(created, RT.NUMBERING)
            return created
        return cast(NumberingPart, existing)

    def save(self, path_or_stream: str | IO[bytes]):
        """Save this document to `path_or_stream`.

        `path_or_stream` is either a filesystem path (a string) or a file-like object. The
        work is delegated to the package.
        """
        package = self.package
        package.save(path_or_stream)

    @property
    def settings(self) -> Settings:
        """|Settings| object for the settings stored in this document's settings part."""
        settings_part = self._settings_part
        return settings_part.settings

    @property
    def styles(self):
        """|Styles| object for the styles stored in this document's styles part."""
        styles_part = self._styles_part
        return styles_part.styles

    @property
    def _comments_part(self) -> CommentsPart:
        """|CommentsPart| giving access to the comments added to this document.

        A default comments part is created and related to this part when none is present.
        """
        try:
            existing = self.part_related_by(RT.COMMENTS)
        except KeyError:
            assert self.package is not None
            created = CommentsPart.default(self.package)
            self.relate_to(created, RT.COMMENTS)
            return created
        return cast(CommentsPart, existing)

    @property
    def _settings_part(self) -> SettingsPart:
        """|SettingsPart| giving access to the document-level settings of this document.

        A default settings part is created and related to this part when none is present.
        """
        try:
            existing = self.part_related_by(RT.SETTINGS)
        except KeyError:
            created = SettingsPart.default(self.package)
            self.relate_to(created, RT.SETTINGS)
            return created
        return cast(SettingsPart, existing)

    @property
    def _styles_part(self) -> StylesPart:
        """The |StylesPart| instance for this document.

        A default styles part is created and related to this part when none is present.
        """
        try:
            existing = self.part_related_by(RT.STYLES)
        except KeyError:
            package = self.package
            assert package is not None
            created = StylesPart.default(package)
            self.relate_to(created, RT.STYLES)
            return created
        return cast(StylesPart, existing)
|
||||
@@ -0,0 +1,53 @@
|
||||
"""Header and footer part objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.parts.story import StoryPart
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.package import Package
|
||||
|
||||
|
||||
class FooterPart(StoryPart):
    """Definition of a section footer."""

    @classmethod
    def new(cls, package: Package):
        """Return a newly created footer part belonging to `package`.

        The partname is obtained from `package.next_partname()` using the
        "/word/footer%d.xml" template and the content is parsed from the default footer XML.
        """
        return cls(
            package.next_partname("/word/footer%d.xml"),
            CT.WML_FOOTER,
            parse_xml(cls._default_footer_xml()),
            package,
        )

    @classmethod
    def _default_footer_xml(cls):
        """Return the raw bytes of the default footer-part XML template.

        The template is read from `../templates/default-footer.xml`, resolved relative to
        the directory containing this module.
        """
        template_path = os.path.join(
            os.path.dirname(__file__), "..", "templates", "default-footer.xml"
        )
        with open(template_path, "rb") as template:
            return template.read()
|
||||
|
||||
|
||||
class HeaderPart(StoryPart):
    """Definition of a section header."""

    @classmethod
    def new(cls, package: Package):
        """Return a newly created header part belonging to `package`.

        The partname is obtained from `package.next_partname()` using the
        "/word/header%d.xml" template and the content is parsed from the default header XML.
        """
        return cls(
            package.next_partname("/word/header%d.xml"),
            CT.WML_HEADER,
            parse_xml(cls._default_header_xml()),
            package,
        )

    @classmethod
    def _default_header_xml(cls):
        """Return the raw bytes of the default header-part XML template.

        The template is read from `../templates/default-header.xml`, resolved relative to
        the directory containing this module.
        """
        template_path = os.path.join(
            os.path.dirname(__file__), "..", "templates", "default-header.xml"
        )
        with open(template_path, "rb") as template:
            return template.read()
|
||||
@@ -0,0 +1,80 @@
|
||||
"""The proxy class for an image part, and related objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.image.image import Image
|
||||
from docx.opc.part import Part
|
||||
from docx.shared import Emu, Inches
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.opc.package import OpcPackage
|
||||
from docx.opc.packuri import PackURI
|
||||
|
||||
|
||||
class ImagePart(Part):
    """An image part.

    Corresponds to the target part of a relationship with type RELATIONSHIP_TYPE.IMAGE.
    """

    def __init__(
        self, partname: PackURI, content_type: str, blob: bytes, image: Image | None = None
    ):
        super().__init__(partname, content_type, blob)
        # -- None when loaded from a package; `.image` then builds it from the blob --
        self._image = image

    @property
    def default_cx(self):
        """Native width of this image, calculated from its width in pixels and
        horizontal dots per inch (dpi)."""
        image = self.image
        width_in_inches = image.px_width / image.horz_dpi
        return Inches(width_in_inches)

    @property
    def default_cy(self):
        """Native height of this image, calculated from its height in pixels and
        vertical dots per inch (dpi)."""
        image = self.image
        # -- height must be scaled by the *vertical* resolution; using the horizontal dpi
        # -- gives the wrong height for images whose two resolutions differ
        height_in_emu = int(round(914400 * image.px_height / image.vert_dpi))
        return Emu(height_in_emu)

    @property
    def filename(self):
        """Filename from which this image part was originally created.

        The filename recorded on the image is returned when an |Image| object is already
        associated with this part. Otherwise a generic name of the form 'image.{ext}' is
        substituted, where `ext` is the extension of this part's partname.
        """
        if self._image is not None:
            return self._image.filename
        return "image.%s" % self.partname.ext

    @classmethod
    def from_image(cls, image: Image, partname: PackURI):
        """Return an image part newly created from `image` and assigned `partname`.

        The content-type and blob are taken from `image`, which is also retained so it need
        not be re-parsed from the blob later.
        """
        # -- construct via `cls`, as `.load()` does, so a subclass gets its own type --
        return cls(partname, image.content_type, image.blob, image)

    @property
    def image(self) -> Image:
        """The |Image| object for this part.

        It is created from this part's blob on first access when none was provided at
        construction, and the same object is returned thereafter.
        """
        if self._image is None:
            self._image = Image.from_blob(self.blob)
        return self._image

    @classmethod
    def load(cls, partname: PackURI, content_type: str, blob: bytes, package: OpcPackage):
        """Called by ``docx.opc.package.PartFactory`` to load an image part from a
        package being opened by ``Document(...)`` call.

        `package` is accepted for signature compatibility but is not used here.
        """
        return cls(partname, content_type, blob)

    @property
    def sha1(self):
        """SHA1 hash digest (hex string) of the blob of this image part."""
        return hashlib.sha1(self.blob).hexdigest()
|
||||
@@ -0,0 +1,32 @@
|
||||
"""|NumberingPart| and closely related objects."""
|
||||
|
||||
from ..opc.part import XmlPart
|
||||
from ..shared import lazyproperty
|
||||
|
||||
|
||||
class NumberingPart(XmlPart):
    """Proxy for the numbering.xml part, which holds the numbering definitions for a
    document or glossary."""

    @classmethod
    def new(cls) -> "NumberingPart":
        """Newly created numbering part, containing only the root ``<w:numbering>`` element.

        Not implemented; calling this method always raises |NotImplementedError|.
        """
        raise NotImplementedError()

    @lazyproperty
    def numbering_definitions(self):
        """The |_NumberingDefinitions| collection of numbering definitions (<w:num> element
        proxies) for this numbering part, built from this part's root element."""
        definitions = _NumberingDefinitions(self._element)
        return definitions
|
||||
|
||||
|
||||
class _NumberingDefinitions:
    """Collection of |_NumberingDefinition| instances corresponding to the ``<w:num>``
    elements in a numbering part."""

    def __init__(self, numbering_elm):
        """Wrap `numbering_elm`, the element whose `num_lst` holds the numbering definitions."""
        super().__init__()
        self._numbering = numbering_elm

    def __len__(self):
        """Return the number of items in the wrapped element's `num_lst`."""
        return len(self._numbering.num_lst)
|
||||
@@ -0,0 +1,50 @@
|
||||
"""|SettingsPart| and closely related objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import TYPE_CHECKING, cast
|
||||
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
from docx.opc.packuri import PackURI
|
||||
from docx.opc.part import XmlPart
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.settings import Settings
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.settings import CT_Settings
|
||||
from docx.package import Package
|
||||
|
||||
|
||||
class SettingsPart(XmlPart):
    """Document-level settings part of a WordprocessingML (WML) package."""

    def __init__(
        self, partname: PackURI, content_type: str, element: CT_Settings, package: Package
    ):
        super().__init__(partname, content_type, element, package)
        # -- retain the root element under a typed name for use by `.settings` --
        self._settings = element

    @classmethod
    def default(cls, package: Package):
        """Return a new settings part parsed from the `default-settings.xml` template.

        The part is named `/word/settings.xml` and is created for `package`.
        """
        return cls(
            PackURI("/word/settings.xml"),
            CT.WML_SETTINGS,
            cast("CT_Settings", parse_xml(cls._default_settings_xml())),
            package,
        )

    @property
    def settings(self) -> Settings:
        """|Settings| proxy wrapping the `w:settings` element in this part.

        It holds the document-level settings for this document. A new proxy object is
        constructed on every access.
        """
        return Settings(self._settings)

    @classmethod
    def _default_settings_xml(cls):
        """Return the raw bytes of the default settings-part XML template.

        The template is read from `../templates/default-settings.xml`, resolved relative to
        the directory containing this module.
        """
        template_path = os.path.join(
            os.path.dirname(__file__), "..", "templates", "default-settings.xml"
        )
        with open(template_path, "rb") as template:
            return template.read()
|
||||
@@ -0,0 +1,95 @@
|
||||
"""|StoryPart| and related objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, TYPE_CHECKING, Tuple, cast
|
||||
|
||||
from docx.opc.constants import RELATIONSHIP_TYPE as RT
|
||||
from docx.opc.part import XmlPart
|
||||
from docx.oxml.shape import CT_Inline
|
||||
from docx.shared import Length, lazyproperty
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.enum.style import WD_STYLE_TYPE
|
||||
from docx.image.image import Image
|
||||
from docx.parts.document import DocumentPart
|
||||
from docx.styles.style import BaseStyle
|
||||
|
||||
|
||||
class StoryPart(XmlPart):
    """Base class for story parts.

    A story part is one that can contain textual content, such as the document-part and
    header or footer parts. These all share content behaviors like `.paragraphs`,
    `.add_paragraph()`, `.add_table()` etc.
    """

    def get_or_add_image(self, image_descriptor: str | IO[bytes]) -> Tuple[str, Image]:
        """Return (rId, image) pair for image identified by `image_descriptor`.

        `rId` is the str key (often like "rId7") for the relationship between this story
        part and the image part, reused if already present, newly created if not.
        `image` is an |Image| instance providing access to the properties of the image,
        such as dimensions and image type.
        """
        package = self._package
        assert package is not None
        image_part = package.get_or_add_image_part(image_descriptor)
        rId = self.relate_to(image_part, RT.IMAGE)
        return rId, image_part.image

    def get_style(self, style_id: str | None, style_type: WD_STYLE_TYPE) -> BaseStyle:
        """Return the style in this document matching `style_id`.

        Returns the default style for `style_type` if `style_id` is |None| or does not
        match a defined style of `style_type`. The lookup is delegated to the main document
        part.
        """
        return self._document_part.get_style(style_id, style_type)

    def get_style_id(
        self, style_or_name: BaseStyle | str | None, style_type: WD_STYLE_TYPE
    ) -> str | None:
        """Return str style_id for `style_or_name` of `style_type`.

        Returns |None| if the style resolves to the default style for `style_type` or if
        `style_or_name` is itself |None|. Raises if `style_or_name` is a style of the
        wrong type or names a style not present in the document. The lookup is delegated to
        the main document part.
        """
        return self._document_part.get_style_id(style_or_name, style_type)

    def new_pic_inline(
        self,
        image_descriptor: str | IO[bytes],
        width: int | Length | None = None,
        height: int | Length | None = None,
    ) -> CT_Inline:
        """Return a newly-created `w:inline` element.

        The element contains the image specified by `image_descriptor` and is scaled
        based on the values of `width` and `height`.
        """
        rId, image = self.get_or_add_image(image_descriptor)
        cx, cy = image.scaled_dimensions(width, height)
        shape_id, filename = self.next_id, image.filename
        return CT_Inline.new_pic_inline(shape_id, rId, filename, cx, cy)

    @property
    def next_id(self) -> int:
        """Next available positive integer id value in this story XML document.

        The value is determined by incrementing the maximum existing id value. Gaps in
        the existing id sequence are not filled. The id attribute value is unique in the
        document, without regard to the element type it appears on. Returns 1 when no
        integer id value is present.
        """
        id_str_lst = self._element.xpath("//@id")
        # -- `isdecimal()` rather than `isdigit()`: the latter also accepts characters such
        # -- as superscript digits, which `int()` rejects with ValueError. Non-numeric ids
        # -- are simply ignored.
        used_ids = [int(id_str) for id_str in id_str_lst if id_str.isdecimal()]
        return max(used_ids, default=0) + 1

    @lazyproperty
    def _document_part(self) -> DocumentPart:
        """|DocumentPart| object for this package."""
        package = self.package
        assert package is not None
        return cast("DocumentPart", package.main_document_part)
|
||||
@@ -0,0 +1,42 @@
|
||||
"""Provides StylesPart and related objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
from docx.opc.packuri import PackURI
|
||||
from docx.opc.part import XmlPart
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.styles.styles import Styles
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.opc.package import OpcPackage
|
||||
|
||||
|
||||
class StylesPart(XmlPart):
    """Proxy for the styles.xml part, which holds the style definitions for a document or
    glossary."""

    @classmethod
    def default(cls, package: OpcPackage) -> StylesPart:
        """Return a new styles part parsed from the `default-styles.xml` template.

        The part is named `/word/styles.xml` and is created for `package`.
        """
        return cls(
            PackURI("/word/styles.xml"),
            CT.WML_STYLES,
            parse_xml(cls._default_styles_xml()),
            package,
        )

    @property
    def styles(self):
        """The |_Styles| instance containing the styles (<w:style> element proxies) for
        this styles part.

        A new proxy object is constructed on every access.
        """
        root = self.element
        return Styles(root)

    @classmethod
    def _default_styles_xml(cls):
        """Return the raw bytes of the default styles-part XML template.

        The template is read from `../templates/default-styles.xml`, resolved relative to
        the directory containing this module.
        """
        template_path = os.path.join(
            os.path.dirname(__file__), "..", "templates", "default-styles.xml"
        )
        with open(template_path, "rb") as template:
            return template.read()
|
||||
Reference in New Issue
Block a user