refactor: excel parse
This commit is contained in:
@@ -0,0 +1,306 @@
|
||||
"""Constant values related to the Open Packaging Convention.
|
||||
|
||||
In particular it includes content types and relationship types.
|
||||
"""
|
||||
|
||||
|
||||
class CONTENT_TYPE:
    """Content type URIs (like MIME-types) that specify a part's format.

    Member names carry a prefix matching the family that appears in the value:
    ``DML_`` (drawingml), ``PML_`` (presentationml), ``SML_`` (spreadsheetml),
    ``WML_`` (wordprocessingml), ``OFC_`` (officedocument) and ``OPC_`` (package).
    Unprefixed and ``X_`` members are plain image/XML/font media types.
    """

    BMP = "image/bmp"
    DML_CHART = "application/vnd.openxmlformats-officedocument.drawingml.chart+xml"
    DML_CHARTSHAPES = "application/vnd.openxmlformats-officedocument.drawingml.chartshapes+xml"
    DML_DIAGRAM_COLORS = "application/vnd.openxmlformats-officedocument.drawingml.diagramColors+xml"
    DML_DIAGRAM_DATA = "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml"
    DML_DIAGRAM_LAYOUT = "application/vnd.openxmlformats-officedocument.drawingml.diagramLayout+xml"
    DML_DIAGRAM_STYLE = "application/vnd.openxmlformats-officedocument.drawingml.diagramStyle+xml"
    GIF = "image/gif"
    JPEG = "image/jpeg"
    MS_PHOTO = "image/vnd.ms-photo"
    OFC_CUSTOM_PROPERTIES = "application/vnd.openxmlformats-officedocument.custom-properties+xml"
    OFC_CUSTOM_XML_PROPERTIES = (
        "application/vnd.openxmlformats-officedocument.customXmlProperties+xml"
    )
    OFC_DRAWING = "application/vnd.openxmlformats-officedocument.drawing+xml"
    OFC_EXTENDED_PROPERTIES = (
        "application/vnd.openxmlformats-officedocument.extended-properties+xml"
    )
    OFC_OLE_OBJECT = "application/vnd.openxmlformats-officedocument.oleObject"
    OFC_PACKAGE = "application/vnd.openxmlformats-officedocument.package"
    OFC_THEME = "application/vnd.openxmlformats-officedocument.theme+xml"
    OFC_THEME_OVERRIDE = "application/vnd.openxmlformats-officedocument.themeOverride+xml"
    OFC_VML_DRAWING = "application/vnd.openxmlformats-officedocument.vmlDrawing"
    OPC_CORE_PROPERTIES = "application/vnd.openxmlformats-package.core-properties+xml"
    OPC_DIGITAL_SIGNATURE_CERTIFICATE = (
        "application/vnd.openxmlformats-package.digital-signature-certificate"
    )
    OPC_DIGITAL_SIGNATURE_ORIGIN = "application/vnd.openxmlformats-package.digital-signature-origin"
    OPC_DIGITAL_SIGNATURE_XMLSIGNATURE = (
        "application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml"
    )
    OPC_RELATIONSHIPS = "application/vnd.openxmlformats-package.relationships+xml"
    PML_COMMENTS = "application/vnd.openxmlformats-officedocument.presentationml.comments+xml"
    PML_COMMENT_AUTHORS = (
        "application/vnd.openxmlformats-officedocument.presentationml.commentAuthors+xml"
    )
    PML_HANDOUT_MASTER = (
        "application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml"
    )
    PML_NOTES_MASTER = (
        "application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml"
    )
    PML_NOTES_SLIDE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml"
    PML_PRESENTATION_MAIN = (
        "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml"
    )
    PML_PRES_PROPS = "application/vnd.openxmlformats-officedocument.presentationml.presProps+xml"
    PML_PRINTER_SETTINGS = (
        "application/vnd.openxmlformats-officedocument.presentationml.printerSettings"
    )
    PML_SLIDE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml"
    PML_SLIDESHOW_MAIN = (
        "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml"
    )
    PML_SLIDE_LAYOUT = (
        "application/vnd.openxmlformats-officedocument.presentationml.slideLayout+xml"
    )
    PML_SLIDE_MASTER = (
        "application/vnd.openxmlformats-officedocument.presentationml.slideMaster+xml"
    )
    PML_SLIDE_UPDATE_INFO = (
        "application/vnd.openxmlformats-officedocument.presentationml.slideUpdateInfo+xml"
    )
    PML_TABLE_STYLES = (
        "application/vnd.openxmlformats-officedocument.presentationml.tableStyles+xml"
    )
    PML_TAGS = "application/vnd.openxmlformats-officedocument.presentationml.tags+xml"
    PML_TEMPLATE_MAIN = (
        "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml"
    )
    PML_VIEW_PROPS = "application/vnd.openxmlformats-officedocument.presentationml.viewProps+xml"
    PNG = "image/png"
    SML_CALC_CHAIN = "application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml"
    SML_CHARTSHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml"
    SML_COMMENTS = "application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml"
    SML_CONNECTIONS = "application/vnd.openxmlformats-officedocument.spreadsheetml.connections+xml"
    SML_CUSTOM_PROPERTY = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.customProperty"
    )
    SML_DIALOGSHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.dialogsheet+xml"
    SML_EXTERNAL_LINK = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.externalLink+xml"
    )
    SML_PIVOT_CACHE_DEFINITION = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheDefinition+xml"
    )
    SML_PIVOT_CACHE_RECORDS = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheRecords+xml"
    )
    SML_PIVOT_TABLE = "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotTable+xml"
    SML_PRINTER_SETTINGS = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.printerSettings"
    )
    SML_QUERY_TABLE = "application/vnd.openxmlformats-officedocument.spreadsheetml.queryTable+xml"
    SML_REVISION_HEADERS = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.revisionHeaders+xml"
    )
    SML_REVISION_LOG = "application/vnd.openxmlformats-officedocument.spreadsheetml.revisionLog+xml"
    SML_SHARED_STRINGS = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml"
    )
    SML_SHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    SML_SHEET_MAIN = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"
    SML_SHEET_METADATA = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheetMetadata+xml"
    )
    SML_STYLES = "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"
    SML_TABLE = "application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml"
    SML_TABLE_SINGLE_CELLS = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.tableSingleCells+xml"
    )
    SML_TEMPLATE_MAIN = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml"
    )
    SML_USER_NAMES = "application/vnd.openxmlformats-officedocument.spreadsheetml.userNames+xml"
    SML_VOLATILE_DEPENDENCIES = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.volatileDependencies+xml"
    )
    SML_WORKSHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"
    TIFF = "image/tiff"
    WML_COMMENTS = "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"
    WML_DOCUMENT = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    WML_DOCUMENT_GLOSSARY = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml"
    )
    WML_DOCUMENT_MAIN = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"
    )
    WML_ENDNOTES = "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml"
    WML_FONT_TABLE = "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml"
    WML_FOOTER = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"
    WML_FOOTNOTES = "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"
    WML_HEADER = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"
    WML_NUMBERING = "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"
    WML_PRINTER_SETTINGS = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.printerSettings"
    )
    WML_SETTINGS = "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"
    WML_STYLES = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"
    WML_WEB_SETTINGS = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml"
    )
    XML = "application/xml"
    X_EMF = "image/x-emf"
    X_FONTDATA = "application/x-fontdata"
    X_FONT_TTF = "application/x-font-ttf"
    X_WMF = "image/x-wmf"
|
||||
|
||||
|
||||
class NAMESPACE:
    """Constant values for OPC XML namespaces."""

    DML_WORDPROCESSING_DRAWING = (
        "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
    )
    OFC_RELATIONSHIPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    OPC_RELATIONSHIPS = "http://schemas.openxmlformats.org/package/2006/relationships"
    OPC_CONTENT_TYPES = "http://schemas.openxmlformats.org/package/2006/content-types"
    WML_MAIN = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
||||
|
||||
|
||||
class RELATIONSHIP_TARGET_MODE:
    """Open XML relationship target modes."""

    EXTERNAL = "External"
    INTERNAL = "Internal"
|
||||
|
||||
|
||||
class RELATIONSHIP_TYPE:
|
||||
AUDIO = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/audio"
|
||||
A_F_CHUNK = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/aFChunk"
|
||||
CALC_CHAIN = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/calcChain"
|
||||
CERTIFICATE = (
|
||||
"http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/certificate"
|
||||
)
|
||||
CHART = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/chart"
|
||||
CHARTSHEET = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/chartsheet"
|
||||
CHART_USER_SHAPES = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/chartUserShapes"
|
||||
)
|
||||
COMMENTS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"
|
||||
COMMENT_AUTHORS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/commentAuthors"
|
||||
)
|
||||
CONNECTIONS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/connections"
|
||||
CONTROL = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/control"
|
||||
CORE_PROPERTIES = (
|
||||
"http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
|
||||
)
|
||||
CUSTOM_PROPERTIES = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
|
||||
)
|
||||
CUSTOM_PROPERTY = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/customProperty"
|
||||
)
|
||||
CUSTOM_XML = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXml"
|
||||
CUSTOM_XML_PROPS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXmlProps"
|
||||
)
|
||||
DIAGRAM_COLORS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramColors"
|
||||
)
|
||||
DIAGRAM_DATA = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramData"
|
||||
DIAGRAM_LAYOUT = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramLayout"
|
||||
)
|
||||
DIAGRAM_QUICK_STYLE = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramQuickStyle"
|
||||
)
|
||||
DIALOGSHEET = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/dialogsheet"
|
||||
DRAWING = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing"
|
||||
ENDNOTES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes"
|
||||
EXTENDED_PROPERTIES = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"
|
||||
)
|
||||
EXTERNAL_LINK = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/externalLink"
|
||||
)
|
||||
FONT = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/font"
|
||||
FONT_TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable"
|
||||
FOOTER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
|
||||
FOOTNOTES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes"
|
||||
GLOSSARY_DOCUMENT = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/glossaryDocument"
|
||||
)
|
||||
HANDOUT_MASTER = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/handoutMaster"
|
||||
)
|
||||
HEADER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
|
||||
HYPERLINK = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
|
||||
IMAGE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
|
||||
NOTES_MASTER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster"
|
||||
NOTES_SLIDE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide"
|
||||
NUMBERING = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering"
|
||||
OFFICE_DOCUMENT = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
|
||||
)
|
||||
OLE_OBJECT = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"
|
||||
ORIGIN = "http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/origin"
|
||||
PACKAGE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/package"
|
||||
PIVOT_CACHE_DEFINITION = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotCacheDefinition"
|
||||
)
|
||||
PIVOT_CACHE_RECORDS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
||||
"/spreadsheetml/pivotCacheRecords"
|
||||
)
|
||||
PIVOT_TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotTable"
|
||||
PRES_PROPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/presProps"
|
||||
PRINTER_SETTINGS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/printerSettings"
|
||||
)
|
||||
QUERY_TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/queryTable"
|
||||
REVISION_HEADERS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/revisionHeaders"
|
||||
)
|
||||
REVISION_LOG = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/revisionLog"
|
||||
SETTINGS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings"
|
||||
SHARED_STRINGS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"
|
||||
)
|
||||
SHEET_METADATA = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/sheetMetadata"
|
||||
)
|
||||
SIGNATURE = (
|
||||
"http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/signature"
|
||||
)
|
||||
SLIDE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide"
|
||||
SLIDE_LAYOUT = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout"
|
||||
SLIDE_MASTER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideMaster"
|
||||
SLIDE_UPDATE_INFO = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideUpdateInfo"
|
||||
)
|
||||
STYLES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"
|
||||
TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/table"
|
||||
TABLE_SINGLE_CELLS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/tableSingleCells"
|
||||
)
|
||||
TABLE_STYLES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/tableStyles"
|
||||
TAGS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/tags"
|
||||
THEME = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme"
|
||||
THEME_OVERRIDE = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/themeOverride"
|
||||
)
|
||||
THUMBNAIL = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/thumbnail"
|
||||
USERNAMES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/usernames"
|
||||
VIDEO = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/video"
|
||||
VIEW_PROPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/viewProps"
|
||||
VML_DRAWING = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/vmlDrawing"
|
||||
VOLATILE_DEPENDENCIES = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/volatileDependencies"
|
||||
)
|
||||
WEB_SETTINGS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings"
|
||||
WORKSHEET_SOURCE = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheetSource"
|
||||
)
|
||||
XML_MAPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/xmlMaps"
|
||||
@@ -0,0 +1,142 @@
|
||||
"""Provides CoreProperties, Dublin-Core attributes of the document.
|
||||
|
||||
These are broadly-standardized attributes like author, last-modified, etc.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.oxml.coreprops import CT_CoreProperties
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.coreprops import CT_CoreProperties
|
||||
|
||||
|
||||
class CoreProperties:
    """Corresponds to part named ``/docProps/core.xml``, containing the core document
    properties for this document package.

    Each property is a plain read/write pass-through to the like-named attribute of
    the wrapped element; this class performs no validation or conversion itself.
    """

    def __init__(self, element: CT_CoreProperties):
        # -- the element object all property reads and writes are delegated to --
        self._element = element

    @property
    def author(self):
        return self._element.author_text

    @author.setter
    def author(self, value: str):
        self._element.author_text = value

    @property
    def category(self):
        return self._element.category_text

    @category.setter
    def category(self, value: str):
        self._element.category_text = value

    @property
    def comments(self):
        return self._element.comments_text

    @comments.setter
    def comments(self, value: str):
        self._element.comments_text = value

    @property
    def content_status(self):
        return self._element.contentStatus_text

    @content_status.setter
    def content_status(self, value: str):
        self._element.contentStatus_text = value

    @property
    def created(self):
        return self._element.created_datetime

    @created.setter
    def created(self, value: dt.datetime):
        self._element.created_datetime = value

    @property
    def identifier(self):
        return self._element.identifier_text

    @identifier.setter
    def identifier(self, value: str):
        self._element.identifier_text = value

    @property
    def keywords(self):
        return self._element.keywords_text

    @keywords.setter
    def keywords(self, value: str):
        self._element.keywords_text = value

    @property
    def language(self):
        return self._element.language_text

    @language.setter
    def language(self, value: str):
        self._element.language_text = value

    @property
    def last_modified_by(self):
        return self._element.lastModifiedBy_text

    @last_modified_by.setter
    def last_modified_by(self, value: str):
        self._element.lastModifiedBy_text = value

    @property
    def last_printed(self):
        return self._element.lastPrinted_datetime

    @last_printed.setter
    def last_printed(self, value: dt.datetime):
        self._element.lastPrinted_datetime = value

    @property
    def modified(self):
        return self._element.modified_datetime

    @modified.setter
    def modified(self, value: dt.datetime):
        self._element.modified_datetime = value

    @property
    def revision(self):
        return self._element.revision_number

    @revision.setter
    def revision(self, value: int):
        self._element.revision_number = value

    @property
    def subject(self):
        return self._element.subject_text

    @subject.setter
    def subject(self, value: str):
        self._element.subject_text = value

    @property
    def title(self):
        return self._element.title_text

    @title.setter
    def title(self, value: str):
        self._element.title_text = value

    @property
    def version(self):
        return self._element.version_text

    @version.setter
    def version(self, value: str):
        self._element.version_text = value
|
||||
@@ -0,0 +1,12 @@
|
||||
"""Exceptions specific to python-opc.
|
||||
|
||||
The base exception class is OpcError.
|
||||
"""
|
||||
|
||||
|
||||
class OpcError(Exception):
    """Base error class for python-opc."""
|
||||
|
||||
|
||||
class PackageNotFoundError(OpcError):
    """Raised when a package cannot be found at the specified path."""
|
||||
@@ -0,0 +1,247 @@
|
||||
# pyright: reportPrivateUsage=false
|
||||
|
||||
"""Temporary stand-in for main oxml module.
|
||||
|
||||
This module came across with the PackageReader transplant. Probably much will get
|
||||
replaced with objects from the pptx.oxml.core and then this module will either get
|
||||
deleted or only hold the package related custom element classes.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import cast
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from docx.opc.constants import NAMESPACE as NS
|
||||
from docx.opc.constants import RELATIONSHIP_TARGET_MODE as RTM
|
||||
|
||||
# configure XML parser
# -- custom element classes are looked up by namespace and tag name --
element_class_lookup = etree.ElementNamespaceClassLookup()
oxml_parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
oxml_parser.set_element_class_lookup(element_class_lookup)

# -- namespace prefixes understood by `qn()` in this module --
nsmap = {
    "ct": NS.OPC_CONTENT_TYPES,
    "pr": NS.OPC_RELATIONSHIPS,
    "r": NS.OFC_RELATIONSHIPS,
}
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# functions
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
def parse_xml(text: str) -> etree._Element:
    """`etree.fromstring()` replacement that uses oxml parser.

    Parsing with `oxml_parser` means elements in a registered namespace come back
    as instances of the custom element classes defined in this module.
    """
    return etree.fromstring(text, oxml_parser)
|
||||
|
||||
|
||||
def qn(tag: str) -> str:
    """Stands for "qualified name", a utility function to turn a namespace prefixed tag
    name into a Clark-notation qualified tag name for lxml.

    For example, ``qn('pr:Relationship')`` returns
    ``'{http://schemas.openxmlformats.org/package/2006/relationships}Relationship'``.

    `tag` must contain exactly one colon (otherwise the unpacking raises
    |ValueError|) and its prefix must be a key of this module's `nsmap` (otherwise
    |KeyError| is raised).
    """
    prefix, tagroot = tag.split(":")
    uri = nsmap[prefix]
    return "{%s}%s" % (uri, tagroot)
|
||||
|
||||
|
||||
def serialize_part_xml(part_elm: etree._Element) -> bytes:
    """Serialize `part_elm` etree element to XML suitable for storage as an XML part.

    That is to say, no insignificant whitespace added for readability, and an
    appropriate XML declaration added with UTF-8 encoding specified.
    """
    return etree.tostring(part_elm, encoding="UTF-8", standalone=True)
|
||||
|
||||
|
||||
def serialize_for_reading(element: etree._Element) -> str:
    """Serialize `element` to human-readable XML suitable for tests.

    No XML declaration.
    """
    return etree.tostring(element, encoding="unicode", pretty_print=True)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Custom element classes
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class BaseOxmlElement(etree.ElementBase):
    """Base class for all custom element classes, to add standardized behavior to all
    classes in one place."""

    @property
    def xml(self) -> str:
        """Return XML string for this element, suitable for testing purposes.

        Pretty printed for readability and without an XML declaration at the top.
        """
        return serialize_for_reading(self)
|
||||
|
||||
|
||||
class CT_Default(BaseOxmlElement):
    """`<Default>` element that appears in `[Content_Types].xml` part.

    Used to specify a default content type to be applied to any part with the specified extension.
    """

    @property
    def content_type(self):
        """String held in the ``ContentType`` attribute of this ``<Default>``
        element."""
        return self.get("ContentType")

    @property
    def extension(self):
        """String held in the ``Extension`` attribute of this ``<Default>`` element."""
        return self.get("Extension")

    @staticmethod
    def new(ext: str, content_type: str):
        """Return a new ``<Default>`` element with attributes set to parameter values."""
        xml = '<Default xmlns="%s"/>' % nsmap["ct"]
        default = parse_xml(xml)
        default.set("Extension", ext)
        default.set("ContentType", content_type)
        return default
|
||||
|
||||
|
||||
class CT_Override(BaseOxmlElement):
    """``<Override>`` element, specifying the content type to be applied for a part with
    the specified partname."""

    @property
    def content_type(self):
        """String held in the ``ContentType`` attribute of this ``<Override>``
        element."""
        return self.get("ContentType")

    @staticmethod
    def new(partname: str, content_type: str):
        """Return a new ``<Override>`` element with attributes set to parameter values."""
        xml = '<Override xmlns="%s"/>' % nsmap["ct"]
        override = parse_xml(xml)
        override.set("PartName", partname)
        override.set("ContentType", content_type)
        return override

    @property
    def partname(self):
        """String held in the ``PartName`` attribute of this ``<Override>`` element."""
        return self.get("PartName")
|
||||
|
||||
|
||||
class CT_Relationship(BaseOxmlElement):
    """`<Relationship>` element, representing a single relationship from source to target part."""

    @staticmethod
    def new(rId: str, reltype: str, target: str, target_mode: str = RTM.INTERNAL):
        """Return a new ``<Relationship>`` element.

        The ``TargetMode`` attribute is only written when `target_mode` is
        ``External``; it is left off for an internal relationship.
        """
        xml = '<Relationship xmlns="%s"/>' % nsmap["pr"]
        relationship = parse_xml(xml)
        relationship.set("Id", rId)
        relationship.set("Type", reltype)
        relationship.set("Target", target)
        if target_mode == RTM.EXTERNAL:
            relationship.set("TargetMode", RTM.EXTERNAL)
        return relationship

    @property
    def rId(self):
        """String held in the ``Id`` attribute of this ``<Relationship>`` element."""
        return self.get("Id")

    @property
    def reltype(self):
        """String held in the ``Type`` attribute of this ``<Relationship>`` element."""
        return self.get("Type")

    @property
    def target_ref(self):
        """String held in the ``Target`` attribute of this ``<Relationship>``
        element."""
        return self.get("Target")

    @property
    def target_mode(self):
        """String held in the ``TargetMode`` attribute of this ``<Relationship>``
        element, either ``Internal`` or ``External``.

        Defaults to ``Internal``.
        """
        return self.get("TargetMode", RTM.INTERNAL)
|
||||
|
||||
|
||||
class CT_Relationships(BaseOxmlElement):
    """``<Relationships>`` element, the root element in a .rels file."""

    def add_rel(self, rId: str, reltype: str, target: str, is_external: bool = False):
        """Add a child ``<Relationship>`` element with attributes set according to
        parameter values."""
        target_mode = RTM.EXTERNAL if is_external else RTM.INTERNAL
        relationship = CT_Relationship.new(rId, reltype, target, target_mode)
        self.append(relationship)

    @staticmethod
    def new() -> CT_Relationships:
        """Return a new ``<Relationships>`` element."""
        xml = '<Relationships xmlns="%s"/>' % nsmap["pr"]
        return cast(CT_Relationships, parse_xml(xml))

    @property
    def Relationship_lst(self):
        """Return a list containing all the ``<Relationship>`` child elements."""
        return self.findall(qn("pr:Relationship"))

    @property
    def xml(self):
        """Return XML string for this element, suitable for saving in a .rels stream,
        not pretty printed and with an XML declaration at the top."""
        return serialize_part_xml(self)
|
||||
|
||||
|
||||
class CT_Types(BaseOxmlElement):
    """``<Types>`` element, the container element for Default and Override elements in
    [Content_Types].xml."""

    def add_default(self, ext: str, content_type: str):
        """Add a child ``<Default>`` element with attributes set to parameter values."""
        default = CT_Default.new(ext, content_type)
        self.append(default)

    def add_override(self, partname: str, content_type: str):
        """Add a child ``<Override>`` element with attributes set to parameter
        values."""
        override = CT_Override.new(partname, content_type)
        self.append(override)

    @property
    def defaults(self):
        """Return a list containing all the ``<Default>`` child elements."""
        return self.findall(qn("ct:Default"))

    @staticmethod
    def new():
        """Return a new ``<Types>`` element."""
        xml = '<Types xmlns="%s"/>' % nsmap["ct"]
        return parse_xml(xml)

    @property
    def overrides(self):
        """Return a list containing all the ``<Override>`` child elements."""
        return self.findall(qn("ct:Override"))
|
||||
|
||||
|
||||
# -- register custom element classes for the content-types namespace --
ct_namespace = element_class_lookup.get_namespace(nsmap["ct"])
ct_namespace["Default"] = CT_Default
ct_namespace["Override"] = CT_Override
ct_namespace["Types"] = CT_Types

# -- register custom element classes for the package-relationships namespace --
pr_namespace = element_class_lookup.get_namespace(nsmap["pr"])
pr_namespace["Relationship"] = CT_Relationship
pr_namespace["Relationships"] = CT_Relationships
|
||||
@@ -0,0 +1,219 @@
|
||||
"""Objects that implement reading and writing OPC packages."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, TYPE_CHECKING, Iterator, cast
|
||||
|
||||
from docx.opc.constants import RELATIONSHIP_TYPE as RT
|
||||
from docx.opc.packuri import PACKAGE_URI, PackURI
|
||||
from docx.opc.part import PartFactory
|
||||
from docx.opc.parts.coreprops import CorePropertiesPart
|
||||
from docx.opc.pkgreader import PackageReader
|
||||
from docx.opc.pkgwriter import PackageWriter
|
||||
from docx.opc.rel import Relationships
|
||||
from docx.shared import lazyproperty
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing_extensions import Self
|
||||
|
||||
from docx.opc.coreprops import CoreProperties
|
||||
from docx.opc.part import Part
|
||||
from docx.opc.rel import _Relationship # pyright: ignore[reportPrivateUsage]
|
||||
|
||||
|
||||
class OpcPackage:
    """Main API class for |python-opc|.

    A new instance is constructed by calling the :meth:`open` class method with a path
    to a package file or file-like object containing one.
    """

    def after_unmarshal(self):
        """Entry point for any post-unmarshaling processing.

        May be overridden by subclasses without forwarding call to super.
        """
        # -- intentionally empty; only catches the call when a subclass doesn't override --
        pass

    @property
    def core_properties(self) -> CoreProperties:
        """|CoreProperties| object providing read/write access to the Dublin Core
        properties for this document."""
        return self._core_properties_part.core_properties

    def iter_rels(self) -> Iterator[_Relationship]:
        """Generate exactly one reference to each relationship in the package by
        performing a depth-first traversal of the rels graph."""

        def walk_rels(
            source: OpcPackage | Part, visited: list[Part] | None = None
        ) -> Iterator[_Relationship]:
            # -- a single `visited` list is shared by the whole recursion --
            visited = [] if visited is None else visited
            for rel in source.rels.values():
                yield rel
                # -- an external relationship has no target part to descend into --
                if rel.is_external:
                    continue
                part = rel.target_part
                if part in visited:
                    continue
                visited.append(part)
                yield from walk_rels(part, visited)

        yield from walk_rels(self)

    def iter_parts(self) -> Iterator[Part]:
        """Generate exactly one reference to each of the parts in the package by
        performing a depth-first traversal of the rels graph."""

        def walk_parts(
            source: OpcPackage | Part, visited: list[Part] | None = None
        ) -> Iterator[Part]:
            # -- `None` sentinel rather than a mutable `[]` default, matching --
            # -- `walk_rels()` in `.iter_rels()`; the list is shared by the recursion --
            visited = [] if visited is None else visited
            for rel in source.rels.values():
                # -- an external relationship has no target part --
                if rel.is_external:
                    continue
                part = rel.target_part
                if part in visited:
                    continue
                visited.append(part)
                yield part
                yield from walk_parts(part, visited)

        yield from walk_parts(self)

    def load_rel(self, reltype: str, target: Part | str, rId: str, is_external: bool = False):
        """Return newly added |_Relationship| instance of `reltype` between this part
        and `target` with key `rId`.

        Target mode is set to ``RTM.EXTERNAL`` if `is_external` is |True|. Intended for
        use during load from a serialized package, where the rId is well known. Other
        methods exist for adding a new relationship to the package during processing.
        """
        return self.rels.add_relationship(reltype, target, rId, is_external)

    @property
    def main_document_part(self):
        """Return a reference to the main document part for this package.

        Examples include a document part for a WordprocessingML package, a presentation
        part for a PresentationML package, or a workbook part for a SpreadsheetML
        package.
        """
        return self.part_related_by(RT.OFFICE_DOCUMENT)

    def next_partname(self, template: str) -> PackURI:
        """Return a |PackURI| instance representing partname matching `template`.

        The returned part-name has the next available numeric suffix to distinguish it
        from other parts of its type. `template` is a printf (%)-style template string
        containing a single replacement item, a '%d' to be used to insert the integer
        portion of the partname. Example: "/word/header%d.xml"
        """
        partnames = {part.partname for part in self.iter_parts()}
        # -- one more candidate than there are existing partnames is tried, so at --
        # -- least one candidate is always unused and the loop always returns --
        for n in range(1, len(partnames) + 2):
            candidate_partname = template % n
            if candidate_partname not in partnames:
                return PackURI(candidate_partname)

    @classmethod
    def open(cls, pkg_file: str | IO[bytes]) -> Self:
        """Return an |OpcPackage| instance loaded with the contents of `pkg_file`."""
        pkg_reader = PackageReader.from_file(pkg_file)
        package = cls()
        Unmarshaller.unmarshal(pkg_reader, package, PartFactory)
        return package

    def part_related_by(self, reltype: str) -> Part:
        """Return part to which this package has a relationship of `reltype`.

        Raises |KeyError| if no such relationship is found and |ValueError| if more than
        one such relationship is found.
        """
        return self.rels.part_with_reltype(reltype)

    @property
    def parts(self) -> list[Part]:
        """Return a list containing a reference to each of the parts in this package."""
        return list(self.iter_parts())

    def relate_to(self, part: Part, reltype: str):
        """Return rId key of new or existing relationship to `part`.

        If a relationship of `reltype` to `part` already exists, its rId is returned. Otherwise a
        new relationship is created and that rId is returned.
        """
        rel = self.rels.get_or_add(reltype, part)
        return rel.rId

    @lazyproperty
    def rels(self):
        """Return a reference to the |Relationships| instance holding the collection of
        relationships for this package."""
        return Relationships(PACKAGE_URI.baseURI)

    def save(self, pkg_file: str | IO[bytes]):
        """Save this package to `pkg_file`.

        `pkg_file` can be either a file-path or a file-like object.
        """
        for part in self.parts:
            part.before_marshal()
        # -- parts are collected again here, after every `before_marshal()` has run --
        PackageWriter.write(pkg_file, self.rels, self.parts)

    @property
    def _core_properties_part(self) -> CorePropertiesPart:
        """|CorePropertiesPart| object related to this package.

        Creates a default core properties part if one is not present (not common).
        """
        try:
            return cast(CorePropertiesPart, self.part_related_by(RT.CORE_PROPERTIES))
        except KeyError:
            # -- no such relationship yet: create the part and relate it to the package --
            core_properties_part = CorePropertiesPart.default(self)
            self.relate_to(core_properties_part, RT.CORE_PROPERTIES)
            return core_properties_part
|
||||
|
||||
|
||||
class Unmarshaller:
    """Hosts static methods for unmarshalling a package from a |PackageReader|."""

    @staticmethod
    def unmarshal(pkg_reader, package, part_factory):
        """Build the graph of parts and relationships described by `pkg_reader`.

        Construction of each part is delegated to `part_factory`. Package-level
        relationships are added to `package`. Once the graph is complete,
        `after_unmarshal()` is called on each part and then on `package`.
        """
        parts = Unmarshaller._unmarshal_parts(pkg_reader, package, part_factory)
        Unmarshaller._unmarshal_relationships(pkg_reader, package, parts)
        for loaded_part in parts.values():
            loaded_part.after_unmarshal()
        package.after_unmarshal()

    @staticmethod
    def _unmarshal_parts(pkg_reader, package, part_factory):
        """Return a dict of the parts in `pkg_reader`, keyed by partname.

        Each value is whatever `part_factory` returns for that serialized part.
        """
        return {
            partname: part_factory(partname, content_type, reltype, blob, package)
            for partname, content_type, reltype, blob in pkg_reader.iter_sparts()
        }

    @staticmethod
    def _unmarshal_relationships(pkg_reader, package, parts):
        """Load each relationship in `pkg_reader` onto its source object.

        The source is `package` for source URI "/", otherwise the matching part in
        `parts`. The target is the target-ref string for an external relationship,
        otherwise the target part looked up in `parts`.
        """
        for source_uri, srel in pkg_reader.iter_srels():
            if source_uri == "/":
                source = package
            else:
                source = parts[source_uri]

            if srel.is_external:
                target = srel.target_ref
            else:
                target = parts[srel.target_partname]

            source.load_rel(srel.reltype, target, srel.rId, srel.is_external)
|
||||
@@ -0,0 +1,109 @@
|
||||
"""Provides the PackURI value type.
|
||||
|
||||
Also some useful known pack URI strings such as PACKAGE_URI.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import posixpath
|
||||
import re
|
||||
|
||||
|
||||
class PackURI(str):
    """Provides access to pack URI components such as the baseURI and the filename slice.

    Behaves as |str| otherwise.
    """

    # -- group 1 is the leading run of letters, group 2 the optional numeric suffix --
    _filename_re = re.compile("([a-zA-Z]+)([1-9][0-9]*)?")

    def __new__(cls, pack_uri_str: str):
        """Return a new |PackURI| with the value `pack_uri_str`.

        Raises |ValueError| if `pack_uri_str` does not begin with a slash, including
        when it is the empty string.
        """
        # -- slice rather than index so an empty string gets the ValueError below --
        # -- instead of an IndexError --
        if pack_uri_str[:1] != "/":
            tmpl = "PackURI must begin with slash, got '%s'"
            raise ValueError(tmpl % pack_uri_str)
        return str.__new__(cls, pack_uri_str)

    @staticmethod
    def from_rel_ref(baseURI: str, relative_ref: str) -> PackURI:
        """The absolute PackURI formed by translating `relative_ref` onto `baseURI`."""
        joined_uri = posixpath.join(baseURI, relative_ref)
        abs_uri = posixpath.abspath(joined_uri)
        return PackURI(abs_uri)

    @property
    def baseURI(self) -> str:
        """The base URI of this pack URI, the directory portion, roughly speaking.

        E.g. ``'/ppt/slides'`` for ``'/ppt/slides/slide1.xml'``. For the package pseudo-
        partname '/', baseURI is '/'.
        """
        return posixpath.split(self)[0]

    @property
    def ext(self) -> str:
        """The extension portion of this pack URI, e.g. ``'xml'`` for ``'/word/document.xml'``.

        Note the period is not included.
        """
        # raw_ext is either empty string or starts with period, e.g. '.xml'
        raw_ext = posixpath.splitext(self)[1]
        return raw_ext[1:] if raw_ext.startswith(".") else raw_ext

    @property
    def filename(self):
        """The "filename" portion of this pack URI, e.g. ``'slide1.xml'`` for
        ``'/ppt/slides/slide1.xml'``.

        For the package pseudo-partname '/', filename is ''.
        """
        return posixpath.split(self)[1]

    @property
    def idx(self):
        """Return partname index as integer for tuple partname or None for singleton
        partname, e.g. ``21`` for ``'/ppt/slides/slide21.xml'`` and |None| for
        ``'/ppt/presentation.xml'``."""
        filename = self.filename
        if not filename:
            return None
        name_part = posixpath.splitext(filename)[0]  # filename w/ext removed
        match = self._filename_re.match(name_part)
        # -- no match, or a match without a numeric suffix, means a singleton partname --
        if match is None or not match.group(2):
            return None
        return int(match.group(2))

    @property
    def membername(self):
        """The pack URI with the leading slash stripped off, the form used as the Zip
        file membername for the package item.

        Returns '' for the package pseudo-partname '/'.
        """
        return self[1:]

    def relative_ref(self, baseURI: str):
        """Return string containing relative reference to package item from `baseURI`.

        E.g. PackURI('/ppt/slideLayouts/slideLayout1.xml') would return
        '../slideLayouts/slideLayout1.xml' for baseURI '/ppt/slides'.
        """
        # workaround for posixpath bug in 2.6, doesn't generate correct
        # relative path when `start` (second) parameter is root ('/')
        return self[1:] if baseURI == "/" else posixpath.relpath(self, baseURI)

    @property
    def rels_uri(self):
        """The pack URI of the .rels part corresponding to the current pack URI.

        Only produces sensible output if the pack URI is a partname or the package
        pseudo-partname '/'.
        """
        rels_filename = "%s.rels" % self.filename
        rels_uri_str = posixpath.join(self.baseURI, "_rels", rels_filename)
        return PackURI(rels_uri_str)
|
||||
|
||||
|
||||
# -- pack URI of the package itself, the pseudo-partname "/" --
PACKAGE_URI = PackURI("/")
# -- pack URI of the `[Content_Types].xml` package item --
CONTENT_TYPES_URI = PackURI("/[Content_Types].xml")
|
||||
@@ -0,0 +1,247 @@
|
||||
# pyright: reportImportCycles=false
|
||||
|
||||
"""Open Packaging Convention (OPC) objects related to package parts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable, Type, cast
|
||||
|
||||
from docx.opc.oxml import serialize_part_xml
|
||||
from docx.opc.packuri import PackURI
|
||||
from docx.opc.rel import Relationships
|
||||
from docx.opc.shared import cls_method_fn
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.shared import lazyproperty
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement
|
||||
from docx.package import Package
|
||||
|
||||
|
||||
class Part:
    """Base class for package parts.

    Supplies the properties and methods common to all parts. Client code is expected
    to subclass it to implement the behaviors of specific part types.
    """

    def __init__(
        self,
        partname: PackURI,
        content_type: str,
        blob: bytes | None = None,
        package: Package | None = None,
    ):
        super().__init__()
        self._partname = partname
        self._content_type = content_type
        self._blob = blob
        self._package = package

    def after_unmarshal(self):
        """Hook for post-unmarshaling processing, for example parsing the part XML.

        A subclass may override this without calling super.
        """
        # -- intentionally empty; only catches the call when a subclass doesn't override --
        pass

    def before_marshal(self):
        """Hook for pre-serialization processing, for example finalizing part naming.

        A subclass may override this without calling super.
        """
        # -- intentionally empty; only catches the call when a subclass doesn't override --
        pass

    @property
    def blob(self) -> bytes:
        """Contents of this package part as bytes, text or binary.

        Intended to be overridden by subclasses. The default is the blob this part was
        constructed with, or empty bytes when that blob is `None` or empty.
        """
        return self._blob or b""

    @property
    def content_type(self):
        """Content type of this part."""
        return self._content_type

    def drop_rel(self, rId: str):
        """Remove the relationship identified by `rId` unless it is referenced two or
        more times.

        Relationships with a reference count of 0 are implicit relationships.
        """
        if self._rel_ref_count(rId) >= 2:
            return
        del self.rels[rId]

    @classmethod
    def load(cls, partname: PackURI, content_type: str, blob: bytes, package: Package):
        """Return an instance of this class constructed directly from the arguments."""
        return cls(partname, content_type, blob, package)

    def load_rel(self, reltype: str, target: Part | str, rId: str, is_external: bool = False):
        """Return newly added |_Relationship| instance of `reltype`.

        The new relationship relates the `target` part to this part with key `rId`.

        Target mode is set to ``RTM.EXTERNAL`` if `is_external` is |True|. Intended for
        use during load from a serialized package, where the rId is well-known. Other
        methods exist for adding a new relationship to a part when manipulating a part.
        """
        return self.rels.add_relationship(reltype, target, rId, is_external)

    @property
    def package(self):
        """|OpcPackage| instance this part belongs to."""
        return self._package

    @property
    def partname(self):
        """|PackURI| instance holding partname of this part, e.g.
        '/ppt/slides/slide1.xml'."""
        return self._partname

    @partname.setter
    def partname(self, partname: str):
        # -- only a |PackURI| is accepted; anything else, including a plain str, is rejected --
        if isinstance(partname, PackURI):
            self._partname = partname
            return
        tmpl = "partname must be instance of PackURI, got '%s'"
        raise TypeError(tmpl % type(partname).__name__)

    def part_related_by(self, reltype: str) -> Part:
        """Return part to which this part has a relationship of `reltype`.

        Raises |KeyError| if no such relationship is found and |ValueError| if more than
        one such relationship is found. Provides ability to resolve implicitly related
        part, such as Slide -> SlideLayout.
        """
        return self.rels.part_with_reltype(reltype)

    def relate_to(self, target: Part | str, reltype: str, is_external: bool = False) -> str:
        """Return rId key of relationship of `reltype` to `target`.

        The rId of an existing relationship is returned when there is one, otherwise a
        new relationship is created. `target` is treated as a target-ref string when
        `is_external` is |True| and as a part otherwise.
        """
        rels = self.rels
        if is_external:
            return rels.get_or_add_ext_rel(reltype, cast(str, target))
        return rels.get_or_add(reltype, cast(Part, target)).rId

    @property
    def related_parts(self):
        """Dictionary mapping related parts by rId, so child objects can resolve
        explicit relationships present in the part XML, e.g. sldIdLst to a specific
        |Slide| instance."""
        return self.rels.related_parts

    @lazyproperty
    def rels(self):
        """|Relationships| instance holding the relationships for this part."""
        # -- prevent breakage in `python-docx-template` by retaining legacy `._rels` attribute --
        self._rels = Relationships(self._partname.baseURI)
        return self._rels

    def target_ref(self, rId: str) -> str:
        """Return URL contained in target ref of relationship identified by `rId`."""
        return self.rels[rId].target_ref

    def _rel_ref_count(self, rId: str) -> int:
        """Return the count of references in this part to the relationship identified by `rId`.

        Only an XML part can contain references, so this is 0 for `Part`.
        """
        return 0
|
||||
|
||||
|
||||
class PartFactory:
    """Provides a way for client code to specify a subclass of |Part| to be constructed
    by |Unmarshaller| based on its content type and/or a custom callable.

    Setting ``PartFactory.part_class_selector`` to a callable object will cause that
    object to be called with the parameters ``content_type, reltype``, once for each
    part in the package. If the callable returns an object, it is used as the class for
    that part. If it returns |None|, part class selection falls back to the content type
    map defined in ``PartFactory.part_type_for``. If no class is returned from either of
    these, the class contained in ``PartFactory.default_part_type`` is used to construct
    the part, which is by default |Part|.
    """

    # -- defaults to None so that `cls.part_class_selector` below resolves even when no --
    # -- selector has been assigned; an annotation alone creates no class attribute --
    part_class_selector: Callable[[str, str], Type[Part] | None] | None = None
    part_type_for: dict[str, Type[Part]] = {}
    default_part_type = Part

    def __new__(
        cls,
        partname: PackURI,
        content_type: str,
        reltype: str,
        blob: bytes,
        package: Package,
    ):
        """Return a part loaded from the arguments, not a |PartFactory| instance.

        The part class is chosen by `part_class_selector` when one is set and it
        returns a class, otherwise by `_part_cls_for()`. The part itself is produced by
        that class's `load()` classmethod.
        """
        PartClass: Type[Part] | None = None
        if cls.part_class_selector is not None:
            part_class_selector = cls_method_fn(cls, "part_class_selector")
            PartClass = part_class_selector(content_type, reltype)
        # -- no selector, or the selector declined: fall back to the content-type map --
        if PartClass is None:
            PartClass = cls._part_cls_for(content_type)
        return PartClass.load(partname, content_type, blob, package)

    @classmethod
    def _part_cls_for(cls, content_type: str):
        """Return the custom part class registered for `content_type`, or the default
        part class if no custom class is registered for `content_type`."""
        if content_type in cls.part_type_for:
            return cls.part_type_for[content_type]
        return cls.default_part_type
|
||||
|
||||
|
||||
class XmlPart(Part):
    """Base class for package parts containing an XML payload, which is most of them.

    Extends |Part| with parsing and reserialization of the XML payload and with
    reference counting of relationships used in that XML.
    """

    def __init__(
        self, partname: PackURI, content_type: str, element: BaseOxmlElement, package: Package
    ):
        # -- no blob is passed to the base class; `.blob` is serialized from the element --
        super().__init__(partname, content_type, package=package)
        self._element = element

    @property
    def blob(self):
        """The root element of this part serialized by `serialize_part_xml()`."""
        return serialize_part_xml(self._element)

    @property
    def element(self):
        """The root XML element of this XML part."""
        return self._element

    @classmethod
    def load(cls, partname: PackURI, content_type: str, blob: bytes, package: Package):
        """Return an instance of this class whose root element is parsed from `blob`."""
        root_element = parse_xml(blob)
        return cls(partname, content_type, root_element, package)

    @property
    def part(self):
        """Part of the parent protocol, "children" of the document will not know the
        part that contains them so must ask their parent object.

        That chain of delegation ends here for child objects.
        """
        return self

    def _rel_ref_count(self, rId: str) -> int:
        """Return the count of references in this part's XML to the relationship
        identified by `rId`."""
        # -- every `r:id` attribute value in the tree is collected, then matches are counted --
        rIds = cast("list[str]", self._element.xpath("//@r:id"))
        return rIds.count(rId)
|
||||
@@ -0,0 +1,48 @@
|
||||
"""Core properties part, corresponds to ``/docProps/core.xml`` part in package."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
from docx.opc.coreprops import CoreProperties
|
||||
from docx.opc.packuri import PackURI
|
||||
from docx.opc.part import XmlPart
|
||||
from docx.oxml.coreprops import CT_CoreProperties
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.opc.package import OpcPackage
|
||||
|
||||
|
||||
class CorePropertiesPart(XmlPart):
    """Corresponds to part named ``/docProps/core.xml``.

    The "core" is short for "Dublin Core" and contains document metadata relatively common across
    documents of all types, not just DOCX.
    """

    @classmethod
    def default(cls, package: OpcPackage):
        """Return a new |CorePropertiesPart| populated with default values.

        Title, last-modified-by, revision and modified time are set. The modified time
        is the current time in UTC.
        """
        part = cls._new(package)
        props = part.core_properties
        props.title = "Word Document"
        props.last_modified_by = "python-docx"
        props.revision = 1
        props.modified = dt.datetime.now(dt.timezone.utc)
        return part

    @property
    def core_properties(self):
        """A |CoreProperties| object providing read/write access to the core properties
        contained in this core properties part."""
        return CoreProperties(self.element)

    @classmethod
    def _new(cls, package: OpcPackage) -> CorePropertiesPart:
        """Return a new |CorePropertiesPart| at ``/docProps/core.xml`` wrapping a new
        element from `CT_CoreProperties.new()`."""
        return CorePropertiesPart(
            PackURI("/docProps/core.xml"),
            CT.OPC_CORE_PROPERTIES,
            CT_CoreProperties.new(),
            package,
        )
|
||||
@@ -0,0 +1,119 @@
|
||||
"""Provides a general interface to a `physical` OPC package, such as a zip file."""
|
||||
|
||||
import os
|
||||
from zipfile import ZIP_DEFLATED, ZipFile, is_zipfile
|
||||
|
||||
from docx.opc.exceptions import PackageNotFoundError
|
||||
from docx.opc.packuri import CONTENT_TYPES_URI
|
||||
|
||||
|
||||
class PhysPkgReader:
    """Factory for physical package reader objects."""

    def __new__(cls, pkg_file):
        """Return an uninitialized reader of the class suited to `pkg_file`.

        A `str` is treated as a path: a directory selects the directory reader and a
        zip file selects the zip reader. Any other path raises |PackageNotFoundError|.
        """
        if not isinstance(pkg_file, str):
            # -- assume it's a stream and pass it to Zip reader to sort out --
            reader_cls = _ZipPkgReader
        elif os.path.isdir(pkg_file):
            reader_cls = _DirPkgReader
        elif is_zipfile(pkg_file):
            reader_cls = _ZipPkgReader
        else:
            raise PackageNotFoundError("Package not found at '%s'" % pkg_file)

        return super().__new__(reader_cls)
|
||||
|
||||
|
||||
class PhysPkgWriter:
    """Factory for physical package writer objects."""

    def __new__(cls, pkg_file):
        """Return an uninitialized zip package writer, whatever `pkg_file` is."""
        writer_cls = _ZipPkgWriter
        return super().__new__(writer_cls)
|
||||
|
||||
|
||||
class _DirPkgReader(PhysPkgReader):
    """Implements |PhysPkgReader| interface for an OPC package extracted into a
    directory."""

    def __init__(self, path):
        """`path` is the path to a directory containing an expanded package."""
        super().__init__()
        # -- stored as an absolute path --
        self._path = os.path.abspath(path)

    def blob_for(self, pack_uri):
        """Return contents of file corresponding to `pack_uri` in package directory."""
        member_path = os.path.join(self._path, pack_uri.membername)
        with open(member_path, "rb") as f:
            return f.read()

    def close(self):
        """Provides interface consistency with |ZipFileSystem|, but does nothing, a
        directory file system doesn't need closing."""
        pass

    @property
    def content_types_xml(self):
        """Return the `[Content_Types].xml` blob from the package."""
        return self.blob_for(CONTENT_TYPES_URI)

    def rels_xml_for(self, source_uri):
        """Return rels item XML for source with `source_uri`, or None if the item has no
        rels item."""
        try:
            return self.blob_for(source_uri.rels_uri)
        except IOError:
            # -- reading the rels file failed, so report "no rels item" --
            return None
|
||||
|
||||
|
||||
class _ZipPkgReader(PhysPkgReader):
    """Implements |PhysPkgReader| interface for a zip file OPC package."""

    def __init__(self, pkg_file):
        """Open `pkg_file` as a zip archive for reading."""
        super().__init__()
        self._zipf = ZipFile(pkg_file, "r")

    def blob_for(self, pack_uri):
        """Return blob corresponding to `pack_uri`.

        Raises |KeyError| if no matching member is present in zip archive.
        """
        return self._zipf.read(pack_uri.membername)

    def close(self):
        """Close the zip archive, releasing any resources it is using."""
        self._zipf.close()

    @property
    def content_types_xml(self):
        """Return the `[Content_Types].xml` blob from the zip package."""
        return self.blob_for(CONTENT_TYPES_URI)

    def rels_xml_for(self, source_uri):
        """Return rels item XML for source with `source_uri` or None if no rels item is
        present."""
        try:
            return self.blob_for(source_uri.rels_uri)
        except KeyError:
            # -- `.blob_for()` raises KeyError when the archive has no such member --
            return None
|
||||
|
||||
|
||||
class _ZipPkgWriter(PhysPkgWriter):
    """Implements |PhysPkgWriter| interface for a zip file OPC package."""

    def __init__(self, pkg_file):
        """Open `pkg_file` as a new zip archive for writing, using DEFLATE compression."""
        super().__init__()
        self._zipf = ZipFile(pkg_file, "w", compression=ZIP_DEFLATED)

    def close(self):
        """Close the zip archive, flushing any pending physical writes and releasing any
        resources it's using."""
        self._zipf.close()

    def write(self, pack_uri, blob):
        """Write `blob` to this zip package with the membername corresponding to
        `pack_uri`."""
        membername = pack_uri.membername
        self._zipf.writestr(membername, blob)
|
||||
@@ -0,0 +1,254 @@
|
||||
"""Low-level, read-only API to a serialized Open Packaging Convention (OPC) package."""
|
||||
|
||||
from docx.opc.constants import RELATIONSHIP_TARGET_MODE as RTM
|
||||
from docx.opc.oxml import parse_xml
|
||||
from docx.opc.packuri import PACKAGE_URI, PackURI
|
||||
from docx.opc.phys_pkg import PhysPkgReader
|
||||
from docx.opc.shared import CaseInsensitiveDict
|
||||
|
||||
|
||||
class PackageReader:
    """Provides access to the contents of a zip-format OPC package via its
    :attr:`serialized_parts` and :attr:`pkg_srels` attributes."""

    def __init__(self, content_types, pkg_srels, sparts):
        # -- `content_types` is accepted but not retained by this object --
        super().__init__()
        self._pkg_srels = pkg_srels
        self._sparts = sparts

    @staticmethod
    def from_file(pkg_file):
        """Return a |PackageReader| instance loaded with contents of `pkg_file`.

        The physical reader opened on `pkg_file` is closed before returning, and also
        when loading fails part-way, so a malformed package does not leak it.
        """
        phys_reader = PhysPkgReader(pkg_file)
        try:
            content_types = _ContentTypeMap.from_xml(phys_reader.content_types_xml)
            pkg_srels = PackageReader._srels_for(phys_reader, PACKAGE_URI)
            sparts = PackageReader._load_serialized_parts(phys_reader, pkg_srels, content_types)
        finally:
            phys_reader.close()
        return PackageReader(content_types, pkg_srels, sparts)

    def iter_sparts(self):
        """Generate a 4-tuple `(partname, content_type, reltype, blob)` for each of the
        serialized parts in the package."""
        for s in self._sparts:
            yield (s.partname, s.content_type, s.reltype, s.blob)

    def iter_srels(self):
        """Generate a 2-tuple `(source_uri, srel)` for each of the relationships in the
        package."""
        # -- package-level relationships first, then those of each part --
        for srel in self._pkg_srels:
            yield (PACKAGE_URI, srel)
        for spart in self._sparts:
            for srel in spart.srels:
                yield (spart.partname, srel)

    @staticmethod
    def _load_serialized_parts(phys_reader, pkg_srels, content_types):
        """Return a tuple of |_SerializedPart| instances corresponding to the parts in
        `phys_reader` accessible by walking the relationship graph starting with
        `pkg_srels`."""
        part_walker = PackageReader._walk_phys_parts(phys_reader, pkg_srels)
        return tuple(
            _SerializedPart(partname, content_types[partname], reltype, blob, srels)
            for partname, blob, reltype, srels in part_walker
        )

    @staticmethod
    def _srels_for(phys_reader, source_uri):
        """Return |_SerializedRelationships| instance populated with relationships for
        source identified by `source_uri`."""
        rels_xml = phys_reader.rels_xml_for(source_uri)
        return _SerializedRelationships.load_from_xml(source_uri.baseURI, rels_xml)

    @staticmethod
    def _walk_phys_parts(phys_reader, srels, visited_partnames=None):
        """Generate a 4-tuple `(partname, blob, reltype, srels)` for each of the parts
        in `phys_reader` by walking the relationship graph rooted at srels.

        `visited_partnames` is the list of partnames already generated. It is shared
        by, and appended to during, the whole recursion so each part is generated once.
        """
        if visited_partnames is None:
            visited_partnames = []
        for srel in srels:
            # -- an external relationship has no part in the package --
            if srel.is_external:
                continue
            partname = srel.target_partname
            if partname in visited_partnames:
                continue
            visited_partnames.append(partname)
            reltype = srel.reltype
            part_srels = PackageReader._srels_for(phys_reader, partname)
            blob = phys_reader.blob_for(partname)
            yield (partname, blob, reltype, part_srels)
            # -- depth-first: descend into this part's relationships before moving on --
            yield from PackageReader._walk_phys_parts(phys_reader, part_srels, visited_partnames)
|
||||
|
||||
|
||||
class _ContentTypeMap:
    """Lookup table resolving a part name to its content type.

    Supports subscript access keyed by |PackURI|, e.g.
    ``content_type = cti['/ppt/presentation.xml']``. An override registered for the
    exact partname wins; otherwise the default registered for the partname's
    extension is used. Both tables are |CaseInsensitiveDict| instances, so matching
    ignores case.
    """

    def __init__(self):
        super().__init__()
        self._overrides = CaseInsensitiveDict()
        self._defaults = CaseInsensitiveDict()

    def __getitem__(self, partname):
        """Return the content type for the part identified by `partname`.

        Raises |KeyError| when `partname` is not a |PackURI| or when neither an
        override nor an extension default is registered for it.
        """
        if not isinstance(partname, PackURI):
            raise KeyError(
                "_ContentTypeMap key must be <type 'PackURI'>, got %s" % type(partname)
            )

        overrides, defaults = self._overrides, self._defaults

        if partname in overrides:
            return overrides[partname]

        ext = partname.ext
        if ext in defaults:
            return defaults[ext]

        raise KeyError(
            "no content type for partname '%s' in [Content_Types].xml" % partname
        )

    @staticmethod
    def from_xml(content_types_xml):
        """Return a new |_ContentTypeMap| built from `content_types_xml`.

        Every override element of the parsed XML is registered first, followed by
        every default element.
        """
        types_elm = parse_xml(content_types_xml)
        mapping = _ContentTypeMap()
        for override in types_elm.overrides:
            mapping._add_override(override.partname, override.content_type)
        for default in types_elm.defaults:
            mapping._add_default(default.extension, default.content_type)
        return mapping

    def _add_default(self, extension, content_type):
        """Register `content_type` as the default for parts having `extension`."""
        self._defaults[extension] = content_type

    def _add_override(self, partname, content_type):
        """Register `content_type` as the override for the part named `partname`."""
        self._overrides[partname] = content_type
|
||||
|
||||
|
||||
class _SerializedPart:
    """Immutable-style record describing one part read from an OPC package.

    Bundles the partname, content type, referring relationship type, binary content
    (blob) and serialized relationships of the part, each exposed as a read-only
    property.
    """

    def __init__(self, partname, content_type, reltype, blob, srels):
        super().__init__()
        self._partname, self._content_type = partname, content_type
        self._reltype = reltype
        self._blob, self._srels = blob, srels

    @property
    def partname(self):
        """The partname this part was constructed with."""
        return self._partname

    @property
    def content_type(self):
        """The content type this part was constructed with."""
        return self._content_type

    @property
    def blob(self):
        """The blob (part content) this part was constructed with."""
        return self._blob

    @property
    def reltype(self):
        """The relationship type by which this part was referred to."""
        return self._reltype

    @property
    def srels(self):
        """The serialized relationships this part was constructed with."""
        return self._srels
|
||||
|
||||
|
||||
class _SerializedRelationship:
    """Record for a single relationship as it is stored in an OPC package.

    "Serialized" here means the target is identified by reference (a partname or an
    external URI) rather than by a link to an in-memory |Part| object. The values
    are copied from the relationship element supplied at construction.
    """

    def __init__(self, baseURI, rel_elm):
        super().__init__()
        self._baseURI = baseURI
        # -- snapshot the element's attributes so the element itself isn't retained --
        self._rId = rel_elm.rId
        self._reltype = rel_elm.reltype
        self._target_mode = rel_elm.target_mode
        self._target_ref = rel_elm.target_ref

    @property
    def is_external(self):
        """True when the target mode of this relationship is ``RTM.EXTERNAL``."""
        return self._target_mode == RTM.EXTERNAL

    @property
    def reltype(self):
        """The relationship type, e.g. ``RT.OFFICE_DOCUMENT``."""
        return self._reltype

    @property
    def rId(self):
        """The relationship id, e.g. 'rId9'.

        Taken from the `rId` attribute of the relationship element given to the
        constructor.
        """
        return self._rId

    @property
    def target_mode(self):
        """The target mode read from the relationship element.

        Compared against ``RTM.EXTERNAL`` by :attr:`is_external`.
        """
        return self._target_mode

    @property
    def target_ref(self):
        """The target reference read from the relationship element.

        For an internal relationship this is resolved against the base URI by
        :attr:`target_partname`.
        """
        return self._target_ref

    @property
    def target_partname(self):
        """|PackURI| of the part targeted by this relationship.

        The value is computed from the base URI and :attr:`target_ref` on first
        access and cached on the instance afterwards.

        Raises |ValueError| when the relationship is external; check
        :attr:`is_external` before referencing.
        """
        if self.is_external:
            raise ValueError(
                'target_partname attribute on Relationship is undefined where TargetMode == "External"'
            )

        # -- cached from an earlier access --
        if hasattr(self, "_target_partname"):
            return self._target_partname

        self._target_partname = PackURI.from_rel_ref(self._baseURI, self.target_ref)
        return self._target_partname
|
||||
|
||||
|
||||
class _SerializedRelationships:
    """Iterable collection of |_SerializedRelationship| instances.

    Instances start out empty; use :meth:`load_from_xml` to obtain one populated from
    a relationships item.
    """

    def __init__(self):
        super().__init__()
        self._srels = []

    def __iter__(self):
        """Iterate over the contained relationships, e.g. ``for srel in srels:``."""
        return iter(self._srels)

    @staticmethod
    def load_from_xml(baseURI, rels_item_xml):
        """Return a |_SerializedRelationships| loaded from `rels_item_xml`.

        One |_SerializedRelationship| is created, with `baseURI`, for each
        relationship element found in the parsed XML. The collection is returned
        empty when `rels_item_xml` is |None|.
        """
        loaded = _SerializedRelationships()
        if rels_item_xml is None:
            return loaded

        rels_elm = parse_xml(rels_item_xml)
        loaded._srels.extend(
            _SerializedRelationship(baseURI, rel_elm) for rel_elm in rels_elm.Relationship_lst
        )
        return loaded
|
||||
@@ -0,0 +1,115 @@
|
||||
"""Provides low-level, write-only API to serialized (OPC) package.
|
||||
|
||||
OPC stands for Open Packaging Convention. This is, essentially, an implementation of
|
||||
OpcPackage.save().
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Iterable
|
||||
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
from docx.opc.oxml import CT_Types, serialize_part_xml
|
||||
from docx.opc.packuri import CONTENT_TYPES_URI, PACKAGE_URI
|
||||
from docx.opc.phys_pkg import PhysPkgWriter
|
||||
from docx.opc.shared import CaseInsensitiveDict
|
||||
from docx.opc.spec import default_content_types
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.opc.part import Part
|
||||
|
||||
|
||||
class PackageWriter:
    """Writes a zip-format OPC package to `pkg_file`, where `pkg_file` can be either a
    path to a zip file (a string) or a file-like object.

    Its single API method, :meth:`write`, is static, so this class is not intended to be
    instantiated.
    """

    @staticmethod
    def write(pkg_file, pkg_rels, parts):
        """Write a physical package to `pkg_file` containing `pkg_rels` and `parts`.

        A content types stream based on the content types of `parts` is written
        first, followed by the package-level relationships and then the parts
        themselves. `parts` is iterated more than once, so it must be re-iterable
        (a list or tuple rather than a generator).

        The physical writer is closed even when one of the write steps raises, so
        the underlying file is not left open; the exception still propagates to the
        caller.
        """
        phys_writer = PhysPkgWriter(pkg_file)
        try:
            PackageWriter._write_content_types_stream(phys_writer, parts)
            PackageWriter._write_pkg_rels(phys_writer, pkg_rels)
            PackageWriter._write_parts(phys_writer, parts)
        finally:
            # -- release the package file whether or not writing succeeded --
            phys_writer.close()

    @staticmethod
    def _write_content_types_stream(phys_writer, parts):
        """Write the ``[Content_Types].xml`` item to the physical package.

        The item is composed by |_ContentTypesItem| from the partname and content
        type of each part in `parts`.
        """
        cti = _ContentTypesItem.from_parts(parts)
        phys_writer.write(CONTENT_TYPES_URI, cti.blob)

    @staticmethod
    def _write_parts(phys_writer: PhysPkgWriter, parts: Iterable[Part]):
        """Write the blob of each part in `parts` to the package.

        A rels item is written alongside a part if and only if the part has at least
        one relationship.
        """
        for part in parts:
            phys_writer.write(part.partname, part.blob)
            if len(part.rels):
                phys_writer.write(part.partname.rels_uri, part.rels.xml)

    @staticmethod
    def _write_pkg_rels(phys_writer, pkg_rels):
        """Write the XML rels item for `pkg_rels` ('/_rels/.rels') to the package."""
        phys_writer.write(PACKAGE_URI.rels_uri, pkg_rels.xml)
|
||||
|
||||
|
||||
class _ContentTypesItem:
    """Composer for the content types item (``[Content_Types].xml``) of a package.

    Build an instance from a collection of parts with :meth:`from_parts`, then read
    :attr:`blob` to get the serialized form, e.g.
    ``_ContentTypesItem.from_parts(parts).blob``.
    """

    def __init__(self):
        self._defaults = CaseInsensitiveDict()
        self._overrides = {}

    @property
    def blob(self):
        """Serialized form of this content types item.

        Produced by passing the element built from the collected defaults and
        overrides to `serialize_part_xml()`; suitable for storage as
        ``[Content_Types].xml`` in an OPC package.
        """
        return serialize_part_xml(self._element)

    @classmethod
    def from_parts(cls, parts):
        """Return a new instance holding a content type entry for each of `parts`.

        Defaults for the "rels" and "xml" extensions are always registered, whether
        or not any part uses them.
        """
        item = cls()
        item._defaults["rels"] = CT.OPC_RELATIONSHIPS
        item._defaults["xml"] = CT.XML
        for part in parts:
            item._add_content_type(part.partname, part.content_type)
        return item

    def _add_content_type(self, partname, content_type):
        """Record `content_type` for the part named `partname`.

        The entry becomes a default (keyed by extension) when the lower-cased
        extension and content type appear together in `default_content_types`;
        otherwise it becomes an override keyed by `partname`.
        """
        extension = partname.ext
        if (extension.lower(), content_type) not in default_content_types:
            self._overrides[partname] = content_type
            return
        self._defaults[extension] = content_type

    @property
    def _element(self):
        """Newly built types element populated from the defaults and overrides.

        Although the sequence of elements is not strictly significant, Default
        elements are added in sorted extension order and Override elements in sorted
        partname order as an aid to testing and readability.
        """
        types_elm = CT_Types.new()
        for extension in sorted(self._defaults):
            types_elm.add_default(extension, self._defaults[extension])
        for partname in sorted(self._overrides):
            types_elm.add_override(partname, self._overrides[partname])
        return types_elm
|
||||
@@ -0,0 +1,153 @@
|
||||
"""Relationship-related objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, cast
|
||||
|
||||
from docx.opc.oxml import CT_Relationships
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.opc.part import Part
|
||||
|
||||
|
||||
class Relationships(Dict[str, "_Relationship"]):
    """Collection of |_Relationship| instances, keyed by rId.

    This is a `dict` subclass, so an individual relationship is retrieved with
    ``rels[rId]``. The targets of internal relationships are additionally tracked in
    a separate rId-keyed dict exposed as :attr:`related_parts`.
    """

    def __init__(self, baseURI: str):
        super().__init__()
        self._baseURI = baseURI
        self._target_parts_by_rId: dict[str, Any] = {}

    def add_relationship(
        self, reltype: str, target: Part | str, rId: str, is_external: bool = False
    ) -> "_Relationship":
        """Create a |_Relationship|, store it under `rId` and return it.

        For an internal relationship, `target` is also recorded in
        :attr:`related_parts` under the same `rId`.
        """
        new_rel = _Relationship(rId, reltype, target, self._baseURI, is_external)
        self[rId] = new_rel
        if not is_external:
            self._target_parts_by_rId[rId] = target
        return new_rel

    def get_or_add(self, reltype: str, target_part: Part) -> _Relationship:
        """Return the internal relationship of `reltype` to `target_part`.

        An existing matching relationship is reused; otherwise a new one is added
        under the next available rId.
        """
        existing = self._get_matching(reltype, target_part)
        if existing is not None:
            return existing
        return self.add_relationship(reltype, target_part, self._next_rId)

    def get_or_add_ext_rel(self, reltype: str, target_ref: str) -> str:
        """Return the rId of the external relationship of `reltype` to `target_ref`.

        An existing matching relationship is reused; otherwise a new one is added
        under the next available rId.
        """
        existing = self._get_matching(reltype, target_ref, is_external=True)
        if existing is not None:
            return existing.rId
        return self.add_relationship(
            reltype, target_ref, self._next_rId, is_external=True
        ).rId

    def part_with_reltype(self, reltype: str) -> Part:
        """Return the target part of the one relationship having `reltype`.

        Raises |KeyError| if no relationship of that type is present and
        |ValueError| if more than one is.
        """
        return self._get_rel_of_type(reltype).target_part

    @property
    def related_parts(self):
        """Dict mapping rId to target for each internal relationship added via
        :meth:`add_relationship`."""
        return self._target_parts_by_rId

    @property
    def xml(self) -> str:
        """Serialized form of this collection, suitable for storage as a .rels file
        in an OPC package."""
        rels_elm = CT_Relationships.new()
        for rel in self.values():
            rels_elm.add_rel(rel.rId, rel.reltype, rel.target_ref, rel.is_external)
        return rels_elm.xml

    def _get_matching(
        self, reltype: str, target: Part | str, is_external: bool = False
    ) -> _Relationship | None:
        """Return the first relationship matching `reltype`, `target` and
        `is_external`, or |None| when there is no such relationship.

        An external relationship is compared by its target reference, an internal one
        by its target part.
        """
        for rel in self.values():
            if rel.reltype != reltype:
                continue
            if rel.is_external != is_external:
                continue
            candidate = rel.target_ref if rel.is_external else rel.target_part
            if candidate == target:
                return rel
        return None

    def _get_rel_of_type(self, reltype: str):
        """Return the single relationship of type `reltype` in this collection.

        Raises |KeyError| if no matching relationship is found and |ValueError| if
        more than one is found.
        """
        found = [rel for rel in self.values() if rel.reltype == reltype]
        if not found:
            raise KeyError("no relationship of type '%s' in collection" % reltype)
        if len(found) > 1:
            raise ValueError("multiple relationships of type '%s' in collection" % reltype)
        return found[0]

    @property
    def _next_rId(self) -> str:
        """Next available rId in the collection.

        Counting starts at 'rId1' and the lowest unused number is taken, so gaps are
        filled first, e.g. 'rId2' for rIds ['rId1', 'rId3'].
        """
        n = 1
        while "rId%d" % n in self:
            n += 1
        return "rId%d" % n  # like 'rId19'
|
||||
|
||||
|
||||
class _Relationship:
    """Record describing one relationship from a source to a target.

    The target is a part for an internal relationship and a reference string for an
    external one.
    """

    def __init__(
        self, rId: str, reltype: str, target: Part | str, baseURI: str, external: bool = False
    ):
        super().__init__()
        self._rId, self._reltype = rId, reltype
        self._target = target
        self._baseURI = baseURI
        # -- normalize any truthy/falsy argument to a real bool --
        self._is_external = bool(external)

    @property
    def is_external(self) -> bool:
        """True when this relationship was created as external."""
        return self._is_external

    @property
    def reltype(self) -> str:
        """The relationship type this relationship was created with."""
        return self._reltype

    @property
    def rId(self) -> str:
        """The relationship id this relationship was created with."""
        return self._rId

    @property
    def target_part(self) -> Part:
        """The target of this relationship, for an internal relationship.

        Raises |ValueError| when the relationship is external.
        """
        if self._is_external:
            raise ValueError(
                "target_part property on _Relationship is undefined when target mode is External"
            )
        return cast("Part", self._target)

    @property
    def target_ref(self) -> str:
        """Reference to the target of this relationship.

        For an external relationship this is the target exactly as given; for an
        internal one it is the target's partname expressed relative to the base URI.
        """
        if self._is_external:
            return cast(str, self._target)
        return cast("Part", self._target).partname.relative_ref(self._baseURI)
|
||||
@@ -0,0 +1,31 @@
|
||||
"""Objects shared by opc modules."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, TypeVar
|
||||
|
||||
_T = TypeVar("_T")
|
||||
|
||||
|
||||
class CaseInsensitiveDict(Dict[str, Any]):
    """Mapping type that behaves like dict except that it matches without respect to the
    case of the key.

    E.g. cid['A'] == cid['a']. Keys are lower-cased on the way in and on lookup, so
    iteration yields the lower-cased form. Subscript access, `in`, `del`, `get()`,
    `pop()`, `setdefault()` and `update()` are all case-insensitive.

    Note this is not general-purpose, just complete enough to satisfy opc package
    needs. It assumes str keys (anything with a `.lower()` method), and that it is
    created empty; keys passed in constructor are not accounted for.
    """

    def __contains__(self, key):
        return super().__contains__(key.lower())

    def __delitem__(self, key):
        super().__delitem__(key.lower())

    def __getitem__(self, key):
        return super().__getitem__(key.lower())

    def __setitem__(self, key, value):
        return super().__setitem__(key.lower(), value)

    # -- The methods below are implemented in C on `dict` and do not route through
    # -- `__getitem__`/`__setitem__`, so each needs its own lower-casing override to
    # -- stay consistent with subscript access.

    def get(self, key, default=None):
        """Return the value for `key` ignoring case, or `default` if not present."""
        return super().get(key.lower(), default)

    def pop(self, key, *default):
        """Remove `key` ignoring case and return its value.

        Like `dict.pop()`, returns `default` when given and `key` is absent and
        raises |KeyError| when `key` is absent and no default is given.
        """
        return super().pop(key.lower(), *default)

    def setdefault(self, key, default=None):
        """Return the value for `key` ignoring case, storing `default` if absent."""
        return super().setdefault(key.lower(), default)

    def update(self, *args, **kwargs):
        """Add items as `dict.update()` does, lower-casing each key on the way in."""
        for key, value in dict(*args, **kwargs).items():
            self[key] = value
|
||||
|
||||
|
||||
def cls_method_fn(cls: type, method_name: str):
    """Look up the attribute named `method_name` on `cls` and return it.

    Raises |AttributeError| when `cls` has no attribute of that name.
    """
    method = getattr(cls, method_name)
    return method
|
||||
@@ -0,0 +1,24 @@
|
||||
"""Provides mappings that embody aspects of the Open XML spec ISO/IEC 29500."""
|
||||
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
|
||||
# Recognized (extension, content type) pairs. Extensions are spelled in lower case,
# and one extension may appear with more than one content type (e.g. "bin"), so
# callers test membership of the whole pair rather than looking up by extension.
default_content_types = (
    # -- printer settings, one content type per document flavor --
    ("bin", CT.PML_PRINTER_SETTINGS),
    ("bin", CT.SML_PRINTER_SETTINGS),
    ("bin", CT.WML_PRINTER_SETTINGS),
    ("bmp", CT.BMP),
    ("emf", CT.X_EMF),
    ("fntdata", CT.X_FONTDATA),
    ("gif", CT.GIF),
    # -- three spellings of the JPEG extension share one content type --
    ("jpe", CT.JPEG),
    ("jpeg", CT.JPEG),
    ("jpg", CT.JPEG),
    ("png", CT.PNG),
    ("rels", CT.OPC_RELATIONSHIPS),
    ("tif", CT.TIFF),
    ("tiff", CT.TIFF),
    ("wdp", CT.MS_PHOTO),
    ("wmf", CT.X_WMF),
    ("xlsx", CT.SML_SHEET),
    ("xml", CT.XML),
)
|
||||
Reference in New Issue
Block a user