Add replace_zipname()

This commit is contained in:
Eric Lapouyade 2020-04-09 14:26:22 +02:00
parent 114e5149bc
commit 444f862efa
9 changed files with 59 additions and 7 deletions

View File

@ -53,9 +53,9 @@ copyright = u'2015, Eric Lapouyade'
# built documents. # built documents.
# #
# The short X.Y version. # The short X.Y version.
version = '0.1' version = '0.7'
# The full version, including alpha/beta/rc tags. # The full version, including alpha/beta/rc tags.
release = '0.1.5' release = '0.7.x'
# The language for content autogenerated by Sphinx. Refer to documentation # The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages. # for a list of supported languages.

View File

@ -256,6 +256,21 @@ WARNING : unlike replace_pic() method, embedded_dummy.docx MUST exist in the temp
file as the one inserted manually in the docx template. file as the one inserted manually in the docx template.
The replacement occurs in headers, footers and the whole document's body. The replacement occurs in headers, footers and the whole document's body.
Note that `replace_embedded()` may not work on embedded documents other than docx files.
For those, use zipname replacement instead::
tpl.replace_zipname(
'word/embeddings/Feuille_Microsoft_Office_Excel1.xlsx',
'my_excel_file.xlsx')
The zipname is the path shown when you open the docx with WinZip or 7zip (Windows) or run ``unzip -l`` (Linux).
It starts with "word/embeddings/". Note that MSWord renames the embedded file when saving, so you may have to guess which entry is the one to replace.
This works for embedded MS Office files such as Excel or PowerPoint files, but won't work for others such as PDF, Python or even text files:
for these, MSWord generates an oleObjectNNN.bin file, which is useless to replace because it is encoded.
Microsoft Word 2016 special cases Microsoft Word 2016 special cases
--------------------------------- ---------------------------------

View File

@ -7,7 +7,7 @@ Created : 2015-03-12
import functools import functools
import io import io
__version__ = '0.6.4' __version__ = '0.6.9'
from lxml import etree from lxml import etree
from docx import Document from docx import Document
@ -43,6 +43,7 @@ class DocxTemplate(object):
self.docx = Document(docx) self.docx = Document(docx)
self.crc_to_new_media = {} self.crc_to_new_media = {}
self.crc_to_new_embedded = {} self.crc_to_new_embedded = {}
self.zipname_to_replace = {}
self.pic_to_replace = {} self.pic_to_replace = {}
self.pic_map = {} self.pic_map = {}
@ -455,6 +456,36 @@ class DocxTemplate(object):
crc = self.get_file_crc(src_file) crc = self.get_file_crc(src_file)
self.crc_to_new_embedded[crc] = fh.read() self.crc_to_new_embedded[crc] = fh.read()
def replace_zipname(self, zipname, dst_file):
    """Register a replacement for one entry of the docx archive.

    A MSWord .docx file is a zip archive. This method replaces an
    archive entry selected by its name rather than by its CRC, which is
    useful for embedded documents that MSWord modified while saving the
    template: their CRC no longer matches the original file, so
    replace_embedded() cannot find them.

    This works for embedded MS Office files such as Excel or PowerPoint
    files. It does not work for other kinds (PDF, Python, even text
    files): for these, MSWord generates an encoded oleObjectNNN.bin
    file, which is useless to replace.

    Syntax:

        tpl.replace_zipname(
            'word/embeddings/Feuille_Microsoft_Office_Excel1.xlsx',
            'my_excel_file.xlsx')

    Args:
        zipname: name of the entry inside the docx archive, as listed by
            WinZip, 7zip (Windows) or ``unzip -l`` (Linux). It starts
            with "word/embeddings/". MSWord renames the embedded file
            when saving, so you may have to guess which entry it is.
        dst_file: path of the file whose content is read (in binary
            mode) and stored as the replacement for ``zipname``.
    """
    # Read the whole replacement up front; only the raw bytes are kept,
    # keyed by the archive entry name they are meant to replace.
    with open(dst_file, mode='rb') as replacement:
        payload = replacement.read()
    self.zipname_to_replace[zipname] = payload
def post_processing(self, docx_file): def post_processing(self, docx_file):
if self.crc_to_new_media or self.crc_to_new_embedded: if self.crc_to_new_media or self.crc_to_new_embedded:
@ -474,12 +505,11 @@ class DocxTemplate(object):
with zipfile.ZipFile(docx_file, 'w') as zout: with zipfile.ZipFile(docx_file, 'w') as zout:
for item in zin.infolist(): for item in zin.infolist():
buf = zin.read(item.filename) buf = zin.read(item.filename)
if ( item.filename.startswith('word/media/') and if item.filename in self.zipname_to_replace:
zout.writestr(item, self.zipname_to_replace[item.filename])
elif ( item.filename.startswith('word/media/') and
item.CRC in self.crc_to_new_media ): item.CRC in self.crc_to_new_media ):
zout.writestr(item, self.crc_to_new_media[item.CRC]) zout.writestr(item, self.crc_to_new_media[item.CRC])
elif ( item.filename.startswith('word/embeddings/')
and item.CRC in self.crc_to_new_embedded ):
zout.writestr(item, self.crc_to_new_embedded[item.CRC])
else: else:
zout.writestr(item, buf) zout.writestr(item, buf)

View File

@ -25,5 +25,12 @@ context = {
tpl.replace_embedded('templates/embedded_dummy.docx','templates/embedded_static_docx.docx') tpl.replace_embedded('templates/embedded_dummy.docx','templates/embedded_static_docx.docx')
tpl.replace_embedded('templates/embedded_dummy2.docx','output/embedded_embedded_docx.docx') tpl.replace_embedded('templates/embedded_dummy2.docx','output/embedded_embedded_docx.docx')
# The zipname is the path shown when you open the docx with WinZip or 7zip (Windows)
# or run `unzip -l` (Linux). It starts with "word/embeddings/".
# Note that MSWord renames the embedded file when saving, so you may have to guess which entry to replace.
tpl.replace_zipname('word/embeddings/Feuille_Microsoft_Office_Excel3.xlsx','templates/real_Excel.xlsx')
tpl.replace_zipname('word/embeddings/Pr_sentation_Microsoft_Office_PowerPoint4.pptx','templates/real_PowerPoint.pptx')
tpl.render(context) tpl.render(context)
tpl.save('output/embedded.docx') tpl.save('output/embedded.docx')

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.