Add replace_zipname()
This commit is contained in:
parent
114e5149bc
commit
444f862efa
@ -53,9 +53,9 @@ copyright = u'2015, Eric Lapouyade'
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = '0.1'
|
||||
version = '0.7'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = '0.1.5'
|
||||
release = '0.7.x'
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
|
||||
@ -256,6 +256,21 @@ WARNING : unlike replace_pic() method, embdded_dummy.docx MUST exist in the temp
|
||||
file as the one inserted manually in the docx template.
|
||||
The replacement occurs in headers, footers and the whole document's body.
|
||||
|
||||
Note that `replace_embedded()` may not work on embedded documents other than docx files.
|
||||
Instead, you should use zipname replacement::
|
||||
|
||||
tpl.replace_zipname(
|
||||
'word/embeddings/Feuille_Microsoft_Office_Excel1.xlsx',
|
||||
'my_excel_file.xlsx')
|
||||
|
||||
The zipname is the one you can find when you open docx with WinZip, 7zip (Windows) or unzip -l (Linux).
|
||||
The zipname starts with "word/embeddings/". Note that the file to be replaced is renamed by MSWord, so you have to guess a little bit...
|
||||
|
||||
This works for embedded MS Office files such as Excel or PowerPoint files, but won't work for others such as PDF, Python or even text files:
|
||||
For these, MSWord generates an oleObjectNNN.bin file, which is no use replacing because it is encoded.
|
||||
|
||||
|
||||
|
||||
Microsoft Word 2016 special cases
|
||||
---------------------------------
|
||||
|
||||
|
||||
@ -7,7 +7,7 @@ Created : 2015-03-12
|
||||
import functools
|
||||
import io
|
||||
|
||||
__version__ = '0.6.4'
|
||||
__version__ = '0.6.9'
|
||||
|
||||
from lxml import etree
|
||||
from docx import Document
|
||||
@ -43,6 +43,7 @@ class DocxTemplate(object):
|
||||
self.docx = Document(docx)
|
||||
self.crc_to_new_media = {}
|
||||
self.crc_to_new_embedded = {}
|
||||
self.zipname_to_replace = {}
|
||||
self.pic_to_replace = {}
|
||||
self.pic_map = {}
|
||||
|
||||
@ -455,6 +456,36 @@ class DocxTemplate(object):
|
||||
crc = self.get_file_crc(src_file)
|
||||
self.crc_to_new_embedded[crc] = fh.read()
|
||||
|
||||
def replace_zipname(self,zipname,dst_file):
|
||||
"""Replace one file in the docx file
|
||||
|
||||
First note that a MSWord .docx file is in fact a zip file.
|
||||
|
||||
This method can be used to replace a document embedded in the docx template.
|
||||
|
||||
Some embedded documents may have been modified by MSWord while saving
|
||||
the template: thus replace_embedded() cannot be used, as the CRC is not the
|
||||
same as the original file.
|
||||
|
||||
This method works for embedded MS Office files such as Excel or PowerPoint files,
|
||||
but won't work for others such as PDF, Python or even text files:
|
||||
For these ones, MSWord generates an oleObjectNNN.bin file which is no
|
||||
use replacing, as it is encoded.
|
||||
|
||||
Syntax:
|
||||
|
||||
tpl.replace_zipname(
|
||||
'word/embeddings/Feuille_Microsoft_Office_Excel1.xlsx',
|
||||
'my_excel_file.xlsx')
|
||||
|
||||
The zipname is the one you can find when you open docx with WinZip,
|
||||
7zip (Windows) or unzip -l (Linux). The zipname starts with
|
||||
"word/embeddings/". Note that the file is renamed by MSWord,
|
||||
so you have to guess a little bit...
|
||||
"""
|
||||
with open(dst_file, 'rb') as fh:
|
||||
self.zipname_to_replace[zipname] = fh.read()
|
||||
|
||||
def post_processing(self, docx_file):
|
||||
if self.crc_to_new_media or self.crc_to_new_embedded:
|
||||
|
||||
@ -474,12 +505,11 @@ class DocxTemplate(object):
|
||||
with zipfile.ZipFile(docx_file, 'w') as zout:
|
||||
for item in zin.infolist():
|
||||
buf = zin.read(item.filename)
|
||||
if ( item.filename.startswith('word/media/') and
|
||||
if item.filename in self.zipname_to_replace:
|
||||
zout.writestr(item, self.zipname_to_replace[item.filename])
|
||||
elif ( item.filename.startswith('word/media/') and
|
||||
item.CRC in self.crc_to_new_media ):
|
||||
zout.writestr(item, self.crc_to_new_media[item.CRC])
|
||||
elif ( item.filename.startswith('word/embeddings/')
|
||||
and item.CRC in self.crc_to_new_embedded ):
|
||||
zout.writestr(item, self.crc_to_new_embedded[item.CRC])
|
||||
else:
|
||||
zout.writestr(item, buf)
|
||||
|
||||
|
||||
@ -25,5 +25,12 @@ context = {
|
||||
|
||||
tpl.replace_embedded('templates/embedded_dummy.docx','templates/embedded_static_docx.docx')
|
||||
tpl.replace_embedded('templates/embedded_dummy2.docx','output/embedded_embedded_docx.docx')
|
||||
|
||||
# The zipname is the one you can find when you open docx with WinZip, 7zip (Windows)
|
||||
# or unzip -l (Linux). The zipname starts with "word/embeddings/".
|
||||
# Note that the file is renamed by MSWord, so you have to guess a little bit...
|
||||
tpl.replace_zipname('word/embeddings/Feuille_Microsoft_Office_Excel3.xlsx','templates/real_Excel.xlsx')
|
||||
tpl.replace_zipname('word/embeddings/Pr_sentation_Microsoft_Office_PowerPoint4.pptx','templates/real_PowerPoint.pptx')
|
||||
|
||||
tpl.render(context)
|
||||
tpl.save('output/embedded.docx')
|
||||
BIN
tests/templates/dummy_Excel.xlsx
Normal file
BIN
tests/templates/dummy_Excel.xlsx
Normal file
Binary file not shown.
BIN
tests/templates/dummy_PowerPoint.pptx
Normal file
BIN
tests/templates/dummy_PowerPoint.pptx
Normal file
Binary file not shown.
Binary file not shown.
BIN
tests/templates/real_Excel.xlsx
Normal file
BIN
tests/templates/real_Excel.xlsx
Normal file
Binary file not shown.
BIN
tests/templates/real_PowerPoint.pptx
Normal file
BIN
tests/templates/real_PowerPoint.pptx
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user