Add replace_zipname()
This commit is contained in:
parent
114e5149bc
commit
444f862efa
@ -53,9 +53,9 @@ copyright = u'2015, Eric Lapouyade'
|
|||||||
# built documents.
|
# built documents.
|
||||||
#
|
#
|
||||||
# The short X.Y version.
|
# The short X.Y version.
|
||||||
version = '0.1'
|
version = '0.7'
|
||||||
# The full version, including alpha/beta/rc tags.
|
# The full version, including alpha/beta/rc tags.
|
||||||
release = '0.1.5'
|
release = '0.7.x'
|
||||||
|
|
||||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||||
# for a list of supported languages.
|
# for a list of supported languages.
|
||||||
|
|||||||
@ -256,6 +256,21 @@ WARNING : unlike replace_pic() method, embdded_dummy.docx MUST exist in the temp
|
|||||||
file as the one inserted manually in the docx template.
|
file as the one inserted manually in the docx template.
|
||||||
The replacement occurs in headers, footers and the whole document's body.
|
The replacement occurs in headers, footers and the whole document's body.
|
||||||
|
|
||||||
|
Note that `replace_embedded()` may not work on other documents than embedded docx.
|
||||||
|
Instead, you should use zipname replacement::
|
||||||
|
|
||||||
|
tpl.replace_zipname(
|
||||||
|
'word/embeddings/Feuille_Microsoft_Office_Excel1.xlsx',
|
||||||
|
'my_excel_file.xlsx')
|
||||||
|
|
||||||
|
The zipname is the one you can find when you open docx with WinZip, 7zip (Windows) or unzip -l (Linux).
|
||||||
|
The zipname starts with "word/embeddings/". Note that the file to be replaced is renamed by MSWord, so you have to guess a little bit...
|
||||||
|
|
||||||
|
This works for embdded MSWord file like Excel or PowerPoint file, but won't work for others like PDF, Python or even Text files :
|
||||||
|
For these ones, MSWord generate an oleObjectNNN.bin file which is no use to be replaced as it is encoded.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Microsoft Word 2016 special cases
|
Microsoft Word 2016 special cases
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|
||||||
|
|||||||
@ -7,7 +7,7 @@ Created : 2015-03-12
|
|||||||
import functools
|
import functools
|
||||||
import io
|
import io
|
||||||
|
|
||||||
__version__ = '0.6.4'
|
__version__ = '0.6.9'
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from docx import Document
|
from docx import Document
|
||||||
@ -43,6 +43,7 @@ class DocxTemplate(object):
|
|||||||
self.docx = Document(docx)
|
self.docx = Document(docx)
|
||||||
self.crc_to_new_media = {}
|
self.crc_to_new_media = {}
|
||||||
self.crc_to_new_embedded = {}
|
self.crc_to_new_embedded = {}
|
||||||
|
self.zipname_to_replace = {}
|
||||||
self.pic_to_replace = {}
|
self.pic_to_replace = {}
|
||||||
self.pic_map = {}
|
self.pic_map = {}
|
||||||
|
|
||||||
@ -455,6 +456,36 @@ class DocxTemplate(object):
|
|||||||
crc = self.get_file_crc(src_file)
|
crc = self.get_file_crc(src_file)
|
||||||
self.crc_to_new_embedded[crc] = fh.read()
|
self.crc_to_new_embedded[crc] = fh.read()
|
||||||
|
|
||||||
|
def replace_zipname(self,zipname,dst_file):
|
||||||
|
"""Replace one file in the docx file
|
||||||
|
|
||||||
|
First note that a MSWord .docx file is in fact a zip file.
|
||||||
|
|
||||||
|
This method can be used to replace document embedded in the docx template.
|
||||||
|
|
||||||
|
Some embedded document may have been modified by MSWord while saving
|
||||||
|
the template : thus replace_embedded() cannot be used as CRC is not the
|
||||||
|
same as the original file.
|
||||||
|
|
||||||
|
This method works for embdded MSWord file like Excel or PowerPoint file,
|
||||||
|
but won't work for others like PDF, Python or even Text files :
|
||||||
|
For these ones, MSWord generate an oleObjectNNN.bin file which is no
|
||||||
|
use to be replaced as it is encoded.
|
||||||
|
|
||||||
|
Syntax:
|
||||||
|
|
||||||
|
tpl.replace_zipname(
|
||||||
|
'word/embeddings/Feuille_Microsoft_Office_Excel1.xlsx',
|
||||||
|
'my_excel_file.xlsx')
|
||||||
|
|
||||||
|
The zipname is the one you can find when you open docx with WinZip,
|
||||||
|
7zip (Windows) or unzip -l (Linux). The zipname starts with
|
||||||
|
"word/embeddings/". Note that the file is renamed by MSWord,
|
||||||
|
so you have to guess a little bit...
|
||||||
|
"""
|
||||||
|
with open(dst_file, 'rb') as fh:
|
||||||
|
self.zipname_to_replace[zipname] = fh.read()
|
||||||
|
|
||||||
def post_processing(self, docx_file):
|
def post_processing(self, docx_file):
|
||||||
if self.crc_to_new_media or self.crc_to_new_embedded:
|
if self.crc_to_new_media or self.crc_to_new_embedded:
|
||||||
|
|
||||||
@ -474,12 +505,11 @@ class DocxTemplate(object):
|
|||||||
with zipfile.ZipFile(docx_file, 'w') as zout:
|
with zipfile.ZipFile(docx_file, 'w') as zout:
|
||||||
for item in zin.infolist():
|
for item in zin.infolist():
|
||||||
buf = zin.read(item.filename)
|
buf = zin.read(item.filename)
|
||||||
if ( item.filename.startswith('word/media/') and
|
if item.filename in self.zipname_to_replace:
|
||||||
|
zout.writestr(item, self.zipname_to_replace[item.filename])
|
||||||
|
elif ( item.filename.startswith('word/media/') and
|
||||||
item.CRC in self.crc_to_new_media ):
|
item.CRC in self.crc_to_new_media ):
|
||||||
zout.writestr(item, self.crc_to_new_media[item.CRC])
|
zout.writestr(item, self.crc_to_new_media[item.CRC])
|
||||||
elif ( item.filename.startswith('word/embeddings/')
|
|
||||||
and item.CRC in self.crc_to_new_embedded ):
|
|
||||||
zout.writestr(item, self.crc_to_new_embedded[item.CRC])
|
|
||||||
else:
|
else:
|
||||||
zout.writestr(item, buf)
|
zout.writestr(item, buf)
|
||||||
|
|
||||||
|
|||||||
@ -25,5 +25,12 @@ context = {
|
|||||||
|
|
||||||
tpl.replace_embedded('templates/embedded_dummy.docx','templates/embedded_static_docx.docx')
|
tpl.replace_embedded('templates/embedded_dummy.docx','templates/embedded_static_docx.docx')
|
||||||
tpl.replace_embedded('templates/embedded_dummy2.docx','output/embedded_embedded_docx.docx')
|
tpl.replace_embedded('templates/embedded_dummy2.docx','output/embedded_embedded_docx.docx')
|
||||||
|
|
||||||
|
# The zipname is the one you can find when you open docx with WinZip, 7zip (Windows)
|
||||||
|
# or unzip -l (Linux). The zipname starts with "word/embeddings/".
|
||||||
|
# Note that the file is renamed by MSWord, so you have to guess a little bit...
|
||||||
|
tpl.replace_zipname('word/embeddings/Feuille_Microsoft_Office_Excel3.xlsx','templates/real_Excel.xlsx')
|
||||||
|
tpl.replace_zipname('word/embeddings/Pr_sentation_Microsoft_Office_PowerPoint4.pptx','templates/real_PowerPoint.pptx')
|
||||||
|
|
||||||
tpl.render(context)
|
tpl.render(context)
|
||||||
tpl.save('output/embedded.docx')
|
tpl.save('output/embedded.docx')
|
||||||
BIN
tests/templates/dummy_Excel.xlsx
Normal file
BIN
tests/templates/dummy_Excel.xlsx
Normal file
Binary file not shown.
BIN
tests/templates/dummy_PowerPoint.pptx
Normal file
BIN
tests/templates/dummy_PowerPoint.pptx
Normal file
Binary file not shown.
Binary file not shown.
BIN
tests/templates/real_Excel.xlsx
Normal file
BIN
tests/templates/real_Excel.xlsx
Normal file
Binary file not shown.
BIN
tests/templates/real_PowerPoint.pptx
Normal file
BIN
tests/templates/real_PowerPoint.pptx
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user