Change XPath query to get all <t> nodes under the <si> node of interest, regardless of their depth. This concatenates the resulting text() nodes together.
845 lines
29 KiB
Plaintext
Executable File
845 lines
29 KiB
Plaintext
Executable File
create or replace package body ooxml_util_pkg
|
|
as
|
|
|
|
/*
|
|
|
|
Purpose: Package handles Office Open XML (OOXML) formats, ie Office 2007 docx, xlsx, etc.
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 25.01.2011 Created
|
|
|
|
*/
|
|
|
|
--g_date_format constant varchar2(30) := 'YYYY-MM-DD"T"HH24:MI:SS".00Z"';
|
|
|
|
g_namespace_coreprops constant string_util_pkg.t_max_db_varchar2 := 'xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcmitype="http://purl.org/dc/dcmitype/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"';
|
|
g_namespace_extendedprops constant string_util_pkg.t_max_db_varchar2 := 'xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"';
|
|
|
|
g_namespace_xlsx_worksheet constant string_util_pkg.t_max_db_varchar2 := 'xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"';
|
|
g_namespace_xlsx_sharedstrings constant string_util_pkg.t_max_db_varchar2 := 'xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"';
|
|
g_namespace_xlsx_relationships constant string_util_pkg.t_max_db_varchar2 := 'xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"';
|
|
g_namespace_package_rels constant string_util_pkg.t_max_db_varchar2 := 'xmlns="http://schemas.openxmlformats.org/package/2006/relationships"';
|
|
g_namespace_pptx_slide constant string_util_pkg.t_max_db_varchar2 := 'xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"';
|
|
|
|
g_line_break_hack constant varchar2(10) := chr(24) || chr(164) || chr(164);
|
|
|
|
|
|
function get_xml (p_blob in blob,
|
|
p_file_name in varchar2) return xmltype
|
|
as
|
|
l_blob blob;
|
|
l_clob clob;
|
|
l_returnvalue xmltype;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get xml file from ooxml document
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 25.01.2011 Created
|
|
|
|
*/
|
|
|
|
l_blob := zip_util_pkg.get_file (p_blob, p_file_name);
|
|
|
|
l_clob := sql_util_pkg.blob_to_clob (l_blob);
|
|
|
|
l_returnvalue := xmltype (l_clob);
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_xml;
|
|
|
|
|
|
function get_docx_properties (p_docx in blob) return t_docx_properties
|
|
as
|
|
l_returnvalue t_docx_properties;
|
|
l_xml xmltype;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get docx properties
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 25.01.2011 Created
|
|
|
|
*/
|
|
|
|
l_xml := get_xml (p_docx, 'docProps/core.xml');
|
|
|
|
l_returnvalue.core.title := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:title/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.subject := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:subject/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.creator := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:creator/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.keywords := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/cp:keywords/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.description := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:description/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.last_modified_by := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/cp:lastModifiedBy/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.revision := xml_util_pkg.extract_value_number (l_xml, '/cp:coreProperties/cp:revision/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.created_date := xml_util_pkg.extract_value_date (l_xml, '/cp:coreProperties/dcterms:created/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.modified_date := xml_util_pkg.extract_value_date (l_xml, '/cp:coreProperties/dcterms:modified/text()', g_namespace_coreprops);
|
|
|
|
l_xml := get_xml (p_docx, 'docProps/app.xml');
|
|
|
|
l_returnvalue.app.application := xml_util_pkg.extract_value (l_xml, '/Properties/Application/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.app_version := xml_util_pkg.extract_value (l_xml, '/Properties/AppVersion/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.company := xml_util_pkg.extract_value (l_xml, '/Properties/Company/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.pages := xml_util_pkg.extract_value_number (l_xml, '/Properties/Pages/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.words := xml_util_pkg.extract_value_number (l_xml, '/Properties/Words/text()', g_namespace_extendedprops);
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_docx_properties;
|
|
|
|
|
|
function get_docx_to_txt_stylesheet return varchar2
|
|
as
|
|
l_returnvalue varchar2(32000);
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get XSL stylesheet that transforms docx to plain text
|
|
|
|
Remarks: see http://forums.oracle.com/forums/thread.jspa?messageID=3368284087
|
|
abbreviated quite a bit, check out original posting by "user304344" for the original
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 25.01.2011 Created
|
|
|
|
*/
|
|
|
|
l_returnvalue := '<?xml version="1.0" encoding="utf-8"?>'
|
|
||chr(10)||'<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" '
|
|
||chr(10)||'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"'
|
|
||chr(10)||'xmlns:v="urn:schemas-microsoft-com:vml"'
|
|
||chr(10)||'exclude-result-prefixes="w v">'
|
|
||chr(10)||'<xsl:output method="text" indent="no" encoding="UTF-8" version="1.0"/>'
|
|
||chr(10)||'<!-- document root -->'
|
|
||chr(10)||'<xsl:template match="/">'
|
|
||chr(10)||'<!-- root element in document --> '
|
|
||chr(10)||'<xsl:apply-templates select="w:document"/> '
|
|
||chr(10)||'</xsl:template>'
|
|
||chr(10)||'<!-- ****************************start document**************************** -->'
|
|
||chr(10)||'<xsl:template match="w:document">'
|
|
||chr(10)||'<xsl:for-each select="//w:p">'
|
|
||chr(10)||'<xsl:apply-templates select="*/w:t"/> '
|
|
||chr(10)||'<xsl:text>' || g_line_break_hack || '</xsl:text> '
|
|
||chr(10)||'</xsl:for-each> '
|
|
||chr(10)||'</xsl:template>'
|
|
||chr(10)||'<!-- get all text nodes within a para -->'
|
|
||chr(10)||'<xsl:template match="*/w:t">'
|
|
||chr(10)||'<xsl:value-of select="."/>'
|
|
||chr(10)||'</xsl:template>'
|
|
||chr(10)||'<!-- **************************** end document**************************** -->'
|
|
||chr(10)||'</xsl:stylesheet>';
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_docx_to_txt_stylesheet;
|
|
|
|
|
|
function get_docx_plaintext (p_docx in blob) return clob
|
|
as
|
|
l_document_blob blob;
|
|
l_document_clob clob;
|
|
l_returnvalue clob;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: extracts plain text from docx
|
|
|
|
Remarks: based on concepts from http://monkeyonoracle.blogspot.com/2010/03/docx-part-i-how-to-extract-document.html
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 25.01.2011 Created
|
|
|
|
*/
|
|
|
|
l_document_blob := zip_util_pkg.get_file (p_docx, 'word/document.xml');
|
|
l_document_clob := sql_util_pkg.blob_to_clob (l_document_blob);
|
|
l_returnvalue := xml_stylesheet_pkg.transform_clob(l_document_clob, get_docx_to_txt_stylesheet);
|
|
l_returnvalue := replace (l_returnvalue, g_line_break_hack, chr(10));
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_docx_plaintext;
|
|
|
|
|
|
function get_file_from_template (p_template in blob,
|
|
p_names in t_str_array,
|
|
p_values in t_str_array) return blob
|
|
as
|
|
l_file_list zip_util_pkg.t_file_list;
|
|
l_docx blob;
|
|
l_blob blob;
|
|
l_clob clob;
|
|
|
|
l_returnvalue blob;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: performs substitutions on a template
|
|
|
|
Remarks: template file can be Word (docx), Excel (xlsx), or Powerpoint (pptx)
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 25.01.2011 Created
|
|
|
|
*/
|
|
|
|
l_file_list := zip_util_pkg.get_file_list (p_template);
|
|
|
|
for i in 1 .. l_file_list.count loop
|
|
|
|
l_blob := zip_util_pkg.get_file (p_template, l_file_list(i));
|
|
|
|
if l_file_list(i) in ('word/document.xml', 'word/footer1.xml', 'xl/sharedStrings.xml') or (l_file_list(i) like 'ppt/slides/slide%.xml') then
|
|
|
|
l_clob := sql_util_pkg.blob_to_clob (l_blob);
|
|
l_clob := string_util_pkg.multi_replace (l_clob, p_names, p_values);
|
|
l_blob := sql_util_pkg.clob_to_blob (l_clob);
|
|
|
|
end if;
|
|
|
|
zip_util_pkg.add_file (l_returnvalue, l_file_list(i), l_blob);
|
|
|
|
end loop;
|
|
|
|
zip_util_pkg.finish_zip (l_returnvalue);
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_file_from_template;
|
|
|
|
|
|
function get_xlsx_properties (p_xlsx in blob) return t_xlsx_properties
|
|
as
|
|
l_returnvalue t_xlsx_properties;
|
|
l_xml xmltype;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get xlsx properties
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 30.01.2011 Created
|
|
|
|
*/
|
|
|
|
l_xml := get_xml (p_xlsx, 'docProps/core.xml');
|
|
|
|
l_returnvalue.core.title := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:title/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.subject := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:subject/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.creator := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:creator/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.keywords := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/cp:keywords/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.description := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:description/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.last_modified_by := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/cp:lastModifiedBy/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.revision := xml_util_pkg.extract_value_number (l_xml, '/cp:coreProperties/cp:revision/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.created_date := xml_util_pkg.extract_value_date (l_xml, '/cp:coreProperties/dcterms:created/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.modified_date := xml_util_pkg.extract_value_date (l_xml, '/cp:coreProperties/dcterms:modified/text()', g_namespace_coreprops);
|
|
|
|
l_xml := get_xml (p_xlsx, 'docProps/app.xml');
|
|
|
|
l_returnvalue.app.application := xml_util_pkg.extract_value (l_xml, '/Properties/Application/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.app_version := xml_util_pkg.extract_value (l_xml, '/Properties/AppVersion/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.company := xml_util_pkg.extract_value (l_xml, '/Properties/Company/text()', g_namespace_extendedprops);
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_xlsx_properties;
|
|
|
|
|
|
function get_xlsx_column_number (p_column_ref in varchar2) return number
|
|
as
|
|
l_returnvalue number;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get column number from column reference
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 11.07.2011 Created
|
|
|
|
*/
|
|
|
|
if length(p_column_ref) > 1 then
|
|
l_returnvalue := ascii(substr(p_column_ref,1,1)) - 64;
|
|
l_returnvalue := l_returnvalue * 26;
|
|
l_returnvalue := l_returnvalue + (ascii(substr(p_column_ref,2,1)) - 64);
|
|
else
|
|
l_returnvalue := ascii(p_column_ref) - 64;
|
|
end if;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_xlsx_column_number;
|
|
|
|
|
|
function get_xlsx_column_ref (p_column_number in varchar2) return varchar2
|
|
as
|
|
l_offset number;
|
|
l_returnvalue varchar2(2);
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get column reference from column number
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 11.07.2011 Created
|
|
|
|
*/
|
|
|
|
if p_column_number < 27 then
|
|
l_returnvalue := chr(p_column_number + 64);
|
|
else
|
|
l_offset := trunc(p_column_number/26);
|
|
l_returnvalue := chr(l_offset + 64);
|
|
l_returnvalue := l_returnvalue || chr(p_column_number - (l_offset * 26) + 64);
|
|
end if;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_xlsx_column_ref;
|
|
|
|
|
|
function get_worksheet_file_name (p_xlsx in blob,
|
|
p_worksheet in varchar2) return varchar2
|
|
as
|
|
l_relationship_id varchar2(255);
|
|
l_returnvalue string_util_pkg.t_max_pl_varchar2;
|
|
l_xml xmltype;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get file name of worksheet based on worksheet name
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 30.01.2011 Created
|
|
|
|
*/
|
|
|
|
l_xml := get_xml (p_xlsx, 'xl/workbook.xml');
|
|
|
|
l_relationship_id := xml_util_pkg.extract_value (l_xml, '/workbook/sheets/sheet[@name="' || p_worksheet || '"]/@r:id', g_namespace_xlsx_relationships);
|
|
|
|
if l_relationship_id is not null then
|
|
l_xml := get_xml (p_xlsx, 'xl/_rels/workbook.xml.rels');
|
|
l_returnvalue := xml_util_pkg.extract_value (l_xml, '/Relationships/Relationship[@Id="' || l_relationship_id || '"]/@Target', g_namespace_package_rels);
|
|
end if;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_worksheet_file_name;
|
|
|
|
|
|
function get_xlsx_cell_value (p_xlsx in blob,
|
|
p_worksheet in varchar2,
|
|
p_cell in varchar2) return varchar2
|
|
as
|
|
l_returnvalue string_util_pkg.t_max_pl_varchar2;
|
|
l_file_name string_util_pkg.t_max_db_varchar2;
|
|
l_type varchar2(20);
|
|
l_string_index pls_integer;
|
|
l_xml xmltype;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get cell value from XLSX file
|
|
|
|
Remarks: see http://msdn.microsoft.com/en-us/library/bb332058(v=office.12).aspx
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 30.01.2011 Created
|
|
|
|
*/
|
|
|
|
l_file_name := get_worksheet_file_name (p_xlsx, p_worksheet);
|
|
|
|
if l_file_name is null then
|
|
raise_application_error (-20000, 'Worksheet not found!');
|
|
end if;
|
|
|
|
l_xml := get_xml (p_xlsx, 'xl/' || l_file_name);
|
|
|
|
l_returnvalue := xml_util_pkg.extract_value (l_xml, '/worksheet/sheetData/row/c[@r="' || p_cell || '"]/v/text()', g_namespace_xlsx_worksheet);
|
|
|
|
l_type := xml_util_pkg.extract_value (l_xml, '/worksheet/sheetData/row/c[@r="' || p_cell || '"]/@t', g_namespace_xlsx_worksheet);
|
|
|
|
if l_type = 's' then
|
|
l_string_index := to_number (l_returnvalue);
|
|
l_xml := get_xml (p_xlsx, 'xl/sharedStrings.xml');
|
|
l_returnvalue := xml_util_pkg.extract_value (l_xml, '/sst/si[' || (l_string_index + 1) || ']//t/text()', g_namespace_xlsx_sharedstrings);
|
|
end if;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_xlsx_cell_value;
|
|
|
|
|
|
function get_xlsx_cell_values (p_xlsx in blob,
|
|
p_worksheet in varchar2,
|
|
p_cells in t_str_array) return t_str_array
|
|
as
|
|
l_returnvalue t_str_array := t_str_array ();
|
|
l_file_name string_util_pkg.t_max_db_varchar2;
|
|
l_type varchar2(20);
|
|
l_string_index pls_integer;
|
|
l_xml xmltype;
|
|
l_shared_strings xmltype;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get multiple cell values from XLSX file
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 30.01.2011 Created
|
|
|
|
*/
|
|
|
|
l_returnvalue.extend (p_cells.count);
|
|
|
|
l_file_name := get_worksheet_file_name (p_xlsx, p_worksheet);
|
|
|
|
if l_file_name is null then
|
|
raise_application_error (-20000, 'Worksheet not found!');
|
|
end if;
|
|
|
|
l_xml := get_xml (p_xlsx, 'xl/' || l_file_name);
|
|
|
|
l_shared_strings := get_xml (p_xlsx, 'xl/sharedStrings.xml');
|
|
|
|
for i in 1 .. p_cells.count loop
|
|
|
|
l_returnvalue(i) := xml_util_pkg.extract_value (l_xml, '/worksheet/sheetData/row/c[@r="' || p_cells(i) || '"]/v/text()', g_namespace_xlsx_worksheet);
|
|
|
|
l_type := xml_util_pkg.extract_value (l_xml, '/worksheet/sheetData/row/c[@r="' || p_cells(i) || '"]/@t', g_namespace_xlsx_worksheet);
|
|
|
|
if l_type = 's' then
|
|
l_string_index := to_number (l_returnvalue(i));
|
|
l_returnvalue(i) := xml_util_pkg.extract_value (l_shared_strings, '/sst/si[' || (l_string_index + 1) || ']//t/text()', g_namespace_xlsx_sharedstrings);
|
|
end if;
|
|
|
|
end loop;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_xlsx_cell_values;
|
|
|
|
|
|
function get_xlsx_cell (p_cell_reference in varchar2) return t_xlsx_cell
|
|
as
|
|
l_returnvalue t_xlsx_cell;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get cell by string reference
|
|
|
|
Remarks: given a reference of "A42", returns "A" (column) and 42 (row) as separate values
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 26.03.2011 Created
|
|
|
|
*/
|
|
|
|
if p_cell_reference is not null then
|
|
|
|
l_returnvalue.column := rtrim(upper(p_cell_reference), '1234567890');
|
|
l_returnvalue.row := to_number(ltrim(upper(p_cell_reference), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'));
|
|
|
|
end if;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_xlsx_cell;
|
|
|
|
|
|
function get_xlsx_cell_values_as_sheet (p_xlsx in blob,
|
|
p_worksheet in varchar2,
|
|
p_cells in t_str_array) return t_xlsx_sheet
|
|
as
|
|
l_values t_str_array;
|
|
l_cell t_xlsx_cell;
|
|
l_returnvalue t_xlsx_sheet;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get multiple cell values from XLSX file
|
|
|
|
Remarks: return the values as an index-by table, allowing them to be referenced by name like this: l_sheet('A1').value
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 28.03.2011 Created
|
|
|
|
*/
|
|
|
|
l_values := get_xlsx_cell_values (p_xlsx, p_worksheet, p_cells);
|
|
|
|
for i in 1 .. p_cells.count loop
|
|
l_cell := get_xlsx_cell (p_cells(i));
|
|
l_cell.value := l_values(i);
|
|
l_returnvalue (p_cells(i)) := l_cell;
|
|
end loop;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_xlsx_cell_values_as_sheet;
|
|
|
|
|
|
function get_xlsx_cell_array_by_range (p_from_cell in varchar2,
|
|
p_to_cell in varchar2) return t_str_array
|
|
as
|
|
l_from_cell t_xlsx_cell;
|
|
l_to_cell t_xlsx_cell;
|
|
l_returnvalue t_str_array := t_str_array();
|
|
|
|
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get an array of cell references by range
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 26.03.2011 Created
|
|
MBR 11.07.2011 Handle two-letter column references
|
|
|
|
*/
|
|
|
|
l_from_cell := get_xlsx_cell (p_from_cell);
|
|
l_to_cell := get_xlsx_cell (p_to_cell);
|
|
|
|
/*
|
|
for l_column in ascii(l_from_cell.column) .. ascii(l_to_cell.column) loop
|
|
for l_row in l_from_cell.row .. l_to_cell.row loop
|
|
l_returnvalue.extend;
|
|
l_returnvalue (l_returnvalue.last) := chr(l_column) || l_row;
|
|
end loop;
|
|
end loop;
|
|
*/
|
|
|
|
for l_column in get_xlsx_column_number (l_from_cell.column) .. get_xlsx_column_number(l_to_cell.column) loop
|
|
for l_row in l_from_cell.row .. l_to_cell.row loop
|
|
l_returnvalue.extend;
|
|
l_returnvalue (l_returnvalue.last) := get_xlsx_column_ref (l_column) || l_row;
|
|
end loop;
|
|
end loop;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_xlsx_cell_array_by_range;
|
|
|
|
|
|
function get_xlsx_number (p_str in varchar2) return number
|
|
as
|
|
l_str string_util_pkg.t_max_db_varchar2;
|
|
l_e_pos pls_integer;
|
|
l_power pls_integer;
|
|
l_returnvalue number;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get number from Excel internal format
|
|
|
|
Remarks: note that values may not be exactly what you expect,
|
|
see http://stackoverflow.com/questions/606730/numeric-precision-issue-in-excel-2007-when-saving-as-xml
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 26.03.2011 Created
|
|
MBR 11.07.2011 Handle scientific notation
|
|
MBR 18.05.2012 Handle exception when string fails conversion to number
|
|
|
|
*/
|
|
|
|
l_str := p_str;
|
|
|
|
l_e_pos := instr(l_str, 'E');
|
|
|
|
if l_e_pos > 0 then
|
|
l_power := to_number(substr(l_str, l_e_pos + 1));
|
|
l_str := substr(l_str, 1, l_e_pos - 1);
|
|
end if;
|
|
|
|
begin
|
|
l_returnvalue := to_number(l_str, '999999999999999999999999D99999999999999999999999999999999999999', 'NLS_NUMERIC_CHARACTERS=.,');
|
|
exception
|
|
when value_error then
|
|
-- conversion failed
|
|
l_returnvalue := null;
|
|
end;
|
|
|
|
if l_e_pos > 0 then
|
|
l_returnvalue := l_returnvalue * power (10, l_power);
|
|
end if;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_xlsx_number;
|
|
|
|
|
|
function get_xlsx_date (p_date_str in varchar2,
|
|
p_time_str in varchar2 := null) return date
|
|
as
|
|
l_days number;
|
|
l_returnvalue date;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get date from Excel internal format
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 26.03.2011 Created
|
|
|
|
*/
|
|
|
|
l_days := get_xlsx_number (p_date_str);
|
|
|
|
if p_time_str is not null then
|
|
l_days := l_days + get_xlsx_number (p_time_str);
|
|
end if;
|
|
|
|
l_returnvalue := to_date('01.01.1900', 'dd.mm.yyyy') + l_days - 2;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_xlsx_date;
|
|
|
|
|
|
function get_pptx_properties (p_pptx in blob) return t_pptx_properties
|
|
as
|
|
l_returnvalue t_pptx_properties;
|
|
l_xml xmltype;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get pptx properties
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 11.07.2011 Created
|
|
|
|
*/
|
|
|
|
l_xml := get_xml (p_pptx, 'docProps/core.xml');
|
|
|
|
l_returnvalue.core.title := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:title/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.subject := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:subject/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.creator := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:creator/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.keywords := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/cp:keywords/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.description := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/dc:description/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.last_modified_by := xml_util_pkg.extract_value (l_xml, '/cp:coreProperties/cp:lastModifiedBy/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.revision := xml_util_pkg.extract_value_number (l_xml, '/cp:coreProperties/cp:revision/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.created_date := xml_util_pkg.extract_value_date (l_xml, '/cp:coreProperties/dcterms:created/text()', g_namespace_coreprops);
|
|
l_returnvalue.core.modified_date := xml_util_pkg.extract_value_date (l_xml, '/cp:coreProperties/dcterms:modified/text()', g_namespace_coreprops);
|
|
|
|
l_xml := get_xml (p_pptx, 'docProps/app.xml');
|
|
|
|
l_returnvalue.app.application := xml_util_pkg.extract_value (l_xml, '/Properties/Application/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.app_version := xml_util_pkg.extract_value (l_xml, '/Properties/AppVersion/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.company := xml_util_pkg.extract_value (l_xml, '/Properties/Company/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.slides := xml_util_pkg.extract_value_number (l_xml, '/Properties/Slides/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.hidden_slides := xml_util_pkg.extract_value_number (l_xml, '/Properties/HiddenSlides/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.paragraphs := xml_util_pkg.extract_value_number (l_xml, '/Properties/Paragraphs/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.words := xml_util_pkg.extract_value_number (l_xml, '/Properties/Words/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.notes := xml_util_pkg.extract_value_number (l_xml, '/Properties/Notes/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.template := xml_util_pkg.extract_value (l_xml, '/Properties/Template/text()', g_namespace_extendedprops);
|
|
l_returnvalue.app.presentation_format := xml_util_pkg.extract_value (l_xml, '/Properties/PresentationFormat/text()', g_namespace_extendedprops);
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_pptx_properties;
|
|
|
|
|
|
function get_pptx_media_list (p_pptx in blob,
|
|
p_slide in number := null) return t_str_array
|
|
as
|
|
l_xml xmltype;
|
|
l_file_list zip_util_pkg.t_file_list;
|
|
l_returnvalue t_str_array := t_str_array();
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get list of media files embedded in presentation
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 11.07.2011 Created
|
|
MBR 28.04.2012 Specify specific slide number to get media (images) related to that slide
|
|
|
|
*/
|
|
|
|
if p_slide is not null then
|
|
|
|
-- see http://msdn.microsoft.com/en-us/library/bb332455(v=office.12).aspx
|
|
|
|
l_xml := get_xml (p_pptx, 'ppt/slides/_rels/slide' || p_slide || '.xml.rels');
|
|
|
|
for l_rec in (
|
|
select extractValue(value(t), '*/@Target', g_namespace_package_rels) as target
|
|
from table(xmlsequence(l_xml.extract('//Relationship[@Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"]', g_namespace_package_rels))) t
|
|
)
|
|
loop
|
|
l_returnvalue.extend();
|
|
l_returnvalue(l_returnvalue.last) := replace(l_rec.target, '../media', 'ppt/media');
|
|
end loop;
|
|
|
|
else
|
|
|
|
l_file_list := zip_util_pkg.get_file_list (p_pptx);
|
|
|
|
for i in 1 .. l_file_list.count loop
|
|
|
|
if substr(l_file_list(i), 1, 10) = 'ppt/media/' then
|
|
l_returnvalue.extend();
|
|
l_returnvalue(l_returnvalue.last) := l_file_list(i);
|
|
end if;
|
|
|
|
end loop;
|
|
|
|
end if;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_pptx_media_list;
|
|
|
|
|
|
function get_pptx_to_txt_stylesheet return varchar2
|
|
as
|
|
l_returnvalue varchar2(32000);
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get XSL stylesheet that transforms pptx to plain text
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 11.07.2011 Created
|
|
|
|
*/
|
|
|
|
l_returnvalue := '<?xml version="1.0" encoding="utf-8"?>
|
|
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" ' || g_namespace_pptx_slide || '>
|
|
<xsl:template match="/">
|
|
<xsl:for-each select="//a:r">
|
|
<xsl:value-of select="a:t"/><xsl:text>' || g_line_break_hack || '</xsl:text>
|
|
</xsl:for-each>
|
|
</xsl:template>
|
|
</xsl:stylesheet>';
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_pptx_to_txt_stylesheet;
|
|
|
|
|
|
function get_pptx_plaintext (p_pptx in blob,
|
|
p_slide in number := null,
|
|
p_note in number := null) return clob
|
|
as
|
|
l_null_argument_exception exception; -- ORA-30625: method dispatch on NULL SELF argument is disallowed
|
|
pragma exception_init (l_null_argument_exception, -30625);
|
|
l_document_blob blob;
|
|
l_document_clob clob;
|
|
l_returnvalue clob;
|
|
begin
|
|
|
|
/*
|
|
|
|
Purpose: get plain text from slide
|
|
|
|
Remarks:
|
|
|
|
Who Date Description
|
|
------ ---------- --------------------------------
|
|
MBR 11.07.2011 Created
|
|
MBR 08.07.2011 Handle empty slides
|
|
|
|
*/
|
|
|
|
if p_note is not null then
|
|
l_document_blob := zip_util_pkg.get_file (p_pptx, 'ppt/notesSlides/notesSlide' || p_note || '.xml');
|
|
elsif p_slide is not null then
|
|
l_document_blob := zip_util_pkg.get_file (p_pptx, 'ppt/slides/slide' || p_slide || '.xml');
|
|
end if;
|
|
|
|
if l_document_blob is not null then
|
|
l_document_clob := sql_util_pkg.blob_to_clob (l_document_blob);
|
|
begin
|
|
l_returnvalue := xml_stylesheet_pkg.transform_clob(l_document_clob, get_pptx_to_txt_stylesheet);
|
|
exception
|
|
when l_null_argument_exception then
|
|
l_returnvalue := null;
|
|
end;
|
|
l_returnvalue := replace (l_returnvalue, g_line_break_hack, chr(10));
|
|
else
|
|
l_returnvalue := 'Slide or note must be specified.';
|
|
end if;
|
|
|
|
return l_returnvalue;
|
|
|
|
end get_pptx_plaintext;
|
|
|
|
|
|
end ooxml_util_pkg;
|
|
/
|
|
|
|
|
|
|