Commit 4e4b1c2e authored by Julien Muchembled

Fix conversion from html to spreadsheet

A lot of changes because the import filter depends on the destination format.
This is equivalent to specifying --calc when using the command-line API.
parent 59a04869
...@@ -77,17 +77,17 @@ Options: ...@@ -77,17 +77,17 @@ Options:
class UnoConverter(object): class UnoConverter(object):
"""A module to easily work with OpenOffice.org.""" """A module to easily work with OpenOffice.org."""
def __init__(self, hostname, port, document_url, source_format, uno_path, def __init__(self, service_manager, document_url,
office_binary_path, refresh): source_format, destination_format, refresh):
""" """ self.service_manager = service_manager
self.hostname = hostname
self.port = port
self.document_url = document_url self.document_url = document_url
self.document_dir_path = dirname(document_url)
self.source_format = source_format self.source_format = source_format
self.refresh = refresh self.refresh = refresh
self.uno_path = uno_path self.destination_format = destination_format
self.office_binary_path = office_binary_path self.filter_list = [(x[1], x[2])
for x in mimemapper.get("filter_list", ())
if destination_format == x[0] and x[2]
] if mimemapper else ()
self._load() self._load()
def _createProperty(self, name, value): def _createProperty(self, name, value):
...@@ -129,33 +129,16 @@ class UnoConverter(object): ...@@ -129,33 +129,16 @@ class UnoConverter(object):
return [property, ] return [property, ]
def _getFilterName(self, destination_format, type): def _getPropertyToImport(self,
for filter_tuple in mimemapper["filter_list"]: _ods="com.sun.star.sheet.SpreadsheetDocument"):
if destination_format == filter_tuple[0] and filter_tuple[1] == type:
return filter_tuple[2]
def _getPropertyToExport(self, destination_format=None):
"""Create the property according to the extension of the file."""
if destination_format and self.document_loaded:
filter_name = self._getFilterName(destination_format, self.document_type)
property_list = []
property = self._createProperty("Overwrite", True)
property_list.append(property)
property = self._createProperty("FilterName", filter_name)
property_list.append(property)
property_list.extend(self._createSpecificProperty(filter_name))
return property_list
else:
return ()
def _getPropertyToImport(self, source_url):
"""Create the property for import filter, according to the extension of the file.""" """Create the property for import filter, according to the extension of the file."""
_, extension = splitext(source_url) candidates = (x[0] for x in self.filter_list)
if extension == '.csv': if self.source_format == 'csv':
if _ods in candidates:
# https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options # https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
# Try to sniff the csv delimiter # Try to sniff the csv delimiter
with codecs.open(source_url, 'rb', 'utf-8', errors="ignore") as csvfile: with codecs.open(self.document_url, 'rb', 'utf-8', errors="ignore") as csvfile:
try: try:
dialect = csv.Sniffer().sniff(csvfile.read(1024)) dialect = csv.Sniffer().sniff(csvfile.read(1024))
delimiter = ord(dialect.delimiter) delimiter = ord(dialect.delimiter)
...@@ -164,7 +147,13 @@ class UnoConverter(object): ...@@ -164,7 +147,13 @@ class UnoConverter(object):
return ( return (
self._createProperty("FilterName", "Text - txt - csv (StarCalc)"), self._createProperty("FilterName", "Text - txt - csv (StarCalc)"),
self._createProperty("FilterOptions", "{delimiter},34,UTF-8".format(**locals())), ) self._createProperty("FilterOptions", "%s,34,UTF-8" % delimiter))
elif self.source_format == 'html':
if next(candidates, None) == _ods:
return (
self._createProperty("FilterName", "calc_HTML_WebQuery"),
)
return () return ()
...@@ -173,27 +162,25 @@ class UnoConverter(object): ...@@ -173,27 +162,25 @@ class UnoConverter(object):
refresh argument tells to uno environment to refresh argument tells to uno environment to
replace dynamic properties of document before conversion replace dynamic properties of document before conversion
""" """
service_manager = helper_util.getServiceManager(self.hostname, self.port, createInstance = self.service_manager.createInstance
self.uno_path, desktop = createInstance("com.sun.star.frame.Desktop")
self.office_binary_path)
desktop = service_manager.createInstance("com.sun.star.frame.Desktop")
uno_url = self.systemPathToFileUrl(self.document_url) uno_url = self.systemPathToFileUrl(self.document_url)
uno_document = desktop.loadComponentFromURL( uno_document = desktop.loadComponentFromURL(
uno_url, uno_url,
"_blank", "_blank",
0, 0,
self._getPropertyToImport(self.document_url)) self._getPropertyToImport())
if not uno_document: if not uno_document:
raise AttributeError("This document can not be loaded or is empty") raise AttributeError("This document can not be loaded or is empty")
if self.refresh: if self.refresh:
# Before converting to expected format, refresh dynamic # Before converting to expected format, refresh dynamic
# value inside document. # value inside document.
dispatcher = service_manager.createInstance("com.sun.star.frame.DispatchHelper") dispatcher = createInstance("com.sun.star.frame.DispatchHelper")
for uno_command in ('UpdateFields', 'UpdateAll', 'UpdateInputFields', for uno_command in ('UpdateFields', 'UpdateAll', 'UpdateInputFields',
'UpdateAllLinks', 'UpdateCharts',): 'UpdateAllLinks', 'UpdateCharts',):
dispatcher.executeDispatch(uno_document.getCurrentController().getFrame(), dispatcher.executeDispatch(uno_document.getCurrentController().getFrame(),
'.uno:%s' % uno_command, '', 0, ()) '.uno:%s' % uno_command, '', 0, ())
module_manager = service_manager.createInstance("com.sun.star.frame.ModuleManager") module_manager = createInstance("com.sun.star.frame.ModuleManager")
self.document_type = module_manager.identify(uno_document) self.document_type = module_manager.identify(uno_document)
self.document_loaded = uno_document self.document_loaded = uno_document
...@@ -202,19 +189,28 @@ class UnoConverter(object): ...@@ -202,19 +189,28 @@ class UnoConverter(object):
from unohelper import systemPathToFileUrl from unohelper import systemPathToFileUrl
return systemPathToFileUrl(path) return systemPathToFileUrl(path)
def convert(self, output_format=None): def convert(self):
"""it converts a document to specific format""" """it converts a document to specific format"""
if output_format in ("html", "htm", "xhtml"): for document_type, filter_name in self.filter_list:
destination_format = "impr.html" if document_type == self.document_type:
property_list = [
self._createProperty("Overwrite", True),
self._createProperty("FilterName", filter_name),
]
property_list += self._createSpecificProperty(filter_name)
property_list = tuple(property_list)
break
else: else:
destination_format = output_format property_list = ()
output_url = mktemp(suffix='.%s' % destination_format,
dir=self.document_dir_path)
property_list = self._getPropertyToExport(output_format) ext = self.destination_format
if ext in ("html", "htm", "xhtml"):
ext = "impr.html"
output_url = mktemp(suffix='.' + ext if ext else '',
dir=dirname(self.document_url))
try: try:
self.document_loaded.storeToURL(self.systemPathToFileUrl(output_url), self.document_loaded.storeToURL(self.systemPathToFileUrl(output_url),
tuple(property_list)) property_list)
finally: finally:
self.document_loaded.dispose() self.document_loaded.dispose()
return output_url return output_url
...@@ -244,11 +240,9 @@ class UnoConverter(object): ...@@ -244,11 +240,9 @@ class UnoConverter(object):
except AttributeError: except AttributeError:
pass pass
service_manager = helper_util.getServiceManager(self.hostname, self.port, createInstance = self.service_manager.createInstance
self.uno_path, type_detection = createInstance("com.sun.star.document.TypeDetection")
self.office_binary_path) uno_file_access = createInstance("com.sun.star.ucb.SimpleFileAccess")
type_detection = service_manager.createInstance("com.sun.star.document.TypeDetection")
uno_file_access = service_manager.createInstance("com.sun.star.ucb.SimpleFileAccess")
doc = uno_file_access.openFileRead(self.systemPathToFileUrl(self.document_url)) doc = uno_file_access.openFileRead(self.systemPathToFileUrl(self.document_url))
input_stream = self._createProperty("InputStream", doc) input_stream = self._createProperty("InputStream", doc)
open_new_view = self._createProperty("OpenNewView", True) open_new_view = self._createProperty("OpenNewView", True)
...@@ -347,28 +341,24 @@ def main(): ...@@ -347,28 +341,24 @@ def main():
elif opt == '--mimemapper': elif opt == '--mimemapper':
mimemapper = json.loads(arg) mimemapper = json.loads(arg)
service_manager = helper_util.getServiceManager(
unoconverter = UnoConverter(hostname, port, document_url, source_format, hostname, port, uno_path, office_binary_path)
uno_path, office_binary_path, refresh) unoconverter = UnoConverter(service_manager, document_url,
if "--convert" in param_list and not '--getmetadata' in param_list \ source_format, destination_format, refresh)
and not destination_format: if '--setmetadata' in param_list:
output = unoconverter.convert() unoconverter.setMetadata(metadata)
elif '--convert' in param_list and destination_format: output = document_url
output = unoconverter.convert(destination_format) else:
elif '--getmetadata' in param_list and not '--convert' in param_list: output = unoconverter.convert() if "--convert" in param_list else None
metadata_dict = unoconverter.getMetadata() if '--getmetadata' in param_list:
output = b64encode(json.dumps(metadata_dict).encode('utf-8')).decode() if output:
elif '--getmetadata' in param_list and '--convert' in param_list:
document_url = unoconverter.convert()
# Instantiate new UnoConverter instance with new url # Instantiate new UnoConverter instance with new url
unoconverter = UnoConverter(hostname, port, document_url, source_format, unoconverter = UnoConverter(service_manager, output,
uno_path, office_binary_path, refresh) destination_format or source_format, None, refresh)
metadata_dict = unoconverter.getMetadata() metadata_dict = unoconverter.getMetadata()
metadata_dict['document_url'] = document_url if output:
metadata_dict['document_url'] = output
output = b64encode(json.dumps(metadata_dict).encode('utf-8')).decode() output = b64encode(json.dumps(metadata_dict).encode('utf-8')).decode()
elif '--setmetadata' in param_list:
unoconverter.setMetadata(metadata)
output = document_url
sys.stdout.write(output) sys.stdout.write(output)
......
...@@ -222,7 +222,8 @@ class MimeMapper(object): ...@@ -222,7 +222,8 @@ class MimeMapper(object):
'sxd': ['com.sun.star.drawing.DrawingDocument'], 'sxd': ['com.sun.star.drawing.DrawingDocument'],
'txt': ['com.sun.star.text.TextDocument'], 'txt': ['com.sun.star.text.TextDocument'],
'odg': ['com.sun.star.drawing.DrawingDocument'], 'odg': ['com.sun.star.drawing.DrawingDocument'],
'html': ['com.sun.star.text.WebDocument'], 'html': ['com.sun.star.text.WebDocument',
'com.sun.star.sheet.SpreadsheetDocument'],
'sda': ['com.sun.star.drawing.DrawingDocument'], 'sda': ['com.sun.star.drawing.DrawingDocument'],
'sdd': ['com.sun.star.drawing.DrawingDocument'], 'sdd': ['com.sun.star.drawing.DrawingDocument'],
'pdf': ['com.sun.star.drawing.DrawingDocument'], 'pdf': ['com.sun.star.drawing.DrawingDocument'],
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment