Commit a6f4ff9a authored by Jérome Perrin

split CategoryTool_importCategoryFile in two scripts: one to parse the

spreadsheet, one to actually create categories


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@22598 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 3653d426
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<tuple>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
<tuple/>
</tuple>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Python_magic</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>__ac_local_roles__</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_body</string> </key>
<value> <string encoding="cdata"><![CDATA[
"""Parses a spreadsheet containing categories and returns a mapping.\n
\n
import_file must be a spreadsheet in a format supported by openoffice\n
\n
The returned mapping has the following structure:\n
\n
{ \'base_category_id\':\n
# list of category info\n
( { \'path\': \'bc/1\',\n
\'id\': \'1\',\n
\'title\': \'Title 1\' },\n
{ \'path\': \'bc/1/2\',\n
\'id\': \'2\',\n
\'title\': \'Title 2\' }, ), }\n
\n
This script guarantees that the list of category info is sorted in such a\n
way that parents always precede their children.\n
"""\n
from Products.ERP5OOo.OOoUtils import OOoParser\n
# Parser instance used further down to open and read the uploaded spreadsheet\n
parser = OOoParser()\n
# Value returned by this script: filled with one entry per base category id,\n
# each entry being the list of category property dictionaries\n
categories_spreadsheet_mapping = dict()\n
\n
def getIDFromString(string=None):\n
"""\n
Turn a free-form string into a safe and readable ID.\n
\n
It is used here to create a safe category ID from a spreadsheet value.\n
\n
Returns None when string is None. Otherwise the string is lower-cased and\n
stripped, the characters listed in translation_map are replaced by their\n
safe equivalent, every character that is neither alphanumeric nor an\n
underscore or a dash is dropped, and non-alphanumeric characters are\n
removed from both ends. The result can be an empty string.\n
"""\n
if string is None:\n
return None\n
clean_id = \'\'\n
# Maps a safe replacement to the list of characters it stands in for\n
translation_map = { \'a\' : [u\'\\xe0\', u\'\\xe3\']\n
, \'e\' : [u\'\\xe9\', u\'\\xe8\']\n
, \'i\' : [u\'\\xed\']\n
, \'u\' : [u\'\\xf9\']\n
, \'_\' : [\' \', \'+\']\n
, \'-\' : [\'-\', u\'\\u2013\']\n
, \'and\': [\'&\']\n
}\n
# Replace odd characters by their safe ASCII equivalent\n
string = string.lower()\n
string = string.strip()\n
for (safe_char, char_list) in translation_map.items():\n
for char in char_list:\n
string = string.replace(char, safe_char)\n
# Keep only alphanumeric characters and the single-character keys of\n
# translation_map (the multi-character key can never equal one character)\n
for char in string:\n
if char.isalnum() or char in translation_map.keys():\n
clean_id += char\n
# Delete leading and trailing characters which are not alphanumeric.\n
# This prevents IDs from starting or ending with an underscore or a dash.\n
while len(clean_id) > 0 and not clean_id[0].isalnum():\n
clean_id = clean_id[1:]\n
while len(clean_id) > 0 and not clean_id[-1].isalnum():\n
clean_id = clean_id[:-1]\n
return clean_id\n
\n
# If the file is not in an OpenOffice.org / OpenDocument format, try to\n
# convert it using oood before parsing it.\n
# FIXME: use portal_transforms\n
# content_type keeps this placeholder when import_file has no headers\n
# attribute; the placeholder matches none of the prefixes tested below, so\n
# such a file always goes through the conversion branch.\n
content_type = \'unknown\'\n
if hasattr(import_file, \'headers\'):\n
content_type = import_file.headers.get(\'Content-Type\', \'\')\n
if not (content_type.startswith(\'application/vnd.sun.xml\')\n
or content_type.startswith(\'application/vnd.oasis.opendocument\')):\n
from Products.ERP5Type.Document import newTempOOoDocument\n
# Load the raw upload into a temporary document, convert it to sxc and\n
# hand the converted data to the parser as a string\n
tmp_ooo = newTempOOoDocument(context, "_")\n
tmp_ooo.edit(data=import_file.read(),\n
content_type=content_type)\n
tmp_ooo.convertToBaseFormat()\n
ignored, import_file_content = tmp_ooo.convert(\'sxc\')\n
parser.openFromString(str(import_file_content))\n
else:\n
# Already a supported format: let the parser read the upload directly\n
parser.openFile(import_file)\n
\n
# Extract tables from the spreadsheet file\n
# NOTE(review): filename is not read anywhere else in this script\n
filename = parser.getFilename()\n
spreadsheets = parser.getSpreadsheetsMapping(no_empty_lines=True)\n
\n
\n
for table_name in spreadsheets.keys():\n
# Get the header of the table\n
columns_header = spreadsheets[table_name][0]\n
# Get the mapping to help us know the property according a cell index\n
property_map = {}\n
column_index = 0\n
path_index = 0\n
for column in columns_header:\n
column_id = getIDFromString(column)\n
# This give us the information that the path definition has started\n
path_def_started = \'path_0\' in property_map.values()\n
# The path of the category has started to be expressed\n
if column_id == \'path\':\n
property_map[column_index] = \'path_\' + str(path_index)\n
path_index += 1\n
# The column has no header information\n
elif column_id in (None, \'\'):\n
# Are we in the middle of the path definition ?\n
# If the path definition has started and not ended\n
if path_def_started and path_index != None:\n
property_map[column_index] = \'path_\' + str(path_index)\n
path_index += 1\n
# else : The path definition is not started or is finished, so ignore the column\n
# The column has a normal header\n
else:\n
# If there is a new column with a header and the path definition has\n
# started, that seems the path definition has ended\n
if \'path_0\' in property_map.values():\n
path_index == None\n
property_map[column_index] = column_id\n
column_index += 1\n
\n
# Construct categories data (with absolute path) from table lines\n
# The first category is the Base category\n
# 1 table = 1 base category\n
base_category_name = table_name\n
base_category_id = getIDFromString(base_category_name)\n
categories = categories_spreadsheet_mapping.setdefault(base_category_id, [])\n
categories.append({ \'path\' : base_category_id\n
, \'title\': base_category_name\n
})\n
\n
# This path_elements help us to reconstruct the absolute path\n
path_elements = []\n
for line in spreadsheets[table_name][1:]:\n
\n
# Exclude empty lines\n
if line.count(\'\') + line.count(None) == len(line):\n
continue\n
\n
# Prefetch line datas\n
cell_index = 0\n
line_data = {}\n
for cell in line:\n
# Get the property corresponding to the cell data\n
property_id = property_map[cell_index]\n
line_data[property_id] = cell\n
cell_index += 1\n
\n
# Analyse every cells of the line\n
category_properties = {}\n
cell_index = 0\n
for (property_id, cell_data) in line_data.items():\n
\n
# Try to generate a cell id from cell data\n
cell_id = getIDFromString(cell_data)\n
# Returned cell_id can be None or \'\' (empty string). Both have different meaning:\n
# None : no data was inputed by the user.\n
# \'\' : data entered by the user, but no good transformation of the string to a safe ID.\n
\n
# If the cell_id tranformation return an empty string, and if the cell is a path item,\n
# we should try to use other line data to get a safe id.\n
if cell_id == \'\' and property_id.startswith(\'path_\'):\n
for alt_id_source in [\'id\', \'title\']:\n
if line_data.has_key(alt_id_source):\n
cell_id = getIDFromString(line_data[alt_id_source])\n
if cell_id not in (\'\', None):\n
break\n
\n
# Ignore empty cells\n
if cell_id not in (\'\', None):\n
# Handle normal properties\n
if not property_id.startswith(\'path_\'):\n
category_properties[property_id] = cell_data\n
# Handle \'path\' property\n
else:\n
path_element_id = cell_id\n
# Initialize the list of path elements to the cell element\n
absolut_path_element_list = [path_element_id,]\n
# Get the depth of the current element\n
element_depth = int(property_id[5:]) # 5 == len(\'path_\')\n
# Get a path element for each depth level to reach the 0-level\n
for searched_depth in range(element_depth)[::-1]:\n
# Get the first path element that correspond to the searched depth\n
for element in path_elements[::-1]:\n
if element[\'depth\'] == searched_depth:\n
# Element found, add it to the list\n
absolut_path_element_list.append(element[\'value\'])\n
# Get the next depth\n
break\n
category_properties[\'path\'] = \'/\'.join([base_category_id,] + absolut_path_element_list[::-1])\n
\n
# Save the current raw path item value as title if no title column defined\n
if \'title\' not in category_properties.keys():\n
clean_title = cell_data.strip()\n
# Only set title if it look like a title\n
# (i.e. its tranformation to ID is not the same as the original value)\n
if clean_title != cell_id:\n
category_properties[\'title\'] = clean_title\n
\n
# Save the path element\n
path_elements.append({ \'depth\': element_depth\n
, \'value\': path_element_id\n
})\n
\n
# Proceed to next cell\n
cell_index += 1\n
\n
if len(category_properties) > 0 and \'path\' in category_properties.keys():\n
categories.append(category_properties)\n
\n
return categories_spreadsheet_mapping\n
]]></string> </value>
</item>
<item>
<key> <string>_code</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>_filepath</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>import_file</string> </value>
</item>
<item>
<key> <string>errors</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>func_code</string> </key>
<value>
<object>
<klass>
<global name="FuncCode" module="Shared.DC.Scripts.Signature"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>co_argcount</string> </key>
<value> <int>1</int> </value>
</item>
<item>
<key> <string>co_varnames</string> </key>
<value>
<tuple>
<string>import_file</string>
<string>Products.ERP5OOo.OOoUtils</string>
<string>OOoParser</string>
<string>parser</string>
<string>dict</string>
<string>categories_spreadsheet_mapping</string>
<string>None</string>
<string>getIDFromString</string>
<string>content_type</string>
<string>hasattr</string>
<string>_getattr_</string>
<string>Products.ERP5Type.Document</string>
<string>newTempOOoDocument</string>
<string>context</string>
<string>tmp_ooo</string>
<string>_getiter_</string>
<string>ignored</string>
<string>import_file_content</string>
<string>str</string>
<string>filename</string>
<string>True</string>
<string>spreadsheets</string>
<string>table_name</string>
<string>_getitem_</string>
<string>columns_header</string>
<string>property_map</string>
<string>column_index</string>
<string>path_index</string>
<string>column</string>
<string>column_id</string>
<string>path_def_started</string>
<string>_write_</string>
<string>_inplacevar_</string>
<string>base_category_name</string>
<string>base_category_id</string>
<string>categories</string>
<string>path_elements</string>
<string>line</string>
<string>len</string>
<string>cell_index</string>
<string>line_data</string>
<string>cell</string>
<string>property_id</string>
<string>category_properties</string>
<string>cell_data</string>
<string>cell_id</string>
<string>alt_id_source</string>
<string>path_element_id</string>
<string>absolut_path_element_list</string>
<string>int</string>
<string>element_depth</string>
<string>range</string>
<string>searched_depth</string>
<string>element</string>
<string>clean_title</string>
</tuple>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>func_defaults</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Base_getCategoriesSpreadSheetMapping</string> </value>
</item>
<item>
<key> <string>warnings</string> </key>
<value>
<tuple/>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
......@@ -65,12 +65,7 @@
</item>
<item>
<key> <string>_body</string> </key>
<value> <string encoding="cdata"><![CDATA[
from Products.ERP5OOo.OOoUtils import OOoParser\n
OOoParser = OOoParser()\n
\n
# Initialise some general variables\n
<value> <string># Initialise some general variables\n
detailed_report_result = []\n
detailed_report_append = detailed_report_result.append\n
base_category_id_list = []\n
......@@ -100,63 +95,9 @@ def Object_hasRelation(obj):\n
return result\n
\n
\n
def getIDFromString(string=None):\n
"""\n
This function transform a string to a safe and beautiful ID.\n
It is used here to create a safe category ID from a string.\n
"""\n
if string is None:\n
return None\n
clean_id = \'\'\n
translation_map = { \'a\' : [u\'\\xe0\', u\'\\xe3\']\n
, \'e\' : [u\'\\xe9\', u\'\\xe8\']\n
, \'i\' : [u\'\\xed\']\n
, \'u\' : [u\'\\xf9\']\n
, \'_\' : [\' \', \'+\']\n
, \'-\' : [\'-\', u\'\\u2013\']\n
, \'and\': [\'&\']\n
}\n
# Replace odd chars by safe ascii\n
string = string.lower()\n
string = string.strip()\n
for (safe_char, char_list) in translation_map.items():\n
for char in char_list:\n
string = string.replace(char, safe_char)\n
# Exclude all non alphanumeric chars\n
for char in string:\n
if char.isalnum() or char in translation_map.keys():\n
clean_id += char\n
# Delete leading and trailing char which are not alpha-numerics\n
# This prevent having IDs with starting underscores\n
while len(clean_id) > 0 and not clean_id[0].isalnum():\n
clean_id = clean_id[1:]\n
while len(clean_id) > 0 and not clean_id[-1].isalnum():\n
clean_id = clean_id[:-1]\n
return clean_id\n
\n
\n
def isValidID(id):\n
return id not in property_id_list\n
\n
\n
content_type = import_file.headers.get(\'Content-Type\', \'\')\n
# if the file is not an open office format, try to convert it using oood\n
if not (content_type.startswith(\'application/vnd.sun.xml\')\n
or content_type.startswith(\'application/vnd.oasis.opendocument\')):\n
from Products.ERP5Type.Document import newTempOOoDocument\n
tmp_ooo = newTempOOoDocument(context, "_")\n
tmp_ooo.edit(data=import_file.read(),\n
content_type=content_type)\n
tmp_ooo.convertToBaseFormat()\n
ignored, import_file_content = tmp_ooo.convert(\'sxc\')\n
OOoParser.openFromString(str(import_file_content))\n
else:\n
OOoParser.openFile(import_file)\n
\n
# Extract tables from the speadsheet file\n
filename = OOoParser.getFilename()\n
spreadsheets = OOoParser.getSpreadsheetsMapping(no_empty_lines=True)\n
\n
# Some statistics\n
new_category_counter = 0\n
updated_category_counter = 0\n
......@@ -165,128 +106,11 @@ invalid_category_id_counter = 0\n
deleted_category_counter = 0\n
kept_category_counter = 0\n
\n
for table_name in spreadsheets.keys():\n
# Get the header of the table\n
columns_header = spreadsheets[table_name][0]\n
# Get the mapping to help us know the property according a cell index\n
property_map = {}\n
column_index = 0\n
path_index = 0\n
for column in columns_header:\n
column_id = getIDFromString(column)\n
# This give us the information that the path definition has started\n
path_def_started = \'path_0\' in property_map.values()\n
# The path of the category has started to be expressed\n
if column_id == \'path\':\n
property_map[column_index] = \'path_\' + str(path_index)\n
path_index += 1\n
# The column has no header information\n
elif column_id in (None, \'\'):\n
# Are we in the middle of the path definition ?\n
# If the path definition has started and not ended\n
if path_def_started and path_index != None:\n
property_map[column_index] = \'path_\' + str(path_index)\n
path_index += 1\n
# else : The path definition is not started or is finished, so ignore the column\n
# The column has a normal header\n
else:\n
# If there is a new column with a header and the path definition has\n
# started, that seems the path definition has ended\n
if \'path_0\' in property_map.values():\n
path_index == None\n
property_map[column_index] = column_id\n
column_index += 1\n
\n
# Construct categories data (with absolute path) from table lines\n
categories = []\n
# The first category is the Base category\n
# 1 table = 1 base category\n
base_category_name = table_name\n
base_category_id = getIDFromString(base_category_name)\n
base_category_id_list.append(base_category_id)\n
categories.append({ \'path\' : base_category_id\n
, \'title\': base_category_name\n
})\n
\n
# This path_elements help us to reconstruct the absolute path\n
path_elements = []\n
for line in spreadsheets[table_name][1:]:\n
\n
# Exclude empty lines\n
if line.count(\'\') + line.count(None) == len(line):\n
continue\n
\n
# Prefetch line datas\n
cell_index = 0\n
line_data = {}\n
for cell in line:\n
# Get the property corresponding to the cell data\n
property_id = property_map[cell_index]\n
line_data[property_id] = cell\n
cell_index += 1\n
\n
# Analyse every cells of the line\n
category_properties = {}\n
cell_index = 0\n
for (property_id, cell_data) in line_data.items():\n
\n
# Try to generate a cell id from cell data\n
cell_id = getIDFromString(cell_data)\n
# Returned cell_id can be None or \'\' (empty string). Both have different meaning:\n
# None : no data was inputed by the user.\n
# \'\' : data entered by the user, but no good transformation of the string to a safe ID.\n
\n
# If the cell_id tranformation return an empty string, and if the cell is a path item,\n
# we should try to use other line data to get a safe id.\n
if cell_id == \'\' and property_id.startswith(\'path_\'):\n
for alt_id_source in [\'id\', \'title\']:\n
if line_data.has_key(alt_id_source):\n
cell_id = getIDFromString(line_data[alt_id_source])\n
if cell_id not in (\'\', None):\n
break\n
\n
# Ignore empty cells\n
if cell_id not in (\'\', None):\n
# Handle normal properties\n
if not property_id.startswith(\'path_\'):\n
category_properties[property_id] = cell_data\n
# Handle \'path\' property\n
else:\n
path_element_id = cell_id\n
# Initialize the list of path elements to the cell element\n
absolut_path_element_list = [path_element_id,]\n
# Get the depth of the current element\n
element_depth = int(property_id[5:])\n
# Get a path element for each depth level to reach the 0-level\n
for searched_depth in range(element_depth)[::-1]:\n
# Get the first path element that correspond to the searched depth\n
for element in path_elements[::-1]:\n
if element[\'depth\'] == searched_depth:\n
# Element found, add it to the list\n
absolut_path_element_list.append(element[\'value\'])\n
# Get the next depth\n
break\n
category_properties[\'path\'] = \'/\'.join([base_category_id,] + absolut_path_element_list[::-1])\n
\n
# Save the current raw path item value as title if no title column defined\n
if \'title\' not in category_properties.keys():\n
clean_title = cell_data.strip()\n
# Only set title if it look like a title\n
# (i.e. its tranformation to ID is not the same as the original value)\n
if clean_title != cell_id:\n
category_properties[\'title\'] = clean_title\n
\n
# Save the path element\n
path_elements.append({ \'depth\': element_depth\n
, \'value\': path_element_id\n
})\n
\n
# Proceed to next cell\n
cell_index += 1\n
\n
if len(category_properties) > 0 and \'path\' in category_properties.keys():\n
categories.append(category_properties)\n
filename = getattr(import_file, \'filename\', \'?\')\n
categories_spreadsheet_mapping = context.Base_getCategoriesSpreadSheetMapping(import_file)\n
\n
for base_category, categories in \\\n
categories_spreadsheet_mapping.items():\n
# Create categories\n
total_category_counter += len(categories)\n
for category in categories:\n
......@@ -358,6 +182,7 @@ for base_category_id in base_category_id_list:\n
if not category_path_dict.has_key(category.getRelativeUrl()):\n
if keep_existing_category or Object_hasRelation(category):\n
if Object_hasRelation(category):\n
# TODO: add a dialog parameter allowing to delete this path\n
detailed_report_append(\'WARNING: Category %s is used and can not be deleted\' % category.getRelativeUrl())\n
else:\n
detailed_report_append(\'Kept category %s\' % category.getRelativeUrl())\n
......@@ -378,7 +203,6 @@ if not keep_existing_category:\n
\n
if detailed_report:\n
# Return a detailed report if requested\n
# return repr(detailed_report_result)\n
return \'\\n\'.join(detailed_report_result)\n
\n
# Import is a success, go back to the portal_categories tool\n
......@@ -397,9 +221,7 @@ return context.REQUEST.RESPONSE.redirect(\n
, kept_category_counter\n
)\n
)\n
]]></string> </value>
</string> </value>
</item>
<item>
<key> <string>_code</string> </key>
......@@ -452,8 +274,6 @@ return context.REQUEST.RESPONSE.redirect(\n
<string>keep_existing_category</string>
<string>detailed_report</string>
<string>kw</string>
<string>Products.ERP5OOo.OOoUtils</string>
<string>OOoParser</string>
<string>detailed_report_result</string>
<string>_getattr_</string>
<string>detailed_report_append</string>
......@@ -463,68 +283,35 @@ return context.REQUEST.RESPONSE.redirect(\n
<string>property_id_list</string>
<string>AttributeError</string>
<string>Object_hasRelation</string>
<string>None</string>
<string>getIDFromString</string>
<string>isValidID</string>
<string>content_type</string>
<string>Products.ERP5Type.Document</string>
<string>newTempOOoDocument</string>
<string>tmp_ooo</string>
<string>_getiter_</string>
<string>ignored</string>
<string>import_file_content</string>
<string>str</string>
<string>filename</string>
<string>True</string>
<string>spreadsheets</string>
<string>new_category_counter</string>
<string>updated_category_counter</string>
<string>total_category_counter</string>
<string>invalid_category_id_counter</string>
<string>deleted_category_counter</string>
<string>kept_category_counter</string>
<string>table_name</string>
<string>_getitem_</string>
<string>columns_header</string>
<string>property_map</string>
<string>column_index</string>
<string>path_index</string>
<string>column</string>
<string>column_id</string>
<string>path_def_started</string>
<string>_write_</string>
<string>_inplacevar_</string>
<string>getattr</string>
<string>filename</string>
<string>categories_spreadsheet_mapping</string>
<string>_getiter_</string>
<string>base_category</string>
<string>categories</string>
<string>base_category_name</string>
<string>base_category_id</string>
<string>path_elements</string>
<string>line</string>
<string>_inplacevar_</string>
<string>len</string>
<string>cell_index</string>
<string>line_data</string>
<string>cell</string>
<string>property_id</string>
<string>category_properties</string>
<string>cell_data</string>
<string>cell_id</string>
<string>alt_id_source</string>
<string>path_element_id</string>
<string>absolut_path_element_list</string>
<string>int</string>
<string>element_depth</string>
<string>range</string>
<string>searched_depth</string>
<string>element</string>
<string>clean_title</string>
<string>category</string>
<string>False</string>
<string>is_new_category</string>
<string>keys</string>
<string>base_path_obj</string>
<string>True</string>
<string>is_base_category</string>
<string>is_valid_category</string>
<string>_getitem_</string>
<string>category_id</string>
<string>str</string>
<string>category_type</string>
<string>None</string>
<string>_write_</string>
<string>new_category</string>
<string>category_update_dict</string>
<string>key</string>
......@@ -532,7 +319,7 @@ return context.REQUEST.RESPONSE.redirect(\n
<string>_apply_</string>
<string>KeyError</string>
<string>category_to_delete_list</string>
<string>base_category</string>
<string>base_category_id</string>
<string>parent</string>
</tuple>
</value>
......@@ -556,6 +343,12 @@ return context.REQUEST.RESPONSE.redirect(\n
<key> <string>id</string> </key>
<value> <string>CategoryTool_importCategoryFile</string> </value>
</item>
<item>
<key> <string>uid</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>warnings</string> </key>
<value>
......
909
\ No newline at end of file
911
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment