Commit 4952a5c2 authored by Ivan Tyagov

Clean up of unused code.

parent 7cf9b582
import os
import csv
import mne
import json
import numpy as np
from DateTime import DateTime
from mne.report import Report
from Products.ERP5Type.Log import log
from lxml.html import parse

CHUNK_SIZE = 200000
CHUNK_SIZE_TXT = 50000
CHUNK_SIZE_CSV = 25
BIG_FILE = 5000000000  # 5 GB
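
# This Extension Component backs the processRawData External Method: it
# dumps an ingested Data Stream to a temporary file, extracts metadata into
# the Data Descriptor and, where applicable, array data into the Data Array,
# dispatching on the reference extension (fif, nii, mgz, txt, csv, tsv, gz).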
def saveRawFile(data_stream, file_name):
  # Copy the Data Stream into a local file chunk by chunk, so that large
  # streams never have to fit into memory at once.
  n_chunk = 0
  chunk_size = CHUNK_SIZE
  while True:
    start_offset = n_chunk * chunk_size
    end_offset = n_chunk * chunk_size + chunk_size
    try:
      data_stream_chunk = ''.join(data_stream.readChunkList(start_offset, end_offset))
    except Exception:
      raise StandardError("Empty Data Stream")
    if data_stream_chunk == "":
      break
    with open(file_name, 'a') as fif_file:
      fif_file.write(data_stream_chunk)
    n_chunk += 1

def getJSONfromTextFile(file_name):
  # Store the first CHUNK_SIZE_TXT characters of the file as a content
  # sample in a JSON document.
  try:
    data = {}
    with open(file_name, 'r') as text_file:
      data["File content sample: "] = text_file.read(CHUNK_SIZE_TXT)
    return json.dumps(data)
  except Exception as e:
    log("Error while getting JSON from text file: " + str(e))
    return ""

def getMNEReportJSON(file_name, raw):
  # Render an MNE HTML report for the file and convert its summary table
  # into a JSON dictionary. MNE's report parser expects standard *_raw.fif
  # names, hence the temporary rename.
  pattern = file_name + "_raw.fif"
  report_file = file_name + 'report.html'
  try:
    os.rename(file_name, pattern)
    report = Report(verbose=True)
    report.parse_folder(data_path="./", pattern=[pattern])
    report.save(report_file, overwrite=True, open_browser=False)
    data = {}
    doc = parse(report_file)
    results = doc.xpath("//table[@class = 'table table-hover']")
    for row in iter(results[0]):
      data[row[0].text] = row[1].text
    return json.dumps(data)
  except Exception as e:
    log("Error while getting JSON Report: " + str(e))
    return ""
  finally:
    # Restore the original file name and drop the temporary report.
    if os.path.exists(pattern):
      os.rename(pattern, file_name)
    if os.path.exists(report_file):
      os.remove(report_file)

def getRawData(file_name):
  # Try to read the file as FIF first, then fall back to EDF.
  raw = None
  try:
    raw = mne.io.read_raw_fif(file_name, preload=False, verbose=None)
  except Exception:
    pass
  if raw is None:
    try:
      raw = mne.io.read_raw_edf(file_name, preload=False, verbose=None)
    except Exception:
      pass
  if raw is None:
    raise StandardError("The file does not contain raw data.")
  return raw

def processFifData(file_name, data_array, data_descriptor):
  raw = getRawData(file_name)
  try:
    json_report = getMNEReportJSON(file_name, raw)
    data_descriptor.setTextContent(json_report)
  except Exception as e:
    log("Error handling Data Descriptor content: " + str(e))
  picks = mne.pick_types(raw.info)
  if len(picks) == 0:
    raise StandardError("The raw data does not contain any element")
  # Get the data of the first picked channel to initialize the array shape.
  data, times = raw[picks[:1]]
  data_array.initArray(data.shape, data.dtype)
  zarray = data_array.getArray()
  zarray[0] = data[0]
  data_array.setArray(zarray)
  # Append the remaining picked channels one by one.
  for pick in xrange(1, len(picks)):
    data, times = raw[picks[pick]]
    zarray = data_array.getArray()
    zarray.append(data)
    data_array.setArray(zarray)

def processTextData(file_name, data_array, data_descriptor):
  try:
    json_report = getJSONfromTextFile(file_name)
    data_descriptor.setTextContent(json_report)
  except Exception as e:
    log("Error handling Data Descriptor content: " + str(e))

def processCsvData(file_name, data_array, data_descriptor, delimiter=","):
  def gen_csv_chunks(reader, chunksize=CHUNK_SIZE_CSV):
    # Yield the csv rows in chunks of 'chunksize' rows.
    chunk = []
    for index, line in enumerate(reader):
      if index % chunksize == 0 and index > 0:
        yield chunk
        chunk = []
      chunk.append(line)
    yield chunk

  def appendArray(array, data_array, columns, initialize=False):
    def getFloat(s):
      try:
        return float(s)
      except Exception:
        return 0.0
    init = 0
    if initialize:
      data_array.initArray((1, columns), np.dtype('float64'))
      zarray = data_array.getArray()
      zarray[0] = [getFloat(x) for x in array[0]]
      data_array.setArray(zarray)
      init = 1
    for i in xrange(init, len(array)):
      # Skip rows whose length does not match the first row.
      if len(array[i]) == columns:
        zarray = data_array.getArray()
        zarray.append([[getFloat(x) for x in array[i]]])
        data_array.setArray(zarray)

  try:
    with open(file_name, 'r') as csv_file:
      reader = csv.reader(csv_file, delimiter=delimiter)
      csv_file.seek(0)
      for i, chunk in enumerate(gen_csv_chunks(reader)):
        if i == 0:
          try:
            initialize = True
            columns = len(chunk[0])
            json_data = json.dumps({"csv": chunk})
          except Exception as e:
            log("Error while getting JSON from csv file: " + str(e))
            return ""
          # Appending is super inefficient: it takes minutes for a csv file
          # of 5MB (57860 rows), so only the first chunk is stored before
          # returning.
          appendArray(chunk, data_array, columns, initialize)
          data_descriptor.setTextContent(json_data)
          return
        initialize = False
      data_descriptor.setTextContent(json_data)
  except Exception as e:
    log("Error handling csv Data Descriptor content: " + str(e))

def processTsvData(file_name, data_array, data_descriptor):
  # Same as csv, but tab separated.
  processCsvData(file_name, data_array, data_descriptor, delimiter="\t")

def processNiiData(file_name, data_array, data_descriptor, gz=False):
  # nibabel detects the format from the extension, so the temporary file
  # is renamed to .nii / .nii.gz for the duration of the processing.
  old_file = file_name
  file_name = file_name + ".nii.gz" if gz else file_name + ".nii"
  os.rename(old_file, file_name)
  try:
    import nibabel as nib
    img = nib.load(file_name)
    data = {}
    for key in img.header:
      try:
        if isinstance(img.header[key], np.ndarray):
          content = img.header[key].tolist()
          try:
            if np.isnan(img.header[key]):
              content = "nan"
          except Exception:
            pass
        else:
          content = img.header[key]
          if content == 0:
            content = "0"
        json.dumps(content)  # raises if the value is not serializable
        if content != "":
          data[key] = content
      except Exception:
        pass  # ignore non serializable info
    data_descriptor.setTextContent(json.dumps(data))
  except Exception as e:
    log("Error handling Data Descriptor nii content: " + str(e))
    raise
  finally:
    os.rename(file_name, old_file)

def processGZData(file_name, data_array, data_descriptor):
  # Only nii.gz content is supported: any failure is turned into a
  # KeyError so that processRawData reports it as "not implemented".
  try:
    processNiiData(file_name, data_array, data_descriptor, gz=True)
  except Exception:
    raise KeyError("gz")

def processRawData(data_stream, data_array, data_descriptor, reference_extension):
  content = {"File content": "empty"}
  if data_stream.getSize() == 0:
    data_descriptor.setTextContent(json.dumps(content))
    return "Empty Data Stream"
  if data_stream.getSize() > BIG_FILE:
    log("Ingested file bigger than 5GB, get-metadata process skipped")
    return "File bigger than 5GB"
  file_name = "temporal_file_%s" % DateTime().strftime('%Y%m%d-%H%M%S')
  try:
    saveRawFile(data_stream, file_name)
  except Exception as e:
    if os.path.exists(file_name):
      os.remove(file_name)
    return "Error while processing raw data - saving file: " + str(e)
  # Dispatch on the reference extension; unknown extensions fall back to
  # plain text processing.
  options = {"fif": processFifData,
             "nii": processNiiData,
             "mgz": processNiiData,
             "txt": processTextData,
             "csv": processCsvData,
             "tsv": processTsvData,
             "gz": processGZData,
             "default": processTextData,
             }
  try:
    if reference_extension in options:
      options[reference_extension](file_name, data_array, data_descriptor)
    else:
      options["default"](file_name, data_array, data_descriptor)
  except KeyError:
    return "Processing for data of type %s is not implemented yet." % reference_extension
  except Exception as e:
    return "Error while processing raw data: " + str(e)
  finally:
    if os.path.exists(file_name):
      os.remove(file_name)
  return "Raw data processed."
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Extension Component" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_recorded_property_dict</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
<item>
<key> <string>default_reference</string> </key>
<value> <string>external_process_raw_data</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>extension.erp5.external_process_raw_data</string> </value>
</item>
<item>
<key> <string>language</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Extension Component</string> </value>
</item>
<item>
<key> <string>sid</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>text_content_error_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple>
<string>W: 74, 2: No exception type(s) specified (bare-except)</string>
<string>W: 79, 4: No exception type(s) specified (bare-except)</string>
<string>W: 95, 8: Unused variable \'times\' (unused-variable)</string>
<string>W:131, 6: No exception type(s) specified (bare-except)</string>
<string>W:166, 8: Unreachable code (unreachable)</string>
<string>W:184, 11: Using type() instead of isinstance() for a typecheck. (unidiomatic-typecheck)</string>
<string>W:188, 10: No exception type(s) specified (bare-except)</string>
</tuple>
</value>
</item>
<item>
<key> <string>version</string> </key>
<value> <string>erp5</string> </value>
</item>
<item>
<key> <string>workflow_history</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAM=</string> </persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary/>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="3" aka="AAAAAAAAAAM=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary>
<item>
<key> <string>component_validation_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAQ=</string> </persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="4" aka="AAAAAAAAAAQ=">
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.patches.WorkflowTool"/>
</pickle>
<pickle>
<tuple>
<none/>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>validate</string> </value>
</item>
<item>
<key> <string>validation_state</string> </key>
<value> <string>validated</string> </value>
</item>
</dictionary>
</list>
</tuple>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PyData Script" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_local_properties</string> </key>
<value>
<tuple>
<dictionary>
<item>
<key> <string>id</string> </key>
<value> <string>reference</string> </value>
</item>
<item>
<key> <string>type</string> </key>
<value> <string>string</string> </value>
</item>
</dictionary>
</tuple>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>input_stream_data, output_array, output_descriptor</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>DataTransformation_transformFIFDataToDataArray</string> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>PyData Script</string> </value>
</item>
<item>
<key> <string>reference</string> </key>
<value> <string>DataTransformation_transformFIFDataToDataArray</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
portal = context.getPortalObject()
reference_separator = portal.getIngestionReferenceDictionary()["reference_separator"]
reference_length = portal.getIngestionReferenceDictionary()["reference_length"]
invalid_chars = portal.getIngestionReferenceDictionary()["invalid_chars"]

record = reference.rsplit(reference_separator)
length = len(record)
if length < reference_length:
  context.logEntry("[ERROR] In HandleFifEmbulkIngestion: Data Ingestion reference is not well formatted")
  raise ValueError("Data Ingestion reference is not well formatted.")
for char in invalid_chars:
  if char in reference:
    context.logEntry("[ERROR] In HandleFifEmbulkIngestion: Data Ingestion reference contains chars that are not allowed")
    raise ValueError("Data Ingestion reference contains chars that are not allowed.")

supplier = record[0]
dataset_reference = record[1]
filename = reference_separator.join(record[2:-4])
extension = record[length - 4]
eof = record[length - 3]
size = record[length - 2]
hash_value = record[length - 1]
return {'filename': filename,
        'extension': extension,
        'eof': eof,
        'supplier': supplier,
        'dataset_reference': dataset_reference,
        'resource_reference': 'fif',
        'size': size,
        'hash': hash_value,
        }
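
# Illustration only: assuming the portal's reference_separator is "/" and
# reference_length is at most 8, a reference such as
#   "supplier1/datasetA/dir/file/fif/EOF/1024/abc123"
# would be decomposed into:
#   {'filename': 'dir/file', 'extension': 'fif', 'eof': 'EOF',
#    'supplier': 'supplier1', 'dataset_reference': 'datasetA',
#    'resource_reference': 'fif', 'size': '1024', 'hash': 'abc123'}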
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PyData Script" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_local_properties</string> </key>
<value>
<tuple>
<dictionary>
<item>
<key> <string>id</string> </key>
<value> <string>reference</string> </value>
</item>
<item>
<key> <string>type</string> </key>
<value> <string>string</string> </value>
</item>
</dictionary>
</tuple>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>reference</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value> <string>Handles the ingestion and returns the dictionary with the corresponding references.</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>HandleFifEmbulkIngestion</string> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>PyData Script</string> </value>
</item>
<item>
<key> <string>reference</string> </key>
<value> <string>HandleFifEmbulkIngestion</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Handle Fif Embulk Ingestion</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ExternalMethod" module="Products.ExternalMethod.ExternalMethod"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_function</string> </key>
<value> <string>processRawData</string> </value>
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>external_process_raw_data</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>processRawData</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>