Commit d8651adb authored by francois's avatar francois

erp5_receipt_recognition Add function to load model from datastream

This commit allows the erp5_receipt_recognition module to unpickle its
model from a Wendelin data stream.

Loading the model from a data stream is a bit slower.
parent e5454815
...@@ -14,9 +14,10 @@ from matplotlib import pylab ...@@ -14,9 +14,10 @@ from matplotlib import pylab
import matplotlib.image as mpimg import matplotlib.image as mpimg
import scipy.stats as stats import scipy.stats as stats
import re import re
import cPickle
import ocrolib import ocrolib
def getReceiptValue(self, image_data): def getReceiptValue(self, image_data, model_name = "en-default.pyrnn", from_stream=False):
""" """
Function called from an erp5 script through externalMethod Function called from an erp5 script through externalMethod
that take an image and its name and save its binarized that take an image and its name and save its binarized
...@@ -29,7 +30,7 @@ def getReceiptValue(self, image_data): ...@@ -29,7 +30,7 @@ def getReceiptValue(self, image_data):
- image_data: - image_data:
base64 representation of the image to analyse base64 representation of the image to analyse
@return: @return:
- ret: float - anon: float
Represent total value paid on the receipt Represent total value paid on the receipt
---------------------------- ----------------------------
This function return the total value of the receipt in euros. This function return the total value of the receipt in euros.
...@@ -39,8 +40,36 @@ def getReceiptValue(self, image_data): ...@@ -39,8 +40,36 @@ def getReceiptValue(self, image_data):
line_list, cleared = getLinesFromPicture(image_as_array) line_list, cleared = getLinesFromPicture(image_as_array)
# Start the neural network # Start the neural network
network, lnorm = initRnnModel() if not from_stream:
network, lnorm = initRnnModel(model_name)
else:
network, lnorm = getRnnModelFromDataStream(self, model_name)
return findReceiptValue(line_list, cleared, network, lnorm)
def findReceiptValue(line_list, cleared, network, lnorm):
"""
Function that runs the neural network through the receipt and extracts
meaningful values
-----------------------------
@args:
- line_list: array list
Represent lines of text that will be extracted
from the image
- cleared:2D array
Represent binarized image cropped and cleaned,
from which we will extract text lines
- network: lstm object
Represent the trained neural net
- lnorm: method from lstm object
Represent the size of the lstm object. Is used to scale the objects
to recognize from original size to the average network object.
@return:
- anon: float
Represent total value paid on the receipt
-----------------------------
This function can be modified to add more fields to detect. It might be
possible to run a classification neural net on the result.
"""
value_list = [] value_list = []
tofind = r"(EUR)|€|(TOT)" tofind = r"(EUR)|€|(TOT)"
for _, line in enumerate(line_list): for _, line in enumerate(line_list):
...@@ -48,15 +77,32 @@ def getReceiptValue(self, image_data): ...@@ -48,15 +77,32 @@ def getReceiptValue(self, image_data):
# Corner case: he dewarping function from the normalizer fail # Corner case: he dewarping function from the normalizer fail
# sometimes on empty lines. Can be corrected with better segmentation # sometimes on empty lines. Can be corrected with better segmentation
try: try:
evaluate = getStringFromImage(binline, lnorm, network) evaluate = getStringFromImage(binline, lnorm, network)
if re.search(tofind, evaluate.upper()): if re.search(tofind, evaluate.upper()):
number = re.findall(r"\d+[\.|,]\d\d", evaluate) number = re.findall(r"\d+[\.|,]\d\d", evaluate)
value_list += [float(char.replace(',', '.')) for char in number] value_list += [float(char.replace(',', '.')) for char in number]
except ValueError: except ValueError:
pass pass
return round(max(value_list), 2) return round(max(value_list), 2)
def getRnnModelFromDataStream(self, model_name="en-default.pyrnn"):
  """
  Load a pickled neural network from a data stream.
  ----------------------------
  @args:
    - model_name: string, default: en-default.pyrnn
        Id of the object in data_stream_module that contains the
        pickled rnn model
  @return:
    - network: object
        Object obtained by unpickling the content of the data stream
        (expected to be the trained lstm network)
    - lnorm: attribute of the network, or None
        The `lnorm` attribute of the unpickled network, or None when the
        network has no such attribute. Presumably the line normalizer
        used to scale lines to the size the network expects; confirm
        against initRnnModel.
  ----------------------------
  Looking up an unknown model_name propagates the error raised by
  data_stream_module (not caught here).
  """
  data_stream = self.data_stream_module[model_name]
  # SECURITY: unpickling runs arbitrary code embedded in the payload.
  # Only data streams written by trusted users must be loaded here;
  # restrict write access to the model data streams accordingly.
  network = cPickle.loads(data_stream.getData())
  # Models pickled without an `lnorm` attribute yield None instead of
  # raising AttributeError; callers must cope with a missing normalizer.
  lnorm = getattr(network, "lnorm", None)
  return network, lnorm
def initRnnModel(model_name = "en-default.pyrnn"): def initRnnModel(model_name = "en-default.pyrnn"):
""" """
......
...@@ -46,8 +46,8 @@ ...@@ -46,8 +46,8 @@
<key> <string>text_content_warning_message</string> </key> <key> <string>text_content_warning_message</string> </key>
<value> <value>
<tuple> <tuple>
<string>W:243, 2: Attempting to unpack a non-sequence defined at line 181 of scipy.ndimage.measurements (unpacking-non-sequence)</string> <string>W:289, 2: Attempting to unpack a non-sequence defined at line 181 of scipy.ndimage.measurements (unpacking-non-sequence)</string>
<string>W:272, 2: Attempting to unpack a non-sequence defined at line 181 of scipy.ndimage.measurements (unpacking-non-sequence)</string> <string>W:318, 2: Attempting to unpack a non-sequence defined at line 181 of scipy.ndimage.measurements (unpacking-non-sequence)</string>
</tuple> </tuple>
</value> </value>
</item> </item>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment