Commit 056866e8 authored by francois, committed by Kazuhiko Shiozaki

Add component/ocropy and scipy to erp5

Needed for the receipt_recognition module. It contains the ocropy library and its binarization, segmentation and recognition tools, as well as the scipy library, whose ndimage and stats modules are used both in receipt_recognition_module (for the binarization) and inside ocropy.

This is split into 3 commits:
The first one adds ocropy and its dependencies as a slapos component;
The second one adds the ocropy component to the stack/erp5 buildout config file, and adds gcc to the binaries callable from erp5 (needed by ocropy);
The third one is only required for the module to work without installing the wendelin software.

Edit: To keep the discussion posts, I won't rebase on master. To see the final rebased commits, go to: https://lab.nexedi.com/francois.lecorre/slapos/commits/rebased_branch

/reviewed-on !153
parents c68e991b 09036a60
......@@ -29,7 +29,10 @@ CFLAGS = -I${libpng:location}/include -I${freetype:location}/include
recipe = zc.recipe.egg:custom
egg = matplotlib
environment = matplotlib-env
setup-eggs = ${numpy:egg}
setup-eggs =
cycler
python-dateutil
${numpy:egg}
rpath =
${libpng:location}/lib
${freetype:location}/lib
......
[buildout]
extends =
../git/buildout.cfg
../gzip/buildout.cfg
../scipy/buildout.cfg
../lxml-python/buildout.cfg
../matplotlib/buildout.cfg
../numpy/buildout.cfg
../patch/buildout.cfg
../pillow/buildout.cfg
../numpy/buildout.cfg
parts = ocropy
[ocropy-eng-traineddata]
recipe = hexagonit.recipe.download
filename = en-default.pyrnn.gz
md5sum = cedd140c7d7650e910f0550ad0f04727
download-only = true
url = http://www.tmbdev.net/en-default.pyrnn.gz
[ocropy-env]
OCROPY_MODEL_PATH = ${ocropy-eng-traineddata:location}/${ocropy-eng-traineddata:filename}
HOME = ${ocropy:egg}
[ocropy]
recipe = zc.recipe.egg:custom
egg = ocropy
setup-eggs =
${numpy:egg}
${scipy:egg}
${matplotlib:egg}
${pillow-python:egg}
patches =
${:_profile_base_location_}/ocropy.patch
patch-options = -p0
patch-binary = ${patch:location}/bin/patch
environment = ocropy-env
find-links = https://github.com/tmbdev/ocropy/tarball/4efbddca22bb2f0c639af0694e7a1386f2f097b5/ocropy-1.0.tar.gz
md5sum = 240b8866dd7248816e01af469a328c09
diff --git ocrolib/__init__.py ocrolib/__init__.py
index 1e0d627..81e85fb 100644
--- ocrolib/__init__.py
+++ ocrolib/__init__.py
@@ -1,7 +1,7 @@
__all__ = [
"binnednn","cairoextras","common","components","dbtables",
"fgen","gmmtree","gtkyield","hocr","lang","native",
- "mlp","multiclass","default","lineest"
+ "mlp","multiclass","default","lineest", "psegutils"
]
################################################################
@@ -9,5 +9,6 @@ __all__ = [
################################################################
import default
+from psegutils import *
from common import *
from default import traceback as trace
diff --git ocrolib/common.py ocrolib/common.py
index 27c0f26..14f088f 100644
--- ocrolib/common.py
+++ ocrolib/common.py
@@ -14,6 +14,7 @@ import unicodedata
import inspect
import glob
import cPickle
+import gzip
from ocrolib.exceptions import (BadClassLabel, BadInput, FileNotFound,
OcropusException)
@@ -428,6 +429,7 @@ def unpickle_find_global(mname,cname):
exec "import "+mname
return getattr(sys.modules[mname],cname)
+
def load_object(fname,zip=0,nofind=0,verbose=0):
"""Loads an object from disk. By default, this handles zipped files
and searches in the usual places for OCRopus. It also handles some
@@ -439,8 +441,7 @@ def load_object(fname,zip=0,nofind=0,verbose=0):
if zip==0 and fname.endswith(".gz"):
zip = 1
if zip>0:
- # with gzip.GzipFile(fname,"rb") as stream:
- with os.popen("gunzip < '%s'"%fname,"rb") as stream:
+ with gzip.GzipFile(fname,"rb") as stream:
unpickler = cPickle.Unpickler(stream)
unpickler.find_global = unpickle_find_global
return unpickler.load()
@@ -618,7 +619,7 @@ def ocropus_find_file(fname, gz=True):
possible_prefixes.append(os.path.normpath(os.path.join(
os.path.dirname(inspect.getfile(inspect.currentframe())),
- os.pardir, os.pardir, os.pardir, os.pardir, "share", "ocropus")))
+ os.pardir, "share", "ocropus")))
possible_prefixes.append("/usr/local/share/ocropus")
diff --git ocrolib/native.py ocrolib/native.py
index b7a207f..240450b 100644
--- ocrolib/native.py
+++ ocrolib/native.py
@@ -44,6 +44,7 @@ class CompileError(Exception):
def compile_and_find(c_string,prefix=".pynative",opt="-g -O4",libs="-lm",
options="-shared -fopenmp -std=c99 -fPIC",verbose=0):
+ prefix = os.path.join(os.path.dirname(__file__), prefix)
if not os.path.exists(prefix):
os.mkdir(prefix)
m = hashlib.md5()
diff --git setup.py setup.py
index 2ec5832..6697b12 100644
--- setup.py
+++ setup.py
@@ -10,7 +10,9 @@ assert sys.version_info[0]==2 and sys.version_info[1]>=7,\
from distutils.core import setup #, Extension, Command
#from distutils.command.install_data import install_data
-if not os.path.exists("models/en-default.pyrnn.gz"):
+models = os.environ.get('OCROPY_MODEL_PATH', '').split(':') or \
+ [c for c in glob.glob("models/*pyrnn.gz")]
+if not models:
print()
print("You should download the default model 'en-default.pyrnn.gz'")
print("and put it into ./models.")
@@ -18,16 +20,23 @@ if not os.path.exists("models/en-default.pyrnn.gz"):
print("Check https://github.com/tmbdev/ocropy for the location")
print("of model files.")
print()
+ sys.exit(1)
-models = [c for c in glob.glob("models/*pyrnn.gz")]
scripts = [c for c in glob.glob("ocropus-*") if "." not in c and "~" not in c]
+# compile pynative files now and include them in the build
+sys.path.insert(0, os.curdir)
+import ocrolib.nutils
+pynative_files = [os.path.join(*c.split(os.path.sep)[1:]) \
+ for c in glob.glob('ocrolib/.pynative/*')]
+
setup(
name = 'ocropy',
version = 'v1.0',
author = "Thomas Breuel",
description = "The OCRopy RNN-based Text Line Recognizer",
packages = ["ocrolib"],
- data_files= [('share/ocropus', models)],
+ package_data = {'ocrolib': pynative_files},
+ data_files= [('share/ocropus', models), ("", ["LICENSE"])],
scripts = scripts,
)
......@@ -56,9 +56,10 @@ configure-options =
make-binary =
make-targets = make profile-opt && make install
extra-ldflags =
# the entry "-Wl,-rpath=${file:location}/lib" below is needed by python-magic,
# which would otherwise load the system libmagic.so with ctypes
environment =
PATH=${patch:location}/bin:${xz-utils:location}/bin:%(PATH)s
CPPFLAGS=-I${zlib:location}/include -I${readline:location}/include -I${libexpat:location}/include -I${libffi:location}/include -I${ncurses:location}/include -I${ncurses:location}/include/ncursesw -I${bzip2:location}/include -I${gdbm:location}/include -I${openssl:location}/include -I${sqlite3:location}/include -I${gettext:location}/include
LDFLAGS=-L${zlib:location}/lib -L${readline:location}/lib -L${libexpat:location}/lib -L${libffi:location}/lib -L${ncurses:location}/lib -L${bzip2:location}/lib -L${gdbm:location}/lib -L${openssl:location}/lib -L${sqlite3:location}/lib -Wl,-rpath=${zlib:location}/lib -Wl,-rpath=${readline:location}/lib -Wl,-rpath=${libexpat:location}/lib -Wl,-rpath=${libffi:location}/lib -Wl,-rpath=${ncurses:location}/lib -Wl,-rpath=${bzip2:location}/lib -Wl,-rpath=${gdbm:location}/lib -Wl,-rpath=${openssl:location}/lib -Wl,-rpath=${sqlite3:location}/lib -L${gettext:location}/lib -Wl,-rpath=${gettext:location}/lib -Wl,-rpath=${file:location}/lib
LDFLAGS=-L${zlib:location}/lib -L${readline:location}/lib -L${libexpat:location}/lib -L${libffi:location}/lib -L${ncurses:location}/lib -L${bzip2:location}/lib -L${gdbm:location}/lib -L${openssl:location}/lib -L${sqlite3:location}/lib -Wl,-rpath=${zlib:location}/lib -Wl,-rpath=${readline:location}/lib -Wl,-rpath=${libexpat:location}/lib -Wl,-rpath=${libffi:location}/lib -Wl,-rpath=${ncurses:location}/lib -Wl,-rpath=${bzip2:location}/lib -Wl,-rpath=${gdbm:location}/lib -Wl,-rpath=${openssl:location}/lib -Wl,-rpath=${sqlite3:location}/lib -L${gettext:location}/lib -Wl,-rpath=${gettext:location}/lib -Wl,-rpath=${file:location}/lib ${:extra-ldflags}
......@@ -9,6 +9,7 @@ extends =
../../component/dbus/buildout.cfg
../../component/file/buildout.cfg
../../component/fonts/buildout.cfg
../../component/gcc/buildout.cfg
../../component/ghostscript/buildout.cfg
../../component/git/buildout.cfg
../../component/graphviz/buildout.cfg
......@@ -27,6 +28,7 @@ extends =
../../component/matplotlib/buildout.cfg
../../component/mesa/buildout.cfg
../../component/numpy/buildout.cfg
../../component/ocropy/buildout.cfg
../../component/onlyoffice-x2t/buildout.cfg
../../component/pandas/buildout.cfg
../../component/percona-toolkit/buildout.cfg
......@@ -90,6 +92,8 @@ parts +=
libreoffice-bin
w3-validator
tesseract
scipy
ocropy
hookbox
percona-toolkit
zabbix-agent
......@@ -136,6 +140,10 @@ parts +=
ipython-notebook
instance-jupyter
# override python2.7 to add SlapOS libstdc++ in RPATH.
[python2.7]
extra-ldflags = -Wl,-rpath=${gcc:location}/lib -Wl,-rpath=${gcc:location}/lib64
# override instance-jupyter not to render into default template.cfg
[instance-jupyter]
rendered = ${buildout:directory}/template-jupyter.cfg
......@@ -437,11 +445,13 @@ eggs = ${neoppod:eggs}
${numpy:egg}
${matplotlib:egg}
${lxml-python:egg}
${ocropy:egg}
${pandas:egg}
${pillow-python:egg}
${python-ldap-python:egg}
${pysvn-python:egg}
${pycrypto-python:egg}
${scipy:egg}
${scikit-learn:egg}
lock_file
astor
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment