Commit 98a50bae authored by Valentin Benozillo's avatar Valentin Benozillo

version up: Tesseract 4.0.0-beta.3 & Leptonica 1.76.0

Add OSD trained data
parent 7e2fa75d
......@@ -11,14 +11,12 @@ extends =
[leptonica]
recipe = slapos.recipe.cmmi
url = http://leptonica.googlecode.com/files/leptonica-1.68.tar.gz
md5sum = 5cd7092f9ff2ca7e3f3e73bfcd556403
url = http://www.leptonica.com/source/leptonica-1.76.0.tar.gz
md5sum = a263a5e4f7e8f8a661fb121a265d2d20
configure-options =
--disable-static
patch-options = -p1
patches =
${:_profile_base_location_}/leptonica-1.69-zlib-include.patch#cff3dc942075190939b407c38e0d3201
environment =
CPPFLAGS=-I${zlib:location}/include -I${libjpeg:location}/include -I${libpng:location}/include -I${libtiff:location}/include -I${webp:location}/include -I${giflib:location}/include
LDFLAGS=-L${zlib:location}/lib -Wl,-rpath=${zlib:location}/lib -L${libjpeg:location}/lib -Wl,-rpath=${libjpeg:location}/lib -L${libpng:location}/lib -Wl,-rpath=${libpng:location}/lib -L${libtiff:location}/lib -Wl,-rpath=${libtiff:location}/lib -L${webp:location}/lib -Wl,-rpath=${webp:location}/lib -L${giflib:location}/lib -Wl,-rpath=${giflib:location}/lib
PATH=${patch:location}/bin:%(PATH)s
PATH=${patch:location}/bin:%(PATH)s
\ No newline at end of file
[buildout]
extends =
../pkgconfig/buildout.cfg
../autoconf/buildout.cfg
../automake/buildout.cfg
../jbigkit/buildout.cfg
......@@ -8,45 +10,56 @@ extends =
../libtool/buildout.cfg
../patch/buildout.cfg
../webp/buildout.cfg
parts =
tesseract
tesseract-traineddata
tesseract-eng-traineddata
tesseract-osd-traineddata
[tesseract]
recipe = slapos.recipe.cmmi
url = http://tesseract-ocr.googlecode.com/files/tesseract-3.01.tar.gz
md5sum = 1ba496e51a42358fb9d3ffe781b2d20a
patch-options =
-p1
patches =
${:_profile_base_location_}/tesseract-3.00-gcc-4.7-build.patch#ca80db3ec489c547b03f3ee48879c1b1
${:_profile_base_location_}/tesseract-3.01-remove-bom.patch#2e691858cb492b7c17d23bf0912b3d24
${:_profile_base_location_}/tesseract-3.01-gcc6-ftbs.patch#f7a6140c0fe390b96fe753a70e9d59fd
url = https://github.com/tesseract-ocr/tesseract/archive/6b250b58121a9858d3e3019a78a6f7d421bd0fc7.tar.gz
md5sum = fdc38148ad8eb1bd0485a217503dd6d5
pkg_config_depends = ${leptonica:location}/lib/pkgconfig:${fontconfig:location}/lib/pkgconfig:${fontconfig:pkg_config_depends}:${lcms2:location}/lib/pkgconfig:${xz-utils:location}/lib/pkgconfig
pre-configure =
libtoolize -f -c
aclocal -I ${libtool:location}/share/aclocal -I config
autoheader -f
automake -c -a -f
aclocal -I${pkgconfig:location}/share/aclocal -I${libtool:location}/share/aclocal
./autogen.sh
autoreconf -ivf
autoconf -Wno-portability
configure-options =
--disable-static
--datarootdir=${tesseract-eng-traineddata:location}
--datarootdir=${tesseract-traineddata:location}
# XXX: tesseract cannot easily be configured at runtime with the location of
# its trained data, so we set its datarootdir above to a controlled location.
# tesseract has a non-standard way of testing for leptonica, hence the
# LIBLEPT_HEADERSDIR entry below:
environment =
PATH=${autoconf:location}/bin:${automake:location}/bin:${libtool:location}/bin:${m4:location}/bin:${patch:location}/bin:%(PATH)s
PATH=${pkgconfig:location}/bin:${autoconf:location}/bin:${automake:location}/bin:${libtool:location}/bin:${m4:location}/bin:${patch:location}/bin:%(PATH)s
PKG_CONFIG_PATH=${:pkg_config_depends}
ACLOCAL_ARGS=-I${libtool:location}/share/aclocal
ACLOCAL_PATH=${pkgconfig:location}/share/aclocal:${libtool:location}/share/aclocal
LIBLEPT_HEADERSDIR=${leptonica:location}/include
CPPFLAGS=-I${leptonica:location}/include
LDFLAGS =-L${leptonica:location}/lib -Wl,-rpath=${leptonica:location}/lib -L${jbigkit:location}/lib -Wl,-rpath=${jbigkit:location}/lib -L${zlib:location}/lib -Wl,-rpath=${zlib:location}/lib
make-options =
LIBTOOL=${libtool:location}/bin/libtool
[tesseract-traineddata]
location = ${buildout:parts-directory}/${:_buildout_section_name_}
[tesseract-eng-traineddata]
recipe = slapos.recipe.build:download-unpacked
strip-top-level-dir = true
url = http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.01.eng.tar.gz
md5sum = 89c139a73e0e7b1225809fc7b226b6c9
recipe = slapos.recipe.build:download
destination = ${tesseract-traineddata:location}/tessdata/eng.traineddata
url = https://github.com/tesseract-ocr/tessdata/raw/590567f20dc044f6948a8e2c61afc714c360ad0e/eng.traineddata
md5sum = 57e0df3d84fed9fbf8c7a8e589f8f012
[tesseract-osd-traineddata]
recipe = slapos.recipe.build:download
destination = ${tesseract-traineddata:location}/tessdata/osd.traineddata
url = https://github.com/tesseract-ocr/tessdata/raw/590567f20dc044f6948a8e2c61afc714c360ad0e/osd.traineddata
md5sum = 7611737524efd1ce2dde67eff629bbcf
--- tesseract-3.00/viewer/svutil.cpp.old 2012-04-11 09:34:26.168608940 +0200
+++ tesseract-3.00/viewer/svutil.cpp 2012-04-11 09:34:46.108565692 +0200
@@ -21,6 +21,7 @@
// thread/process creation & synchronization and network connection.
#include <stdio.h>
+#include <unistd.h>
#ifdef WIN32
#include <windows.h>
#include <winsock.h>
commit 58e79a222e12280984ed19ab4d3bcac654e121fa
Author: Arnaud Fontaine <arnaud.fontaine@nexedi.com>
Date: Fri Nov 4 19:10:13 2016 +0900
Fix compilation errors.
diff --git a/cube/char_set.cpp b/cube/char_set.cpp
index 3cf4798..6b29883 100644
--- a/cube/char_set.cpp
+++ b/cube/char_set.cpp
@@ -65,13 +65,13 @@ CharSet *CharSet::Create(TessdataManager *tessdata_manager,
!tessdata_manager->SeekToStart(TESSDATA_UNICHARSET)) {
fprintf(stderr, "Cube ERROR (CharSet::Create): could not find "
"either cube or tesseract unicharset\n");
- return false;
+ return NULL;
}
FILE *charset_fp = tessdata_manager->GetDataFilePtr();
if (!charset_fp) {
fprintf(stderr, "Cube ERROR (CharSet::Create): could not load "
"a unicharset\n");
- return false;
+ return NULL;
}
// If we found a cube unicharset separate from tesseract's, load it and
@@ -84,7 +84,7 @@ CharSet *CharSet::Create(TessdataManager *tessdata_manager,
loaded = char_set->LoadSupportedCharList(charset_fp, NULL);
if (!loaded) {
delete char_set;
- return false;
+ return NULL;
}
char_set->init_ = true;
diff --git a/cube/cube_line_segmenter.cpp b/cube/cube_line_segmenter.cpp
index deee573..3f0b762 100644
--- a/cube/cube_line_segmenter.cpp
+++ b/cube/cube_line_segmenter.cpp
@@ -124,7 +124,7 @@ Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix,
if (line_con_comps == NULL) {
delete []lines_pixa;
- return false;
+ return NULL;
}
// assign each conn comp to the a line based on its centroid
@@ -142,7 +142,7 @@ Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix,
delete []lines_pixa;
boxaDestroy(&line_con_comps);
pixaDestroy(&line_con_comps_pix);
- return false;
+ return NULL;
}
}
@@ -413,14 +413,14 @@ Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box,
(*dest_box) = boxCreate(min_x, min_y, max_x - min_x, max_y - min_y);
if ((*dest_box) == NULL) {
- return false;
+ return NULL;
}
// create the union pix
Pix *union_pix = pixCreate((*dest_box)->w, (*dest_box)->h, img_->d);
if (union_pix == NULL) {
boxDestroy(dest_box);
- return false;
+ return NULL;
}
// create a pix corresponding to the union of all pixs
diff --git a/cube/cube_object.cpp b/cube/cube_object.cpp
index 48bce64..b9a7113 100644
--- a/cube/cube_object.cpp
+++ b/cube/cube_object.cpp
@@ -165,7 +165,7 @@ WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
if (deslanted_beam_obj_ == NULL) {
fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
"construct deslanted BeamSearch\n");
- return false;
+ return NULL;
}
}
diff --git a/cube/word_list_lang_model.cpp b/cube/word_list_lang_model.cpp
index 18f85c1..0f7f562 100644
--- a/cube/word_list_lang_model.cpp
+++ b/cube/word_list_lang_model.cpp
@@ -74,7 +74,7 @@ LangModEdge **WordListLangModel::GetEdges(CharAltList *alt_list,
// initialize if necessary
if (init_ == false) {
if (Init() == false) {
- return false;
+ return NULL;
}
}
The patch below removes a utf-8 BOM mark.
Avoid touching it as the BOM is invisible, and copy/pasting might not work.
It is needed because old compilers treat the BOM as garbage instead of
whitespace.
--- tesseract-3.01/ccutil/strngs.h.orig 2012-05-24 15:13:22.743808379 +0200
+++ tesseract-3.01/ccutil/strngs.h 2012-05-24 15:16:54.468858282 +0200
@@ -1,4 +1,4 @@
-/**********************************************************************
+/**********************************************************************
* File: strngs.h (Formerly strings.h)
* Description: STRING class definition.
* Author: Ray Smith
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment