Import Debian changes 5.3.4-ok1

tesseract (5.3.4-ok1) nile; urgency=medium

  * Build for openKylin.
This commit is contained in:
Luoyaoming 2024-04-24 17:50:55 +08:00 committed by luoyaoming
parent 29b002c6c9
commit 454466e830
15 changed files with 277 additions and 0 deletions

11
debian/README.debian vendored Normal file
View File

@ -0,0 +1,11 @@
Upstream user documentation:
https://tesseract-ocr.github.io/
Auto generated API documentation:
https://tesseract-ocr.github.io/tessapi/5.x/
Training documentation:
https://tesseract-ocr.github.io/tessdoc/tess4/TrainingTesseract-4.00

5
debian/changelog vendored Normal file
View File

@ -0,0 +1,5 @@
tesseract (5.3.4-ok1) nile; urgency=medium
* Build for openKylin.
-- Luoyaoming <Luoyaoming@kylinos.cn> Wed, 24 Apr 2024 17:50:55 +0800

101
debian/control vendored Normal file
View File

@ -0,0 +1,101 @@
Source: tesseract
Section: graphics
Priority: optional
Maintainer: openKylin Developers <packaging@lists.openkylin.top>
XSBC-Original-Maintainer: Alexander Pozdnyakov <almipo@mail.ru>
Build-Depends: debhelper-compat (= 12), libleptonica-dev (>= 1.75.3),
automake, libtool, libarchive-dev, libpango1.0-dev, libcairo2-dev, libicu-dev,
libpng-dev, libjpeg-dev, libtiff-dev, zlib1g-dev, git, asciidoc,
xsltproc, docbook-xsl, docbook-xml, tesseract-ocr-eng (>= 5.0.0~), autoconf-archive, libcurl4-openssl-dev
Standards-Version: 4.5.1
Homepage: https://github.com/tesseract-ocr/
Rules-Requires-Root: no
Package: tesseract-ocr
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends},
tesseract-ocr-eng (>= 4.0.9~), tesseract-ocr-osd (>= 4.0.9~), libtesseract5 (= ${binary:Version}), liblept5 (>= 1.75.3)
Replaces: tesseract-ocr-data
Description: Tesseract command line OCR tool
Tesseract is an open source Optical Character Recognition (OCR)
Engine. It can be used directly, or (for programmers) using an API to
extract printed text from images. It supports a wide variety of
languages. This package includes the command line tool.
Package: libtesseract-dev
Section: libdevel
Architecture: any
Multi-Arch: same
Depends: libtesseract5 (= ${binary:Version}), ${misc:Depends}, libleptonica-dev, libarchive-dev, libcurl4-dev | libcurl4-gnutls-dev
Replaces: tesseract-ocr-dev (<< 3.0.1-1~)
Breaks: tesseract-ocr-dev (<< 3.0.1-1~)
Description: Development files for the tesseract command line OCR tool
Tesseract is an open source Optical Character Recognition (OCR)
Engine. It can be used directly, or (for programmers) using an API to
extract printed text from images. It supports a wide variety of
languages. This package includes the development files, static
library, and header files.
Package: libtesseract5
Section: libs
Multi-Arch: same
Breaks: tesseract-ocr (<< 3.01~), ocropus (<< 0.4.0~)
Replaces: tesseract-ocr (<< 3.01~)
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}, liblept5 (>= 1.75.3)
Description: Tesseract OCR library
Tesseract is an open source Optical Character Recognition (OCR)
Engine. It can be used directly, or (for programmers) using an API to
extract printed text from images. It supports a wide variety of
languages. This package includes the shared library.
Package: tesseract-ocr-all
Architecture: all
Depends: ${misc:Depends}, tesseract-ocr,
tesseract-ocr-bul, tesseract-ocr-cat, tesseract-ocr-ces, tesseract-ocr-dan,
tesseract-ocr-deu, tesseract-ocr-ell, tesseract-ocr-eng, tesseract-ocr-fin,
tesseract-ocr-fra, tesseract-ocr-hun, tesseract-ocr-ind, tesseract-ocr-ita,
tesseract-ocr-lav, tesseract-ocr-lit, tesseract-ocr-nld, tesseract-ocr-nor,
tesseract-ocr-pol, tesseract-ocr-por, tesseract-ocr-ron, tesseract-ocr-rus,
tesseract-ocr-slk, tesseract-ocr-slv, tesseract-ocr-spa, tesseract-ocr-srp,
tesseract-ocr-swe, tesseract-ocr-tur, tesseract-ocr-ukr, tesseract-ocr-vie,
tesseract-ocr-chi-sim, tesseract-ocr-chi-tra, tesseract-ocr-amh, tesseract-ocr-asm,
tesseract-ocr-aze-cyrl, tesseract-ocr-bod, tesseract-ocr-bos, tesseract-ocr-ceb,
tesseract-ocr-cym, tesseract-ocr-dzo, tesseract-ocr-fas, tesseract-ocr-gle,
tesseract-ocr-guj, tesseract-ocr-hat, tesseract-ocr-iku, tesseract-ocr-jav,
tesseract-ocr-kat, tesseract-ocr-kat-old, tesseract-ocr-kaz, tesseract-ocr-khm,
tesseract-ocr-kir, tesseract-ocr-lao, tesseract-ocr-lat, tesseract-ocr-mar,
tesseract-ocr-mya, tesseract-ocr-nep, tesseract-ocr-ori, tesseract-ocr-pan,
tesseract-ocr-pus, tesseract-ocr-san, tesseract-ocr-sin, tesseract-ocr-srp-latn,
tesseract-ocr-syr, tesseract-ocr-tgk, tesseract-ocr-tir, tesseract-ocr-uig,
tesseract-ocr-urd, tesseract-ocr-uzb, tesseract-ocr-uzb-cyrl, tesseract-ocr-yid,
tesseract-ocr-osd, tesseract-ocr-afr, tesseract-ocr-ara, tesseract-ocr-aze,
tesseract-ocr-bel, tesseract-ocr-ben, tesseract-ocr-chr, tesseract-ocr-enm,
tesseract-ocr-epo, tesseract-ocr-est, tesseract-ocr-eus, tesseract-ocr-frk,
tesseract-ocr-frm, tesseract-ocr-glg, tesseract-ocr-heb, tesseract-ocr-hin,
tesseract-ocr-hrv, tesseract-ocr-isl, tesseract-ocr-ita-old, tesseract-ocr-jpn,
tesseract-ocr-kan, tesseract-ocr-kor, tesseract-ocr-mal, tesseract-ocr-mkd,
tesseract-ocr-mlt, tesseract-ocr-msa, tesseract-ocr-spa-old, tesseract-ocr-sqi,
tesseract-ocr-swa, tesseract-ocr-tam, tesseract-ocr-tel, tesseract-ocr-tha,
tesseract-ocr-bre, tesseract-ocr-chi-sim-vert, tesseract-ocr-chi-tra-vert,
tesseract-ocr-cos, tesseract-ocr-div, tesseract-ocr-fao, tesseract-ocr-fil,
tesseract-ocr-fry, tesseract-ocr-gla, tesseract-ocr-hye, tesseract-ocr-jpn-vert,
tesseract-ocr-kor-vert, tesseract-ocr-kmr, tesseract-ocr-ltz, tesseract-ocr-mon,
tesseract-ocr-mri, tesseract-ocr-oci, tesseract-ocr-que, tesseract-ocr-snd,
tesseract-ocr-sun, tesseract-ocr-tat, tesseract-ocr-ton, tesseract-ocr-yor,
tesseract-ocr-script-arab, tesseract-ocr-script-armn, tesseract-ocr-script-beng,
tesseract-ocr-script-cans, tesseract-ocr-script-cher, tesseract-ocr-script-cyrl,
tesseract-ocr-script-deva, tesseract-ocr-script-ethi, tesseract-ocr-script-frak,
tesseract-ocr-script-geor, tesseract-ocr-script-grek, tesseract-ocr-script-gujr,
tesseract-ocr-script-guru, tesseract-ocr-script-hans, tesseract-ocr-script-hans-vert,
tesseract-ocr-script-hant, tesseract-ocr-script-hant-vert, tesseract-ocr-script-hang,
tesseract-ocr-script-hang-vert, tesseract-ocr-script-hebr, tesseract-ocr-script-jpan,
tesseract-ocr-script-jpan-vert, tesseract-ocr-script-knda, tesseract-ocr-script-khmr,
tesseract-ocr-script-laoo, tesseract-ocr-script-latn, tesseract-ocr-script-mlym,
tesseract-ocr-script-mymr, tesseract-ocr-script-orya, tesseract-ocr-script-sinh,
tesseract-ocr-script-syrc, tesseract-ocr-script-taml, tesseract-ocr-script-telu,
tesseract-ocr-script-thaa, tesseract-ocr-script-thai, tesseract-ocr-script-tibt,
tesseract-ocr-script-viet, tesseract-ocr-grc
Description: Tesseract OCR with all language and script packages
This is a metapackage for Tesseract OCR and includes all supported
languages and scripts.

32
debian/copyright vendored Normal file
View File

@ -0,0 +1,32 @@
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: tesseract-ocr
Upstream-Contact: Ray Smith <theraysmith@users.sourceforge.net>
Source: https://github.com/tesseract-ocr/
Files: *
Copyright: 1988-1995 Hewlett Packard Company.
2006-2024 Google Inc.
License: Apache-2.0
Files: debian/*
Copyright: 2007-2009 Jeffrey Ratcliffe <Jeffrey.Ratcliffe@gmail.com>
2010-2019 Jeff Breidenbach <jab@debian.org>
2010-2024 Alexander Pozdnyakov <almipo@mail.ru>
License: Apache-2.0
License: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the license at
.
http://www.apache.org/licenses/LICENSE-2.0
.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
.
On Debian systems, the complete text of the Apache 2.0 license can be
found in the file
`/usr/share/common-licenses/Apache-2.0`.

1
debian/docs vendored Normal file
View File

@ -0,0 +1 @@
README.md

4
debian/libtesseract-dev.install vendored Normal file
View File

@ -0,0 +1,4 @@
usr/lib/*/libtesseract.so
usr/lib/*/*.a
usr/include/tesseract/*.h
usr/lib/*/pkgconfig/*

4
debian/libtesseract5.install vendored Normal file
View File

@ -0,0 +1,4 @@
usr/lib/*/*.so.*
usr/share/tessdata/configs/* usr/share/tesseract-ocr/5/tessdata/configs/
usr/share/tessdata/tessconfigs/* usr/share/tesseract-ocr/5/tessdata/tessconfigs/
usr/share/tessdata/pdf.ttf usr/share/tesseract-ocr/5/tessdata/

18
debian/patches/disable_NEON.diff vendored Normal file
View File

@ -0,0 +1,18 @@
Description: disable NEON instructions for architecture armel
Author: Alexander Pozdnyakov <almipo@mail.ru>
Last-Update: 2023-10-07
--- tesseract-5.2.0.orig/configure.ac
+++ tesseract-5.2.0/configure.ac
@@ -170,6 +170,11 @@
;;
+ arm)
+
+ AC_MSG_WARN([No compiler options for $host_cpu])
+ ;;
+
aarch64*|arm64)
# ARMv8 always has NEON and does not need special compiler flags.

34
debian/patches/helptext vendored Normal file
View File

@ -0,0 +1,34 @@
Description: Reduce bug reports by tweaking help text.
Some advanced modes require data not shipped by distribution.
Adjust documentation to reduce confusion from that.
.
tesseract (4.00~git2188-cdc35338-5) unstable; urgency=medium
.
* Update README.debian and move to tesseract-ocr package.
* Sponsor Alexander Pozdnyakov as maintainer.
* Tweak tesseract --help to reduce bug reports.
Author: Jeff Breidenbach <jab@debian.org>
---
The information above should follow the Patch Tagging Guidelines, please
checkout http://dep.debian.net/deps/dep3/ to learn about the format. Here
are templates for supplementary fields that you might want to add:
Origin: vendor, N/A
Bug: https://github.com/tesseract-ocr/tesseract/issues/707#issuecomment-364159579
Forwarded: no
Reviewed-By: Jeff Breidenbach <jab@debian.org>
Last-Update: 2021-12-02
--- tesseract-5.0.0.orig/src/tesseract.cpp
+++ tesseract-5.0.0/src/tesseract.cpp
@@ -209,7 +209,7 @@
#ifndef DISABLED_LEGACY_ENGINE
static void PrintHelpForOEM() {
const char *msg =
- "OCR Engine modes:\n"
+ "OCR Engine modes: (see https://tesseract-ocr.github.io/tessdoc/#40-with-lstm)\n"
" 0 Legacy engine only.\n"
" 1 Neural nets LSTM engine only.\n"
" 2 Legacy + LSTM engines.\n"

5
debian/patches/series vendored Normal file
View File

@ -0,0 +1,5 @@
#man.diff
#fix-up-headers
helptext
#shebang.diff
#no-march-native

48
debian/rules vendored Executable file
View File

@ -0,0 +1,48 @@
#!/usr/bin/make -f
# debian/rules prologue: build flags, architecture variables and the parallel
# job count consumed by the override_dh_* targets of this file.
#export DH_VERBOSE=1

# Request every hardening feature from dpkg-buildflags.
export DEB_BUILD_MAINT_OPTIONS = hardening=+all

# Provide the DEB_HOST_* / DEB_BUILD_* variables (GNU type, multiarch triplet)
# that the configure step relies on.  dpkg-buildpackage normally exports them;
# including the dpkg snippet keeps a direct "debian/rules <target>" run working.
include /usr/share/dpkg/architecture.mk

# GNU make older than 4.4 does not hand exported makefile variables to
# $(shell ...), so the hardening request is passed to dpkg-buildflags
# explicitly instead of relying on the export above.
buildflags_cmd := DEB_BUILD_MAINT_OPTIONS='$(DEB_BUILD_MAINT_OPTIONS)' dpkg-buildflags

CFLAGS  := $(shell $(buildflags_cmd) --get CFLAGS)
LDFLAGS := $(shell $(buildflags_cmd) --get LDFLAGS)
# Project-specific additions; TESSDATA_PREFIX is baked in as a C string literal.
CFLAGS  += -Wall -g -fPIC -DTESSDATA_PREFIX='\"/usr/share/tesseract-ocr/5\"'

# Honour "parallel=N" from DEB_BUILD_OPTIONS; default to one job otherwise.
ifneq (,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
  NUMJOBS = $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
endif
ifeq (,$(NUMJOBS))
  NUMJOBS = 1
endif
# Catch-all rule: any target without an explicit override in this file is
# delegated to the debhelper sequencer, with the autoreconf add-on enabled.
%:
	dh $@ --with autoreconf
# Build the default upstream targets first, then the "training" target.
# $(MAKE) is used instead of a literal "make" so the sub-builds run with the
# same make binary and are recognised by make as recursive invocations
# (matching what override_dh_auto_install already does).
override_dh_auto_build:
	$(MAKE) -j$(NUMJOBS)
	$(MAKE) -j$(NUMJOBS) training
# Stage the regular install and the training-tools install under debian/tmp.
# $(CURDIR) is make's absolute working directory: no shell fork is needed, and
# the quotes keep a build path containing spaces in one argument.
override_dh_auto_install:
	$(MAKE) DESTDIR="$(CURDIR)/debian/tmp" prefix=/usr install
	$(MAKE) DESTDIR="$(CURDIR)/debian/tmp" prefix=/usr training-install
# Smoke test of the freshly built binary: print its version, then run it on
# the sample image test/testing/phototest.tif with "-" as the output base.
# The recipe is left empty when DEB_BUILD_OPTIONS contains "nocheck".
override_dh_auto_test:
ifeq ($(filter nocheck,$(DEB_BUILD_OPTIONS)),)
	./tesseract -v
	./tesseract ./test/testing/phototest.tif -
endif
# Clean-up in three steps: the build system's own clean, removal of the
# Makefiles under java/com, and restoration of the configure.ac backup that
# override_dh_auto_configure may have left behind.
override_dh_auto_clean:
	dh_auto_clean
	dh_clean java/com/Makefile \
	    java/com/google/Makefile \
	    java/com/google/scrollview/Makefile \
	    java/com/google/scrollview/events/Makefile \
	    java/com/google/scrollview/ui/Makefile
	if [ -f configure.ac.orig ]; then mv configure.ac.orig configure.ac; fi
# Regenerate the autotools files and configure the tree for the host
# architecture.
#
# The NEON patch must follow the architecture the package is built FOR, so the
# check uses DEB_HOST_GNU_TYPE: with DEB_BUILD_GNU_TYPE a cross-build targeting
# armel would skip it.  The backup/patch step is skipped when
# configure.ac.orig already exists, so re-running this target neither
# overwrites the pristine backup nor re-applies an already applied patch;
# override_dh_auto_clean restores configure.ac from that backup.
override_dh_auto_configure:
ifeq ($(DEB_HOST_GNU_TYPE),arm-linux-gnueabi)
	test -f configure.ac.orig || { cp configure.ac configure.ac.orig && patch -p1 < debian/patches/disable_NEON.diff; }
else
	echo "no patch"
endif
	./autogen.sh
	# "\$$" reaches configure as a literal "$", so ${prefix} is expanded by
	# configure itself rather than by make or the shell.
	./configure --host=$(DEB_HOST_GNU_TYPE) --build=$(DEB_BUILD_GNU_TYPE) \
	    --disable-tessdata-prefix \
	    --prefix=/usr \
	    --libdir=\$${prefix}/lib/$(DEB_HOST_MULTIARCH) \
	    --mandir=\$${prefix}/share/man \
	    --infodir=\$${prefix}/share/info \
	    CXXFLAGS="$(CFLAGS)" \
	    LDFLAGS="-llept -Wl,-z,defs $(LDFLAGS)"

1
debian/source/format vendored Normal file
View File

@ -0,0 +1 @@
3.0 (quilt)

3
debian/tesseract-ocr.install vendored Normal file
View File

@ -0,0 +1,3 @@
usr/bin/*
usr/share/man/man1/*
usr/share/man/man5/*

7
debian/upstream/metadata vendored Normal file
View File

@ -0,0 +1,7 @@
Name: tesseract-ocr
Bug-Database: https://github.com/tesseract-ocr/tesseract/issues
Changelog: https://github.com/tesseract-ocr/tesseract/releases
FAQ: https://tesseract-ocr.github.io/tessdoc/FAQ
Contact: http://groups.google.com/group/tesseract-ocr
Repository: https://github.com/tesseract-ocr/tesseract.git
Repository-Browse: https://github.com/tesseract-ocr/tesseract

3
debian/watch vendored Normal file
View File

@ -0,0 +1,3 @@
version=4
https://github.com/tesseract-ocr/tesseract/tags .*/(\d[^\s/]*)\.tar\.gz debian
opts=component=test,mode=git https://github.com/tesseract-ocr/test.git HEAD ignore uupdate