diff --git a/.gitignore b/.gitignore index 2d239ba9..a07eb912 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,15 @@ -/venv +*~ +/venv* +# manual control: local.mk +# generated by all: ocrd-all-tool.json ocrd-all-module-dir.json +ocrd-all-meta.json +ocrd-all-images.yaml +# generated by network-setup: +docker-compose.yml +.env +ocrd-processing-server-config.yaml +# generated by images: +/ocrd/* diff --git a/.gitmodules b/.gitmodules index 694ceff4..778144ff 100644 --- a/.gitmodules +++ b/.gitmodules @@ -30,9 +30,6 @@ [submodule "cor-asv-ann"] path = cor-asv-ann url = https://github.com/ASVLeipzig/cor-asv-ann.git -[submodule "format-converters"] - path = format-converters - url = https://github.com/OCR-D/format-converters.git [submodule "ocrd_cis"] path = ocrd_cis url = https://github.com/cisocrgroup/ocrd_cis.git @@ -42,10 +39,6 @@ [submodule "ocrd_anybaseocr"] path = ocrd_anybaseocr url = https://github.com/OCR-D/ocrd_anybaseocr.git -[submodule "opencv-python"] - path = opencv-python - url = https://github.com/skvark/opencv-python.git - shallow = true [submodule "workflow-configuration"] path = workflow-configuration url = https://github.com/bertsky/workflow-configuration.git diff --git a/Makefile b/Makefile index 34759552..254dab47 100644 --- a/Makefile +++ b/Makefile @@ -5,10 +5,6 @@ # Python version (python3 required). export PYTHON ?= python3 -# PIP_OPTIONS ?= # empty -# Derived variable to allow filtering -e, or inserting other options -# (the option --editable must always be last and only applies to src install) -PIP_OPTIONS_E = $(filter-out -e,$(PIP_OPTIONS)) # Set to 1 to skip all submodule updates. For development. 
NO_UPDATE ?= 0 # Set to non-empty to try running all executables with --help / -h during make check @@ -24,78 +20,27 @@ GIT_RECURSIVE = # --recursive GIT_DEPTH = # --depth 1 or --single-branch # directory for virtual Python environment -# (but re-use if already active); overridden -# to nested venv in recursive calls for modules -# that have known dependency clashes with others +# (but re-use if already active) export VIRTUAL_ENV ?= $(CURDIR)/venv -ifeq (0, $(MAKELEVEL)) -SUB_VENV = $(VIRTUAL_ENV)/sub-venv -SUB_VENV_TF1 = $(SUB_VENV)/headless-tf1 -else -SUB_VENV_TF1 = $(VIRTUAL_ENV) -endif BIN = $(VIRTUAL_ENV)/bin SHARE = $(VIRTUAL_ENV)/share ACTIVATE_VENV = $(BIN)/activate -# Get Python major and minor versions for some conditional rules. -PYTHON_VERSION := $(shell $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))') - -# core version to ensure docker images are based on the latest tagged release -CORE_VERSION = $(shell git -C core describe --tags --abbrev=0) - define SEMGIT $(if $(shell sem --version 2>/dev/null),sem -q --will-cite --fg --id ocrd_all_git,$(error cannot find package GNU parallel)) endef -define SEMPIP -$(if $(shell sem --version 2>/dev/null),sem -q --will-cite --fg --id ocrd_all_pip$(notdir $(VIRTUAL_ENV)),$(error cannot find package GNU parallel)) -endef - -define WGET -$(if $(shell wget --version 2>/dev/null),wget -nv -O $(1) $(2),$(if $(shell curl --version 2>/dev/null),curl -L -o $(1) $(2),$(error found no cmdline downloader (wget/curl)))) -endef - -ifeq ($(PKG_CONFIG_PATH),) -PKG_CONFIG_PATH := $(VIRTUAL_ENV)/lib/pkgconfig -else -PKG_CONFIG_PATH := $(VIRTUAL_ENV)/lib/pkgconfig:$(PKG_CONFIG_PATH) -endif -export PKG_CONFIG_PATH - SHELL := $(shell which bash) -OCRD_EXECUTABLES = $(BIN)/ocrd # add more CLIs below -CUSTOM_DEPS = unzip wget parallel git less # add more packages for deps-ubuntu below (or modules as preqrequisites) +OCRD_EXECUTABLES = # add more CLIs below +OCRD_IMAGES = # add Docker images 
below -DEFAULT_DISABLED_MODULES = cor-asv-fst opencv-python ocrd_ocropy ocrd_neat -ifeq ($(filter docker-%,$(MAKECMDGOALS)),) -ifneq ($(PYTHON_VERSION),3.8) -# Disable modules which require tensorflow-gpu 1.15 unless running a Python version which provides it. -DEFAULT_DISABLED_MODULES += cor-asv-ann ocrd_keraslm -endif -endif -ifeq ($(PYTHON_VERSION),3.12) -# The required tensorflow is not available for Python 3.12. -DEFAULT_DISABLED_MODULES += eynollah ocrd_anybaseocr ocrd_calamari -# The required coremltools does not support Python 3.12. -DEFAULT_DISABLED_MODULES += ocrd_kraken -endif -ifeq ($(shell uname -s),Darwin) -# Disable ocrd_detectron2 because of missing dependency torchvision==0.16.2. -DEFAULT_DISABLED_MODULES += ocrd_detectron2 -# Disable ocrd_olena for macOS because build is broken. -DEFAULT_DISABLED_MODULES += ocrd_olena -# Disable ocrd_segment for macOS, see https://github.com/OCR-D/ocrd_segment/issues/64.. -DEFAULT_DISABLED_MODULES += ocrd_segment -endif +DEFAULT_DISABLED_MODULES = cor-asv-fst ocrd_ocropy ocrd_neat DISABLED_MODULES ?= $(DEFAULT_DISABLED_MODULES) # Default to all submodules, but allow overriding by user # (and treat the empty value as if it was unset) -# opencv-python is only needed for aarch64-linux-gnu and other less common platforms, -# so don't include it by default. 
ifeq ($(strip $(OCRD_MODULES)),) override OCRD_MODULES := $(filter-out $(DISABLED_MODULES),$(shell $(GIT) submodule status | while read commit dir ref; do echo $$dir; done)) endif @@ -109,11 +54,14 @@ endif .PHONY: all modules clean help show check always-update install-models -clean: # add more prerequisites for clean below - $(RM) -r $(SUB_VENV) +clean: network-clean $(RM) -r $(CURDIR)/venv # deliberately not using VIRTUAL_ENV here $(RM) -r $(HOME)/.parallel/semaphores/id-ocrd_* - $(RM) ocrd-all-tool.json ocrd-all-module-dir.json ocrd-all-meta.json + $(RM) ocrd-all-tool.json ocrd-all-module-dir.json ocrd-all-meta.json ocrd-all-images.yaml + +.PHONY: images-clean +images-clean: + for image in $(OCRD_IMAGES); do docker rmi $$image; $(RM) $$image; done define HELP cat <<"EOF" @@ -128,45 +76,55 @@ Targets (general): Targets (module management): modules: download all submodules to the managed revision deinit: clean, then deinit and rmdir all submodules - tidy: clean, then deinit opencv-python and git-clean all submodules + tidy: clean, then git-clean all submodules (WARNING: potential data loss; if unsure, try with `make -n` and `git clean -n`) Targets (system dependencies, may need root privileges): - deps-ubuntu: install all system dependencies of all modules - deps-cuda: install CUDA toolkit and libraries (via micromamba and nvidia-pyindex) + deps-ubuntu: install system dependencies Targets (build and installation into venv): - all: install all executables of all modules - ocrd: only install the virtual environment and OCR-D/core packages - fix-cuda: workaround for non-conflicting CUDA libs after installation - clean: remove the virtual environment directory, and make clean-* + images: download or rebuild Docker images associated with submodules (see `DOCKER_PULL_POLICY`) + all: install all executables of all modules/images (see `DOCKER_RUN_OPTS`) + ocrd: only install the multi-purpose CLI of OCR-D/core + clean: remove the virtual environment directory + 
images-clean: remove the Docker images + +Targets (ocrd_network specific): + network-setup: generate `docker-compose.yml` and `.env` based on `OCRD_NETWORK_CONFIG` + network-start: alias for `docker compose up --wait --wait-timeout 30 -d` + network-stop: alias for `docker compose down` + network-clean: remove files generated during `network-setup` Targets (testing): - check: verify that all executables are runnable and the venv is consistent - test-core: verify ocrd via core module regression tests + check: verify that all executables are runnable test-cuda: verify that CUDA is available for Tensorflow and Pytorch test-workflow: verify that most executables work correctly via test runs on test data Targets (auxiliary data): - ocrd-all-tool.json: generate union of ocrd-tool.json's tools section for all executables of all modules - ocrd-all-meta.json: map executable to ocrd-tool.json's metadata section for all executables of all modules - ocrd-all-module-dir.json: map executable to module location for all executables of all modules + ocrd-all-tool.json: generate union of ocrd-tool.json's tools section for all executables of all `OCRD_MODULES` + ocrd-all-meta.json: map executable to ocrd-tool.json's metadata section for all executables of all `OCRD_MODULES` + ocrd-all-images.yaml: list all `OCRD_IMAGES` in a file + init-vol-models: initialise shared Docker volume `DOCKER_VOL_MODELS` with files from module images but user permissions install-models: download commonly used models to appropriate locations - -Targets (build of container images): - docker: (re)build a docker image including all executables - dockers: (re)build docker images for some pre-selected subsets of modules + clean-vol-models: remove shared Docker volume `DOCKER_VOL_MODELS` Variables: OCRD_MODULES: selection of submodules to include. Default: all git submodules (see `show`) DISABLED_MODULES: list of disabled modules. 
Default: "$(DISABLED_MODULES)" + DOCKER_PULL_POLICY: use `build` or `pull` to get Docker images. Default: $(DOCKER_PULL_POLICY) + DOCKER_VOL_MODELS: name of Docker volume to be mounted for processor resources (see `init-vol-models`). + Default: "$(DOCKER_VOL_MODELS)" + DOCKER_RUN_OPTS: additional options for `docker run` (volumes like `DOCKER_VOL_MODELS`, user mapping etc.). + Default: "$(DOCKER_RUN_OPTS)" + DOCKER_RUN_POLICY: behaviour of executables - set to `local` to use `docker run`, or `client` to use ocrd_network + (overrides auto-detection based on state after `network-setup` vs. `network-clean`) + OCRD_NETWORK_CONFIG: configuration file for ocrd_network. Default: "$(OCRD_NETWORK_CONFIG)" GIT_RECURSIVE: set to `--recursive` to checkout/update all submodules recursively GIT_DEPTH: set to `--depth 1` to truncate all history when cloning subrepos NO_UPDATE: set to `1` to omit git submodule sync and update VIRTUAL_ENV: absolute path to (re-)use for the virtual environment TMPDIR: path to use for temporary storage instead of the system default PYTHON: name of the Python binary (also used for target `deps-ubuntu` unless set to `python`) - PIP_OPTIONS: extra options for the `pip install` command like `-q` or `-v` or `-e` CHECK_HELP: set to `1` to also check each executable can generate help output EOF endef @@ -202,7 +160,6 @@ deinit: clean .PHONY: tidy tidy: clean - git submodule status opencv-python | grep -q ^- || git submodule deinit opencv-python git submodule foreach --recursive git clean -fxd # if you already have a clone with too many refs, consider the following recipe: #git submodule foreach 'for ref in $(git for-each-ref --no-contains=HEAD --format="%(refname)" refs/remotes/ | sed s,^refs/remotes/,,); do git branch -d -r $ref; done' @@ -211,115 +168,100 @@ tidy: clean # Get Python modules. -$(BIN)/pip: $(ACTIVATE_VENV) - . 
$(ACTIVATE_VENV) && $(SEMPIP) pip install --upgrade pip setuptools - %/bin/activate: - $(SEMPIP) $(PYTHON) -m venv $(subst /bin/activate,,$@) - . $@ && $(SEMPIP) pip install --upgrade pip setuptools wheel - -.PHONY: wheel -wheel: $(BIN)/wheel -$(BIN)/wheel: | $(ACTIVATE_VENV) - . $(ACTIVATE_VENV) && $(SEMPIP) pip install --force-reinstall $(PIP_OPTIONS_E) wheel + $(PYTHON) -m venv $(subst /bin/activate,,$@) + . $@ && pip install --upgrade pip setuptools wheel + . $@ && pip install click requests pyyaml ocrd dotenv # Install modules from source. .PHONY: ocrd ocrd: $(BIN)/ocrd ifneq ($(filter core, $(OCRD_MODULES)),) -deps-ubuntu-modules: core -$(BIN)/ocrd: core - . $(ACTIVATE_VENV) && $(MAKE) -C $< install PIP="$(SEMPIP) pip" PIP_INSTALL="$(SEMPIP) pip install $(PIP_OPTIONS)" && touch -c $@ -else -CUSTOM_DEPS += python3 imagemagick libgeos-dev -$(BIN)/ocrd: | $(ACTIVATE_VENV) - . $(ACTIVATE_VENV) && $(SEMPIP) pip install $(PIP_OPTIONS_E) ocrd +OCRD_EXECUTABLES += $(CORE) +CORE := $(BIN)/ocrd +CORE += $(BIN)/ocrd-dummy +CORE += $(BIN)/ocrd-filter +OCRD_IMAGES += ocrd/core +$(CORE): ocrd/core + $(call delegate_docker,$@,$<) +OCRD_EXECUTABLES += $(BIN)/ocrd-process +$(BIN)/ocrd-process: ocrd/core + . $(ACTIVATE_VENV) && python run-network/creator.py create-workflow-client $@ +ocrd/core: DOCKER_PROFILES = +ocrd/core: ./core + $(call pullpolicy_docker,$<,$@) endif -.PHONY: test-core -test-core: core $(BIN)/ocrd - . $(ACTIVATE_VENV) && $(MAKE) -C $< deps-test test - # Convert the executable names (1) to a pattern rule, # so that the recipe will be used with single-recipe- -# multiple-output semantics: +# multiple-output semantics (make >= 4.3 implements +# 'grouped-target' for this, but we cannot rely on that): multirule = $(patsubst $(BIN)/%,\%/%,$(1)) -ifneq ($(filter format-converters, $(OCRD_MODULES)),) -OCRD_EXECUTABLES += $(PAGE2IMG) -PAGE2IMG := $(BIN)/page2img -format-converters/page2img.py: format-converters -$(PAGE2IMG): format-converters/page2img.py - . 
$(ACTIVATE_VENV) && $(SEMPIP) pip install validators - echo "#!$(BIN)/python3" | cat - $< >$@ - chmod +x $@ -endif - -ifneq ($(filter opencv-python, $(OCRD_MODULES)),) -CUSTOM_DEPS += cmake gcc g++ -# libavcodec-dev libavformat-dev libswscale-dev libgstreamer-plugins-base1.0-dev libgstreamer1.0-dev -# libpng-dev libjpeg-dev libopenexr-dev libtiff-dev libwebp-dev libjasper-dev -opencv-python: GIT_RECURSIVE = --recursive -opencv-python/setup.py: opencv-python -$(SHARE)/opencv-python: opencv-python/setup.py | $(ACTIVATE_VENV) $(SHARE) - . $(ACTIVATE_VENV) && cd $(=2 in other modules -ifeq (0,$(MAKELEVEL)) - $(MAKE) -o $< $(notdir $(OCRD_KRAKEN)) VIRTUAL_ENV=$(SUB_VENV_TF1) - $(call delegate_venv,$(OCRD_KRAKEN),$(SUB_VENV_TF1)) -ocrd_kraken-check: - $(MAKE) check OCRD_MODULES=ocrd_kraken VIRTUAL_ENV=$(SUB_VENV_TF1) -else - $(pip_install) -endif +OCRD_KRAKEN += $(BIN)/kraken +OCRD_KRAKEN += $(BIN)/ketos +OCRD_IMAGES += ocrd/kraken +$(OCRD_KRAKEN): ocrd/kraken + $(call delegate_docker,$@,$<) +ocrd/kraken: DOCKER_PROFILES = maximum +ocrd/kraken: ./ocrd_kraken + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_detectron2, $(OCRD_MODULES)),) OCRD_EXECUTABLES += $(OCRD_DETECTRON2) -OCRD_DETECTRON2 := $(BIN)/ocrd-detectron2-segment -$(OCRD_DETECTRON2): ocrd_detectron2 $(BIN)/ocrd | $(OCRD_KRAKEN) - . $(ACTIVATE_VENV) && $(MAKE) -C $< deps - # pre-empt conflict around typing-extensions - . 
$(ACTIVATE_VENV) && $(SEMPIP) pip install -i https://download.pytorch.org/whl/cpu torchvision==0.16.2 torch==2.1.2 - $(pip_install) +OCRD_DETECTRON2 := $(BIN)/ocrd-detectron2-ocrd +OCRD_DETECTRON2 += $(BIN)/ocrd-detectron2-segment +OCRD_IMAGES += ocrd/detectron2 +$(OCRD_DETECTRON2): ocrd/detectron2 + $(call delegate_docker,$@,$<) +ocrd/detectron2: DOCKER_PROFILES = maximum +ocrd/detectron2: ./ocrd_detectron2 + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_page2alto, $(OCRD_MODULES)),) OCRD_EXECUTABLES += $(OCRD_PAGE_TO_ALTO) -OCRD_PAGE_TO_ALTO := $(BIN)/ocrd-page2alto-transform +OCRD_PAGE_TO_ALTO := $(BIN)/ocrd-page2alto-ocrd +OCRD_PAGE_TO_ALTO += $(BIN)/ocrd-page2alto-transform OCRD_PAGE_TO_ALTO += $(BIN)/page-to-alto -$(call multirule,$(OCRD_PAGE_TO_ALTO)): ocrd_page2alto $(BIN)/ocrd - $(pip_install) +OCRD_IMAGES += ocrd/page2alto +$(OCRD_PAGE_TO_ALTO): ocrd/page2alto + $(call delegate_docker,$@,$<) +ocrd/page2alto: DOCKER_PROFILES = +ocrd/page2alto: ./ocrd_page2alto + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_ocropy, $(OCRD_MODULES)),) OCRD_EXECUTABLES += $(OCRD_OCROPY) -OCRD_OCROPY := $(BIN)/ocrd-ocropy-segment -$(OCRD_OCROPY): ocrd_ocropy $(BIN)/ocrd - $(pip_install) +OCRD_OCROPY := $(BIN)/ocrd-ocropy-ocrd +OCRD_OCROPY += $(BIN)/ocrd-ocropy-segment +OCRD_IMAGES += ocrd/ocropy +$(OCRD_OCROPY): ocrd/ocropy + $(call delegate_docker,$@,$<) +ocrd/ocropy: DOCKER_PROFILES = +ocrd/ocropy: ./ocrd_ocropy + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter cor-asv-ann, $(OCRD_MODULES)),) OCRD_EXECUTABLES += $(OCRD_COR_ASV_ANN) -OCRD_COR_ASV_ANN := $(BIN)/ocrd-cor-asv-ann-evaluate +OCRD_COR_ASV_ANN := $(BIN)/ocrd-cor-asv-ann-ocrd +OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-evaluate OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-process OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-align OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-join @@ -329,50 +271,38 @@ OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-proc OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-eval 
OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-compare OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-repl -$(call multirule,$(OCRD_COR_ASV_ANN)): cor-asv-ann $(BIN)/ocrd -ifeq (0,$(MAKELEVEL)) - $(MAKE) -o $< $(notdir $(OCRD_COR_ASV_ANN)) VIRTUAL_ENV=$(SUB_VENV_TF1) - $(call delegate_venv,$(OCRD_COR_ASV_ANN),$(SUB_VENV_TF1)) -cor-asv-ann-check: - $(MAKE) check OCRD_MODULES=cor-asv-ann VIRTUAL_ENV=$(SUB_VENV_TF1) -else - $(pip_install_tf1nvidia) - $(pip_install) -endif +OCRD_IMAGES += ocrd/cor-asv-ann +$(OCRD_COR_ASV_ANN): ocrd/cor-asv-ann + $(call delegate_docker,$@,$<) +ocrd/cor-asv-ann: DOCKER_PROFILES = medium maximum +ocrd/cor-asv-ann: ./cor-asv-ann + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter cor-asv-fst, $(OCRD_MODULES)),) -deps-ubuntu-modules: cor-asv-fst OCRD_EXECUTABLES += $(OCRD_COR_ASV_FST) -OCRD_COR_ASV_FST := $(BIN)/ocrd-cor-asv-fst-process +OCRD_COR_ASV_FST := $(BIN)/ocrd-cor-asv-fst-ocrd +OCRD_COR_ASV_FST += $(BIN)/ocrd-cor-asv-fst-process OCRD_COR_ASV_FST += $(BIN)/cor-asv-fst-train -$(call multirule,$(OCRD_COR_ASV_FST)): cor-asv-fst $(BIN)/ocrd -ifeq (0,$(MAKELEVEL)) - $(MAKE) -o $< $(notdir $(OCRD_COR_ASV_FST)) VIRTUAL_ENV=$(SUB_VENV_TF1) - $(call delegate_venv,$(OCRD_COR_ASV_FST),$(SUB_VENV_TF1)) -cor-asv-fst-check: - $(MAKE) check OCRD_MODULES=cor-asv-fst VIRTUAL_ENV=$(SUB_VENV_TF1) -else - $(pip_install_tf1nvidia) - . 
$(ACTIVATE_VENV) && $(MAKE) -C $< deps - $(pip_install) -endif +OCRD_IMAGES += ocrd/cor-asv-fst +$(OCRD_COR_ASV_FST): ocrd/cor-asv-fst + $(call delegate_docker,$@,$<) +ocrd/cor-asv-fst: DOCKER_PROFILES = maximum +ocrd/cor-asv-fst: ./cor-asv-fst + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_keraslm, $(OCRD_MODULES)),) OCRD_EXECUTABLES += $(OCRD_KERASLM) -OCRD_KERASLM := $(BIN)/ocrd-keraslm-rate +OCRD_KERASLM := $(BIN)/ocrd-keraslm-ocrd +OCRD_KERASLM += $(BIN)/ocrd-keraslm-rate OCRD_KERASLM += $(BIN)/keraslm-rate -$(call multirule,$(OCRD_KERASLM)): ocrd_keraslm $(BIN)/ocrd -ifeq (0,$(MAKELEVEL)) - $(MAKE) -o $< $(notdir $(OCRD_KERASLM)) VIRTUAL_ENV=$(SUB_VENV_TF1) - $(call delegate_venv,$(OCRD_KERASLM),$(SUB_VENV_TF1)) -ocrd_keraslm-check: - $(MAKE) check OCRD_MODULES=ocrd_keraslm VIRTUAL_ENV=$(SUB_VENV_TF1) -else - $(pip_install_tf1nvidia) - $(pip_install) -endif +OCRD_IMAGES += ocrd/keraslm +$(OCRD_KERASLM): DOCKER_PROFILES = medium, maximum +$(OCRD_KERASLM): ocrd/keraslm + $(call delegate_docker,$@,$<) +ocrd/keraslm: ./ocrd_keraslm + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_neat, $(OCRD_MODULES)),) @@ -384,63 +314,105 @@ OCRD_NEAT += $(BIN)/annotate-tsv OCRD_NEAT += $(BIN)/page2tsv OCRD_NEAT += $(BIN)/tsv2page OCRD_NEAT += $(BIN)/make-page2tsv-commands -$(call multirule,$(OCRD_NEAT)): ocrd_neat $(BIN)/ocrd - $(pip_install) +OCRD_IMAGES += ocrd/neat +$(OCRD_NEAT): ocrd/neat + $(call delegate_docker,$@,$<) +ocrd/neat: DOCKER_PROFILES = maximum +ocrd/neat: ./ocrd_neat + $(call pullpolicy_docker,$<,$@) endif - ifneq ($(filter ocrd_wrap, $(OCRD_MODULES)),) OCRD_EXECUTABLES += $(OCRD_WRAP) -OCRD_WRAP := $(BIN)/ocrd-preprocess-image +OCRD_WRAP := $(BIN)/ocrd-wrap-ocrd +OCRD_WRAP += $(BIN)/ocrd-preprocess-image OCRD_WRAP += $(BIN)/ocrd-skimage-normalize OCRD_WRAP += $(BIN)/ocrd-skimage-denoise-raw OCRD_WRAP += $(BIN)/ocrd-skimage-binarize OCRD_WRAP += $(BIN)/ocrd-skimage-denoise -$(call multirule,$(OCRD_WRAP)): ocrd_wrap $(BIN)/ocrd - 
$(pip_install) +OCRD_IMAGES += ocrd/wrap +$(OCRD_WRAP): ocrd/wrap + $(call delegate_docker,$@,$<) +ocrd/wrap: DOCKER_PROFILES = +ocrd/wrap: ./ocrd_wrap + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_fileformat, $(OCRD_MODULES)),) ocrd_fileformat: GIT_RECURSIVE = --recursive -OCRD_EXECUTABLES += $(BIN)/ocrd-fileformat-transform -$(BIN)/ocrd-fileformat-transform: ocrd_fileformat $(BIN)/ocrd - . $(ACTIVATE_VENV) && $(MAKE) -C $< install-fileformat install +OCRD_EXECUTABLES += $(OCRD_FILEFORMAT) +OCRD_FILEFORMAT := $(BIN)/ocrd-fileformat-ocrd +OCRD_FILEFORMAT += $(BIN)/ocrd-fileformat-transform +OCRD_FILEFORMAT += $(BIN)/ocr-transform +OCRD_FILEFORMAT += $(BIN)/ocr-validate +OCRD_IMAGES += ocrd/fileformat +$(OCRD_FILEFORMAT): ocrd/fileformat + $(call delegate_docker,$@,$<) +ocrd/fileformat: DOCKER_PROFILES = +ocrd/fileformat: ./ocrd_fileformat + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_olena, $(OCRD_MODULES)),) ocrd_olena: GIT_RECURSIVE = --recursive -deps-ubuntu-modules: ocrd_olena -OCRD_EXECUTABLES += $(BIN)/ocrd-olena-binarize -$(BIN)/ocrd-olena-binarize: ocrd_olena $(BIN)/ocrd - . $(ACTIVATE_VENV) && $(MAKE) -C $< install BUILD_DIR=$(VIRTUAL_ENV)/build/ocrd_olena +OCRD_EXECUTABLES += $(OCRD_OLENA) +OCRD_OLENA := $(BIN)/ocrd-olena-ocrd +OCRD_OLENA += $(BIN)/ocrd-olena-binarize +OCRD_OLENA += $(BIN)/scribo-cli +OCRD_IMAGES += ocrd/olena +$(OCRD_OLENA): ocrd/olena + $(call delegate_docker,$@,$<) +ocrd/olena: DOCKER_PROFILES = medium maximum +ocrd/olena: ./ocrd_olena + $(call pullpolicy_docker,$<,$@) endif -clean: clean-olena -.PHONY: clean-olena -clean-olena: - test ! 
-f ocrd_olena/Makefile || \ - $(MAKE) -C ocrd_olena clean-olena BUILD_DIR=$(VIRTUAL_ENV)/build/ocrd_olena ifneq ($(filter dinglehopper, $(OCRD_MODULES)),) -OCRD_EXECUTABLES += $(BIN)/ocrd-dinglehopper -$(BIN)/ocrd-dinglehopper: dinglehopper $(BIN)/ocrd - $(pip_install) +OCRD_EXECUTABLES += $(OCRD_DINGLEHOPPER) +OCRD_DINGLEHOPPER := $(BIN)/ocrd-dinglehopper-ocrd +OCRD_DINGLEHOPPER += $(BIN)/ocrd-dinglehopper +OCRD_DINGLEHOPPER += $(BIN)/dinglehopper +OCRD_DINGLEHOPPER += $(BIN)/dinglehopper-extract +OCRD_DINGLEHOPPER += $(BIN)/dinglehopper-summarize +OCRD_DINGLEHOPPER += $(BIN)/dinglehopper-line-dirs +OCRD_IMAGES += ocrd/dinglehopper +$(OCRD_DINGLEHOPPER): ocrd/dinglehopper + $(call delegate_docker,$@,$<) +ocrd/dinglehopper: DOCKER_PROFILES = medium maximum +ocrd/dinglehopper: ./dinglehopper + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter docstruct, $(OCRD_MODULES)),) -OCRD_EXECUTABLES += $(BIN)/ocrd-docstruct -$(BIN)/ocrd-docstruct: docstruct $(BIN)/ocrd - $(pip_install) +OCRD_EXECUTABLES += $(OCRD_DOCSTRUCT) +OCRD_DOCSTRUCT := $(BIN)/ocrd-docstruct-ocrd +OCRD_DOCSTRUCT += $(BIN)/ocrd-docstruct +OCRD_IMAGES += ocrd/docstruct +$(OCRD_DOCSTRUCT): ocrd/docstruct + $(call delegate_docker,$@,$<) +ocrd/docstruct: DOCKER_PROFILES = medium maximum +ocrd/docstruct: ./docstruct + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter nmalign, $(OCRD_MODULES)),) -OCRD_EXECUTABLES += $(BIN)/ocrd-nmalign-merge -$(BIN)/ocrd-nmalign-merge: nmalign $(BIN)/ocrd - $(pip_install) +OCRD_EXECUTABLES += $(OCRD_NMALIGN) +OCRD_NMALIGN := $(BIN)/ocrd-nmalign-ocrd +OCRD_NMALIGN += $(BIN)/ocrd-nmalign-merge +OCRD_NMALIGN += $(BIN)/nmalign +OCRD_IMAGES += ocrd/nmalign +$(OCRD_NMALIGN): ocrd/nmalign + $(call delegate_docker,$@,$<) +ocrd/nmalign: DOCKER_PROFILES = medium maximum +ocrd/nmalign: ./nmalign + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_segment, $(OCRD_MODULES)),) OCRD_EXECUTABLES += $(OCRD_SEGMENT) -OCRD_SEGMENT := $(BIN)/ocrd-segment-evaluate +OCRD_SEGMENT := 
$(BIN)/ocrd-segment-ocrd +OCRD_SEGMENT += $(BIN)/ocrd-segment-evaluate +OCRD_SEGMENT += $(BIN)/page-segment-evaluate OCRD_SEGMENT += $(BIN)/ocrd-segment-from-masks OCRD_SEGMENT += $(BIN)/ocrd-segment-from-coco OCRD_SEGMENT += $(BIN)/ocrd-segment-extract-glyphs @@ -453,66 +425,66 @@ OCRD_SEGMENT += $(BIN)/ocrd-segment-replace-page OCRD_SEGMENT += $(BIN)/ocrd-segment-replace-text OCRD_SEGMENT += $(BIN)/ocrd-segment-repair OCRD_SEGMENT += $(BIN)/ocrd-segment-project -$(call multirule,$(OCRD_SEGMENT)): ocrd_segment $(BIN)/ocrd - $(pip_install) +OCRD_IMAGES += ocrd/segment +$(OCRD_SEGMENT): ocrd/segment + $(call delegate_docker,$@,$<) +ocrd/segment: DOCKER_PROFILES = medium maximum +ocrd/segment: ./ocrd_segment + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_tesserocr, $(OCRD_MODULES)),) ocrd_tesserocr: GIT_RECURSIVE = --recursive install-models: install-models-tesseract .PHONY: install-models-tesseract -install-models-tesseract: - . $(ACTIVATE_VENV) && ocrd resmgr download ocrd-tesserocr-recognize '*' +install-models-tesseract: ocrd-tesserocr-ocrd + ocrd-tesserocr-ocrd resmgr download ocrd-tesserocr-recognize '*' OCRD_EXECUTABLES += $(OCRD_TESSEROCR) -deps-ubuntu-modules: ocrd_tesserocr -OCRD_TESSEROCR := $(BIN)/ocrd-tesserocr-binarize +OCRD_TESSEROCR := $(BIN)/ocrd-tesserocr-ocrd +OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-fontshape +OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-binarize OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-crop OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-deskew OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-recognize OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-line OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-region +OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-table OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-word OCRD_TESSEROCR += $(BIN)/tesseract -TESSTRAIN_EXECUTABLES = -TESSTRAIN_EXECUTABLES += $(BIN)/ambiguous_words -TESSTRAIN_EXECUTABLES += $(BIN)/classifier_tester 
-TESSTRAIN_EXECUTABLES += $(BIN)/cntraining -TESSTRAIN_EXECUTABLES += $(BIN)/combine_lang_model -TESSTRAIN_EXECUTABLES += $(BIN)/combine_tessdata -TESSTRAIN_EXECUTABLES += $(BIN)/dawg2wordlist -TESSTRAIN_EXECUTABLES += $(BIN)/lstmeval -TESSTRAIN_EXECUTABLES += $(BIN)/lstmtraining -TESSTRAIN_EXECUTABLES += $(BIN)/merge_unicharsets -TESSTRAIN_EXECUTABLES += $(BIN)/mftraining -TESSTRAIN_EXECUTABLES += $(BIN)/set_unicharset_properties -TESSTRAIN_EXECUTABLES += $(BIN)/shapeclustering -TESSTRAIN_EXECUTABLES += $(BIN)/text2image -TESSTRAIN_EXECUTABLES += $(BIN)/unicharset_extractor -TESSTRAIN_EXECUTABLES += $(BIN)/wordlist2dawg -$(call multirule,$(OCRD_TESSEROCR)): ocrd_tesserocr $(BIN)/ocrd - . $(ACTIVATE_VENV) && $(MAKE) -C $< install # install-tesseract-training - -endif -clean: clean-tesseract -clean-tesseract: ocrd_tesserocr - -$(MAKE) -C $< $@ clean-assets -# (keep these rules merely for backwards compatibility) -install-tesseract: ocrd_tesserocr $(BIN)/tesseract -install-tesseract-training: ocrd_tesserocr $(TESSTRAIN_EXECUTABLES) -install-tesseract install-tesseract-training: - $(MAKE) -C $< $@ -.PHONY: clean-tesseract install-tesseract install-tesseract-training +OCRD_TESSEROCR += $(BIN)/ambiguous_words +OCRD_TESSEROCR += $(BIN)/classifier_tester +OCRD_TESSEROCR += $(BIN)/cntraining +OCRD_TESSEROCR += $(BIN)/combine_lang_model +OCRD_TESSEROCR += $(BIN)/combine_tessdata +OCRD_TESSEROCR += $(BIN)/dawg2wordlist +OCRD_TESSEROCR += $(BIN)/lstmeval +OCRD_TESSEROCR += $(BIN)/lstmtraining +OCRD_TESSEROCR += $(BIN)/merge_unicharsets +OCRD_TESSEROCR += $(BIN)/mftraining +OCRD_TESSEROCR += $(BIN)/set_unicharset_properties +OCRD_TESSEROCR += $(BIN)/shapeclustering +OCRD_TESSEROCR += $(BIN)/text2image +OCRD_TESSEROCR += $(BIN)/unicharset_extractor +OCRD_TESSEROCR += $(BIN)/wordlist2dawg +OCRD_IMAGES += ocrd/tesserocr +$(OCRD_TESSEROCR): ocrd/tesserocr + $(call delegate_docker,$@,$<) +ocrd/tesserocr: DOCKER_PROFILES = +ocrd/tesserocr: ./ocrd_tesserocr + $(call 
pullpolicy_docker,$<,$@) +endif ifneq ($(filter ocrd_cis, $(OCRD_MODULES)),) install-models: install-models-ocropus .PHONY: install-models-ocropus -install-models-ocropus: - . $(ACTIVATE_VENV) && ocrd resmgr download ocrd-cis-ocropy-recognize '*' +install-models-ocropus: ocrd-cis-ocrd + ocrd-cis-ocrd resmgr download ocrd-cis-ocropy-recognize '*' OCRD_EXECUTABLES += $(OCRD_CIS) -OCRD_CIS := $(BIN)/ocrd-cis-align +OCRD_CIS := $(BIN)/ocrd-cis-ocrd +OCRD_CIS += $(BIN)/ocrd-cis-align OCRD_CIS += $(BIN)/ocrd-cis-data OCRD_CIS += $(BIN)/ocrd-cis-ocropy-binarize OCRD_CIS += $(BIN)/ocrd-cis-ocropy-clip @@ -522,213 +494,216 @@ OCRD_CIS += $(BIN)/ocrd-cis-ocropy-dewarp OCRD_CIS += $(BIN)/ocrd-cis-ocropy-recognize OCRD_CIS += $(BIN)/ocrd-cis-ocropy-resegment OCRD_CIS += $(BIN)/ocrd-cis-ocropy-segment -#OCRD_CIS += $(BIN)/ocrd-cis-ocropy-train +OCRD_CIS += $(BIN)/ocrd-cis-ocropy-train OCRD_CIS += $(BIN)/ocrd-cis-postcorrect -$(call multirule,$(OCRD_CIS)): ocrd_cis $(BIN)/ocrd - $(pip_install) +OCRD_IMAGES += ocrd/cis +$(OCRD_CIS): ocrd/cis + $(call delegate_docker,$@,$<) +ocrd/cis: DOCKER_PROFILES = +ocrd/cis: ./ocrd_cis + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_pagetopdf, $(OCRD_MODULES)),) -deps-ubuntu-modules: ocrd_pagetopdf OCRD_EXECUTABLES += $(OCRD_PAGETOPDF) -OCRD_PAGETOPDF := $(BIN)/ocrd-pagetopdf -$(OCRD_PAGETOPDF): ocrd_pagetopdf $(BIN)/ocrd - . $(ACTIVATE_VENV) && $(MAKE) -C $< install +OCRD_PAGETOPDF := $(BIN)/ocrd-pagetopdf-ocrd +OCRD_PAGETOPDF += $(BIN)/ocrd-pagetopdf +OCRD_PAGETOPDF += $(BIN)/ocrd-altotopdf +OCRD_IMAGES += ocrd/pagetopdf +$(OCRD_PAGETOPDF): ocrd/pagetopdf + $(call delegate_docker,$@,$<) +ocrd/pagetopdf: DOCKER_PROFILES = +ocrd/pagetopdf: ./ocrd_pagetopdf + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_calamari, $(OCRD_MODULES)),) install-models: install-models-calamari .PHONY: install-models-calamari -install-models-calamari: $(BIN)/ocrd - . 
$(ACTIVATE_VENV) && ocrd resmgr download ocrd-calamari-recognize '*' +install-models-calamari: ocrd-calamari-ocrd + ocrd-calamari-ocrd resmgr download ocrd-calamari-recognize '*' OCRD_EXECUTABLES += $(OCRD_CALAMARI) -OCRD_CALAMARI := $(BIN)/ocrd-calamari-recognize -$(OCRD_CALAMARI): ocrd_calamari $(BIN)/ocrd - $(pip_install) +OCRD_CALAMARI := $(BIN)/ocrd-calamari-ocrd +OCRD_CALAMARI += $(BIN)/ocrd-calamari-recognize +OCRD_CALAMARI += $(BIN)/calamari-ocr +OCRD_IMAGES += ocrd/calamari +$(OCRD_CALAMARI): ocrd/calamari + $(call delegate_docker,$@,$<) +ocrd/calamari: DOCKER_PROFILES = medium maximum +ocrd/calamari: ./ocrd_calamari + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_anybaseocr, $(OCRD_MODULES)),) OCRD_EXECUTABLES += $(OCRD_ANYBASEOCR) -OCRD_ANYBASEOCR := $(BIN)/ocrd-anybaseocr-crop +OCRD_ANYBASEOCR := $(BIN)/ocrd-anybaseocr-ocrd +OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-crop OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-layout-analysis -$(call multirule,$(OCRD_ANYBASEOCR)): ocrd_anybaseocr $(BIN)/ocrd - $(pip_install) +OCRD_IMAGES += ocrd/anybaseocr +$(OCRD_ANYBASEOCR): ocrd/anybaseocr + $(call delegate_docker,$@,$<) +ocrd/anybaseocr: DOCKER_PROFILES = maximum +ocrd/anybaseocr: ./ocrd_anybaseocr + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_froc, $(OCRD_MODULES)),) OCRD_EXECUTABLES += $(OCRD_FROC) -OCRD_FROC := $(BIN)/ocrd-froc-recognize -$(OCRD_FROC): ocrd_froc $(BIN)/ocrd - $(pip_install) +OCRD_FROC := $(BIN)/ocrd-froc-ocrd +OCRD_FROC += $(BIN)/ocrd-froc-recognize +OCRD_IMAGES += ocrd/froc +$(OCRD_FROC): ocrd/froc + $(call delegate_docker,$@,$<) +ocrd/froc: DOCKER_PROFILES = maximum +ocrd/froc: ./ocrd_froc + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_doxa, $(OCRD_MODULES)),) OCRD_EXECUTABLES += $(OCRD_DOXA) -OCRD_DOXA := $(BIN)/ocrd-doxa-binarize -$(OCRD_DOXA): ocrd_doxa $(BIN)/ocrd - $(pip_install) +OCRD_DOXA := $(BIN)/ocrd-doxa-ocrd +OCRD_DOXA += $(BIN)/ocrd-doxa-binarize +OCRD_IMAGES += ocrd/doxa +$(OCRD_DOXA): 
ocrd/doxa + $(call delegate_docker,$@,$<) +ocrd/doxa: DOCKER_PROFILES = maximum +ocrd/doxa: ./ocrd_doxa + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter eynollah, $(OCRD_MODULES)),) install-models: install-models-eynollah .PHONY: install-models-eynollah -install-models-eynollah: - . $(ACTIVATE_VENV) && ocrd resmgr download ocrd-eynollah-segment '*' +install-models-eynollah: ocrd-eynollah-ocrd + ocrd-eynollah-ocrd resmgr download ocrd-eynollah-segment '*' OCRD_EXECUTABLES += $(EYNOLLAH_SEGMENT) -EYNOLLAH_SEGMENT := $(BIN)/ocrd-eynollah-segment -EYNOLLAH_SEGMENT := $(BIN)/ocrd-sbb-binarize -$(EYNOLLAH_SEGMENT): eynollah $(BIN)/ocrd - $(pip_install) - # solve conflict with ocrd_calamari: - . $(ACTIVATE_VENV) && $(SEMPIP) pip install "protobuf<4" +EYNOLLAH_SEGMENT := $(BIN)/ocrd-eynollah-ocrd +EYNOLLAH_SEGMENT += $(BIN)/ocrd-eynollah-segment +EYNOLLAH_SEGMENT += $(BIN)/ocrd-sbb-binarize +EYNOLLAH_SEGMENT += $(BIN)/eynollah +OCRD_IMAGES += ocrd/eynollah +$(EYNOLLAH_SEGMENT): ocrd/eynollah + $(call delegate_docker,$@,$<) +ocrd/eynollah: DOCKER_PROFILES = maximum +ocrd/eynollah: ./eynollah + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter ocrd_olahd_client, $(OCRD_MODULES)),) OCRD_EXECUTABLES += $(OCRD_OLAHD_CLIENT) -OCRD_OLAHD_CLIENT := $(BIN)/ocrd-olahd-client -$(OCRD_OLAHD_CLIENT): ocrd_olahd_client $(BIN)/ocrd - $(pip_install) +OCRD_OLAHD_CLIENT := $(BIN)/ocrd-olahd-ocrd +OCRD_OLAHD_CLIENT += $(BIN)/ocrd-olahd-client +OCRD_IMAGES += ocrd/olahd-client +$(OCRD_OLAHD_CLIENT): ocrd/olahd-client + $(call delegate_docker,$@,$<) +ocrd/olahd-client: DOCKER_PROFILES = +ocrd/olahd-client: ./ocrd_olahd_client + $(call pullpolicy_docker,$<,$@) endif ifneq ($(filter workflow-configuration, $(OCRD_MODULES)),) -deps-ubuntu-modules: workflow-configuration OCRD_EXECUTABLES += $(WORKFLOW_CONFIGURATION) -WORKFLOW_CONFIGURATION := $(BIN)/ocrd-make +WORKFLOW_CONFIGURATION := $(BIN)/ocrd-make-ocrd +WORKFLOW_CONFIGURATION += $(BIN)/ocrd-make WORKFLOW_CONFIGURATION += 
$(BIN)/ocrd-import WORKFLOW_CONFIGURATION += $(BIN)/ocrd-page-transform -$(BIN)/ocrd-make-check: override CHECK_HELP= -$(call multirule,$(WORKFLOW_CONFIGURATION)): workflow-configuration $(BIN)/ocrd - $(MAKE) -C $< install -endif - -define pip_install -. $(ACTIVATE_VENV) && cd $< && $(SEMPIP) pip install $(PIP_OPTIONS) . && touch -c $@ +OCRD_IMAGES += ocrd/workflow-configuration +# fixme: add others... +$(WORKFLOW_CONFIGURATION): ocrd/workflow-configuration + $(call delegate_docker,$@,$<) +ocrd/workflow-configuration: DOCKER_PROFILES = +ocrd/workflow-configuration: ./workflow-configuration + $(call pullpolicy_docker,$<,$@) +endif + +# canned recipes for executables as Docker runners: + +# create shell scripts for each executable that either +# 1) run the ocrd network client for the passed Processing Server +# (if a .env config for the network setup exists) +# 2) run the standalone CLI in the passed Docker image +# (otherwise) +define delegate_docker +. $(ACTIVATE_VENV) && python run-network/creator.py create-client --docker-run-opts "$(DOCKER_RUN_OPTS)" $(1) $(notdir $(1:%-ocrd=ocrd)) $(2) endef -# Workaround for missing prebuilt versions of TF<2 for Python==3.8 -# todo: find another solution for 3.9, 3.10 etc -# https://docs.nvidia.com/deeplearning/frameworks/tensorflow-wheel-release-notes/tf-wheel-rel.html -# Nvidia has them, but under a different name, so let's rewrite that: -# (hold at nv22.12, because newer releases require CUDA 12, which is not supported by TF2, -# and therefore not in our ocrd/core-cuda base image yet) -define pip_install_tf1nvidia = -. $(ACTIVATE_VENV) && if test $(PYTHON_VERSION) = 3.8 && ! 
pip show -q tensorflow-gpu; then \ - $(SEMPIP) pip install nvidia-pyindex && \ - pushd $$(mktemp -d) && \ - $(SEMPIP) pip download --no-deps "nvidia-tensorflow==1.15.5+nv22.12" && \ - for name in nvidia_tensorflow-*.whl; do name=$${name%.whl}; done && \ - $(PYTHON) -m wheel unpack $$name.whl && \ - for name in nvidia_tensorflow-*/; do name=$${name%/}; done && \ - newname=$${name/nvidia_tensorflow/tensorflow_gpu} &&\ - sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/METADATA && \ - sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/RECORD && \ - sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/tensorflow_core/tools/pip_package/setup.py && \ - pushd $$name && for path in $$name*; do mv $$path $${path/$$name/$$newname}; done && popd && \ - $(PYTHON) -m wheel pack $$name && \ - $(SEMPIP) pip install --no-cache-dir $$newname*.whl && popd && rm -fr $$OLDPWD; fi -# - preempt conflict over numpy between h5py and tensorflow -. $(ACTIVATE_VENV) && $(SEMPIP) pip install "numpy<1.24" -endef -# pattern for recursive make: -# $(executables...): module... $(BIN)/ocrd -# ifeq (0,$(MAKELEVEL)) -# $(MAKE) -o $< $(notdir $(executables...)) VIRTUAL_ENV=$(SUB_VENV_name) -# $(call delegate_venv,$(executables...),$(SUB_VENV_name)) -# else -# actual recipe... -# fi -# -- adds ocrd as dependency to ensure the venv gets created first, -# but also with -o $< to avoid updating the submodule twice); -# overrides the venv path for nested make via target-specific var - -# canned recipes after recursive make for -# modules in nested venvs: - -# echo a shell script that relays to -# the (currently active) sub-venv -# (replacing the outer by the inner -# venv directory to ensure there -# is no infinite recursion when -# the sub-venv does not have the -# executable) -# TODO: variant for relay to Docker -ifeq ($(firstword $(subst ., ,$(MAKE_VERSION))),4) -# make >=4 has file function -define delegator -#!/usr/bin/env bash -. 
$(2)/bin/activate && $(2)/bin/$(notdir $(1)) "$$@" -endef -# create shell scripts that relay to -# the (currently active) sub-venv -define delegate_venv -$(foreach executable,$(1),$(file >$(executable),$(call delegator,$(executable),$(2)))) -chmod +x $(1) -endef -else -# make <4 needs to echo (third recipe line must be empty!) -define delegator -@echo '#!/usr/bin/env bash' > $(1) -@echo '. $(2)/bin/activate && $(2)/bin/$(notdir $(1)) "$$@"' >> $(1) +DOCKER_PULL_POLICY ?= pull +DOCKER_VOL_MODELS ?= ocrd-models +DOCKER_RUN_OPTS ?= -v $(DOCKER_VOL_MODELS):/usr/local/share/ocrd-resources -u $$UID +ifeq ($(DOCKER_PULL_POLICY),build) +define pullpolicy_docker +$(MAKE) -C $1 docker DOCKER_TAG=$2 +mkdir -p $(dir $2) +@echo built: `date -Iseconds` > $2 +@echo tools: `$(PYTHON) -c "import json; print(json.dumps(list(json.load(open('$1/ocrd-tool.json'))['tools'])))"` >> $2 +@echo profiles: $(DOCKER_PROFILES) >> $2 endef -define delegate_venv -$(foreach executable,$(1),$(call delegator,$(executable),$(2))) -chmod +x $(1) +else +define pullpolicy_docker +docker pull $2 +mkdir -p $(dir $2) +@echo pulled: `date -Iseconds` > $2 +@echo tools: `$(PYTHON) -c "import json; print(json.dumps(list(json.load(open('$1/ocrd-tool.json'))['tools'])))"` >> $2 +@echo profiles: $(DOCKER_PROFILES) >> $2 endef endif -$(SHARE): - @mkdir -p "$@" - -# At last, add venv dependency (must not become first): -$(OCRD_EXECUTABLES) $(BIN)/wheel: | $(BIN)/pip -$(OCRD_EXECUTABLES): | $(BIN)/wheel -# Also, add core dependency (but in a non-circular way): -$(filter-out $(BIN)/ocrd,$(OCRD_EXECUTABLES)): $(BIN)/ocrd +# copy any module-provided models into the shared named-volume +# allow non-root model updates in the named-volume +define initmodels_docker +docker run --rm -v $(DOCKER_VOL_MODELS):/models $1 bash -O nullglob -c 'for executable in /usr/local/share/ocrd-resources/*; do cp -t /models -rv $$executable; done; find /models -type d -exec chmod 777 {} \;; find /models -type f -exec chmod 666 {} \;' +endef 
# At last, we know what all OCRD_EXECUTABLES are: # (json targets depend on OCRD_MODULES and OCRD_EXECUTABLES) -all: ocrd-all-tool.json ocrd-all-module-dir.json ocrd-all-meta.json - . $(ACTIVATE_VENV) && cp -f $^ `python -c "import ocrd; print(ocrd.__path__[0])"` - if test -d $(SUB_VENV_TF1); then . $(SUB_VENV_TF1)/bin/activate && cp -f $^ `python -c "import ocrd; print(ocrd.__path__[0])"`; fi +all: $(OCRD_EXECUTABLES) +all: ocrd-all-tool.json ocrd-all-meta.json ocrd-all-images.yaml + +images: $(OCRD_IMAGES) +#.PHONY: $(OCRD_IMAGES) show: @echo VIRTUAL_ENV = $(VIRTUAL_ENV) @echo OCRD_MODULES = $(OCRD_MODULES) + @echo OCRD_IMAGES = $(OCRD_IMAGES) @echo OCRD_EXECUTABLES = $(OCRD_EXECUTABLES:$(BIN)/%=%) show-%: ; @echo $($*) -check: $(OCRD_EXECUTABLES:%=%-check) $(OCRD_MODULES:%=%-check) - . $(ACTIVATE_VENV) && pip check -%-check: ; +.PHONY: init-vol-models clean-vol-models +init-vol-models: $(OCRD_IMAGES:%=init-vol-models/%) +init-vol-models/%: % + $(call initmodels_docker,$*) + +clean-vol-models: + docker volume rm $(DOCKER_VOL_MODELS) + +check: $(OCRD_EXECUTABLES:%=%-check) -.PHONY: testcuda test-cuda test-assets test-workflow -# ensure shapely#1598 workaround works -# ensure CUDA works for Torch and TF -testcuda test-cuda: $(ACTIVATE_VENV) - . $(ACTIVATE_VENV) && $(PYTHON) -c "from shapely.geometry import Polygon; import torch; torch.randn(10).cuda()" - . $(ACTIVATE_VENV) && $(PYTHON) -c "import torch, sys; sys.exit(0 if torch.cuda.is_available() else 1)" - . $(ACTIVATE_VENV) && $(PYTHON) -c "import tensorflow as tf, sys; sys.exit(0 if tf.test.is_gpu_available() else 1)" - . $(SUB_VENV_TF1)/bin/activate && $(PYTHON) -c "import tensorflow as tf, sys; sys.exit(0 if tf.test.is_gpu_available() else 1)" - @echo everything seems to be fine +.PHONY: test-workflow # download models and run some processors (not for result quality, only coverage) -test-workflow: test-assets core $(BIN)/ocrd $(ACTIVATE_VENV) - . 
$(ACTIVATE_VENV) && cd core/tests/assets/SBB0000F29300010000/data/ && bash -x $(CURDIR)/test-workflow.sh +test-workflow: core/tests/assets + cd core/tests/assets/SBB0000F29300010000/data/ && bash -x $(CURDIR)/test-workflow.sh -test-assets: +core/tests/assets: core $(MAKE) -C core assets -ocrd-all-tool.json: $(OCRD_MODULES) $(ACTIVATE_VENV) - . $(ACTIVATE_VENV) && $(PYTHON) ocrd-all-tool.py $(wildcard $(OCRD_MODULES:%=%/ocrd-tool.json)) > $@ +# concatenate executables +ocrd-all-tool.json: $(OCRD_MODULES:%=%/ocrd-tool.json) $(ACTIVATE_VENV) + . $(ACTIVATE_VENV) && python ocrd-all-tool.py $(wildcard $(OCRD_MODULES:%=%/ocrd-tool.json)) > $@ -ocrd-all-module-dir.json: ocrd-all-tool.json $(OCRD_EXECUTABLES) $(ACTIVATE_VENV) - . $(ACTIVATE_VENV) && TF_CPP_MIN_LOG_LEVEL=3 $(PYTHON) ocrd-all-module-dir.py $< > $@ +# concatenate everything but tools, and add current git revision +ocrd-all-meta.json: $(OCRD_MODULES:%=%/ocrd-tool.json) $(ACTIVATE_VENV) + . $(ACTIVATE_VENV) && python ocrd-all-meta.py $(wildcard $(OCRD_MODULES:%=%/ocrd-tool.json)) > $@ -ocrd-all-meta.json: $(OCRD_MODULES) $(ACTIVATE_VENV) - . 
$(ACTIVATE_VENV) && $(PYTHON) ocrd-all-meta.py $(wildcard $(OCRD_MODULES:%=%/ocrd-tool.json)) > $@ +ocrd-all-images.yaml: $(OCRD_IMAGES) + $(file > $@) + $(foreach IMAGE, $^, $(file >> $@, - $(IMAGE))) + +%/ocrd-tool.json: % .PHONY: $(OCRD_EXECUTABLES:%=%-check) $(OCRD_EXECUTABLES:%=%-check): @@ -743,8 +718,7 @@ $(OCRD_EXECUTABLES:%=%-check): .PHONY: $(OCRD_EXECUTABLES:$(BIN)/%=%) $(OCRD_EXECUTABLES:$(BIN)/%=%): %: $(BIN)/% -XDG_DATA_HOME ?= $(if $(HOME),$(HOME)/.local/share,/usr/local/share) -DEFAULT_RESLOC ?= $(XDG_DATA_HOME)/ocrd-resources +$(OCRD_EXECUTABLES): $(ACTIVATE_VENV) # do not delete intermediate targets: .SECONDARY: @@ -752,115 +726,42 @@ DEFAULT_RESLOC ?= $(XDG_DATA_HOME)/ocrd-resources # suppress all built-in suffix rules: .SUFFIXES: -# allow installing system dependencies for all modules -# (mainly intended for docker, not recommended to use directly for live systems) -# reset ownership of submodules to that of ocrd_all -# (in case deps-ubuntu has been used with sudo and some modules were updated) # install git and parallel first (which is required for the module updates) deps-ubuntu: apt-get -y install git parallel ifneq ($(PYTHON),python) -ifneq ($(suffix $(PYTHON)),) -# install specific Python version in system via PPA - apt-get install -y software-properties-common - add-apt-repository -y ppa:deadsnakes/ppa - apt-get update -endif apt-get install -y --no-install-recommends $(notdir $(PYTHON))-dev $(notdir $(PYTHON))-venv endif - $(MAKE) deps-ubuntu-modules - -deps-ubuntu-modules: - set -e; for dir in $^; do $(MAKE) -C $$dir deps-ubuntu PYTHON=$(PYTHON); done - apt-get -y install $(CUSTOM_DEPS) - -.PHONY: deps-ubuntu deps-ubuntu-modules - -# For native (non-Docker) installations, install CUDA system dependencies -deps-cuda: core $(ACTIVATE_VENV) - . 
$(ACTIVATE_VENV) && $(MAKE) -C $< $@ - -# For standalone use ("just get me tensorflow-gpu<2.0 into the current venv") -tf1nvidia: $(ACTIVATE_VENV) - $(pip_install_tf1nvidia) - -# post-fix workaround for clash between cuDNN of Tensorflow 2.12 (→8.6) and Pytorch 1.13 (→8.5) / 2.1 (8.7) -# (which also involves conflict around typing-extensions version) -# the latter is explicit (but unnecessary), the former is implicit (and causes "DNN library not found" crashes at runtime) -# so we have three potential options: -# 1. revert to the version required by TF after pip overruled our choice via Torch dependency -# pip3 install nvidia-cudnn-cu11==8.6.0.* -# 2. downgrade TF so there is no overt conflict -# pip3 install "tensorflow<2.12" -# 3. upgrade Torch so there is no overt conflict -# pip install "torch>=2.0" -# Since ATM we already need TF 2.12, we choose for (modified) option 3: -fix-cuda: $(ACTIVATE_VENV) - . $(ACTIVATE_VENV) && $(SEMPIP) pip install -i https://download.pytorch.org/whl/cu118 torchvision==0.16.2+cu118 torch==2.1.2+cu118 -# displace CUDA 12 libs pulled via Pytorch from PyPI - if test -d $(SUB_VENV_TF1); then \ - . $(SUB_VENV_TF1)/bin/activate && pip install -i https://download.pytorch.org/whl/cu118 torchvision==0.16.2+cu118 torch==2.1.2+cu118; \ - fi - -.PHONY: deps-cuda tf1nvidia fix-cuda - - -# Docker builds. 
-DOCKER_TAG ?= ocrd/all -DOCKER_BASE_IMAGE ?= ocrd/core:$(CORE_VERSION) - -# Several predefined selections -# (note: to arrive at smallest possible image size individually, -# these variants won't share common layers / steps / data, -# so build-time and bandwidth are n-fold) -.PHONY: dockers -dockers: docker-minimum docker-minimum-cuda docker-medium docker-medium-cuda docker-maximum docker-maximum-cuda - -# keep git repos and reference them for install -# (so components can be updated via git from the container alone) -docker-%: PIP_OPTIONS = -e - -# Minimum-size selection: use Ocropy binarization, use Tesseract from git -DOCKER_MODULES_MINI := core ocrd_cis ocrd_fileformat ocrd_olahd_client ocrd_page2alto ocrd_pagetopdf ocrd_tesserocr ocrd_wrap workflow-configuration -docker-mini%: DOCKER_MODULES := $(DOCKER_MODULES_MINI) -# Medium-size selection: add Olena binarization and Calamari, add evaluation -DOCKER_MODULES_MEDI := $(DOCKER_MODULES_MINI) cor-asv-ann dinglehopper docstruct format-converters nmalign ocrd_calamari ocrd_keraslm ocrd_olena ocrd_segment -docker-medi%: DOCKER_MODULES := $(DOCKER_MODULES_MEDI) -# Maximum-size selection: use all enabled modules -docker-maxi%: DOCKER_MODULES := $(OCRD_MODULES) - -# DOCKER_BASE_IMAGE -docker-minimum: DOCKER_BASE_IMAGE = ocrd/core:$(CORE_VERSION) -docker-medium: DOCKER_BASE_IMAGE = $(DOCKER_TAG):minimum -docker-maximum: DOCKER_BASE_IMAGE = $(DOCKER_TAG):medium -# CUDA variants -docker-minimum-cuda: DOCKER_BASE_IMAGE = ocrd/core-cuda:$(CORE_VERSION) -docker-medium-cuda: DOCKER_BASE_IMAGE = $(DOCKER_TAG):minimum-cuda -docker-maximum-cuda: DOCKER_BASE_IMAGE = $(DOCKER_TAG):medium-cuda -# explicit interdependencies -docker-medium: docker-minimum -docker-maximum: docker-medium -docker-medium-cuda: docker-minimum-cuda -docker-maximum-cuda: docker-medium-cuda - -# Build rule for all selections -# FIXME: $(DOCKER_MODULES) ref does not work at phase 1; workaround: all modules -docker-%: Dockerfile modules - docker build \ - 
--progress=plain \ - --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) \ - --build-arg VCS_REF=$$(git rev-parse --short HEAD) \ - --build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \ - --build-arg OCRD_MODULES="$(DOCKER_MODULES)" \ - --build-arg PIP_OPTIONS="$(PIP_OPTIONS)" \ - --build-arg PARALLEL="$(DOCKER_PARALLEL)" \ - --build-arg PYTHON="$(PYTHON)" \ - --network=host \ - -t $(DOCKER_TAG):$* . - -docker: DOCKER_MODULES ?= $(OCRD_MODULES) -docker: DOCKER_PARALLEL ?= -j1 -docker: docker-latest + +.PHONY: deps-ubuntu + +OCRD_NETWORK_CONFIG ?= run-network/ocrd-all-config.yaml + +.PHONY: network-setup network-start network-stop network-clean +network-setup: init-vol-models +network-setup: docker-compose.yml +network-setup: .env +network-setup: ocrd-processing-server-config.yaml + +docker-compose.yml: $(ACTIVATE_VENV) ocrd-all-images.yaml + . $(ACTIVATE_VENV) && python run-network/creator.py create-compose $(OCRD_NETWORK_CONFIG) $@ +.env: $(ACTIVATE_VENV) + . $(ACTIVATE_VENV) && python run-network/creator.py create-dotenv $(OCRD_NETWORK_CONFIG) $@ + @# overrides + @echo RES_VOL=$(DOCKER_VOL_MODELS) >> $@ + @echo USER_ID=`id -u` >> $@ + @echo GROUP_ID=`id -g` >> $@ +# . $(ACTIVATE_VENV) && python run-network/creator.py create-clients $(BIN) $(OCRD_NETWORK_CONFIG) +ocrd-processing-server-config.yaml: $(ACTIVATE_VENV) + . $(ACTIVATE_VENV) && python run-network/creator.py create-psconfig $(OCRD_NETWORK_CONFIG) $@ +network-start: + # docker compose up -d + . $(ACTIVATE_VENV) && python run-network/creator.py start +network-stop: + # docker compose down + . 
$(ACTIVATE_VENV) && python run-network/creator.py stop +network-clean: + $(RM) -r $(VIRTUAL_ENV) .env docker-compose.yml ocrd-processing-server-config.yaml # do not search for implicit rules here: Makefile: ; diff --git a/format-converters b/format-converters deleted file mode 160000 index 08948016..00000000 --- a/format-converters +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 08948016189c0105cf9442cbbeeeaf87d4fddb65 diff --git a/opencv-python b/opencv-python deleted file mode 160000 index 255564a3..00000000 --- a/opencv-python +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 255564a37c4a23275485e827abbe19031347d458 diff --git a/run-network/creator.py b/run-network/creator.py new file mode 100644 index 00000000..19637eba --- /dev/null +++ b/run-network/creator.py @@ -0,0 +1,605 @@ +import re +import subprocess +import time +from collections import Counter +from dataclasses import dataclass, field +from os import chdir, environ, chmod +from os.path import dirname +from pathlib import Path +from typing import Any, Dict, ForwardRef, List, Type + +import click +import requests +import json +import yaml + + +@click.group() +def cli() -> None: + """A simple CLI program""" + pass + + +@cli.command("create-compose") +@click.argument("config_path", type=click.Path(dir_okay=False,exists=True)) +@click.argument("out_path", type=click.Path(dir_okay=False), default="docker-compose.yml") +def create_compose_cli(config_path: str, out_path: str) -> None: + """Creates a docker-compose file""" + config: Config = Config.from_file(config_path) + create_docker_compose(config, out_path) + + +@cli.command("create-dotenv") +@click.argument("config_path", type=click.Path(dir_okay=False,exists=True)) +@click.argument("out_path", type=click.Path(dir_okay=False), default=".env") +def create_dotenv_cli(config_path: str, out_path: str) -> None: + """Creates .env for docker-compose""" + config: Config = Config.from_file(config_path) + create_dotenv(config.environment, out_path) + + 
+@cli.command("create-psconfig") +@click.argument("config_path", type=click.Path(dir_okay=False,exists=True)) +@click.argument("out_path", type=click.Path(dir_okay=False), default="ocrd-processing-server-config.yaml") +def create_psconfig_cli(config_path: str, out_path: str) -> None: + """Creates configuration file for ocrd network processing-server""" + config: Config = Config.from_file(config_path) + create_psconfig(config.environment, out_path) + + +@cli.command() +def start() -> None: + """Start all services via docker compose""" + command = ["docker", "compose", "up", "--wait", "--wait-timeout", "30", "-d"] + subprocess.run(command) + #wait_for_startup(f"http://localhost:{config.environment.ocrd_ps_port}") + + +@cli.command() +def stop() -> None: + """Stop all services via docker compose""" + command = ["docker", "compose", "down"] + subprocess.run(command) + + +@cli.command("create-client") +@click.option("--docker-run-opts") +@click.argument("path") +@click.argument("executable") +@click.argument("image") +def create_client(docker_run_opts: str, path: str, executable: str, image: str) -> None: + """Creates an executable script for the specified processor + + The script will either call the standalone CLI via Docker run, + or (if the network-setup has been run) the client CLI for the + Processing Server (assuming network-start has been run as well). 
+ """ + with open(image, "r") as meta_file: + meta = yaml.safe_load(meta_file) + content = DELEGATOR_SHEBANG_TEMPLATE + if executable in meta['tools']: + content += DELEGATOR_DETECTENV_TEMPLATE + else: + content += "ps_port = None\n\n" + content += DELEGATOR_PROCESSOR_TEMPLATE.format( + processor_name=executable, docker_image=image, + docker_run_opts=docker_run_opts) + dest = Path(path) + if not dest.parent.exists(): + exit(f"target {dest} parent directory does not exist") + with open(dest, "w") as fout: + fout.write(content) + chmod(dest, 0o755) + + +@cli.command("create-workflow-client") +@click.argument("path") +def create_workflow_client(path: str) -> None: + """Creates an executable script for the 'ocrd process' functionality (workflow processing) + + After validating fileGrp dependencies for the passed workspace, + for each processor in the passed workflow, + the script will either call the respective standalone CLI via Docker run, + or (if the network-setup has been run) the client CLI for the + Processing Server (assuming network-start has been run as well). + """ + content = DELEGATOR_SHEBANG_TEMPLATE + content += DELEGATOR_DETECTENV_TEMPLATE + content += DELEGATOR_WORKFLOW_TEMPLATE + dest = Path(path) + if not dest.parent.exists(): + exit(f"target {dest} parent directory does not exist") + with open(dest, "w") as fout: + fout.write(content) + chmod(dest, 0o755) + + +# @cli.command() +# @click.argument("config_path") +# def test_config(config_path): +# """Validate the configuration file. 
+# +# This needs external dependency jsonschema""" +# from jsonschema import validate +# config_path = Path(config_path) +# schema_path = Path("creator_schema.yaml") +# if not config_path.exists(): +# print("config file not found") +# exit(1) +# assert schema_path.exists() +# +# with open(schema_path, "r") as fin: +# schema = yaml.safe_load(fin) +# +# with open(config_path, "r") as fin: +# instance = yaml.safe_load(fin) +# validate(instance, schema) + + +def create_docker_compose(config: Type[ForwardRef("Config")], dest: str) -> None: + """Create docker-compose file from config-object + + The parts of the docker-compose are defined in the config-object. Basically there is a template + string for all needed services. These templates are configurable and parts of it are set via + info specified in the config file + """ + with open(dest, "w") as fout: + + if config.environment.mtu: + fout.write(config.network_template) + fout.write("\n") + fout.write("services:") + ps_template = config.processing_server_template.format( + image=config.processing_server_image + ) + fout.write(ps_template) + fout.write(config.mongodb_template) + fout.write(config.rabbitmq_template) + fout.write(create_workers(config)) + fout.write(config.volumes_template) + + +def create_workers(config: Type[ForwardRef("Config")]) -> str: + """Create service definition of docker-compose for needed processors + + This function reads the processor-template and replaces placeholders with info from the + config-object + """ + res = "" + services_counter = Counter() + for p in config.processors: + service_name = p.name + services_counter[service_name] += 1 + if services_counter[service_name] > 1: + service_name = f"{service_name}{services_counter[service_name]}" + + depends_on_str = "" + for depends_on in p.depends_on: + depends_on_str += "\n" + depends_on_str += f" - {depends_on}" + + proc_str = config.proc_template.format( + service_name=service_name, + processor_name=p.name, + image=p.image, + 
depends_on=depends_on_str, + profiles=", ".join(p.profiles) + ) + + # add volume mounts for some containers + for vol in p.volumes: + proc_str = re.sub( + r" volumes:", + f' volumes:\n - "{vol}"', + proc_str, + ) + + for env in p.environment: + proc_str = re.sub( + r" environment:", + f" environment:\n - {env}", + proc_str, + ) + + res += proc_str + return res + + +def create_dotenv(env: Type[ForwardRef("Environment")], dest: str) -> None: + """Create .env file to configure docker-compose + + Info is read from the config-object and written to the env file + """ + lines = [ + f"OCRD_PS_MTU={env.mtu}", + f"OCRD_PS_PORT={env.ocrd_ps_port}", + f"MONGODB_USER={env.mongodb_user}", + f"MONGODB_PASS={env.mongodb_pass}", + f"MONGODB_URL={env.mongodb_url}", + f"RABBITMQ_USER={env.rabbitmq_user}", + f"RABBITMQ_PASS={env.rabbitmq_pass}", + f"RABBITMQ_URL={env.rabbitmq_url}", + f"USER_ID={env.user_id}", + f"GROUP_ID={env.group_id}", + f"DATA_DIR={env.data_dir}", + f"RES_VOL={env.res_vol}", + f"INTERNAL_CALLBACK_URL={env.internal_callback_url}", + ] + + with open(dest, "w+") as fout: + fout.write("\n".join(lines) + "\n") + + +def create_psconfig(env: Type[ForwardRef("Environment")], dest: str) -> None: + """Create configuration file for ocrd network processing-server + + Info is read from the config-object and written to the yaml file + """ + content = PROCESSING_SERVER_CONFIG_TEMPLATE.format(env=env).replace("${OCRD_PS_PORT}", str(env.ocrd_ps_port)) + with open(dest, "w") as fout: + fout.write(content) + + +def wait_for_startup(processing_server_url: str) -> None: + """Wait for completed startup of all docker-compose services + + After the startup the containers need some time to be usable. 
This function ensures their + availability + """ + counter = 0 + while True: + try: + response = requests.get(processing_server_url) + response.raise_for_status() + break + except requests.exceptions.ConnectionError: + time.sleep(1) + counter += 1 + if counter > 30: + raise Exception("processing-server startup failed") from None + except requests.HTTPError: + # unexpected error + exit(1) + + +NETWORK_TEMPLATE = """ +networks: + default: + driver: bridge + driver_opts: + com.docker.network.driver.mtu: ${OCRD_PS_MTU} +""" + +PROC_TEMPLATE = """ + {service_name}: + image: {image} + container_name: {service_name} + command: {processor_name} worker --database $MONGODB_URL --queue $RABBITMQ_URL + profiles: [{profiles}] + depends_on: {depends_on} + user: "${{USER_ID}}:${{GROUP_ID}}" + volumes: + - "${{DATA_DIR}}:/data" + - ocrd-resources:/usr/local/share/ocrd-resources + environment: + - OCRD_NETWORK_LOGS_ROOT_DIR=${{LOGS_DIR:-/data/logs}} + - XDG_CONFIG_HOME=/usr/local/share/ocrd-resources +""" + +PROCESSING_SERVER_TEMPLATE = """ + ocrd-processing-server: + container_name: ocrd-processing-server + image: {image} + environment: + - MONGODB_USER=${{MONGODB_USER:-admin}} + - MONGODB_PASS=${{MONGODB_PASS:-admin}} + - RABBITMQ_USER=${{RABBITMQ_USER:-admin}} + - RABBITMQ_PASS=${{RABBITMQ_PASS:-admin}} + - OCRD_NETWORK_SOCKETS_ROOT_DIR=${{SOCKETS_DIR:-/data/sockets}} + - OCRD_NETWORK_LOGS_ROOT_DIR=${{LOGS_DIR:-/data/logs}} + - XDG_CONFIG_HOME=/usr/local/share/ocrd-resources + command: ocrd network processing-server -a 0.0.0.0:8000 /data/ocrd-processing-server-config.yaml + healthcheck: + test: ["CMD", "curl", "-f", "http://0.0.0.0:8000"] + interval: 60s + timeout: 10s + start_period: 30s + retries: 2 + user: "${{USER_ID}}:${{GROUP_ID}}" + volumes: + - ocrd-resources:/usr/local/share/ocrd-resources + - "${{DATA_DIR}}:/data" + - "${{PWD}}/ocrd-all-tool.json:/build/core/src/ocrd/ocrd-all-tool.json" + - 
"${{PWD}}/ocrd-processing-server-config.yaml:/data/ocrd-processing-server-config.yaml" + ports: + - ${{OCRD_PS_PORT}}:8000 +""" + +PROCESSING_SERVER_CONFIG_TEMPLATE = """ +internal_callback_url: {env.internal_callback_url} +use_tcp_mets: true +process_queue: + address: ocrd-rabbitmq + port: 5672 + skip_deployment: true + credentials: + username: {env.rabbitmq_user} + password: {env.rabbitmq_pass} +database: + address: ocrd-mongodb + port: 27017 + skip_deployment: true + credentials: + username: {env.mongodb_user} + password: {env.mongodb_pass} +hosts: [] +""" + +MONGODB_TEMPLATE = """ + ocrd-mongodb: + container_name: ocrd-mongodb + image: mongo:latest + environment: + - MONGO_INITDB_ROOT_USERNAME=${MONGODB_USER:-admin} + - MONGO_INITDB_ROOT_PASSWORD=${MONGODB_PASS:-admin} + ports: + - "27018:27017" + healthcheck: + test: ["CMD", "mongosh", "--eval", "db.adminCommand('ping')"] + interval: 60s + timeout: 10s + start_period: 10s + retries: 3 +""" + +RABBITMQ_TEMPLATE = """ + ocrd-rabbitmq: + container_name: ocrd-rabbitmq + image: rabbitmq:3-management + environment: + - RABBITMQ_DEFAULT_USER=${RABBITMQ_USER:-admin} + - RABBITMQ_DEFAULT_PASS=${RABBITMQ_PASS:-admin} + ports: + - "5672:5672" + - "15672:15672" + healthcheck: + test: ["CMD", "rabbitmq-diagnostics", "check_port_connectivity"] + interval: 60s + timeout: 10s + start_period: 10s + retries: 3 +""" + +VOLUMES_TEMPLATE = """ +volumes: + ocrd-resources: + external: true + name: ${RES_VOL} +""" + + +DELEGATOR_SHEBANG_TEMPLATE = """#!/usr/bin/env python + +""" + +DELEGATOR_DETECTENV_TEMPLATE = """ + +import os +import sys +import pathlib + +# detect whether to run standalone or in ocrd_network +dotenv = pathlib.Path(".env") +if dotenv.exists(): + from dotenv import dotenv_values + dotenv = dotenv_values(dotenv) + ps_port = dotenv.get('OCRD_PS_PORT', '') + if ps_port and ps_port.isdecimal(): + pass + print("using Processing Server at localhost:%s" % ps_port, file=sys.stderr) + else: + ps_port = '' + print("no 
OCRD_PS_PORT found in .env - starting local container", file=sys.stderr) +else: + ps_port = '' + print("no .env found - starting local container", file=sys.stderr) + +# allow overriding detection +if policy := os.environ.get('DOCKER_RUN_POLICY', None): + if policy in ['ocrd_network', 'client'] and not ps_port: + exit("cannot apply DOCKER_RUN_POLICY=" + policy + ": needs OCRD_PS_PORT via .env") + if policy in ['local', 'standalone'] and ps_port: + ps_port = '' + print("DOCKER_RUN_POLICY overrides: starting local container", file=sys.stderr) + +""" + +DELEGATOR_PROCESSOR_TEMPLATE = """ + +import sys +import os +import pathlib +import subprocess + +if not ps_port: + # avoid re-interpreting by shell + args = ['docker', 'run', '--rm'] + args.extend('{docker_run_opts}'.split()) + args.extend(os.environ.get('DOCKER_RUN_OPTS', '').split()) + # try to be smart: bind-mount CWD as /data if not mounted yet + datapath = None + for arg in args: + if arg.endswith(':/data'): + datapath = arg[:arg.index(':')] + break + if arg.endswith(',destination=/data'): + typ, src, dst = arg.split(',') + datapath = src.replace('source=', '') + break + if not datapath: + datapath = str(pathlib.Path().absolute()) + args.extend(['-v', datapath + ':/data']) + if datapath.endswith('/'): + datapath = datapath[:-1] + args.append('{docker_image}') + args.append('{processor_name}') + # try to be smart: translate host to container data paths + args.extend([arg.replace(datapath + '/', '/data/') if arg.startswith('/') else arg + for arg in sys.argv[1:]]) + ret = subprocess.run(args) + sys.exit(ret.returncode) + +import click +from ocrd_network.cli import client_cli + +run_cli = client_cli.commands['processing'].commands['run'] + + +def callback(*args, **kwargs): + kwargs['address'] = "http://localhost:" + ps_port + kwargs['block'] = True + kwargs['print_state'] = True + return run_cli.callback("{processor_name}", *args, **kwargs) + + +params = [param for param in run_cli.params + if param.name not in [ + 
'processor_name', + 'address', + 'block', + 'print_state', + ]] +cli = click.Command(name="{processor_name}", + callback=callback, + params=params, + help=run_cli.help) + +if __name__ == "__main__": + cli() +""" + + +DELEGATOR_WORKFLOW_TEMPLATE = """ + +if not ps_port: + from ocrd.cli.process import process_cli + + + if __name__ == "__main__": + process_cli() + +from ocrd_network.cli import client_cli +import click + + +run_cli = client_cli.commands['workflow'].commands['run'] + +def callback(*args, **kwargs): + kwargs['address'] = "http://localhost:" + ps_port + kwargs['block'] = True + kwargs['print_state'] = True + return run_cli.callback(*args, **kwargs) + +params = [param for param in run_cli.params + if param.name not in [ + 'address', + 'block', + 'print_state', + ]] +cli = click.Command(name="ocrd-process", callback=callback, params=params, help=run_cli.help) + +if __name__ == "__main__": + cli() +""" + + +@dataclass +class Processor: + """Configuration of an ocr-d processor""" + + name: str + image: str + volumes: List[str] = field(default_factory=list) + environment: List[str] = field(default_factory=list) + profiles: List[str] = field(default_factory=list) + depends_on: List[str] = field( + default_factory=lambda: [ + "ocrd-mongodb", + "ocrd-rabbitmq", + "ocrd-processing-server", + ] + ) + + +@dataclass +class Environment: + """Conains info for .env file""" + + ocrd_ps_port: int = 8000 + mtu: int = 0 + mongodb_user: str = "admin" + mongodb_pass: str = "admin" + mongodb_url: str = "mongodb://${MONGODB_USER}:${MONGODB_PASS}@ocrd-mongodb:27017" + rabbitmq_user: str = "admin" + rabbitmq_pass: str = "admin" + rabbitmq_url: str = "amqp://${RABBITMQ_USER}:${RABBITMQ_PASS}@ocrd-rabbitmq:5672" + user_id: int = 1000 + group_id: int = 1000 + data_dir: str = "/tmp/data" + res_vol: str = "ocrd-models" + internal_callback_url: str = "http://ocrd-processing-server:${OCRD_PS_PORT}" + + +@dataclass +class Config: + """This object determines how the docker-compose will 
finally look like""" + + processors: List[Processor] + environment: Environment = field(default_factory=Environment) + processing_server_image: str = "ocrd/core:latest" + processing_server_template: str = PROCESSING_SERVER_TEMPLATE + mongodb_template: str = MONGODB_TEMPLATE + rabbitmq_template: str = RABBITMQ_TEMPLATE + proc_template: str = PROC_TEMPLATE + network_template: str = NETWORK_TEMPLATE + volumes_template: str = VOLUMES_TEMPLATE + + @staticmethod + def from_file(yaml_file_path: str) -> "Config": + with open(yaml_file_path, "r") as file: + yamldict: Dict[str, Any] = yaml.safe_load(file) + + if "processors" in yamldict: + # manual config + processors = [Processor(**processor) for processor in yamldict["processors"]] + print("loaded %d processors from manual config %s" % (len(processors), yaml_file_path)) + else: + # controlled by Makefile at build time (OCRD_MODULES etc.) + with open("ocrd-all-images.yaml", "r") as file: + images = yaml.safe_load(file) + processors = [] + for image in images: + with open(image, "r") as file: + meta = yaml.safe_load(file) + processors.extend([ + Processor(name=executable, image=image, + # DOCKER_PROFILES is whitespace-separated in Makefile + profiles=(meta['profiles'] or "").split()) + for executable in meta['tools']]) + print("loaded %d processors from generated config" % len(processors)) + yamldict["processors"] = processors + + if "environment" in yamldict: + # manual config + environment = Environment(**yamldict["environment"]) + else: + # defaults + environment = Environment() + yamldict["environment"] = environment + + res = Config(**yamldict) + return res + + +if __name__ == "__main__": + cli() diff --git a/run-network/my_ocrd_logging.conf b/run-network/my_ocrd_logging.conf new file mode 100644 index 00000000..43df8a6b --- /dev/null +++ b/run-network/my_ocrd_logging.conf @@ -0,0 +1,150 @@ +# This is a template configuration file which allows customizing +# format and destination of log messages with OCR-D. 
+# It is meant as an example, and should be customized. +# To get into effect, you must put a copy (under the same name) +# into your CWD, HOME or /etc. These directories are searched +# in said order, and the first find wins. When no config file +# is found, the default logging configuration applies (cf. ocrd.logging.py). +# +# mandatory loggers section +# configure loggers with corresponding keys "root", "" +# each logger requires a corresponding configuration section below +# +[loggers] +keys=root,ocrd,ocrd_network,ocrd_tensorflow,ocrd_shapely_geos,ocrd_PIL,uvicorn,uvicorn_access,uvicorn_error,multipart + +# +# mandatory handlers section +# handle output for each logging "channel" +# i.e. console, file, smtp, syslog, http, ... +# each handler requires a corresponding configuration section below +# +[handlers] +keys=consoleHandler,fileHandler,processingServerHandler + +# +# optional custom formatters section +# format message fields, to be used differently by logging handlers +# each formatter requires a corresponding formatter section below +# +[formatters] +keys=defaultFormatter,detailedFormatter + +# +# default logger "root" using consoleHandler +# +[logger_root] +level=INFO +handlers=consoleHandler,fileHandler + + +# +# additional logger configurations can be added +# as separate configuration sections like below +# +# example logger "ocrd_workspace" uses fileHandler and overrides +# default log level "INFO" with custom level "DEBUG" +# "qualname" must match the logger label used in the corresponding +# ocrd module +# see in the module-of-interest (moi) +# +#[logger_ocrd_workspace] +#level=DEBUG +#handlers=fileHandler +#qualname=ocrd.workspace + +# ocrd loggers +[logger_ocrd] +level=INFO +handlers=consoleHandler,fileHandler +qualname=ocrd +propagate=0 + +[logger_ocrd_network] +level=INFO +handlers=consoleHandler,processingServerHandler +qualname=ocrd_network +propagate=0 + +# +# logger tensorflow +# +[logger_ocrd_tensorflow] +level=ERROR 
+handlers=consoleHandler +qualname=tensorflow + +# +# logger shapely.geos +# +[logger_ocrd_shapely_geos] +level=ERROR +handlers=consoleHandler +qualname=shapely.geos + + +# +# logger PIL +# +[logger_ocrd_PIL] +level=INFO +handlers=consoleHandler +qualname=PIL + +# +# uvicorn loggers +# +[logger_uvicorn] +level=INFO +handlers=consoleHandler +qualname=uvicorn +[logger_uvicorn_access] +level=DEBUG +handlers=consoleHandler +qualname=uvicorn.access +[logger_uvicorn_error] +level=DEBUG +handlers=consoleHandler +qualname=uvicorn.error +[logger_multipart] +level=INFO +handlers=consoleHandler +qualname=multipart + + + +# +# handle stderr output +# +[handler_consoleHandler] +class=StreamHandler +formatter=defaultFormatter +args=(sys.stderr,) + +# +# example logfile handler +# handle output with logfile +# +[handler_fileHandler] +class=FileHandler +formatter=defaultFormatter +args=('ocrd.log','a+') + +[handler_processingServerHandler] +class=FileHandler +formatter=defaultFormatter +args=('/tmp/ocrd_processing_server_newer.log','a+') + +# +# default log format conforming to OCR-D (https://ocr-d.de/en/spec/cli#logging) +# +[formatter_defaultFormatter] +format=%(asctime)s.%(msecs)03d %(levelname)s %(name)s - %(message)s +datefmt=%H:%M:%S + +# +# store more logging context information +# +[formatter_detailedFormatter] +format=%(asctime)s.%(msecs)03d %(levelname)-8s (%(name)s)[%(filename)s:%(lineno)d] - %(message)s +datefmt=%H:%M:%S diff --git a/run-network/ocrd-all-config.yaml b/run-network/ocrd-all-config.yaml new file mode 100644 index 00000000..d44a3ed2 --- /dev/null +++ b/run-network/ocrd-all-config.yaml @@ -0,0 +1,4 @@ +environment: + # this folder contains the workspaces and must be created by the user + data_dir: $PWD + mtu: 1300 diff --git a/run-network/ocrd-all-tool.json b/run-network/ocrd-all-tool.json new file mode 100644 index 00000000..9ba3bf22 --- /dev/null +++ b/run-network/ocrd-all-tool.json @@ -0,0 +1,4761 @@ +{ + "ocrd-cor-asv-ann-process": { + "executable": 
"ocrd-cor-asv-ann-process", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/post-correction" + ], + "description": "Improve text annotation by character-level encoder-attention-decoder ANN model", + "input_file_grp": [ + "OCR-D-OCR-TESS", + "OCR-D-OCR-KRAK", + "OCR-D-OCR-OCRO", + "OCR-D-OCR-CALA", + "OCR-D-OCR-ANY" + ], + "output_file_grp": [ + "OCR-D-COR-ASV" + ], + "parameters": { + "model_file": { + "type": "string", + "format": "uri", + "content-type": "application/x-hdf;subtype=bag", + "description": "path of h5py weight/config file for model trained with cor-asv-ann-train", + "required": true, + "cacheable": true + }, + "textequiv_level": { + "type": "string", + "enum": [ + "line", + "word", + "glyph" + ], + "default": "glyph", + "description": "PAGE XML hierarchy level to read/write TextEquiv input/output on" + }, + "charmap": { + "type": "object", + "default": {}, + "description": "mapping for input characters before passing to correction; can be used to adapt to character set mismatch between input and model (without relying on underspecification alone)" + }, + "rejection_threshold": { + "type": "number", + "format": "float", + "default": 0.5, + "description": "minimum probability of the candidate corresponding to the input character in each hypothesis during beam search, helps balance precision/recall trade-off; set to 0 to disable rejection (max recall) or 1 to disable correction (max precision)" + }, + "relative_beam_width": { + "type": "number", + "format": "float", + "default": 0.2, + "description": "minimum fraction of the best candidate's probability required to enter the beam in each hypothesis; controls the quality/performance trade-off" + }, + "fixed_beam_width": { + "type": "number", + "format": "integer", + "default": 15, + "description": "maximum number of candidates allowed to enter the beam in each hypothesis; controls the quality/performance trade-off" + }, + "fast_mode": { + "type": "boolean", + 
"default": false, + "description": "decode greedy instead of beamed, with batches of parallel lines instead of parallel alternatives; also disables rejection and beam parameters; enable if performance is far more important than quality" + } + } + }, + "ocrd-cor-asv-ann-evaluate": { + "executable": "ocrd-cor-asv-ann-evaluate", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/evaluation" + ], + "description": "Align different textline annotations and compute distance", + "input_file_grp": [ + "OCR-D-GT-SEG-LINE", + "OCR-D-OCR-TESS", + "OCR-D-OCR-KRAK", + "OCR-D-OCR-OCRO", + "OCR-D-OCR-CALA", + "OCR-D-OCR-ANY", + "OCR-D-COR-ASV" + ], + "output_file_grp": [ + "OCR-D-EVAL-CER" + ], + "parameters": { + "match_on": { + "type": "string", + "enum": [ + "index", + "id", + "coords", + "baseline" + ], + "default": "id", + "description": "Attribute to differentiate input annotations by: either `TextEquiv/@index` of the same TextLine and input file, or `TextLine/@id` (or `./Coords/@points` or `./Baseline/@points`) of input files across input fileGrps." + }, + "metric": { + "type": "string", + "enum": [ + "Levenshtein-fast", + "Levenshtein", + "NFC", + "NFKC", + "historic_latin" + ], + "default": "Levenshtein-fast", + "description": "Distance metric to calculate and aggregate: `historic_latin` for GT level 1-3, `NFKC` for roughly GT level 2 (but including reduction of `\u017f/s` and superscript numerals etc), `Levenshtein` for GT level 3 (or `Levenshtein-fast` for faster alignment - but using maximum sequence length instead of path length as CER denominator, and without confusion statistics)." + }, + "gt_level": { + "type": "number", + "enum": [ + 1, + 2, + 3 + ], + "default": 1, + "description": "When `metric=historic_latin`, normalize and equate at this GT transcription level." 
+ }, + "confusion": { + "type": "number", + "format": "integer", + "minimum": 0, + "default": 0, + "description": "Count edits and show that number of most frequent confusions (non-identity) in the end." + }, + "histogram": { + "type": "boolean", + "default": false, + "description": "Aggregate and show mutual character histograms." + } + } + }, + "ocrd-cor-asv-ann-align": { + "executable": "ocrd-cor-asv-ann-align", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/post-correction" + ], + "description": "Align different textline annotations and pick best", + "input_file_grp": [ + "OCR-D-GT-SEG-LINE", + "OCR-D-OCR-TESS", + "OCR-D-OCR-KRAK", + "OCR-D-OCR-OCRO", + "OCR-D-OCR-CALA", + "OCR-D-OCR-ANY", + "OCR-D-COR-ASV" + ], + "output_file_grp": [ + "OCR-D-OCR-MULTI" + ], + "parameters": { + "method": { + "type": "string", + "enum": [ + "majority", + "confidence", + "combined" + ], + "default": "majority", + "description": "decide by majority of OCR hypotheses, by highest confidence of OCRs or by a combination thereof" + } + } + }, + "ocrd-cor-asv-ann-join": { + "executable": "ocrd-cor-asv-ann-join", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/post-correction" + ], + "description": "Join different textline annotations by concatenation", + "input_file_grp": [ + "OCR-D-GT-SEG-LINE", + "OCR-D-OCR-TESS", + "OCR-D-OCR-KRAK", + "OCR-D-OCR-OCRO", + "OCR-D-OCR-CALA", + "OCR-D-OCR-ANY", + "OCR-D-COR-ASV" + ], + "output_file_grp": [ + "OCR-D-OCR-MULTI" + ], + "parameters": { + "add-filegrp-comments": { + "type": "boolean", + "default": false, + "description": "set @comments of each TextEquiv to the fileGrp/@USE it came from" + }, + "add-filegrp-index": { + "type": "boolean", + "default": false, + "description": "set @index of each TextEquiv to the fileGrp index (zero based) it came from" + }, + "match-on": { + "type": "string", + "enum": [ + "id", + "coords", + "baseline" + ], + "default": "id", + 
"description": "information to match lines on (element @id, Coords/@points, Baseline/@points)" + } + } + }, + "ocrd-cor-asv-ann-mark": { + "executable": "ocrd-cor-asv-ann-mark", + "description": "mark words not found by a spellchecker", + "steps": [ + "recognition/post-correction" + ], + "categories": [ + "Text recognition and optimization" + ], + "parameters": { + "command": { + "type": "string", + "required": true, + "description": "external tool to query word forms, e.g. 'hunspell -i utf-8 -d de_DE,en_US -w'" + }, + "normalization": { + "type": "object", + "default": {}, + "description": "mapping of characters prior to spellcheck, e.g. {\"\u017f\": \"s\", \"a\u0364\": \"\u00e4\"}" + }, + "format": { + "type": "string", + "default": "conf", + "description": "how unknown words should be marked; if 'conf', then writes @conf=0.123, otherwise writes that value into @comments" + } + } + }, + "ocrd-dummy": { + "executable": "ocrd-dummy", + "description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group", + "steps": [ + "preprocessing/optimization" + ], + "categories": [ + "Image preprocessing" + ], + "input_file_grp": "DUMMY_INPUT", + "output_file_grp": "DUMMY_OUTPUT", + "parameters": { + "copy_files": { + "type": "boolean", + "default": false, + "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)" + } + } + }, + "ocrd-dinglehopper": { + "executable": "ocrd-dinglehopper", + "description": "Evaluate OCR text against ground truth with dinglehopper", + "input_file_grp": [ + "OCR-D-GT-PAGE", + "OCR-D-OCR" + ], + "output_file_grp": [ + "OCR-D-OCR-EVAL" + ], + "categories": [ + "Quality assurance" + ], + "steps": [ + "recognition/text-recognition" + ], + "parameters": { + "metrics": { + "type": "boolean", + "default": true, + "description": "Enable/disable metrics and green/red" + }, + "textequiv_level": { + "type": "string", + "enum": [ + "region", + "line" + ], + "default": 
"region", + "description": "PAGE XML hierarchy level to extract the text from" + } + } + }, + "ocrd-docstruct": { + "executable": "ocrd-docstruct", + "categories": [ + "Layout analysis" + ], + "description": "Parsing page-level text regions with headings and reading order, create a dummy logical structMap", + "steps": [ + "layout/analysis" + ], + "parameters": { + "mode": { + "type": "string", + "enum": [ + "enmap", + "dfg" + ], + "default": "dfg", + "description": "representational convention to use in the METS; either ENMAP profile (using mets:area) or DFG profile (using only mets:structLink)" + }, + "type": { + "type": "string", + "enum": [ + "chapter", + "section", + "article" + ], + "default": "article", + "description": "mets:div type to use for headings" + } + } + }, + "ocrd-eynollah-segment": { + "executable": "ocrd-eynollah-segment", + "categories": [ + "Layout analysis" + ], + "description": "Segment page into regions and lines and do reading order detection with eynollah", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-GT-SEG-PAGE" + ], + "output_file_grp": [ + "OCR-D-SEG-LINE" + ], + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line" + ], + "parameters": { + "models": { + "type": "string", + "format": "file", + "content-type": "text/directory", + "cacheable": true, + "description": "Path to directory containing models to be used (See https://qurator-data.de/eynollah)", + "required": true + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); ignored if <= 0 (with fall-back 230)", + "default": 0 + }, + "full_layout": { + "type": "boolean", + "default": true, + "description": "Try to detect all element subtypes, including drop-caps and headings" + }, + "tables": { + "type": "boolean", + "default": false, + "description": "Try to detect table regions" + }, + "curved_line": { + "type": "boolean", + "default": false, + 
"description": "try to return contour of textlines instead of just rectangle bounding box. Needs more processing time" + }, + "allow_scaling": { + "type": "boolean", + "default": false, + "description": "check the resolution against the number of detected columns and if needed, scale the image up or down during layout detection (heuristic to improve quality and performance)" + }, + "headers_off": { + "type": "boolean", + "default": false, + "description": "ignore the special role of headings during reading order detection" + } + }, + "resources": [ + { + "description": "models for eynollah (TensorFlow SavedModel format)", + "url": "https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz", + "name": "default", + "size": 1894627041, + "type": "archive", + "path_in_archive": "models_eynollah" + } + ] + }, + "ocrd-nmalign-merge": { + "executable": "ocrd-nmalign-merge", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/post-correction" + ], + "description": "forced alignment of lists of string by fuzzy string matching", + "parameters": { + "normalization": { + "type": "object", + "default": {}, + "additionalProperties": { + "type": "string" + }, + "description": "replacement pairs (regex patterns and regex backrefs) to be applied prior to matching (but not on the result itself)" + }, + "allow_splits": { + "type": "boolean", + "default": false, + "description": "allow line strings of the first input fileGrp to be matched by multiple line strings of the second input fileGrp (so concatenate all the latter before inserting into the former)" + } + } + }, + "ocrd-anybaseocr-binarize": { + "executable": "ocrd-anybaseocr-binarize", + "description": "Binarizes images with the algorithm from ocropy and outputs it as an AlternativeImage.", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/binarization" + ], + "input_file_grp": [ + "OCR-D-IMG" + ], + "output_file_grp": [ + 
"OCR-D-IMG-BIN" + ], + "parameters": { + "nocheck": { + "type": "boolean", + "default": false, + "description": "disable error checking on inputs" + }, + "show": { + "type": "boolean", + "default": false, + "description": "display final results" + }, + "raw_copy": { + "type": "boolean", + "default": false, + "description": "also copy the raw image" + }, + "gray": { + "type": "boolean", + "default": false, + "description": "force grayscale processing even if image seems binary" + }, + "bignore": { + "type": "number", + "format": "float", + "default": 0.1, + "description": "ignore this much of the border for threshold estimation" + }, + "debug": { + "type": "number", + "format": "integer", + "default": 0, + "description": "display intermediate results" + }, + "escale": { + "type": "number", + "format": "float", + "default": 1.0, + "description": "scale for estimating a mask over the text region" + }, + "hi": { + "type": "number", + "format": "float", + "default": 90, + "description": "percentile for white estimation" + }, + "lo": { + "type": "number", + "format": "float", + "default": 5, + "description": "percentile for black estimation" + }, + "perc": { + "type": "number", + "format": "float", + "default": 80, + "description": "percentage for filters" + }, + "range": { + "type": "number", + "format": "integer", + "default": 20, + "description": "range for filters" + }, + "threshold": { + "type": "number", + "format": "float", + "default": 0.5, + "description": "threshold, determines lightness" + }, + "zoom": { + "type": "number", + "format": "float", + "default": 0.5, + "description": "zoom for page background estimation, smaller=faster" + }, + "operation_level": { + "type": "string", + "enum": [ + "page", + "region", + "line" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + } + } + }, + "ocrd-anybaseocr-deskew": { + "executable": "ocrd-anybaseocr-deskew", + "description": "Deskews images with the algorithm from ocropy and 
outputs a deskew angle.", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/deskewing" + ], + "input_file_grp": [ + "OCR-D-IMG-BIN" + ], + "output_file_grp": [ + "OCR-D-IMG-DESKEW" + ], + "parameters": { + "escale": { + "type": "number", + "format": "float", + "default": 1.0, + "description": "scale for estimating a mask over the text region" + }, + "bignore": { + "type": "number", + "format": "float", + "default": 0.1, + "description": "ignore this much of the border for threshold estimation" + }, + "threshold": { + "type": "number", + "format": "float", + "default": 0.5, + "description": "threshold, determines lightness" + }, + "maxskew": { + "type": "number", + "format": "float", + "default": 1.0, + "description": "skew angle estimation parameters (degrees)" + }, + "skewsteps": { + "type": "number", + "format": "integer", + "default": 8, + "description": "steps for skew angle estimation (per degree)" + }, + "debug": { + "type": "number", + "format": "integer", + "default": 0, + "description": "display intermediate results" + }, + "parallel": { + "type": "number", + "format": "integer", + "default": 0, + "description": "???" 
+ }, + "lo": { + "type": "number", + "format": "integer", + "default": 5, + "description": "percentile for black estimation" + }, + "hi": { + "type": "number", + "format": "integer", + "default": 90, + "description": "percentile for white estimation" + }, + "operation_level": { + "type": "string", + "enum": [ + "page", + "region", + "line" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + } + } + }, + "ocrd-anybaseocr-crop": { + "executable": "ocrd-anybaseocr-crop", + "description": "Detect the input images' page frame, annotate it as border polygon and add a cropped derived image.", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/cropping" + ], + "input_file_grp": [ + "OCR-D-IMG-DESKEW" + ], + "output_file_grp": [ + "OCR-D-IMG-CROP" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "default": 0, + "description": "pixel density in dots per inch (used to zoom/scale during processing; overrides any meta-data in the images); disabled when zero or negative" + }, + "rulerRatioMax": { + "type": "number", + "format": "float", + "default": 50.0, + "description": "ruler detection and suppression: maximum aspect ratio of bbox" + }, + "rulerRatioMin": { + "type": "number", + "format": "float", + "default": 3.0, + "description": "ruler detection and suppression: minimum aspect ratio of bbox" + }, + "rulerAreaMax": { + "type": "number", + "format": "float", + "default": 0.3, + "description": "ruler detection and suppression: maximum area of bbox (as ratio of total image pixels)" + }, + "rulerAreaMin": { + "type": "number", + "format": "float", + "default": 0.01, + "description": "ruler detection and suppression: minimum area of bbox (as ratio of total image pixels)" + }, + "rulerWidthMax": { + "type": "number", + "format": "float", + "default": 0.95, + "description": "ruler detection and suppression: maximum width of bbox (as ratio of total image width)" + }, + 
"columnAreaMin": { + "type": "number", + "format": "float", + "default": 0.05, + "description": "text block detection: minimum area of individual columns (as ratio of total image pixels)" + }, + "columnSepWidthMax": { + "type": "number", + "format": "float", + "default": 0.04, + "description": "text block detection: maximum width between individual columns (as ratio of total image width)" + }, + "marginTop": { + "type": "number", + "format": "float", + "default": 0.25, + "description": "ruler / edge / text detection: maximum y position to crop from above (as ratio of total image height)" + }, + "marginBottom": { + "type": "number", + "format": "float", + "default": 0.75, + "description": "ruler / edge / text detection: minimum y position to crop from below (as ratio of total image height)" + }, + "marginLeft": { + "type": "number", + "format": "float", + "default": 0.3, + "description": "ruler / edge / text detection: maximum x position to crop from left (as ratio of total image width)" + }, + "marginRight": { + "type": "number", + "format": "float", + "default": 0.7, + "description": "ruler / edge / text detection: minimum x position to crop from right (as ratio of total image width)" + }, + "padding": { + "type": "number", + "format": "integer", + "default": 10, + "description": "extend / shrink border resulting from edge detection / text detection by this many px in each direction" + } + } + }, + "ocrd-anybaseocr-dewarp": { + "executable": "ocrd-anybaseocr-dewarp", + "description": "Dewarps the input image with anyBaseOCR and outputs it as an AlternativeImage", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/dewarping" + ], + "input_file_grp": [ + "OCR-D-IMG-CROP" + ], + "output_file_grp": [ + "OCR-D-IMG-DEWARP" + ], + "parameters": { + "resize_mode": { + "type": "string", + "enum": [ + "resize_and_crop", + "crop", + "scale_width", + "scale_width_and_crop", + "none" + ], + "default": "resize_and_crop", + "description": 
"transformation to apply to the original image before input to the network" + }, + "resize_height": { + "type": "number", + "format": "integer", + "default": 1024, + "description": "target image height before input to the network" + }, + "resize_width": { + "type": "number", + "format": "integer", + "default": 1024, + "description": "target image width before input to the network" + }, + "model_path": { + "type": "string", + "format": "uri", + "default": "latest_net_G.pth", + "description": "Path to the trained pix2pixHD model", + "cacheable": true, + "content-type": "application/vnd.pytorch" + }, + "gpu_id": { + "type": "number", + "format": "integer", + "default": -1, + "description": "CUDA device ID of GPU to use, or -1 for CPU only" + }, + "operation_level": { + "type": "string", + "enum": [ + "page", + "region" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on (should match what model was trained on!)" + } + }, + "resources": [ + { + "url": "https://s3.gwdg.de/ocr-d/models/dfki/dewarping/latest_net_G.pth", + "name": "latest_net_G.pth", + "description": "dewarping model for anybaseocr", + "size": 805292230 + } + ] + }, + "ocrd-anybaseocr-tiseg": { + "executable": "ocrd-anybaseocr-tiseg", + "input_file_grp": [ + "OCR-D-IMG-CROP" + ], + "output_file_grp": [ + "OCR-D-SEG-TISEG" + ], + "categories": [ + "Layout analysis" + ], + "steps": [ + "layout/segmentation/text-nontext" + ], + "description": "Separates the text and non-text elements with anyBaseOCR. Outputs clipped versions of the input image as AlternativeImage containing either only text or non-text elements.", + "parameters": { + "use_deeplr": { + "type": "boolean", + "default": true, + "description": "Whether to use deep learning model (UNet pixel classifier) instead of rule-based implementation (multi-resolution morphology)." 
+ }, + "seg_weights": { + "type": "string", + "format": "uri", + "content-type": "text/directory", + "cacheable": true, + "default": "seg_model", + "description": "Directory path to deep learning model when use_deeplr is true." + } + }, + "resources": [ + { + "url": "https://s3.gwdg.de/ocr-d/models/seg_model.tar.gz", + "name": "seg_model", + "description": "text image segmentation model for anybaseocr", + "type": "archive", + "path_in_archive": "seg_model", + "size": 61388872 + } + ] + }, + "ocrd-anybaseocr-textline": { + "executable": "ocrd-anybaseocr-textline", + "input_file_grp": [ + "OCR-D-SEG-TISEG" + ], + "output_file_grp": [ + "OCR-D-SEG-LINE-ANY" + ], + "categories": [ + "Layout analysis" + ], + "steps": [ + "layout/segmentation/line" + ], + "description": "Finds region polygons for each text line in the input image.", + "parameters": { + "minscale": { + "type": "number", + "format": "float", + "default": 12.0, + "description": "minimum scale permitted" + }, + "maxlines": { + "type": "number", + "format": "float", + "default": 300, + "description": "non-standard scaling of horizontal parameters" + }, + "scale": { + "type": "number", + "format": "float", + "default": 0.0, + "description": "the basic scale of the document (roughly, xheight) 0=automatic" + }, + "hscale": { + "type": "number", + "format": "float", + "default": 1.0, + "description": "non-standard scaling of horizontal parameters" + }, + "vscale": { + "type": "number", + "format": "float", + "default": 1.7, + "description": "non-standard scaling of vertical parameters" + }, + "threshold": { + "type": "number", + "format": "float", + "default": 0.2, + "description": "baseline threshold" + }, + "noise": { + "type": "number", + "format": "integer", + "default": 8, + "description": "noise threshold for removing small components from lines" + }, + "usegauss": { + "type": "boolean", + "default": false, + "description": "use gaussian instead of uniform" + }, + "maxseps": { + "type": "number", + 
"format": "integer", + "default": 2, + "description": "maximum black column separators" + }, + "sepwiden": { + "type": "number", + "format": "integer", + "default": 10, + "description": "widen black separators (to account for warping)" + }, + "blackseps": { + "type": "boolean", + "default": false, + "description": "also check for black column separators" + }, + "maxcolseps": { + "type": "number", + "format": "integer", + "default": 2, + "description": "maximum # whitespace column separators" + }, + "csminaspect": { + "type": "number", + "format": "float", + "default": 1.1, + "description": "minimum aspect ratio for column separators" + }, + "csminheight": { + "type": "number", + "format": "float", + "default": 6.5, + "description": "minimum column height (units=scale)" + }, + "pad": { + "type": "number", + "format": "integer", + "default": 3, + "description": "padding for extracted lines" + }, + "expand": { + "type": "number", + "format": "integer", + "default": 3, + "description": "expand mask for grayscale extraction" + }, + "parallel": { + "type": "number", + "format": "integer", + "default": 0, + "description": "number of CPUs to use" + }, + "libpath": { + "type": "string", + "default": ".", + "description": "Library Path for C Executables" + }, + "operation_level": { + "type": "string", + "enum": [ + "page", + "region" + ], + "default": "region", + "description": "PAGE XML hierarchy level to operate on" + }, + "overwrite": { + "type": "boolean", + "default": false, + "description": "check whether to overwrite existing text lines" + } + } + }, + "ocrd-anybaseocr-layout-analysis": { + "executable": "ocrd-anybaseocr-layout-analysis", + "input_file_grp": [ + "OCR-D-IMG-CROP" + ], + "output_file_grp": [ + "OCR-D-SEG-LAYOUT" + ], + "categories": [ + "Layout analysis" + ], + "steps": [ + "layout/analysis" + ], + "description": "Generates a table-of-content like document structure of the whole document.", + "parameters": { + "batch_size": { + "type": "number", + 
"format": "integer", + "default": 4, + "description": "Batch size for generating test images" + }, + "model_path": { + "type": "string", + "format": "uri", + "content-type": "text/directory", + "cacheable": true, + "default": "structure_analysis", + "description": "Directory path to layout structure classification model" + }, + "class_mapping_path": { + "type": "string", + "format": "uri", + "content-type": "application/python-pickle", + "cacheable": true, + "default": "mapping_densenet.pickle", + "description": "File path to layout structure classes" + } + }, + "resources": [ + { + "url": "https://s3.gwdg.de/ocr-d/models/structure_analysis.tar.gz", + "name": "structure_analysis", + "description": "structure analysis model for anybaseocr", + "type": "archive", + "path_in_archive": "structure_analysis", + "size": 29002514 + }, + { + "url": "https://s3.gwdg.de/ocr-d/models/dfki/layoutAnalysis/mapping_densenet.pickle", + "name": "mapping_densenet.pickle", + "description": "mapping model for anybaseocr", + "size": 374 + } + ] + }, + "ocrd-anybaseocr-block-segmentation": { + "executable": "ocrd-anybaseocr-block-segmentation", + "input_file_grp": [ + "OCR-D-IMG" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK" + ], + "categories": [ + "Layout analysis" + ], + "steps": [ + "layout/segmentation/region" + ], + "description": "Segments and classifies regions in each single page and annotates the the region polygons and classes.", + "parameters": { + "block_segmentation_weights": { + "type": "string", + "format": "uri", + "content-type": "application/x-hdf;subtype=bag", + "cacheable": true, + "default": "block_segmentation_weights.h5", + "description": "Path to model weights" + }, + "overwrite": { + "type": "boolean", + "default": false, + "description": "whether to delete existing text lines prior to segmentation" + }, + "th": { + "type": "number", + "format": "integer", + "default": 15, + "description": "num of pixels to include in the area region (when applying 
text/non-text mask from tiseg)" + }, + "active_classes": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "page-number", + "paragraph", + "catch-word", + "heading", + "drop-capital", + "signature-mark", + "header", + "marginalia", + "footnote", + "footnote-continued", + "caption", + "endnote", + "footer", + "keynote", + "image", + "table", + "graphics" + ] + }, + "default": [ + "page-number", + "paragraph", + "catch-word", + "heading", + "drop-capital", + "signature-mark", + "marginalia", + "caption" + ], + "description": "Restrict types of regions to be detected." + }, + "post_process": { + "type": "boolean", + "default": true, + "description": "whether to apply non-maximum suppression (across classes) on the detections" + }, + "use_masks": { + "type": "boolean", + "default": true, + "description": "whether to segment from the mask as polygon instead of just the bbox" + }, + "min_confidence": { + "type": "number", + "format": "float", + "default": 0.9, + "description": "Confidence threshold for region detections" + }, + "min_share_drop": { + "type": "number", + "format": "float", + "default": 0.9, + "description": "Minimum required overlap (intersection over single) of mask-derived contour area between neighbours to suppress smaller prediction" + }, + "min_share_merge": { + "type": "number", + "format": "float", + "default": 0.8, + "description": "Minimum required overlap (intersection over single) of mask-derived contour area between neighbours to merge smaller prediction" + }, + "min_iou_drop": { + "type": "number", + "format": "float", + "default": 0.8, + "description": "Minimum required overlap (intersection over union) of mask-derived contour area between neighbours to suppress prediction scoring worse" + }, + "min_iou_merge": { + "type": "number", + "format": "float", + "default": 0.2, + "description": "Minimum required overlap (intersection over union) of mask-derived contour area between neighbours to merge prediction scoring worse" + } 
+ }, + "resources": [ + { + "url": "https://s3.gwdg.de/ocr-d/models/dfki/segmentation/block_segmentation_weights.h5", + "name": "block_segmentation_weights.h5", + "description": "block segmentation model for anybaseocr", + "size": 256139800 + } + ] + }, + "ocrd-calamari-recognize": { + "executable": "ocrd-calamari-recognize", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/text-recognition" + ], + "description": "Recognize lines with Calamari", + "input_file_grp": [ + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-OCR-CALAMARI" + ], + "parameters": { + "checkpoint_dir": { + "description": "The directory containing calamari model files (*.ckpt.json). Uses all checkpoints in that directory", + "type": "string", + "format": "uri", + "content-type": "text/directory", + "cacheable": true, + "default": "qurator-gt4histocr-1.0" + }, + "voter": { + "description": "The voting algorithm to use", + "type": "string", + "default": "confidence_voter_default_ctc" + }, + "textequiv_level": { + "type": "string", + "enum": [ + "line", + "word", + "glyph" + ], + "default": "line", + "description": "Deepest PAGE XML hierarchy level to include TextEquiv results for" + }, + "glyph_conf_cutoff": { + "type": "number", + "format": "float", + "default": 0.001, + "description": "Only include glyph alternatives with confidences above this threshold" + } + }, + "resources": [ + { + "url": "https://qurator-data.de/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/model.tar.xz", + "type": "archive", + "name": "qurator-gt4histocr-1.0", + "description": "Calamari model trained with GT4HistOCR", + "size": 90275264, + "version_range": ">= 1.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.1-pre1/c1_fraktur19-1.tar.gz", + "type": "archive", + "name": "zpd-fraktur19", + "description": "Model trained on 19th century german fraktur", + "path_in_archive": "c1_fraktur19-1", + "size": 86009886, + 
"version_range": ">= 1.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.1-pre1/c1_latin-script-hist-3.tar.gz", + "type": "archive", + "name": "zpd-latin-script-hist-3", + "path_in_archive": "c1_latin-script-hist-3", + "description": "Model trained on historical latin-script texts", + "size": 88416863, + "version_range": ">= 1.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/antiqua_historical.zip", + "type": "archive", + "name": "antiqua_historical", + "path_in_archive": "antiqua_historical", + "description": "Antiqua parts of GT4HistOCR from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale, NFC)", + "size": 89615540, + "version_range": ">= 1.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/antiqua_historical_ligs.zip", + "type": "archive", + "name": "antiqua_historical_ligs", + "path_in_archive": "antiqua_historical_ligs", + "description": "Antiqua parts of GT4HistOCR with enriched ligatures from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale, NFC)", + "size": 87540762, + "version_range": ">= 1.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/fraktur_19th_century.zip", + "type": "archive", + "name": "fraktur_19th_century", + "path_in_archive": "fraktur_19th_century", + "description": "Fraktur 19th century parts of GT4HistOCR mixed with Fraktur data from Archiscribe and jze from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale and nlbin, NFC)", + "size": 83895140, + "version_range": ">= 1.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/fraktur_historical.zip", + "type": "archive", + "name": "fraktur_historical", + "path_in_archive": "fraktur_historical", + "description": "Fraktur parts of GT4HistOCR from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale, 
NFC)", + "size": 87807639, + "version_range": ">= 1.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/fraktur_historical_ligs.zip", + "type": "archive", + "name": "fraktur_historical_ligs", + "path_in_archive": "fraktur_historical_ligs", + "description": "Fraktur parts of GT4HistOCR with enriched ligatures from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale, NFC)", + "size": 88039551, + "version_range": ">= 1.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/gt4histocr.zip", + "type": "archive", + "name": "gt4histocr", + "path_in_archive": "gt4histocr", + "description": "GT4HistOCR from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale, NFC)", + "size": 90107851, + "version_range": ">= 1.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/historical_french.zip", + "type": "archive", + "name": "historical_french", + "path_in_archive": "historical_french", + "description": "17-19th century French prints from Calamari-OCR/calamari_models (5-fold ensemble, nlbin, NFC)", + "size": 87335250, + "version_range": ">= 1.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/idiotikon.zip", + "type": "archive", + "name": "idiotikon", + "path_in_archive": "idiotikon", + "description": "Antiqua UW3 finetuned on Antiqua Idiotikon dictionary with many diacritics from Calamari-OCR/calamari_models (5-fold ensemble, nlbin, NFD)", + "size": 100807764, + "version_range": ">= 1.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/uw3-modern-english.zip", + "type": "archive", + "name": "uw3-modern-english", + "path_in_archive": "uw3-modern-english", + "description": "Antiqua UW3 corpus from Calamari-OCR/calamari_models (5-fold ensemble, nlbin, NFC)", + "size": 85413520, + "version_range": ">= 1.0.0" + } + ] + }, + "ocrd-cis-ocropy-binarize": 
{ + "executable": "ocrd-cis-ocropy-binarize", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/binarization", + "preprocessing/optimization/grayscale_normalization", + "preprocessing/optimization/deskewing" + ], + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-IMG-BIN", + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" + ], + "description": "Binarize (and optionally deskew/despeckle) pages / regions / lines with ocropy", + "parameters": { + "method": { + "type": "string", + "enum": [ + "none", + "global", + "otsu", + "gauss-otsu", + "ocropy" + ], + "description": "binarization method to use (only 'ocropy' will include deskewing and denoising)", + "default": "ocropy" + }, + "threshold": { + "type": "number", + "format": "float", + "description": "for the 'ocropy' and 'global' method, black/white threshold to apply on the whitelevel normalized image (the larger the more/heavier foreground)", + "default": 0.5 + }, + "grayscale": { + "type": "boolean", + "description": "for the 'ocropy' method, produce grayscale-normalized instead of thresholded image", + "default": false + }, + "maxskew": { + "type": "number", + "format": "float", + "description": "modulus of maximum skewing angle (in degrees) to detect (larger will be slower, 0 will deactivate deskewing)", + "default": 0.0 + }, + "noise_maxsize": { + "type": "number", + "format": "int", + "description": "maximum pixel number for connected components to regard as noise (0 will deactivate denoising)", + "default": 0 + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + "default": 0 + }, + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "table", + "region", + "line" + ], + "description": "PAGE XML hierarchy level granularity to annotate images for", + "default": "page" + } + } + 
}, + "ocrd-cis-ocropy-deskew": { + "executable": "ocrd-cis-ocropy-deskew", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/deskewing" + ], + "input_file_grp": [ + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" + ], + "description": "Deskew regions with ocropy (by annotating orientation angle and adding AlternativeImage)", + "parameters": { + "maxskew": { + "type": "number", + "description": "modulus of maximum skewing angle to detect (larger will be slower, 0 will deactivate deskewing)", + "default": 5.0 + }, + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "table", + "region" + ], + "description": "PAGE XML hierarchy level granularity to annotate images for", + "default": "region" + } + } + }, + "ocrd-cis-ocropy-denoise": { + "executable": "ocrd-cis-ocropy-denoise", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/despeckling" + ], + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-IMG-DESPECK", + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" + ], + "description": "Despeckle pages / regions / lines with ocropy", + "parameters": { + "noise_maxsize": { + "type": "number", + "format": "float", + "description": "maximum size in points (pt) for connected components to regard as noise (0 will deactivate denoising)", + "default": 3.0 + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + "default": 0 + }, + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line" + ], + "description": "PAGE XML hierarchy level granularity to annotate images for", + "default": "page" + } + } + }, + "ocrd-cis-ocropy-clip": { + "executable": "ocrd-cis-ocropy-clip", + "categories": [ + "Layout analysis" + ], + "steps": [ + 
"layout/segmentation/region", + "layout/segmentation/line" + ], + "input_file_grp": [ + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" + ], + "description": "Clip text regions / lines at intersections with neighbours", + "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "region", + "line" + ], + "description": "PAGE XML hierarchy level granularity to annotate images for", + "default": "region" + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + "default": 0 + }, + "min_fraction": { + "type": "number", + "format": "float", + "description": "share of foreground pixels that must be retained by the largest label", + "default": 0.7 + } + } + }, + "ocrd-cis-ocropy-resegment": { + "executable": "ocrd-cis-ocropy-resegment", + "categories": [ + "Layout analysis" + ], + "steps": [ + "layout/segmentation/line" + ], + "input_file_grp": [ + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-SEG-LINE" + ], + "description": "Improve coordinates of text lines", + "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region" + ], + "description": "PAGE XML hierarchy level to segment textlines in ('region' abides by existing text region boundaries, 'page' optimises lines in the whole page once", + "default": "page" + }, + "method": { + "type": "string", + "enum": [ + "lineest", + "baseline", + "ccomps" + ], + "description": "source for new line polygon candidates ('lineest' for line estimation, i.e. 
how Ocropy would have segmented text lines; 'baseline' tries to re-polygonize from the baseline annotation; 'ccomps' avoids crossing connected components by majority rule)", + "default": "lineest" + }, + "baseline_only": { + "type": "boolean", + "description": "ignore existing textline coords completely and use baseline as input if possible", + "default": false + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + "default": 0 + }, + "min_fraction": { + "type": "number", + "format": "float", + "description": "share of foreground pixels that must be retained by the output polygons", + "default": 0.75 + }, + "spread": { + "type": "number", + "format": "float", + "description": "distance in points (pt) from the foreground to project textline labels into the background for polygonal contours; if zero, project half a scale/capheight", + "default": 2.4 + }, + "extend_margins": { + "type": "number", + "format": "integer", + "description": "(ignored)", + "default": 3 + } + } + }, + "ocrd-cis-ocropy-dewarp": { + "executable": "ocrd-cis-ocropy-dewarp", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/dewarping" + ], + "description": "Dewarp line images with ocropy", + "input_file_grp": [ + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-SEG-LINE" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + "default": 0 + }, + "range": { + "type": "number", + "format": "float", + "description": "maximum vertical disposition or maximum margin (will be multiplied by mean centerline deltas to yield pixels); also the mean vertical padding", + "default": 4.0 + }, + "smoothness": { + "type": "number", + "format": "float", + "description": "kernel size (relative to image 
height) of horizontal blur applied to foreground to find the center line; the smaller the more dynamic (0.1 would be a better default)", + "default": 1.0 + }, + "max_neighbour": { + "type": "number", + "format": "float", + "description": "maximum rate of foreground pixels intruding from neighbouring lines (line will not be processed above that)", + "default": 0.05 + } + } + }, + "ocrd-cis-ocropy-recognize": { + "executable": "ocrd-cis-ocropy-recognize", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/text-recognition" + ], + "description": "Recognize text in (binarized+deskewed+dewarped) lines with ocropy", + "input_file_grp": [ + "OCR-D-SEG-LINE", + "OCR-D-SEG-WORD", + "OCR-D-SEG-GLYPH" + ], + "output_file_grp": [ + "OCR-D-OCR-OCRO" + ], + "parameters": { + "textequiv_level": { + "type": "string", + "enum": [ + "line", + "word", + "glyph" + ], + "description": "PAGE XML hierarchy level granularity to add the TextEquiv results to", + "default": "line" + }, + "model": { + "type": "string", + "format": "uri", + "content-type": "application/gzip", + "description": "ocropy model to apply (e.g. 
fraktur.pyrnn.gz)" + } + }, + "resources": [ + { + "url": "https://github.com/zuphilip/ocropy-models/raw/master/en-default.pyrnn.gz", + "name": "en-default.pyrnn.gz", + "description": "Default ocropy model for English", + "size": 83826134 + }, + { + "url": "https://github.com/zuphilip/ocropy-models/raw/master/fraktur.pyrnn.gz", + "name": "fraktur.pyrnn.gz", + "description": "Default ocropy fraktur model", + "size": 43882365 + }, + { + "url": "https://github.com/jze/ocropus-model_fraktur/raw/master/fraktur.pyrnn.gz", + "name": "fraktur-jze.pyrnn.gz", + "description": "ocropy fraktur model by github.com/jze", + "size": 2961298 + }, + { + "url": "https://github.com/chreul/OCR_Testdata_EarlyPrintedBooks/raw/master/LatinHist-98000.pyrnn.gz", + "name": "LatinHist.pyrnn.gz", + "description": "ocropy historical latin model by github.com/chreul", + "size": 16989864 + } + ] + }, + "ocrd-cis-ocropy-segment": { + "executable": "ocrd-cis-ocropy-segment", + "categories": [ + "Layout analysis" + ], + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line" + ], + "input_file_grp": [ + "OCR-D-GT-SEG-BLOCK", + "OCR-D-SEG-BLOCK" + ], + "output_file_grp": [ + "OCR-D-SEG-LINE" + ], + "description": "Segment pages into regions and lines, tables into cells and lines, or regions into lines with ocropy", + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative; when disabled and no meta-data is found, 300 is assumed", + "default": 0 + }, + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "table", + "region" + ], + "description": "PAGE XML hierarchy level to read images from and add elements to", + "default": "region" + }, + "maxcolseps": { + "type": "number", + "format": "integer", + "default": 20, + "description": "(when operating on the page/table level) maximum number of white/background column separators to detect, 
counted piece-wise" + }, + "maxseps": { + "type": "number", + "format": "integer", + "default": 20, + "description": "(when operating on the page/table level) number of black/foreground column separators to detect (and suppress), counted piece-wise" + }, + "maximages": { + "type": "number", + "format": "integer", + "default": 10, + "description": "(when operating on the page level) maximum number of black/foreground very large components to detect (and suppress), counted piece-wise" + }, + "csminheight": { + "type": "number", + "format": "integer", + "default": 4, + "description": "(when operating on the page/table level) minimum height of white/background or black/foreground column separators in multiples of scale/capheight, counted piece-wise" + }, + "hlminwidth": { + "type": "number", + "format": "integer", + "default": 10, + "description": "(when operating on the page/table level) minimum width of black/foreground horizontal separators in multiples of scale/capheight, counted piece-wise" + }, + "gap_height": { + "type": "number", + "format": "float", + "default": 0.01, + "description": "(when operating on the page/table level) largest minimum pixel average in the horizontal or vertical profiles (across the binarized image) to still be regarded as a gap during recursive X-Y cut from lines to regions; needs to be larger when more foreground noise is present, reduce to avoid mistaking text for noise" + }, + "gap_width": { + "type": "number", + "format": "float", + "default": 1.5, + "description": "(when operating on the page/table level) smallest width in multiples of scale/capheight of a valley in the horizontal or vertical profiles (across the binarized image) to still be regarded as a gap during recursive X-Y cut from lines to regions; needs to be smaller when more foreground noise is present, increase to avoid mistaking inter-line as paragraph gaps and inter-word as inter-column gaps" + }, + "overwrite_order": { + "type": "boolean", + "default": true, + 
"description": "(when operating on the page/table level) remove any references for existing TextRegion elements within the top (page/table) reading order; otherwise append" + }, + "overwrite_separators": { + "type": "boolean", + "default": true, + "description": "(when operating on the page/table level) remove any existing SeparatorRegion elements; otherwise append" + }, + "overwrite_regions": { + "type": "boolean", + "default": true, + "description": "(when operating on the page/table level) remove any existing TextRegion elements; otherwise append" + }, + "overwrite_lines": { + "type": "boolean", + "default": true, + "description": "(when operating on the region level) remove any existing TextLine elements; otherwise append" + }, + "spread": { + "type": "number", + "format": "float", + "default": 2.4, + "description": "distance in points (pt) from the foreground to project text line (or text region) labels into the background for polygonal contours; if zero, project half a scale/capheight" + } + } + }, + "ocrd-cis-ocropy-train": { + "executable": "ocrd-cis-ocropy-train", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/text-recognition" + ], + "input_file_grp": [ + "OCR-D-GT-SEG-BLOCK", + "OCR-D-SEG-BLOCK" + ], + "description": "train model with ground truth from mets data", + "parameters": { + "textequiv_level": { + "type": "string", + "description": "hierarchy level to extract GT pairs from", + "enum": [ + "line", + "word", + "glyph" + ], + "default": "line" + }, + "model": { + "type": "string", + "format": "uri", + "content-type": "application/gzip", + "description": "load model (e.g. 
'fraktur.pyrnn.gz') to init weights, or none to train from scratch" + }, + "ntrain": { + "type": "number", + "format": "integer", + "description": "lines to train before stopping", + "default": 1000000 + }, + "outputpath": { + "type": "string", + "description": "(existing) path for the trained model" + } + } + }, + "ocrd-cis-align": { + "executable": "ocrd-cis-align", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/post-correction" + ], + "input_file_grp": [ + "OCR-D-OCR-1", + "OCR-D-OCR-2", + "OCR-D-OCR-N" + ], + "output_file_grp": [ + "OCR-D-ALIGNED" + ], + "description": "Align multiple OCRs and/or GTs" + }, + "ocrd-cis-postcorrect": { + "executable": "ocrd-cis-postcorrect", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/post-correction" + ], + "description": "Post correct OCR results", + "input_file_grp": [ + "OCR-D-LINE-ALIGNED" + ], + "output_file_grp": [ + "OCR-D-POST-CORRECTED" + ], + "parameters": { + "maxCandidates": { + "description": "Maximum number of considered correction candidates per suspicious token", + "type": "number", + "format": "integer", + "default": 10 + }, + "profilerPath": { + "description": "Path to the profiler executable", + "required": true, + "type": "string" + }, + "profilerConfig": { + "description": "Path to the profiler's language config file", + "required": true, + "type": "string" + }, + "model": { + "description": "Path to the post correction model file", + "type": "string", + "required": true + }, + "nOCR": { + "description": "Number of parallel OCR's to use for the post correction", + "type": "number", + "format": "integer", + "default": 1 + }, + "runLE": { + "description": "Do run the lexicon extension step for the post correction", + "type": "boolean", + "default": false + } + } + }, + "ocrd-detectron2-segment": { + "executable": "ocrd-detectron2-segment", + "categories": [ + "Layout analysis" + ], + "steps": [ + "layout/segmentation/region" 
+ ], + "description": "Detect regions with Detectron2 models", + "input_file_grp": [ + "OCR-D-IMG" + ], + "output_file_grp": [ + "OCR-D-SEG-REGION" + ], + "parameters": { + "operation_level": { + "type": "string", + "enum": [ + "page", + "table" + ], + "default": "page", + "description": "hierarchy level which to predict and assign regions for" + }, + "categories": { + "type": "array", + "required": true, + "description": "maps each region category (position) of the model to a PAGE region type (and @type or @custom if separated by colon), e.g. ['TextRegion:paragraph', 'TextRegion:heading', 'TextRegion:floating', 'TableRegion', 'ImageRegion'] for PubLayNet; categories with an empty string will be skipped during prediction" + }, + "model_config": { + "type": "string", + "format": "uri", + "content-type": "text/yaml", + "required": true, + "description": "path name of model config" + }, + "model_weights": { + "type": "string", + "format": "uri", + "content-type": "application/octet-stream", + "required": true, + "description": "path name of model weights" + }, + "min_confidence": { + "type": "number", + "format": "float", + "default": 0.5, + "description": "confidence threshold for detections" + }, + "postprocessing": { + "type": "string", + "enum": [ + "full", + "only-nms", + "only-morph", + "none" + ], + "default": "full", + "description": "which postprocessing steps to enable: by default, applies a custom non-maximum suppression (to avoid overlaps) and morphological operations (using connected component analysis on the binarized input image to shrink or expand regions)" + }, + "debug_img": { + "type": "string", + "enum": [ + "none", + "instance_colors", + "instance_colors_only", + "category_colors" + ], + "default": "none", + "description": "paint an AlternativeImage which blends the input image and all raw decoded region candidates" + }, + "device": { + "type": "string", + "default": "cuda", + "description": "select computing device for Torch (e.g. 
cpu or cuda:0); will fall back to CPU if no GPU is available" + } + }, + "resources": [ + { + "description": "TableBank via LayoutLM X152-FPN config", + "name": "TableBank_X152.yaml", + "size": 536, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/TableBank_X152.yaml" + }, + { + "description": "TableBank via LayoutLM X152-FPN weights", + "name": "TableBank_X152.pth", + "size": 1103832675, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/TableBank_X152.pth" + }, + { + "description": "TableBank via Psarpei X152-FPN config", + "name": "TableBank_X152_Psarpei.yaml", + "size": 534, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/TableBank_X152_Psarpei.yaml" + }, + { + "description": "TableBank via Psarpei X152-FPN weights", + "name": "TableBank_X152_Psarpei.pth", + "size": 1103832675, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/TableBank_X152_Psarpei.pth" + }, + { + "description": "PubLayNet via hpanwar08 R50-FPN config", + "name": "PubLayNet_R_50_FPN_3x.yaml", + "size": 388, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_50_FPN_3x.yaml" + }, + { + "description": "PubLayNet via hpanwar08 R50-FPN weights", + "name": "PubLayNet_R_50_FPN_3x.pth", + "size": 176249718, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_50_FPN_3x.pth" + }, + { + "description": "PubLayNet via hpanwar08 R101-FPN config", + "name": "PubLayNet_R_101_FPN_3x.yaml", + "size": 392, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_101_FPN_3x.yaml" + }, + { + "description": "PubLayNet via hpanwar08 R101-FPN weights", + "name": "PubLayNet_R_101_FPN_3x.pth", + "size": 503147199, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_101_FPN_3x.pth" + }, + { + "description": "PubLayNet via hpanwar08 X101-FPN config", + "name": 
"PubLayNet_X_101_32x8d_FPN_3x.yaml", + "size": 592, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_X_101_32x8d_FPN_3x.yaml" + }, + { + "description": "PubLayNet via hpanwar08 X101-FPN weights", + "name": "PubLayNet_X_101_32x8d_FPN_3x.pth", + "size": 429840864, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_X_101_32x8d_FPN_3x.pth" + }, + { + "description": "PubLayNet via JPLeoRX R50-FPN config", + "name": "PubLayNet_R_50_FPN_3x_JPLeoRX.yaml", + "size": 388, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_50_FPN_3x_JPLeoRX.yaml" + }, + { + "description": "PubLayNet via JPLeoRX R50-FPN weights", + "name": "PubLayNet_R_50_FPN_3x_JPLeoRX.pth", + "size": 176299422, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_50_FPN_3x_JPLeoRX.pth" + }, + { + "description": "PubLayNet via JPLeoRX R101-FPN config", + "name": "PubLayNet_R_101_FPN_3x_JPLeoRX.yaml", + "size": 392, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_101_FPN_3x_JPLeoRX.yaml" + }, + { + "description": "PubLayNet via JPLeoRX R101-FPN weights", + "name": "PubLayNet_R_101_FPN_3x_JPLeoRX.pth", + "size": 252572745, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_101_FPN_3x_JPLeoRX.pth" + }, + { + "description": "Modern Magazines via Jambo-sudo X101-FPN (pre-trained on PubLayNet, fine-tuned on 500 p. 20th cent. magazines) config", + "name": "Jambo-sudo_X101.yaml", + "size": 592, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/Jambo-sudo_X101.yaml" + }, + { + "description": "Modern Magazines via Jambo-sudo X101-FPN (pre-trained on PubLayNet, fine-tuned on 500 p. 20th cent. 
magazines) weights", + "name": "Jambo-sudo_X101.pth", + "size": 856430002, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/Jambo-sudo_X101.pth" + }, + { + "description": "PRImALayout via LayoutLM R50-FPN config", + "name": "PRImALayout_R50.yaml", + "size": 934, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PRImALayout_R50.yaml" + }, + { + "description": "PRImALayout via LayoutLM R50-FPN weights", + "name": "PRImALayout_R50.pth", + "size": 351229486, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PRImALayout_R50.pth" + }, + { + "description": "DocBank via LayoutLM X101-FPN config", + "name": "DocBank_X101.yaml", + "size": 523, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/DocBank_X101.yaml" + }, + { + "description": "DocBank via LayoutLM X101-FPN weights", + "name": "DocBank_X101.pth", + "size": 835606605, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/DocBank_X101.pth" + }, + { + "description": "NewspaperNavigator via LayoutParser R50-PanopticFPN config", + "name": "NewspaperNavigator_R_50_PFPN_3x.yaml", + "size": 330226761, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/NewspaperNavigator_R_50_PFPN_3x.yaml" + }, + { + "description": "NewspaperNavigator via LayoutParser R50-PanopticFPN weights", + "name": "NewspaperNavigator_R_50_PFPN_3x.pth", + "size": 330226761, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/NewspaperNavigator_R_50_PFPN_3x.pth" + }, + { + "description": "MathFormulaDetection via LayoutParser R50-FPN config", + "name": "Math_R_50_FPN_3x.yaml", + "size": 5632, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/Math_R_50_FPN_3x.yaml" + }, + { + "description": "MathFormulaDetection via LayoutParser R50-FPN weights", + "name": "Math_R_50_FPN_3x.pth", + "size": 330084629, + "url": 
"https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/Math_R_50_FPN_3x.pth" + } + ] + }, + "ocrd-doxa-binarize": { + "executable": "ocrd-doxa-binarize", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/binarization" + ], + "description": "binarize via locally adaptive thresholding", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 + }, + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, + "algorithm": { + "type": "string", + "enum": [ + "Otsu", + "Bernsen", + "Niblack", + "Sauvola", + "Wolf", + "Gatos", + "NICK", + "Su", + "Singh", + "Bataineh", + "ISauvola", + "WAN" + ], + "default": "ISauvola", + "description": "Thresholding algorithm to use." + }, + "parameters": { + "type": "object", + "default": {}, + "description": "Dictionary of algorithm-specific parameters. 
Unless overridden here, the following defaults are used: \nBernsen:\t{'window': 75, 'threshold': 100, 'contrast-limit': 25}\nNICK:\t{'window': 75, 'k': -0.2}\nNiblack:\t{'window': 75, 'k': 0.2}\nSingh:\t{'window': 75, 'k': 0.2}\nGatos:\t{'glyph': 60}\nSauvola:\t{'window': 75, 'k': 0.2}\nWolf:\t{'window': 75, 'k': 0.2}\nWAN:\t{'window': 75, 'k': 0.2}\nSu:\t{'window': 0 (based on stroke size), 'minN': windowSize (roughly based on size of window)}\n\n(window/glyph sizes are in px, threshold/limits in uint8 [0,255])" + } + } + }, + "ocrd-fileformat-transform": { + "executable": "ocrd-fileformat-transform", + "description": "Convert between OCR file formats", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization" + ], + "input_file_grp": [ + "OCR-D-OCR-PAGE", + "OCR-D-OCR-ALTO", + "OCR-D-OCR-HOCR" + ], + "output_file_grp": [ + "OCR-D-OCR-PAGE", + "OCR-D-OCR-ALTO", + "OCR-D-OCR-HOCR" + ], + "parameters": { + "from-to": { + "description": "Transformation scenario, see ocr-fileformat -L", + "type": "string", + "default": "page alto", + "enum": [ + "abbyy hocr", + "abbyy page", + "alto2.0 alto3.0", + "alto2.0 alto3.1", + "alto2.0 hocr", + "alto2.1 alto3.0", + "alto2.1 alto3.1", + "alto2.1 hocr", + "alto page", + "alto text", + "gcv hocr", + "gcv page", + "hocr alto2.0", + "hocr alto2.1", + "hocr page", + "hocr text", + "page alto", + "page alto_legacy", + "page hocr", + "page page2019", + "page text", + "tei hocr", + "textract page" + ] + }, + "ext": { + "description": "Output extension. 
Set to empty string to derive extension from the media type.", + "type": "string", + "default": "" + }, + "script-args": { + "description": "Arguments to Saxon (for XSLT transformations) or to transformation script", + "type": "string", + "default": "" + } + } + }, + "ocrd-froc-recognize": { + "executable": "ocrd-froc", + "description": "Recognise font family/shape (annotating TextStyle) along with text (annotating TextEquiv)", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/font-identification", + "recognition/text-recognition" + ], + "input_file_grp": [ + "OCR-D-SEG" + ], + "output_file_grp": [ + "OCR-D-OCR" + ], + "parameters": { + "ocr_method": { + "description": "The method to use for text recognition", + "type": "string", + "enum": [ + "none", + "SelOCR", + "COCR", + "adaptive" + ], + "default": "none" + }, + "overwrite_style": { + "description": "Whether to overwrite existing TextStyle/@fontFamily attributes", + "type": "boolean", + "required": false, + "default": true + }, + "min_score_style": { + "description": "The minimum score of a font classification to be serialized/used as input for OCR", + "type": "number", + "format": "float", + "required": false, + "default": 0 + }, + "overwrite_text": { + "description": "Whether to remove any existing TextEquiv before adding text", + "type": "boolean", + "required": false, + "default": false + }, + "model": { + "description": "The file name of the neural network to use, including sufficient path information. 
Defaults to the model bundled with ocrd_froc.", + "type": "string", + "required": false + }, + "fast_cocr": { + "description": "Whether to use optimization steps on the COCR strategy", + "type": "boolean", + "default": true + }, + "adaptive_threshold": { + "description": "Threshold of certitude needed to use SelOCR when using the adaptive strategy", + "type": "number", + "format": "integer", + "default": 95 + }, + "font_class_priors": { + "description": "List of font classes which are known to be present on the data when using the adaptive/SelOCR strategies. If this option is specified, any font classes not included are ignored. If 'other' is included in the list, no font classification is output and a generic model is used for transcriptions.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "antiqua", + "bastarda", + "fraktur", + "textura", + "schwabacher", + "greek", + "italic", + "hebrew", + "gotico-antiqua", + "manuscript", + "rotunda", + "other" + ] + }, + "default": [] + } + } + }, + "ocrd-im6convert": { + "executable": "ocrd-im6convert", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization" + ], + "description": "Convert and transform images", + "input_file_grp": [ + "OCR-D-IMG" + ], + "output_file_grp": [ + "OCR-D-IMG" + ], + "parameters": { + "input-options": { + "type": "string", + "description": "e.g. -density 600x600 -wavelet-denoise 1%x0.1", + "default": "" + }, + "output-format": { + "type": "string", + "description": "Desired media type of output", + "required": true, + "enum": [ + "image/tiff", + "image/jp2", + "image/png" + ] + }, + "output-options": { + "type": "string", + "description": "e.g. 
-resample 300x300 -alpha deactivate -normalize -despeckle -noise 2 -negate -morphology close diamond", + "default": "" + } + } + }, + "ocrd-keraslm-rate": { + "executable": "ocrd-keraslm-rate", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/text-recognition" + ], + "description": "Rate elements of the text with a character-level LSTM language model in Keras", + "input_file_grp": [ + "OCR-D-OCR-TESS", + "OCR-D-OCR-KRAK", + "OCR-D-OCR-OCRO", + "OCR-D-OCR-CALA", + "OCR-D-OCR-ANY", + "OCR-D-COR-CIS", + "OCR-D-COR-ASV" + ], + "output_file_grp": [ + "OCR-D-COR-LM" + ], + "parameters": { + "model_file": { + "type": "string", + "format": "uri", + "content-type": "application/x-hdf;subtype=bag", + "description": "path of h5py weight/config file for model trained with keraslm", + "required": true, + "cacheable": true + }, + "textequiv_level": { + "type": "string", + "enum": [ + "region", + "line", + "word", + "glyph" + ], + "default": "glyph", + "description": "PAGE XML hierarchy level to evaluate TextEquiv sequences on" + }, + "alternative_decoding": { + "type": "boolean", + "description": "whether to process all TextEquiv alternatives, finding the best path via beam search, and delete each non-best alternative", + "default": true + }, + "beam_width": { + "type": "number", + "format": "integer", + "description": "maximum number of best partial paths to consider during search with alternative_decoding", + "default": 10 + }, + "lm_weight": { + "type": "number", + "format": "float", + "description": "share of the LM scores over the input confidences", + "default": 0.5 + } + }, + "resources": [ + { + "url": "https://github.com/OCR-D/ocrd_keraslm/releases/download/v0.4.3/model_dta_full.h5", + "name": "model_dta_full.h5", + "description": "character-level LM as stateful contiguous LSTM model (2 layers, 128 hidden nodes each, window length 256) trained on complete Deutsches Textarchiv", + "size": 1769684 + } + ] + }, + 
"ocrd-kraken-binarize": { + "executable": "ocrd-kraken-binarize", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-PRE-CROP", + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-PRE-BIN" + ], + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/binarization" + ], + "description": "Binarize images with kraken", + "parameters": { + "level-of-operation": { + "description": "segment hierarchy level to operate on", + "type": "string", + "default": "page", + "enum": [ + "page", + "region", + "line" + ] + } + } + }, + "ocrd-kraken-segment": { + "executable": "ocrd-kraken-segment", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-PRE-CROP", + "OCR-D-PRE-BIN" + ], + "output_file_grp": [ + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" + ], + "categories": [ + "Layout analysis" + ], + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line" + ], + "description": "Layout segmentation with Kraken", + "parameters": { + "level-of-operation": { + "description": "segment hierarchy level to operate on (page into regions+lines, or regions into lines)", + "type": "string", + "default": "page", + "enum": [ + "page", + "table", + "region" + ] + }, + "overwrite_segments": { + "description": "remove any existing regions/lines", + "type": "boolean", + "default": false + }, + "text_direction": { + "type": "string", + "description": "Sets principal text direction", + "enum": [ + "horizontal-lr", + "horizontal-rl", + "vertical-lr", + "vertical-rl" + ], + "default": "horizontal-lr" + }, + "maxcolseps": { + "description": "Maximum number of column separators. 
Set to 0 for single-column text to avoid unnecessary computation.", + "type": "number", + "format": "integer", + "default": 2 + }, + "scale": { + "description": "mean xheight size of glyphs (guessed if zero)", + "type": "number", + "format": "float", + "default": 0 + }, + "black_colseps": { + "description": "Whether column separators are assumed to be vertical black lines or not", + "type": "boolean", + "default": false + }, + "remove_hlines": { + "description": "Remove horizontal colseps before segmentation", + "type": "boolean", + "default": true + }, + "blla_model": { + "description": "Model used for baseline detection and page segmentation. Ignored if use_legacy.", + "type": "string", + "format": "uri", + "content-type": "application/python-cpickle", + "cacheable": true, + "default": "blla.mlmodel" + }, + "blla_classes": { + "description": "Class mapping for the region types trained into blla_model.", + "type": "object", + "minProperties": 2, + "additionalProperties": { + "type": "string", + "enum": [ + "TextRegion", + "ImageRegion", + "LineDrawingRegion", + "GraphicRegion", + "TableRegion", + "ChartRegion", + "MapRegion", + "SeparatorRegion", + "MathsRegion", + "ChemRegion", + "MusicRegion", + "AdvertRegion", + "NoiseRegion", + "UnknownRegion", + "CustomRegion" + ] + }, + "default": { + "text": "TextRegion", + "image": "ImageRegion", + "line drawing": "LineDrawingRegion", + "graphic": "GraphicRegion", + "table": "TableRegion", + "chart": "ChartRegion", + "map": "MapRegion", + "separator": "SeparatorRegion", + "maths": "MathsRegion", + "chem": "ChemRegion", + "music": "MusicRegion", + "advert": "AdvertRegion", + "noise": "NoiseRegion", + "unknown": "UnknownRegion", + "custom": "CustomRegion" + } + }, + "device": { + "description": "CUDA ID (e.g. 
'cuda:0') for computation on GPU (if available), or 'cpu' to run on CPU only", + "type": "string", + "default": "cuda:0" + }, + "use_legacy": { + "description": "Use legacy box segmenter as opposed to neural net baseline segmenter", + "type": "boolean", + "default": false + } + }, + "resources": [ + { + "url": "https://github.com/mittagessen/kraken/raw/main/kraken/blla.mlmodel", + "size": 5047020, + "name": "blla.mlmodel", + "parameter_usage": "without-extension", + "description": "Pretrained region+baseline segmentation model (trained on handwriting)" + }, + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/ubma_segmentation/ubma_segmentation.mlmodel", + "size": 5047020, + "name": "ubma_segmentation.mlmodel", + "parameter_usage": "without-extension", + "description": "region+baseline segmentation model trained by UBMA (on print)" + } + ] + }, + "ocrd-kraken-recognize": { + "executable": "ocrd-kraken-recognize", + "input_file_grp": [ + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-OCR-KRAK" + ], + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/text-recognition" + ], + "description": "Text recognition with Kraken", + "parameters": { + "overwrite_text": { + "description": "remove any existing TextEquiv", + "type": "boolean", + "default": false + }, + "model": { + "description": "OCR model to recognize with", + "type": "string", + "format": "uri", + "content-type": "application/python-cpickle", + "cacheable": true, + "default": "en_best.mlmodel" + }, + "pad": { + "description": "Extra blank padding to the left and right of text line.", + "type": "number", + "format": "integer", + "default": 16 + }, + "bidi_reordering": { + "description": "Reorder classes in the ocr_record according to the Unicode bidirectional algorithm for correct display.", + "type": "boolean", + "default": true + }, + "device": { + "description": "CUDA ID (e.g. 
'cuda:0') for computation on GPU (if available), or 'cpu' to run on CPU only", + "type": "string", + "default": "cuda:0" + } + }, + "resources": [ + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/austriannewspapers/20220520/austriannewspapers_best.mlmodel", + "size": 16243476, + "name": "austriannewspapers.mlmodel", + "parameter_usage": "without-extension", + "description": "19th and 20th century German Fraktur; https://github.com/UB-Mannheim/AustrianNewspapers/wiki/Training-with-Kraken" + }, + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/reichsanzeiger-gt/reichsanzeiger_best.mlmodel", + "size": 16358636, + "name": "reichsanzeiger.mlmodel", + "parameter_usage": "without-extension", + "description": "19th and 20th century German Fraktur ('Deutscher Reichsanzeiger'); https://github.com/UB-Mannheim/reichsanzeiger-gt/wiki/Training-with-Kraken" + }, + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/digitue-gt/digitue_best.mlmodel", + "size": 16364343, + "name": "digitue.mlmodel", + "parameter_usage": "without-extension", + "description": "mostly 19th century German Fraktur; https://github.com/UB-Mannheim/digitue-gt/wiki/Training-with-Kraken" + }, + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/digi-gt/luther_best.mlmodel", + "size": 16305851, + "name": "luther.mlmodel", + "parameter_usage": "without-extension", + "description": "16th century German Gothic; https://github.com/UB-Mannheim/digi-gt/wiki/Training" + }, + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/typewriter/typewriter.mlmodel", + "size": 16364780, + "name": "typewriter.mlmodel", + "parameter_usage": "without-extension", + "description": "20th century typewriter http://idb.ub.uni-tuebingen.de/opendigi/walz_1976, pretrained on austriannewspapers.mlmodel" + }, + { + "url": "https://zenodo.org/record/2577813/files/en_best.mlmodel?download=1", + "size": 2930723, + 
"name": "en_best.mlmodel", + "parameter_usage": "without-extension", + "description": "This model has been trained on a large corpus of modern printed English text augmented with ~10000 lines of historical pages" + } + ] + }, + "ocrd-olahd-client": { + "executable": "ocrd-olahd-client", + "description": "Post a workspace to OLA-HD", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization" + ], + "input_file_grp": [], + "output_file_grp": [], + "parameters": { + "endpoint": { + "description": "URL of the OLA-HD instance", + "type": "string", + "required": true + }, + "strict": { + "description": "Whether to log or raise bagging issues", + "type": "boolean", + "default": true + }, + "username": { + "description": "Username", + "type": "string", + "required": true + }, + "password": { + "description": "Password", + "type": "string", + "required": true + }, + "pid_previous_version": { + "description": "PID of the previous version of this work, already stored in OLA-HD", + "type": "string", + "required": false + } + } + }, + "ocrd-olena-binarize": { + "executable": "ocrd-olena-binarize", + "description": "popular binarization algorithms implemented by Olena/SCRIBO, wrapped for OCR-D (on page level only)", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/binarization" + ], + "input_file_grp": [ + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE", + "OCR-D-SEG-WORD", + "OCR-D-IMG" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE", + "OCR-D-SEG-WORD" + ], + "parameters": { + "impl": { + "description": "The name of the actual binarization algorithm", + "type": "string", + "default": "sauvola-ms-split", + "enum": [ + "sauvola", + "sauvola-ms", + "sauvola-ms-fg", + "sauvola-ms-split", + "kim", + "wolf", + "niblack", + "singh", + "otsu" + ] + }, + "k": { + "description": "Sauvola's formulae parameter (foreground weight decreases with k); for Multiscale, multiplied to yield default 0.2/0.3/0.5; for 
Singh, multiplied to yield default 0.06; for Niblack, multiplied to yield default -0.2; for Wolf/Kim, used directly; for Otsu, does not apply", + "format": "float", + "type": "number", + "default": 0.34 + }, + "win-size": { + "description": "The (odd) window size in pixels; when zero (default), set to DPI (or 301); for Otsu, does not apply", + "type": "number", + "format": "integer", + "default": 0 + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 + } + } + }, + "ocrd-page2alto-transform": { + "executable": "ocrd-page2alto-transform", + "categories": [ + "Layout analysis" + ], + "description": "Transform PAGE-XML to ALTO", + "input_file_grp": [ + "OBSOLETE" + ], + "output_file_grp": [ + "ALSO-OBSOLETE" + ], + "steps": [ + "whatevs" + ], + "parameters": { + "check_border": { + "type": "boolean", + "description": "Whether to create full-page WIDTH/HEIGHT etc. 
if no border/pagespace present", + "default": false + }, + "check_words": { + "type": "boolean", + "description": "Check whether PAGE-XML contains any Words and fail if not", + "default": true + }, + "skip_empty_lines": { + "type": "boolean", + "description": "Whether to omit or keep empty lines in PAGE-XML", + "default": false + }, + "trailing_dash_to_hyp": { + "type": "boolean", + "description": "Whether to add a HYP element if the last word in a line ends in '-'", + "default": false + }, + "dummy_word": { + "type": "boolean", + "description": "Whether to create a Word for TextLine that have TextEquiv/Unicode but no Word", + "default": true + }, + "dummy_textline": { + "type": "boolean", + "description": "Whether to create a TextLine for regions that have TextEquiv/Unicode but no TextLine", + "default": true + }, + "textequiv_index": { + "type": "number", + "description": "If multiple textequiv, use the n-th TextEquiv by @index", + "default": 0 + }, + "region_order": { + "type": "string", + "description": "Order in which to iterate over the regions", + "enum": [ + "document", + "reading-order", + "reading-order-only" + ], + "default": "document" + }, + "textline_order": { + "type": "string", + "description": "Order in which to iterate over the textlines", + "enum": [ + "document", + "index", + "textline-order" + ], + "default": "document" + }, + "textequiv_fallback_strategy": { + "type": "string", + "description": "What to do if selected TextEquiv @index is not available: 'raise' will lead to a runtime error, 'first' will use the first TextEquiv, 'last' will use the last TextEquiv on the element", + "enum": [ + "raise", + "first", + "last" + ], + "default": "first" + }, + "alto_version": { + "type": "string", + "description": "Which ALTO schema version to 
produce", + "default": "v4.2", + "enum": [ + "v4.2", + "v4.1", + "v4.0", + "v3.1", + "v3.0", + "v2.1", + "v2.0" + ] + }, + "timestamp_src": { + "type": "string", + "description": "Which element to use for the timestamp", + "default": "LastChange", + "enum": [ + "Created", + "LastChange", + "none" + ] + } + }, + "resources": [] + }, + "ocrd-pagetopdf": { + "executable": "ocrd-pagetopdf", + "description": "Convert text and layout annotations to PDF format (overlaying original image with text layer and polygon outlines)", + "categories": [ + "Long-term preservation" + ], + "steps": [ + "postprocessing/format-conversion" + ], + "input_file_grp": [ + "OCR-D-OCR-PAGE" + ], + "output_file_grp": [ + "OCR-D-OCR-PDF" + ], + "parameters": { + "font": { + "description": "Font file to be used in PDF file. If unset, AletheiaSans.ttf is used. (Make sure to pick a font which covers all glyphs!)", + "type": "string", + "format": "uri", + "content-type": "application/x-font-ttf", + "default": "" + }, + "outlines": { + "description": "What segment hierarchy to draw coordinate outlines for. If unset, no outlines are drawn.", + "type": "string", + "default": "", + "enum": [ + "", + "region", + "line", + "word", + "glyph" + ] + }, + "textequiv_level": { + "description": "What segment hierarchy level to render text output from. If unset, no text is rendered.", + "type": "string", + "default": "", + "enum": [ + "", + "region", + "line", + "word", + "glyph" + ] + }, + "negative2zero": { + "description": "Set all negative box values to 0", + "type": "boolean", + "default": false + }, + "ext": { + "description": "Output filename extension", + "type": "string", + "default": ".pdf" + }, + "multipage": { + "description": "Merge all PDFs into one multipage file. The value is used as filename for the pdf.", + "type": "string", + "default": "" + }, + "pagelabel": { + "description": "Parameter for 'multipage': Set the page information, which will be used as pagelabel. 
Default is 'pageId', e.g. the option 'pagenumber' will create numbered pagelabel consecutively", + "type": "string", + "default": "pageId", + "enum": [ + "pagenumber", + "pageId", + "basename", + "basename_without_extension", + "local_filename", + "ID", + "url" + ] + }, + "script-args": { + "description": "Extra arguments to PageToPdf (see https://github.com/PRImA-Research-Lab/prima-page-to-pdf)", + "type": "string", + "default": "" + } + } + }, + "ocrd-repair-inconsistencies": { + "executable": "ocrd-repair-inconsistencies", + "categories": [ + "Layout analysis" + ], + "description": "Re-order glyphs/words/lines top-down-left-right when textually inconsistent with their parents", + "input_file_grp": [ + "OCR-D-SEG-BLOCK" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK-FIXED" + ], + "steps": [ + "layout/segmentation/line", + "layout/segmentation/word", + "layout/segmentation/glyph" + ] + }, + "ocrd-segment-repair": { + "executable": "ocrd-segment-repair", + "categories": [ + "Layout analysis" + ], + "description": "Analyse and repair region segmentation; at least ensure validity and consistency of coordinates.", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-BLOCK" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK" + ], + "steps": [ + "layout/segmentation/region" + ], + "parameters": { + "sanitize": { + "type": "boolean", + "default": false, + "description": "Shrink each region such that its coordinates become the minimal concave hull of its binary foreground. (Assumes that a perfect binarization is available.)" + }, + "sanitize_padding": { + "type": "number", + "format": "integer", + "minimum": 1, + "default": 5, + "description": "When shrinking a region, enlarge the resulting hull by this amount of pixels in each direction." + }, + "simplify": { + "type": "number", + "format": "float", + "minimum": 0, + "default": 0, + "description": "Distance (in px) used to simplify all segment polygons. (Avoid values larger than xheight/scale, or corners will be chopped off.) 
Set to 0 to disable." + }, + "plausibilize": { + "type": "boolean", + "default": false, + "description": "Identify and remove redundancies on text regions and text lines (deleting/merging/shrinking where overlaps occur)." + }, + "plausibilize_merge_min_overlap": { + "type": "number", + "format": "float", + "minimum": 0.0, + "maximum": 1.0, + "default": 0.9, + "description": "When merging a region or line almost contained in another, require at least this ratio of area is shared with the other." + }, + "spread": { + "type": "number", + "format": "integer", + "default": 0, + "description": "After all other steps, enlarge segments by this many pixels into the background." + }, + "spread_level": { + "type": "string", + "enum": [ + "page", + "table", + "region", + "line", + "word" + ], + "default": "region", + "description": "Hierarchy level spread operates on" + } + } + }, + "ocrd-segment-project": { + "executable": "ocrd-segment-project", + "categories": [ + "Layout analysis" + ], + "description": "Project segment coordinates to their structural parents", + "input_file_grp": [ + "OCR-D-SEG-BLOCK" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK" + ], + "steps": [ + "layout/segmentation" + ], + "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "table", + "region", + "line", + "word" + ], + "default": "page", + "description": "hierarchy level which to assign new coordinates to" + }, + "padding": { + "type": "number", + "format": "integer", + "minimum": 0, + "default": 10, + "description": "margin (in px) to extend the hull in every direction" + } + } + }, + "ocrd-segment-from-masks": { + "executable": "ocrd-segment-from-masks", + "categories": [ + "Layout analysis" + ], + "description": "Import region segmentation from mask images (segments filled with colors encoding classes). Input fileGrp format is `base,mask` (i.e. 
PAGE or original image files first, mask image files second).", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK" + ], + "steps": [ + "layout/segmentation/region" + ], + "parameters": { + "colordict": { + "type": "object", + "default": { + "FFFFFF00": "", + "FFFFFFFF": "Border", + "8B4513FF": "TableRegion", + "4682B4FF": "AdvertRegion", + "FF8C00FF": "ChemRegion", + "9400D3FF": "MusicRegion", + "9ACDD2FF": "MapRegion", + "0000FFFF": "TextRegion", + "0000FFFA": "TextRegion:paragraph", + "0000FFF5": "TextRegion:heading", + "0000FFF0": "TextRegion:caption", + "0000FFEB": "TextRegion:header", + "0000FFE6": "TextRegion:footer", + "0000FFE1": "TextRegion:page-number", + "0000FFDC": "TextRegion:drop-capital", + "0000FFD7": "TextRegion:credit", + "0000FFD2": "TextRegion:floating", + "0000FFCD": "TextRegion:signature-mark", + "0000FFC8": "TextRegion:catch-word", + "0000FFC3": "TextRegion:marginalia", + "0000FFBE": "TextRegion:footnote", + "0000FFB9": "TextRegion:footnote-continued", + "0000FFB4": "TextRegion:endnote", + "0000FFAF": "TextRegion:TOC-entry", + "0000FFA5": "TextRegion:list-label", + "0000FFA0": "TextRegion:other", + "800080FF": "ChartRegion", + "800080FA": "ChartRegion:bar", + "800080F5": "ChartRegion:line", + "800080F0": "ChartRegion:pie", + "800080EB": "ChartRegion:scatter", + "800080E6": "ChartRegion:surface", + "800080E1": "ChartRegion:other", + "008000FF": "GraphicRegion", + "008000FA": "GraphicRegion:logo", + "008000F0": "GraphicRegion:letterhead", + "008000EB": "GraphicRegion:decoration", + "008000E6": "GraphicRegion:frame", + "008000E1": "GraphicRegion:handwritten-annotation", + "008000DC": "GraphicRegion:stamp", + "008000D7": "GraphicRegion:signature", + "008000D2": "GraphicRegion:barcode", + "008000CD": "GraphicRegion:paper-grow", + "008000C8": "GraphicRegion:punch-hole", + "008000C3": "GraphicRegion:other", + "00CED1FF": "ImageRegion", + "B8860BFF": "LineDrawingRegion", + "00BFFFFF": 
"MathsRegion", + "FF0000FF": "NoiseRegion", + "FF00FFFF": "SeparatorRegion", + "646464FF": "UnknownRegion", + "637C81FF": "CustomRegion" + }, + "description": "Mapping from color values in the input masks to region types to annotate; color must be encoded hexadecimal (e.g. '00FF00'); region type equals the element name in PAGE-XML, optionally followed by a colon and a subtype (e.g. 'TextRegion:paragraph'; unmapped colors will be ignored (i.e. treated as background)). Default is PageViewer color scheme. Cf. colordict.json output and colordict parameter of ocrd-segment-extract-pages." + } + } + }, + "ocrd-segment-from-coco": { + "executable": "ocrd-segment-from-coco", + "categories": [ + "Layout analysis" + ], + "description": "Import region segmentation from COCO detection format JSON (for all pages). Input fileGrp format is `base,COCO` (i.e. PAGE or original image files first, COCO file second).", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK" + ], + "steps": [ + "layout/segmentation/region" + ], + "parameters": {} + }, + "ocrd-segment-extract-pages": { + "executable": "ocrd-segment-extract-pages", + "categories": [ + "Image preprocessing" + ], + "description": "Extract page segmentation as page images (deskewed according to `/Page/@orientation` and cropped+masked along `/Page/Border`) + JSON (including region coordinates/classes and meta-data), as binarized images, and as mask images (segments filled with colors encoding classes) + COCO detection format JSON (for all pages). Output fileGrp format is `raw[,binarized[,mask]]` (i.e. fall back to first group).", + "input_file_grp": [ + "OCR-D-SEG-PAGE", + "OCR-D-GT-SEG-PAGE", + "OCR-D-SEG-BLOCK", + "OCR-D-GT-SEG-BLOCK" + ], + "output_file_grp": [ + "OCR-D-IMG-PAGE" + ], + "steps": [ + "layout/analysis" + ], + "parameters": { + "feature_filter": { + "type": "string", + "default": "", + "description": "Comma-separated list of forbidden image features (e.g. 
`binarized,despeckled`)." + }, + "mimetype": { + "type": "string", + "enum": [ + "image/bmp", + "application/postscript", + "image/gif", + "image/jpeg", + "image/jp2", + "image/png", + "image/x-portable-pixmap", + "image/tiff" + ], + "default": "image/png", + "description": "File format to save extracted images in." + }, + "transparency": { + "type": "boolean", + "default": true, + "description": "Add alpha channels with segment masks to the images" + }, + "plot_overlay": { + "type": "boolean", + "default": false, + "description": "When generating mask images with `plot_segmasks`, instead of starting with a blank image and having layers and segments replace each other, start with the raw image and superimpose (alpha-composite) layers and segments." + }, + "plot_segmasks": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "order", + "page", + "region", + "line", + "word", + "glyph" + ] + }, + "default": [ + "region" + ], + "description": "Generate mask images of the page segmentation in the last output fileGrp. Draw filled polygons for each specified PAGE hierarchy level in the list (in that order), where 'page' denotes the Border polygon, 'region' denotes Region types, 'line' denotes TextLine, 'word' denotes Word and 'glyph' denotes Glyph. Each type must be mapped in `colordict`. Where neighbors of the same type intersect, show a warning (unless `plot_overlay` is true). If 'order' is present, then draw arrows for reading order, too." 
+ }, + "colordict": { + "type": "object", + "default": { + "": "FFFFFF00", + "ReadingOrderLevel0": "DC143CFF", + "ReadingOrderLevel1": "9400D3FF", + "ReadingOrderLevelN": "8B0000FF", + "Border": "FFFFFFFF", + "TableRegion": "8B4513FF", + "AdvertRegion": "4682B4FF", + "ChemRegion": "FF8C00FF", + "MusicRegion": "9400D3FF", + "MapRegion": "9ACDD2FF", + "TextRegion": "0000FFFF", + "TextRegion:paragraph": "0000FFFA", + "TextRegion:heading": "0000FFF5", + "TextRegion:caption": "0000FFF0", + "TextRegion:header": "0000FFEB", + "TextRegion:footer": "0000FFE6", + "TextRegion:page-number": "0000FFE1", + "TextRegion:drop-capital": "0000FFDC", + "TextRegion:credit": "0000FFD7", + "TextRegion:floating": "0000FFD2", + "TextRegion:signature-mark": "0000FFCD", + "TextRegion:catch-word": "0000FFC8", + "TextRegion:marginalia": "0000FFC3", + "TextRegion:footnote": "0000FFBE", + "TextRegion:footnote-continued": "0000FFB9", + "TextRegion:endnote": "0000FFB4", + "TextRegion:TOC-entry": "0000FFAF", + "TextRegion:list-label": "0000FFA5", + "TextRegion:other": "0000FFA0", + "ChartRegion": "800080FF", + "ChartRegion:bar": "800080FA", + "ChartRegion:line": "800080F5", + "ChartRegion:pie": "800080F0", + "ChartRegion:scatter": "800080EB", + "ChartRegion:surface": "800080E6", + "ChartRegion:other": "800080E1", + "GraphicRegion": "008000FF", + "GraphicRegion:logo": "008000FA", + "GraphicRegion:letterhead": "008000F0", + "GraphicRegion:decoration": "008000EB", + "GraphicRegion:frame": "008000E6", + "GraphicRegion:handwritten-annotation": "008000E1", + "GraphicRegion:stamp": "008000DC", + "GraphicRegion:signature": "008000D7", + "GraphicRegion:barcode": "008000D2", + "GraphicRegion:paper-grow": "008000CD", + "GraphicRegion:punch-hole": "008000C8", + "GraphicRegion:other": "008000C3", + "ImageRegion": "00CED1FF", + "LineDrawingRegion": "B8860BFF", + "MathsRegion": "00BFFFFF", + "NoiseRegion": "FF0000FF", + "SeparatorRegion": "FF00FFFF", + "UnknownRegion": "646464FF", + "CustomRegion": "637C81FF", + 
"TextLine": "32CD32FF", + "Word": "B22222FF", + "Glyph": "2E8B08FF" + }, + "description": "Mapping from segment types to extract to color values in the output mask images and COCO; color must be encoded hexadecimal (e.g. '00FF00'); region type equals the element name in PAGE-XML, optionally followed by a colon and a subtype (e.g. 'TextRegion:paragraph'; unmapped region types will be ignored (i.e. treated as background)). Default is PageViewer color scheme. Cf. colordict parameter of ocrd-segment-from-masks." + } + } + }, + "ocrd-segment-extract-regions": { + "executable": "ocrd-segment-extract-regions", + "categories": [ + "Image preprocessing" + ], + "description": "Extract region segmentation as region images (deskewed according to `*/@orientation` and cropped+masked along `*/Coords` polygon) + JSON (including region coordinates/classes and meta-data).", + "input_file_grp": [ + "OCR-D-SEG-BLOCK", + "OCR-D-GT-SEG-BLOCK" + ], + "output_file_grp": [ + "OCR-D-IMG-REGION" + ], + "steps": [ + "layout/analysis" + ], + "parameters": { + "feature_filter": { + "type": "string", + "default": "", + "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)." + }, + "classes": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "NoiseRegion", + "LineDrawingRegion", + "AdvertRegion", + "ImageRegion", + "ChartRegion", + "MusicRegion", + "GraphicRegion", + "UnknownRegion", + "CustomRegion", + "SeparatorRegion", + "MathsRegion", + "TextRegion", + "MapRegion", + "ChemRegion", + "TableRegion" + ] + }, + "default": [ + "NoiseRegion", + "LineDrawingRegion", + "AdvertRegion", + "ImageRegion", + "ChartRegion", + "MusicRegion", + "GraphicRegion", + "UnknownRegion", + "CustomRegion", + "SeparatorRegion", + "MathsRegion", + "TextRegion", + "MapRegion", + "ChemRegion", + "TableRegion" + ], + "description": "Array of region types to extract e.g. -P classes '[\"TextRegion\", \"TableRegion\", \"ImageRegion\"]' . 
If empty, all regions are allowed." + }, + "mimetype": { + "type": "string", + "enum": [ + "image/bmp", + "application/postscript", + "image/gif", + "image/jpeg", + "image/jp2", + "image/png", + "image/x-portable-pixmap", + "image/tiff" + ], + "default": "image/png", + "description": "File format to save extracted images in." + }, + "transparency": { + "type": "boolean", + "default": true, + "description": "Add alpha channels with segment masks to the images" + } + } + }, + "ocrd-segment-extract-lines": { + "executable": "ocrd-segment-extract-lines", + "categories": [ + "Image preprocessing" + ], + "description": "Extract line segmentation as line images + text file + JSON.", + "input_file_grp": [ + "OCR-D-SEG-LINE", + "OCR-D-GT-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-IMG-LINE" + ], + "steps": [ + "layout/analysis" + ], + "parameters": { + "feature_filter": { + "type": "string", + "default": "", + "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)." + }, + "mimetype": { + "type": "string", + "enum": [ + "image/bmp", + "application/postscript", + "image/gif", + "image/jpeg", + "image/jp2", + "image/png", + "image/x-portable-pixmap", + "image/tiff" + ], + "default": "image/png", + "description": "File format to save extracted images in." + }, + "transparency": { + "type": "boolean", + "default": true, + "description": "Add alpha channels with segment masks to the images" + }, + "output-types": { + "type": "array", + "default": [ + "text", + "json", + "xlsx" + ], + "items": { + "type": "string", + "enum": [ + "text", + "json", + "xlsx" + ] + }, + "description": "What kind of files to extract besides the line image itself (text/json files for each line, xlsx per page)." 
+ }, + "library-convention": { + "type": "string", + "enum": [ + "slub", + "sbb", + "none" + ], + "default": "none", + "description": "For xlsx extraction, to make line images hyperlinked, use this scheme in reconstructing presentation URLs of original pages. Libraries have different conventions in their METS files. Set to none to disable." + }, + "min-line-length": { + "type": "number", + "format": "integer", + "minimum": 0, + "default": 0, + "description": "Only extract lines with at least this many characters." + }, + "min-line-width": { + "type": "number", + "format": "integer", + "minimum": 1, + "default": 1, + "description": "Only extract lines that are at least this wide (in px)." + }, + "min-line-height": { + "type": "number", + "format": "integer", + "minimum": 1, + "default": 1, + "description": "Only extract lines that are at least this high (in px)." + }, + "textequiv-index": { + "type": "string", + "enum": [ + "first", + "last", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], + "default": "first", + "description": "Only extract lines with the specified TextEquiv/@index entries; 'first' and 'last' denote the first and last TextEquiv elements, regardless of their @index, respectively." + } + } + }, + "ocrd-segment-extract-words": { + "executable": "ocrd-segment-extract-words", + "categories": [ + "Image preprocessing" + ], + "description": "Extract word segmentation as word images (deskewed according to `*/@orientation` and cropped+masked along `*/Coords` polygon and dewarped as in `*/AlternativeImage`) + text file (according to `*/TextEquiv`) + JSON (including line coordinates and meta-data).", + "input_file_grp": [ + "OCR-D-SEG-WORD", + "OCR-D-GT-SEG-WORD" + ], + "output_file_grp": [ + "OCR-D-IMG-WORD" + ], + "steps": [ + "layout/analysis" + ], + "parameters": { + "feature_filter": { + "type": "string", + "default": "", + "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)." 
+ }, + "mimetype": { + "type": "string", + "enum": [ + "image/bmp", + "application/postscript", + "image/gif", + "image/jpeg", + "image/jp2", + "image/png", + "image/x-portable-pixmap", + "image/tiff" + ], + "default": "image/png", + "description": "File format to save extracted images in." + }, + "transparency": { + "type": "boolean", + "default": true, + "description": "Add alpha channels with segment masks to the images" + } + } + }, + "ocrd-segment-extract-glyphs": { + "executable": "ocrd-segment-extract-glyphs", + "categories": [ + "Image preprocessing" + ], + "description": "Extract glyph segmentation as glyph images (deskewed according to `*/@orientation` and cropped+masked along `*/Coords` polygon and dewarped as in `*/AlternativeImage`) + text file (according to `*/TextEquiv`) + JSON (including line coordinates and meta-data).", + "input_file_grp": [ + "OCR-D-SEG-GLYPH", + "OCR-D-GT-SEG-GLYPH" + ], + "output_file_grp": [ + "OCR-D-IMG-GLYPH" + ], + "steps": [ + "layout/analysis" + ], + "parameters": { + "feature_filter": { + "type": "string", + "default": "", + "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)." + }, + "mimetype": { + "type": "string", + "enum": [ + "image/bmp", + "application/postscript", + "image/gif", + "image/jpeg", + "image/jp2", + "image/png", + "image/x-portable-pixmap", + "image/tiff" + ], + "default": "image/png", + "description": "File format to save extracted images in." 
+ }, + "transparency": { + "type": "boolean", + "default": true, + "description": "Add alpha channels with segment masks to the images" + } + } + }, + "ocrd-segment-replace-original": { + "executable": "ocrd-segment-replace-original", + "categories": [ + "Image preprocessing" + ], + "description": "Extract page image (deskewed according to `/Page/@orientation` and cropped+masked along `/Page/Border`) and use it as @imageFilename, adjusting all coordinates", + "input_file_grp": [ + "OCR-D-SEG-LINE", + "OCR-D-GT-SEG-LINE", + "OCR-D-OCR" + ], + "output_file_grp": [ + "OCR-D-SEG-CROP" + ], + "steps": [ + "layout/analysis" + ], + "parameters": { + "feature_selector": { + "type": "string", + "default": "", + "description": "Comma-separated list of required image features (e.g. `binarized,despeckled`)" + }, + "feature_filter": { + "type": "string", + "default": "", + "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)" + }, + "transform_coordinates": { + "type": "boolean", + "default": true, + "description": "re-calculate coordinates for all segments of the structural hierarchy to be consistent with the coordinate system of the chosen image again (vital after cropping, deskewing etc; disable only if input coordinates must be assumed to be inconsistent with the original)" + } + } + }, + "ocrd-segment-replace-page": { + "executable": "ocrd-segment-replace-page", + "categories": [ + "Image preprocessing" + ], + "description": "Replace everything below page level with another annotation, adjusting all coordinates", + "input_file_grp": [ + "OCR-D-SEG-LINE", + "OCR-D-GT-SEG-LINE", + "OCR-D-OCR" + ], + "output_file_grp": [ + "OCR-D-SEG-LINE", + "OCR-D-OCR" + ], + "steps": [ + "layout/analysis" + ], + "parameters": { + "transform_coordinates": { + "type": "boolean", + "default": true, + "description": "re-calculate coordinates for all segments of the structural hierarchy to be consistent with the coordinate system of the first input file 
group (vital after cropping, deskewing etc; disable only if input coordinates can be assumed to be consistent with the second input file group)" + } + } + }, + "ocrd-segment-replace-text": { + "executable": "ocrd-segment-replace-text", + "categories": [ + "Text recognition and optimization" + ], + "description": "Insert text from annotations in single-segment text files", + "steps": [ + "recognition/post-correction" + ], + "parameters": { + "file_glob": { + "type": "string", + "default": "*.gt.txt", + "description": "glob expression which expands to file names to match against page IDs and segment IDs in order to be ingested" + } + } + }, + "ocrd-segment-evaluate": { + "executable": "ocrd-segment-evaluate", + "categories": [ + "Layout analysis" + ], + "description": "Compare segmentations", + "input_file_grp": [ + "OCR-D-GT-SEG-BLOCK", + "OCR-D-SEG-BLOCK" + ], + "steps": [ + "layout/analysis" + ], + "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "region", + "line" + ], + "default": "region", + "description": "segment hierarchy level to compare GT and predictions at" + }, + "only-fg": { + "type": "boolean", + "default": false, + "description": "only overlap and compare the foregrounds in the binarized image" + }, + "ignore-subtype": { + "type": "boolean", + "default": false, + "description": "on region level, ignore @type differentiation (where applicable)" + }, + "for-categories": { + "type": "string", + "default": "", + "description": "on region level, only compare these region types (comma-separated list; unless `ignore-subtype` is given, append subtypes via `.`; e.g. 
`TextRegion.page-number,TextRegion.marginalia`)" + } + } + }, + "ocrd-tesserocr-deskew": { + "executable": "ocrd-tesserocr-deskew", + "categories": [ + "Image preprocessing" + ], + "description": "Detect script, orientation and skew angle for pages or regions", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-BLOCK" + ], + "output_file_grp": [ + "OCR-D-DESKEW-BLOCK" + ], + "steps": [ + "preprocessing/optimization/deskewing" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "operation_level": { + "type": "string", + "enum": [ + "page", + "region", + "line" + ], + "default": "region", + "description": "PAGE XML hierarchy level to operate on" + }, + "min_orientation_confidence": { + "type": "number", + "format": "float", + "default": 1.5, + "description": "Minimum confidence score to apply orientation as detected by OSD" + } + } + }, + "ocrd-tesserocr-fontshape": { + "executable": "ocrd-tesserocr-fontshape", + "categories": [ + "Text recognition and optimization" + ], + "description": "Recognize font shapes (family/monospace/bold/italic) and size in segmented words with Tesseract (using annotated derived images, or masking and cropping images from coordinate polygons), annotating TextStyle", + "input_file_grp": [ + "OCR-D-SEG-WORD", + "OCR-D-OCR" + ], + "output_file_grp": [ + "OCR-D-OCR-FONTSTYLE" + ], + "steps": [ + "recognition/font-identification" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "padding": { + "type": "number", + "format": "integer", + "default": 0, + "description": "Number of background-filled pixels to add around the word image (i.e. 
the annotated AlternativeImage if it exists or the higher-level image cropped to the bounding box and masked by the polygon otherwise) on each side before recognition." + }, + "model": { + "type": "string", + "format": "uri", + "content-type": "application/octet-stream", + "default": "osd", + "description": "tessdata model to apply (an ISO 639-3 language specification or some other basename, e.g. deu-frak or osd); must be an old (pre-LSTM) model" + } + } + }, + "ocrd-tesserocr-recognize": { + "executable": "ocrd-tesserocr-recognize", + "categories": [ + "Text recognition and optimization" + ], + "description": "Segment and/or recognize text with Tesseract (using annotated derived images, or masking and cropping images from coordinate polygons) on any level of the PAGE hierarchy.", + "input_file_grp": [ + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", + "OCR-D-SEG-TABLE", + "OCR-D-SEG-LINE", + "OCR-D-SEG-WORD" + ], + "output_file_grp": [ + "OCR-D-SEG-REGION", + "OCR-D-SEG-TABLE", + "OCR-D-SEG-LINE", + "OCR-D-SEG-WORD", + "OCR-D-SEG-GLYPH", + "OCR-D-OCR-TESS" + ], + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line", + "recognition/text-recognition" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "padding": { + "type": "number", + "format": "integer", + "default": 0, + "description": "Extend detected region/cell/line/word rectangles by this many (true) pixels, or extend existing region/line/word images (i.e. the annotated AlternativeImage if it exists or the higher-level image cropped to the bounding box and masked by the polygon otherwise) by this many (background/white) pixels on each side before recognition." 
+ }, + "segmentation_level": { + "type": "string", + "enum": [ + "region", + "cell", + "line", + "word", + "glyph", + "none" + ], + "default": "word", + "description": "Highest PAGE XML hierarchy level to remove existing annotation from and detect segments for (before iterating downwards); if ``none``, does not attempt any new segmentation; if ``cell``, starts at table regions, detecting text regions (cells). Ineffective when lower than ``textequiv_level``." + }, + "textequiv_level": { + "type": "string", + "enum": [ + "region", + "cell", + "line", + "word", + "glyph", + "none" + ], + "default": "word", + "description": "Lowest PAGE XML hierarchy level to re-use or detect segments for and add the TextEquiv results to (before projecting upwards); if ``none``, adds segmentation down to the glyph level, but does not attempt recognition at all; if ``cell``, stops short before text lines, adding text of text regions inside tables (cells) or on page level only." + }, + "overwrite_segments": { + "type": "boolean", + "default": false, + "description": "If ``segmentation_level`` is not none, but an element already contains segments, remove them and segment again. Otherwise use the existing segments of that element." + }, + "overwrite_text": { + "type": "boolean", + "default": true, + "description": "If ``textequiv_level`` is not none, but a segment already contains TextEquivs, remove them and replace with recognised text. Otherwise add new text as alternative. (Only the first entry is projected upwards.)" + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "When detecting any segments, annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols." + }, + "block_polygons": { + "type": "boolean", + "default": false, + "description": "When detecting regions, annotate polygon coordinates instead of bounding box rectangles by querying Tesseract accordingly." 
+ }, + "find_tables": { + "type": "boolean", + "default": true, + "description": "When detecting regions, recognise tables as table regions (Tesseract's ``textord_tabfind_find_tables=1``)." + }, + "find_staves": { + "type": "boolean", + "default": false, + "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." + }, + "sparse_text": { + "type": "boolean", + "default": false, + "description": "When detecting regions, use 'sparse text' page segmentation mode (finding as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space." + }, + "raw_lines": { + "type": "boolean", + "default": false, + "description": "When detecting lines, do not attempt additional segmentation (baseline+xheight+ascenders/descenders prediction) on line images. Can increase accuracy for certain workflows. Disable when line segments/images may contain components of more than 1 line, or larger gaps/white-spaces." + }, + "char_whitelist": { + "type": "string", + "default": "", + "description": "When recognizing text, enumeration of character hypotheses (from the model) to allow exclusively; overruled by blacklist if set." + }, + "char_blacklist": { + "type": "string", + "default": "", + "description": "When recognizing text, enumeration of character hypotheses (from the model) to suppress; overruled by unblacklist if set." + }, + "char_unblacklist": { + "type": "string", + "default": "", + "description": "When recognizing text, enumeration of character hypotheses (from the model) to allow inclusively." + }, + "tesseract_parameters": { + "type": "object", + "default": {}, + "description": "Dictionary of additional Tesseract runtime variables (cf. tesseract --print-parameters), string values." 
+ }, + "xpath_parameters": { + "type": "object", + "default": {}, + "description": "Set additional Tesseract runtime variables according to results of XPath queries into the segment. (As a convenience, `@language` and `@script` also match their upwards `@primary*` and `@secondary*` variants where applicable.) (Example: {'ancestor::TextRegion/@type=\"page-number\"': {'char_whitelist': '0123456789-'}, 'contains(@custom,\"ISBN\")': {'char_whitelist': '0123456789-'}})" + }, + "xpath_model": { + "type": "object", + "default": {}, + "description": "Prefer models mapped according to results of XPath queries into the segment. (As a convenience, `@language` and `@script` also match their upwards `@primary*` and `@secondary*` variants where applicable.) If no queries / mappings match (or under the default empty parameter), then fall back to `model`. If there are multiple matches, combine their results. (Example: {'starts-with(@script,\"Latn\")': 'Latin', 'starts-with(@script,\"Grek\")': 'Greek', '@language=\"Latin\"': 'lat', '@language=\"Greek\"': 'grc+ell', 'ancestor::TextRegion/@type=\"page-number\"': 'eng'})" + }, + "auto_model": { + "type": "boolean", + "default": false, + "description": "Prefer models performing best (by confidence) per segment (if multiple given in `model`). Repeats the OCR of the best model once (i.e. slower). (Use as a fallback to xpath_model if you do not trust script/language detection.)" + }, + "model": { + "type": "string", + "format": "uri", + "content-type": "application/octet-stream", + "description": "The tessdata text recognition model to apply (an ISO 639-3 language specification or some other basename, e.g. deu-frak or Fraktur)." + }, + "oem": { + "type": "string", + "enum": [ + "TESSERACT_ONLY", + "LSTM_ONLY", + "TESSERACT_LSTM_COMBINED", + "DEFAULT" + ], + "default": "DEFAULT", + "description": "Tesseract OCR engine mode to use:\n* Run Tesseract only - fastest,\n* Run just the LSTM line recognizer. 
(>=v4.00),\n*Run the LSTM recognizer, but allow fallback to Tesseract when things get difficult. (>=v4.00),\n*Run both and combine results - best accuracy." + } + }, + "resource_locations": [ + "module" + ], + "resources": [ + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/ocrd-train/data/Fraktur_5000000/tessdata_best/Fraktur_50000000.334_450937.traineddata", + "name": "Fraktur_GT4HistOCR.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model trained on GT4HistOCR", + "size": 1058487 + }, + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/ocrd-train/data/ONB/tessdata_best/ONB_1.195_300718_989100.traineddata", + "name": "ONB.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model based on Austrian National Library newspaper data", + "size": 4358948 + }, + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/frak2021/tessdata_best/frak2021-0.905.traineddata", + "name": "frak2021.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model based on a mix of mostly German and Latin ground truth data", + "size": 3421140 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/equ.traineddata", + "name": "equ.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract legacy model for mathematical equations", + "size": 2251950 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/osd.traineddata", + "name": "osd.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract legacy model for orientation and script detection", + "size": 10562727 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/eng.traineddata", + "name": "eng.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for contemporary (computer typesetting and offset printing) English", + "size": 4113088 + }, + { + 
"url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/deu.traineddata", + "name": "deu.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for contemporary (computer typesetting and offset printing) German", + "size": 1525436 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/deu_latf.traineddata", + "name": "deu_latf.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for historical (Fraktur typesetting and letterpress printing) German", + "size": 6423052 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/deu_latf.traineddata", + "name": "frk.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for historical German (deprecated, replaced by deu_latf)", + "size": 6423052 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Fraktur.traineddata", + "name": "Fraktur.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for historical Latin script with Fraktur typesetting", + "size": 10915632 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Latin.traineddata", + "name": "Latin.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for contemporary and historical Latin script", + "size": 89384811 + }, + { + "url": "https://github.com/tesseract-ocr/tesseract/archive/main.tar.gz", + "name": "configs", + "description": "Tesseract configs (parameter sets) for use with the standalone tesseract CLI", + "size": 1915529, + "type": "archive", + "path_in_archive": "tesseract-main/tessdata/configs" + } + ] + }, + "ocrd-tesserocr-segment": { + "executable": "ocrd-tesserocr-segment", + "categories": [ + "Layout analysis" + ], + "description": "Segment page into regions and lines with Tesseract", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + 
"OCR-D-GT-SEG-PAGE" + ], + "output_file_grp": [ + "OCR-D-SEG-LINE" + ], + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected region rectangles by this many (true) pixels", + "default": 4 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" + }, + "block_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles" + }, + "find_tables": { + "type": "boolean", + "default": true, + "description": "recognise tables as table regions (textord_tabfind_find_tables)" + }, + "find_staves": { + "type": "boolean", + "default": false, + "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." 
+ }, + "sparse_text": { + "type": "boolean", + "default": false, + "description": "use 'sparse text' page segmentation mode (find as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space" + } + } + }, + "ocrd-tesserocr-segment-region": { + "executable": "ocrd-tesserocr-segment-region", + "categories": [ + "Layout analysis" + ], + "description": "Segment page into regions with Tesseract", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-GT-SEG-PAGE" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK" + ], + "steps": [ + "layout/segmentation/region" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "overwrite_regions": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the Page level (otherwise skip page; no incremental annotation yet)." + }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected region rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" + }, + "crop_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles" + }, + "find_tables": { + "type": "boolean", + "default": true, + "description": "recognise tables as table regions (textord_tabfind_find_tables)" + }, + "find_staves": { + "type": "boolean", + "default": false, + "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." 
+ }, + "sparse_text": { + "type": "boolean", + "default": false, + "description": "use 'sparse text' page segmentation mode (find as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space" + } + } + }, + "ocrd-tesserocr-segment-table": { + "executable": "ocrd-tesserocr-segment-table", + "categories": [ + "Layout analysis" + ], + "description": "Segment table regions into cell text regions with Tesseract", + "input_file_grp": [ + "OCR-D-SEG-BLOCK", + "OCR-D-GT-SEG-BLOCK" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK" + ], + "steps": [ + "layout/segmentation/region" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "overwrite_cells": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the TableRegion level (otherwise skip table; no incremental annotation yet)." 
+ }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected cell rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" + } + } + }, + "ocrd-tesserocr-segment-line": { + "executable": "ocrd-tesserocr-segment-line", + "categories": [ + "Layout analysis" + ], + "description": "Segment regions into lines with Tesseract", + "input_file_grp": [ + "OCR-D-SEG-BLOCK", + "OCR-D-GT-SEG-BLOCK" + ], + "output_file_grp": [ + "OCR-D-SEG-LINE" + ], + "steps": [ + "layout/segmentation/line" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "overwrite_lines": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the TextRegion level (otherwise skip region; no incremental annotation yet)." 
+ }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected line rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" + } + } + }, + "ocrd-tesserocr-segment-word": { + "executable": "ocrd-tesserocr-segment-word", + "categories": [ + "Layout analysis" + ], + "description": "Segment lines into words with Tesseract", + "input_file_grp": [ + "OCR-D-SEG-LINE", + "OCR-D-GT-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-SEG-WORD" + ], + "steps": [ + "layout/segmentation/word" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "overwrite_words": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the TextLine level (otherwise skip line; no incremental annotation yet)." 
+ }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected cell rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" + } + } + }, + "ocrd-tesserocr-crop": { + "executable": "ocrd-tesserocr-crop", + "categories": [ + "Image preprocessing" + ], + "description": "Poor man's cropping via region segmentation", + "input_file_grp": [ + "OCR-D-IMG" + ], + "output_file_grp": [ + "OCR-D-SEG-PAGE" + ], + "steps": [ + "preprocessing/optimization/cropping" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected border by this many (true) pixels on every side", + "default": 4 + } + } + }, + "ocrd-tesserocr-binarize": { + "executable": "ocrd-tesserocr-binarize", + "categories": [ + "Image preprocessing" + ], + "description": "Binarize regions or lines with Tesseract's global Otsu", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-BIN-BLOCK", + "OCR-D-BIN-LINE" + ], + "steps": [ + "preprocessing/optimization/binarization" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "operation_level": { + "type": "string", + "enum": [ + "page", + "region", + "line" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, + "tiseg": { + "type": "boolean", + "default": false, + "description": "also separate text vs image by detecting and suppressing photo+sepline mask" + } + } + }, + "ocrd-preprocess-image": { + 
"executable": "ocrd-preprocess-image", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization" + ], + "description": "Convert or enhance images", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" + ], + "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line", + "word", + "glyph" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, + "input_feature_selector": { + "type": "string", + "default": "", + "description": "comma-separated list of required image features (e.g. binarized,despeckled)" + }, + "input_feature_filter": { + "type": "string", + "default": "", + "description": "comma-separated list of forbidden image features (e.g. binarized,despeckled)" + }, + "output_feature_added": { + "type": "string", + "required": true, + "description": "image feature(s) to be added after this operation (if multiple, separate by comma)" + }, + "input_mimetype": { + "type": "string", + "default": "image/png", + "enum": [ + "image/bmp", + "application/postscript", + "image/gif", + "image/jpeg", + "image/jp2", + "image/png", + "image/x-portable-pixmap", + "image/tiff" + ], + "description": "File format to save input images to (tool's expected input)" + }, + "output_mimetype": { + "type": "string", + "default": "image/png", + "enum": [ + "image/bmp", + "application/postscript", + "image/gif", + "image/jpeg", + "image/jp2", + "image/png", + "image/x-portable-pixmap", + "image/tiff" + ], + "description": "File format to load output images from (tool's expected output)" + }, + "command": { + "type": "string", + "required": true, + "description": "shell command to operate on image files, with @INFILE as place-holder for the input file path, and @OUTFILE as place-holder for the output file path" + } + } + }, + 
"ocrd-skimage-binarize": { + "executable": "ocrd-skimage-binarize", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/binarization" + ], + "description": "Binarize images with Scikit-image", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-IMG-BIN", + "OCR-D-SEG-PAGE-BIN", + "OCR-D-SEG-REGION-BIN", + "OCR-D-SEG-LINE-BIN" + ], + "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line", + "word", + "glyph" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 + }, + "method": { + "type": "string", + "default": "sauvola", + "enum": [ + "sauvola", + "niblack", + "otsu", + "gauss", + "yen", + "li" + ], + "description": "Thresholding algorithm to use" + }, + "window_size": { + "type": "number", + "format": "integer", + "default": 0, + "description": "For Sauvola/Niblack/Gauss, the (odd) window size in pixels; when zero (default), set to DPI" + }, + "k": { + "type": "number", + "format": "float", + "default": 0.34, + "description": "For Sauvola/Niblack, formula parameter influencing the threshold bias; larger is lighter foreground" + } + } + }, + "ocrd-skimage-denoise-raw": { + "executable": "ocrd-skimage-denoise-raw", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/despeckling" + ], + "description": "Denoise raw images with Scikit-image", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-IMG-DEN", + "OCR-D-SEG-PAGE-DEN", + "OCR-D-SEG-REGION-DEN", + "OCR-D-SEG-LINE-DEN" + ], + "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "page", + 
"region", + "line", + "word", + "glyph" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 + }, + "method": { + "type": "string", + "default": "VisuShrink", + "enum": [ + "BayesShrink", + "VisuShrink" + ], + "description": "Wavelet filtering scheme to use" + } + } + }, + "ocrd-skimage-denoise": { + "executable": "ocrd-skimage-denoise", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/despeckling" + ], + "description": "Denoise binarized images with Scikit-image", + "input_file_grp": [ + "OCR-D-IMG-BIN", + "OCR-D-SEG-PAGE-BIN", + "OCR-D-SEG-REGION-BIN", + "OCR-D-SEG-LINE-BIN" + ], + "output_file_grp": [ + "OCR-D-IMG-DEN", + "OCR-D-SEG-PAGE-DEN", + "OCR-D-SEG-REGION-DEN", + "OCR-D-SEG-LINE-DEN" + ], + "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line", + "word", + "glyph" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 + }, + "protect": { + "type": "number", + "format": "float", + "default": 0.0, + "description": "avoid removing fg specks near larger fg components by up to this distance in pt" + }, + "maxsize": { + "type": "number", + "format": "float", + "default": 1.0, + "description": "maximum component size of (bg holes or fg specks) noise in pt" + } + } + }, + "ocrd-skimage-normalize": { + "executable": "ocrd-skimage-normalize", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization" + ], + "description": "Equalize contrast/exposure of images with Scikit-image; stretches the color value/tone to the full 
dynamic range", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-IMG-NRM", + "OCR-D-SEG-PAGE-NRM", + "OCR-D-SEG-REGION-NRM", + "OCR-D-SEG-LINE-NRM" + ], + "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line", + "word", + "glyph" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 + }, + "black-point": { + "type": "number", + "format": "float", + "default": 1.0, + "description": "black point in percent of luminance/value/tone histogram; up to ``black-point`` darkest pixels will be clipped to black when stretching" + }, + "white-point": { + "type": "number", + "format": "float", + "default": 7.0, + "description": "white point in percent of luminance/value/tone histogram; up to ``white-point`` brightest pixels will be clipped to white when stretching" + }, + "method": { + "type": "string", + "default": "stretch", + "enum": [ + "stretch", + "adapthist" + ], + "description": "contrast-enhancing transformation to use after clipping; ``stretch`` uses ``skimage.exposure.rescale_intensity`` (globally linearly stretching to full dynamic range) and ``adapthist`` uses ``skimage.exposure.equalize_adapthist`` (applying over tiles with context from 1/8th of the image's width)" + } + } + }, + "ocrd-sbb-binarize": { + "executable": "ocrd-sbb-binarize", + "description": "Pixelwise binarization with selectional auto-encoders in Keras", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/binarization" + ], + "input_file_grp": [], + "output_file_grp": [], + "parameters": { + "operation_level": { + "type": "string", + "enum": [ + "page", + "region" + ], + "default": "page", + "description": "PAGE
XML hierarchy level to operate on" + }, + "model": { + "description": "Directory containing HDF5 or SavedModel/ProtoBuf models. Can be an absolute path or a path relative to the OCR-D resource location, the current working directory or the $SBB_BINARIZE_DATA environment variable (if set)", + "type": "string", + "format": "uri", + "content-type": "text/directory", + "required": true + } + }, + "resources": [ + { + "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2020_01_16.zip", + "name": "default", + "type": "archive", + "path_in_archive": "saved_model_2020_01_16", + "size": 563147331, + "description": "default models provided by github.com/qurator-spk (SavedModel format)" + }, + { + "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip", + "name": "default-2021-03-09", + "type": "archive", + "path_in_archive": ".", + "size": 133230419, + "description": "updated default models provided by github.com/qurator-spk (SavedModel format)" + } + ] + }, + "ocrd-page-transform": { + "executable": "ocrd-page-transform", + "description": "apply arbitrary XSL transformation file for PAGE-XML", + "parameters": { + "xsl": { + "description": "File path of the XSL transformation script", + "type": "string", + "format": "uri", + "content-type": "text/xsl", + "required": true + }, + "xslt-params": { + "description": "Assignment of XSL transformation parameter values, given as in `xmlstarlet` (which differentiates between `-s name=value` for literal `value` and `-p name=value` for XPath expression `value`), white-space separated.", + "type": "string", + "default": "" + }, + "pretty-print": { + "description": "Reformat with line breaks and this many spaces of indentation after XSL transformation (unless zero).", + "type": "number", + "format": "integer", + "default": 0 + }, + "mimetype": { + "description": "MIME type to register the output files under (should correspond to `xsl` 
result)", + "type": "string", + "default": "application/vnd.prima.page+xml" + } + } + } +} diff --git a/run-network/odem-workflow-config.yaml b/run-network/odem-workflow-config.yaml new file mode 100644 index 00000000..d9b77c7e --- /dev/null +++ b/run-network/odem-workflow-config.yaml @@ -0,0 +1,27 @@ +processors: + - name: ocrd-cis-ocropy-binarize + image: ocrd/cis + - name: ocrd-cis-ocropy-denoise + image: ocrd/cis + - name: ocrd-cis-ocropy-deskew + image: ocrd/cis + - name: ocrd-cis-ocropy-clip + image: ocrd/cis + - name: ocrd-cis-ocropy-segment + image: ocrd/cis + - name: ocrd-cis-ocropy-dewarp + image: ocrd/cis + - name: ocrd-fileformat-transform + image: ocrd/fileformat + - name: ocrd-segment-repair + image: ocrd/segment + - name: ocrd-tesserocr-segment-region + image: ocrd/tesserocr + - name: ocrd-tesserocr-recognize + image: ocrd/tesserocr + - name: ocrd-anybaseocr-crop + image: ocrd/anybaseocr +environment: + # this folder contains the workspaces and must be created by the user + data_dir_host: /tmp/mydata + mtu: 1300