qurator-spk · kba · Oct 20, 2025 · Oct 20, 2025 · Oct 20, 2025 · Oct 20, 2025
diff --git a/Makefile b/Makefile
@@ -6,21 +6,23 @@ EXTRAS ?=
 DOCKER_BASE_IMAGE ?= docker.io/ocrd/core-cuda-tf2:latest
 DOCKER_TAG ?= ocrd/eynollah
 DOCKER ?= docker
+WGET = wget -O
 
 #SEG_MODEL := https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz
 #SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz
 # SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah.tar.gz
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
-SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
+#SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
+SEG_MODEL := https://zenodo.org/records/17295988/files/models_layout_v0_6_0.tar.gz?download=1
 SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL)))
 SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%)
 
-BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip
-BIN_MODELFILE = $(notdir $(BIN_MODEL))
-BIN_MODELNAME := default-2021-03-09
+BIN_MODEL := https://zenodo.org/records/17295988/files/models_binarization_v0_6_0.tar.gz?download=1
+BIN_MODELFILE = $(notdir $(patsubst %?download=1,%,$(BIN_MODEL)))
+BIN_MODELNAME = $(BIN_MODELFILE:%.tar.gz=%)
 
-OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1
+OCR_MODEL := https://zenodo.org/records/17295988/files/models_ocr_v0_6_0.tar.gz?download=1
 OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL)))
 OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%)
 
@@ -55,22 +57,21 @@ help:
 # END-EVAL
 
 
-# Download and extract models to $(PWD)/models_layout_v0_5_0
+# Download and extract models to $(PWD)/models_layout_v0_6_0
 models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)
 
 # do not download these files if we already have the directories
 .INTERMEDIATE: $(BIN_MODELFILE) $(SEG_MODELFILE) $(OCR_MODELFILE)
 
 $(BIN_MODELFILE):
-	wget -O $@ $(BIN_MODEL)
+	$(WGET) $@ $(BIN_MODEL)
 $(SEG_MODELFILE):
-	wget -O $@ $(SEG_MODEL)
+	$(WGET) $@ $(SEG_MODEL)
 $(OCR_MODELFILE):
-	wget -O $@ $(OCR_MODEL)
+	$(WGET) $@ $(OCR_MODEL)
 
 $(BIN_MODELNAME): $(BIN_MODELFILE)
-	mkdir $@
-	unzip -d $@ $<
+	tar zxf $<
 $(SEG_MODELNAME): $(SEG_MODELFILE)
 	tar zxf $<
 $(OCR_MODELNAME): $(OCR_MODELFILE)

diff --git a/README.md b/README.md
@@ -55,7 +55,7 @@ make install EXTRAS=OCR
 
 ## Models
 
-Pretrained models can be downloaded from [zenodo](https://zenodo.org/records/17194824) or [huggingface](https://huggingface.co/SBB?search_models=eynollah). 
+Pretrained models can be downloaded from [zenodo](https://doi.org/10.5281/zenodo.17194823) or [huggingface](https://huggingface.co/SBB?search_models=eynollah). 
 
 For documentation on models, have a look at [`models.md`](https://github.com/qurator-spk/eynollah/tree/main/docs/models.md). 
 Model cards are also provided for our trained models.
@@ -162,7 +162,7 @@ formally described in [`ocrd-tool.json`](https://github.com/qurator-spk/eynollah
 
 In this case, the source image file group with (preferably) RGB images should be used as input like this:
 
-    ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models eynollah_layout_v0_5_0
+    ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models eynollah_layout_v0_6_0
 
 If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows:
 - existing regions are kept and ignored (i.e. in effect they might overlap segments from Eynollah results)
@@ -174,7 +174,7 @@ If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynol
   (because some other preprocessing step was in effect like `denoised`), then
   the output PAGE-XML will be based on that as new top-level (`@imageFilename`)
 
-      ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models eynollah_layout_v0_5_0
+      ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models eynollah_layout_v0_6_0
 
 In general, it makes more sense to add other workflow steps **after** Eynollah.
 

diff --git a/requirements.txt b/requirements.txt
@@ -6,3 +6,4 @@ tensorflow < 2.13
 numba <= 0.58.1
 scikit-image
 biopython
+tabulate
diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py
@@ -1,16 +1,24 @@
+from dataclasses import dataclass
 import sys
 import click
 import logging
+from typing import Tuple, List
 from ocrd_utils import initLogging, getLevelName, getLogger
-from eynollah.eynollah import Eynollah, Eynollah_ocr
+from eynollah.eynollah import Eynollah
+from eynollah.eynollah_ocr import Eynollah_ocr
 from eynollah.sbb_binarize import SbbBinarizer
 from eynollah.image_enhancer import Enhancer
 from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout
+from eynollah.model_zoo import EynollahModelZoo
+
+from .cli_models import models_cli
 
 @click.group()
 def main():
     pass
 
+main.add_command(models_cli, 'models')
+
 @main.command()
 @click.option(
     "--input",
@@ -79,18 +87,38 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
     type=click.Path(file_okay=True, dir_okay=True),
     required=True,
 )
+@click.option(
+    '-M',
+    '--mode',
+    type=click.Choice(['single', 'multi']),
+    default='single',
+    help="Whether to use the (faster) single-model binarization or the (slightly better) multi-model binarization"
+)
 @click.option(
     "--log_level",
     "-l",
     type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
     help="Override log level globally to this",
 )
-def binarization(patches, model_dir, input_image, dir_in, output, log_level):
+def binarization(
+    patches,
+    model_dir,
+    input_image,
+    mode,
+    dir_in,
+    output,
+    log_level,
+):
     assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
-    binarizer = SbbBinarizer(model_dir)
+    binarizer = SbbBinarizer(model_dir, mode=mode)
     if log_level:
         binarizer.log.setLevel(getLevelName(log_level))
-    binarizer.run(image_path=input_image, use_patches=patches, output=output, dir_in=dir_in)
+    binarizer.run(
+        image_path=input_image,
+        use_patches=patches,
+        output=output,
+        dir_in=dir_in
+    )
 
 
 @main.command()
@@ -198,15 +226,17 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
 @click.option(
     "--model",
     "-m",
+    'model_basedir',
     help="directory of models",
     type=click.Path(exists=True, file_okay=False),
+    # default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
     required=True,
 )
 @click.option(
     "--model_version",
     "-mv",
-    help="override default versions of model categories",
-    type=(str, str),
+    help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g. 'region light /path/to/model'. See 'eynollah models -m MODEL_DIR list' for the full list",
+    type=(str, str, str),
     multiple=True,
 )
 @click.option(
@@ -380,7 +410,43 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
     help="Setup a basic console logger",
 )
 
-def layout(image, out, overwrite, dir_in, model, model_version, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level, setup_logging):
+def layout(
+    image,
+    out,
+    overwrite,
+    dir_in,
+    model_basedir,
+    model_version,
+    save_images,
+    save_layout,
+    save_deskewed,
+    save_all,
+    extract_only_images,
+    save_page,
+    enable_plotting,
+    allow_enhancement,
+    curved_line,
+    textline_light,
+    full_layout,
+    tables,
+    right2left,
+    input_binary,
+    allow_scaling,
+    headers_off,
+    light_version,
+    reading_order_machine_based,
+    do_ocr,
+    transformer_ocr,
+    batch_size_ocr,
+    num_col_upper,
+    num_col_lower,
+    threshold_art_class_textline,
+    threshold_art_class_layout,
+    skip_layout_and_reading_order,
+    ignore_page_extraction,
+    log_level,
+    setup_logging,
+):
     if setup_logging:
         console_handler = logging.StreamHandler(sys.stdout)
         console_handler.setLevel(logging.INFO)
@@ -410,8 +476,8 @@ def layout(image, out, overwrite, dir_in, model, model_version, save_images, sav
     assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
     assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
     eynollah = Eynollah(
-        model,
-        model_versions=model_version,
+        model_basedir,
+        model_overrides=model_version,
         extract_only_images=extract_only_images,
         enable_plotting=enable_plotting,
         allow_enhancement=allow_enhancement,

diff --git a/src/eynollah/cli_models.py b/src/eynollah/cli_models.py
@@ -0,0 +1,93 @@
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List, Set, Tuple
+import click
+
+from eynollah.model_zoo.default_specs import MODELS_VERSION
+from .model_zoo import EynollahModelZoo
+
+
+@dataclass
+class EynollahCliCtx:  # object stored on the click context (ctx.obj) by models_cli
+    model_zoo: EynollahModelZoo  # zoo instance read by the subcommands
+
+
+@click.group()
+@click.pass_context
+@click.option(
+    "--model", "-m", 'model_basedir',
+    help="directory of models",
+    type=click.Path(exists=True, file_okay=False),
+    # default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
+    required=True,
+)
+@click.option(
+    "--model-overrides",
+    "-mv",
+    help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g. 'region light /path/to/model'. See the 'list' subcommand for the full list",
+    type=(str, str, str),
+    multiple=True,
+)
+def models_cli(
+    ctx,
+    model_basedir: str,
+    model_overrides: List[Tuple[str, str, str]],
+):
+    """
+    Organize models for the various runners in eynollah.
+    """
+    # Build the zoo once from the group-level options and keep it on the click context,
+    # so that every subcommand can reach it as ctx.obj.model_zoo
+    ctx.obj = EynollahCliCtx(model_zoo=EynollahModelZoo(basedir=model_basedir, model_overrides=model_overrides))
+
+
+@models_cli.command('list')
+@click.pass_context
+def list_models(ctx):
+    """
+    List all the models in the zoo
+    """
+    # Printing the zoo object emits its string form on stdout
+    zoo = ctx.obj.model_zoo
+    print(zoo)
+
+
+@models_cli.command('package')
+@click.option(
+    '--set-version', '-V', 'version', help="Version to use for packaging", default=MODELS_VERSION, show_default=True
+)
+@click.argument('output_dir')
+@click.pass_context
+def package(ctx, version, output_dir):
+    """
+    Generate shell code to copy all the models in the zoo into properly named folders in OUTPUT_DIR for distribution.
+
+    eynollah models -m SRC package OUTPUT_DIR
+
+    SRC should contain a directory "models_eynollah" containing all the models.
+    """
+    from shlex import quote  # local import: every path below ends up inside a shell command
+
+    mkdirs: Set[Path] = set()
+    copies: Set[Tuple[Path, Path]] = set()
+    for spec in ctx.obj.model_zoo.specs.specs:
+        # skip these as they are dependent on the ocr model
+        if spec.category in ('num_to_char', 'characters'):
+            continue
+        model_path = Path(ctx.obj.model_zoo.model_path(spec.category, spec.variant))
+        # Only copy the top-most directory below models_eynollah; the ancestor chain is finite, so this cannot hang
+        src = next((p for p in (model_path, *model_path.parents) if p.parent.name == 'models_eynollah'), None)
+        if src is None:
+            raise click.ClickException(f"{model_path} is not located below a 'models_eynollah' directory")
+        for dist in spec.dists:
+            dist_dir = Path(f"{output_dir}/models_{dist}_{version}/models_eynollah")
+            copies.add((src, dist_dir))
+            mkdirs.add(dist_dir)
+    # Sets are unordered: sort so that the generated script is identical between runs
+    for dist_dir in sorted(mkdirs):
+        print(f"mkdir -p {quote(str(dist_dir))}")
+    for (src, dst) in sorted(copies):
+        print(f"cp -r {quote(str(src))} {quote(str(dst))}")
+    for dist_dir in sorted(mkdirs):
+        zip_path = quote(f'../{dist_dir.parent.name}.zip')
+        print(f"(cd {quote(str(dist_dir.parent))} && zip -r {zip_path} models_eynollah)")