Skip to content


RecognizedText dataclass

Recognized text class

This class represents a result from a text recognition model.


Name Type Description
texts list[str]

A sequence of candidate texts

scores list[float]

The scores of the candidate texts

Source code in src/htrflow_core/
from dataclasses import dataclass


@dataclass
class RecognizedText:
    """Recognized text class

    This class represents a result from a text recognition model.

    Attributes:
        texts: A sequence of candidate texts
        scores: The scores of the candidate texts
    """

    texts: list[str]
    scores: list[float]

    def __post_init__(self):
        # Allow a single text / score to be passed: any value that is not
        # already a list is wrapped in a one-element list.
        if not isinstance(self.texts, list):
            self.texts = [self.texts]
        if not isinstance(self.scores, list):
            self.scores = [self.scores]

    def top_candidate(self) -> str:
        """The candidate with the highest confidence score

        If several candidates share the highest score, the first of them
        is returned.
        """
        return self.texts[self.scores.index(self.top_score())]

    def top_score(self) -> float:
        """The highest confidence score"""
        return max(self.scores)


The candidate with the highest confidence score

Source code in src/htrflow_core/
def top_candidate(self) -> str:
    """Return the text whose score equals the highest confidence score."""
    best_score = self.top_score()
    # `list.index` yields the first position holding the best score.
    position = self.scores.index(best_score)
    return self.texts[position]


The highest confidence score

Source code in src/htrflow_core/
def top_score(self):
    """Return the largest value among the candidate scores."""
    highest = max(self.scores)
    return highest


A result from an arbitrary model (or process)

One result instance corresponds to one input image.


Name Type Description

Metadata regarding the result, model-dependent.


Segment instances representing results from an object detection or instance segmentation model, or similar. May be empty if not applicable.


Any other data associated with the result, stored as a sequence of dictionaries. Is assumed to correspond one-to-one with segments when segments is non-empty. If segments is empty, the first entry in data is assumed to apply for the entire input image.

Source code in src/htrflow_core/
class Result:
    """A result from an arbitrary model (or process)

    One result instance corresponds to one input image.

    Attributes:
        metadata: Metadata regarding the result, model-dependent.
        segments: `Segment` instances representing results from an object
            detection or instance segmentation model, or similar. May
            be empty if not applicable.
        data: Any other data associated with the result, stored as a
            sequence of dictionaries. Is assumed to correspond one-to-one
            with `segments` when `segments` is non-empty. If `segments`
            is empty, the first entry in `data` is assumed to apply for
            the entire input image.
    """

    def __init__(
        self,
        metadata: dict[str, str] | None = None,
        segments: Sequence[Segment] | None = None,
        data: Sequence[dict[str, Any]] | None = None,
        texts: Sequence[RecognizedText] | None = None,
    ):
        """Create a `Result` instance

        Arguments:
            metadata: Result metadata. Defaults to an empty dictionary.
            segments: The result's segments. Defaults to an empty list.
            data: Additional data entries (dictionaries). Defaults to None.
            texts: Recognized texts, paired with the `data` entries by
                `_zip_longest_none` and stored under `TEXT_RESULT_KEY`.
                Defaults to None.
        """
        self.metadata = metadata or {}
        self.segments = segments or []
        # NOTE(review): the listing was truncated around `self.data`; these
        # lines are reconstructed from the `data` attribute documentation
        # and the surviving `| {TEXT_RESULT_KEY: text})` fragment. Confirm
        # against the source file.
        self.data = []
        for entry, text in _zip_longest_none(data, texts, fillvalue={}):
            self.data.append(entry | {TEXT_RESULT_KEY: text})

    def rescale(self, factor: float):
        """Rescale the Result's segments"""
        for segment in self.segments:
            segment.rescale(factor)

    @property
    def bboxes(self) -> Sequence[Bbox]:
        """Bounding boxes relative to input image"""
        return [segment.bbox for segment in self.segments]

    @property
    def global_masks(self) -> Sequence[Mask | None]:
        """Global masks relative to input image"""
        return [segment.global_mask for segment in self.segments]

    @property
    def local_mask(self) -> Sequence[Mask | None]:
        """Local masks relative to bounding boxes"""
        return [segment.local_mask for segment in self.segments]

    @property
    def polygons(self) -> Sequence[Polygon | None]:
        """Polygons relative to input image"""
        return [segment.polygon for segment in self.segments]

    @property
    def class_labels(self) -> Sequence[str | None]:
        """Class labels of segments"""
        return [segment.class_label for segment in self.segments]

    @classmethod
    def text_recognition_result(cls, metadata: dict[str, Any], texts: list[str], scores: list[float]) -> "Result":
        """Create a text recognition result

        Arguments:
            metadata: Result metadata
            texts: The recognized candidate texts
            scores: The scores of the candidate texts

        Returns:
            A Result instance with the specified data and no segments.
        """
        return cls(metadata, texts=[RecognizedText(texts, scores)])

    @classmethod
    def segmentation_result(
        cls,
        orig_shape: tuple[int, int],
        metadata: dict[str, Any],
        bboxes: Sequence[Bbox | Iterable[int]] | None = None,
        masks: Sequence[Mask] | None = None,
        polygons: Sequence[Polygon] | None = None,
        scores: Iterable[float] | None = None,
        labels: Iterable[str] | None = None,
    ) -> "Result":
        """Create a segmentation result

        Arguments:
            orig_shape: The shape of the original image
            metadata: Result metadata
            bboxes: Bounding boxes of the segments. Defaults to None.
            masks: Masks of the segments. Defaults to None.
            polygons: Polygons of the segments. Defaults to None.
            scores: Confidence scores of the segments. Defaults to None.
            labels: Class labels of the segments. Defaults to None.

        Returns:
            A Result instance with the specified data and no texts.
        """
        segments = []
        # The argument order matches the positional parameters of
        # `Segment.__init__`: bbox, mask, score, class_label, polygon.
        for item in _zip_longest_none(bboxes, masks, scores, labels, polygons):
            segments.append(Segment(*item, orig_shape=orig_shape))
        return cls(metadata, segments=segments)

    def reorder(self, index: Sequence[int]) -> None:
        """Reorder result

        Example: Given a `Result` with three segments s0, s1 and s2,
        index = [2, 0, 1] will put the segments in order [s2, s0, s1].
        Any indices not in `index` will be dropped from the result.

        Arguments:
            index: A list of indices representing the new ordering.
        """
        if self.segments:
            self.segments = [self.segments[i] for i in index]
        # NOTE(review): reconstructed from the truncated listing; `data` is
        # documented as corresponding one-to-one with `segments`.
        if self.data:
            self.data = [self.data[i] for i in index]

    def drop_indices(self, index: Sequence[int]) -> None:
        """Drop segments from result

        Example: Given a `Result` with three segments s0, s1 and s2,
        index = [0, 2] will drop segments s0 and s2.

        Arguments:
            index: Indices of segments to drop
        """
        dropped = set(index)
        keep = [i for i in range(len(self.segments)) if i not in dropped]
        # NOTE(review): this call was missing from the truncated listing;
        # `reorder` drops every index that is not listed.
        self.reorder(keep)

    def filter(self, key: str, predicate: Callable[[Any], bool]) -> None:
        """Filter segments and data based on a predicate applied to a specified key.

        Arguments:
            key: The key in the data dictionary to test the predicate against.
            predicate [Callable]: A function that takes a value associated with the key
                and returns True if the segment should be kept.

        Example:
            >>> def remove_certain_text(text_results):
            ...    return text_results != 'lorem'
            >>> result.filter('text_results', remove_certain_text)
        """
        # NOTE(review): `self.data` and the `reorder` call are reconstructed
        # from the truncated listing; confirm against the source file.
        keep = [i for i, item in enumerate(self.data) if predicate(item.get(key, None))]
        self.reorder(keep)

bboxes: Sequence[Bbox] property

Bounding boxes relative to input image

class_labels: Sequence[str | None] property

Class labels of segments

global_masks: Sequence[Mask | None] property

Global masks relative to input image

local_mask: Sequence[Mask | None] property

Local masks relative to bounding boxes

polygons: Sequence[Polygon | None] property

Polygons relative to input image


Drop segments from result

Example: Given a Result with three segments s0, s1 and s2, index = [0, 2] will drop segments s0 and s2.


Name Type Description Default
index Sequence[int]

Indices of segments to drop

Source code in src/htrflow_core/
def drop_indices(self, index: Sequence[int]) -> None:
    """Drop segments from result

    Example: Given a `Result` with three segments s0, s1 and s2,
    index = [0, 2] will drop segments s0 and s2.

    Arguments:
        index: Indices of segments to drop
    """
    # A set gives constant-time membership tests while building `keep`.
    dropped = set(index)
    keep = [i for i in range(len(self.segments)) if i not in dropped]
    # NOTE(review): this call was missing from the truncated listing;
    # `reorder` is documented to drop every index that is not listed.
    self.reorder(keep)

filter(key, predicate)

Filter segments and data based on a predicate applied to a specified key.


Name Type Description Default
key str

The key in the data dictionary to test the predicate against.

predicate [Callable]

A function that takes a value associated with the key and returns True if the segment should be kept.



>>> def remove_certain_text(text_results):
>>>    return text_results != 'lorem'
>>> result.filter('text_results', remove_certain_text)

Source code in src/htrflow_core/
def filter(self, key: str, predicate: Callable[[Any], bool]) -> None:
    """Filter segments and data based on a predicate applied to a specified key.

    Arguments:
        key: The key in the data dictionary to test the predicate against.
        predicate [Callable]: A function that takes a value associated with the key
            and returns True if the segment should be kept.

    Example:
        >>> def remove_certain_text(text_results):
        ...    return text_results != 'lorem'
        >>> result.filter('text_results', remove_certain_text)
    """
    # NOTE(review): `self.data` and the `reorder` call are reconstructed from
    # the truncated listing; confirm against the source file.
    # Entries lacking `key` are tested with None.
    keep = [i for i, item in enumerate(self.data) if predicate(item.get(key, None))]
    self.reorder(keep)


Reorder result

Example: Given a Result with three segments s0, s1 and s2, index = [2, 0, 1] will put the segments in order [s2, s0, s1]. Any indices not in index will be dropped from the result.


Name Type Description Default
index Sequence[int]

A list of indices representing the new ordering.

Source code in src/htrflow_core/
def reorder(self, index: Sequence[int]) -> None:
    """Reorder result

    Example: Given a `Result` with three segments s0, s1 and s2,
    index = [2, 0, 1] will put the segments in order [s2, s0, s1].
    Any indices not in `index` will be dropped from the result.

    Arguments:
        index: A list of indices representing the new ordering.
    """
    if self.segments:
        self.segments = [self.segments[i] for i in index]
    # NOTE(review): reconstructed from the truncated `if = [[i] for i in index]`
    # line; `data` is documented as corresponding one-to-one with `segments`.
    if self.data:
        self.data = [self.data[i] for i in index]


Rescale the Result's segments

Source code in src/htrflow_core/
def rescale(self, factor: float):
    """Rescale the Result's segments

    Arguments:
        factor: The factor passed to each segment's `rescale` method.
    """
    # NOTE(review): the loop body was missing from the truncated listing;
    # delegating to each segment's `rescale(factor)` is the reconstruction.
    for segment in self.segments:
        segment.rescale(factor)

segmentation_result(orig_shape, metadata, bboxes=None, masks=None, polygons=None, scores=None, labels=None) classmethod

Create a segmentation result


Name Type Description Default

The original image

metadata dict[str, Any]

Result metadata


The segments



Type Description

A Result instance with the specified data and no texts.

Source code in src/htrflow_core/
@classmethod
def segmentation_result(
    cls,
    orig_shape: tuple[int, int],
    metadata: dict[str, Any],
    bboxes: Sequence[Bbox | Iterable[int]] | None = None,
    masks: Sequence[Mask] | None = None,
    polygons: Sequence[Polygon] | None = None,
    scores: Iterable[float] | None = None,
    labels: Iterable[str] | None = None,
) -> "Result":
    """Create a segmentation result

    Arguments:
        orig_shape: The shape of the original image
        metadata: Result metadata
        bboxes: Bounding boxes of the segments. Defaults to None.
        masks: Masks of the segments. Defaults to None.
        polygons: Polygons of the segments. Defaults to None.
        scores: Confidence scores of the segments. Defaults to None.
        labels: Class labels of the segments. Defaults to None.

    Returns:
        A Result instance with the specified data and no texts.
    """
    segments = []
    # Each item is unpacked positionally into `Segment`, so the argument
    # order here (bbox, mask, score, label, polygon) is significant.
    for item in _zip_longest_none(bboxes, masks, scores, labels, polygons):
        segments.append(Segment(*item, orig_shape=orig_shape))
    return cls(metadata, segments=segments)

text_recognition_result(metadata, texts, scores) classmethod

Create a text recognition result


Name Type Description Default
metadata dict[str, Any]

Result metadata


The recognized text



Type Description

A Result instance with the specified data and no segments.

Source code in src/htrflow_core/
@classmethod
def text_recognition_result(cls, metadata: dict[str, Any], texts: list[str], scores: list[float]) -> "Result":
    """Create a text recognition result

    Arguments:
        metadata: Result metadata
        texts: The recognized candidate texts
        scores: The scores of the candidate texts

    Returns:
        A Result instance with the specified data and no segments.
    """
    # All candidates are wrapped in a single `RecognizedText` entry.
    return cls(metadata, texts=[RecognizedText(texts, scores)])


Segment class

Class representing a segment of an image, typically a result from a segmentation model or a detection model.


Name Type Description
bbox Bbox

The bounding box of the segment

mask Mask | None

The segment's mask, if available. The mask is stored relative to the bounding box. Use the global_mask() method to retrieve the mask relative to the original image.

score float | None

Segment confidence score, if available.

class_label str | None

Segment class label, if available.

polygon Polygon | None

An approximation of the segment mask, relative to the original image. If no mask is available, polygon defaults to a polygon representation of the segment's bounding box.

orig_shape tuple[int, int] | None

The shape of the original input image.

Source code in src/htrflow_core/
class Segment:
    """Segment class

    Class representing a segment of an image, typically a result from
    a segmentation model or a detection model.

    Attributes:
        bbox: The bounding box of the segment
        mask: The segment's mask, if available. The mask is stored
            relative to the bounding box. Use the `global_mask`
            property to retrieve the mask relative to the original image.
        score: Segment confidence score, if available.
        class_label: Segment class label, if available.
        polygon: An approximation of the segment mask, relative to the
            original image. If no mask is available, `polygon` defaults
            to a polygon representation of the segment's bounding box.
        orig_shape: The shape of the original input image.
    """

    bbox: Bbox
    mask: Mask | None
    score: float | None
    class_label: str | None
    polygon: Polygon | None
    orig_shape: tuple[int, int] | None

    def __init__(
        self,
        bbox: tuple[int, int, int, int] | Bbox | None = None,
        mask: Mask | None = None,
        score: float | None = None,
        class_label: str | None = None,
        polygon: Polygon | Sequence[tuple[int, int]] | None = None,
        orig_shape: tuple[int, int] | None = None,
    ):
        """Create a `Segment` instance

        A segment can be created from a bounding box, a polygon, a mask
        or any combination of the three.

        Arguments:
            bbox: The segment's bounding box, as either a `geometry.Bbox`
                instance or as a (xmin, ymin, xmax, ymax) tuple. Required
                if `mask` and `polygon` are None. Defaults to None.
            mask: The segment's mask relative to the original input image.
                Required if both `polygon` and `bbox` are None. Defaults
                to None.
            score: Segment confidence score. Defaults to None.
            class_label: Segment class label. Defaults to None.
            polygon: A polygon defining the segment, relative to the input
                image. Defaults to None. Required if both `mask` and `bbox`
                are None.
            orig_shape: The shape of the original input image. Defaults to
                None.

        Raises:
            ValueError: If `bbox`, `mask` and `polygon` are all None.
        """
        if all(item is None for item in (bbox, mask, polygon)):
            raise ValueError("Cannot create a Segment without bbox, mask or polygon")

        # Mask (and possibly bbox) is given: The mask is assumed to be aligned
        # with the original image. The bounding box is discarded (if given) and
        # recomputed from the mask. A polygon is also inferred from the mask.
        # The mask is then converted to a local mask.
        if mask is not None:
            bbox = geometry.mask2bbox(mask)
            polygon = geometry.mask2polygon(mask)
            mask = imgproc.crop(mask, bbox)

        if polygon is not None:
            polygon = geometry.Polygon(polygon)

            # Use the polygon's bounding box if no other bounding box was provided
            if bbox is None:
                bbox = polygon.bbox()

        self.bbox = geometry.Bbox(*bbox)
        self.polygon = polygon
        self.mask = mask
        self.score = score
        self.class_label = class_label
        self.orig_shape = orig_shape

    def __str__(self):
        return f"Segment(class_label={self.class_label}, score={self.score}, bbox={self.bbox}, polygon={self.polygon}, mask={self.mask})"  # noqa: E501

    @property
    def global_mask(self, orig_shape: tuple[int, int] | None = None) -> Mask | None:
        """The segment mask relative to the original input image.

        Arguments:
            orig_shape: Pass this argument to use another original shape
                than the segment's `orig_shape` attribute. Defaults to None.

        Returns:
            An array of shape `orig_shape` that is zero everywhere except
            inside the bounding box, where it holds the local mask; or
            None if the segment has no mask.

        Raises:
            ValueError: If no original shape is available.
        """
        # NOTE(review): when accessed as a property the getter only receives
        # `self`, so `orig_shape` always takes its default here.
        if self.mask is None:
            return None

        orig_shape = self.orig_shape if orig_shape is None else orig_shape
        if orig_shape is None:
            raise ValueError("Cannot compute the global mask without knowing the original shape.")

        x1, y1, x2, y2 = self.bbox
        mask = np.zeros(orig_shape, dtype=np.uint8)
        mask[y1:y2, x1:x2] = self.mask
        return mask

    def approximate_mask(self, ratio: float) -> Mask | None:
        """A lower resolution version of the global mask

        Arguments:
            ratio: Size of approximate mask relative to the original.

        Returns:
            The global mask rescaled by `ratio`, or None if the segment
            has no mask.
        """
        global_mask = self.global_mask
        if global_mask is None:
            return None
        return imgproc.rescale(global_mask, ratio)

    @property
    def local_mask(self):
        """The segment mask relative to the bounding box (alias for self.mask)"""
        return self.mask

    def rescale(self, factor: float) -> None:
        """Rescale the segment's mask, bounding box and polygon by `factor`"""
        if self.mask is not None:
            self.mask = imgproc.rescale_linear(self.mask, factor)
        self.bbox = self.bbox.rescale(factor)
        if self.polygon is not None:
            self.polygon = self.polygon.rescale(factor)

global_mask: Mask | None property

The segment mask relative to the original input image.


Name Type Description Default

Pass this argument to use another original shape than the segment's orig_shape attribute. Defaults to None.


local_mask property

The segment mask relative to the bounding box (alias for self.mask)

__init__(bbox=None, mask=None, score=None, class_label=None, polygon=None, orig_shape=None)

Create a Segment instance

A segment can be created from a bounding box, a polygon, a mask or any combination of the three.


Name Type Description Default
bbox tuple[int, int, int, int] | Bbox | None

The segment's bounding box, as either a geometry.Bbox instance or as a (xmin, ymin, xmax, ymax) tuple. Required if mask and polygon are None. Defaults to None.

mask Mask | None

The segment's mask relative to the original input image. Required if both polygon and bbox are None. Defaults to None.

score float | None

Segment confidence score. Defaults to None.

class_label str | None

Segment class label. Defaults to None.

polygon Polygon | Sequence[tuple[int, int]] | None

A polygon defining the segment, relative to the input image. Defaults to None. Required if both mask and bbox are None.

orig_shape tuple[int, int] | None

The shape of the original input image. Defaults to None.

Source code in src/htrflow_core/
def __init__(
    self,
    bbox: tuple[int, int, int, int] | Bbox | None = None,
    mask: Mask | None = None,
    score: float | None = None,
    class_label: str | None = None,
    polygon: Polygon | Sequence[tuple[int, int]] | None = None,
    orig_shape: tuple[int, int] | None = None,
):
    """Create a `Segment` instance

    A segment can be created from a bounding box, a polygon, a mask
    or any combination of the three.

    Arguments:
        bbox: The segment's bounding box, as either a `geometry.Bbox`
            instance or as a (xmin, ymin, xmax, ymax) tuple. Required
            if `mask` and `polygon` are None. Defaults to None.
        mask: The segment's mask relative to the original input image.
            Required if both `polygon` and `bbox` are None. Defaults
            to None.
        score: Segment confidence score. Defaults to None.
        class_label: Segment class label. Defaults to None.
        polygon: A polygon defining the segment, relative to the input
            image. Defaults to None. Required if both `mask` and `bbox`
            are None.
        orig_shape: The shape of the original input image. Defaults to
            None.

    Raises:
        ValueError: If `bbox`, `mask` and `polygon` are all None.
    """
    if all(item is None for item in (bbox, mask, polygon)):
        raise ValueError("Cannot create a Segment without bbox, mask or polygon")

    # Mask (and possibly bbox) is given: The mask is assumed to be aligned
    # with the original image. The bounding box is discarded (if given) and
    # recomputed from the mask. A polygon is also inferred from the mask.
    # The mask is then converted to a local mask.
    if mask is not None:
        bbox = geometry.mask2bbox(mask)
        polygon = geometry.mask2polygon(mask)
        mask = imgproc.crop(mask, bbox)

    if polygon is not None:
        polygon = geometry.Polygon(polygon)

        # Use the polygon's bounding box if no other bounding box was provided
        if bbox is None:
            bbox = polygon.bbox()

    self.bbox = geometry.Bbox(*bbox)
    self.polygon = polygon
    self.mask = mask
    self.score = score
    self.class_label = class_label
    self.orig_shape = orig_shape


A lower resolution version of the global mask


Name Type Description Default
ratio float

Size of approximate mask relative to the original.

Source code in src/htrflow_core/
def approximate_mask(self, ratio: float) -> Mask | None:
    """A lower resolution version of the global mask

    Arguments:
        ratio: Size of approximate mask relative to the original.

    Returns:
        The global mask rescaled by `ratio`, or None if the segment has
        no global mask.
    """
    # `global_mask` is read as an attribute (the page lists it as a
    # property), not called.
    global_mask = self.global_mask
    if global_mask is None:
        return None
    return imgproc.rescale(global_mask, ratio)


Rescale the segment's mask, bounding box and polygon by factor

Source code in src/htrflow_core/
def rescale(self, factor: float) -> None:
    """Scale the segment's mask, bounding box and polygon in place by `factor`.

    The mask and the polygon are only touched when they are present; the
    bounding box is always rescaled.
    """
    local_mask = self.mask
    if local_mask is not None:
        self.mask = imgproc.rescale_linear(local_mask, factor)

    self.bbox = self.bbox.rescale(factor)

    outline = self.polygon
    if outline is not None:
        self.polygon = outline.rescale(factor)