Skip to content

MCP Tools

MCP tool and resource registrations exposed by the ra-mcp server.

Search Tools

search_tool

Search MCP tool for Riksarkivet transcribed documents.

Provides the search_transcribed tool with pagination and formatting helpers.

register_search_tool(mcp)

Register the search tools with the MCP server.

Source code in packages/search-mcp/src/ra_mcp_search_mcp/search_tool.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
def register_search_tool(mcp) -> None:
    """Register the search tools with the MCP server.

    Registers two read-only tools on *mcp*:

    - ``transcribed`` (``search_transcribed``): full-text search over
      AI-transcribed page text of digitised documents.
    - ``metadata`` (``search_metadata``): search over catalogue metadata
      (titles, names, places), optionally including non-digitised records.

    Both tools share the session-scoped ``"seen_search"`` state so repeated
    queries compact documents already shown to the client.
    """

    @mcp.tool(
        name="transcribed",
        version="1.0",
        timeout=30.0,
        tags={"search"},
        annotations={"readOnlyHint": True, "openWorldHint": True},
        description=(
            "Search AI-transcribed text in digitised historical documents from the Swedish National Archives. "
            "IMPORTANT: Transcriptions are AI-generated (HTR/OCR) and contain recognition errors — "
            "always use fuzzy search (~) to compensate for misread characters and increase hits.\n"
            'Supports Solr syntax: wildcards (troll*), fuzzy (stockholm~1), Boolean ((A AND B)), proximity ("term1 term2"~10). '
            "Always group Boolean queries with outer parentheses. Use fuzzy (~) for OCR/HTR errors and old Swedish variants (präst/prest, silver/silfver).\n"
            "Paginate with offset (0, 50, 100...). Session dedup: re-calling returns stubs for already-seen documents."
        ),
    )
    async def search_transcribed(
        keyword: Annotated[
            str, Field(description='Search term or Solr query. Supports wildcards (*), fuzzy (~), Boolean (AND/OR/NOT), proximity ("term1 term2"~N).')
        ],
        offset: Annotated[int, Field(description="Pagination start position. Use 0 for first page, then 50, 100, etc.")],
        limit: Annotated[int, Field(description="Maximum documents to return per query.")] = 25,
        max_snippets_per_record: Annotated[int, Field(description="Maximum matching pages shown per document.")] = 3,
        max_response_tokens: Annotated[int, Field(description="Maximum tokens in response.")] = 15000,
        sort: Annotated[str, Field(description="Sort order: 'relevance', 'timeAsc', 'timeDesc', 'alphaAsc', 'alphaDesc'.")] = "relevance",
        year_min: Annotated[int | None, Field(description="Start year filter (e.g. 1700).")] = None,
        year_max: Annotated[int | None, Field(description="End year filter (e.g. 1750).")] = None,
        dedup: Annotated[bool, Field(description="Session deduplication. True compacts already-seen documents; False forces full results.")] = True,
        research_context: Annotated[str | None, Field(description="Brief summary of the user's research goal. Used for telemetry only.")] = None,
        ctx: Context | None = None,
    ) -> str:
        """Search AI-transcribed text in digitised historical documents.

        This tool searches only transcribed text (not metadata).
        For metadata search, use search_metadata instead.
        """
        # Fail fast on malformed arguments before any network work.
        validation_error = _validate_search_input(keyword, offset, year_min, year_max, sort, limit)
        if validation_error:
            return validation_error

        if research_context:
            logger.info("MCP Tool: search_transcribed | context: %s", research_context)
        logger.info("MCP Tool: search_transcribed called with keyword='%s', offset=%d", keyword, offset)

        try:
            logger.debug("Initializing search operations...")
            search_operations = SearchOperations(http_client=default_http_client)
            formatter = PlainTextFormatter()

            logger.info("Executing transcribed text search for '%s'...", keyword)
            # ctx is None when invoked outside an MCP session (e.g. tests).
            session_id = ctx.session_id if ctx is not None else None
            search_result = await search_operations.search(
                keyword=keyword,
                transcribed_only=True,  # Always search transcribed text
                only_digitised=True,  # Transcriptions only exist for digitised materials
                offset=offset,
                limit=limit,
                max_snippets_per_record=max_snippets_per_record,
                sort=sort,
                year_min=year_min,
                year_max=year_max,
                research_context=research_context,
                session_id=session_id,
            )

            # Load session state for dedup
            # Maps reference_code -> page numbers already shown this session.
            seen: dict[str, list[int]] | None = None
            if dedup and ctx is not None:
                seen = await ctx.get_state("seen_search") or {}
                logger.info("[search_transcribed] Dedup state loaded: %d documents previously seen", len(seen))

            logger.info("Formatting %d search results...", len(search_result.items))
            formatted_results = formatter.format_search_results(
                search_result,
                maximum_documents_to_display=limit,
                seen_pages=seen,
            )

            # Update session state with only the documents actually scanned by the formatter
            # (formatter.items_scanned is set by format_search_results above).
            if dedup and ctx is not None:
                updated = _update_seen_search_state(seen or {}, search_result, max_displayed=formatter.items_scanned)
                await ctx.set_state("seen_search", updated)
                logger.info("[search_transcribed] Dedup state saved: %d documents now tracked", len(updated))

            # Post-process: truncate to the token budget, then append paging hints.
            formatted_results = _apply_token_limit_if_needed(formatted_results, max_response_tokens)
            formatted_results = _append_pagination_info_if_needed(formatted_results, search_result, offset, limit)

            logger.info("✓ Search completed successfully, returning results")
            return formatted_results

        except Exception as e:
            # Tool boundary: convert any failure into a readable error payload
            # instead of propagating an exception to the MCP client.
            logger.error("✗ MCP search_transcribed failed: %s: %s", type(e).__name__, e, exc_info=True)
            formatter = PlainTextFormatter()
            return formatter.format_error_message(
                f"Search failed: {e!s}",
                error_suggestions=[
                    "Try a simpler search term",
                    "Check if the service is available",
                    "Reduce limit",
                    "Check Hugging Face logs for timeout details",
                ],
            )

    @mcp.tool(
        name="metadata",
        version="1.0",
        timeout=30.0,
        tags={"search"},
        annotations={"readOnlyHint": True, "openWorldHint": True},
        description=(
            "Search document metadata (titles, names, places, descriptions) across the Swedish National Archives catalog. "
            "Covers 2M+ records when only_digitised=False, including non-digitised materials. "
            "Use the dedicated name parameter for person searches and place parameter for place searches — these can be combined with keyword.\n"
            "Does NOT search transcribed page text — use search_transcribed for that. "
            "Same Solr syntax as search_transcribed. Session dedup: re-calling returns stubs for already-seen documents.\n"
            "Important: name and place filter a dedicated metadata field that is sparsely populated. "
            "Most person/place matches are NOT digitised, so set only_digitised=False when using name or place to avoid empty results."
        ),
    )
    async def search_metadata(
        keyword: Annotated[str, Field(description="Free-text search across all metadata fields. Supports Solr syntax (wildcards, fuzzy, Boolean).")],
        offset: Annotated[int, Field(description="Pagination start position. Use 0 for first page, then 50, 100, etc.")],
        only_digitised: Annotated[bool, Field(description="True = digitised materials only. False = all 2M+ records including non-digitised.")] = True,
        limit: Annotated[int, Field(description="Maximum documents to return per query.")] = 25,
        max_response_tokens: Annotated[int, Field(description="Maximum tokens in response.")] = 15000,
        sort: Annotated[str, Field(description="Sort order: 'relevance', 'timeAsc', 'timeDesc', 'alphaAsc', 'alphaDesc'.")] = "relevance",
        year_min: Annotated[int | None, Field(description="Start year filter (e.g. 1700).")] = None,
        year_max: Annotated[int | None, Field(description="End year filter (e.g. 1750).")] = None,
        name: Annotated[
            str | None,
            Field(
                description="Person name search in dedicated name field (e.g. 'Nobel', 'Linné'). Combinable with keyword and place. Most name matches are non-digitised — set only_digitised=False."
            ),
        ] = None,
        place: Annotated[
            str | None,
            Field(
                description="Place name search in dedicated place field (e.g. 'Stockholm', 'Göteborg'). Combinable with keyword and name. Most place matches are non-digitised — set only_digitised=False."
            ),
        ] = None,
        dedup: Annotated[bool, Field(description="Session deduplication. True compacts already-seen documents; False forces full results.")] = True,
        research_context: Annotated[str | None, Field(description="Brief summary of the user's research goal. Used for telemetry only.")] = None,
        ctx: Context | None = None,
    ) -> str:
        """Search document metadata (titles, names, places, provenance).

        This tool searches metadata fields, not transcribed text.
        For transcription search, use search_transcribed instead.
        """
        # Fail fast on malformed arguments before any network work.
        validation_error = _validate_search_input(keyword, offset, year_min, year_max, sort, limit)
        if validation_error:
            return validation_error

        if research_context:
            logger.info("MCP Tool: search_metadata | context: %s", research_context)
        material_scope = "digitised materials" if only_digitised else "all materials (2M+ records)"
        logger.info("MCP Tool: search_metadata called with keyword='%s', offset=%d, scope=%s", keyword, offset, material_scope)

        try:
            logger.debug("Initializing search operations...")
            search_operations = SearchOperations(http_client=default_http_client)
            formatter = PlainTextFormatter()

            logger.info("Executing metadata search for '%s' in %s...", keyword, material_scope)
            # ctx is None when invoked outside an MCP session (e.g. tests).
            session_id = ctx.session_id if ctx is not None else None
            search_result = await search_operations.search(
                keyword=keyword,
                transcribed_only=False,  # Search metadata fields
                only_digitised=only_digitised,
                offset=offset,
                limit=limit,
                max_snippets_per_record=None,  # Metadata search doesn't have snippets
                sort=sort,
                year_min=year_min,
                year_max=year_max,
                name=name,
                place=place,
                research_context=research_context,
                session_id=session_id,
            )

            # Load session state for dedup
            # Same "seen_search" key as search_transcribed, so both tools
            # deduplicate against a single shared per-session view.
            seen: dict[str, list[int]] | None = None
            if dedup and ctx is not None:
                seen = await ctx.get_state("seen_search") or {}
                logger.info("[search_metadata] Dedup state loaded: %d documents previously seen", len(seen))

            logger.info("Formatting %d search results...", len(search_result.items))
            formatted_results = formatter.format_search_results(
                search_result,
                maximum_documents_to_display=limit,
                seen_pages=seen,
            )

            # Update session state with only the documents actually scanned by the formatter
            if dedup and ctx is not None:
                updated = _update_seen_search_state(seen or {}, search_result, max_displayed=formatter.items_scanned)
                await ctx.set_state("seen_search", updated)
                logger.info("[search_metadata] Dedup state saved: %d documents now tracked", len(updated))

            # Post-process: truncate to the token budget, then append paging hints.
            formatted_results = _apply_token_limit_if_needed(formatted_results, max_response_tokens)
            formatted_results = _append_pagination_info_if_needed(formatted_results, search_result, offset, limit)

            logger.info("✓ Metadata search completed successfully, returning results")
            return formatted_results

        except Exception as e:
            # Tool boundary: convert any failure into a readable error payload
            # instead of propagating an exception to the MCP client.
            logger.error("✗ MCP search_metadata failed: %s: %s", type(e).__name__, e, exc_info=True)
            formatter = PlainTextFormatter()
            return formatter.format_error_message(
                f"Metadata search failed: {e!s}",
                error_suggestions=[
                    "Try a simpler search term",
                    "Check if the service is available",
                    "Reduce limit",
                    "Try with only_digitised=True for faster results",
                ],
            )

Formatter

PlainTextFormatter

Formatter that produces plain text without any Rich markup.

format_browse_results(browse_result, highlight_term=None, show_links=False, show_success_message=True)

Format browse results as plain text with emojis for MCP/LLM consumption.

Parameters:

Name Type Description Default
browse_result

BrowseResult containing page contexts and metadata

required
highlight_term

Optional term to highlight in text

None
show_links bool

Whether to show ALTO/Image/Bildvisning links

False
show_success_message bool

Whether to show success message (ignored in plain text)

True

Returns:

Type Description
str

Formatted plain text browse results

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
def format_browse_results(
    self,
    browse_result,
    highlight_term=None,
    show_links: bool = False,
    show_success_message: bool = True,
) -> str:
    """
    Render a BrowseResult as emoji-decorated plain text for MCP/LLM output.

    Args:
        browse_result: BrowseResult with page contexts and optional OAI metadata
        highlight_term: Optional keyword to emphasise within each page's text
        show_links: Accepted for interface parity; links are always printed here
        show_success_message: Ignored by the plain-text formatter

    Returns:
        The formatted multi-line browse output
    """
    if not browse_result.contexts:
        return f"No page contexts found for {browse_result.reference_code}"

    output = [f"📚 Document: {browse_result.reference_code}"]

    # Header section from OAI-PMH metadata, when the harvest returned any.
    meta = browse_result.oai_metadata
    if meta:
        if meta.title and meta.title != "(No title)":
            output.append(f"📋 Title: {meta.title}")
        if meta.repository:
            output.append(f"🏛️  Repository: {meta.repository}")
        # Skip the unit id when it merely repeats the reference code.
        if meta.unitid and meta.unitid != browse_result.reference_code:
            output.append(f"🔖 Unit ID: {meta.unitid}")
        if meta.nad_link:
            output.append(f"🔗 NAD Link: {meta.nad_link}")

    output.append(f"📖 Pages loaded: {len(browse_result.contexts)}")
    output.append("")

    # One section per page: heading, rule, (optionally highlighted) text, links.
    for page in browse_result.contexts:
        output.append(f"📄 Page {page.page_number}")
        output.append("─" * 40)

        body = page.full_text
        if highlight_term:
            body = self.highlight_search_keyword(body, highlight_term)
        output.append(body)

        output.append("")
        output.append("🔗 Links:")
        output.append(f"  📝 ALTO XML: {page.alto_url}")
        if page.image_url:
            output.append(f"  🖼️  Image: {page.image_url}")
        if page.bildvisning_url:
            output.append(f"  👁️  Bildvisning: {page.bildvisning_url}")
        output.append("")

    return "\n".join(output)

format_error_message(error_message, error_suggestions=None)

Format an error message with optional suggestions.

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
82
83
84
def format_error_message(self, error_message: str, error_suggestions: list[str] | None = None) -> str:
    """Format an error message with optional suggestions.

    Thin delegate: all wording lives in the module-level
    ``format_error_message`` helper imported elsewhere in this module,
    so error formatting stays consistent across call sites.

    Args:
        error_message: Human-readable description of what went wrong.
        error_suggestions: Optional recovery hints appended to the message.

    Returns:
        The formatted error text.
    """
    return format_error_message(error_message, error_suggestions)

format_no_results_message(search_result)

Generate appropriate message when no results are found.

Parameters:

Name Type Description Default
search_result

SearchResult containing keyword, offset, and total_hits

required

Returns:

Type Description
str

Formatted no results message

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
86
87
88
89
90
91
92
93
94
95
96
97
98
def format_no_results_message(self, search_result) -> str:
    """
    Generate an appropriate message when a search returns no results.

    Args:
        search_result: SearchResult containing ``keyword``, ``offset``, and ``total_hits``

    Returns:
        Formatted no-results message. A positive offset means the caller
        paginated past the end of the result set, so the message reports
        the total instead of implying the keyword matched nothing.
    """
    if search_result.offset > 0:
        return f"No more results found for '{search_result.keyword}' at offset {search_result.offset}. Total results: {search_result.total_hits}"
    # Fixed: the previous message ended in a dangling 'make sure to use "" '
    # fragment; give the caller an actionable, complete hint instead.
    return f"No results found for '{search_result.keyword}'. Try fuzzy search (~), wildcards (*), or quoting exact phrases with \"\"."

format_panel(panel_content, panel_title='', panel_border_style='')

Format content as plain text without panels or borders.

Parameters:

Name Type Description Default
panel_content str

Content to display

required
panel_title str

Optional title

''
panel_border_style str

Ignored in plain text mode

''

Returns:

Type Description
str

Plain text formatted content

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def format_panel(self, panel_content: str, panel_title: str = "", panel_border_style: str = "") -> str:
    """
    Render panel content as plain text; borders and styles are dropped.

    Args:
        panel_content: Body text of the panel
        panel_title: Optional heading placed above the content
        panel_border_style: Accepted for interface parity; never used

    Returns:
        Title (when given), a blank line, then the content
    """
    # Without a title the panel collapses to its bare content.
    if not panel_title:
        return panel_content
    return "\n".join([panel_title, "", panel_content])

format_search_results(search_result, maximum_documents_to_display=20, seen_pages=None)

Format search results as plain text with emojis for MCP/LLM consumption.

Parameters:

Name Type Description Default
search_result

SearchResult containing documents and metadata

required
maximum_documents_to_display int

Maximum number of documents to display

20
seen_pages dict[str, list[int]] | None

Optional dict mapping reference_code to list of already-seen page numbers. When provided, documents/snippets that were already shown are compacted or skipped.

None

Returns:

Type Description
str

Formatted plain text search results

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
def format_search_results(
    self,
    search_result,
    maximum_documents_to_display: int = 20,
    seen_pages: dict[str, list[int]] | None = None,
) -> str:
    """
    Format search results as plain text with emojis for MCP/LLM consumption.

    Side effect: sets ``self.items_scanned`` to the number of result items
    actually examined, so callers can bound their dedup-state updates to
    documents that were really considered.

    Args:
        search_result: SearchResult containing documents and metadata
        maximum_documents_to_display: Maximum number of documents to display
        seen_pages: Optional dict mapping reference_code to list of already-seen page numbers.
                    When provided, documents/snippets that were already shown are compacted or skipped.

    Returns:
        Formatted plain text search results
    """
    if not search_result.items:
        self.items_scanned = 0
        return self.format_no_results_message(search_result)

    lines: list[str] = []
    # snippet_count > 0 distinguishes a transcribed-text search (page hits)
    # from a metadata-only search (no snippets at all).
    snippet_count = search_result.count_snippets()
    skipped_count = 0
    displayed_count = 0
    items_scanned = 0

    # Show "100+" if we hit the max limit, indicating more are available
    document_count = len(search_result.items)
    if document_count >= search_result.limit:
        document_display = f"{document_count}+"
    else:
        document_display = str(document_count)

    # Different summary for metadata search vs transcribed search
    if snippet_count > 0:
        lines.append(f"Found {snippet_count} page-level hits across {document_display} volumes")
    else:
        lines.append(f"Found {document_display} volumes matching metadata")
    lines.append("")

    # Iterate all items — skipped (deduped) docs don't count against the display limit
    for idx, document in enumerate(search_result.items):
        if displayed_count >= maximum_documents_to_display:
            break
        items_scanned = idx + 1

        # In a transcribed search, items without snippets carry no new info.
        has_snippets = document.transcribed_text and document.transcribed_text.snippets
        if snippet_count > 0 and not has_snippets:
            continue

        ref_code = document.metadata.reference_code

        # --- Dedup logic ---
        # Documents seen earlier this session are either compacted (only
        # their not-yet-shown pages rendered) or dropped entirely.
        if seen_pages is not None and ref_code in seen_pages:
            prev_page_nums = set(seen_pages.get(ref_code, []))

            if has_snippets:
                # Keep a snippet only if at least one of its pages is new.
                new_snippets = [s for s in document.transcribed_text.snippets if any(page_id_to_number(p.id) not in prev_page_nums for p in s.pages)]
                if not new_snippets:
                    skipped_count += 1
                    continue
                lines.append(f"📚 Document: {ref_code} (previously shown — new pages only)")
                self._format_compact_snippets(lines, new_snippets, search_result.keyword)
                lines.append("")
                displayed_count += 1
                continue
            # Metadata-only result already shown: nothing new to add.
            skipped_count += 1
            continue

        # --- Full rendering ---
        displayed_count += 1
        self._format_document_header(lines, document)

        if not has_snippets:
            self._format_metadata_fields(lines, document)

        if has_snippets:
            self._format_document_snippets(lines, document, search_result.keyword)

        lines.append("")

    if skipped_count > 0:
        lines.append(f"({skipped_count} previously shown document(s) omitted)")
        lines.append("")

    # Track how many items were scanned so the caller can limit state updates
    self.items_scanned = items_scanned

    total_remaining = len(search_result.items) - items_scanned
    if total_remaining > 0:
        lines.append(f"... and {total_remaining} more documents")

    lines.append("")
    lines.append("Tip: Use browse_document to read full pages of interesting results.")

    return "\n".join(lines)

format_table(column_headers, table_rows, table_title='')

Create a plain text table without borders.

Parameters:

Name Type Description Default
column_headers list[str]

List of column headers

required
table_rows list[list[str]]

List of row data

required
table_title str

Optional table title

''

Returns:

Type Description
str

Plain text formatted table

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def format_table(
    self,
    column_headers: list[str],
    table_rows: list[list[str]],
    table_title: str = "",
) -> str:
    """
    Render a simple left-aligned plain-text table without border characters.

    Args:
        column_headers: Header label for each column
        table_rows: Row data; each row is a list of cell values
        table_title: Optional title printed above the table

    Returns:
        The table as a newline-joined string
    """
    out: list[str] = []
    if table_title:
        out.extend([table_title, ""])

    # Each column is padded to its widest cell, header included.
    widths = [
        max(len(str(row[col])) for row in [column_headers, *table_rows])
        for col in range(len(column_headers))
    ]

    header_line = " | ".join(label.ljust(width) for label, width in zip(column_headers, widths))
    out.append(header_line)
    # A plain dashed rule stands in for a border.
    out.append("-" * len(header_line))

    for row in table_rows:
        out.append(" | ".join(str(row[col]).ljust(widths[col]) for col in range(len(row))))

    return "\n".join(out)

format_text(text_content, style_name='')

Return plain text without any styling.

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
11
12
13
def format_text(self, text_content: str, style_name: str = "") -> str:
    """Return plain text without any styling.

    Args:
        text_content: The text to render.
        style_name: Accepted for interface parity with styled formatters; ignored here.

    Returns:
        ``text_content`` exactly as given.
    """
    return text_content

highlight_search_keyword(text_content, search_keyword)

Highlight search keywords using markdown-style bold.

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
74
75
76
def highlight_search_keyword(self, text_content: str, search_keyword: str) -> str:
    """Highlight search keywords using markdown-style bold.

    Thin delegate to the module-level ``highlight_keyword_markdown`` helper
    imported elsewhere in this module.

    Args:
        text_content: Text to scan for the keyword.
        search_keyword: Term to emphasise.

    Returns:
        The text with matches wrapped in markdown bold markers.
    """
    return highlight_keyword_markdown(text_content, search_keyword)

Browse Tool

browse_tool

Browse MCP tool for Riksarkivet document pages.

Provides the browse_document tool for viewing full page transcriptions.

register_browse_tool(mcp)

Register the browse tool with the MCP server.

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/browse_tool.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def register_browse_tool(mcp) -> None:
    """Register the browse tool with the MCP server."""

    @mcp.tool(
        name="document",
        version="1.0",
        timeout=60.0,
        tags={"browse"},
        annotations={"readOnlyHint": True, "openWorldHint": True},
        description=(
            "View full page transcriptions of a document by reference code. Use reference codes from search results. "
            "Returns original text (usually Swedish), links to bildvisaren (image viewer), and ALTO XML.\n"
            "Blank pages are normal (digitised but no text). Non-digitised materials return metadata only. "
            "Session dedup: re-browsing same pages returns stubs. Set dedup=False to force full text.\n"
            "TOKEN COST: ~300 tokens overhead per response + ~200-1500 tokens per page depending on content density. "
            "Dense court protocol pages average ~1000 tokens each; title/cover pages ~300. "
            "Request only the pages you need — start with 3-5 pages and paginate."
        ),
    )
    async def browse_document(
        reference_code: Annotated[str, Field(description="Document reference code from search results (e.g. 'SE/RA/420422/01').")],
        pages: Annotated[str, Field(description="Page specification: single ('5'), range ('1-10'), or comma-separated ('5,7,9').")],
        highlight_term: Annotated[str | None, Field(description="Optional keyword to highlight in the transcription.")] = None,
        max_pages: Annotated[int, Field(description="Maximum pages to retrieve.", le=20)] = 20,
        dedup: Annotated[bool, Field(description="Session deduplication. True replaces already-shown pages with stubs; False forces full text.")] = True,
        research_context: Annotated[str | None, Field(description="Brief summary of the user's research goal. Used for telemetry only.")] = None,
        ctx: Context | None = None,
    ) -> str:
        """
        Browse specific pages of a document by reference code.

        Returns:
        - Full transcribed text for each requested page
        - Optional keyword highlighting
        - Direct links to images and ALTO XML

        Examples:
        - browse_document("SE/RA/420422/01", "5") - View page 5
        - browse_document("SE/RA/420422/01", "1-10") - View pages 1 through 10
        - browse_document("SE/RA/420422/01", "5,7,9", highlight_term="Stockholm") - View specific pages with highlighting
        """
        # Input validation: fail fast with actionable suggestions instead of
        # letting the downstream fetch raise an opaque error.
        if not reference_code or not reference_code.strip():
            return format_error_message("reference_code must not be empty", error_suggestions=["Provide a document reference code, e.g. 'SE/RA/420422/01'"])
        if not pages or not pages.strip():
            return format_error_message("pages must not be empty", error_suggestions=["Specify pages like '1-5', '1,3,5', or '7'"])

        # research_context is logged for telemetry only; it does not affect retrieval.
        if research_context:
            logger.info("MCP Tool: browse_document | context: %s", research_context)

        try:
            browse_operations = BrowseOperations(http_client=default_http_client)
            formatter = PlainTextFormatter()

            # Dedup state is keyed by the MCP session; without a context there
            # is no session, so no dedup is possible.
            session_id = ctx.session_id if ctx is not None else None
            browse_result = await _fetch_document_pages(
                browse_operations,
                reference_code=reference_code,
                pages=pages,
                highlight_term=highlight_term,
                max_pages=max_pages,
                research_context=research_context,
                session_id=session_id,
            )

            # Load session state for dedup. Pages already shown this session
            # will be replaced with one-line stubs by the formatter.
            seen_page_numbers: set[int] | None = None
            if dedup and ctx is not None:
                seen_browse: dict[str, list[int]] = await ctx.get_state("seen_browse") or {}
                seen_page_numbers = set(seen_browse.get(reference_code, []))
                logger.info(
                    "[browse] Dedup state loaded: %d documents tracked, %d pages previously seen for %s",
                    len(seen_browse),
                    len(seen_page_numbers),
                    reference_code,
                )

            if not browse_result.contexts:
                # Check if we have metadata to display for non-digitised materials
                if browse_result.oai_metadata:
                    return formatter.format_browse_results(browse_result, highlight_term, seen_page_numbers=seen_page_numbers)
                return _generate_no_pages_found_message(reference_code)

            result = formatter.format_browse_results(browse_result, highlight_term, seen_page_numbers=seen_page_numbers)

            # Update session state with newly shown pages.
            # NOTE: seen_browse was bound above under the identical
            # `dedup and ctx is not None` guard, so it is guaranteed in scope here.
            if dedup and ctx is not None:
                all_pages = set(seen_browse.get(reference_code, []))
                for context in browse_result.contexts:
                    all_pages.add(context.page_number)
                seen_browse[reference_code] = sorted(all_pages)
                await ctx.set_state("seen_browse", seen_browse)
                logger.info("[browse] Dedup state saved: %s now has %d pages tracked", reference_code, len(all_pages))

            return result

        except Exception as e:
            # Broad catch at the tool boundary: errors are returned as formatted
            # text so the LLM client can recover, never raised to the transport.
            logger.error("MCP browse_document failed: %s: %s", type(e).__name__, e, exc_info=True)
            return format_error_message(
                f"Browse failed: {e!s}",
                error_suggestions=[
                    "Check the reference code format",
                    "Verify page numbers are valid",
                    "Try with fewer pages",
                ],
            )

Formatter

PlainTextFormatter

Formatter that produces plain text without any Rich markup.

format_browse_results(browse_result, highlight_term=None, show_links=False, show_success_message=True, seen_page_numbers=None)

Format browse results as plain text with emojis for MCP/LLM consumption.

Parameters:

Name Type Description Default
browse_result

BrowseResult containing page contexts and metadata

required
highlight_term

Optional term to highlight in text

None
show_links bool

Whether to show ALTO/Image/Bildvisning links

False
show_success_message bool

Whether to show success message (ignored in plain text)

True
seen_page_numbers set[int] | None

Optional set of page numbers already shown in this session. When provided, previously-shown pages get a one-liner stub.

None

Returns:

Type Description
str

Formatted plain text browse results

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
def format_browse_results(
    self,
    browse_result,
    highlight_term=None,
    show_links: bool = False,
    show_success_message: bool = True,
    seen_page_numbers: set[int] | None = None,
) -> str:
    """
    Format browse results as plain text with emojis for MCP/LLM consumption.

    Args:
        browse_result: BrowseResult containing page contexts and metadata
        highlight_term: Optional term to highlight in text
        show_links: Whether to show ALTO/Image/Bildvisning links
        show_success_message: Whether to show success message (ignored in plain text)
        seen_page_numbers: Optional set of page numbers already shown in this session.
                           When provided, previously-shown pages get a one-liner stub.

    Returns:
        Formatted plain text browse results
    """
    lines: list[str] = []

    # Non-digitised materials short-circuit to a metadata-only rendering.
    if self._format_non_digitised_metadata(lines, browse_result):
        return "\n".join(lines)

    if not browse_result.contexts:
        return f"No page contexts found for {browse_result.reference_code}"

    # An explicitly-passed empty set is treated the same as None: no stubbing.
    seen = seen_page_numbers or set()

    lines.append(f"📚 Document: {browse_result.reference_code}")

    if browse_result.oai_metadata:
        self._format_oai_metadata(lines, browse_result.oai_metadata, browse_result.reference_code)

    # Summary line with dedup info: only show the new/previously-shown split
    # when dedup is active and at least one page is being stubbed.
    new_count = sum(1 for c in browse_result.contexts if c.page_number not in seen)
    reseen_count = len(browse_result.contexts) - new_count
    if seen and reseen_count > 0:
        lines.append(f"📖 Pages loaded: {len(browse_result.contexts)} ({new_count} new, {reseen_count} previously shown)")
    else:
        lines.append(f"📖 Pages loaded: {len(browse_result.contexts)}")
    lines.append("")

    for context in browse_result.contexts:
        # Previously-shown pages collapse to a one-line stub to save tokens.
        if context.page_number in seen:
            lines.append(f"📄 Page {context.page_number} (previously shown in this session)")
            lines.append("")
            continue

        lines.append(f"📄 Page {context.page_number}")
        lines.append("─" * 40)

        if context.full_text.strip():
            display_text = context.full_text
            if highlight_term:
                display_text = self.highlight_search_keyword(display_text, highlight_term)
            lines.append(display_text)
        else:
            # Blank pages are expected: digitised but with no transcribed text.
            lines.append("(Empty page - no transcribed text)")

        lines.append("")
        lines.append("🔗 Links:")
        lines.append(f"  📝 ALTO XML: {context.alto_url}")
        if context.image_url:
            lines.append(f"  🖼️  Image: {context.image_url}")
        if context.bildvisning_url:
            lines.append(f"  👁️  Bildvisning: {context.bildvisning_url}")

        lines.append("")

    # Trailing guidance for the consuming LLM on how to present the material.
    lines.append(
        "Tip: Present the original text (quoted), provide a translation in the user's language, and include the bildvisaren link. Note uncertain readings."
    )

    return "\n".join(lines)

format_no_results_message(search_result)

Generate appropriate message when no results are found.

Parameters:

Name Type Description Default
search_result

SearchResult containing keyword, offset, and total_hits

required

Returns:

Type Description
str

Formatted no results message

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
82
83
84
85
86
87
88
89
90
91
92
93
94
def format_no_results_message(self, search_result) -> str:
    """
    Generate an appropriate message when a search yields no results.

    Args:
        search_result: SearchResult containing keyword, offset, and total_hits

    Returns:
        Formatted no results message. When the caller paginated past the end
        (offset > 0) the message reports the total hit count; otherwise it
        suggests query-broadening techniques supported by the search backend.
    """
    if search_result.offset > 0:
        return f"No more results found for '{search_result.keyword}' at offset {search_result.offset}. Total results: {search_result.total_hits}"
    # Fixed previously garbled guidance ('. make sure to use "" ') with a clear,
    # actionable suggestion matching the search tool's documented Solr syntax.
    return f"No results found for '{search_result.keyword}'. Try fuzzy search (~), wildcards (*), or wrap exact phrases in double quotes."

format_panel(panel_content, panel_title='', panel_border_style='')

Format content as plain text without panels or borders.

Parameters:

Name Type Description Default
panel_content str

Content to display

required
panel_title str

Optional title

''
panel_border_style str

Ignored in plain text mode

''

Returns:

Type Description
str

Plain text formatted content

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def format_panel(self, panel_content: str, panel_title: str = "", panel_border_style: str = "") -> str:
    """
    Render content as plain text, ignoring any panel decoration.

    Args:
        panel_content: Content to display
        panel_title: Optional title placed above the content
        panel_border_style: Ignored in plain text mode

    Returns:
        Plain text formatted content
    """
    # A title, when present, is separated from the body by a single blank line.
    if panel_title:
        return f"{panel_title}\n\n{panel_content}"
    return panel_content

format_search_results(search_result, maximum_documents_to_display=20)

Format search results as plain text with emojis for MCP/LLM consumption.

Parameters:

Name Type Description Default
search_result

SearchResult containing documents and metadata

required
maximum_documents_to_display int

Maximum number of documents to display

20

Returns:

Type Description
str

Formatted plain text search results

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def format_search_results(self, search_result, maximum_documents_to_display: int = 20) -> str:
    """
    Format search results as plain text with emojis for MCP/LLM consumption.

    Args:
        search_result: SearchResult containing documents and metadata
        maximum_documents_to_display: Maximum number of documents to display

    Returns:
        Formatted plain text search results
    """
    if not search_result.items:
        return self.format_no_results_message(search_result)

    lines = []
    snippet_count = search_result.count_snippets()

    # Show "N+" when the request limit was hit, signalling more results exist.
    document_count = len(search_result.items)
    if document_count >= search_result.limit:
        document_display = f"{document_count}+"
    else:
        document_display = str(document_count)

    if snippet_count > 0:
        lines.append(f"Found {snippet_count} page-level hits across {document_display} volumes")
    else:
        lines.append(f"Found {document_display} volumes matching metadata")
    lines.append("")

    # Plain iteration: the index was previously bound via enumerate but never used.
    for document in search_result.items[:maximum_documents_to_display]:
        has_snippets = document.transcribed_text and document.transcribed_text.snippets
        # When any snippets exist at all, skip documents that matched on metadata only.
        if snippet_count > 0 and not has_snippets:
            continue

        self._format_document_header(lines, document)

        if not has_snippets:
            self._format_metadata_fields(lines, document)

        if has_snippets:
            self._format_document_snippets(lines, document, search_result.keyword)

        lines.append("")

    total_document_count = len(search_result.items)
    if total_document_count > maximum_documents_to_display:
        remaining_documents = total_document_count - maximum_documents_to_display
        lines.append(f"... and {remaining_documents} more documents")

    return "\n".join(lines)

format_table(column_headers, table_rows, table_title='')

Create a plain text table without borders.

Parameters:

Name Type Description Default
column_headers list[str]

List of column headers

required
table_rows list[list[str]]

List of row data

required
table_title str

Optional table title

''

Returns:

Type Description
str

Plain text formatted table

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def format_table(
    self,
    column_headers: list[str],
    table_rows: list[list[str]],
    table_title: str = "",
) -> str:
    """
    Create a plain text table without borders.

    Args:
        column_headers: List of column headers
        table_rows: List of row data
        table_title: Optional table title

    Returns:
        Plain text formatted table
    """
    output: list[str] = []
    if table_title:
        output.extend([table_title, ""])

    # Each column is padded to the widest cell in that column, headers included.
    every_row = [column_headers, *table_rows]
    widths = [
        max(len(str(row[col])) for row in every_row)
        for col in range(len(column_headers))
    ]

    def render(cells) -> str:
        # Left-justify each cell to its column width; columns joined by " | ".
        return " | ".join(str(cells[col]).ljust(widths[col]) for col in range(len(cells)))

    header_line = render(column_headers)
    output.append(header_line)
    output.append("-" * len(header_line))
    output.extend(render(row) for row in table_rows)

    return "\n".join(output)

format_text(text_content, style_name='')

Return plain text without any styling.

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
11
12
13
def format_text(self, text_content: str, style_name: str = "") -> str:
    """Pass text through untouched — plain-text output never styles content."""
    # The style_name parameter is kept so callers of styled formatters work unchanged.
    return text_content

highlight_search_keyword(text_content, search_keyword)

Highlight search keywords using markdown-style bold.

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
74
75
76
def highlight_search_keyword(self, text_content: str, search_keyword: str) -> str:
    """Highlight search keywords using markdown-style bold."""
    # Thin wrapper over the shared helper; kept as a method so formatter
    # subclasses can override the highlighting strategy.
    return highlight_keyword_markdown(text_content, search_keyword)

Guide Resources

tools

Riksarkivet Historical Guide MCP Server.

Provides MCP resources for accessing historical documentation about Swedish archives.

get_guide_content(filename)

Load content from specific sections of the Riksarkivet historical guide.

Parameters:

Name Type Description Default
filename str

Markdown filename to load (e.g., '01_Domstolar.md', '02_Fangelse.md')

required

Returns:

Type Description
str

The content of the requested guide section

Source code in packages/guide-mcp/src/ra_mcp_guide_mcp/tools.py
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
@guide_mcp.resource("riksarkivet://guide/{filename}")
def get_guide_content(filename: str) -> str:
    """
    Load content from specific sections of the Riksarkivet historical guide.

    Args:
        filename: Markdown filename to load (e.g., '01_Domstolar.md', '02_Fangelse.md')

    Returns:
        The content of the requested guide section, or a formatted error
        message when the filename is invalid, missing, or unreadable.
    """
    try:
        # Reject anything that is not a plain markdown filename before touching disk.
        if not _validate_markdown_filename(filename):
            return _generate_invalid_filename_message()

        if not _check_file_exists(filename):
            return _generate_file_not_found_message(filename)

        return _load_markdown_file(filename)

    except Exception as e:
        # Report which file actually failed (the template previously contained
        # a literal placeholder instead of interpolating the filename).
        return format_error_message(
            f"Failed to load guide content '{filename}': {e!s}",
            error_suggestions=[
                "Check file permissions",
                "Verify file encoding is UTF-8",
                "Ensure the filename is valid",
            ],
        )

get_table_of_contents()

Get the table of contents (Innehållsförteckning) for the Riksarkivet historical guide.

Source code in packages/guide-mcp/src/ra_mcp_guide_mcp/tools.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
@guide_mcp.resource("riksarkivet://contents/table_of_contents")
def get_table_of_contents() -> str:
    """
    Get the table of contents (Innehållsförteckning) for the Riksarkivet historical guide.

    Returns:
        The markdown content of 00_Innehallsforteckning.md, or a formatted
        error message if the file is missing or cannot be read.
    """
    try:
        content = _load_markdown_file("00_Innehallsforteckning.md")
        return content

    except FileNotFoundError:
        # Missing TOC file gets a specific, actionable message.
        return format_error_message(
            "Table of contents file not found",
            error_suggestions=[
                "Check if the markdown/00_Innehallsforteckning.md file exists",
                "Verify the file path is correct",
            ],
        )
    except Exception as e:
        # Any other read/decode failure is reported rather than raised,
        # so the MCP resource always returns text to the client.
        return format_error_message(
            f"Failed to load table of contents: {e!s}",
            error_suggestions=[
                "Check file permissions",
                "Verify file encoding is UTF-8",
            ],
        )

HTR Tools

tools

Riksarkivet HTR MCP Server.

This module sets up the FastMCP server and registers the htr_transcribe tool, which delegates to a remote Gradio Space via gradio_client.

HtrResult

Bases: BaseModel

Result from an HTR transcription job.

htr_transcribe(image_urls, language='swedish', layout='single_page', export_format='alto_xml', custom_yaml=None) async

Transcribe handwritten documents and return results as file URLs.

Sends images to the HTRflow Gradio Space for AI-powered handwritten text recognition. Returns URLs to an interactive viewer, per-page JSON transcriptions, and an archival export file.

Source code in packages/htr-mcp/src/ra_mcp_htr_mcp/tools.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
@htr_mcp.tool(
    annotations={
        "title": "Transcribe Handwritten Documents",
        "readOnlyHint": True,
        "openWorldHint": True,
        "idempotentHint": True,
    },
    timeout=HTR_TIMEOUT,
)
async def htr_transcribe(
    image_urls: Annotated[list[str], Field(description="Image URLs to process (http/https URLs)")],
    language: Annotated[
        Literal["swedish", "norwegian", "english", "medieval"],
        Field(description="Document language"),
    ] = "swedish",
    layout: Annotated[
        Literal["single_page", "spread"],
        Field(description="Page layout: single_page or spread (two-page opening)"),
    ] = "single_page",
    export_format: Annotated[
        Literal["alto_xml", "page_xml", "json"],
        Field(description="Archival export format"),
    ] = "alto_xml",
    custom_yaml: Annotated[
        str | None,
        Field(description="Optional HTRflow YAML pipeline config. Overrides language/layout when provided"),
    ] = None,
) -> HtrResult:
    """Transcribe handwritten documents and return results as file URLs.

    Sends images to the HTRflow Gradio Space for AI-powered handwritten text
    recognition. Returns URLs to an interactive viewer, per-page JSON
    transcriptions, and an archival export file.

    Raises:
        ToolError: If connecting to the HTR Space or the transcription itself fails.
    """
    # Connection and prediction failures are wrapped separately so the caller
    # can tell "Space unreachable" apart from "job failed".
    try:
        client = _get_client()
    except Exception as e:
        raise ToolError(f"Failed to connect to HTR Space at {HTR_SPACE_URL}: {e}") from e

    try:
        # NOTE(review): client.predict looks synchronous and would block the
        # event loop for the duration of the HTR job — confirm against
        # gradio_client and consider asyncio.to_thread if so.
        result = client.predict(
            image_urls=image_urls,
            language=language,
            layout=layout,
            export_format=export_format,
            custom_yaml=custom_yaml,
            api_name="/htr_transcribe",
        )
    except Exception as e:
        raise ToolError(f"HTR transcription failed: {e}") from e

    # Assumes the Space returns a mapping matching HtrResult's fields — TODO confirm.
    return HtrResult(**result)

Viewer Tools

tools

Document Viewer MCP App — Tool & resource registrations.

Tools
  • view_document: entry point, resolves reference code → URLs, returns transcription for the model
  • load_page: fetches a single page on demand (called by View via callServerTool)
  • load_thumbnails: batch-fetches thumbnail images (called by View via callServerTool)

load_page(image_url, text_layer_url, page_index) async

Fetch a single page on demand.

Source code in packages/viewer-mcp/src/ra_mcp_viewer_mcp/tools.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
@mcp.tool(
    name="load_page",
    description="Load a single document page (image + text layer). Used by the viewer for pagination.",
    app=AppConfig(resource_uri=RESOURCE_URI, visibility=["app"]),
)
async def load_page(
    image_url: Annotated[str, "Image URL for the page."],
    text_layer_url: Annotated[str, "Text layer XML URL (ALTO/PAGE) for the page."],
    page_index: Annotated[int, "Zero-based page index."],
) -> ToolResult:
    """Fetch a single page on demand.

    Returns a ToolResult whose text content summarises the page (1-based page
    number, line count, any fetch/parse errors) and whose structured content
    carries the page payload for the viewer.
    """
    page, errors = await build_page_data(page_index, image_url, text_layer_url)

    total_lines = len(page.get("textLayer", {}).get("textLines", []))
    # Summary uses 1-based page numbers, matching what the viewer displays.
    summary = f"Page {page_index + 1}: {total_lines} text lines."
    if errors:
        summary += f" Errors: {'; '.join(errors)}"

    # Lazy %-style logging args for consistency with search_all_pages in this module.
    logger.info("load_page: page %d loaded, %d text lines", page_index + 1, total_lines)
    logger.debug("load_page: image_url=%s, text_layer_url=%s", image_url, text_layer_url)
    return ToolResult(
        content=[types.TextContent(type="text", text=summary)],
        structured_content={"page": page},
    )

load_thumbnails(image_urls, page_indices) async

Fetch and resize a batch of page images into thumbnails (concurrent).

Source code in packages/viewer-mcp/src/ra_mcp_viewer_mcp/tools.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
@mcp.tool(
    name="load_thumbnails",
    description="Load thumbnail images for a batch of document pages. Used by the viewer for lazy-loading the thumbnail strip.",
    app=AppConfig(resource_uri=RESOURCE_URI, visibility=["app"]),
)
async def load_thumbnails(
    image_urls: Annotated[list[str], "Image URLs for the pages to thumbnail."],
    page_indices: Annotated[list[int], "Zero-based page indices corresponding to image_urls."],
) -> ToolResult:
    """Fetch and resize a batch of page images into thumbnails (concurrent).

    Failures are per-page: a page whose thumbnail cannot be fetched is listed
    in the summary's errors, while the remaining thumbnails are still returned.
    """
    thumbnails: list[dict] = []
    errors: list[str] = []
    # Cap concurrent image downloads to avoid hammering the image server.
    sem = asyncio.Semaphore(4)

    async def _fetch_one(url: str, idx: int) -> dict | None:
        """Fetch one thumbnail; return its payload, or None on failure."""
        async with sem:
            try:
                data_url = await fetch_thumbnail_as_data_url(url)
                return {"index": idx, "dataUrl": data_url}
            except Exception as e:
                # 1-based page number in the log, matching the user-facing errors list.
                logger.error("Thumbnail failed for page %d: %s", idx + 1, e)
                return None

    # TaskGroup waits for all fetches; tasks remain accessible after the block.
    async with asyncio.TaskGroup() as tg:
        tasks = [tg.create_task(_fetch_one(url, idx)) for url, idx in zip(image_urls, page_indices, strict=True)]

    for task, idx in zip(tasks, page_indices, strict=True):
        result = task.result()
        if result:
            thumbnails.append(result)
        else:
            errors.append(f"Page {idx + 1}: failed")

    # Present thumbnails in page order regardless of completion order.
    thumbnails.sort(key=lambda t: t["index"])

    summary = f"Generated {len(thumbnails)} thumbnails."
    if errors:
        summary += f" Errors: {'; '.join(errors)}"

    # Lazy %-style logging args for consistency with search_all_pages in this module.
    logger.info("load_thumbnails: generated %d thumbnail(s)", len(thumbnails))
    return ToolResult(
        content=[types.TextContent(type="text", text=summary)],
        structured_content={"thumbnails": thumbnails},
    )

search_all_pages(text_layer_urls, term) async

Search all pages concurrently and return per-page match counts.

Source code in packages/viewer-mcp/src/ra_mcp_viewer_mcp/tools.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
@mcp.tool(
    name="search_all_pages",
    description="Search for a term across all document pages. Returns match counts per page.",
    app=AppConfig(resource_uri=RESOURCE_URI, visibility=["app"]),
)
async def search_all_pages(
    text_layer_urls: Annotated[list[str], "List of text layer XML URLs to search across."],
    term: Annotated[str, "The search term to find in page transcriptions."],
) -> ToolResult:
    """Search all pages concurrently and return per-page match counts.

    The match count for a page is the number of text lines containing the
    term (case-insensitive substring); multiple occurrences within one line
    count once. Pages that fail to fetch are skipped with a warning.
    """
    if not term or not term.strip():
        return ToolResult(
            content=[types.TextContent(type="text", text="No search term provided.")],
            structured_content={"pageMatches": [], "totalMatches": 0},
        )

    # Case-insensitive comparison via lowercasing both sides.
    term_lower = term.strip().lower()
    # Cap concurrent text-layer fetches.
    sem = asyncio.Semaphore(6)

    async def _search_page(page_index: int, url: str) -> dict | None:
        # Skip empty or non-HTTP(S) URLs rather than attempting a fetch.
        if not url or not url.startswith(("http://", "https://")):
            return None
        async with sem:
            try:
                text_layer = await fetch_and_parse_text_layer(url)
            except Exception as e:
                # A single failed page must not abort the whole search.
                logger.warning("search_all_pages: failed to fetch page %d: %s", page_index, e)
                return None
            count = 0
            for line in text_layer.get("textLines", []):
                transcription = line.get("transcription", "")
                if term_lower in transcription.lower():
                    count += 1
            if count > 0:
                return {"pageIndex": page_index, "matchCount": count}
            return None

    # TaskGroup waits for all page searches; tasks remain accessible afterwards.
    async with asyncio.TaskGroup() as tg:
        tasks = [tg.create_task(_search_page(i, url)) for i, url in enumerate(text_layer_urls)]

    # Keep only pages that produced matches, ordered by page index.
    page_matches = [r for t in tasks if (r := t.result()) is not None]
    page_matches.sort(key=lambda m: m["pageIndex"])
    total_matches = sum(m["matchCount"] for m in page_matches)

    pages_with_matches = len(page_matches)
    summary = f"Found {total_matches} match{'es' if total_matches != 1 else ''} across {pages_with_matches} page{'s' if pages_with_matches != 1 else ''}."
    logger.info("search_all_pages: term=%r, %s", term, summary)

    return ToolResult(
        content=[types.TextContent(type="text", text=summary)],
        structured_content={"pageMatches": page_matches, "totalMatches": total_matches},
    )

view_document(reference_code, pages, ctx, highlight_term=None, max_pages=20) async

View document pages with zoomable images and text layer overlays.

Source code in packages/viewer-mcp/src/ra_mcp_viewer_mcp/tools.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
@mcp.tool(
    name="view_document",
    description=(
        "Display document pages with zoomable images and text layer overlays. "
        "Takes a reference code and page specification (same as browse_document). "
        "Use after search to visually inspect document pages with transcription overlay. "
        "Use highlight_term to pre-populate the search bar and highlight matching text lines."
    ),
    app=AppConfig(resource_uri=RESOURCE_URI),
)
async def view_document(
    reference_code: Annotated[str, Field(description="Document reference code from search results (e.g. 'SE/RA/420422/01').")],
    pages: Annotated[str, Field(description="Page specification: single ('5'), range ('1-10'), or comma-separated ('5,7,9').")],
    ctx: Context,
    highlight_term: Annotated[str | None, Field(description="Optional search term to pre-populate the search bar and highlight matching text lines.")] = None,
    max_pages: Annotated[int, Field(description="Maximum pages to retrieve.", le=20)] = 20,
) -> ToolResult:
    """View document pages with zoomable images and text layer overlays.

    Resolves the requested pages via BrowseOperations, then returns a
    human-readable summary (including the first page's transcription when
    available) plus structured content carrying image and text-layer URLs
    for the viewer UI.

    Args:
        reference_code: Document reference code from search results.
        pages: Page specification ('5', '1-10', or '5,7,9').
        ctx: MCP request context; used to detect UI-extension support.
        highlight_term: Optional term to pre-populate the viewer search bar.
        max_pages: Upper bound on pages retrieved (capped at 20 by schema).

    Returns:
        ToolResult with a text summary and structured_content of page URLs.
        Errors are reported as plain-text ToolResults rather than raised.
    """
    # Normalize once so whitespace-padded input doesn't reach the resolver
    # or get echoed back in structured_content; whitespace-only == missing.
    reference_code = (reference_code or "").strip()
    pages = (pages or "").strip()
    if not reference_code:
        return ToolResult(content=[types.TextContent(type="text", text="Error: reference_code must not be empty.")])
    if not pages:
        return ToolResult(content=[types.TextContent(type="text", text="Error: pages must not be empty.")])

    try:
        browse_ops = BrowseOperations(http_client=default_http_client)
        browse_result = await browse_ops.browse_document(
            reference_code=reference_code,
            pages=pages,
            highlight_term=highlight_term,
            max_pages=max_pages,
        )
    except Exception as e:
        # logger.exception keeps the traceback that logger.error("%s", e) dropped.
        logger.exception("view_document: failed to resolve document")
        return ToolResult(content=[types.TextContent(type="text", text=f"Error resolving document: {e}")])

    if not browse_result.contexts:
        return ToolResult(content=[types.TextContent(type="text", text=f"No pages found for {reference_code} pages={pages}.")])

    image_urls = [page_ctx.image_url for page_ctx in browse_result.contexts]
    text_layer_urls = [page_ctx.alto_url for page_ctx in browse_result.contexts]
    page_numbers = [page_ctx.page_number for page_ctx in browse_result.contexts]

    has_ui = ctx.client_supports_extension(UI_EXTENSION_ID)

    # Lead the summary with the first page's transcription (if any) so a
    # text-only client still receives useful content.
    first_page = browse_result.contexts[0]
    transcription = first_page.full_text.strip() if first_page.full_text else ""

    summary_parts = [f"Displaying {len(browse_result.contexts)} page(s) of {reference_code}."]
    if transcription:
        summary_parts.append(f"Page {first_page.page_number} transcription:")
        summary_parts.append(transcription)
    else:
        summary_parts.append(f"Page {first_page.page_number}: (no transcribed text)")

    # Clients without the UI extension cannot render the viewer app, so
    # fall back to listing raw image URLs in the text summary.
    if not has_ui:
        summary_parts.append("\nImage URLs:\n" + "\n".join(image_urls))
    summary = "\n".join(summary_parts)

    logger.info("view_document: %s pages=%s, resolved %d page(s)", reference_code, pages, len(browse_result.contexts))
    return ToolResult(
        content=[types.TextContent(type="text", text=summary)],
        structured_content={
            "image_urls": image_urls,
            "text_layer_urls": text_layer_urls,
            "page_numbers": page_numbers,
            "highlight_term": highlight_term or "",
            "reference_code": reference_code,
        },
    )