Skip to content

MCP Tools

MCP tool and resource registrations exposed by the ra-mcp server.

Search Tools

search_tool

Search MCP tool for Riksarkivet transcribed documents.

Provides the search_transcribed tool with pagination and formatting helpers.

register_search_tool(mcp)

Register the search tools with the MCP server.

Source code in packages/search-mcp/src/ra_mcp_search_mcp/search_tool.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
def register_search_tool(mcp) -> None:
    """Register the search tools with the MCP server.

    Registers two read-only tools on *mcp*:

    - ``transcribed`` (``search_transcribed``): full-text search over
      AI-transcribed page text of digitised documents.
    - ``metadata`` (``search_metadata``): search over catalogue metadata
      (titles, names, places), optionally including non-digitised records.

    Both tools share the session-scoped ``"seen_search"`` state so repeated
    queries compact documents already shown to the client.
    """

    @mcp.tool(
        name="transcribed",
        version="1.0",
        timeout=30.0,
        tags={"search"},
        annotations={"readOnlyHint": True, "openWorldHint": True},
        description=(
            "Search AI-transcribed text in digitised historical documents from the Swedish National Archives. "
            "IMPORTANT: Transcriptions are AI-generated (HTR/OCR) and contain recognition errors — "
            "always use fuzzy search (~) to compensate for misread characters and increase hits.\n"
            'Supports Solr syntax: wildcards (troll*), fuzzy (stockholm~1), Boolean ((A AND B)), proximity ("term1 term2"~10). '
            "Always group Boolean queries with outer parentheses. Use fuzzy (~) for OCR/HTR errors and old Swedish variants (präst/prest, silver/silfver).\n"
            "Paginate with offset (0, 50, 100...). Session dedup: re-calling returns stubs for already-seen documents."
        ),
    )
    async def search_transcribed(
        keyword: Annotated[
            str, Field(description='Search term or Solr query. Supports wildcards (*), fuzzy (~), Boolean (AND/OR/NOT), proximity ("term1 term2"~N).')
        ],
        offset: Annotated[int, Field(description="Pagination start position. Use 0 for first page, then 50, 100, etc.")],
        limit: Annotated[int, Field(description="Maximum documents to return per query.")] = 25,
        max_snippets_per_record: Annotated[int, Field(description="Maximum matching pages shown per document.")] = 3,
        max_response_tokens: Annotated[int, Field(description="Maximum tokens in response.")] = 15000,
        sort: Annotated[str, Field(description="Sort order: 'relevance', 'timeAsc', 'timeDesc', 'alphaAsc', 'alphaDesc'.")] = "relevance",
        year_min: Annotated[int | None, Field(description="Start year filter (e.g. 1700).")] = None,
        year_max: Annotated[int | None, Field(description="End year filter (e.g. 1750).")] = None,
        dedup: Annotated[bool, Field(description="Session deduplication. True compacts already-seen documents; False forces full results.")] = True,
        research_context: Annotated[str | None, Field(description="Brief summary of the user's research goal. Used for telemetry only.")] = None,
        ctx: Context | None = None,
    ) -> str:
        """Search AI-transcribed text in digitised historical documents.

        This tool searches only transcribed text (not metadata).
        For metadata search, use search_metadata instead.
        """
        # Fail fast on malformed arguments before any network work.
        validation_error = _validate_search_input(keyword, offset, year_min, year_max, sort, limit)
        if validation_error:
            return validation_error

        if research_context:
            logger.info("MCP Tool: search_transcribed | context: %s", research_context)
        logger.info("MCP Tool: search_transcribed called with keyword='%s', offset=%d", keyword, offset)

        try:
            logger.debug("Initializing search operations...")
            search_operations = SearchOperations(http_client=default_http_client)
            formatter = PlainTextFormatter()

            logger.info("Executing transcribed text search for '%s'...", keyword)
            # ctx is None when invoked outside an MCP session (e.g. tests).
            session_id = ctx.session_id if ctx is not None else None
            search_result = await search_operations.search(
                keyword=keyword,
                transcribed_only=True,  # Always search transcribed text
                only_digitised=True,  # Transcriptions only exist for digitised materials
                offset=offset,
                limit=limit,
                max_snippets_per_record=max_snippets_per_record,
                sort=sort,
                year_min=year_min,
                year_max=year_max,
                research_context=research_context,
                session_id=session_id,
            )

            # Load session state for dedup
            # Maps reference_code -> page numbers already shown this session.
            seen: dict[str, list[int]] | None = None
            if dedup and ctx is not None:
                seen = await ctx.get_state("seen_search") or {}
                logger.info("[search_transcribed] Dedup state loaded: %d documents previously seen", len(seen))

            logger.info("Formatting %d search results...", len(search_result.items))
            formatted_results = formatter.format_search_results(
                search_result,
                maximum_documents_to_display=limit,
                seen_pages=seen,
            )

            # Update session state with only the documents actually scanned by the formatter
            # (formatter.items_scanned is set by format_search_results above).
            if dedup and ctx is not None:
                updated = _update_seen_search_state(seen or {}, search_result, max_displayed=formatter.items_scanned)
                await ctx.set_state("seen_search", updated)
                logger.info("[search_transcribed] Dedup state saved: %d documents now tracked", len(updated))

            # Post-process: truncate to the token budget, then append paging hints.
            formatted_results = _apply_token_limit_if_needed(formatted_results, max_response_tokens)
            formatted_results = _append_pagination_info_if_needed(formatted_results, search_result, offset, limit)

            logger.info("✓ Search completed successfully, returning results")
            return formatted_results

        except Exception as e:
            # Tool boundary: convert any failure into a readable error payload
            # instead of propagating an exception to the MCP client.
            logger.error("✗ MCP search_transcribed failed: %s: %s", type(e).__name__, e, exc_info=True)
            formatter = PlainTextFormatter()
            return formatter.format_error_message(
                f"Search failed: {e!s}",
                error_suggestions=[
                    "Try a simpler search term",
                    "Check if the service is available",
                    "Reduce limit",
                    "Check Hugging Face logs for timeout details",
                ],
            )

    @mcp.tool(
        name="metadata",
        version="1.0",
        timeout=30.0,
        tags={"search"},
        annotations={"readOnlyHint": True, "openWorldHint": True},
        description=(
            "Search document metadata (titles, names, places, descriptions) across the Swedish National Archives catalog. "
            "Covers 2M+ records when only_digitised=False, including non-digitised materials. "
            "Use the dedicated name parameter for person searches and place parameter for place searches — these can be combined with keyword.\n"
            "Does NOT search transcribed page text — use search_transcribed for that. "
            "Same Solr syntax as search_transcribed. Session dedup: re-calling returns stubs for already-seen documents.\n"
            "Important: name and place filter a dedicated metadata field that is sparsely populated. "
            "Most person/place matches are NOT digitised, so set only_digitised=False when using name or place to avoid empty results."
        ),
    )
    async def search_metadata(
        keyword: Annotated[str, Field(description="Free-text search across all metadata fields. Supports Solr syntax (wildcards, fuzzy, Boolean).")],
        offset: Annotated[int, Field(description="Pagination start position. Use 0 for first page, then 50, 100, etc.")],
        only_digitised: Annotated[bool, Field(description="True = digitised materials only. False = all 2M+ records including non-digitised.")] = True,
        limit: Annotated[int, Field(description="Maximum documents to return per query.")] = 25,
        max_response_tokens: Annotated[int, Field(description="Maximum tokens in response.")] = 15000,
        sort: Annotated[str, Field(description="Sort order: 'relevance', 'timeAsc', 'timeDesc', 'alphaAsc', 'alphaDesc'.")] = "relevance",
        year_min: Annotated[int | None, Field(description="Start year filter (e.g. 1700).")] = None,
        year_max: Annotated[int | None, Field(description="End year filter (e.g. 1750).")] = None,
        name: Annotated[
            str | None,
            Field(
                description="Person name search in dedicated name field (e.g. 'Nobel', 'Linné'). Combinable with keyword and place. Most name matches are non-digitised — set only_digitised=False."
            ),
        ] = None,
        place: Annotated[
            str | None,
            Field(
                description="Place name search in dedicated place field (e.g. 'Stockholm', 'Göteborg'). Combinable with keyword and name. Most place matches are non-digitised — set only_digitised=False."
            ),
        ] = None,
        dedup: Annotated[bool, Field(description="Session deduplication. True compacts already-seen documents; False forces full results.")] = True,
        research_context: Annotated[str | None, Field(description="Brief summary of the user's research goal. Used for telemetry only.")] = None,
        ctx: Context | None = None,
    ) -> str:
        """Search document metadata (titles, names, places, provenance).

        This tool searches metadata fields, not transcribed text.
        For transcription search, use search_transcribed instead.
        """
        # Fail fast on malformed arguments before any network work.
        validation_error = _validate_search_input(keyword, offset, year_min, year_max, sort, limit)
        if validation_error:
            return validation_error

        if research_context:
            logger.info("MCP Tool: search_metadata | context: %s", research_context)
        material_scope = "digitised materials" if only_digitised else "all materials (2M+ records)"
        logger.info("MCP Tool: search_metadata called with keyword='%s', offset=%d, scope=%s", keyword, offset, material_scope)

        try:
            logger.debug("Initializing search operations...")
            search_operations = SearchOperations(http_client=default_http_client)
            formatter = PlainTextFormatter()

            logger.info("Executing metadata search for '%s' in %s...", keyword, material_scope)
            # ctx is None when invoked outside an MCP session (e.g. tests).
            session_id = ctx.session_id if ctx is not None else None
            search_result = await search_operations.search(
                keyword=keyword,
                transcribed_only=False,  # Search metadata fields
                only_digitised=only_digitised,
                offset=offset,
                limit=limit,
                max_snippets_per_record=None,  # Metadata search doesn't have snippets
                sort=sort,
                year_min=year_min,
                year_max=year_max,
                name=name,
                place=place,
                research_context=research_context,
                session_id=session_id,
            )

            # Load session state for dedup
            # Same "seen_search" key as search_transcribed, so both tools
            # deduplicate against a single shared per-session view.
            seen: dict[str, list[int]] | None = None
            if dedup and ctx is not None:
                seen = await ctx.get_state("seen_search") or {}
                logger.info("[search_metadata] Dedup state loaded: %d documents previously seen", len(seen))

            logger.info("Formatting %d search results...", len(search_result.items))
            formatted_results = formatter.format_search_results(
                search_result,
                maximum_documents_to_display=limit,
                seen_pages=seen,
            )

            # Update session state with only the documents actually scanned by the formatter
            if dedup and ctx is not None:
                updated = _update_seen_search_state(seen or {}, search_result, max_displayed=formatter.items_scanned)
                await ctx.set_state("seen_search", updated)
                logger.info("[search_metadata] Dedup state saved: %d documents now tracked", len(updated))

            # Post-process: truncate to the token budget, then append paging hints.
            formatted_results = _apply_token_limit_if_needed(formatted_results, max_response_tokens)
            formatted_results = _append_pagination_info_if_needed(formatted_results, search_result, offset, limit)

            logger.info("✓ Metadata search completed successfully, returning results")
            return formatted_results

        except Exception as e:
            # Tool boundary: convert any failure into a readable error payload
            # instead of propagating an exception to the MCP client.
            logger.error("✗ MCP search_metadata failed: %s: %s", type(e).__name__, e, exc_info=True)
            formatter = PlainTextFormatter()
            return formatter.format_error_message(
                f"Metadata search failed: {e!s}",
                error_suggestions=[
                    "Try a simpler search term",
                    "Check if the service is available",
                    "Reduce limit",
                    "Try with only_digitised=True for faster results",
                ],
            )

Formatter

PlainTextFormatter

Formatter that produces plain text without any Rich markup.

format_browse_results(browse_result, highlight_term=None, show_links=False, show_success_message=True)

Format browse results as plain text with emojis for MCP/LLM consumption.

Parameters:

Name Type Description Default
browse_result

BrowseResult containing page contexts and metadata

required
highlight_term

Optional term to highlight in text

None
show_links bool

Whether to show ALTO/Image/Bildvisning links

False
show_success_message bool

Whether to show success message (ignored in plain text)

True

Returns:

Type Description
str

Formatted plain text browse results

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
def format_browse_results(
    self,
    browse_result,
    highlight_term=None,
    show_links: bool = False,
    show_success_message: bool = True,
) -> str:
    """
    Render a BrowseResult as emoji-decorated plain text for MCP/LLM output.

    Args:
        browse_result: BrowseResult with page contexts and optional OAI metadata
        highlight_term: Optional keyword to emphasise within each page's text
        show_links: Accepted for interface parity; links are always printed here
        show_success_message: Ignored by the plain-text formatter

    Returns:
        The formatted multi-line browse output
    """
    if not browse_result.contexts:
        return f"No page contexts found for {browse_result.reference_code}"

    output = [f"📚 Document: {browse_result.reference_code}"]

    # Header section from OAI-PMH metadata, when the harvest returned any.
    meta = browse_result.oai_metadata
    if meta:
        if meta.title and meta.title != "(No title)":
            output.append(f"📋 Title: {meta.title}")
        if meta.repository:
            output.append(f"🏛️  Repository: {meta.repository}")
        # Skip the unit id when it merely repeats the reference code.
        if meta.unitid and meta.unitid != browse_result.reference_code:
            output.append(f"🔖 Unit ID: {meta.unitid}")
        if meta.nad_link:
            output.append(f"🔗 NAD Link: {meta.nad_link}")

    output.append(f"📖 Pages loaded: {len(browse_result.contexts)}")
    output.append("")

    # One section per page: heading, rule, (optionally highlighted) text, links.
    for page in browse_result.contexts:
        output.append(f"📄 Page {page.page_number}")
        output.append("─" * 40)

        body = page.full_text
        if highlight_term:
            body = self.highlight_search_keyword(body, highlight_term)
        output.append(body)

        output.append("")
        output.append("🔗 Links:")
        output.append(f"  📝 ALTO XML: {page.alto_url}")
        if page.image_url:
            output.append(f"  🖼️  Image: {page.image_url}")
        if page.bildvisning_url:
            output.append(f"  👁️  Bildvisning: {page.bildvisning_url}")
        output.append("")

    return "\n".join(output)

format_error_message(error_message, error_suggestions=None)

Format an error message with optional suggestions.

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
82
83
84
def format_error_message(self, error_message: str, error_suggestions: list[str] | None = None) -> str:
    """Format an error message with optional suggestions.

    Thin delegate: all wording lives in the module-level
    ``format_error_message`` helper imported elsewhere in this module,
    so error formatting stays consistent across call sites.

    Args:
        error_message: Human-readable description of what went wrong.
        error_suggestions: Optional recovery hints appended to the message.

    Returns:
        The formatted error text.
    """
    return format_error_message(error_message, error_suggestions)

format_no_results_message(search_result)

Generate appropriate message when no results are found.

Parameters:

Name Type Description Default
search_result

SearchResult containing keyword, offset, and total_hits

required

Returns:

Type Description
str

Formatted no results message

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
86
87
88
89
90
91
92
93
94
95
96
97
98
def format_no_results_message(self, search_result) -> str:
    """
    Generate an appropriate message when a search returns no results.

    Args:
        search_result: SearchResult containing ``keyword``, ``offset``, and ``total_hits``

    Returns:
        Formatted no-results message. A positive offset means the caller
        paginated past the end of the result set, so the message reports
        the total instead of implying the keyword matched nothing.
    """
    if search_result.offset > 0:
        return f"No more results found for '{search_result.keyword}' at offset {search_result.offset}. Total results: {search_result.total_hits}"
    # Fixed: the previous message ended in a dangling 'make sure to use "" '
    # fragment; give the caller an actionable, complete hint instead.
    return f"No results found for '{search_result.keyword}'. Try fuzzy search (~), wildcards (*), or quoting exact phrases with \"\"."

format_panel(panel_content, panel_title='', panel_border_style='')

Format content as plain text without panels or borders.

Parameters:

Name Type Description Default
panel_content str

Content to display

required
panel_title str

Optional title

''
panel_border_style str

Ignored in plain text mode

''

Returns:

Type Description
str

Plain text formatted content

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def format_panel(self, panel_content: str, panel_title: str = "", panel_border_style: str = "") -> str:
    """
    Render panel content as plain text; borders and styles are dropped.

    Args:
        panel_content: Body text of the panel
        panel_title: Optional heading placed above the content
        panel_border_style: Accepted for interface parity; never used

    Returns:
        Title (when given), a blank line, then the content
    """
    # Without a title the panel collapses to its bare content.
    if not panel_title:
        return panel_content
    return "\n".join([panel_title, "", panel_content])

format_search_results(search_result, maximum_documents_to_display=20, seen_pages=None)

Format search results as plain text with emojis for MCP/LLM consumption.

Parameters:

Name Type Description Default
search_result

SearchResult containing documents and metadata

required
maximum_documents_to_display int

Maximum number of documents to display

20
seen_pages dict[str, list[int]] | None

Optional dict mapping reference_code to list of already-seen page numbers. When provided, documents/snippets that were already shown are compacted or skipped.

None

Returns:

Type Description
str

Formatted plain text search results

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
def format_search_results(
    self,
    search_result,
    maximum_documents_to_display: int = 20,
    seen_pages: dict[str, list[int]] | None = None,
) -> str:
    """
    Format search results as plain text with emojis for MCP/LLM consumption.

    Side effect: sets ``self.items_scanned`` to the number of result items
    actually examined, so callers can bound their dedup-state updates to
    documents that were really considered.

    Args:
        search_result: SearchResult containing documents and metadata
        maximum_documents_to_display: Maximum number of documents to display
        seen_pages: Optional dict mapping reference_code to list of already-seen page numbers.
                    When provided, documents/snippets that were already shown are compacted or skipped.

    Returns:
        Formatted plain text search results
    """
    if not search_result.items:
        self.items_scanned = 0
        return self.format_no_results_message(search_result)

    lines: list[str] = []
    # snippet_count > 0 distinguishes a transcribed-text search (page hits)
    # from a metadata-only search (no snippets at all).
    snippet_count = search_result.count_snippets()
    skipped_count = 0
    displayed_count = 0
    items_scanned = 0

    # Show "100+" if we hit the max limit, indicating more are available
    document_count = len(search_result.items)
    if document_count >= search_result.limit:
        document_display = f"{document_count}+"
    else:
        document_display = str(document_count)

    # Different summary for metadata search vs transcribed search
    if snippet_count > 0:
        lines.append(f"Found {snippet_count} page-level hits across {document_display} volumes")
    else:
        lines.append(f"Found {document_display} volumes matching metadata")
    lines.append("")

    # Iterate all items — skipped (deduped) docs don't count against the display limit
    for idx, document in enumerate(search_result.items):
        if displayed_count >= maximum_documents_to_display:
            break
        items_scanned = idx + 1

        # In a transcribed search, items without snippets carry no new info.
        has_snippets = document.transcribed_text and document.transcribed_text.snippets
        if snippet_count > 0 and not has_snippets:
            continue

        ref_code = document.metadata.reference_code

        # --- Dedup logic ---
        # Documents seen earlier this session are either compacted (only
        # their not-yet-shown pages rendered) or dropped entirely.
        if seen_pages is not None and ref_code in seen_pages:
            prev_page_nums = set(seen_pages.get(ref_code, []))

            if has_snippets:
                # Keep a snippet only if at least one of its pages is new.
                new_snippets = [s for s in document.transcribed_text.snippets if any(page_id_to_number(p.id) not in prev_page_nums for p in s.pages)]
                if not new_snippets:
                    skipped_count += 1
                    continue
                lines.append(f"📚 Document: {ref_code} (previously shown — new pages only)")
                self._format_compact_snippets(lines, new_snippets, search_result.keyword)
                lines.append("")
                displayed_count += 1
                continue
            # Metadata-only result already shown: nothing new to add.
            skipped_count += 1
            continue

        # --- Full rendering ---
        displayed_count += 1
        self._format_document_header(lines, document)

        if not has_snippets:
            self._format_metadata_fields(lines, document)

        if has_snippets:
            self._format_document_snippets(lines, document, search_result.keyword)

        lines.append("")

    if skipped_count > 0:
        lines.append(f"({skipped_count} previously shown document(s) omitted)")
        lines.append("")

    # Track how many items were scanned so the caller can limit state updates
    self.items_scanned = items_scanned

    total_remaining = len(search_result.items) - items_scanned
    if total_remaining > 0:
        lines.append(f"... and {total_remaining} more documents")

    lines.append("")
    lines.append("Tip: Use browse_document to read full pages of interesting results.")

    return "\n".join(lines)

format_table(column_headers, table_rows, table_title='')

Create a plain text table without borders.

Parameters:

Name Type Description Default
column_headers list[str]

List of column headers

required
table_rows list[list[str]]

List of row data

required
table_title str

Optional table title

''

Returns:

Type Description
str

Plain text formatted table

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def format_table(
    self,
    column_headers: list[str],
    table_rows: list[list[str]],
    table_title: str = "",
) -> str:
    """
    Render a simple left-aligned plain-text table without border characters.

    Args:
        column_headers: Header label for each column
        table_rows: Row data; each row is a list of cell values
        table_title: Optional title printed above the table

    Returns:
        The table as a newline-joined string
    """
    out: list[str] = []
    if table_title:
        out.extend([table_title, ""])

    # Each column is padded to its widest cell, header included.
    widths = [
        max(len(str(row[col])) for row in [column_headers, *table_rows])
        for col in range(len(column_headers))
    ]

    header_line = " | ".join(label.ljust(width) for label, width in zip(column_headers, widths))
    out.append(header_line)
    # A plain dashed rule stands in for a border.
    out.append("-" * len(header_line))

    for row in table_rows:
        out.append(" | ".join(str(row[col]).ljust(widths[col]) for col in range(len(row))))

    return "\n".join(out)

format_text(text_content, style_name='')

Return plain text without any styling.

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
11
12
13
def format_text(self, text_content: str, style_name: str = "") -> str:
    """Return plain text without any styling.

    Args:
        text_content: The text to render.
        style_name: Accepted for interface parity with styled formatters; ignored here.

    Returns:
        ``text_content`` exactly as given.
    """
    return text_content

highlight_search_keyword(text_content, search_keyword)

Highlight search keywords using markdown-style bold.

Source code in packages/search-mcp/src/ra_mcp_search_mcp/formatter.py
74
75
76
def highlight_search_keyword(self, text_content: str, search_keyword: str) -> str:
    """Highlight search keywords using markdown-style bold.

    Thin delegate to the module-level ``highlight_keyword_markdown`` helper
    imported elsewhere in this module.

    Args:
        text_content: Text to scan for the keyword.
        search_keyword: Term to emphasise.

    Returns:
        The text with matches wrapped in markdown bold markers.
    """
    return highlight_keyword_markdown(text_content, search_keyword)

Browse Tool

browse_tool

Browse MCP tool for Riksarkivet document pages.

Provides the browse_document tool for viewing full page transcriptions.

register_browse_tool(mcp)

Register the browse tool with the MCP server.

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/browse_tool.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def register_browse_tool(mcp) -> None:
    """Register the browse tool with the MCP server."""

    @mcp.tool(
        name="document",
        version="1.0",
        timeout=60.0,
        tags={"browse"},
        annotations={"readOnlyHint": True, "openWorldHint": True},
        description=(
            "View full page transcriptions of a document by reference code. Use reference codes from search results. "
            "Returns original text (usually Swedish), links to bildvisaren (image viewer), and ALTO XML.\n"
            "Blank pages are normal (digitised but no text). Non-digitised materials return metadata only. "
            "Session dedup: re-browsing same pages returns stubs. Set dedup=False to force full text.\n"
            "TOKEN COST: ~300 tokens overhead per response + ~200-1500 tokens per page depending on content density. "
            "Dense court protocol pages average ~1000 tokens each; title/cover pages ~300. "
            "Request only the pages you need — start with 3-5 pages and paginate."
        ),
    )
    async def browse_document(
        reference_code: Annotated[str, Field(description="Document reference code from search results (e.g. 'SE/RA/420422/01').")],
        pages: Annotated[str, Field(description="Page specification: single ('5'), range ('1-10'), or comma-separated ('5,7,9').")],
        highlight_term: Annotated[str | None, Field(description="Optional keyword to highlight in the transcription.")] = None,
        max_pages: Annotated[int, Field(description="Maximum pages to retrieve.", le=20)] = 20,
        dedup: Annotated[bool, Field(description="Session deduplication. True replaces already-shown pages with stubs; False forces full text.")] = True,
        research_context: Annotated[str | None, Field(description="Brief summary of the user's research goal. Used for telemetry only.")] = None,
        ctx: Context | None = None,
    ) -> str:
        """
        Browse specific pages of a document by reference code.

        Returns:
        - Full transcribed text for each requested page
        - Optional keyword highlighting
        - Direct links to images and ALTO XML

        Examples:
        - browse_document("SE/RA/420422/01", "5") - View page 5
        - browse_document("SE/RA/420422/01", "1-10") - View pages 1 through 10
        - browse_document("SE/RA/420422/01", "5,7,9", highlight_term="Stockholm") - View specific pages with highlighting
        """
        # Input validation: fail fast with actionable suggestions instead of
        # letting the downstream fetch raise an opaque error.
        if not reference_code or not reference_code.strip():
            return format_error_message("reference_code must not be empty", error_suggestions=["Provide a document reference code, e.g. 'SE/RA/420422/01'"])
        if not pages or not pages.strip():
            return format_error_message("pages must not be empty", error_suggestions=["Specify pages like '1-5', '1,3,5', or '7'"])

        # research_context is logged for telemetry only; it does not affect retrieval.
        if research_context:
            logger.info("MCP Tool: browse_document | context: %s", research_context)

        try:
            browse_operations = BrowseOperations(http_client=default_http_client)
            formatter = PlainTextFormatter()

            # Dedup state is keyed by the MCP session; without a context there
            # is no session, so no dedup is possible.
            session_id = ctx.session_id if ctx is not None else None
            browse_result = await _fetch_document_pages(
                browse_operations,
                reference_code=reference_code,
                pages=pages,
                highlight_term=highlight_term,
                max_pages=max_pages,
                research_context=research_context,
                session_id=session_id,
            )

            # Load session state for dedup. Pages already shown this session
            # will be replaced with one-line stubs by the formatter.
            seen_page_numbers: set[int] | None = None
            if dedup and ctx is not None:
                seen_browse: dict[str, list[int]] = await ctx.get_state("seen_browse") or {}
                seen_page_numbers = set(seen_browse.get(reference_code, []))
                logger.info(
                    "[browse] Dedup state loaded: %d documents tracked, %d pages previously seen for %s",
                    len(seen_browse),
                    len(seen_page_numbers),
                    reference_code,
                )

            if not browse_result.contexts:
                # Check if we have metadata to display for non-digitised materials
                if browse_result.oai_metadata:
                    return formatter.format_browse_results(browse_result, highlight_term, seen_page_numbers=seen_page_numbers)
                return _generate_no_pages_found_message(reference_code)

            result = formatter.format_browse_results(browse_result, highlight_term, seen_page_numbers=seen_page_numbers)

            # Update session state with newly shown pages.
            # NOTE: seen_browse was bound above under the identical
            # `dedup and ctx is not None` guard, so it is guaranteed in scope here.
            if dedup and ctx is not None:
                all_pages = set(seen_browse.get(reference_code, []))
                for context in browse_result.contexts:
                    all_pages.add(context.page_number)
                seen_browse[reference_code] = sorted(all_pages)
                await ctx.set_state("seen_browse", seen_browse)
                logger.info("[browse] Dedup state saved: %s now has %d pages tracked", reference_code, len(all_pages))

            return result

        except Exception as e:
            # Broad catch at the tool boundary: errors are returned as formatted
            # text so the LLM client can recover, never raised to the transport.
            logger.error("MCP browse_document failed: %s: %s", type(e).__name__, e, exc_info=True)
            return format_error_message(
                f"Browse failed: {e!s}",
                error_suggestions=[
                    "Check the reference code format",
                    "Verify page numbers are valid",
                    "Try with fewer pages",
                ],
            )

Formatter

PlainTextFormatter

Formatter that produces plain text without any Rich markup.

format_browse_results(browse_result, highlight_term=None, show_links=False, show_success_message=True, seen_page_numbers=None)

Format browse results as plain text with emojis for MCP/LLM consumption.

Parameters:

Name Type Description Default
browse_result

BrowseResult containing page contexts and metadata

required
highlight_term

Optional term to highlight in text

None
show_links bool

Whether to show ALTO/Image/Bildvisning links

False
show_success_message bool

Whether to show success message (ignored in plain text)

True
seen_page_numbers set[int] | None

Optional set of page numbers already shown in this session. When provided, previously-shown pages get a one-liner stub.

None

Returns:

Type Description
str

Formatted plain text browse results

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
def format_browse_results(
    self,
    browse_result,
    highlight_term=None,
    show_links: bool = False,
    show_success_message: bool = True,
    seen_page_numbers: set[int] | None = None,
) -> str:
    """
    Format browse results as plain text with emojis for MCP/LLM consumption.

    Args:
        browse_result: BrowseResult containing page contexts and metadata
        highlight_term: Optional term to highlight in text
        show_links: Whether to show ALTO/Image/Bildvisning links
        show_success_message: Whether to show success message (ignored in plain text)
        seen_page_numbers: Optional set of page numbers already shown in this session.
                           When provided, previously-shown pages get a one-liner stub.

    Returns:
        Formatted plain text browse results
    """
    lines: list[str] = []

    # Non-digitised materials short-circuit to a metadata-only rendering.
    if self._format_non_digitised_metadata(lines, browse_result):
        return "\n".join(lines)

    if not browse_result.contexts:
        return f"No page contexts found for {browse_result.reference_code}"

    # An explicitly-passed empty set is treated the same as None: no stubbing.
    seen = seen_page_numbers or set()

    lines.append(f"📚 Document: {browse_result.reference_code}")

    if browse_result.oai_metadata:
        self._format_oai_metadata(lines, browse_result.oai_metadata, browse_result.reference_code)

    # Summary line with dedup info: only show the new/previously-shown split
    # when dedup is active and at least one page is being stubbed.
    new_count = sum(1 for c in browse_result.contexts if c.page_number not in seen)
    reseen_count = len(browse_result.contexts) - new_count
    if seen and reseen_count > 0:
        lines.append(f"📖 Pages loaded: {len(browse_result.contexts)} ({new_count} new, {reseen_count} previously shown)")
    else:
        lines.append(f"📖 Pages loaded: {len(browse_result.contexts)}")
    lines.append("")

    for context in browse_result.contexts:
        # Previously-shown pages collapse to a one-line stub to save tokens.
        if context.page_number in seen:
            lines.append(f"📄 Page {context.page_number} (previously shown in this session)")
            lines.append("")
            continue

        lines.append(f"📄 Page {context.page_number}")
        lines.append("─" * 40)

        if context.full_text.strip():
            display_text = context.full_text
            if highlight_term:
                display_text = self.highlight_search_keyword(display_text, highlight_term)
            lines.append(display_text)
        else:
            # Blank pages are expected: digitised but with no transcribed text.
            lines.append("(Empty page - no transcribed text)")

        lines.append("")
        lines.append("🔗 Links:")
        lines.append(f"  📝 ALTO XML: {context.alto_url}")
        if context.image_url:
            lines.append(f"  🖼️  Image: {context.image_url}")
        if context.bildvisning_url:
            lines.append(f"  👁️  Bildvisning: {context.bildvisning_url}")

        lines.append("")

    # Trailing guidance for the consuming LLM on how to present the material.
    lines.append(
        "Tip: Present the original text (quoted), provide a translation in the user's language, and include the bildvisaren link. Note uncertain readings."
    )

    return "\n".join(lines)

format_no_results_message(search_result)

Generate appropriate message when no results are found.

Parameters:

Name Type Description Default
search_result

SearchResult containing keyword, offset, and total_hits

required

Returns:

Type Description
str

Formatted no results message

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
82
83
84
85
86
87
88
89
90
91
92
93
94
def format_no_results_message(self, search_result) -> str:
    """
    Generate an appropriate message when a search yields no results.

    Args:
        search_result: SearchResult containing keyword, offset, and total_hits

    Returns:
        Formatted no results message. When the caller paginated past the end
        (offset > 0) the message reports the total hit count; otherwise it
        suggests query-broadening techniques supported by the search backend.
    """
    if search_result.offset > 0:
        return f"No more results found for '{search_result.keyword}' at offset {search_result.offset}. Total results: {search_result.total_hits}"
    # Fixed previously garbled guidance ('. make sure to use "" ') with a clear,
    # actionable suggestion matching the search tool's documented Solr syntax.
    return f"No results found for '{search_result.keyword}'. Try fuzzy search (~), wildcards (*), or wrap exact phrases in double quotes."

format_panel(panel_content, panel_title='', panel_border_style='')

Format content as plain text without panels or borders.

Parameters:

Name Type Description Default
panel_content str

Content to display

required
panel_title str

Optional title

''
panel_border_style str

Ignored in plain text mode

''

Returns:

Type Description
str

Plain text formatted content

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def format_panel(self, panel_content: str, panel_title: str = "", panel_border_style: str = "") -> str:
    """
    Render content as plain text, ignoring any panel decoration.

    Args:
        panel_content: Content to display
        panel_title: Optional title placed above the content
        panel_border_style: Ignored in plain text mode

    Returns:
        Plain text formatted content
    """
    # A title, when present, is separated from the body by a single blank line.
    if panel_title:
        return f"{panel_title}\n\n{panel_content}"
    return panel_content

format_search_results(search_result, maximum_documents_to_display=20)

Format search results as plain text with emojis for MCP/LLM consumption.

Parameters:

Name Type Description Default
search_result

SearchResult containing documents and metadata

required
maximum_documents_to_display int

Maximum number of documents to display

20

Returns:

Type Description
str

Formatted plain text search results

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def format_search_results(self, search_result, maximum_documents_to_display: int = 20) -> str:
    """
    Format search results as plain text with emojis for MCP/LLM consumption.

    Args:
        search_result: SearchResult containing documents and metadata
        maximum_documents_to_display: Maximum number of documents to display

    Returns:
        Formatted plain text search results
    """
    if not search_result.items:
        return self.format_no_results_message(search_result)

    lines = []
    snippet_count = search_result.count_snippets()

    # Show "N+" when the request limit was hit, signalling more results exist.
    document_count = len(search_result.items)
    if document_count >= search_result.limit:
        document_display = f"{document_count}+"
    else:
        document_display = str(document_count)

    if snippet_count > 0:
        lines.append(f"Found {snippet_count} page-level hits across {document_display} volumes")
    else:
        lines.append(f"Found {document_display} volumes matching metadata")
    lines.append("")

    # Plain iteration: the index was previously bound via enumerate but never used.
    for document in search_result.items[:maximum_documents_to_display]:
        has_snippets = document.transcribed_text and document.transcribed_text.snippets
        # When any snippets exist at all, skip documents that matched on metadata only.
        if snippet_count > 0 and not has_snippets:
            continue

        self._format_document_header(lines, document)

        if not has_snippets:
            self._format_metadata_fields(lines, document)

        if has_snippets:
            self._format_document_snippets(lines, document, search_result.keyword)

        lines.append("")

    total_document_count = len(search_result.items)
    if total_document_count > maximum_documents_to_display:
        remaining_documents = total_document_count - maximum_documents_to_display
        lines.append(f"... and {remaining_documents} more documents")

    return "\n".join(lines)

format_table(column_headers, table_rows, table_title='')

Create a plain text table without borders.

Parameters:

Name Type Description Default
column_headers list[str]

List of column headers

required
table_rows list[list[str]]

List of row data

required
table_title str

Optional table title

''

Returns:

Type Description
str

Plain text formatted table

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def format_table(
    self,
    column_headers: list[str],
    table_rows: list[list[str]],
    table_title: str = "",
) -> str:
    """
    Create a plain text table without borders.

    Args:
        column_headers: List of column headers
        table_rows: List of row data
        table_title: Optional table title

    Returns:
        Plain text formatted table
    """
    output: list[str] = []
    if table_title:
        output.extend([table_title, ""])

    # Each column is padded to the widest cell in that column, headers included.
    every_row = [column_headers, *table_rows]
    widths = [
        max(len(str(row[col])) for row in every_row)
        for col in range(len(column_headers))
    ]

    def render(cells) -> str:
        # Left-justify each cell to its column width; columns joined by " | ".
        return " | ".join(str(cells[col]).ljust(widths[col]) for col in range(len(cells)))

    header_line = render(column_headers)
    output.append(header_line)
    output.append("-" * len(header_line))
    output.extend(render(row) for row in table_rows)

    return "\n".join(output)

format_text(text_content, style_name='')

Return plain text without any styling.

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
11
12
13
def format_text(self, text_content: str, style_name: str = "") -> str:
    """Pass text through untouched — plain-text output never styles content."""
    # The style_name parameter is kept so callers of styled formatters work unchanged.
    return text_content

highlight_search_keyword(text_content, search_keyword)

Highlight search keywords using markdown-style bold.

Source code in packages/browse-mcp/src/ra_mcp_browse_mcp/formatter.py
74
75
76
def highlight_search_keyword(self, text_content: str, search_keyword: str) -> str:
    """Highlight search keywords using markdown-style bold."""
    # Thin wrapper over the shared helper; kept as a method so formatter
    # subclasses can override the highlighting strategy.
    return highlight_keyword_markdown(text_content, search_keyword)

Guide Resources

tools

Riksarkivet Historical Guide MCP Server.

Provides MCP resources for accessing historical documentation about Swedish archives.

get_guide_content(filename)

Load content from specific sections of the Riksarkivet historical guide.

Parameters:

Name Type Description Default
filename str

Markdown filename to load (e.g., '01_Domstolar.md', '02_Fangelse.md')

required

Returns:

Type Description
str

The content of the requested guide section

Source code in packages/guide-mcp/src/ra_mcp_guide_mcp/tools.py
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
@guide_mcp.resource("riksarkivet://guide/{filename}")
def get_guide_content(filename: str) -> str:
    """
    Load content from specific sections of the Riksarkivet historical guide.

    Args:
        filename: Markdown filename to load (e.g., '01_Domstolar.md', '02_Fangelse.md')

    Returns:
        The content of the requested guide section, or a formatted error
        message when the filename is invalid, missing, or unreadable.
    """
    try:
        # Reject anything that is not a plain markdown filename before touching disk.
        if not _validate_markdown_filename(filename):
            return _generate_invalid_filename_message()

        if not _check_file_exists(filename):
            return _generate_file_not_found_message(filename)

        return _load_markdown_file(filename)

    except Exception as e:
        # Report which file actually failed (the template previously contained
        # a literal placeholder instead of interpolating the filename).
        return format_error_message(
            f"Failed to load guide content '{filename}': {e!s}",
            error_suggestions=[
                "Check file permissions",
                "Verify file encoding is UTF-8",
                "Ensure the filename is valid",
            ],
        )

get_table_of_contents()

Get the table of contents (Innehållsförteckning) for the Riksarkivet historical guide.

Source code in packages/guide-mcp/src/ra_mcp_guide_mcp/tools.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
@guide_mcp.resource("riksarkivet://contents/table_of_contents")
def get_table_of_contents() -> str:
    """
    Get the table of contents (Innehållsförteckning) for the Riksarkivet historical guide.

    Returns:
        The markdown content of 00_Innehallsforteckning.md, or a formatted
        error message if the file is missing or cannot be read.
    """
    try:
        content = _load_markdown_file("00_Innehallsforteckning.md")
        return content

    except FileNotFoundError:
        # Missing TOC file gets a specific, actionable message.
        return format_error_message(
            "Table of contents file not found",
            error_suggestions=[
                "Check if the markdown/00_Innehallsforteckning.md file exists",
                "Verify the file path is correct",
            ],
        )
    except Exception as e:
        # Any other read/decode failure is reported rather than raised,
        # so the MCP resource always returns text to the client.
        return format_error_message(
            f"Failed to load table of contents: {e!s}",
            error_suggestions=[
                "Check file permissions",
                "Verify file encoding is UTF-8",
            ],
        )

HTR Tools

tools

Riksarkivet HTR MCP Server.

This module sets up the FastMCP server and registers the htr_transcribe tool, which delegates to a remote Gradio Space via gradio_client.

HtrResult

Bases: BaseModel

Result from an HTR transcription job.

htr_transcribe(image_urls, language='swedish', layout='single_page', export_format='alto_xml', custom_yaml=None) async

Transcribe handwritten documents and return results as file URLs.

Sends images to the HTRflow Gradio Space for AI-powered handwritten text recognition. Returns URLs to an interactive viewer, per-page JSON transcriptions, and an archival export file.

Source code in packages/htr-mcp/src/ra_mcp_htr_mcp/tools.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
@htr_mcp.tool(
    annotations={
        "title": "Transcribe Handwritten Documents",
        "readOnlyHint": True,
        "openWorldHint": True,
        "idempotentHint": True,
    },
    timeout=HTR_TIMEOUT,
)
async def htr_transcribe(
    image_urls: Annotated[list[str], Field(description="Image URLs to process (http/https URLs)")],
    language: Annotated[
        Literal["swedish", "norwegian", "english", "medieval"],
        Field(description="Document language"),
    ] = "swedish",
    layout: Annotated[
        Literal["single_page", "spread"],
        Field(description="Page layout: single_page or spread (two-page opening)"),
    ] = "single_page",
    export_format: Annotated[
        Literal["alto_xml", "page_xml", "json"],
        Field(description="Archival export format"),
    ] = "alto_xml",
    custom_yaml: Annotated[
        str | None,
        Field(description="Optional HTRflow YAML pipeline config. Overrides language/layout when provided"),
    ] = None,
) -> HtrResult:
    """Transcribe handwritten documents and return results as file URLs.

    Sends images to the HTRflow Gradio Space for AI-powered handwritten text
    recognition. Returns URLs to an interactive viewer, per-page JSON
    transcriptions, and an archival export file.

    Raises:
        ToolError: If connecting to the HTR Space or the transcription itself fails.
    """
    # Connection and prediction failures are wrapped separately so the caller
    # can tell "Space unreachable" apart from "job failed".
    try:
        client = _get_client()
    except Exception as e:
        raise ToolError(f"Failed to connect to HTR Space at {HTR_SPACE_URL}: {e}") from e

    try:
        # NOTE(review): client.predict looks synchronous and would block the
        # event loop for the duration of the HTR job — confirm against
        # gradio_client and consider asyncio.to_thread if so.
        result = client.predict(
            image_urls=image_urls,
            language=language,
            layout=layout,
            export_format=export_format,
            custom_yaml=custom_yaml,
            api_name="/htr_transcribe",
        )
    except Exception as e:
        raise ToolError(f"HTR transcription failed: {e}") from e

    # Assumes the Space returns a mapping matching HtrResult's fields — TODO confirm.
    return HtrResult(**result)

Viewer Tools

tools

Document Viewer MCP App — Tool & resource registrations.

Tools
  • view_document: entry point, resolves reference code → URLs, returns transcription for the model
  • load_page: fetches a single page on demand (called by View via callServerTool)
  • load_thumbnails: batch-fetches thumbnail images (called by View via callServerTool)

load_page(image_url, text_layer_url, page_index) async

Fetch a single page on demand.

Source code in packages/viewer-mcp/src/ra_mcp_viewer_mcp/tools.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
@mcp.tool(
    name="load_page",
    description="Load a single document page (image + text layer). Used by the viewer for pagination.",
    app=AppConfig(resource_uri=RESOURCE_URI, visibility=["app"]),
)
async def load_page(
    image_url: Annotated[str, "Image URL for the page."],
    text_layer_url: Annotated[str, "Text layer XML URL (ALTO/PAGE) for the page."],
    page_index: Annotated[int, "Zero-based page index."],
) -> ToolResult:
    """Fetch a single page on demand.

    Returns a ToolResult whose text content summarises the page (1-based page
    number, line count, any fetch/parse errors) and whose structured content
    carries the page payload for the viewer.
    """
    page, errors = await build_page_data(page_index, image_url, text_layer_url)

    total_lines = len(page.get("textLayer", {}).get("textLines", []))
    # Summary uses 1-based page numbers, matching what the viewer displays.
    summary = f"Page {page_index + 1}: {total_lines} text lines."
    if errors:
        summary += f" Errors: {'; '.join(errors)}"

    # Lazy %-style logging args for consistency with search_all_pages in this module.
    logger.info("load_page: page %d loaded, %d text lines", page_index + 1, total_lines)
    logger.debug("load_page: image_url=%s, text_layer_url=%s", image_url, text_layer_url)
    return ToolResult(
        content=[types.TextContent(type="text", text=summary)],
        structured_content={"page": page},
    )

load_thumbnails(image_urls, page_indices) async

Fetch and resize a batch of page images into thumbnails (concurrent).

Source code in packages/viewer-mcp/src/ra_mcp_viewer_mcp/tools.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
@mcp.tool(
    name="load_thumbnails",
    description="Load thumbnail images for a batch of document pages. Used by the viewer for lazy-loading the thumbnail strip.",
    app=AppConfig(resource_uri=RESOURCE_URI, visibility=["app"]),
)
async def load_thumbnails(
    image_urls: Annotated[list[str], "Image URLs for the pages to thumbnail."],
    page_indices: Annotated[list[int], "Zero-based page indices corresponding to image_urls."],
) -> ToolResult:
    """Fetch and resize a batch of page images into thumbnails (concurrent).

    Failures are per-page: a page whose thumbnail cannot be fetched is listed
    in the summary's errors, while the remaining thumbnails are still returned.
    """
    thumbnails: list[dict] = []
    errors: list[str] = []
    # Cap concurrent image downloads to avoid hammering the image server.
    sem = asyncio.Semaphore(4)

    async def _fetch_one(url: str, idx: int) -> dict | None:
        """Fetch one thumbnail; return its payload, or None on failure."""
        async with sem:
            try:
                data_url = await fetch_thumbnail_as_data_url(url)
                return {"index": idx, "dataUrl": data_url}
            except Exception as e:
                # 1-based page number in the log, matching the user-facing errors list.
                logger.error("Thumbnail failed for page %d: %s", idx + 1, e)
                return None

    # TaskGroup waits for all fetches; tasks remain accessible after the block.
    async with asyncio.TaskGroup() as tg:
        tasks = [tg.create_task(_fetch_one(url, idx)) for url, idx in zip(image_urls, page_indices, strict=True)]

    for task, idx in zip(tasks, page_indices, strict=True):
        result = task.result()
        if result:
            thumbnails.append(result)
        else:
            errors.append(f"Page {idx + 1}: failed")

    # Present thumbnails in page order regardless of completion order.
    thumbnails.sort(key=lambda t: t["index"])

    summary = f"Generated {len(thumbnails)} thumbnails."
    if errors:
        summary += f" Errors: {'; '.join(errors)}"

    # Lazy %-style logging args for consistency with search_all_pages in this module.
    logger.info("load_thumbnails: generated %d thumbnail(s)", len(thumbnails))
    return ToolResult(
        content=[types.TextContent(type="text", text=summary)],
        structured_content={"thumbnails": thumbnails},
    )

search_all_pages(text_layer_urls, term) async

Search all pages concurrently and return per-page match counts.

Source code in packages/viewer-mcp/src/ra_mcp_viewer_mcp/tools.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
@mcp.tool(
    name="search_all_pages",
    description="Search for a term across all document pages. Returns match counts per page.",
    app=AppConfig(resource_uri=RESOURCE_URI, visibility=["app"]),
)
async def search_all_pages(
    text_layer_urls: Annotated[list[str], "List of text layer XML URLs to search across."],
    term: Annotated[str, "The search term to find in page transcriptions."],
) -> ToolResult:
    """Search all pages concurrently and return per-page match counts.

    The match count for a page is the number of text lines containing the
    term (case-insensitive substring); multiple occurrences within one line
    count once. Pages that fail to fetch are skipped with a warning.
    """
    if not term or not term.strip():
        return ToolResult(
            content=[types.TextContent(type="text", text="No search term provided.")],
            structured_content={"pageMatches": [], "totalMatches": 0},
        )

    # Case-insensitive comparison via lowercasing both sides.
    term_lower = term.strip().lower()
    # Cap concurrent text-layer fetches.
    sem = asyncio.Semaphore(6)

    async def _search_page(page_index: int, url: str) -> dict | None:
        # Skip empty or non-HTTP(S) URLs rather than attempting a fetch.
        if not url or not url.startswith(("http://", "https://")):
            return None
        async with sem:
            try:
                text_layer = await fetch_and_parse_text_layer(url)
            except Exception as e:
                # A single failed page must not abort the whole search.
                logger.warning("search_all_pages: failed to fetch page %d: %s", page_index, e)
                return None
            count = 0
            for line in text_layer.get("textLines", []):
                transcription = line.get("transcription", "")
                if term_lower in transcription.lower():
                    count += 1
            if count > 0:
                return {"pageIndex": page_index, "matchCount": count}
            return None

    # TaskGroup waits for all page searches; tasks remain accessible afterwards.
    async with asyncio.TaskGroup() as tg:
        tasks = [tg.create_task(_search_page(i, url)) for i, url in enumerate(text_layer_urls)]

    # Keep only pages that produced matches, ordered by page index.
    page_matches = [r for t in tasks if (r := t.result()) is not None]
    page_matches.sort(key=lambda m: m["pageIndex"])
    total_matches = sum(m["matchCount"] for m in page_matches)

    pages_with_matches = len(page_matches)
    summary = f"Found {total_matches} match{'es' if total_matches != 1 else ''} across {pages_with_matches} page{'s' if pages_with_matches != 1 else ''}."
    logger.info("search_all_pages: term=%r, %s", term, summary)

    return ToolResult(
        content=[types.TextContent(type="text", text=summary)],
        structured_content={"pageMatches": page_matches, "totalMatches": total_matches},
    )

view_document(reference_code, pages, ctx, highlight_term=None, max_pages=20) async

View document pages with zoomable images and text layer overlays.

Source code in packages/viewer-mcp/src/ra_mcp_viewer_mcp/tools.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
@mcp.tool(
    name="view_document",
    description=(
        "Display document pages with zoomable images and text layer overlays. "
        "Takes a reference code and page specification (same as browse_document). "
        "Use after search to visually inspect document pages with transcription overlay. "
        "Use highlight_term to pre-populate the search bar and highlight matching text lines."
    ),
    app=AppConfig(resource_uri=RESOURCE_URI),
)
async def view_document(
    reference_code: Annotated[str, Field(description="Document reference code from search results (e.g. 'SE/RA/420422/01').")],
    pages: Annotated[str, Field(description="Page specification: single ('5'), range ('1-10'), or comma-separated ('5,7,9').")],
    ctx: Context,
    highlight_term: Annotated[str | None, Field(description="Optional search term to pre-populate the search bar and highlight matching text lines.")] = None,
    max_pages: Annotated[int, Field(description="Maximum pages to retrieve.", le=20)] = 20,
) -> ToolResult:
    """View document pages with zoomable images and text layer overlays.

    Resolves the requested pages via BrowseOperations, then returns a
    human-readable summary (including the first page's transcription when
    available) plus structured content carrying image and text-layer URLs
    for the viewer UI.

    Args:
        reference_code: Document reference code from search results.
        pages: Page specification ('5', '1-10', or '5,7,9').
        ctx: MCP request context; used to detect UI-extension support.
        highlight_term: Optional term to pre-populate the viewer search bar.
        max_pages: Upper bound on pages retrieved (capped at 20 by schema).

    Returns:
        ToolResult with a text summary and structured_content of page URLs.
        Errors are reported as plain-text ToolResults rather than raised.
    """
    # Normalize once so whitespace-padded input doesn't reach the resolver
    # or get echoed back in structured_content; whitespace-only == missing.
    reference_code = (reference_code or "").strip()
    pages = (pages or "").strip()
    if not reference_code:
        return ToolResult(content=[types.TextContent(type="text", text="Error: reference_code must not be empty.")])
    if not pages:
        return ToolResult(content=[types.TextContent(type="text", text="Error: pages must not be empty.")])

    try:
        browse_ops = BrowseOperations(http_client=default_http_client)
        browse_result = await browse_ops.browse_document(
            reference_code=reference_code,
            pages=pages,
            highlight_term=highlight_term,
            max_pages=max_pages,
        )
    except Exception as e:
        # logger.exception keeps the traceback that logger.error("%s", e) dropped.
        logger.exception("view_document: failed to resolve document")
        return ToolResult(content=[types.TextContent(type="text", text=f"Error resolving document: {e}")])

    if not browse_result.contexts:
        return ToolResult(content=[types.TextContent(type="text", text=f"No pages found for {reference_code} pages={pages}.")])

    image_urls = [page_ctx.image_url for page_ctx in browse_result.contexts]
    text_layer_urls = [page_ctx.alto_url for page_ctx in browse_result.contexts]
    page_numbers = [page_ctx.page_number for page_ctx in browse_result.contexts]

    has_ui = ctx.client_supports_extension(UI_EXTENSION_ID)

    # Lead the summary with the first page's transcription (if any) so a
    # text-only client still receives useful content.
    first_page = browse_result.contexts[0]
    transcription = first_page.full_text.strip() if first_page.full_text else ""

    summary_parts = [f"Displaying {len(browse_result.contexts)} page(s) of {reference_code}."]
    if transcription:
        summary_parts.append(f"Page {first_page.page_number} transcription:")
        summary_parts.append(transcription)
    else:
        summary_parts.append(f"Page {first_page.page_number}: (no transcribed text)")

    # Clients without the UI extension cannot render the viewer app, so
    # fall back to listing raw image URLs in the text summary.
    if not has_ui:
        summary_parts.append("\nImage URLs:\n" + "\n".join(image_urls))
    summary = "\n".join(summary_parts)

    logger.info("view_document: %s pages=%s, resolved %d page(s)", reference_code, pages, len(browse_result.contexts))
    return ToolResult(
        content=[types.TextContent(type="text", text=summary)],
        structured_content={
            "image_urls": image_urls,
            "text_layer_urls": text_layer_urls,
            "page_numbers": page_numbers,
            "highlight_term": highlight_term or "",
            "reference_code": reference_code,
        },
    )