Skip to content

Common

Shared utilities used across all ra-mcp packages.

HTTP Client

HTTPClient(user_agent=None, max_retries=_DEFAULT_MAX_RETRIES, backoff_base=_DEFAULT_BACKOFF_BASE, *, http2=False, connect_timeout=10.0, read_timeout=30.0, write_timeout=10.0, pool_timeout=5.0)

Centralized async HTTP client using httpx with comprehensive logging and retry.

Source code in packages/common/src/ra_mcp_common/http_client.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def __init__(
    self,
    user_agent: str | None = None,
    max_retries: int = _DEFAULT_MAX_RETRIES,
    backoff_base: float = _DEFAULT_BACKOFF_BASE,
    *,
    http2: bool = False,
    connect_timeout: float = 10.0,
    read_timeout: float = 30.0,
    write_timeout: float = 10.0,
    pool_timeout: float = 5.0,
):
    """
    Build the shared httpx.AsyncClient and the telemetry instruments.

    Args:
        user_agent: Value for the User-Agent header. When None, it is derived
            from the installed version of the ``ra-mcp-common`` distribution.
        max_retries: Stored on the instance as ``max_retries``.
        backoff_base: Stored on the instance as ``backoff_base``.
        http2: Forwarded to ``httpx.AsyncClient(http2=...)``.
        connect_timeout: Connect timeout passed to ``httpx.Timeout``.
        read_timeout: Read timeout passed to ``httpx.Timeout``.
        write_timeout: Write timeout passed to ``httpx.Timeout``.
        pool_timeout: Pool timeout passed to ``httpx.Timeout``.
    """
    if user_agent is None:
        # Imported lazily: only needed when no explicit user agent is given.
        from importlib.metadata import version

        package_version = version("ra-mcp-common")
        user_agent = f"ra-mcp/{package_version}"

    self.user_agent, self.max_retries, self.backoff_base = user_agent, max_retries, backoff_base

    client_timeout = httpx.Timeout(
        connect=connect_timeout,
        read=read_timeout,
        write=write_timeout,
        pool=pool_timeout,
    )
    client_limits = httpx.Limits(max_connections=20, max_keepalive_connections=10)
    self._client = httpx.AsyncClient(
        headers={"User-Agent": user_agent},
        timeout=client_timeout,
        limits=client_limits,
        follow_redirects=True,
        http2=http2,
    )

    # Telemetry: one tracer plus the counters/histograms shared by all request methods.
    self._tracer = get_tracer("ra_mcp.http_client")
    http_meter = get_meter("ra_mcp.http_client")
    self._request_counter = http_meter.create_counter(
        "ra_mcp.http.requests",
        description="HTTP requests made",
    )
    self._error_counter = http_meter.create_counter(
        "ra_mcp.http.errors",
        description="HTTP request errors",
    )
    self._duration_histogram = http_meter.create_histogram(
        "ra_mcp.http.request.duration",
        unit="s",
        description="HTTP request duration",
    )
    self._response_size_histogram = http_meter.create_histogram(
        "ra_mcp.http.response.size",
        unit="By",
        description="HTTP response body size",
    )
    self._retry_counter = http_meter.create_counter(
        "ra_mcp.http.retries",
        description="HTTP request retry attempts",
    )

aclose() async

Close the underlying httpx client.

Source code in packages/common/src/ra_mcp_common/http_client.py
383
384
385
async def aclose(self) -> None:
    """Shut down the wrapped httpx client by awaiting its ``aclose()``."""
    wrapped_client = self._client
    await wrapped_client.aclose()

get_content(url, timeout=30, headers=None) async

Make a GET request and return raw content. Returns None on 404 or errors.

Parameters:

Name Type Description Default
url str

URL to fetch

required
timeout int

Request timeout in seconds

30
headers dict[str, str] | None

Additional headers

None

Returns:

Type Description
bytes | None

Response content as bytes or None

Source code in packages/common/src/ra_mcp_common/http_client.py
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
async def get_content(self, url: str, timeout: int = 30, headers: dict[str, str] | None = None) -> bytes | None:
    """
    Make a GET request and return raw content.

    Best-effort fetch: a 404, any other non-200 status, and any exception
    derived from ``Exception`` are logged and reported as ``None`` instead of
    being raised. Errors are also recorded on the tracing span and counted in
    the error counter.

    Args:
        url: URL to fetch
        timeout: Request timeout in seconds (passed on as a float)
        headers: Additional headers

    Returns:
        Response content as bytes, or None on 404, non-200 status or error
    """
    logger.debug("GET CONTENT: %s", url)

    request_headers: dict[str, str] = {}
    if headers:
        request_headers.update(headers)

    span_attrs = {"http.request.method": "GET", "url.full": url}

    with self._tracer.start_as_current_span("HTTP GET", kind=SpanKind.CLIENT, attributes=span_attrs) as span:
        start_time = time.perf_counter()

        try:
            response = await self._execute_with_retry("GET", url, headers=request_headers, timeout=float(timeout))
            duration = time.perf_counter() - start_time
            # Recorded before the status checks so every response is traceable.
            span.set_attribute("http.response.status_code", response.status_code)

            if response.status_code == 404:
                logger.info("GET %s - %.3fs - 404 NOT FOUND", url, duration)
                return None
            if response.status_code != 200:
                logger.warning("GET %s - %.3fs - %d", url, duration, response.status_code)
                return None

            content = response.content
            content_size = len(content)
            logger.info("GET %s - %.3fs - 200 OK", url, duration)

            span.set_attribute("http.response.body.size", content_size)
            self._response_size_histogram.record(content_size, {"http.request.method": "GET"})
            return content

        except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
            duration = time.perf_counter() - start_time
            # All httpx timeout subclasses are reported under one label.
            error_type = "TimeoutError" if isinstance(e, httpx.TimeoutException) else type(e).__name__
            error_msg = str(e.response.status_code) if isinstance(e, httpx.HTTPStatusError) else str(e)
            logger.error("GET %s - %.3fs - ERROR: %s", url, duration, error_msg)
            span.set_status(StatusCode.ERROR, f"{error_type}: {error_msg}")
            span.record_exception(e)
            self._error_counter.add(1, {"error.type": error_type})
            return None
        except Exception as e:
            duration = time.perf_counter() - start_time
            # Include the elapsed time, matching the handler above.
            logger.error("GET %s - %.3fs - ERROR: %s", url, duration, e)
            span.set_status(StatusCode.ERROR, str(e))
            span.record_exception(e)
            self._error_counter.add(1, {"error.type": type(e).__name__})
            return None
        finally:
            # Counted for every attempt, successful or not.
            self._request_counter.add(1, {"http.request.method": "GET"})
            self._duration_histogram.record(time.perf_counter() - start_time, {"http.request.method": "GET"})

get_json(url, params=None, timeout=30, headers=None) async

Make a GET request and return JSON response.

Parameters:

Name Type Description Default
url str

Base URL

required
params dict[str, str | int] | None

Query parameters

None
timeout int

Request timeout in seconds (can be overridden by RA_MCP_TIMEOUT env var)

30
headers dict[str, str] | None

Additional headers

None

Returns:

Type Description
dict

Parsed JSON response

Raises:

Type Description
TimeoutError

On request timeout

HTTPStatusError

On non-success HTTP status code

ConnectError

On network connection error

JSONDecodeError

On invalid JSON response

Source code in packages/common/src/ra_mcp_common/http_client.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
async def get_json(
    self,
    url: str,
    params: dict[str, str | int] | None = None,
    timeout: int = 30,
    headers: dict[str, str] | None = None,
) -> dict:
    """
    Make a GET request and return JSON response.

    Args:
        url: Base URL
        params: Query parameters
        timeout: Request timeout in seconds. An integer value in the
            RA_MCP_TIMEOUT environment variable takes precedence; a value
            that is not an integer is ignored with a warning.
        headers: Additional headers, merged over the default
            ``Accept: application/json``

    Returns:
        Parsed JSON response

    Raises:
        TimeoutError: On request timeout
        httpx.HTTPStatusError: On non-success HTTP status code
        httpx.ConnectError: On network connection error
        json.JSONDecodeError: On invalid JSON response
        Exception: On a non-200 response that reaches this method without
            an exception already having been raised for it
    """
    # Allow timeout override from environment (useful for Hugging Face).
    # A malformed value must not make every request fail with ValueError,
    # so fall back to the caller-supplied timeout instead.
    env_timeout = os.getenv("RA_MCP_TIMEOUT")
    if env_timeout is not None:
        try:
            timeout = int(env_timeout)
        except ValueError:
            logger.warning("Ignoring invalid RA_MCP_TIMEOUT=%r; using %ss", env_timeout, timeout)
    timeout = int(timeout)

    # Log request details
    logger.info("GET JSON: %s", url)
    logger.debug("Timeout: %ds, Params: %s", timeout, params)

    request_headers = {"Accept": "application/json"}
    if headers:
        request_headers.update(headers)
        logger.debug("Headers: %s", headers)

    span_attrs = {"http.request.method": "GET", "url.full": url}

    with self._tracer.start_as_current_span("HTTP GET", kind=SpanKind.CLIENT, attributes=span_attrs) as span:
        start_time = time.perf_counter()

        try:
            logger.debug("Opening connection to %s...", url)
            response = await self._execute_with_retry("GET", url, params=params, headers=request_headers, timeout=float(timeout))
            logger.debug("Connection established, status: %d", response.status_code)
            # Recorded before the status check so non-200 responses also carry
            # their status code on the span.
            span.set_attribute("http.response.status_code", response.status_code)

            if response.status_code != 200:
                logger.error("Unexpected status code: %d", response.status_code)
                raise Exception(f"HTTP {response.status_code}")

            logger.debug("Reading response content...")
            content = response.content
            content_size = len(content)
            logger.debug("Received %d bytes", content_size)

            logger.debug("Parsing JSON...")
            result = json.loads(content)

            duration = time.perf_counter() - start_time
            logger.info("✓ GET JSON %s - %.3fs - %d bytes - 200 OK", url, duration, content_size)

            span.set_attribute("http.response.body.size", content_size)
            self._response_size_histogram.record(content_size, {"http.request.method": "GET"})

            return result

        except httpx.TimeoutException as e:
            duration = time.perf_counter() - start_time
            logger.error("✗ TIMEOUT after %.3fs on %s", duration, url)
            logger.error("Timeout limit was %ds", timeout)
            span.set_status(StatusCode.ERROR, f"Timeout after {timeout}s")
            span.record_exception(e)
            self._error_counter.add(1, {"error.type": "TimeoutError"})
            # Callers see the builtin TimeoutError, chained to the httpx one.
            raise TimeoutError(f"Request timeout after {timeout}s: {url}") from e

        except httpx.HTTPStatusError as e:
            duration = time.perf_counter() - start_time
            # Logging the error body is best-effort; failing to read it must
            # not mask the original HTTP error.
            with contextlib.suppress(Exception):
                logger.error("Error response body: %s", e.response.text[:500])

            logger.error("✗ GET JSON %s - %.3fs - HTTPStatusError: %s", url, duration, e.response.status_code)

            span.set_status(StatusCode.ERROR, f"HTTPStatusError: {e.response.status_code}")
            span.record_exception(e)
            span.set_attribute("http.response.status_code", e.response.status_code)
            self._error_counter.add(1, {"error.type": "HTTPStatusError"})
            raise

        except (httpx.ConnectError, json.JSONDecodeError) as e:
            duration = time.perf_counter() - start_time
            error_type = type(e).__name__
            logger.error("✗ GET JSON %s - %.3fs - %s: %s", url, duration, error_type, e)

            span.set_status(StatusCode.ERROR, f"{error_type}: {e}")
            span.record_exception(e)
            self._error_counter.add(1, {"error.type": error_type})
            raise

        except Exception as e:
            duration = time.perf_counter() - start_time
            logger.error("✗ Unexpected error after %.3fs: %s: %s", duration, type(e).__name__, e)
            span.set_status(StatusCode.ERROR, str(e))
            span.record_exception(e)
            self._error_counter.add(1, {"error.type": type(e).__name__})
            raise

        finally:
            # Counted for every attempt, successful or not.
            self._request_counter.add(1, {"http.request.method": "GET"})
            self._duration_histogram.record(time.perf_counter() - start_time, {"http.request.method": "GET"})

get_xml(url, params=None, timeout=30, headers=None) async

Make a GET request and return XML response as bytes.

Parameters:

Name Type Description Default
url str

Base URL

required
params dict[str, str | int] | None

Query parameters

None
timeout int

Request timeout in seconds

30
headers dict[str, str] | None

Additional headers

None

Returns:

Type Description
bytes

XML response as bytes

Raises:

Type Description
TimeoutError

On request timeout

HTTPStatusError

On non-success HTTP status code

ConnectError

On network connection error

Source code in packages/common/src/ra_mcp_common/http_client.py
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
async def get_xml(
    self,
    url: str,
    params: dict[str, str | int] | None = None,
    timeout: int = 30,
    headers: dict[str, str] | None = None,
) -> bytes:
    """
    Make a GET request and return XML response as bytes.

    The body is returned as received; it is not parsed or validated here.

    Args:
        url: Base URL
        params: Query parameters
        timeout: Request timeout in seconds (passed on as a float)
        headers: Additional headers, merged over the default
            ``Accept: application/xml, text/xml, */*``

    Returns:
        XML response as bytes

    Raises:
        TimeoutError: On request timeout
        httpx.HTTPStatusError: On non-success HTTP status code
        httpx.ConnectError: On network connection error
        Exception: On a non-200 response that reaches this method without
            an exception already having been raised for it
    """
    logger.debug("GET XML: %s", url)

    request_headers = {"Accept": "application/xml, text/xml, */*"}
    if headers:
        request_headers.update(headers)

    span_attrs = {"http.request.method": "GET", "url.full": url}

    with self._tracer.start_as_current_span("HTTP GET", kind=SpanKind.CLIENT, attributes=span_attrs) as span:
        start_time = time.perf_counter()

        try:
            response = await self._execute_with_retry("GET", url, params=params, headers=request_headers, timeout=float(timeout))
            # Recorded before the status check so non-200 responses also carry
            # their status code on the span.
            span.set_attribute("http.response.status_code", response.status_code)
            if response.status_code != 200:
                raise Exception(f"HTTP {response.status_code}")

            content = response.content
            content_size = len(content)
            duration = time.perf_counter() - start_time
            logger.info("GET XML %s - %.3fs - 200 OK", url, duration)

            span.set_attribute("http.response.body.size", content_size)
            self._response_size_histogram.record(content_size, {"http.request.method": "GET"})

            return content

        except httpx.TimeoutException as e:
            duration = time.perf_counter() - start_time
            logger.error("GET XML %s - %.3fs - ERROR: TimeoutError", url, duration)
            span.set_status(StatusCode.ERROR, f"TimeoutError: {e}")
            span.record_exception(e)
            self._error_counter.add(1, {"error.type": "TimeoutError"})
            # Callers see the builtin TimeoutError, chained to the httpx one.
            raise TimeoutError(f"Request timeout: {url}") from e

        except httpx.HTTPStatusError as e:
            duration = time.perf_counter() - start_time
            error_body = ""
            # Reading the error body is best-effort; it must not mask the HTTP error.
            with contextlib.suppress(Exception):
                error_body = f" - Body: {e.response.text[:500]}"
            logger.error("GET XML %s - %.3fs - ERROR: %s%s", url, duration, e.response.status_code, error_body)

            span.set_status(StatusCode.ERROR, f"HTTPStatusError: {e.response.status_code}")
            span.record_exception(e)
            span.set_attribute("http.response.status_code", e.response.status_code)
            self._error_counter.add(1, {"error.type": "HTTPStatusError"})
            raise

        except httpx.ConnectError as e:
            duration = time.perf_counter() - start_time
            logger.error("GET XML %s - %.3fs - ERROR: %s", url, duration, e)
            span.set_status(StatusCode.ERROR, f"ConnectError: {e}")
            span.record_exception(e)
            self._error_counter.add(1, {"error.type": "ConnectError"})
            raise

        except Exception as e:
            duration = time.perf_counter() - start_time
            logger.error("GET XML %s - %.3fs - ERROR: %s", url, duration, e)
            span.set_status(StatusCode.ERROR, str(e))
            span.record_exception(e)
            self._error_counter.add(1, {"error.type": type(e).__name__})
            raise

        finally:
            # Counted for every attempt, successful or not.
            self._request_counter.add(1, {"http.request.method": "GET"})
            self._duration_histogram.record(time.perf_counter() - start_time, {"http.request.method": "GET"})

Formatting Utilities

formatting

Shared formatting utilities for MCP tool output.

These functions are used by both search-mcp and browse-mcp formatters to produce plain-text/markdown output suitable for LLM consumption.

format_error_message(error_message, error_suggestions=None)

Format an error message with optional suggestions.

Source code in packages/common/src/ra_mcp_common/formatting.py
15
16
17
18
19
20
21
def format_error_message(error_message: str, error_suggestions: list[str] | None = None) -> str:
    """Format an error message with optional suggestions."""
    formatted_lines = [f"\u26a0\ufe0f **Error**: {error_message}"]
    if error_suggestions:
        formatted_lines.append("\n**Suggestions**:")
        formatted_lines.extend(f"- {suggestion_text}" for suggestion_text in error_suggestions)
    return "\n".join(formatted_lines)

format_example_browse_command(reference_code, page_numbers, search_term='')

Format an example browse command for display.

Source code in packages/common/src/ra_mcp_common/formatting.py
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def format_example_browse_command(reference_code: str, page_numbers: list[str], search_term: str = "") -> str:
    """Build an example ``ra browse`` command line for display.

    Returns an empty string when there are no pages. A single page is passed
    unquoted; several pages are joined with commas (first five only) and quoted.
    The ``--search-term`` option is appended only when a search term is given.
    """
    page_count = len(page_numbers)
    if page_count == 0:
        return ""

    if page_count > 1:
        joined_pages = ",".join(page_numbers[:5])  # Show max 5 pages
        command = f'ra browse "{reference_code}" --page "{joined_pages}"'
    else:
        command = f'ra browse "{reference_code}" --page {page_numbers[0]}'

    search_suffix = f' --search-term "{search_term}"' if search_term else ""
    return command + search_suffix

highlight_keyword_markdown(text_content, search_keyword)

Highlight search keywords using markdown-style bold.

The `**text**` markers from the API are already in the correct format. If no markers are present, fall back to manual keyword highlighting.

Parameters:

Name Type Description Default
text_content str

Text to search in (may already contain `**text**` markers)

required
search_keyword str

Keyword to highlight

required

Returns:

Type Description
str

Text with keywords wrapped in `**bold**`

Source code in packages/common/src/ra_mcp_common/formatting.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def highlight_keyword_markdown(text_content: str, search_keyword: str) -> str:
    """Wrap occurrences of a search keyword in markdown bold.

    Text that already contains at least one ``**...**`` span (as delivered by
    the API) is returned untouched, as is any text when the keyword is empty.
    Otherwise every case-insensitive occurrence of the keyword is wrapped,
    keeping the casing found in the text.

    Args:
        text_content: Text to search in (may already contain **text** markers)
        search_keyword: Keyword to highlight

    Returns:
        Text with keywords wrapped in **bold**
    """
    already_marked = re.search(r"\*\*[^*]+\*\*", text_content) is not None
    if already_marked or not search_keyword:
        return text_content

    # The keyword is matched literally; \g<0> re-inserts the text as it was matched.
    return re.sub(re.escape(search_keyword), r"**\g<0>**", text_content, flags=re.IGNORECASE)

iiif_manifest_to_bildvisaren(iiif_manifest_url)

Convert IIIF manifest URL to bildvisaren URL.

Parameters:

Name Type Description Default
iiif_manifest_url str

IIIF manifest URL (e.g., https://lbiiif.riksarkivet.se/arkis!R0002497/manifest)

required

Returns:

Type Description
str

Bildvisaren URL (e.g., https://sok.riksarkivet.se/bildvisning/R0002497) or empty string if conversion fails

Source code in packages/common/src/ra_mcp_common/formatting.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def iiif_manifest_to_bildvisaren(iiif_manifest_url: str) -> str:
    """Convert IIIF manifest URL to bildvisaren URL.

    The manifest id is the text between the first ``arkis!`` and the next
    ``/manifest`` that follows it. If either marker is missing, ``/manifest``
    does not come after ``arkis!``, or the id is empty, conversion fails.

    Args:
        iiif_manifest_url: IIIF manifest URL (e.g., https://lbiiif.riksarkivet.se/arkis!R0002497/manifest)

    Returns:
        Bildvisaren URL (e.g., https://sok.riksarkivet.se/bildvisning/R0002497) or empty string if conversion fails
    """
    try:
        _, arkis_marker, after_marker = iiif_manifest_url.partition("arkis!")
        # Only look for "/manifest" after the marker: a "/manifest" that occurs
        # earlier in the URL must not be used to cut the id.
        manifest_id, manifest_suffix, _ = after_marker.partition("/manifest")
    except (AttributeError, TypeError) as e:
        # Non-string input (e.g. None or bytes) is treated as a failed conversion.
        logger.warning("Failed to convert IIIF manifest URL to bildvisning: %s: %s", iiif_manifest_url, e)
        return ""

    if arkis_marker and manifest_suffix and manifest_id:
        return f"https://sok.riksarkivet.se/bildvisning/{manifest_id}"
    return ""

page_id_to_number(page_id)

Extract the numeric page number from a page ID like '_00066' or '_H0000459_00005'.

Splits by underscore and takes the last non-empty part, stripping leading zeros.

Source code in packages/common/src/ra_mcp_common/formatting.py
24
25
26
27
28
29
30
31
32
33
34
def page_id_to_number(page_id: str) -> int:
    """Extract the numeric page number from a page ID like '_00066' or '_H0000459_00005'.

    Splits by underscore and takes the last non-empty part, stripping leading zeros.
    An ID with no non-empty part (e.g. '' or '_') yields 0.

    Raises:
        ValueError: If the selected part is not a valid integer.
    """
    # Skip empty segments so a trailing underscore ('_00066_') still resolves to 66.
    segments = [segment for segment in page_id.split("_") if segment]
    last_segment = segments[-1] if segments else ""
    # An all-zero (or missing) segment strips down to '', which stands for page 0.
    return int(last_segment.lstrip("0") or "0")

trim_page_number(page_number)

Remove leading underscores and zeros from page number, keeping at least one digit.

Source code in packages/common/src/ra_mcp_common/formatting.py
58
59
60
def trim_page_number(page_number: str) -> str:
    """Strip every leading underscore and zero from a page number; return "0" if nothing is left."""
    trimmed = page_number.lstrip("_0")
    return trimmed if trimmed else "0"

trim_page_numbers(page_numbers)

Remove leading underscores and zeros from each page number in the list.

Source code in packages/common/src/ra_mcp_common/formatting.py
63
64
65
def trim_page_numbers(page_numbers: list[str]) -> list[str]:
    """Apply trim_page_number to every entry and return the results as a new list, in the same order."""
    return list(map(trim_page_number, page_numbers))

truncate_text(text, max_length, add_ellipsis=True)

Truncate text to maximum length, optionally adding ellipsis.

Source code in packages/common/src/ra_mcp_common/formatting.py
68
69
70
71
72
73
74
75
def truncate_text(text: str, max_length: int, add_ellipsis: bool = True) -> str:
    """Truncate text to maximum length, optionally adding ellipsis.

    Args:
        text: Text to shorten.
        max_length: Maximum length of the result. Negative values are treated as 0.
        add_ellipsis: When True and max_length is greater than 3, the result
            ends in "..." and still fits within max_length. For smaller limits
            the text is cut without an ellipsis.

    Returns:
        The original text if it already fits, otherwise the shortened text.
    """
    # A negative limit would otherwise slice from the end (text[:-1]) and
    # return more characters than requested.
    limit = max(max_length, 0)
    if len(text) <= limit:
        return text

    if add_ellipsis and limit > 3:
        # Reserve three characters for the ellipsis itself.
        return text[: limit - 3] + "..."
    return text[:limit]

Telemetry

telemetry

Telemetry convenience wrappers using only opentelemetry-api.

Returns no-op instances when no SDK is configured (zero overhead). All packages get these helpers transitively through ra-mcp-common.

get_meter(name)

Get a meter for the given module name.

Returns a no-op meter when no MeterProvider SDK is configured.

Source code in packages/common/src/ra_mcp_common/telemetry.py
19
20
21
22
23
24
def get_meter(name: str) -> metrics.Meter:
    """Look up the meter for ``name`` via ``metrics.get_meter``.

    When no MeterProvider SDK is configured, the returned meter is a no-op.
    """
    meter = metrics.get_meter(name)
    return meter

get_tracer(name)

Get a tracer for the given module name.

Returns a no-op tracer when no TracerProvider SDK is configured.

Source code in packages/common/src/ra_mcp_common/telemetry.py
11
12
13
14
15
16
def get_tracer(name: str) -> trace.Tracer:
    """Look up the tracer for ``name`` via ``trace.get_tracer``.

    When no TracerProvider SDK is configured, the returned tracer is a no-op.
    """
    tracer = trace.get_tracer(name)
    return tracer