hmrc-sync/tests/test_fraud_headers.py

293 lines
11 KiB
Python
Raw Normal View History

2026-05-07 17:06:11 +00:00
"""Fraud-header compliance checks.
Two layers:
1. **Local shape assertions** — pure-python checks that every mandatory
Gov-Client-*/Gov-Vendor-* header is present and shaped per HMRC spec.
Runs in every CI build.
2. **HMRC validator API smoke test** (`test_headers_pass_hmrc_validator`):
sends the generated header set on a GET request to the HMRC sandbox
validator and asserts a clean 200 with no rejected headers. Gated on the
`HMRC_VALIDATOR` and `HMRC_SANDBOX_TOKEN` env vars so `pytest` still runs
fine offline.
HMRC audits fraud headers during production-access review — a failing
validator smoke test MUST block deploy.
Spec references (primary):
https://developer.service.hmrc.gov.uk/guides/fraud-prevention/connection-method/batch-process-direct/
https://developer.service.hmrc.gov.uk/api-documentation/docs/api/service/txm-fph-validator-api/1.0
"""
from __future__ import annotations
import hashlib
import os
import re
import httpx
import pytest
from hmrc_sync.fraud_headers import (
CONNECTION_METHOD_BATCH,
CONNECTION_METHOD_WEB_APP,
RUNTIME_CONTEXT,
VENDOR_LICENSE_ID,
VENDOR_PRODUCT_NAME,
RuntimeContext,
SessionContext,
as_validator_payload,
build_headers,
)
# Sandbox endpoint targeted by the validator smoke test.
VALIDATOR_URL = (
    "https://test-api.service.hmrc.gov.uk"
    "/test/fraud-prevention-headers/validate"
)

# Per HMRC BATCH_PROCESS_DIRECT spec (11 mandatory headers).
BATCH_MANDATORY = {
    # Client-side headers.
    "Gov-Client-Connection-Method",
    "Gov-Client-Device-ID",
    "Gov-Client-Local-IPs",
    "Gov-Client-Local-IPs-Timestamp",
    "Gov-Client-MAC-Addresses",
    "Gov-Client-Timezone",
    "Gov-Client-User-Agent",
    "Gov-Client-User-IDs",
    # Vendor-side headers.
    "Gov-Vendor-License-IDs",
    "Gov-Vendor-Product-Name",
    "Gov-Vendor-Version",
}

# WEB_APP_VIA_SERVER adds browser-origin context on top of the batch set.
WEB_APP_EXTRAS = {
    "Gov-Client-Public-IP",
    "Gov-Client-Public-Port",
    "Gov-Client-Screens",
    "Gov-Client-Window-Size",
}
def _full_session() -> SessionContext:
    """Return a SessionContext with fixed values for every browser-side field.

    Covers user agent, screen and window geometry, timezone offset, device
    id, and public IP/port, so tests can exercise non-batch connection methods.
    """
    browser_fields = {
        "user_agent": "Mozilla/5.0 (X11; Linux x86_64) hmrc-sync-test",
        "screen_width": 1920,
        "screen_height": 1080,
        "screen_colour_depth": 24,
        "window_width": 1600,
        "window_height": 900,
        "timezone_offset": 0,
        "device_id": "6c3a9f60-1111-2222-3333-abcdef012345",
        "public_ip": "203.0.113.5",
        "public_port": 443,
    }
    return SessionContext(**browser_fields)
# --------------------------------------------------------------------------
# BATCH_PROCESS_DIRECT — the CronJob path. All 11 headers must be present.
# --------------------------------------------------------------------------
def test_batch_process_includes_all_11_mandatory_headers() -> None:
    """Every name in ``BATCH_MANDATORY`` must be a key of the batch header set."""
    emitted = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    absent = BATCH_MANDATORY.difference(emitted.keys())
    assert not absent, f"BATCH_PROCESS_DIRECT missing mandatory headers: {absent}"
def test_batch_process_omits_browser_only_headers() -> None:
    """The batch header set must contain none of the ``WEB_APP_EXTRAS`` headers.

    Screens / Window-Size are browser-origin; Public-IP/Port route via a
    client-facing IP which doesn't apply to a batch job.
    """
    hdrs = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    # Check against the shared constant rather than a re-typed list so the
    # two cannot drift apart, and report every leaked header in one failure.
    leaked = WEB_APP_EXTRAS & hdrs.keys()
    assert not leaked, f"BATCH emitted browser-only headers: {sorted(leaked)}"
def test_batch_process_connection_method_value() -> None:
    """The batch path must send the literal value ``BATCH_PROCESS_DIRECT``."""
    method = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-Connection-Method"
    ]
    assert method == "BATCH_PROCESS_DIRECT"
# --------------------------------------------------------------------------
# Header-value shape assertions (per HMRC spec).
# --------------------------------------------------------------------------
def test_user_ids_starts_with_os_field() -> None:
    """Per spec: `os=<device-user>&<app>=<app-user>`; the `os=` field is mandatory."""
    user_ids = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-User-IDs"
    ]
    assert user_ids.startswith("os="), f"User-IDs missing os= prefix: {user_ids!r}"
    # A single "&" separator already implies a second key-value pair after `os=`.
    separators = user_ids.count("&")
    assert separators >= 1, f"User-IDs should have app identifier too: {user_ids!r}"
def test_user_agent_has_all_four_spec_fields() -> None:
    """Spec: `os-family=…&os-version=…&device-manufacturer=…&device-model=…`."""
    value = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-User-Agent"
    ]
    required = ("os-family=", "os-version=", "device-manufacturer=", "device-model=")
    # Find the first required field that is absent, if any, and fail on it.
    key = next((field for field in required if field not in value), None)
    assert key is None, f"User-Agent missing {key!r}: {value!r}"
def test_mac_addresses_percent_encoded() -> None:
    """Spec: MAC colons must be percent-encoded (``%3A``), never sent raw."""
    macs = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-MAC-Addresses"
    ]
    assert macs, "MAC-Addresses must never be empty"
    has_raw_colon = ":" in macs
    assert not has_raw_colon, f"MAC-Addresses contains raw colons: {macs!r}"
    has_encoded_colon = "%3A" in macs
    assert has_encoded_colon, f"MAC-Addresses must use %3A: {macs!r}"
def test_local_ips_ipv6_percent_encoded() -> None:
    """IPv6 local addresses are percent-encoded; IPv4 ones pass through as-is."""
    runtime = _runtime_with_ips(["10.0.0.4", "fe80::1"])
    local_ips = build_headers(
        connection_method=CONNECTION_METHOD_BATCH,
        runtime=runtime,
    )["Gov-Client-Local-IPs"]
    assert "10.0.0.4" in local_ips
    # The raw IPv6 form is forbidden; only the %3A-encoded form may appear.
    assert "fe80::1" not in local_ips
    assert "fe80%3A%3A1" in local_ips, f"IPv6 not encoded: {local_ips!r}"
def test_vendor_license_id_is_sha256_hashed() -> None:
    """Spec: `Gov-Vendor-License-IDs: <name>=<hashed-value>`."""
    license_header = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Vendor-License-IDs"
    ]
    hasher = hashlib.sha256()
    hasher.update(VENDOR_LICENSE_ID.encode())
    digest = hasher.hexdigest()
    assert license_header == f"{VENDOR_PRODUCT_NAME}={digest}", license_header
    # Hash must be 64 hex chars — catches accidental plaintext leakage.
    shape = re.compile(r"[a-z0-9-]+=[0-9a-f]{64}")
    assert shape.fullmatch(license_header), license_header
def test_vendor_product_name_percent_encoded() -> None:
    """The product-name header must equal ``hmrc-sync``."""
    product = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Vendor-Product-Name"
    ]
    # The name has no reserved chars, so the expected value is the literal.
    assert product == "hmrc-sync"
def test_vendor_version_format() -> None:
    """Vendor version must look like `<name>=<major>.<minor>.<patch>`."""
    version = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Vendor-Version"
    ]
    shape = re.compile(r"[a-z0-9-]+=\d+\.\d+\.\d+")
    assert shape.fullmatch(version), version
def test_local_ips_timestamp_spec_format() -> None:
    """Spec: `yyyy-MM-ddThh:mm:ss.sssZ` — 24-hour, UTC, 3-digit millis."""
    stamp = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-Local-IPs-Timestamp"
    ]
    shape = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z")
    assert shape.fullmatch(stamp), stamp
def test_timezone_utc_offset_format() -> None:
    """Timezone header must be `UTC` followed by a signed `hh:mm` offset."""
    timezone = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-Timezone"
    ]
    shape = re.compile(r"UTC[+-]\d{2}:\d{2}")
    assert shape.fullmatch(timezone)
def test_device_id_is_valid_uuid() -> None:
    """UUID shape check: 8-4-4-4-12 hex — applies to fallback too."""
    device_id = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-Device-ID"
    ]
    uuid_shape = re.compile(
        r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
    )
    assert uuid_shape.fullmatch(device_id), device_id
# --------------------------------------------------------------------------
# MFA gating + per-call variance.
# --------------------------------------------------------------------------
def test_mfa_timestamp_only_emitted_for_mfa_method() -> None:
    """Gov-Client-MFA-Timestamp is for AUTH_USING_MFA; batch must not emit it."""
    header = "Gov-Client-MFA-Timestamp"
    stamp = "2026-04-19T21:30:00.000Z"

    batch_headers = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    assert header not in batch_headers

    # With an MFA timestamp on the session, the MFA method echoes it verbatim.
    mfa_session = _full_session()
    mfa_session.mfa_timestamp = stamp
    mfa_headers = build_headers(mfa_session, connection_method="AUTH_USING_MFA")
    assert mfa_headers.get(header) == stamp
def test_correlation_id_differs_per_call() -> None:
    """Two consecutive header builds must carry different ``x-correlation-id`` values."""
    first = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    second = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    first_id, second_id = first["x-correlation-id"], second["x-correlation-id"]
    assert first_id != second_id
# --------------------------------------------------------------------------
# WEB_APP_VIA_SERVER — batch set + browser extras.
# --------------------------------------------------------------------------
def test_web_app_includes_batch_mandatory_plus_browser_extras() -> None:
    """The web-app header set must contain the batch set plus ``WEB_APP_EXTRAS``."""
    emitted = build_headers(
        _full_session(),
        connection_method=CONNECTION_METHOD_WEB_APP,
    )
    required = BATCH_MANDATORY | WEB_APP_EXTRAS
    absent = required - emitted.keys()
    assert not absent, f"WEB_APP missing headers: {absent}"
# --------------------------------------------------------------------------
# Payload reshape (used by the validator smoke test + CI self-tests).
# --------------------------------------------------------------------------
def test_as_validator_payload_reshape() -> None:
    """`as_validator_payload` turns a header mapping into ordered name/value records."""
    sample = {"Gov-Client-Connection-Method": "X", "Gov-Vendor-Product-Name": "y"}
    # One {"name", "value"} record per header, in the mapping's insertion order.
    expected = [{"name": name, "value": value} for name, value in sample.items()]
    reshaped = as_validator_payload(sample)
    assert reshaped["headers"] == expected
# --------------------------------------------------------------------------
# HMRC sandbox validator smoke test — set HMRC_VALIDATOR=1 to enable.
# --------------------------------------------------------------------------
@pytest.mark.skipif(
    not (os.environ.get("HMRC_VALIDATOR")
         and os.environ.get("HMRC_SANDBOX_TOKEN")),
    reason=("HMRC sandbox validator smoke test — set HMRC_VALIDATOR=1 AND "
            "HMRC_SANDBOX_TOKEN=<app-token>. Dev Hub app must be subscribed "
            "to txm-fph-validator-api/1.0 (application-restricted)."),
)
def test_headers_pass_hmrc_validator() -> None:
    """GET /test/fraud-prevention-headers/validate with BATCH headers.

    Per the OAS spec the validator is a GET endpoint — headers go in the
    actual HTTP request, not a JSON body. Auth is application-restricted
    (client_credentials bearer). A successful response has code=VALID_HEADERS;
    POTENTIALLY_INVALID_HEADERS emits warnings but still passes;
    INVALID_HEADERS is a hard fail, and so is a missing or unrecognised
    code, so an unexpected response shape can never pass silently.
    """
    hdrs = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    # Accept / Authorization are placed after the spread so they always win
    # over any same-named key in the generated fraud headers.
    request_headers = {
        **hdrs,
        "Accept": "application/vnd.hmrc.1.0+json",
        "Authorization": f"Bearer {os.environ['HMRC_SANDBOX_TOKEN']}",
    }
    resp = httpx.get(VALIDATOR_URL, headers=request_headers, timeout=30.0)
    assert resp.status_code == 200, (
        f"validator refused: {resp.status_code} {resp.text[:500]}")
    body = resp.json()
    code = body.get("code")
    assert code != "INVALID_HEADERS", f"validator rejected: {body}"
    # Whitelist the passing codes: a 200 whose body lacks `code` (or carries
    # an unknown one) is not evidence that the headers were accepted.
    assert code in ("VALID_HEADERS", "POTENTIALLY_INVALID_HEADERS"), (
        f"validator returned unexpected code {code!r}: {body}")
    # POTENTIALLY_INVALID_HEADERS is allowed — HMRC surfaces them as warnings;
    # log for visibility but don't fail the build on them.
    if code == "POTENTIALLY_INVALID_HEADERS":
        print(f"validator warnings: {body.get('warnings')}")
def _runtime_with_ips(ips: list[str]) -> RuntimeContext:
    """Return a RuntimeContext that mirrors ``RUNTIME_CONTEXT`` except for ``local_ips``.

    ``local_ips`` is set to *ips*; every other field is copied from the
    module-level ``RUNTIME_CONTEXT``.
    """
    inherited = (
        "mac_addresses",
        "os_family",
        "os_version",
        "device_manufacturer",
        "device_model",
        "os_user",
    )
    carried = {field: getattr(RUNTIME_CONTEXT, field) for field in inherited}
    return RuntimeContext(local_ips=ips, **carried)