293 lines
11 KiB
Python
293 lines
11 KiB
Python
|
|
"""Fraud-header compliance checks.
|
||
|
|
|
||
|
|
Two layers:
|
||
|
|
|
||
|
|
1. **Local shape assertions** — pure-python checks that every mandatory
|
||
|
|
Gov-Client-*/Gov-Vendor-* header is present and shaped per HMRC spec.
|
||
|
|
Runs in every CI build.
|
||
|
|
|
||
|
|
2. **HMRC validator API smoke test** (`test_headers_pass_hmrc_validator`):
|
||
|
|
Sends the generated header set on a GET to the HMRC sandbox validator and
|
||
|
|
asserts a clean 200 with no rejected headers. Gated on the
|
||
|
|
`HMRC_VALIDATOR` and `HMRC_SANDBOX_TOKEN` env vars so `pytest` still runs fine offline.
|
||
|
|
|
||
|
|
HMRC audits fraud headers during production-access review — a failing
|
||
|
|
validator smoke test MUST block deploy.
|
||
|
|
|
||
|
|
Spec references (primary):
|
||
|
|
https://developer.service.hmrc.gov.uk/guides/fraud-prevention/connection-method/batch-process-direct/
|
||
|
|
https://developer.service.hmrc.gov.uk/api-documentation/docs/api/service/txm-fph-validator-api/1.0
|
||
|
|
"""
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import hashlib
|
||
|
|
import os
|
||
|
|
import re
|
||
|
|
|
||
|
|
import httpx
|
||
|
|
import pytest
|
||
|
|
|
||
|
|
from hmrc_sync.fraud_headers import (
|
||
|
|
CONNECTION_METHOD_BATCH,
|
||
|
|
CONNECTION_METHOD_WEB_APP,
|
||
|
|
RUNTIME_CONTEXT,
|
||
|
|
VENDOR_LICENSE_ID,
|
||
|
|
VENDOR_PRODUCT_NAME,
|
||
|
|
RuntimeContext,
|
||
|
|
SessionContext,
|
||
|
|
as_validator_payload,
|
||
|
|
build_headers,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Sandbox endpoint hit by the validator smoke test; split into host + path
# for readability (adjacent literals are concatenated at compile time).
VALIDATOR_URL = (
    "https://test-api.service.hmrc.gov.uk"
    "/test/fraud-prevention-headers/validate"
)
|
||
|
|
|
||
|
|
# The 11 headers the HMRC spec marks mandatory for BATCH_PROCESS_DIRECT,
# grouped by prefix. Set semantics only: ordering here is cosmetic.
BATCH_MANDATORY = {
    # Gov-Vendor-* headers.
    "Gov-Vendor-License-IDs",
    "Gov-Vendor-Product-Name",
    "Gov-Vendor-Version",
    # Gov-Client-* headers.
    "Gov-Client-Connection-Method",
    "Gov-Client-Device-ID",
    "Gov-Client-Local-IPs",
    "Gov-Client-Local-IPs-Timestamp",
    "Gov-Client-MAC-Addresses",
    "Gov-Client-Timezone",
    "Gov-Client-User-Agent",
    "Gov-Client-User-IDs",
}
|
||
|
|
|
||
|
|
# Extra browser-origin headers that WEB_APP_VIA_SERVER emits in addition to
# everything in the batch set.
WEB_APP_EXTRAS = {
    "Gov-Client-Public-IP",
    "Gov-Client-Public-Port",
    "Gov-Client-Screens",
    "Gov-Client-Window-Size",
}
|
||
|
|
|
||
|
|
|
||
|
|
def _full_session() -> SessionContext:
    """Return a SessionContext with the browser-side fields these tests need.

    The values are fixed fixtures: a 1920x1080 screen at 24-bit depth, a
    1600x900 window, a zero timezone offset, a fixed device id, and a
    public IP/port pair (203.0.113.5 is in the RFC 5737 documentation range).
    """
    fixture_fields = {
        "user_agent": "Mozilla/5.0 (X11; Linux x86_64) hmrc-sync-test",
        "screen_width": 1920,
        "screen_height": 1080,
        "screen_colour_depth": 24,
        "window_width": 1600,
        "window_height": 900,
        "timezone_offset": 0,
        "device_id": "6c3a9f60-1111-2222-3333-abcdef012345",
        "public_ip": "203.0.113.5",
        "public_port": 443,
    }
    return SessionContext(**fixture_fields)
|
||
|
|
|
||
|
|
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
# BATCH_PROCESS_DIRECT — the CronJob path. All 11 headers must be present.
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
|
||
|
|
|
||
|
|
def test_batch_process_includes_all_11_mandatory_headers() -> None:
    """Every name in BATCH_MANDATORY must appear in the batch header set."""
    emitted = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    # Collect all absentees so one failure lists every gap.
    absent = {name for name in BATCH_MANDATORY if name not in emitted}
    assert not absent, f"BATCH_PROCESS_DIRECT missing mandatory headers: {absent}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_batch_process_omits_browser_only_headers() -> None:
    """Batch header sets must not contain any web-app-only header.

    Screens / Window-Size are browser-origin; Public-IP/Port route via a
    client-facing IP which doesn't apply to a batch job. The names come from
    the module-level ``WEB_APP_EXTRAS`` set rather than a second hard-coded
    list, so this check and the web-app presence check cannot drift apart.
    """
    hdrs = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    # sorted() keeps the first-reported offender deterministic across runs.
    for h in sorted(WEB_APP_EXTRAS):
        assert h not in hdrs, f"BATCH emitted browser-only header: {h}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_batch_process_connection_method_value() -> None:
    """The batch path must advertise the literal BATCH_PROCESS_DIRECT method."""
    method = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-Connection-Method"
    ]
    assert method == "BATCH_PROCESS_DIRECT"
|
||
|
|
|
||
|
|
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
# Header-value shape assertions (per HMRC spec).
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
|
||
|
|
|
||
|
|
def test_user_ids_starts_with_os_field() -> None:
    """Per spec: `os=<device-user>&<app>=<app-user>`. `os=` is mandatory.

    Also requires at least one further `&`-separated pair after `os=`.
    """
    user_ids = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-User-IDs"
    ]
    assert user_ids.startswith("os="), f"User-IDs missing os= prefix: {user_ids!r}"
    # One '&' separator yields two fields: the os= pair plus an app pair.
    field_count = len(user_ids.split("&"))
    assert field_count >= 2, f"User-IDs should have app identifier too: {user_ids!r}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_user_agent_has_all_four_spec_fields() -> None:
    """Spec: `os-family=…&os-version=…&device-manufacturer=…&device-model=…`."""
    value = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-User-Agent"
    ]
    required_keys = ("os-family=", "os-version=", "device-manufacturer=", "device-model=")
    # Find the first required key that is absent, scanning in spec order.
    key = next((k for k in required_keys if k not in value), None)
    assert key is None, f"User-Agent missing {key!r}: {value!r}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_mac_addresses_percent_encoded() -> None:
    """Spec: colons in MACs must be percent-encoded (%3A).

    The header must be non-empty, contain no raw ':' and contain '%3A'.
    """
    macs = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-MAC-Addresses"
    ]
    assert macs, "MAC-Addresses must never be empty"
    has_raw_colon = ":" in macs
    assert not has_raw_colon, f"MAC-Addresses contains raw colons: {macs!r}"
    assert "%3A" in macs, f"MAC-Addresses must use %3A: {macs!r}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_local_ips_ipv6_percent_encoded() -> None:
    """IPv6 entries percent-encoded; IPv4 passes through."""
    mixed_runtime = _runtime_with_ips(["10.0.0.4", "fe80::1"])
    headers = build_headers(
        connection_method=CONNECTION_METHOD_BATCH,
        runtime=mixed_runtime,
    )
    local_ips = headers["Gov-Client-Local-IPs"]
    # IPv4 has no reserved characters, so it must appear verbatim.
    assert "10.0.0.4" in local_ips
    # The raw IPv6 literal is forbidden; only its %3A-encoded form may appear.
    assert "fe80::1" not in local_ips
    assert "fe80%3A%3A1" in local_ips, f"IPv6 not encoded: {local_ips!r}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_vendor_license_id_is_sha256_hashed() -> None:
    """Spec: `Gov-Vendor-License-IDs: <name>=<hashed-value>`.

    Expects `<VENDOR_PRODUCT_NAME>=<sha256 hex digest of VENDOR_LICENSE_ID>`.
    """
    header = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Vendor-License-IDs"
    ]
    digest = hashlib.sha256(VENDOR_LICENSE_ID.encode()).hexdigest()
    assert header == f"{VENDOR_PRODUCT_NAME}={digest}", header
    # Independent shape check: exactly 64 lowercase hex chars after '=',
    # which catches accidental plaintext leakage.
    shape = re.compile(r"[a-z0-9-]+=[0-9a-f]{64}")
    assert shape.fullmatch(header), header
|
||
|
|
|
||
|
|
|
||
|
|
def test_vendor_product_name_percent_encoded() -> None:
    """Product name is emitted as the literal `hmrc-sync`.

    The name has no reserved characters, so the expected value is the same
    with or without percent-encoding.
    """
    product_name = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Vendor-Product-Name"
    ]
    assert product_name == "hmrc-sync"
|
||
|
|
|
||
|
|
|
||
|
|
def test_vendor_version_format() -> None:
    """Gov-Vendor-Version must look like `<name>=<major>.<minor>.<patch>`."""
    version = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Vendor-Version"
    ]
    pattern = re.compile(r"[a-z0-9-]+=\d+\.\d+\.\d+")
    assert pattern.fullmatch(version), version
|
||
|
|
|
||
|
|
|
||
|
|
def test_local_ips_timestamp_spec_format() -> None:
    """Spec: `yyyy-MM-ddThh:mm:ss.sssZ` — 24-hour, UTC, 3-digit millis."""
    stamp = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-Local-IPs-Timestamp"
    ]
    # Shape only: digit counts and separators, not calendar validity.
    pattern = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z")
    assert pattern.fullmatch(stamp), stamp
|
||
|
|
|
||
|
|
|
||
|
|
def test_timezone_utc_offset_format() -> None:
    """Gov-Client-Timezone must be `UTC` followed by a signed `hh:mm` offset."""
    timezone = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-Timezone"
    ]
    pattern = re.compile(r"UTC[+-]\d{2}:\d{2}")
    assert pattern.fullmatch(timezone)
|
||
|
|
|
||
|
|
|
||
|
|
def test_device_id_is_valid_uuid() -> None:
    """UUID shape check: 8-4-4-4-12 hex — applies to fallback too.

    Only lowercase hex digits are accepted by the pattern.
    """
    device_id = build_headers(connection_method=CONNECTION_METHOD_BATCH)[
        "Gov-Client-Device-ID"
    ]
    uuid_shape = re.compile(
        r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
    )
    assert uuid_shape.fullmatch(device_id), device_id
|
||
|
|
|
||
|
|
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
# MFA gating + per-call variance.
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
|
||
|
|
|
||
|
|
def test_mfa_timestamp_only_emitted_for_mfa_method() -> None:
    """Gov-Client-MFA-Timestamp is for AUTH_USING_MFA; batch must not emit it."""
    # Negative case: the batch header set carries no MFA timestamp.
    batch_headers = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    assert "Gov-Client-MFA-Timestamp" not in batch_headers

    # Positive case: a session with a timestamp set, built for the MFA
    # method, must echo that timestamp back unchanged.
    mfa_session = _full_session()
    mfa_session.mfa_timestamp = "2026-04-19T21:30:00.000Z"
    mfa_headers = build_headers(mfa_session, connection_method="AUTH_USING_MFA")
    emitted_stamp = mfa_headers.get("Gov-Client-MFA-Timestamp")
    assert emitted_stamp == "2026-04-19T21:30:00.000Z"
|
||
|
|
|
||
|
|
|
||
|
|
def test_correlation_id_differs_per_call() -> None:
    """Two consecutive builds must not share an `x-correlation-id`."""
    first_call = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    second_call = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    first_id = first_call["x-correlation-id"]
    second_id = second_call["x-correlation-id"]
    assert first_id != second_id
|
||
|
|
|
||
|
|
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
# WEB_APP_VIA_SERVER — batch set + browser extras.
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
|
||
|
|
|
||
|
|
def test_web_app_includes_batch_mandatory_plus_browser_extras() -> None:
    """A web-app header set needs BATCH_MANDATORY plus WEB_APP_EXTRAS."""
    emitted = build_headers(_full_session(), connection_method=CONNECTION_METHOD_WEB_APP)
    required = BATCH_MANDATORY | WEB_APP_EXTRAS
    # Collect all absentees so one failure lists every gap.
    absent = {name for name in required if name not in emitted}
    assert not absent, f"WEB_APP missing headers: {absent}"
|
||
|
|
|
||
|
|
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
# Payload reshape (as_validator_payload; used by CI self-tests — the GET-based smoke test below sends real request headers instead).
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
|
||
|
|
|
||
|
|
def test_as_validator_payload_reshape() -> None:
    """`as_validator_payload` turns a header mapping into name/value records.

    The `headers` entry must hold one `{"name": ..., "value": ...}` dict per
    input header, in the input's insertion order.
    """
    source = {"Gov-Client-Connection-Method": "X", "Gov-Vendor-Product-Name": "y"}
    expected = [{"name": name, "value": value} for name, value in source.items()]
    reshaped = as_validator_payload(source)
    assert reshaped["headers"] == expected
|
||
|
|
|
||
|
|
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
# HMRC sandbox validator smoke test — set HMRC_VALIDATOR=1 and HMRC_SANDBOX_TOKEN to enable.
|
||
|
|
# --------------------------------------------------------------------------
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.skipif(
    not (os.environ.get("HMRC_VALIDATOR")
         and os.environ.get("HMRC_SANDBOX_TOKEN")),
    reason=("HMRC sandbox validator smoke test — set HMRC_VALIDATOR=1 AND "
            "HMRC_SANDBOX_TOKEN=<app-token>. Dev Hub app must be subscribed "
            "to txm-fph-validator-api/1.0 (application-restricted)."),
)
def test_headers_pass_hmrc_validator() -> None:
    """GET /test/fraud-prevention-headers/validate with BATCH headers.

    Per the OAS spec the validator is a GET endpoint — headers go in the
    actual HTTP request, not a JSON body. Auth is application-restricted
    (client_credentials bearer), read from HMRC_SANDBOX_TOKEN.

    The test passes only when the response is HTTP 200 and its ``code`` is
    VALID_HEADERS or POTENTIALLY_INVALID_HEADERS (warnings only). Anything
    else fails the build: INVALID_HEADERS, a missing ``code``, or a value
    this test does not recognise. An unexpected response shape must not be
    mistaken for a pass.
    """
    hdrs = build_headers(connection_method=CONNECTION_METHOD_BATCH)
    request_headers = {
        **hdrs,
        "Accept": "application/vnd.hmrc.1.0+json",
        "Authorization": f"Bearer {os.environ['HMRC_SANDBOX_TOKEN']}",
    }
    resp = httpx.get(VALIDATOR_URL, headers=request_headers, timeout=30.0)
    assert resp.status_code == 200, (
        f"validator refused: {resp.status_code} {resp.text[:500]}")
    body = resp.json()
    code = body.get("code")
    # Allow-list rather than deny-list: a missing or unknown code is a failure.
    accepted_codes = ("VALID_HEADERS", "POTENTIALLY_INVALID_HEADERS")
    assert code in accepted_codes, f"validator rejected: {body}"
    # POTENTIALLY_INVALID_HEADERS is allowed — HMRC surfaces them as warnings;
    # print for visibility but don't fail the build on them. pytest captures
    # stdout on passing tests, so run with `-s` or `-rP` to see this line.
    if code == "POTENTIALLY_INVALID_HEADERS":
        print(f"validator warnings: {body.get('warnings')}")
|
||
|
|
|
||
|
|
|
||
|
|
def _runtime_with_ips(ips: list[str]) -> RuntimeContext:
    """Build a RuntimeContext override with caller-specified local_ips.

    Every other field is copied from the module-level RUNTIME_CONTEXT, so
    only `local_ips` differs from the default runtime.
    """
    copied_fields = (
        "mac_addresses",
        "os_family",
        "os_version",
        "device_manufacturer",
        "device_model",
        "os_user",
    )
    inherited = {name: getattr(RUNTIME_CONTEXT, name) for name in copied_fields}
    return RuntimeContext(local_ips=ips, **inherited)
|