"""Phase-1 HMRC MTD OAuth sandbox smoke test.
Runs the authorization_code flow against HMRC's test environment, captures
the callback on localhost:8080, exchanges the code for tokens, then calls
/individual-income/sa/{utr}/annual-summary/{taxYear} for a test user.
Prerequisites (do once in the HMRC dev hub for the app):
1. Add http://localhost:8080/oauth/callback as a Redirect URI.
2. Subscribe to "Individual Income API" v1.2 (and accept terms).
3. Create a sandbox test user (Individuals → Create Test User) and note
the 10-digit SA UTR + Government Gateway user ID + password.
Credentials are read from the HMRC_CLIENT_ID / HMRC_CLIENT_SECRET environment
variables when both are set; otherwise they are fetched from Vault
(secret/viktor/hmrc_mtd_sandbox_client_{id,secret}).
Run:
python3 oauth_dance.py --utr 2762163393 --tax-year 2015-16
"""
from __future__ import annotations
import argparse
import http.server
import json
import os
import secrets
import socketserver
import sys
import threading
import urllib.parse
import webbrowser
from dataclasses import dataclass
import httpx
# --- HMRC sandbox host and OAuth 2.0 (authorization_code) settings ---------
SANDBOX_BASE = "https://test-api.service.hmrc.gov.uk"
AUTH_PATH = "/oauth/authorize"
TOKEN_PATH = "/oauth/token"
SCOPE = "read:individual-income"

# --- Local callback listener ------------------------------------------------
# The port here and the port inside REDIRECT_URI must stay in step, and
# REDIRECT_URI must be one of the redirect URIs registered for the app.
CALLBACK_PORT = 8080
REDIRECT_URI = "http://localhost:8080/oauth/callback"

# --- Income endpoint --------------------------------------------------------
# Legacy "Individual Income API" v1.2 (annual Self-Assessment summary).
# The path is keyed by the 10-digit SA UTR, NOT the NINO. The MTD
# "Individuals Income Received API" would be richer (in-year YTD) but
# isn't available to this app's subscription list.
INCOME_PATH = "/individual-income/sa/{utr}/annual-summary/{tax_year}"
INCOME_ACCEPT = "application/vnd.hmrc.1.2+json"
@dataclass
class Creds:
    """OAuth client credentials for the HMRC sandbox application.

    The auto-generated dataclass ``__repr__`` would include ``client_secret``
    verbatim, so it is overridden here to keep the secret out of logs,
    debug prints and tracebacks. Equality and construction are unchanged.
    """

    client_id: str
    client_secret: str

    def __repr__(self) -> str:
        """Return a representation with the secret redacted."""
        return f"Creds(client_id={self.client_id!r}, client_secret='***')"
def load_creds() -> Creds:
    """Return the sandbox OAuth client credentials.

    ``HMRC_CLIENT_ID`` and ``HMRC_CLIENT_SECRET`` are used when both are set
    to non-empty values. Otherwise each value is fetched with
    ``vault kv get -field=<name> secret/viktor``; any failure of that command
    propagates from ``subprocess.check_output``.
    """
    from_env = [os.environ.get(var) for var in ("HMRC_CLIENT_ID", "HMRC_CLIENT_SECRET")]
    if all(from_env):
        return Creds(from_env[0], from_env[1])

    # Only the Vault path needs subprocess, so it is imported lazily.
    import subprocess

    from_vault = []
    for field_name in ("hmrc_mtd_sandbox_client_id", "hmrc_mtd_sandbox_client_secret"):
        raw = subprocess.check_output(
            ["vault", "kv", "get", "-field=" + field_name, "secret/viktor"],
            text=True,
        )
        from_vault.append(raw.strip())
    return Creds(from_vault[0], from_vault[1])
class _CallbackHandler(http.server.BaseHTTPRequestHandler):
    """Handle the OAuth redirect that arrives on the local callback port.

    Query parameters of a GET to ``/oauth/callback`` are copied into the
    class-level ``captured`` dict; every other path gets a bare 404.
    """

    # Shared at class level on purpose: it is written by the handler and
    # read through the class by the code waiting for the callback.
    captured: dict[str, str] = {}

    def do_GET(self) -> None:
        """Record the callback's query parameters and acknowledge the browser."""
        parsed = urllib.parse.urlparse(self.path)
        if parsed.path != "/oauth/callback":
            self.send_response(404)
            self.end_headers()
            return
        # parse_qs maps each key to a list of values; keep the first of each.
        qs = urllib.parse.parse_qs(parsed.query)
        _CallbackHandler.captured.update({k: v[0] for k, v in qs.items()})
        body = (
            b"<html><body><h3>HMRC auth received. "
            b"You can close this tab.</h3></body></html>"
        )
        self.send_response(200)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, *_args) -> None:  # silence default stderr spam
        pass
class _ReusableTCPServer(socketserver.TCPServer):
    """TCPServer that may rebind the callback port right after a previous run."""

    allow_reuse_address = True


def run_callback_server_until_code(expected_state: str) -> dict[str, str]:
    """Listen on 127.0.0.1 until the OAuth redirect arrives, then validate it.

    Args:
        expected_state: The ``state`` value that was sent in the
            authorization request.

    Returns:
        A copy of the query parameters captured from the callback.

    Raises:
        SystemExit: If the returned ``state`` does not equal
            ``expected_state``, or if the callback carried an ``error``
            parameter.
    """
    captured = _CallbackHandler.captured
    # The dict lives on the handler class, so values from an earlier call
    # would otherwise satisfy the wait loop immediately.
    captured.clear()
    with _ReusableTCPServer(("127.0.0.1", CALLBACK_PORT), _CallbackHandler) as srv:
        t = threading.Thread(target=srv.serve_forever, daemon=True)
        t.start()
        tick = threading.Event()  # never set; used only as a sleep between polls
        try:
            while "code" not in captured and "error" not in captured:
                tick.wait(0.25)
        finally:
            # Stop serve_forever even when the wait is interrupted.
            srv.shutdown()
    got = dict(captured)
    # Check state first so a forged callback is rejected before anything
    # else in it is trusted.
    if got.get("state") != expected_state:
        raise SystemExit(f"CSRF: state mismatch (got {got.get('state')!r}, want {expected_state!r})")
    if "error" in got:
        raise SystemExit(f"HMRC returned error: {got}")
    return got
def exchange_code(creds: Creds, code: str) -> dict:
    """Trade an authorization code for tokens at the sandbox token endpoint.

    Sends a form-encoded ``authorization_code`` grant and returns the decoded
    JSON body. A 4xx/5xx answer raises via ``raise_for_status()``.
    """
    token_url = SANDBOX_BASE + TOKEN_PATH
    form = {
        "grant_type": "authorization_code",
        "client_id": creds.client_id,
        "client_secret": creds.client_secret,
        "redirect_uri": REDIRECT_URI,
        "code": code,
    }
    response = httpx.post(
        token_url,
        data=form,
        headers={"Accept": "application/vnd.hmrc.1.0+json"},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()
def call_income_received(access_token: str, utr: str, tax_year: str) -> httpx.Response:
    """GET the annual SA summary for ``utr`` and ``tax_year`` from the sandbox.

    ``tax_year`` is '2015-16' style (legacy Individual Income API). The
    response is returned as-is; no status check is performed here.
    """
    path = INCOME_PATH.format(utr=utr, tax_year=tax_year)
    request_headers = {
        "Accept": INCOME_ACCEPT,
        "Authorization": f"Bearer {access_token}",
    }
    return httpx.get(SANDBOX_BASE + path, headers=request_headers, timeout=30)
def main() -> int:
    """Run the sandbox smoke test end to end from the command line.

    Builds the authorization URL, opens it in a browser, waits for the
    localhost callback, exchanges the code for tokens and calls the income
    endpoint, printing each step.

    Returns:
        0 when the income call answers with a status below 400, otherwise 2.

    Raises:
        SystemExit: If the token response carries no ``access_token``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--utr", required=True, help="Sandbox test-user 10-digit SA UTR, e.g. 2762163393")
    parser.add_argument("--tax-year", default="2015-16", help="Format 2015-16. Sandbox may only have canned data for certain years.")
    args = parser.parse_args()

    creds = load_creds()
    # Random per-run state; compared against the callback to detect CSRF.
    state = secrets.token_urlsafe(24)
    auth_url = (
        f"{SANDBOX_BASE}{AUTH_PATH}?"
        + urllib.parse.urlencode({
            "response_type": "code",
            "client_id": creds.client_id,
            "scope": SCOPE,
            "redirect_uri": REDIRECT_URI,
            "state": state,
        })
    )
    print(f"Opening browser to HMRC sandbox login...\n {auth_url}\n")
    # NOTE(review): the browser is launched before the callback server is
    # listening; this relies on the interactive login outlasting the bind.
    webbrowser.open(auth_url)
    captured = run_callback_server_until_code(expected_state=state)
    print(f"Got auth code (truncated): {captured['code'][:12]}...")

    tokens = exchange_code(creds, captured["code"])
    access = tokens.get("access_token")
    if not access:
        # Report key names only, so no token material is echoed.
        raise SystemExit(f"Token response had no access_token (keys: {sorted(tokens)})")
    print(f"Got access_token (exp {tokens.get('expires_in')}s), refresh_token present={('refresh_token' in tokens)}")

    resp = call_income_received(access, args.utr, args.tax_year)
    # Derive the displayed path from INCOME_PATH so it cannot drift from
    # the URL that was actually requested.
    income_path = INCOME_PATH.format(utr=args.utr, tax_year=args.tax_year)
    print(f"\nGET {income_path} → HTTP {resp.status_code}")
    try:
        payload = resp.json()
    except ValueError:
        # Body is not JSON (JSON decode errors are ValueError subclasses).
        print(resp.text)
    else:
        print(json.dumps(payload, indent=2))
    return 0 if resp.status_code < 400 else 2
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())