Files
iban-ex/docs/international_wide_ibans/parse_local_registry.py
2025-11-29 21:20:32 -05:00

302 lines
9.9 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Parse IBAN Registry from local TXT file to create comprehensive test fixtures.
This is the single source of truth for IBAN validation rules.
"""
import json
import re
# Regex matching exactly two consecutive uppercase ASCII letters; used to pull
# country codes out of the "IBAN prefix country code (ISO 3166)" row and out
# of the free-text "other countries/territories" row.
COUNTRY_CODE_PATTERN = r"[A-Z]{2}"
# Value returned by parse_range when no "from-to" range can be parsed;
# process_positions compares against it to detect a missing branch code.
EMPTY_RANGE = (0, 0)
def parse_int(raw):
    """Return the first run of decimal digits found in *raw* as an int.

    Falls back to 0 when *raw* is empty/None, is the literal placeholder
    "N/A", or contains no digits at all.
    """
    if raw and raw != "N/A":
        digits = re.search(r"\d+", raw)
        if digits is not None:
            return int(digits.group())
    return 0
def parse_range(raw):
    """Turn a 1-based position range such as '1-4' into the tuple (0, 4).

    The lower bound is shifted to be zero-based while the upper bound is kept
    as written. EMPTY_RANGE is returned when *raw* is empty, blank, the
    placeholder "N/A", or holds no "<digits>-<digits>" pair.
    """
    if not raw or raw == "N/A" or not raw.strip():
        return EMPTY_RANGE

    found = re.search(r".*?(?P<from>\d+)\s*-\s*(?P<to>\d+)", raw)
    if found is None:
        return EMPTY_RANGE

    first = int(found.group("from"))
    last = int(found.group("to"))
    # Only the start is shifted: position 1-4 becomes (0, 4).
    return (first - 1, last)
def parse_registry(filepath):
    """Read the tab-separated SWIFT IBAN Registry TXT file into per-country dicts.

    The file is transposed: each line is one attribute (first cell is the
    attribute name, remaining cells are one value per country). Lines with
    fewer than two tab-separated cells or a blank attribute name are ignored;
    a repeated attribute name replaces the earlier line.

    Returns a list with one dict per column of the
    "IBAN prefix country code (ISO 3166)" row, mapping attribute name to the
    stripped cell text ("" where a row is shorter than the country-code row).

    Raises ValueError if the country-code row is absent.
    """
    code_row = "IBAN prefix country code (ISO 3166)"

    rows = {}
    with open(filepath, "r", encoding="latin1") as handle:
        for raw_line in handle:
            cells = raw_line.rstrip("\r\n").split("\t")
            if len(cells) < 2:
                continue
            label, *columns = cells
            label = label.strip()
            if label:
                rows[label] = [cell.strip() for cell in columns]

    if code_row not in rows:
        raise ValueError("Could not find country code row")

    # Pivot the attribute rows into one record per country column.
    return [
        {
            label: (columns[idx] if idx < len(columns) else "")
            for label, columns in rows.items()
        }
        for idx in range(len(rows[code_row]))
    ]
def process_positions(record):
    """Derive bank, branch and account position info for one registry record.

    Bank and branch ranges come from the record's "... position within the
    BBAN" fields via parse_range. When no branch range is present, the branch
    is represented as an empty span sitting at the end of the bank code. The
    account span runs from the furthest end of bank/branch up to the BBAN
    length.

    Returns a dict with "bank_code", "branch_code" and "account_code" entries,
    each carrying "start"/"end" plus the example (and pattern, for bank and
    branch) strings copied from the record.
    """
    bank_start, bank_end = parse_range(
        record.get("Bank identifier position within the BBAN", "")
    )
    branch_span = parse_range(
        record.get("Branch identifier position within the BBAN", "")
    )
    if branch_span == EMPTY_RANGE:
        # No branch code: collapse it onto the end of the bank code.
        branch_span = (bank_end, bank_end)
    branch_start, branch_end = branch_span

    bank_info = {
        "start": bank_start,
        "end": bank_end,
        "pattern": record.get("Bank identifier pattern", ""),
        "example": record.get("Bank identifier example", ""),
    }
    branch_info = {
        "start": branch_start,
        "end": branch_end,
        "pattern": record.get("Branch identifier pattern", ""),
        "example": record.get("Branch identifier example", ""),
    }
    account_info = {
        # The account starts after whichever of bank/branch ends last.
        "start": max(bank_end, branch_end),
        "end": parse_int(record.get("BBAN length", "0")),
        "example": record.get("Domestic account number example", ""),
    }
    return {
        "bank_code": bank_info,
        "branch_code": branch_info,
        "account_code": account_info,
    }
def parse_other_territories(value):
    """Extract the two-letter territory codes listed in a registry cell.

    Args:
        value: Free-text cell such as "GF, GP, MQ" or "AX (Aland Islands)".

    Returns:
        The standalone two-letter uppercase codes in order of appearance,
        or an empty list when *value* is empty/None or the placeholder "N/A".
    """
    if not value or value == "N/A":
        return []
    # Anchor on word boundaries so only standalone codes are returned; the
    # bare pattern would also pick letter pairs out of longer uppercase
    # tokens (e.g. "USA" -> "US", "ISLANDS" -> "IS", "LA", "ND").
    return re.findall(rf"\b{COUNTRY_CODE_PATTERN}\b", value)
def process_registry(records):
    """Convert raw per-country records into a registry dict keyed by country code.

    Records with an empty country-code cell, or one containing no two-letter
    uppercase code, are skipped. Each remaining record becomes a structured
    entry (name, SEPA flag, BBAN/IBAN specs, positions, effective date, other
    territories). Every territory code listed for a country that is not
    already in the registry gets a shallow copy of the country's entry with
    its own "country_code" and an added "parent_country".
    """
    code_header = "IBAN prefix country code (ISO 3166)"
    registry = {}

    for record in records:
        raw_code = record.get(code_header, "")
        found = re.search(COUNTRY_CODE_PATTERN, raw_code) if raw_code else None
        if found is None:
            continue
        country_code = found.group()

        is_sepa = record.get("SEPA country", "").strip().lower() == "yes"
        territories = parse_other_territories(
            record.get("Country code includes other countries/territories", "")
        )

        bban_info = {
            "spec": record.get("BBAN structure", ""),
            "length": parse_int(record.get("BBAN length", "0")),
            "example": record.get("BBAN example", ""),
        }
        iban_info = {
            "spec": record.get("IBAN structure", ""),
            "length": parse_int(record.get("IBAN length", "0")),
            "example_electronic": record.get("IBAN electronic format example", ""),
            "example_print": record.get("IBAN print format example", ""),
        }
        entry = {
            "country_name": record.get("Name of country", ""),
            "country_code": country_code,
            "sepa_country": is_sepa,
            "bban": bban_info,
            "iban": iban_info,
            "positions": process_positions(record),
            "effective_date": record.get("Effective date", ""),
            "other_territories": territories,
        }
        registry[country_code] = entry

        # Territories reuse the parent's rules unless already registered.
        for territory_code in territories:
            if not territory_code or territory_code in registry:
                continue
            alias = dict(entry)
            alias["country_code"] = territory_code
            alias["parent_country"] = country_code
            registry[territory_code] = alias

    return registry
def generate_test_fixtures(registry):
    """Build the test-fixture structure from a processed registry.

    Returns a dict with three sections:
      * "valid_ibans"   - example IBANs (electronic and print form) for every
                          code whose electronic example is non-empty;
      * "country_specs" - lengths, specs, SEPA flag, positions and effective
                          date for every code;
      * "metadata"      - entry count, SEPA entry count and source labels.
    Codes are inserted into both per-country sections in sorted order.
    """
    sepa_total = sum(1 for item in registry.values() if item.get("sepa_country"))

    valid_ibans = {}
    country_specs = {}
    for code in sorted(registry):
        entry = registry[code]
        iban = entry["iban"]
        bban = entry["bban"]

        if iban["example_electronic"]:
            valid_ibans[code] = {
                "electronic": iban["example_electronic"],
                "print": iban["example_print"],
                "country_name": entry["country_name"],
            }

        country_specs[code] = {
            "country_name": entry["country_name"],
            "iban_length": iban["length"],
            "bban_length": bban["length"],
            "iban_spec": iban["spec"],
            "bban_spec": bban["spec"],
            "sepa": entry["sepa_country"],
            "positions": entry["positions"],
            "effective_date": entry["effective_date"],
        }

    return {
        "valid_ibans": valid_ibans,
        "country_specs": country_specs,
        "metadata": {
            "total_countries": len(registry),
            "sepa_countries": sepa_total,
            "source": "SWIFT IBAN Registry",
            "format_version": "TXT Release 100",
        },
    }
def main():
    """Parse the local registry file, write both JSON outputs and print a summary.

    Reads "iban-registry-100.txt" and writes "iban_registry_full.json" and
    "iban_test_fixtures.json"; all three paths are relative to the current
    working directory.
    """
    print("Parsing IBAN Registry from local file...")
    # Parse the registry
    records = parse_registry("iban-registry-100.txt")
    print(f"✓ Parsed {len(records)} records")

    # Process into structured format
    registry = process_registry(records)
    print(f"✓ Processed {len(registry)} country codes")

    # Generate test fixtures
    fixtures = generate_test_fixtures(registry)
    print(f"✓ Generated fixtures for {len(fixtures['valid_ibans'])} countries")
    print(f"✓ SEPA countries: {fixtures['metadata']['sepa_countries']}")

    # ensure_ascii=False writes non-ASCII characters verbatim, so the file
    # encoding is pinned to UTF-8 instead of the platform default (which may
    # be unable to encode them, or yield JSON that is not valid UTF-8).
    with open("iban_registry_full.json", "w", encoding="utf-8") as f:
        json.dump(registry, f, indent=2, ensure_ascii=False)
    print("✓ Saved: iban_registry_full.json")

    with open("iban_test_fixtures.json", "w", encoding="utf-8") as f:
        json.dump(fixtures, f, indent=2, ensure_ascii=False)
    print("✓ Saved: iban_test_fixtures.json")

    # Generate summary report
    specs = fixtures["country_specs"]
    print("\n" + "=" * 70)
    print("IBAN REGISTRY SUMMARY - SINGLE SOURCE OF TRUTH")
    print("=" * 70)
    print(f"Total countries/territories: {fixtures['metadata']['total_countries']}")
    print(f"SEPA countries: {fixtures['metadata']['sepa_countries']}")

    print("\nIBAN Length Distribution:")
    # Entries whose IBAN length parsed to 0 (missing/unparseable) are excluded.
    length_dist = {}
    for spec in specs.values():
        length = spec["iban_length"]
        if length > 0:
            length_dist[length] = length_dist.get(length, 0) + 1
    for length in sorted(length_dist):
        print(f" {length:2d} chars: {length_dist[length]:2d} countries")
    if length_dist:
        print(f"\nShortest IBAN: {min(length_dist)} characters")
        print(f"Longest IBAN: {max(length_dist)} characters")

    # Show sample countries
    print("\nSample Countries (first 15):")
    print(f"{'Code':<5} {'Country Name':<35} {'Length':<7} {'SEPA':<6} {'Example'}")
    print("-" * 100)
    for code in sorted(fixtures["valid_ibans"])[:15]:
        entry = specs[code]
        iban_ex = fixtures["valid_ibans"][code]["electronic"][:30]
        print(
            f"{code:<5} {entry['country_name'][:35]:<35} {entry['iban_length']:<7} "
            f"{'Yes' if entry['sepa'] else 'No':<6} {iban_ex}"
        )

    # Show countries with special characteristics
    print("\nSpecial Characteristics:")
    # Only entries with a known (positive) length can be shortest/longest;
    # filtering first also keeps min()/max() from raising on an empty registry.
    sized = [item for item in specs.items() if item[1]["iban_length"] > 0]
    if sized:
        shortest_code = min(sized, key=lambda x: x[1]["iban_length"])
        longest_code = max(sized, key=lambda x: x[1]["iban_length"])
        print(
            f" Shortest: {shortest_code[0]} ({shortest_code[1]['country_name']}) - "
            f"{shortest_code[1]['iban_length']} chars"
        )
        print(
            f" Longest: {longest_code[0]} ({longest_code[1]['country_name']}) - "
            f"{longest_code[1]['iban_length']} chars"
        )
    else:
        print(" (no IBAN length data available)")

    print("\n" + "=" * 70)
    print("✓ Processing complete! Use these files for testing:")
    print(" • iban_registry_full.json - Complete registry with all fields")
    print(" • iban_test_fixtures.json - Test fixtures for valid IBANs")
    print("=" * 70)


if __name__ == "__main__":
    main()