302 lines
9.9 KiB
Python
Executable File
302 lines
9.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Parse IBAN Registry from local TXT file to create comprehensive test fixtures.
|
|
This is the single source of truth for IBAN validation rules.
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
|
|
COUNTRY_CODE_PATTERN = r"[A-Z]{2}"
|
|
EMPTY_RANGE = (0, 0)
|
|
|
|
|
|
def parse_int(raw):
|
|
"""Extract first integer from string."""
|
|
if not raw or raw == "N/A":
|
|
return 0
|
|
match = re.search(r"\d+", raw)
|
|
return int(match.group()) if match else 0
|
|
|
|
|
|
def parse_range(raw):
|
|
"""Parse position range like '1-4' to zero-indexed tuple (0, 4)."""
|
|
if not raw or raw == "N/A" or raw.strip() == "":
|
|
return EMPTY_RANGE
|
|
pattern = r".*?(?P<from>\d+)\s*-\s*(?P<to>\d+)"
|
|
match = re.search(pattern, raw)
|
|
if not match:
|
|
return EMPTY_RANGE
|
|
# Convert to zero-indexed: position 1-4 becomes (0, 4)
|
|
return (int(match["from"]) - 1, int(match["to"]))
|
|
|
|
|
|
def parse_registry(filepath):
|
|
"""Parse the SWIFT IBAN Registry TXT file."""
|
|
with open(filepath, "r", encoding="latin1") as f:
|
|
lines = f.readlines()
|
|
|
|
# Parse line by line
|
|
data = {}
|
|
for line in lines:
|
|
parts = line.rstrip("\r\n").split("\t")
|
|
if len(parts) < 2:
|
|
continue
|
|
|
|
header = parts[0].strip()
|
|
values = [p.strip() for p in parts[1:]]
|
|
|
|
if header:
|
|
data[header] = values
|
|
|
|
# Build records from columns
|
|
if "IBAN prefix country code (ISO 3166)" not in data:
|
|
raise ValueError("Could not find country code row")
|
|
|
|
num_countries = len(data["IBAN prefix country code (ISO 3166)"])
|
|
records = []
|
|
|
|
for i in range(num_countries):
|
|
record = {}
|
|
|
|
# Extract data for each column
|
|
for header, values in data.items():
|
|
if i < len(values):
|
|
record[header] = values[i]
|
|
else:
|
|
record[header] = ""
|
|
|
|
records.append(record)
|
|
|
|
return records
|
|
|
|
|
|
def process_positions(record):
|
|
"""Process position information for bank code, branch code, and account number."""
|
|
bank_code = parse_range(record.get("Bank identifier position within the BBAN", ""))
|
|
branch_code = parse_range(
|
|
record.get("Branch identifier position within the BBAN", "")
|
|
)
|
|
bban_length = parse_int(record.get("BBAN length", "0"))
|
|
|
|
# If no branch code, set it to end of bank code
|
|
if branch_code == EMPTY_RANGE:
|
|
branch_code = (bank_code[1], bank_code[1])
|
|
|
|
# Account code starts after bank and branch codes
|
|
account_start = max(bank_code[1], branch_code[1])
|
|
|
|
return {
|
|
"bank_code": {
|
|
"start": bank_code[0],
|
|
"end": bank_code[1],
|
|
"pattern": record.get("Bank identifier pattern", ""),
|
|
"example": record.get("Bank identifier example", ""),
|
|
},
|
|
"branch_code": {
|
|
"start": branch_code[0],
|
|
"end": branch_code[1],
|
|
"pattern": record.get("Branch identifier pattern", ""),
|
|
"example": record.get("Branch identifier example", ""),
|
|
},
|
|
"account_code": {
|
|
"start": account_start,
|
|
"end": bban_length,
|
|
"example": record.get("Domestic account number example", ""),
|
|
},
|
|
}
|
|
|
|
|
|
def parse_other_territories(value):
|
|
"""Parse other territories from string."""
|
|
if not value or value == "N/A":
|
|
return []
|
|
# Extract all country codes
|
|
return re.findall(COUNTRY_CODE_PATTERN, value)
|
|
|
|
|
|
def process_registry(records):
|
|
"""Process raw records into structured registry."""
|
|
registry = {}
|
|
|
|
for record in records:
|
|
country_code_raw = record.get("IBAN prefix country code (ISO 3166)", "")
|
|
if not country_code_raw:
|
|
continue
|
|
|
|
# Extract country code
|
|
match = re.search(COUNTRY_CODE_PATTERN, country_code_raw)
|
|
if not match:
|
|
continue
|
|
|
|
country_code = match.group()
|
|
|
|
# Parse SEPA status
|
|
sepa = record.get("SEPA country", "").strip().lower() == "yes"
|
|
|
|
# Parse other territories
|
|
other_territories = parse_other_territories(
|
|
record.get("Country code includes other countries/territories", "")
|
|
)
|
|
|
|
# Build registry entry
|
|
entry = {
|
|
"country_name": record.get("Name of country", ""),
|
|
"country_code": country_code,
|
|
"sepa_country": sepa,
|
|
"bban": {
|
|
"spec": record.get("BBAN structure", ""),
|
|
"length": parse_int(record.get("BBAN length", "0")),
|
|
"example": record.get("BBAN example", ""),
|
|
},
|
|
"iban": {
|
|
"spec": record.get("IBAN structure", ""),
|
|
"length": parse_int(record.get("IBAN length", "0")),
|
|
"example_electronic": record.get("IBAN electronic format example", ""),
|
|
"example_print": record.get("IBAN print format example", ""),
|
|
},
|
|
"positions": process_positions(record),
|
|
"effective_date": record.get("Effective date", ""),
|
|
"other_territories": other_territories,
|
|
}
|
|
|
|
registry[country_code] = entry
|
|
|
|
# Also register other territories under the same rules
|
|
for territory_code in other_territories:
|
|
if territory_code and territory_code not in registry:
|
|
registry[territory_code] = {
|
|
**entry,
|
|
"country_code": territory_code,
|
|
"parent_country": country_code,
|
|
}
|
|
|
|
return registry
|
|
|
|
|
|
def generate_test_fixtures(registry):
|
|
"""Generate test fixtures for validation."""
|
|
fixtures = {
|
|
"valid_ibans": {},
|
|
"country_specs": {},
|
|
"metadata": {
|
|
"total_countries": len(registry),
|
|
"sepa_countries": sum(
|
|
1 for c in registry.values() if c.get("sepa_country")
|
|
),
|
|
"source": "SWIFT IBAN Registry",
|
|
"format_version": "TXT Release 100",
|
|
},
|
|
}
|
|
|
|
for code, entry in sorted(registry.items()):
|
|
# Valid IBAN examples
|
|
if entry["iban"]["example_electronic"]:
|
|
fixtures["valid_ibans"][code] = {
|
|
"electronic": entry["iban"]["example_electronic"],
|
|
"print": entry["iban"]["example_print"],
|
|
"country_name": entry["country_name"],
|
|
}
|
|
|
|
# Country specifications
|
|
fixtures["country_specs"][code] = {
|
|
"country_name": entry["country_name"],
|
|
"iban_length": entry["iban"]["length"],
|
|
"bban_length": entry["bban"]["length"],
|
|
"iban_spec": entry["iban"]["spec"],
|
|
"bban_spec": entry["bban"]["spec"],
|
|
"sepa": entry["sepa_country"],
|
|
"positions": entry["positions"],
|
|
"effective_date": entry["effective_date"],
|
|
}
|
|
|
|
return fixtures
|
|
|
|
|
|
if __name__ == "__main__":
|
|
print("Parsing IBAN Registry from local file...")
|
|
|
|
# Parse the registry
|
|
records = parse_registry("iban-registry-100.txt")
|
|
print(f"✓ Parsed {len(records)} records")
|
|
|
|
# Process into structured format
|
|
registry = process_registry(records)
|
|
print(f"✓ Processed {len(registry)} country codes")
|
|
|
|
# Generate test fixtures
|
|
fixtures = generate_test_fixtures(registry)
|
|
print(f"✓ Generated fixtures for {len(fixtures['valid_ibans'])} countries")
|
|
print(f"✓ SEPA countries: {fixtures['metadata']['sepa_countries']}")
|
|
|
|
# Save full registry
|
|
with open("iban_registry_full.json", "w") as f:
|
|
json.dump(registry, f, indent=2, ensure_ascii=False)
|
|
print("✓ Saved: iban_registry_full.json")
|
|
|
|
# Save test fixtures
|
|
with open("iban_test_fixtures.json", "w") as f:
|
|
json.dump(fixtures, f, indent=2, ensure_ascii=False)
|
|
print("✓ Saved: iban_test_fixtures.json")
|
|
|
|
# Generate summary report
|
|
print("\n" + "=" * 70)
|
|
print("IBAN REGISTRY SUMMARY - SINGLE SOURCE OF TRUTH")
|
|
print("=" * 70)
|
|
print(f"Total countries/territories: {fixtures['metadata']['total_countries']}")
|
|
print(f"SEPA countries: {fixtures['metadata']['sepa_countries']}")
|
|
print(f"\nIBAN Length Distribution:")
|
|
|
|
length_dist = {}
|
|
for spec in fixtures["country_specs"].values():
|
|
length = spec["iban_length"]
|
|
if length > 0:
|
|
length_dist[length] = length_dist.get(length, 0) + 1
|
|
|
|
for length in sorted(length_dist.keys()):
|
|
print(f" {length:2d} chars: {length_dist[length]:2d} countries")
|
|
|
|
if length_dist:
|
|
print(f"\nShortest IBAN: {min(length_dist.keys())} characters")
|
|
print(f"Longest IBAN: {max(length_dist.keys())} characters")
|
|
|
|
# Show sample countries
|
|
print(f"\nSample Countries (first 15):")
|
|
print(f"{'Code':<5} {'Country Name':<35} {'Length':<7} {'SEPA':<6} {'Example'}")
|
|
print("-" * 100)
|
|
|
|
for code in sorted(list(fixtures["valid_ibans"].keys()))[:15]:
|
|
entry = fixtures["country_specs"][code]
|
|
iban_ex = fixtures["valid_ibans"][code]["electronic"][:30]
|
|
print(
|
|
f"{code:<5} {entry['country_name'][:35]:<35} {entry['iban_length']:<7} "
|
|
f"{'Yes' if entry['sepa'] else 'No':<6} {iban_ex}"
|
|
)
|
|
|
|
# Show countries with special characteristics
|
|
print(f"\nSpecial Characteristics:")
|
|
|
|
# Find shortest and longest
|
|
shortest_code = min(
|
|
fixtures["country_specs"].items(),
|
|
key=lambda x: x[1]["iban_length"] if x[1]["iban_length"] > 0 else 999,
|
|
)
|
|
longest_code = max(
|
|
fixtures["country_specs"].items(), key=lambda x: x[1]["iban_length"]
|
|
)
|
|
|
|
print(
|
|
f" Shortest: {shortest_code[0]} ({shortest_code[1]['country_name']}) - "
|
|
f"{shortest_code[1]['iban_length']} chars"
|
|
)
|
|
print(
|
|
f" Longest: {longest_code[0]} ({longest_code[1]['country_name']}) - "
|
|
f"{longest_code[1]['iban_length']} chars"
|
|
)
|
|
|
|
print("\n" + "=" * 70)
|
|
print("✓ Processing complete! Use these files for testing:")
|
|
print(" • iban_registry_full.json - Complete registry with all fields")
|
|
print(" • iban_test_fixtures.json - Test fixtures for valid IBANs")
|
|
print("=" * 70)
|