Parser for EDRPOU completed

This commit is contained in:
Danil Negrienko 2024-12-12 01:32:31 -05:00
parent a7692124e9
commit f9a45fb29d
10 changed files with 449 additions and 38 deletions

View File

@ -1,7 +1,9 @@
defmodule UkraineTaxidEx.Base do
@callback length() :: non_neg_integer()
@callback parse(data :: {:ok, String.t()} | String.t(), options :: Keyword.t()) ::
{:ok, term} | {:error, atom()}
@callback to_map(data :: term) :: map()
@callback to_string(data :: term) :: String.t()
@callback length() :: non_neg_integer()
defmacro __using__(_) do
quote do

View File

@ -1,5 +1,5 @@
defmodule UkraineTaxidEx.BaseParser do
@type options :: [incomplete: boolean]
@type options :: [normalize?: boolean, clean?: boolean]
@callback parse(string :: String.t(), options :: options()) :: {:ok, term} | {:error, atom}
defmacro __using__(_) do
@ -7,13 +7,6 @@ defmodule UkraineTaxidEx.BaseParser do
@behaviour UkraineTaxidEx.BaseParser
alias UkraineTaxidEx.BaseParser
@impl BaseParser
@spec parse(string :: String.t(), options :: BaseParser.options()) ::
{:ok, term} | {:error, atom}
def parse(data, options \\ [incomplete: false])
defoverridable parse: 2, parse: 1
end
end
end

View File

@ -11,14 +11,38 @@ defmodule UkraineTaxidEx.Commons do
@pad "0"
@doc """
Normalizes the input value to a string of the specified length.
Takes a value and required length parameter.
Pads the result with leading zeros.
Returns a string.
## Examples
```elixir
iex> UkraineTaxidEx.Commons.normalize(123, 5)
"00123"
iex> UkraineTaxidEx.Commons.normalize("987", 5)
"00987"
```
"""
# digits/2 is used with its default `clean?` (false), so non-digit characters
# are NOT stripped here; the value is only left-padded and re-joined.
@spec normalize(value :: String.t() | integer, length :: non_neg_integer()) :: String.t()
def normalize(value, length) do
  value
  |> digits(length)
  |> undigits()
end
@doc """
Converts a string or integer to a list of digits.
Takes a value and optional length parameter.
Takes a value and optional length and clean parameters.
When length is provided, pads the result with leading zeros.
When clean is true, removes all non-digit characters from the string.
Returns list of digits as integers.
## Examples
```elixir
iex> UkraineTaxidEx.Commons.digits("123")
[1, 2, 3]
@ -27,31 +51,112 @@ defmodule UkraineTaxidEx.Commons do
iex> UkraineTaxidEx.Commons.digits("987", 5)
[0, 0, 9, 8, 7]
```
"""
# NOTE(review): this span looks like diff residue — the earlier two-argument
# spec/clauses (without `clean?`) appear interleaved with the newer
# three-argument ones. Confirm which lines are current before compiling as-is.
@spec digits(value :: String.t() | integer, length :: non_neg_integer()) :: digits
def digits(value, length \\ 0)
def digits(value, length) when is_integer(value), do: digits("#{value}", length)
@spec digits(value :: String.t() | integer, length :: non_neg_integer(), clean? :: boolean()) ::
digits
# Bodiless head: declares the defaults shared by the clauses below.
def digits(value, length \\ 0, clean? \\ false)
# Integers are rendered as a string and re-dispatched; `clean?` is forced to false on this path.
def digits(value, length, _clean?) when is_integer(value), do: digits("#{value}", length, false)
def digits(value, length) when is_binary(value) do
def digits(value, length, clean?) when is_binary(value) do
value
|> clean()
# Strips non-digit characters only when `clean?` is truthy; otherwise the string passes through unchanged.
|> then(fn v -> (clean? && clean(v)) || v end)
# Left-pads with @pad until the string is `length` characters long.
|> String.pad_leading(length, @pad)
|> String.graphemes()
# String.to_integer/1 raises ArgumentError for a grapheme that is not a digit.
|> Enum.map(&String.to_integer/1)
end
@doc """
Converts list of digits to a string.
## Examples
```elixir
iex> UkraineTaxidEx.Commons.undigits([1, 2, 3])
"123"
```
"""
@spec undigits(digits :: digits) :: String.t()
# Concatenates the digits, in order, with no separator between them.
def undigits(digits) do
  Enum.join(digits, "")
end
@doc """
Gets the check digit (last digit) from a list of digits.
## Examples
```elixir
iex> UkraineTaxidEx.Commons.check_digit([1, 2, 3, 4])
4
```
"""
# List.last/1 returns nil for an empty list, so nil is part of the contract.
@spec check_digit(digits :: digits) :: digit | nil
def check_digit(digits), do: List.last(digits)
@doc """
Gets all digits except the check digit from a list of digits.
## Examples
```elixir
iex> UkraineTaxidEx.Commons.value_digits([1, 2, 3, 4])
[1, 2, 3]
```
"""
@spec value_digits(digits :: digits) :: digits
# Keeps every element except the last one (the check digit).
def value_digits(digits) do
  keep = length(digits) - 1
  Enum.take(digits, keep)
end
@spec value_and_check_digits(digits :: digits) :: {digits, digit}
@doc """
Splits a list of digits into value digits and check digit.
## Examples
```elixir
iex> UkraineTaxidEx.Commons.value_and_check_digits([1, 2, 3, 4])
{[1, 2, 3], 4}
```
"""
# Pairs the leading value digits with the trailing check digit.
@spec value_and_check_digits(digits :: digits) :: {value_digits :: digits, check_digit :: digit}
def value_and_check_digits(digits) do
  leading = value_digits(digits)
  trailing = check_digit(digits)
  {leading, trailing}
end
@doc """
Returns the full list of digits together with its check digit in one tuple.
Unlike `value_and_check_digits/1`, the first element still contains the check digit.
For an empty list the check digit is `nil`.
## Examples
```elixir
iex> UkraineTaxidEx.Commons.digits_and_check_digit([1, 2, 3, 4])
{[1, 2, 3, 4], 4}
```
"""
@spec digits_and_check_digit(digits :: digits) :: {digits :: digits, check_digit :: digit | nil}
def digits_and_check_digit(digits), do: {digits, check_digit(digits)}
# Removes every character that is not a digit.
@spec clean(string :: String.t()) :: String.t()
defp clean(string) do
  non_digit = ~r/[^\d]/
  String.replace(string, non_digit, "")
end
@doc """
Wraps data in an :ok tuple.
## Examples
```elixir
iex> UkraineTaxidEx.Commons.ok("data")
{:ok, "data"}
```
"""
# Tags any term as a successful result.
@spec ok(data :: any()) :: {:ok, any()}
def ok(data) do
  {:ok, data}
end
@doc """
Wraps error in an :error tuple.
## Examples
```elixir
iex> UkraineTaxidEx.Commons.error("error")
{:error, "error"}
```
"""
# Tags any term as a failed result.
@spec error(error :: any()) :: {:error, any()}
def error(error) do
  {:error, error}
end
end

View File

@ -6,6 +6,7 @@ defmodule UkraineTaxidEx.Edrpou do
"""
@length 8
alias UkraineTaxidEx.Edrpou.Parser
use UkraineTaxidEx.Base
@type t :: %__MODULE__{
@ -15,4 +16,9 @@ defmodule UkraineTaxidEx.Edrpou do
}
defstruct code: nil, check_digit: nil, check_sum: nil
# Behaviour callback: parsing is delegated to the aliased Parser module
# (UkraineTaxidEx.Edrpou.Parser). The default options written here are the
# same keyword list that Parser.parse/2 declares as its own default.
@impl Base
@spec parse(data :: {:ok, String.t()} | String.t(), options :: Keyword.t()) ::
{:ok, t()} | {:error, atom()}
defdelegate parse(data, options \\ [normalize?: false, clean?: false]), to: Parser
end

View File

@ -5,8 +5,8 @@ defmodule UkraineTaxidEx.Edrpou.CheckSum do
@typedoc """
Coefficients (weights) for digits to calculate EDRPOU check sum may be two types:
base ([1, 2, 3, 4, 5, 6, 7] for EDRPOU < 30M or EDRPOU > 60M)
or alternative ([7, 1, 2, 3, 4, 5, 6] if EDRPOU between 30M and 60M)
base (`[1, 2, 3, 4, 5, 6, 7]` for EDRPOU < 30M or EDRPOU > 60M)
or alternative (`[7, 1, 2, 3, 4, 5, 6]` if EDRPOU between 30M and 60M)
"""
@type weights_type :: :base | :alternative
@ -17,7 +17,7 @@ defmodule UkraineTaxidEx.Edrpou.CheckSum do
2. Multiply each digit by its corresponding weight
3. Sum the products
4. Take mod 11 of the sum
5. If mod 11 is greater than or equal to 10, repeat steps 2-4 with doubled weights
5. If mod 11 is greater than or equal to 10, repeat steps 2-4 with weights +2
"""
@spec check_sum(digits :: C.digits()) :: integer()
def check_sum(digits) do
@ -29,7 +29,7 @@ defmodule UkraineTaxidEx.Edrpou.CheckSum do
value_digits = value_digits(digits)
case calculate_check_sum(value_digits, weights(type, false)) do
s when s >= 10 -> calculate_check_sum(value_digits, weights(type, true))
s when s >= 10 -> rem(calculate_check_sum(value_digits, weights(type, true)), 10)
s -> s
end
end

View File

@ -2,23 +2,24 @@ defmodule UkraineTaxidEx.Edrpou.Error do
@type error() ::
:invalid_length
| :invalid_checksum
| :length_to_long
| :length_to_short
| :length_too_long
| :length_too_short
@type errors() :: [error()]
@errors [
:invalid_length,
:invalid_checksum,
:length_to_long,
:length_to_short
:length_too_long,
:length_too_short
]
@messages [
invalid_length: "EDRPOU violates the required length",
invalid_checksum: "EDRPOU checksum is invalid",
length_to_long: "EDRPOU longer then required length",
length_to_short: "EDRPOU shorter then required length"
length_too_long: "EDRPOU longer then required length",
length_too_short: "EDRPOU shorter then required length"
]
@spec message(error()) :: String.t()
# Maps an error atom, either bare or wrapped in an `{:error, _}` tuple, to its
# message from @messages; anything not listed in @errors gets the fallback text.
@spec message({:error, error()} | error()) :: String.t()
def message({:error, reason}) when reason in @errors, do: message(reason)
def message(reason) when reason in @errors, do: @messages[reason]
def message(_other), do: "Undefined error"
end

View File

@ -1,20 +1,84 @@
defmodule UkraineTaxidEx.Edrpou.Parser do
@moduledoc """
Parser module for EDRPOU (Unified State Register of Ukrainian Enterprises and Organizations) codes.
Handles validation and structure creation for EDRPOU codes with additional options for normalization and cleaning.
"""
alias UkraineTaxidEx.Edrpou
import UkraineTaxidEx.Edrpou, only: [length: 0]
import UkraineTaxidEx.Edrpou.CheckSum, only: [check_sum: 1]
import UkraineTaxidEx.Commons, only: [check_digit: 1, digits: 2, ok: 1]
import UkraineTaxidEx.Edrpou.Validator, only: [validate: 1]
import UkraineTaxidEx.Commons, only: [check_digit: 1, digits: 1, digits: 3, undigits: 1, ok: 1]
use UkraineTaxidEx.BaseParser
def parse(edrpou_string, incomplete: false) do
digits = digits(edrpou_string, length())
@type edrpou_string() :: String.t()
@type edrpou_string_or_ok() :: edrpou_string() | {:ok, edrpou_string()}
@type edrpou() :: Edrpou.t()
@type edrpou_or_error() ::
{:ok, Edrpou.t()}
| {:error,
:length_too_short
| :length_too_long
| :invalid_length
| :invalid_checksum}
%{
code: edrpou_string,
check_sum: check_sum(digits),
check_digit: check_digit(digits)
}
@impl BaseParser
@doc """
Parses an EDRPOU code string into a structured format.
Options:
- normalize?: When true, pads string to full EDRPOU length. Defaults to false.
- clean?: When true, removes non-digit characters before processing. Defaults to false.
Returns {:ok, %Edrpou{}} for valid codes or {:error, reason} for invalid.
## Examples
iex> UkraineTaxidEx.Edrpou.Parser.parse("00032112")
{:ok, %UkraineTaxidEx.Edrpou{code: "00032112", check_digit: 2, check_sum: 2}}
iex> UkraineTaxidEx.Edrpou.Parser.parse({:ok, "00032112"})
{:ok, %UkraineTaxidEx.Edrpou{code: "00032112", check_digit: 2, check_sum: 2}}
iex> UkraineTaxidEx.Edrpou.Parser.parse("32129", normalize?: true)
{:ok, %UkraineTaxidEx.Edrpou{code: "00032129", check_digit: 9, check_sum: 9}}
iex> UkraineTaxidEx.Edrpou.Parser.parse("9 30test62 78", normalize?: true, clean?: true)
{:ok, %UkraineTaxidEx.Edrpou{code: "09306278", check_digit: 8, check_sum: 8}}
iex> UkraineTaxidEx.Edrpou.Parser.parse("123")
{:error, :length_too_short}
iex> UkraineTaxidEx.Edrpou.Parser.parse("123456789")
{:error, :length_too_long}
iex> UkraineTaxidEx.Edrpou.Parser.parse("123", normalize?: true)
{:error, :invalid_checksum}
"""
@spec parse(
        data :: edrpou_string_or_ok() | {:error, atom()},
        options :: BaseParser.options()
      ) :: edrpou_or_error() | {:error, atom()}
def parse(data, options \\ [normalize?: false, clean?: false])

# Piped results are accepted: `{:ok, string}` is unwrapped and parsed,
# `{:error, reason}` is passed through untouched.
def parse({:ok, edrpou_string}, options), do: parse(edrpou_string, options)
def parse({:error, error}, _options), do: {:error, error}

def parse(edrpou_string, options) do
  # A pad length of 0 leaves the string unpadded; the full EDRPOU length is
  # used only when `normalize?` is set.
  pad_length = if Keyword.get(options, :normalize?, false), do: length(), else: 0
  clean? = Keyword.get(options, :clean?, false)

  edrpou_string
  |> digits(pad_length, clean?)
  |> undigits()
  |> validate()
  |> generate_edrpou()
end
# Validation failures are returned as-is; a validated code is turned into the
# result struct (via create_struct/1) and wrapped in an :ok tuple.
defp generate_edrpou({:error, _reason} = failure), do: failure

defp generate_edrpou({:ok, edrpou_string}) do
  code_digits = digits(edrpou_string)

  attrs = %{
    code: edrpou_string,
    check_sum: check_sum(code_digits),
    check_digit: check_digit(code_digits)
  }

  attrs
  |> create_struct()
  |> ok()
end

View File

@ -0,0 +1,60 @@
defmodule UkraineTaxidEx.Edrpou.Validator do
  @moduledoc """
  Validation helpers for EDRPOU codes.

  Checks that a code has the required number of characters and that its last
  digit matches the calculated checksum.
  """

  import UkraineTaxidEx.Commons, only: [digits: 1, digits_and_check_digit: 1, error: 1, ok: 1]
  import UkraineTaxidEx.Edrpou, only: [length: 0]
  import UkraineTaxidEx.Edrpou.CheckSum, only: [check_sum: 1]

  @doc """
  Validates the length and the checksum of an EDRPOU string.

  The checks run in this order and the first failure wins:

    * `{:error, :length_too_short}` - fewer characters than the required length
    * `{:error, :length_too_long}` - more characters than the required length
    * `{:error, :invalid_checksum}` - the last digit differs from the calculated checksum
    * `{:ok, edrpou}` - every check passed
  """
  @spec validate(String.t()) ::
          {:ok, String.t()}
          | {:error, :length_too_short | :length_too_long | :invalid_length | :invalid_checksum}
  def validate(edrpou) do
    required = length()

    case String.length(edrpou) do
      actual when actual < required ->
        error(:length_too_short)

      actual when actual > required ->
        error(:length_too_long)

      _exact ->
        if violates_checksum?(edrpou), do: error(:invalid_checksum), else: ok(edrpou)
    end
  end

  @doc "Returns `true` when the EDRPOU length differs from the required length."
  @spec violates_length?(String.t()) :: boolean
  def violates_length?(edrpou) do
    actual = String.length(edrpou)
    actual != length()
  end

  @doc "Returns `true` when the EDRPOU is shorter than the required length."
  @spec violates_length_too_short?(String.t()) :: boolean
  def violates_length_too_short?(edrpou) do
    actual = String.length(edrpou)
    actual < length()
  end

  @doc "Returns `true` when the EDRPOU is longer than the required length."
  @spec violates_length_too_long?(String.t()) :: boolean
  def violates_length_too_long?(edrpou) do
    actual = String.length(edrpou)
    actual > length()
  end

  @doc "Returns `true` when the last digit of the EDRPOU does not equal its calculated checksum."
  @spec violates_checksum?(String.t()) :: boolean
  def violates_checksum?(edrpou) do
    digit_list = digits(edrpou)
    {all_digits, expected} = digits_and_check_digit(digit_list)
    calculated = check_sum(all_digits)

    calculated != expected
  end
end

View File

@ -23,9 +23,9 @@ defmodule UkraineTaxidEx.CommonsTest do
end
test "handles strings with non-digit characters" do
assert Commons.digits("1-2-3") == [1, 2, 3]
assert Commons.digits("A1B2C3") == [1, 2, 3]
assert Commons.digits("12.34") == [1, 2, 3, 4]
assert Commons.digits("1-2-3", 0, true) == [1, 2, 3]
assert Commons.digits("A1B2C3", 0, true) == [1, 2, 3]
assert Commons.digits("12.34", 0, true) == [1, 2, 3, 4]
end
test "handles empty string" do
@ -75,6 +75,25 @@ defmodule UkraineTaxidEx.CommonsTest do
end
end
# The first tuple element must be the untouched input list; the second is its last digit.
describe "digits_and_check_digit/1" do
  test "returns tuple with original digits and check digit" do
    input = [1, 2, 3, 4]
    assert Commons.digits_and_check_digit(input) == {input, 4}
  end

  test "works with different digit sequences" do
    for {input, last} <- [{[5, 6, 7, 8], 8}, {[9, 0, 1, 2], 2}] do
      assert Commons.digits_and_check_digit(input) == {input, last}
    end
  end

  test "handles single digit list" do
    input = [1]
    assert Commons.digits_and_check_digit(input) == {input, 1}
  end

  test "handles empty list" do
    input = []
    assert Commons.digits_and_check_digit(input) == {input, nil}
  end
end
describe "ok/1" do
test "wraps data in ok tuple" do
assert Commons.ok(123) == {:ok, 123}

View File

@ -0,0 +1,161 @@
defmodule UkraineTaxidEx.Edrpou.ParserTest do
  # NOTE(review): async is enabled on the assumption that parsing keeps no
  # shared state (the pipeline under test only transforms its arguments) — confirm.
  use ExUnit.Case, async: true

  alias UkraineTaxidEx.Edrpou
  alias UkraineTaxidEx.Edrpou.Parser

  doctest UkraineTaxidEx.Edrpou.Parser

  # Number of digits in a complete EDRPOU code; shorter inputs are expected to
  # be left-padded with "0" up to this length when normalization is requested.
  @edrpou_length 8

  describe "parse/2" do
    test "successfully parses valid EDRPOU codes (strict cases, correct without normalization)" do
      valid_codes = ~w(
        00032112 00032129 09306278 09620081 09801546
        09806443 09807595 09807750 09807862 09809192
        13857564 14070197 14282829 14305909 14352406
        14359845 14360080 14360506 14360570 14360920
        14361575 19355562 19356610 19358784 19390819
        20023569 20034231 20042839 20496061 20953647
        21133352 21322127 21580639 21650966 21665382
        21677333 21684818 21685166 21685485 22868414
        23494714 23697280 26237202 26410155 26519933
        26520688 32388371 33695095 34575675 34576883
        35345213 35590956 35591059 35810511 35960913
        36002395 36061927 36520434 37515069 38324133
        38690683 38870739 39544699 39849797 43650988
      )

      for code <- valid_codes do
        assert {:ok, %Edrpou{code: ^code}} = Parser.parse(code)
      end
    end

    test "successfully parses valid EDRPOU codes (correct with normalization)" do
      valid_codes = ~w(
        32112 032129 0032129 9306278 9620081 9801546 9806443 9807595
        13857564 14361575 19390819 22868414 34575675 35960913 36002395
        36061927 43650988
      )

      for code <- valid_codes do
        leaded_code = String.pad_leading(code, @edrpou_length, "0")
        assert {:ok, %Edrpou{code: ^leaded_code}} = Parser.parse(code, normalize?: true)
      end
    end

    test "successfully parses valid EDRPOU codes (correct after clean from non digit symbols)" do
      # Kept as an explicit list: several entries contain spaces, which ~w would split on.
      valid_codes = [
        "00-03-21-12",
        "00_03_21_29",
        "00 03 21 29",
        "0930 6278",
        "096 200 81",
        "09 80 1546",
        "09.80.64.43",
        "09/80/75/95",
        "13 857 564",
        "14361575",
        "193.908 19",
        "22868414",
        "345f75u67ck5",
        "35960913",
        "36002395",
        "36061927",
        "43650988"
      ]

      for code <- valid_codes do
        cleaned_code = String.replace(code, ~r/[^0-9]/, "")

        assert {:ok, %Edrpou{code: ^cleaned_code}} =
                 Parser.parse(code, normalize?: false, clean?: true)
      end
    end

    test "successfully parses valid EDRPOU codes (correct after clean and normalize)" do
      valid_codes = [
        "-3-21-12",
        "0_3_21_29",
        " 0003 21 29",
        "9306278.",
        "96 200 81",
        "09 80 1546",
        "09.80.64.43",
        "09/80/75/95"
      ]

      for code <- valid_codes do
        cleaned_and_normalized_code =
          code
          |> String.replace(~r/[^0-9]/, "")
          |> String.pad_leading(@edrpou_length, "0")

        assert {:ok, %Edrpou{code: ^cleaned_and_normalized_code}} =
                 Parser.parse(code, normalize?: true, clean?: true)
      end
    end
  end
end