From f9a45fb29d560455e3f7735e9bbeda772a9f6ff8 Mon Sep 17 00:00:00 2001 From: Danylo Negrienko Date: Thu, 12 Dec 2024 01:32:31 -0500 Subject: [PATCH] Parser for EDRPOU completed --- lib/ukraine_taxid_ex/base.ex | 4 +- lib/ukraine_taxid_ex/base_parser.ex | 9 +- lib/ukraine_taxid_ex/commons.ex | 119 +++++++++++++- lib/ukraine_taxid_ex/edrpou.ex | 6 + lib/ukraine_taxid_ex/edrpou/check_sum.ex | 8 +- lib/ukraine_taxid_ex/edrpou/error.ex | 15 +- lib/ukraine_taxid_ex/edrpou/parser.ex | 80 ++++++++- lib/ukraine_taxid_ex/edrpou/validator.ex | 60 +++++++ test/ukraine_taxid_ex/commons_test.exs | 25 ++- test/ukraine_taxid_ex/edrpou/parser_test.exs | 161 +++++++++++++++++++ 10 files changed, 449 insertions(+), 38 deletions(-) create mode 100644 lib/ukraine_taxid_ex/edrpou/validator.ex create mode 100644 test/ukraine_taxid_ex/edrpou/parser_test.exs diff --git a/lib/ukraine_taxid_ex/base.ex b/lib/ukraine_taxid_ex/base.ex index e95c2f4..5e80364 100644 --- a/lib/ukraine_taxid_ex/base.ex +++ b/lib/ukraine_taxid_ex/base.ex @@ -1,7 +1,9 @@ defmodule UkraineTaxidEx.Base do + @callback length() :: non_neg_integer() + @callback parse(data :: {:ok, String.t()} | String.t(), options :: Keyword.t()) :: + {:ok, term} | {:error, atom()} @callback to_map(data :: term) :: map() @callback to_string(data :: term) :: String.t() - @callback length() :: non_neg_integer() defmacro __using__(_) do quote do diff --git a/lib/ukraine_taxid_ex/base_parser.ex b/lib/ukraine_taxid_ex/base_parser.ex index 5629e94..0a15945 100644 --- a/lib/ukraine_taxid_ex/base_parser.ex +++ b/lib/ukraine_taxid_ex/base_parser.ex @@ -1,5 +1,5 @@ defmodule UkraineTaxidEx.BaseParser do - @type options :: [incomplete: boolean] + @type options :: [normalize?: boolean, clean?: boolean] @callback parse(string :: String.t(), options :: options()) :: {:ok, term} | {:error, atom} defmacro __using__(_) do @@ -7,13 +7,6 @@ defmodule UkraineTaxidEx.BaseParser do @behaviour UkraineTaxidEx.BaseParser alias UkraineTaxidEx.BaseParser - - @impl 
BaseParser - @spec parse(string :: String.t(), options :: BaseParser.options()) :: - {:ok, term} | {:error, atom} - def parse(data, options \\ [incomplete: false]) - - defoverridable parse: 2, parse: 1 end end end diff --git a/lib/ukraine_taxid_ex/commons.ex b/lib/ukraine_taxid_ex/commons.ex index 8d639ea..4595d07 100644 --- a/lib/ukraine_taxid_ex/commons.ex +++ b/lib/ukraine_taxid_ex/commons.ex @@ -11,14 +11,38 @@ defmodule UkraineTaxidEx.Commons do @pad "0" + @doc """ + Normalizes the input value to a string of the specified length. + Takes a value and required length parameter. + Pads the result with leading zeros. + Returns a string. + + ## Examples + + ```elixir + iex> UkraineTaxidEx.Commons.normalize(123, 5) + "00123" + + iex> UkraineTaxidEx.Commons.normalize("987", 5) + "00987" + ``` + """ + def normalize(value, length) do + value + |> digits(length) + |> undigits() + end + @doc """ Converts a string or integer to a list of digits. - Takes a value and optional length parameter. + Takes a value and optional length and clean parameters. When length is provided, pads the result with leading zeros. + When clean is true, removes all non-digit characters from the string. Returns list of digits as integers. ## Examples + ```elixir iex> UkraineTaxidEx.Commons.digits("123") [1, 2, 3] @@ -27,31 +51,112 @@ defmodule UkraineTaxidEx.Commons do iex> UkraineTaxidEx.Commons.digits("987", 5) [0, 0, 9, 8, 7] + ``` """ - @spec digits(value :: String.t() | integer, length :: non_neg_integer()) :: digits - def digits(value, length \\ 0) - def digits(value, length) when is_integer(value), do: digits("#{value}", length) + @spec digits(value :: String.t() | integer, length :: non_neg_integer(), clean? :: boolean()) :: + digits + def digits(value, length \\ 0, clean? \\ false) + def digits(value, length, _clean?) when is_integer(value), do: digits("#{value}", length, false) - def digits(value, length) when is_binary(value) do + def digits(value, length, clean?) 
when is_binary(value) do value - |> clean() + |> then(fn v -> (clean? && clean(v)) || v end) |> String.pad_leading(length, @pad) |> String.graphemes() |> Enum.map(&String.to_integer/1) end + @doc """ + Converts a list of digits to a string. + + ## Examples + + ```elixir + iex> UkraineTaxidEx.Commons.undigits([1, 2, 3]) + "123" + ``` + """ + @spec undigits(digits :: digits) :: String.t() + def undigits(digits), do: Enum.join(digits) + + @doc """ + Gets the check digit (last digit) from a list of digits. + + ## Examples + + ```elixir + iex> UkraineTaxidEx.Commons.check_digit([1, 2, 3, 4]) + 4 + ``` + """ @spec check_digit(digits :: digits) :: digit def check_digit(digits), do: List.last(digits) + @doc """ + Gets all digits except the check digit from a list of digits. + + ## Examples + + ```elixir + iex> UkraineTaxidEx.Commons.value_digits([1, 2, 3, 4]) + [1, 2, 3] + ``` + """ @spec value_digits(digits :: digits) :: digits def value_digits(digits), do: Enum.take(digits, length(digits) - 1) - @spec value_and_check_digits(digits :: digits) :: {digits, digit} + @doc """ + Splits a list of digits into value digits and check digit. + + ## Examples + + ```elixir + iex> UkraineTaxidEx.Commons.value_and_check_digits([1, 2, 3, 4]) + {[1, 2, 3], 4} + ``` + """ + @spec value_and_check_digits(digits :: digits) :: {value_digits :: digits, check_digit :: digit} def value_and_check_digits(digits), do: {value_digits(digits), check_digit(digits)} + @doc """ + Returns the full list of digits and its check digit separately in one tuple. + + ## Examples + + ```elixir + iex> UkraineTaxidEx.Commons.digits_and_check_digit([1, 2, 3, 4]) + {[1, 2, 3, 4], 4} + ``` + """ + @spec digits_and_check_digit(digits :: digits) :: {value_digits :: digits, check_digit :: digit} + def digits_and_check_digit(digits), do: {digits, check_digit(digits)} + @spec clean(string :: String.t()) :: String.t() defp clean(string), do: String.replace(string, ~r/[^\d]/, "") + @doc """ + Wraps data in an :ok tuple. 
+ + ## Examples + + ```elixir + iex> UkraineTaxidEx.Commons.ok("data") + {:ok, "data"} + ``` + """ + @spec ok(data :: any()) :: {:ok, any()} def ok(data), do: {:ok, data} + + @doc """ + Wraps error in an :error tuple. + + ## Examples + + ```elixir + iex> UkraineTaxidEx.Commons.error("error") + {:error, "error"} + ``` + """ + @spec error(error :: any()) :: {:error, any()} def error(error), do: {:error, error} end diff --git a/lib/ukraine_taxid_ex/edrpou.ex b/lib/ukraine_taxid_ex/edrpou.ex index 63759b0..2706c0b 100644 --- a/lib/ukraine_taxid_ex/edrpou.ex +++ b/lib/ukraine_taxid_ex/edrpou.ex @@ -6,6 +6,7 @@ defmodule UkraineTaxidEx.Edrpou do """ @length 8 + alias UkraineTaxidEx.Edrpou.Parser use UkraineTaxidEx.Base @type t :: %__MODULE__{ @@ -15,4 +16,9 @@ defmodule UkraineTaxidEx.Edrpou do } defstruct code: nil, check_digit: nil, check_sum: nil + + @impl Base + @spec parse(data :: {:ok, String.t()} | String.t(), options :: Keyword.t()) :: + {:ok, t()} | {:error, atom()} + defdelegate parse(data, options \\ [normalize?: false, clean?: false]), to: Parser end diff --git a/lib/ukraine_taxid_ex/edrpou/check_sum.ex b/lib/ukraine_taxid_ex/edrpou/check_sum.ex index 8993d62..1c116fa 100644 --- a/lib/ukraine_taxid_ex/edrpou/check_sum.ex +++ b/lib/ukraine_taxid_ex/edrpou/check_sum.ex @@ -5,8 +5,8 @@ defmodule UkraineTaxidEx.Edrpou.CheckSum do @typedoc """ Coefficients (weights) for digits to calculate EDRPOU check sum may be two types: - base ([1, 2, 3, 4, 5, 6, 7] for EDRPOU < 30M or EDRPOU > 60M) - or alternative ([7, 1, 2, 3, 4, 5, 6] if EDRPOU between 30M and 60M) + base (`[1, 2, 3, 4, 5, 6, 7] for EDRPOU < 30M or EDRPOU > 60M`) + or alternative (`[7, 1, 2, 3, 4, 5, 6] if EDRPOU between 30M and 60M`) """ @type weights_type :: :base | :alternative @@ -17,7 +17,7 @@ defmodule UkraineTaxidEx.Edrpou.CheckSum do 2. Multiply each digit by its corresponding weight 3. Sum the products 4. Take mod 11 of the sum - 5. 
If mod 11 is greater or equal than 10, repeat steps 2-4 with doubled weights + 5. If mod 11 is greater than or equal to 10, repeat steps 2-4 with the weights increased by 2 """ @spec check_sum(digits :: C.digits()) :: integer() def check_sum(digits) do @@ -29,7 +29,7 @@ defmodule UkraineTaxidEx.Edrpou.CheckSum do value_digits = value_digits(digits) case calculate_check_sum(value_digits, weights(type, false)) do - s when s >= 10 -> calculate_check_sum(value_digits, weights(type, true)) + s when s >= 10 -> rem(calculate_check_sum(value_digits, weights(type, true)), 10) s -> s end end diff --git a/lib/ukraine_taxid_ex/edrpou/error.ex b/lib/ukraine_taxid_ex/edrpou/error.ex index b520f6d..b0d5166 100644 --- a/lib/ukraine_taxid_ex/edrpou/error.ex +++ b/lib/ukraine_taxid_ex/edrpou/error.ex @@ -2,23 +2,24 @@ defmodule UkraineTaxidEx.Edrpou.Error do @type error() :: :invalid_length | :invalid_checksum - | :length_to_long - | :length_to_short + | :length_too_long + | :length_too_short @type errors() :: [error()] @errors [ :invalid_length, :invalid_checksum, - :length_to_long, - :length_to_short + :length_too_long, + :length_too_short ] @messages [ invalid_length: "EDRPOU violates the required length", invalid_checksum: "EDRPOU checksum is invalid", - length_to_long: "EDRPOU longer then required length", - length_to_short: "EDRPOU shorter then required length" + length_too_long: "EDRPOU longer then required length", + length_too_short: "EDRPOU shorter then required length" ] - @spec message(error()) :: String.t() + @spec message({:error, error()} | error()) :: String.t() + def message({:error, error}) when error in @errors, do: @messages[error] def message(error) when error in @errors, do: @messages[error] def message(_error), do: "Undefined error" end diff --git a/lib/ukraine_taxid_ex/edrpou/parser.ex b/lib/ukraine_taxid_ex/edrpou/parser.ex index a554eb5..5d80059 100644 --- a/lib/ukraine_taxid_ex/edrpou/parser.ex +++ b/lib/ukraine_taxid_ex/edrpou/parser.ex @@ -1,20 +1,84 @@ defmodule 
UkraineTaxidEx.Edrpou.Parser do + @moduledoc """ + Parser module for EDRPOU (Unified State Register of Ukrainian Enterprises and Organizations) codes. + Handles validation and structure creation for EDRPOU codes with additional options for normalization and cleaning. + """ + alias UkraineTaxidEx.Edrpou import UkraineTaxidEx.Edrpou, only: [length: 0] import UkraineTaxidEx.Edrpou.CheckSum, only: [check_sum: 1] - import UkraineTaxidEx.Commons, only: [check_digit: 1, digits: 2, ok: 1] + import UkraineTaxidEx.Edrpou.Validator, only: [validate: 1] + import UkraineTaxidEx.Commons, only: [check_digit: 1, digits: 1, digits: 3, undigits: 1, ok: 1] use UkraineTaxidEx.BaseParser - def parse(edrpou_string, incomplete: false) do - digits = digits(edrpou_string, length()) + @type edrpou_string() :: String.t() + @type edrpou_string_or_ok() :: edrpou_string() | {:ok, edrpou_string()} + @type edrpou() :: Edrpou.t() + @type edrpou_or_error() :: + {:ok, Edrpou.t()} + | {:error, + :length_too_short + | :length_too_long + | :invalid_length + | :invalid_checksum} - %{ - code: edrpou_string, - check_sum: check_sum(digits), - check_digit: check_digit(digits) - } + @impl BaseParser + + @doc """ + Parses an EDRPOU code string into a structured format. + Options: + - normalize?: When true, pads string to full EDRPOU length. Defaults to false. + - clean?: When true, removes non-digit characters before processing. Defaults to false. + Returns {:ok, %Edrpou{}} for valid codes or {:error, reason} for invalid. 
+ + ## Examples + + iex> UkraineTaxidEx.Edrpou.Parser.parse("00032112") + {:ok, %UkraineTaxidEx.Edrpou{code: "00032112", check_digit: 2, check_sum: 2}} + + iex> UkraineTaxidEx.Edrpou.Parser.parse({:ok, "00032112"}) + {:ok, %UkraineTaxidEx.Edrpou{code: "00032112", check_digit: 2, check_sum: 2}} + + iex> UkraineTaxidEx.Edrpou.Parser.parse("32129", normalize?: true) + {:ok, %UkraineTaxidEx.Edrpou{code: "00032129", check_digit: 9, check_sum: 9}} + + iex> UkraineTaxidEx.Edrpou.Parser.parse("9 30test62 78", normalize?: true, clean?: true) + {:ok, %UkraineTaxidEx.Edrpou{code: "09306278", check_digit: 8, check_sum: 8}} + + iex> UkraineTaxidEx.Edrpou.Parser.parse("123") + {:error, :length_too_short} + + iex> UkraineTaxidEx.Edrpou.Parser.parse("123456789") + {:error, :length_too_long} + + iex> UkraineTaxidEx.Edrpou.Parser.parse("123", normalize?: true) + {:error, :invalid_checksum} + """ + @spec parse(data :: edrpou_string_or_ok, options :: BaseParser.options()) :: + edrpou_or_error() + def parse(data, options \\ [normalize?: false, clean?: false]) + def parse({:ok, edrpou_string}, options), do: parse(edrpou_string, options) + def parse({:error, error}, _options), do: {:error, error} + + def parse(edrpou_string, options) do + length = (Keyword.get(options, :normalize?, false) && length()) || 0 + clean? = Keyword.get(options, :clean?, false) + + edrpou_string + |> digits(length, clean?) 
+ |> undigits() + |> validate() + |> generate_edrpou() + end + + defp generate_edrpou({:error, error}), do: {:error, error} + + defp generate_edrpou({:ok, edrpou_string}) do + digits = digits(edrpou_string) + + %{code: edrpou_string, check_sum: check_sum(digits), check_digit: check_digit(digits)} |> create_struct() |> ok() end diff --git a/lib/ukraine_taxid_ex/edrpou/validator.ex b/lib/ukraine_taxid_ex/edrpou/validator.ex new file mode 100644 index 0000000..5394647 --- /dev/null +++ b/lib/ukraine_taxid_ex/edrpou/validator.ex @@ -0,0 +1,60 @@ +defmodule UkraineTaxidEx.Edrpou.Validator do + @moduledoc """ + Functions for validating EDRPOU number format and checksum. + + This module provides validation functions to verify if an EDRPOU number meets the standard requirements including length and checksum validation. + """ + + import UkraineTaxidEx.Commons, only: [digits: 1, digits_and_check_digit: 1, error: 1, ok: 1] + import UkraineTaxidEx.Edrpou, only: [length: 0] + import UkraineTaxidEx.Edrpou.CheckSum, only: [check_sum: 1] + + @doc """ + Validates an EDRPOU number to check if it meets length requirements and has a valid checksum. 
+ + Returns: + * `{:ok, edrpou}` if validation is successful + * `{:error, :length_too_short}` if shorter than required length + * `{:error, :length_too_long}` if longer than required length + * `{:error, :invalid_checksum}` if checksum is invalid + """ + @spec validate(String.t()) :: + {:ok, String.t()} + | {:error, :length_too_short | :length_too_long | :invalid_length | :invalid_checksum} + def validate(edrpou) do + cond do + violates_length_too_short?(edrpou) -> error(:length_too_short) + violates_length_too_long?(edrpou) -> error(:length_too_long) + violates_checksum?(edrpou) -> error(:invalid_checksum) + true -> ok(edrpou) + end + end + + @doc "Checks whether a given EDRPOU violates the required length" + @spec violates_length?(String.t()) :: boolean + def violates_length?(edrpou), + do: String.length(edrpou) != length() + + @doc "Checks whether a given EDRPOU is too short" + @spec violates_length_too_short?(String.t()) :: boolean + def violates_length_too_short?(edrpou), + do: String.length(edrpou) < length() + + @doc "Checks whether a given EDRPOU is too long" + @spec violates_length_too_long?(String.t()) :: boolean + def violates_length_too_long?(edrpou), + do: String.length(edrpou) > length() + + @doc "Checks whether a given EDRPOU has an invalid checksum (returns true when the computed checksum differs from the check digit)" + @spec violates_checksum?(String.t()) :: boolean + def violates_checksum?(edrpou) do + {digits, check_digit} = + edrpou + |> digits() + |> digits_and_check_digit() + + check_sum = check_sum(digits) + + check_sum != check_digit + end +end diff --git a/test/ukraine_taxid_ex/commons_test.exs b/test/ukraine_taxid_ex/commons_test.exs index 65a2ee4..86288ad 100644 --- a/test/ukraine_taxid_ex/commons_test.exs +++ b/test/ukraine_taxid_ex/commons_test.exs @@ -23,9 +23,9 @@ defmodule UkraineTaxidEx.CommonsTest do end test "handles strings with non-digit characters" do - assert Commons.digits("1-2-3") == [1, 2, 3] - assert Commons.digits("A1B2C3") == [1, 2, 3] - assert Commons.digits("12.34") == [1, 2, 3, 4] + assert 
Commons.digits("1-2-3", 0, true) == [1, 2, 3] + assert Commons.digits("A1B2C3", 0, true) == [1, 2, 3] + assert Commons.digits("12.34", 0, true) == [1, 2, 3, 4] end test "handles empty string" do @@ -75,6 +75,25 @@ defmodule UkraineTaxidEx.CommonsTest do end end + describe "digits_and_check_digit/1" do + test "returns tuple with original digits and check digit" do + assert Commons.digits_and_check_digit([1, 2, 3, 4]) == {[1, 2, 3, 4], 4} + end + + test "works with different digit sequences" do + assert Commons.digits_and_check_digit([5, 6, 7, 8]) == {[5, 6, 7, 8], 8} + assert Commons.digits_and_check_digit([9, 0, 1, 2]) == {[9, 0, 1, 2], 2} + end + + test "handles single digit list" do + assert Commons.digits_and_check_digit([1]) == {[1], 1} + end + + test "handles empty list" do + assert Commons.digits_and_check_digit([]) == {[], nil} + end + end + describe "ok/1" do test "wraps data in ok tuple" do assert Commons.ok(123) == {:ok, 123} diff --git a/test/ukraine_taxid_ex/edrpou/parser_test.exs b/test/ukraine_taxid_ex/edrpou/parser_test.exs new file mode 100644 index 0000000..e1d9ca9 --- /dev/null +++ b/test/ukraine_taxid_ex/edrpou/parser_test.exs @@ -0,0 +1,161 @@ +defmodule UkraineTaxidEx.Edrpou.ParserTest do + use ExUnit.Case + alias UkraineTaxidEx.Edrpou + alias UkraineTaxidEx.Edrpou.Parser + doctest UkraineTaxidEx.Edrpou.Parser + + describe "parse/2" do + test "successfully parses valid EDRPOU codes (strict cases, correct without normalization)" do + valid_codes = [ + "00032112", + "00032129", + "09306278", + "09620081", + "09801546", + "09806443", + "09807595", + "09807750", + "09807862", + "09809192", + "13857564", + "14070197", + "14282829", + "14305909", + "14352406", + "14359845", + "14360080", + "14360506", + "14360570", + "14360920", + "14361575", + "19355562", + "19356610", + "19358784", + "19390819", + "20023569", + "20034231", + "20042839", + "20496061", + "20953647", + "21133352", + "21322127", + "21580639", + "21650966", + "21665382", + "21677333", + 
"21684818", + "21685166", + "21685485", + "22868414", + "23494714", + "23697280", + "26237202", + "26410155", + "26519933", + "26520688", + "32388371", + "33695095", + "34575675", + "34576883", + "35345213", + "35590956", + "35591059", + "35810511", + "35960913", + "36002395", + "36061927", + "36520434", + "37515069", + "38324133", + "38690683", + "38870739", + "39544699", + "39849797", + "43650988" + ] + + for code <- valid_codes do + assert {:ok, %Edrpou{code: ^code}} = Parser.parse(code) + end + end + + test "successfully parses valid EDRPOU codes (correct with normalization)" do + valid_codes = [ + "32112", + "032129", + "0032129", + "9306278", + "9620081", + "9801546", + "9806443", + "9807595", + "13857564", + "14361575", + "19390819", + "22868414", + "34575675", + "35960913", + "36002395", + "36061927", + "43650988" + ] + + for code <- valid_codes do + leaded_code = String.pad_leading(code, 8, "0") + assert {:ok, %Edrpou{code: ^leaded_code}} = Parser.parse(code, normalize?: true) + end + end + + test "successfully parses valid EDRPOU codes (correct after clean from non digit symbols)" do + valid_codes = [ + "00-03-21-12", + "00_03_21_29", + "00 03 21 29", + "0930 6278", + "096 200 81", + "09 80 1546", + "09.80.64.43", + "09/80/75/95", + "13 857 564", + "14361575", + "193.908 19", + "22868414", + "345f75u67ck5", + "35960913", + "36002395", + "36061927", + "43650988" + ] + + for code <- valid_codes do + cleaned_code = String.replace(code, ~r/[^0-9]/, "") + + assert {:ok, %Edrpou{code: ^cleaned_code}} = + Parser.parse(code, normalize?: false, clean?: true) + end + end + + test "successfully parses valid EDRPOU codes (correct after clean and normalize)" do + valid_codes = [ + "-3-21-12", + "0_3_21_29", + " 0003 21 29", + "9306278.", + "96 200 81", + "09 80 1546", + "09.80.64.43", + "09/80/75/95" + ] + + for code <- valid_codes do + cleaned_and_normalized_code = + code + |> String.replace(~r/[^0-9]/, "") + |> String.pad_leading(8, "0") + + assert {:ok, 
%Edrpou{code: ^cleaned_and_normalized_code}} = + Parser.parse(code, normalize?: true, clean?: true) + end + end + end +end