Added HTML translator

This commit is contained in:
Danil Negrienko 2020-06-08 01:18:22 +03:00
parent 23406ca728
commit 5f8f401e90
7 changed files with 84 additions and 4 deletions

View File

@ -1,5 +1,6 @@
alias Localizator.Parser
alias Localizator.Parser.{Base, JSON}
alias Localizator.Commons
alias Localizator.Locale
alias Localizator.Direction
alias Localizator.Translator

View File

@ -1,4 +1,10 @@
defmodule Localizator.Commons do
# Heuristic HTML detector: matches either a void/self-contained tag (first
# alternative) or an opening tag followed later by its matching closing tag
# (second alternative, via the `\2` back-reference).
# Flags: `i` makes tag names case-insensitive; `s` (dotall) lets `.` cross
# newlines so that elements whose content spans several lines are detected too.
@html_regex ~r/<(br|basefont|hr|input|source|frame|param|area|meta|!--|col|link|option|base|img|wbr|!DOCTYPE).*?>|<(a|abbr|acronym|address|applet|article|aside|audio|b|bdi|bdo|big|blockquote|body|button|canvas|caption|center|cite|code|colgroup|command|datalist|dd|del|details|dfn|dialog|dir|div|dl|dt|em|embed|fieldset|figcaption|figure|font|footer|form|frameset|head|header|hgroup|h1|h2|h3|h4|h5|h6|html|i|iframe|ins|kbd|keygen|label|legend|li|map|mark|menu|meter|nav|noframes|noscript|object|ol|optgroup|output|p|pre|progress|q|rp|rt|ruby|s|samp|script|section|select|small|span|strike|strong|style|sub|summary|sup|table|tbody|td|textarea|tfoot|th|thead|time|title|tr|track|tt|u|ul|var|video).*?<\/\2>/is

@doc """
Returns `true` when `string` looks like it contains HTML markup.

The check is a regular-expression heuristic, not a parse: it looks for a
known void tag, an HTML comment, a doctype, or a known opening tag that is
followed by its closing tag. Only binaries are accepted.
"""
@spec is_html?(String.t()) :: boolean()
def is_html?(string) when is_binary(string) do
  String.match?(string, @html_regex)
end
def struct_from_map(a_map, as: a_struct) do
# Find the keys within the map
keys =

View File

@ -19,4 +19,7 @@ defmodule Localizator.Translator.Base do
# Callbacks every translator backend must implement.
# NOTE(review): `text`, `locale`, `to`, `from` and `message` are types declared
# outside this excerpt — presumably strings; confirm against the module header.

# Returns `{:ok, locale}` for the given text, or `{:error, message}`.
@callback detect(text) :: {:ok, locale} | {:error, message}
# Tagged-tuple translation into `to`; the 3-arity form also receives `from`.
@callback translate(text, to) :: {:ok, text} | {:error, message}
@callback translate(text, to, from) :: {:ok, text} | {:error, message}
# Unwrapped variants: the spec allows the translated text or `nil`
# (no error tuple is part of the declared return type).
@callback translate!(text, to) :: text | nil
@callback translate!(text, to, from) :: text | nil
end

View File

@ -1,4 +1,5 @@
defmodule Localizator.Translator do
alias Localizator.Commons
alias Localizator.Direction
@typedoc """
@ -39,9 +40,63 @@ defmodule Localizator.Translator do
@doc """
Returns the default translator: the first entry of `list/0`.

Yields `nil` when `list/0` is empty, as `List.first/1` does.
"""
@spec default() :: translator
def default do
  List.first(list())
end
@doc """
Translates `source` along `direction` using `translator`.

`direction` is resolved through `Direction.get/1`, whose result supplies the
`to` and `from` locales; the actual work is delegated to `translate/4`.
When `translator` is omitted, `default/0` is used.

The result is returned unwrapped (no `{:ok, _}` tuple), mirroring what
`translate/4` returns.
"""
@spec translate(source, direction, translator) :: result
def translate(source, direction, translator \\ default()) do
  map = Direction.get(direction)
  # Delegate exactly once: calling the backend directly here as well would
  # translate the text twice and throw the first result away.
  translate(source, map.to, map.from, translator)
end
@doc """
Translates `source` into locale `to`, recursing through nested containers.

  * binaries are translated — as HTML when `Commons.is_html?/1` returns
    `true`, as plain text otherwise;
  * maps keep their keys and have every value translated;
  * lists have every element translated;
  * any other term (numbers, booleans, `nil`, ...) is returned unchanged, so
    parsed documents with non-string leaves do not crash the traversal.

`from` may be `nil`; it is passed through to the translator as is.
"""
@spec translate(term(), to, from_may_be_nil, translator) :: result
def translate(string, to, from, translator) when is_binary(string) do
  if Commons.is_html?(string) do
    translate_html(string, to, from, translator)
  else
    translate_plain(string, to, from, translator)
  end
end

def translate(map, to, from, translator) when is_map(map) do
  Map.new(map, fn {key, value} ->
    {key, translate(value, to, from, translator)}
  end)
end

def translate(list, to, from, translator) when is_list(list) do
  Enum.map(list, &translate(&1, to, from, translator))
end

# Non-translatable leaf: hand it back untouched.
def translate(other, _to, _from, _translator), do: other
# Translates a plain-text string by calling the translator module's
# `translate!/3` and returning whatever it returns.
defp translate_plain(string, to, from, translator),
  do: translator.translate!(string, to, from)
# Translates the text inside an HTML string while keeping its markup:
# parse the string with Meeseeks, take its tuple tree, translate the tree's
# text nodes, then re-parse the translated tree and render it back to HTML.
defp translate_html(string, to, from, translator) do
  source_tree = Meeseeks.tree(Meeseeks.parse(string))
  translated_tree = translate_html_element(source_tree, to, from, translator)

  translated_tree
  |> Meeseeks.parse(:tuple_tree)
  |> Meeseeks.html()
end
# Walks a tuple tree and translates its text nodes, leaving the structure
# (tags, attributes, non-text nodes) untouched.

# Text node: send it to the translator.
defp translate_html_element(text, to, from, translator) when is_binary(text) do
  translator.translate!(text, to, from)
end

# List of nodes (a whole tree or an element's children): translate each one.
# A single-text-node list needs no special case; it is just a one-element list.
defp translate_html_element(nodes, to, from, translator) when is_list(nodes) do
  Enum.map(nodes, &translate_html_element(&1, to, from, translator))
end

# Element: keep tag and attributes, recurse into the children. The binary-tag
# guard keeps atom-tagged 3-tuples from being mistaken for elements.
defp translate_html_element({tag, attributes, content}, to, from, translator)
     when is_binary(tag) do
  {tag, attributes, translate_html_element(content, to, from, translator)}
end

# Any other tuple node (e.g. `{:comment, text}` or a doctype tuple) carries no
# translatable content: pass it through unchanged instead of raising
# `FunctionClauseError`.
defp translate_html_element(node, _to, _from, _translator) when is_tuple(node) do
  node
end
end

View File

@ -2,6 +2,7 @@ defmodule Localizator.Translator.Yandex do
@type text :: String.t()
@type locale :: String.t()
@type from :: locale
@type optional_from :: from | nil
@type to :: locale
@type message :: String.t()
@ -17,7 +18,7 @@ defmodule Localizator.Translator.Yandex do
end
@impl true
@spec translate(text, to) :: {:ok, text} | {:error, message}
@spec translate(text, to, optional_from) :: {:ok, text} | {:error, message}
def translate(text, to, from \\ nil)
def translate(text, to, from) when is_bitstring(text) and is_bitstring(to) do
@ -37,4 +38,13 @@ defmodule Localizator.Translator.Yandex do
{:error, message}
end
end
@doc """
Unwrapped variant of `translate/3`.

Returns the translated text when `translate/3` yields `{:ok, result}` and
`nil` for any other outcome. Note that, despite the bang, this function does
not raise on a failed translation — `nil` is the failure signal.

`from` defaults to `nil`.
"""
@impl true
@spec translate!(text, to, optional_from) :: text | nil
def translate!(text, to, from \\ nil) when is_bitstring(text) and is_bitstring(to) do
  case translate(text, to, from) do
    {:ok, result} -> result
    _ -> nil
  end
end
end

View File

@ -4,7 +4,7 @@ defmodule Localizator.MixProject do
def project do
[
app: :localizator,
version: "0.1.1",
version: "0.1.2",
elixir: "~> 1.10",
start_permanent: Mix.env() == :prod,
deps: deps()
@ -24,6 +24,7 @@ defmodule Localizator.MixProject do
{:yandex_translate, "~> 0.4.0"},
# Parsers/generators
{:meeseeks, "~> 0.15.1"},
{:jason, "~> 1.2.1"},
{:yaml_elixir, "~> 2.4.0"},

View File

@ -9,9 +9,13 @@
"jose": {:hex, :jose, "1.10.1", "16d8e460dae7203c6d1efa3f277e25b5af8b659febfc2f2eb4bacf87f128b80a", [:mix, :rebar3], [], "hexpm", "3c7ddc8a9394b92891db7c2771da94bf819834a1a4c92e30857b7d582e2f8257"},
"makeup": {:hex, :makeup, "1.0.2", "0b9f7bfb7a88bed961341b359bc2cc1b233517af891ba4890ec5a580ffe738b4", [:mix], [{:nimble_parsec, "~> 0.5", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "43833299231c6a6983afc75a34e43eeba638521d5527ff89809fa6372424fd7e"},
"makeup_elixir": {:hex, :makeup_elixir, "0.14.1", "4f0e96847c63c17841d42c08107405a005a2680eb9c7ccadfd757bd31dabccfb", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "f2438b1a80eaec9ede832b5c41cd4f373b38fd7aa33e3b22d9db79e640cbde11"},
"meeseeks": {:hex, :meeseeks, "0.15.1", "148d5d9ea879cdb415b8bc4162ac5528f9a2fe42fbfe1802c681a2842cb1c0a4", [:mix], [{:meeseeks_html5ever, "~> 0.12.1", [hex: :meeseeks_html5ever, repo: "hexpm", optional: false]}], "hexpm", "5589957b7cca75e6683cecc308253d7854f43b07806939d7031b81ca6e8abd98"},
"meeseeks_html5ever": {:hex, :meeseeks_html5ever, "0.12.1", "718fab10d05b83204524a518b2b88caa37ba6a6e02f82e80d6a7bc47552fb54a", [:mix], [{:rustler, "~> 0.21.0", [hex: :rustler, repo: "hexpm", optional: false]}], "hexpm", "11489094637f49a26bad4610a9138352c8d229339d888169cb35b08cdfd8861a"},
"mint": {:hex, :mint, "1.0.0", "ca5ab33497ba2bdcc42f6cdd3927420a6159116be87c8173658e93c8746703da", [:mix], [{:castore, "~> 0.1.0", [hex: :castore, repo: "hexpm", optional: true]}], "hexpm", "b8943ef1e630879538dd6620bfc189d4d75fab3ad39f3fe9c50539879f7efd84"},
"nimble_parsec": {:hex, :nimble_parsec, "0.6.0", "32111b3bf39137144abd7ba1cce0914533b2d16ef35e8abc5ec8be6122944263", [:mix], [], "hexpm", "27eac315a94909d4dc68bc07a4a83e06c8379237c5ea528a9acff4ca1c873c52"},
"remix": {:hex, :remix, "0.0.2", "f06115659d8ede8d725fae1708920ef73353a1b39efe6a232d2a38b1f2902109", [:mix], [], "hexpm", "5f5555646ed4fca83fab8620735150aa0bc408c5a17a70d28cfa7086bc6f497c"},
"rustler": {:hex, :rustler, "0.21.1", "5299980be32da997c54382e945bacaa015ed97a60745e1e639beaf6a7b278c65", [:mix], [{:toml, "~> 0.5.2", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "6ee1651e10645b2b2f3bb70502bf180341aa058709177e9bc28c105934094bc6"},
"toml": {:hex, :toml, "0.5.2", "e471388a8726d1ce51a6b32f864b8228a1eb8edc907a0edf2bb50eab9321b526", [:mix], [], "hexpm", "f1e3dabef71fb510d015fad18c0e05e7c57281001141504c6b69d94e99750a07"},
"yamerl": {:hex, :yamerl, "0.8.0", "8214cfe16bbabe5d1d6c14a14aea11c784b9a21903dd6a7c74f8ce180adae5c7", [:rebar3], [], "hexpm", "010634477bf9c208a0767dcca89116c2442cf0b5e87f9c870f85cd1c3e0c2aab"},
"yaml_elixir": {:hex, :yaml_elixir, "2.4.0", "2f444abc3c994c902851fde56b6a9cb82895c291c05a0490a289035c2e62ae71", [:mix], [{:yamerl, "~> 0.7", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "4e25a6d5c873e393689c6f1062c5ec90f6cd1be2527b073178ae37eae4c78bee"},
"yandex_translate": {:hex, :yandex_translate, "0.4.0", "a8851bdd0899d29334c9028aecc70c9921cae5dff86e512e30e72a0bceb6613a", [:mix], [{:castore, "~> 0.1.5", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.2.0", [hex: :jason, repo: "hexpm", optional: false]}, {:joken, "~> 2.2.0", [hex: :joken, repo: "hexpm", optional: false]}, {:mint, "~> 1.0.0", [hex: :mint, repo: "hexpm", optional: false]}], "hexpm", "d4cdb333d8e447d0de20e0314ffc5fdd0a38421800866801007baca9102936f1"},