From d9bc141e518af3f210d798e24d17022c845cf636 Mon Sep 17 00:00:00 2001 From: Danylo Negrienko Date: Wed, 21 Aug 2024 22:40:34 -0400 Subject: [PATCH] word_count --- elixir/word-count/.exercism/config.json | 39 ++++++++ elixir/word-count/.exercism/metadata.json | 1 + elixir/word-count/.formatter.exs | 4 + elixir/word-count/.gitignore | 24 +++++ elixir/word-count/HELP.md | 75 +++++++++++++++ elixir/word-count/README.md | 93 +++++++++++++++++++ elixir/word-count/lib/word_count.ex | 17 ++++ elixir/word-count/mix.exs | 28 ++++++ elixir/word-count/test/test_helper.exs | 2 + elixir/word-count/test/word_count_test.exs | 102 +++++++++++++++++++++ 10 files changed, 385 insertions(+) create mode 100644 elixir/word-count/.exercism/config.json create mode 100644 elixir/word-count/.exercism/metadata.json create mode 100644 elixir/word-count/.formatter.exs create mode 100644 elixir/word-count/.gitignore create mode 100644 elixir/word-count/HELP.md create mode 100644 elixir/word-count/README.md create mode 100644 elixir/word-count/lib/word_count.ex create mode 100644 elixir/word-count/mix.exs create mode 100644 elixir/word-count/test/test_helper.exs create mode 100644 elixir/word-count/test/word_count_test.exs diff --git a/elixir/word-count/.exercism/config.json b/elixir/word-count/.exercism/config.json new file mode 100644 index 0000000..bf3740d --- /dev/null +++ b/elixir/word-count/.exercism/config.json @@ -0,0 +1,39 @@ +{ + "authors": [ + "rubysolo" + ], + "contributors": [ + "andrewsardone", + "angelikatyborska", + "chriseyre2000", + "Cohen-Carlisle", + "dalexj", + "dantswain", + "devonestes", + "henrik", + "kronn", + "kytrinyx", + "lpil", + "lucasprag", + "MarcosX", + "neenjaw", + "parkerl", + "patrickgombert", + "sotojuan", + "Teapane", + "waiting-for-dev" + ], + "files": { + "solution": [ + "lib/word_count.ex" + ], + "test": [ + "test/word_count_test.exs" + ], + "example": [ + ".meta/example.ex" + ] + }, + "blurb": "Given a phrase, count the occurrences of each word in that 
phrase.", + "source": "This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour." +} diff --git a/elixir/word-count/.exercism/metadata.json b/elixir/word-count/.exercism/metadata.json new file mode 100644 index 0000000..145d544 --- /dev/null +++ b/elixir/word-count/.exercism/metadata.json @@ -0,0 +1 @@ +{"track":"elixir","exercise":"word-count","id":"c8fad83593244f55a57afa42bc2f46cd","url":"https://exercism.org/tracks/elixir/exercises/word-count","handle":"negrienko","is_requester":true,"auto_approve":false} \ No newline at end of file diff --git a/elixir/word-count/.formatter.exs b/elixir/word-count/.formatter.exs new file mode 100644 index 0000000..d2cda26 --- /dev/null +++ b/elixir/word-count/.formatter.exs @@ -0,0 +1,4 @@ +# Used by "mix format" +[ + inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] +] diff --git a/elixir/word-count/.gitignore b/elixir/word-count/.gitignore new file mode 100644 index 0000000..63282b2 --- /dev/null +++ b/elixir/word-count/.gitignore @@ -0,0 +1,24 @@ +# The directory Mix will write compiled artifacts to. +/_build/ + +# If you run "mix test --cover", coverage assets end up here. +/cover/ + +# The directory Mix downloads your dependencies sources to. +/deps/ + +# Where third-party dependencies like ExDoc output generated docs. +/doc/ + +# Ignore .fetch files in case you like to edit your project deps locally. +/.fetch + +# If the VM crashes, it generates a dump, let's ignore it too. +erl_crash.dump + +# Also ignore archive artifacts (built via "mix archive.build"). +*.ez + +# Ignore package tarball (built via "mix hex.build"). 
+word_count-*.tar + diff --git a/elixir/word-count/HELP.md b/elixir/word-count/HELP.md new file mode 100644 index 0000000..621e09c --- /dev/null +++ b/elixir/word-count/HELP.md @@ -0,0 +1,75 @@ +# Help + +## Running the tests + +From the terminal, change to the base directory of the exercise then execute the tests with: + +```bash +$ mix test +``` + +This will execute the test file found in the `test` subfolder -- a file ending in `_test.exs` + +Documentation: + +* [`mix test` - Elixir's test execution tool](https://hexdocs.pm/mix/Mix.Tasks.Test.html) +* [`ExUnit` - Elixir's unit test library](https://hexdocs.pm/ex_unit/ExUnit.html) + +## Pending tests + +In test suites of practice exercises, all but the first test have been tagged to be skipped. + +Once you get a test passing, you can unskip the next one by commenting out the relevant `@tag :pending` with a `#` symbol. + +For example: + +```elixir +# @tag :pending +test "shouting" do + assert Bob.hey("WATCH OUT!") == "Whoa, chill out!" +end +``` + +If you wish to run all tests at once, you can include all skipped test by using the `--include` flag on the `mix test` command: + +```bash +$ mix test --include pending +``` + +Or, you can enable all the tests by commenting out the `ExUnit.configure` line in the file `test/test_helper.exs`. + +```elixir +# ExUnit.configure(exclude: :pending, trace: true) +``` + +## Useful `mix test` options + +* `test/.exs:LINENUM` - runs only a single test, the test from `.exs` whose definition is on line `LINENUM` +* `--failed` - runs only tests that failed the last time they ran +* `--max-failures` - the suite stops evaluating tests when this number of test failures +is reached +* `--seed 0` - disables randomization so the tests in a single file will always be ran +in the same order they were defined in + +## Submitting your solution + +You can submit your solution using the `exercism submit lib/word_count.ex` command. 
+This command will upload your solution to the Exercism website and print the solution page's URL. + +It's possible to submit an incomplete solution which allows you to: + +- See how others have completed the exercise +- Request help from a mentor + +## Need to get help? + +If you'd like help solving the exercise, check the following pages: + +- The [Elixir track's documentation](https://exercism.org/docs/tracks/elixir) +- The [Elixir track's programming category on the forum](https://forum.exercism.org/c/programming/elixir) +- [Exercism's programming category on the forum](https://forum.exercism.org/c/programming/5) +- The [Frequently Asked Questions](https://exercism.org/docs/using/faqs) + +Should those resources not suffice, you could submit your (incomplete) solution to request mentoring. + +If you're stuck on something, it may help to look at some of the [available resources](https://exercism.org/docs/tracks/elixir/resources) out there where answers might be found. \ No newline at end of file diff --git a/elixir/word-count/README.md b/elixir/word-count/README.md new file mode 100644 index 0000000..af29f37 --- /dev/null +++ b/elixir/word-count/README.md @@ -0,0 +1,93 @@ +# Word Count + +Welcome to Word Count on Exercism's Elixir Track. +If you need help running the tests or submitting your code, check out `HELP.md`. + +## Introduction + +You teach English as a foreign language to high school students. + +You've decided to base your entire curriculum on TV shows. +You need to analyze which words are used, and how often they're repeated. + +This will let you choose the simplest shows to start with, and to gradually increase the difficulty as time passes. + +## Instructions + +Your task is to count how many times each word occurs in a subtitle of a drama. + +The subtitles from these dramas use only ASCII characters. + +The characters often speak in casual English, using contractions like _they're_ or _it's_. +Though these contractions come from two words (e.g. 
defmodule WordCount do
  @moduledoc """
  Counts how often each word occurs in a piece of text.
  """

  @doc """
  Count the number of occurrences of each word in `sentence`.

  A word is a run of letters and/or digits, optionally joined by single
  internal apostrophes (so contractions such as `don't` stay intact).
  Every other character -- whitespace, underscores and any punctuation or
  symbol, including quoting apostrophes around a word -- acts as a separator.

  Words are compared case-insensitively; keys of the returned map are
  downcased. An input without any word yields an empty map.

  ## Examples

      iex> WordCount.count("go Go GO Stop stop")
      %{"go" => 3, "stop" => 2}

      iex> WordCount.count("can, can't, 'can't'")
      %{"can" => 1, "can't" => 2}

  """
  @spec count(String.t()) :: %{optional(String.t()) => pos_integer()}
  def count(sentence) do
    sentence
    |> String.downcase()
    |> extract_words()
    |> Enum.frequencies()
  end

  # Returns every word found in `text`, in order of appearance.
  #
  # Words are matched positively instead of splitting on an allow-list of
  # separators: that way unknown punctuation (quotes, semicolons, brackets,
  # "&", ...) can never stick to a word, a symbol inside a token cannot cause
  # the neighbouring words to be dropped, and no empty-string "word" can be
  # produced by stripping a token that consisted only of apostrophes.
  defp extract_words(text) do
    # The pattern has no capture groups, so each match is a one-element list.
    ~r/[\p{L}\p{N}]+(?:'[\p{L}\p{N}]+)*/u
    |> Regex.scan(text)
    |> Enum.map(fn [word] -> word end)
  end
end
fish") == expected + end + + test "handles cramped lists" do + expected = %{"one" => 1, "two" => 1, "three" => 1} + assert WordCount.count("one,two,three") == expected + end + + test "handles expanded lists" do + expected = %{"one" => 1, "two" => 1, "three" => 1} + assert WordCount.count("one,\ntwo,\nthree") == expected + end + + test "ignore punctuation" do + expected = %{"car" => 1, "carpet" => 1, "as" => 1, "java" => 1, "javascript" => 1} + assert WordCount.count("car : carpet as java : javascript!!&@$%^&") == expected + end + + test "include numbers" do + expected = %{"testing" => 2, "1" => 1, "2" => 1} + assert WordCount.count("testing, 1, 2 testing") == expected + end + + test "ignore underscores" do + expected = %{"two" => 1, "words" => 1} + assert WordCount.count("two_words") == expected + end + + test "normalize case" do + expected = %{"go" => 3, "stop" => 2} + assert WordCount.count("go Go GO Stop stop") == expected + end + + test "with apostrophes" do + expected = %{ + "first" => 1, + "don't" => 2, + "laugh" => 1, + "then" => 1, + "cry" => 1, + "you're" => 1, + "getting" => 1, + "it" => 1 + } + + assert WordCount.count("First: don't laugh. Then: don't cry. 
You're getting it.") == expected + end + + test "with quotations" do + expected = %{"joe" => 1, "can't" => 1, "tell" => 1, "between" => 1, "large" => 2, "and" => 1} + assert WordCount.count("Joe can't tell between 'large' and large.") == expected + end + + test "with quotations at the end and beginning of the sentence" do + expected = %{"venti" => 1, "in" => 1, "italian" => 1, "means" => 1, "twenty" => 1} + assert WordCount.count("'venti' in Italian means 'twenty'") == expected + end + + test "substrings from the beginning" do + expected = %{ + "joe" => 1, + "can't" => 1, + "tell" => 1, + "between" => 1, + "app" => 1, + "apple" => 1, + "and" => 1, + "a" => 1 + } + + assert WordCount.count("Joe can't tell between app, apple and a.") == expected + end + + test "multiple spaces not detected as a word" do + expected = %{"multiple" => 1, "whitespaces" => 1} + assert WordCount.count(" multiple whitespaces") == expected + end + + test "alternating word separators not detected as a word" do + expected = %{"one" => 1, "two" => 1, "three" => 1} + assert WordCount.count(",\n,one,\n ,two \n 'three'") == expected + end + + test "quotation for word with apostrophe" do + expected = %{"can" => 1, "can't" => 2} + assert WordCount.count("can, can't, 'can't'") == expected + end +end