From db731118fc5e2ac920ac9d16b69cf184847ff10b Mon Sep 17 00:00:00 2001 From: Danylo Negrienko Date: Tue, 19 Dec 2023 15:20:45 -0500 Subject: [PATCH] dna-encoding --- elixir/dna-encoding/.exercism/config.json | 22 +++++ elixir/dna-encoding/.exercism/metadata.json | 1 + elixir/dna-encoding/.formatter.exs | 4 + elixir/dna-encoding/.gitignore | 24 +++++ elixir/dna-encoding/HELP.md | 75 ++++++++++++++++ elixir/dna-encoding/HINTS.md | 49 ++++++++++ elixir/dna-encoding/README.md | 99 +++++++++++++++++++++ elixir/dna-encoding/lib/dna.ex | 29 ++++++ elixir/dna-encoding/mix.exs | 28 ++++++ elixir/dna-encoding/test/dna_test.exs | 75 ++++++++++++++++ elixir/dna-encoding/test/test_helper.exs | 2 + 11 files changed, 408 insertions(+) create mode 100644 elixir/dna-encoding/.exercism/config.json create mode 100644 elixir/dna-encoding/.exercism/metadata.json create mode 100644 elixir/dna-encoding/.formatter.exs create mode 100644 elixir/dna-encoding/.gitignore create mode 100644 elixir/dna-encoding/HELP.md create mode 100644 elixir/dna-encoding/HINTS.md create mode 100644 elixir/dna-encoding/README.md create mode 100644 elixir/dna-encoding/lib/dna.ex create mode 100644 elixir/dna-encoding/mix.exs create mode 100644 elixir/dna-encoding/test/dna_test.exs create mode 100644 elixir/dna-encoding/test/test_helper.exs diff --git a/elixir/dna-encoding/.exercism/config.json b/elixir/dna-encoding/.exercism/config.json new file mode 100644 index 0000000..c5039fb --- /dev/null +++ b/elixir/dna-encoding/.exercism/config.json @@ -0,0 +1,22 @@ +{ + "authors": [ + "neenjaw" + ], + "contributors": [ + "angelikatyborska", + "NobbZ" + ], + "files": { + "solution": [ + "lib/dna.ex" + ], + "test": [ + "test/dna_test.exs" + ], + "exemplar": [ + ".meta/exemplar.ex" + ] + }, + "language_versions": ">=1.10", + "blurb": "Learn about bitstrings and tail call recursion by encoding DNA sequences as binary data." 
+} diff --git a/elixir/dna-encoding/.exercism/metadata.json b/elixir/dna-encoding/.exercism/metadata.json new file mode 100644 index 0000000..218cec8 --- /dev/null +++ b/elixir/dna-encoding/.exercism/metadata.json @@ -0,0 +1 @@ +{"track":"elixir","exercise":"dna-encoding","id":"cf5029ade3444553984645359b133fcc","url":"https://exercism.org/tracks/elixir/exercises/dna-encoding","handle":"negrienko","is_requester":true,"auto_approve":false} \ No newline at end of file diff --git a/elixir/dna-encoding/.formatter.exs b/elixir/dna-encoding/.formatter.exs new file mode 100644 index 0000000..d2cda26 --- /dev/null +++ b/elixir/dna-encoding/.formatter.exs @@ -0,0 +1,4 @@ +# Used by "mix format" +[ + inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] +] diff --git a/elixir/dna-encoding/.gitignore b/elixir/dna-encoding/.gitignore new file mode 100644 index 0000000..3793de1 --- /dev/null +++ b/elixir/dna-encoding/.gitignore @@ -0,0 +1,24 @@ +# The directory Mix will write compiled artifacts to. +/_build/ + +# If you run "mix test --cover", coverage assets end up here. +/cover/ + +# The directory Mix downloads your dependencies sources to. +/deps/ + +# Where third-party dependencies like ExDoc output generated docs. +/doc/ + +# Ignore .fetch files in case you like to edit your project deps locally. +/.fetch + +# If the VM crashes, it generates a dump, let's ignore it too. +erl_crash.dump + +# Also ignore archive artifacts (built via "mix archive.build"). +*.ez + +# Ignore package tarball (built via "mix hex.build"). 
+bitstrings-*.tar + diff --git a/elixir/dna-encoding/HELP.md b/elixir/dna-encoding/HELP.md new file mode 100644 index 0000000..0eb7cda --- /dev/null +++ b/elixir/dna-encoding/HELP.md @@ -0,0 +1,75 @@ +# Help + +## Running the tests + +From the terminal, change to the base directory of the exercise then execute the tests with: + +```bash +$ mix test +``` + +This will execute the test file found in the `test` subfolder -- a file ending in `_test.exs` + +Documentation: + +* [`mix test` - Elixir's test execution tool](https://hexdocs.pm/mix/Mix.Tasks.Test.html) +* [`ExUnit` - Elixir's unit test library](https://hexdocs.pm/ex_unit/ExUnit.html) + +## Pending tests + +In test suites of practice exercises, all but the first test have been tagged to be skipped. + +Once you get a test passing, you can unskip the next one by commenting out the relevant `@tag :pending` with a `#` symbol. + +For example: + +```elixir +# @tag :pending +test "shouting" do + assert Bob.hey("WATCH OUT!") == "Whoa, chill out!" +end +``` + +If you wish to run all tests at once, you can include all skipped tests by using the `--include` flag on the `mix test` command: + +```bash +$ mix test --include pending +``` + +Or, you can enable all the tests by commenting out the `ExUnit.configure` line in the file `test/test_helper.exs`. + +```elixir +# ExUnit.configure(exclude: :pending, trace: true) +``` + +## Useful `mix test` options + +* `test/<FILE>.exs:LINENUM` - runs only a single test, the test from `<FILE>.exs` whose definition is on line `LINENUM` +* `--failed` - runs only tests that failed the last time they ran +* `--max-failures` - the suite stops evaluating tests when this number of test failures +is reached +* `--seed 0` - disables randomization so the tests in a single file will always be run +in the same order they were defined in + +## Submitting your solution + +You can submit your solution using the `exercism submit lib/dna.ex` command. 
+This command will upload your solution to the Exercism website and print the solution page's URL. + +It's possible to submit an incomplete solution which allows you to: + +- See how others have completed the exercise +- Request help from a mentor + +## Need to get help? + +If you'd like help solving the exercise, check the following pages: + +- The [Elixir track's documentation](https://exercism.org/docs/tracks/elixir) +- The [Elixir track's programming category on the forum](https://forum.exercism.org/c/programming/elixir) +- [Exercism's programming category on the forum](https://forum.exercism.org/c/programming/5) +- The [Frequently Asked Questions](https://exercism.org/docs/using/faqs) + +Should those resources not suffice, you could submit your (incomplete) solution to request mentoring. + +If you're stuck on something, it may help to look at some of the [available resources](https://exercism.org/docs/tracks/elixir/resources) out there where answers might be found. \ No newline at end of file diff --git a/elixir/dna-encoding/HINTS.md b/elixir/dna-encoding/HINTS.md new file mode 100644 index 0000000..8dd50ae --- /dev/null +++ b/elixir/dna-encoding/HINTS.md @@ -0,0 +1,49 @@ +# Hints + +## General + +- Use `?` to work with the character [code points][codepoint]. +- `\s` can be used to represent a space. +- Use [integer binary notation][integer-literal] for working with the codes. +- Try to use the tail call recursion strategy. + +## 1. Encode nucleic acid to binary value + +- This function needs to map one integer to another. +- This function doesn't need recursion. +- Making use of multiple clause functions may make this easier by breaking it down. + +## 2. Decode the binary value to the nucleic acid + +- This function is the opposite of part 1's function. +- This function doesn't need recursion. +- Making use of multiple clause functions may make this easier by breaking it down. + +## 3. 
Encode a DNA charlist + +- Create a tail-recursive function which takes a code point from the charlist and recursively builds the bitstring result. +- Tail-recursive functions need an accumulator. +- Remember, a [charlist][charlist] is a list of [integer code points][codepoint]. +- You can get the first and remaining items from a list using a built-in [`Kernel` module][kernel] function. +- You can also pattern match on a list using the [`[head | tail]`][list] notation. +- Use multiple clause functions to separate the base case from the recursive cases. +- Do not forget to specify the types of bitstring segments using the `::` operator. + +## 4. Decode a DNA bitstring + +- Create a tail-recursive function which [matches the first 4 bits][bitstring-matching] from the [bitstring][bitstring] and recursively builds the [charlist][charlist] result. +- Tail-recursive functions need an accumulator. +- Remember the [bitstring special form][bitstring-form] can be used for matching on bitstrings. +- Do not forget to specify the types of bitstring segments using the `::` operator. +- You will need to reverse the accumulator at the end. Write a private tail-recursive `reverse` function to do that and use it in the base-case of the `decode` function. 
+ +[integer-literal]: https://hexdocs.pm/elixir/syntax-reference.html#integers-in-other-bases-and-unicode-code-points +[codepoint]: https://elixir-lang.org/getting-started/binaries-strings-and-char-lists.html#unicode-and-code-points +[charlist]: https://elixir-lang.org/getting-started/binaries-strings-and-char-lists.html#charlists +[bitstring]: https://elixir-lang.org/getting-started/binaries-strings-and-char-lists.html#bitstrings +[bitstring-form]: https://hexdocs.pm/elixir/Kernel.SpecialForms.html#%3C%3C%3E%3E/1 +[bitstring-matching]: https://hexdocs.pm/elixir/Kernel.SpecialForms.html#%3C%3C%3E%3E/1-binary-bitstring-matching +[type-operator]: https://hexdocs.pm/elixir/Kernel.SpecialForms.html#::/2 +[recursion-tco]: https://en.wikipedia.org/wiki/Tail_call +[list]: https://hexdocs.pm/elixir/List.html#content +[kernel]: https://hexdocs.pm/elixir/Kernel.html#functions \ No newline at end of file diff --git a/elixir/dna-encoding/README.md b/elixir/dna-encoding/README.md new file mode 100644 index 0000000..936619f --- /dev/null +++ b/elixir/dna-encoding/README.md @@ -0,0 +1,99 @@ +# DNA Encoding + +Welcome to DNA Encoding on Exercism's Elixir Track. +If you need help running the tests or submitting your code, check out `HELP.md`. +If you get stuck on the exercise, check out `HINTS.md`, but try and solve it without using those first :) + +## Introduction + +## Tail Call Recursion + +When [recursing][exercism-recursion] through enumerables (lists, bitstrings, strings), there are often two concerns: + +- how much memory is required to store the trail of recursive function calls +- how to build the solution efficiently + +To deal with these concerns an _accumulator_ may be used. + +An accumulator is a variable that is passed along in addition to the data. It is used to pass the current state of the function's execution, from function call to function call, until the _base case_ is reached. 
In the base case, the accumulator is used to return the final value of the recursive function call. + +Accumulators should be initialized by the function's author, not the function's user. To achieve this, declare two functions - a public function that takes just the necessary data as arguments and initializes the accumulator, and a private function that also takes an accumulator. In Elixir, it is a common pattern to prefix the private function's name with `do_`. + +```elixir +# Count the length of a list without an accumulator +def count([]), do: 0 +def count([_head | tail]), do: 1 + count(tail) + +# Count the length of a list with an accumulator +def count(list), do: do_count(list, 0) + +defp do_count([], count), do: count +defp do_count([_head | tail], count), do: do_count(tail, count + 1) +``` + +The usage of an accumulator allows us to turn recursive functions into _tail-recursive_ functions. A function is tail-recursive if the _last_ thing executed by the function is a call to itself. + +[exercism-recursion]: https://exercism.org/tracks/elixir/concepts/recursion + +## Instructions + +In your DNA research lab, you have been working through various ways to compress your research data to save storage space. One teammate suggests converting the DNA data to a binary representation: + +| Nucleic Acid | Code | +| ------------ | ------ | +| a space | `0000` | +| A | `0001` | +| C | `0010` | +| G | `0100` | +| T | `1000` | + +You ponder this, as it will potentially halve the required data storage costs, but at the expense of human readability. You decide to write a module to encode and decode your data to benchmark your savings. + +## 1. Encode nucleic acid to binary value + +Implement `encode_nucleotide/1` to accept the code point for the nucleic acid and return the integer value of the encoded code. + +```elixir +DNA.encode_nucleotide(?A) +# => 1 +# (which is equal to 0b0001) +``` + +## 2. 
Decode the binary value to the nucleic acid + +Implement `decode_nucleotide/1` to accept the integer value of the encoded code and return the code point for the nucleic acid. + +```elixir +DNA.decode_nucleotide(0b0001) +# => 65 +# (which is equal to ?A) +``` + +## 3. Encode a DNA charlist + +Implement `encode/1` to accept a charlist representing nucleic acids and gaps and return a bitstring of the encoded data. + +```elixir +DNA.encode(~c"AC GT") +# => <<18, 4, 8::size(4)>> +``` + +## 4. Decode a DNA bitstring + +Implement `decode/1` to accept a bitstring representing nucleic acids and gaps and return the decoded data as a charlist. + +```elixir +DNA.decode(<<132, 2, 1::size(4)>>) +# => ~c"TG CA" +``` + +## Source + +### Created by + +- @neenjaw + +### Contributed to by + +- @angelikatyborska +- @NobbZ \ No newline at end of file
diff --git a/elixir/dna-encoding/lib/dna.ex b/elixir/dna-encoding/lib/dna.ex
new file mode 100644
index 0000000..dc8252d
--- /dev/null
+++ b/elixir/dna-encoding/lib/dna.ex
@@ -0,0 +1,58 @@
+defmodule DNA do
+  @moduledoc """
+  Encodes and decodes DNA sequences using a 4-bit code per nucleotide.
+
+  A space is `0b0000`, `A` is `0b0001`, `C` is `0b0010`, `G` is `0b0100`
+  and `T` is `0b1000`.
+  """
+
+  @doc """
+  Returns the 4-bit integer code for a nucleotide code point.
+
+  Raises `FunctionClauseError` for any other code point.
+  """
+  def encode_nucleotide(?\s), do: 0b0000
+  def encode_nucleotide(?A), do: 0b0001
+  def encode_nucleotide(?C), do: 0b0010
+  def encode_nucleotide(?G), do: 0b0100
+  def encode_nucleotide(?T), do: 0b1000
+
+  @doc """
+  Returns the nucleotide code point for a 4-bit integer code.
+
+  Raises `FunctionClauseError` for any other integer.
+  """
+  def decode_nucleotide(0b0000), do: ?\s
+  def decode_nucleotide(0b0001), do: ?A
+  def decode_nucleotide(0b0010), do: ?C
+  def decode_nucleotide(0b0100), do: ?G
+  def decode_nucleotide(0b1000), do: ?T
+
+  @doc """
+  Encodes a charlist of nucleotides into a bitstring, 4 bits per nucleotide.
+  """
+  def encode(dna), do: do_encode(dna, <<>>)
+
+  # Appends the 4-bit code of each nucleotide to the bitstring accumulator.
+  defp do_encode([], acc), do: acc
+
+  defp do_encode([nucleotide | tail], acc) do
+    do_encode(tail, <<acc::bitstring, encode_nucleotide(nucleotide)::4>>)
+  end
+
+  @doc """
+  Decodes a bitstring of 4-bit codes into a charlist of nucleotides.
+  """
+  def decode(dna), do: do_decode(dna, [])
+
+  # Prepends each decoded nucleotide (O(1) per step); the base case restores order.
+  defp do_decode(<<>>, acc), do: reverse(acc, [])
+
+  defp do_decode(<<nucleotide::4, rest::bitstring>>, acc) do
+    do_decode(rest, [decode_nucleotide(nucleotide) | acc])
+  end
+
+  # Tail-recursive list reversal.
+  defp reverse([], acc), do: acc
+  defp reverse([head | tail], acc), do: reverse(tail, [head | acc])
+end
diff --git a/elixir/dna-encoding/mix.exs b/elixir/dna-encoding/mix.exs new file mode 100644 index 0000000..b322446 --- /dev/null +++ 
b/elixir/dna-encoding/mix.exs
@@ -0,0 +1,32 @@
+# Mix project definition for the `:dna` application.
+defmodule DNA.MixProject do
+  use Mix.Project
+
+  # Project settings: app name, version, and dependency list.
+  # `start_permanent` is true only when Mix.env() is :prod.
+  def project do
+    [
+      app: :dna,
+      version: "0.1.0",
+      # elixir: "~> 1.10",
+      start_permanent: Mix.env() == :prod,
+      deps: deps()
+    ]
+  end
+
+  # Run "mix help compile.app" to learn about applications.
+  def application do
+    [
+      extra_applications: [:logger]
+    ]
+  end
+
+  # Run "mix help deps" to learn about dependencies.
+  # No dependencies are declared; the entries below are commented-out examples.
+  defp deps do
+    [
+      # {:dep_from_hexpm, "~> 0.3.0"},
+      # {:dep_from_git, git: "https://github.com/elixir-lang/my_dep.git", tag: "0.1.0"}
+    ]
+  end
+end
diff --git a/elixir/dna-encoding/test/dna_test.exs b/elixir/dna-encoding/test/dna_test.exs
new file mode 100644
index 0000000..1a54086
--- /dev/null
+++ b/elixir/dna-encoding/test/dna_test.exs
@@ -0,0 +1,80 @@
+# ExUnit tests for the DNA module; each test carries a `task_id` tag (1-4).
+defmodule DNATest do
+  use ExUnit.Case
+
+  # Task 1: DNA.encode_nucleotide/1 maps each nucleotide code point to its 4-bit code.
+  describe "encode to 4-bit encoding" do
+    @tag task_id: 1
+    test "?\\s to 0b0000", do: assert(DNA.encode_nucleotide(?\s) == 0b0000)
+    @tag task_id: 1
+    test "?A to 0b0001", do: assert(DNA.encode_nucleotide(?A) == 0b0001)
+    @tag task_id: 1
+    test "?C to 0b0010", do: assert(DNA.encode_nucleotide(?C) == 0b0010)
+    @tag task_id: 1
+    test "?G to 0b0100", do: assert(DNA.encode_nucleotide(?G) == 0b0100)
+    @tag task_id: 1
+    test "?T to 0b1000", do: assert(DNA.encode_nucleotide(?T) == 0b1000)
+  end
+
+  # Task 2: DNA.decode_nucleotide/1 maps each 4-bit code back to its code point.
+  describe "decode to code point" do
+    @tag task_id: 2
+    test "0b0000 to ?\\s", do: assert(DNA.decode_nucleotide(0b0000) == ?\s)
+    @tag task_id: 2
+    test "0b0001 to ?A", do: assert(DNA.decode_nucleotide(0b0001) == ?A)
+    @tag task_id: 2
+    test "0b0010 to ?C", do: assert(DNA.decode_nucleotide(0b0010) == ?C)
+    @tag task_id: 2
+    test "0b0100 to ?G", do: assert(DNA.decode_nucleotide(0b0100) == ?G)
+    @tag task_id: 2
+    test "0b1000 to ?T", do: assert(DNA.decode_nucleotide(0b1000) == ?T)
+  end
+
+  # Task 3: DNA.encode/1 turns a charlist into a bitstring with 4 bits per nucleotide.
+  describe "encoding" do
+    @tag task_id: 3
+    test "' '", do: assert(DNA.encode(~c" ") == <<0b0000::4>>)
+    @tag task_id: 3
+    test "'A'", do: assert(DNA.encode(~c"A") == <<0b0001::4>>)
+    @tag task_id: 3
+    test "'C'", do: assert(DNA.encode(~c"C") == <<0b0010::4>>)
+    @tag task_id: 3
+    test "'G'", do: assert(DNA.encode(~c"G") == <<0b0100::4>>)
+    @tag task_id: 3
+    test "'T'", do: assert(DNA.encode(~c"T") == <<0b1000::4>>)
+
+    @tag task_id: 3
+    test "' ACGT'",
+      do:
+        assert(DNA.encode(~c" ACGT") == <<0b0000::4, 0b0001::4, 0b0010::4, 0b0100::4, 0b1000::4>>)
+
+    @tag task_id: 3
+    test "'TGCA '",
+      do:
+        assert(DNA.encode(~c"TGCA ") == <<0b1000::4, 0b0100::4, 0b0010::4, 0b0001::4, 0b0000::4>>)
+  end
+
+  # Task 4: DNA.decode/1 turns a bitstring of 4-bit codes back into a charlist.
+  describe "decoding" do
+    @tag task_id: 4
+    test "' '", do: assert(DNA.decode(<<0b0000::4>>) == ~c" ")
+    @tag task_id: 4
+    test "'A'", do: assert(DNA.decode(<<0b0001::4>>) == ~c"A")
+    @tag task_id: 4
+    test "'C'", do: assert(DNA.decode(<<0b0010::4>>) == ~c"C")
+    @tag task_id: 4
+    test "'G'", do: assert(DNA.decode(<<0b0100::4>>) == ~c"G")
+    @tag task_id: 4
+    test "'T'", do: assert(DNA.decode(<<0b1000::4>>) == ~c"T")
+
+    @tag task_id: 4
+    test "' ACGT'",
+      do:
+        assert(DNA.decode(<<0b0000::4, 0b0001::4, 0b0010::4, 0b0100::4, 0b1000::4>>) == ~c" ACGT")
+
+    @tag task_id: 4
+    test "'TGCA '",
+      do:
+        assert(DNA.decode(<<0b1000::4, 0b0100::4, 0b0010::4, 0b0001::4, 0b0000::4>>) == ~c"TGCA ")
+  end
+end
diff --git a/elixir/dna-encoding/test/test_helper.exs b/elixir/dna-encoding/test/test_helper.exs
new file mode 100644
index 0000000..e8677a3
--- /dev/null
+++ b/elixir/dna-encoding/test/test_helper.exs
@@ -0,0 +1,3 @@
+# Starts ExUnit, then configures it to exclude tests tagged :pending, enable trace mode, and use seed 0.
+ExUnit.start()
+ExUnit.configure(exclude: :pending, trace: true, seed: 0)