dna-encoding

2023-12-19 15:20:45 -05:00 · 2023-12-19 15:20:45 -05:00 · db731118fc
commit db731118fc
parent 9d03ff01dc
11 changed files with 408 additions and 0 deletions
--- a/elixir/dna-encoding/.exercism/config.json
+++ b/elixir/dna-encoding/.exercism/config.json
@ -0,0 +1,22 @@
+{
+  "authors": [
+    "neenjaw"
+  ],
+  "contributors": [
+    "angelikatyborska",
+    "NobbZ"
+  ],
+  "files": {
+    "solution": [
+      "lib/dna.ex"
+    ],
+    "test": [
+      "test/dna_test.exs"
+    ],
+    "exemplar": [
+      ".meta/exemplar.ex"
+    ]
+  },
+  "language_versions": ">=1.10",
+  "blurb": "Learn about bitstrings and tail call recursion by encoding DNA sequences as binary data."
+}
--- a/elixir/dna-encoding/.exercism/metadata.json
+++ b/elixir/dna-encoding/.exercism/metadata.json
@ -0,0 +1 @@
+{"track":"elixir","exercise":"dna-encoding","id":"cf5029ade3444553984645359b133fcc","url":"https://exercism.org/tracks/elixir/exercises/dna-encoding","handle":"negrienko","is_requester":true,"auto_approve":false}
--- a/elixir/dna-encoding/.formatter.exs
+++ b/elixir/dna-encoding/.formatter.exs
@ -0,0 +1,4 @@
+# Used by "mix format"
+[
+  # Glob patterns selecting the files to format: mix.exs and .formatter.exs,
+  # plus every .ex/.exs file under config/, lib/ and test/.
+  inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
+]
--- a/elixir/dna-encoding/.gitignore
+++ b/elixir/dna-encoding/.gitignore
@ -0,0 +1,24 @@
+# The directory Mix will write compiled artifacts to.
+/_build/
+
+# If you run "mix test --cover", coverage assets end up here.
+/cover/
+
+# The directory Mix downloads your dependencies sources to.
+/deps/
+
+# Where third-party dependencies like ExDoc output generated docs.
+/doc/
+
+# Ignore .fetch files in case you like to edit your project deps locally.
+/.fetch
+
+# If the VM crashes, it generates a dump, let's ignore it too.
+erl_crash.dump
+
+# Also ignore archive artifacts (built via "mix archive.build").
+*.ez
+
+# Ignore package tarball (built via "mix hex.build").
+bitstrings-*.tar
+
--- a/elixir/dna-encoding/HELP.md
+++ b/elixir/dna-encoding/HELP.md
@ -0,0 +1,75 @@
+# Help
+
+## Running the tests
+
+From the terminal, change to the base directory of the exercise then execute the tests with:
+
+```bash
+$ mix test
+```
+
+This will execute the test file found in the `test` subfolder -- a file ending in `_test.exs`
+
+Documentation:
+
+* [`mix test` - Elixir's test execution tool](https://hexdocs.pm/mix/Mix.Tasks.Test.html)
+* [`ExUnit` - Elixir's unit test library](https://hexdocs.pm/ex_unit/ExUnit.html)
+
+## Pending tests
+
+In test suites of practice exercises, all but the first test have been tagged to be skipped.
+
+Once you get a test passing, you can unskip the next one by commenting out the relevant `@tag :pending` with a `#` symbol.
+
+For example:
+
+```elixir
+# @tag :pending
+test "shouting" do
+  assert Bob.hey("WATCH OUT!") == "Whoa, chill out!"
+end
+```
+
+If you wish to run all tests at once, you can include all skipped tests by using the `--include` flag on the `mix test` command:
+
+```bash
+$ mix test --include pending
+```
+
+Or, you can enable all the tests by commenting out the `ExUnit.configure` line in the file `test/test_helper.exs`.
+
+```elixir
+# ExUnit.configure(exclude: :pending, trace: true)
+```
+
+## Useful `mix test` options
+
+* `test/<FILE>.exs:LINENUM` - runs only a single test, the test from `<FILE>.exs` whose definition is on line `LINENUM`
+* `--failed` - runs only tests that failed the last time they ran
+* `--max-failures` - the suite stops evaluating tests when this number of test failures
+is reached
+* `--seed 0` - disables randomization so the tests in a single file will always be run
+in the same order they were defined in
+
+## Submitting your solution
+
+You can submit your solution using the `exercism submit lib/dna.ex` command.
+This command will upload your solution to the Exercism website and print the solution page's URL.
+
+It's possible to submit an incomplete solution which allows you to:
+
+- See how others have completed the exercise
+- Request help from a mentor
+
+## Need to get help?
+
+If you'd like help solving the exercise, check the following pages:
+
+- The [Elixir track's documentation](https://exercism.org/docs/tracks/elixir)
+- The [Elixir track's programming category on the forum](https://forum.exercism.org/c/programming/elixir)
+- [Exercism's programming category on the forum](https://forum.exercism.org/c/programming/5)
+- The [Frequently Asked Questions](https://exercism.org/docs/using/faqs)
+
+Should those resources not suffice, you could submit your (incomplete) solution to request mentoring.
+
+If you're stuck on something, it may help to look at some of the [available resources](https://exercism.org/docs/tracks/elixir/resources) out there where answers might be found.
--- a/elixir/dna-encoding/HINTS.md
+++ b/elixir/dna-encoding/HINTS.md
@ -0,0 +1,49 @@
+# Hints
+
+## General
+
+- Use `?` to work with the character [code points][codepoint].
+- `\s` can be used to represent a space.
+- Use [integer binary notation][integer-literal] for working with the codes.
+- Try to use the [tail call recursion][recursion-tco] strategy.
+
+## 1. Encode nucleic acid to binary value
+
+- This function needs to map one integer to another.
+- This function doesn't need recursion.
+- Making use of multiple clause functions may make this easier by breaking it down.
+
+## 2. Decode the binary value to the nucleic acid
+
+- This function is the opposite of part 1's function.
+- This function doesn't need recursion.
+- Making use of multiple clause functions may make this easier by breaking it down.
+
+## 3. Encode a DNA charlist
+
+- Create a tail-recursive function which takes a code point from the charlist and recursively builds the bitstring result.
+- Tail-recursive functions need an accumulator.
+- Remember, a [charlist][charlist] is a list of [integer code points][codepoint].
+- You can get the first and remaining items from a list using a built-in [`Kernel` module][kernel] function.
+- You can also pattern match on a list using the [`[head | tail]`][list] notation.
+- Use multiple clause functions to separate the base case from the recursive cases.
+- Do not forget to specify the types of bitstring segments using the [`::` operator][type-operator].
+
+## 4. Decode a DNA bitstring
+
+- Create a tail-recursive function which [matches the first 4 bits][bitstring-matching] from the [bitstring][bitstring] and recursively builds the [charlist][charlist] result.
+- Tail-recursive functions need an accumulator.
+- Remember the [bitstring special form][bitstring-form] can be used for matching on bitstrings.
+- Do not forget to specify the types of bitstring segments using the `::` operator.
+- You will need to reverse the accumulator at the end. Write a private tail-recursive `reverse` function to do that and use it in the base-case of the `decode` function.
+
+[integer-literal]: https://hexdocs.pm/elixir/syntax-reference.html#integers-in-other-bases-and-unicode-code-points
+[codepoint]: https://elixir-lang.org/getting-started/binaries-strings-and-char-lists.html#unicode-and-code-points
+[charlist]: https://elixir-lang.org/getting-started/binaries-strings-and-char-lists.html#charlists
+[bitstring]: https://elixir-lang.org/getting-started/binaries-strings-and-char-lists.html#bitstrings
+[bitstring-form]: https://hexdocs.pm/elixir/Kernel.SpecialForms.html#%3C%3C%3E%3E/1
+[bitstring-matching]: https://hexdocs.pm/elixir/Kernel.SpecialForms.html#%3C%3C%3E%3E/1-binary-bitstring-matching
+[type-operator]: https://hexdocs.pm/elixir/Kernel.SpecialForms.html#::/2
+[recursion-tco]: https://en.wikipedia.org/wiki/Tail_call
+[list]: https://hexdocs.pm/elixir/List.html#content
+[kernel]: https://hexdocs.pm/elixir/Kernel.html#functions
--- a/elixir/dna-encoding/README.md
+++ b/elixir/dna-encoding/README.md
@ -0,0 +1,99 @@
+# DNA Encoding
+
+Welcome to DNA Encoding on Exercism's Elixir Track.
+If you need help running the tests or submitting your code, check out `HELP.md`.
+If you get stuck on the exercise, check out `HINTS.md`, but try and solve it without using those first :)
+
+## Introduction
+
+## Tail Call Recursion
+
+When [recursing][exercism-recursion] through enumerables (lists, bitstrings, strings), there are often two concerns:
+
+- how much memory is required to store the trail of recursive function calls
+- how to build the solution efficiently
+
+To deal with these concerns an _accumulator_ may be used.
+
+An accumulator is a variable that is passed along in addition to the data. It is used to pass the current state of the function's execution, from function call to function call, until the _base case_ is reached. In the base case, the accumulator is used to return the final value of the recursive function call.
+
+Accumulators should be initialized by the function's author, not the function's user. To achieve this, declare two functions - a public function that takes just the necessary data as arguments and initializes the accumulator, and a private function that also takes an accumulator. In Elixir, it is a common pattern to prefix the private function's name with `do_`.
+
+```elixir
+# Count the length of a list without an accumulator
+def count([]), do: 0
+def count([_head | tail]), do: 1 + count(tail)
+
+# Count the length of a list with an accumulator
+def count(list), do: do_count(list, 0)
+
+defp do_count([], count), do: count
+defp do_count([_head | tail], count), do: do_count(tail, count + 1)
+```
+
+The usage of an accumulator allows us to turn recursive functions into _tail-recursive_ functions. A function is tail-recursive if the _last_ thing executed by the function is a call to itself.
+
+[exercism-recursion]: https://exercism.org/tracks/elixir/concepts/recursion
+
+## Instructions
+
+In your DNA research lab, you have been working through various ways to compress your research data to save storage space. One teammate suggests converting the DNA data to a binary representation:
+
+| Nucleic Acid | Code   |
+| ------------ | ------ |
+| a space      | `0000` |
+| A            | `0001` |
+| C            | `0010` |
+| G            | `0100` |
+| T            | `1000` |
+
+You ponder this, as it will potentially halve the required data storage costs, but at the expense of human readability. You decide to write a module to encode and decode your data to benchmark your savings.
+
+## 1. Encode nucleic acid to binary value
+
+Implement `encode_nucleotide/1` to accept the code point for the nucleic acid and return the integer value of the encoded code.
+
+```elixir
+DNA.encode_nucleotide(?A)
+# => 1
+# (which is equal to 0b0001)
+```
+
+## 2. Decode the binary value to the nucleic acid
+
+Implement `decode_nucleotide/1` to accept the integer value of the encoded code and return the code point for the nucleic acid.
+
+```elixir
+DNA.decode_nucleotide(0b0001)
+# => 65
+# (which is equal to ?A)
+```
+
+## 3. Encode a DNA charlist
+
+Implement `encode/1` to accept a charlist representing nucleic acids and gaps and return a bitstring of the encoded data.
+
+```elixir
+DNA.encode(~c"AC GT")
+# => <<18, 4, 8::size(4)>>
+```
+
+## 4. Decode a DNA bitstring
+
+Implement `decode/1` to accept a bitstring representing nucleic acids and gaps and return the decoded data as a charlist.
+
+```elixir
+DNA.decode(<<132, 2, 1::size(4)>>)
+# => ~c"TG CA"
+```
+
+## Source
+
+### Created by
+
+- @neenjaw
+
+### Contributed to by
+
+- @angelikatyborska
+- @NobbZ
--- a/elixir/dna-encoding/lib/dna.ex
+++ b/elixir/dna-encoding/lib/dna.ex
@ -0,0 +1,29 @@
+defmodule DNA do
+  @moduledoc """
+  Converts DNA sequences between charlists and a packed bitstring in which
+  every nucleotide (or gap) occupies 4 bits.
+  """
+
+  @doc """
+  Returns the 4-bit integer code for a nucleotide code point.
+
+  Accepts `?\\s`, `?A`, `?C`, `?G` and `?T`; any other argument raises
+  `FunctionClauseError`.
+  """
+  def encode_nucleotide(?\s), do: 0b0000
+  def encode_nucleotide(?A), do: 0b0001
+  def encode_nucleotide(?C), do: 0b0010
+  def encode_nucleotide(?G), do: 0b0100
+  def encode_nucleotide(?T), do: 0b1000
+
+  @doc """
+  Returns the nucleotide code point for a 4-bit integer code.
+
+  Accepts `0b0000`, `0b0001`, `0b0010`, `0b0100` and `0b1000`; any other
+  argument raises `FunctionClauseError`.
+  """
+  def decode_nucleotide(0b0000), do: ?\s
+  def decode_nucleotide(0b0001), do: ?A
+  def decode_nucleotide(0b0010), do: ?C
+  def decode_nucleotide(0b0100), do: ?G
+  def decode_nucleotide(0b1000), do: ?T
+
+  @doc """
+  Encodes a charlist of nucleotides and gaps into a bitstring, 4 bits per
+  element. An empty list encodes to the empty bitstring.
+  """
+  def encode(dna), do: do_encode(dna, <<>>)
+
+  # Tail-recursive worker: appends one 4-bit segment per element to `acc`.
+  defp do_encode([], acc), do: acc
+
+  defp do_encode([nucleotide | tail], acc) do
+    do_encode(tail, <<acc::bitstring, encode_nucleotide(nucleotide)::size(4)>>)
+  end
+
+  @doc """
+  Decodes a bitstring of 4-bit codes into a charlist.
+
+  The bit size must be a multiple of 4; otherwise no clause of the private
+  worker matches and `FunctionClauseError` is raised.
+  """
+  def decode(dna), do: do_decode(dna, [])
+
+  # Tail-recursive worker. Code points are prepended to `acc` (constant time
+  # per step) and the list is reversed once in the base case; appending with
+  # `++` would copy the whole accumulator on every step.
+  defp do_decode(<<>>, acc), do: reverse(acc, [])
+
+  defp do_decode(<<nucleotide::4, rest::bitstring>>, acc) do
+    do_decode(rest, [decode_nucleotide(nucleotide) | acc])
+  end
+
+  # Tail-recursive list reversal: moves each head onto the front of `reversed`.
+  defp reverse([], reversed), do: reversed
+  defp reverse([head | tail], reversed), do: reverse(tail, [head | reversed])
+end
--- a/elixir/dna-encoding/mix.exs
+++ b/elixir/dna-encoding/mix.exs
@ -0,0 +1,28 @@
+defmodule DNA.MixProject do
+  use Mix.Project
+
+  # Project definition read by Mix. No `:elixir` version requirement is set.
+  def project do
+    production? = Mix.env() == :prod
+
+    [app: :dna, version: "0.1.0", start_permanent: production?, deps: deps()]
+  end
+
+  # OTP application settings; see "mix help compile.app".
+  def application, do: [extra_applications: [:logger]]
+
+  # The project has no dependencies; see "mix help deps" for the entry format.
+  defp deps, do: []
+end
--- a/elixir/dna-encoding/test/dna_test.exs
+++ b/elixir/dna-encoding/test/dna_test.exs
@ -0,0 +1,75 @@
+defmodule DNATest do
+  use ExUnit.Case
+
+  # Task 1: a single nucleotide code point maps to its 4-bit integer code.
+  describe "encode to 4-bit encoding" do
+    @tag task_id: 1
+    test "?\\s to 0b0000" do
+      assert DNA.encode_nucleotide(?\s) == 0b0000
+    end
+
+    @tag task_id: 1
+    test "?A to 0b0001" do
+      assert DNA.encode_nucleotide(?A) == 0b0001
+    end
+
+    @tag task_id: 1
+    test "?C to 0b0010" do
+      assert DNA.encode_nucleotide(?C) == 0b0010
+    end
+
+    @tag task_id: 1
+    test "?G to 0b0100" do
+      assert DNA.encode_nucleotide(?G) == 0b0100
+    end
+
+    @tag task_id: 1
+    test "?T to 0b1000" do
+      assert DNA.encode_nucleotide(?T) == 0b1000
+    end
+  end
+
+  # Task 2: a 4-bit integer code maps back to its nucleotide code point.
+  describe "decode to code point" do
+    @tag task_id: 2
+    test "0b0000 to ?\\s" do
+      assert DNA.decode_nucleotide(0b0000) == ?\s
+    end
+
+    @tag task_id: 2
+    test "0b0001 to ?A" do
+      assert DNA.decode_nucleotide(0b0001) == ?A
+    end
+
+    @tag task_id: 2
+    test "0b0010 to ?C" do
+      assert DNA.decode_nucleotide(0b0010) == ?C
+    end
+
+    @tag task_id: 2
+    test "0b0100 to ?G" do
+      assert DNA.decode_nucleotide(0b0100) == ?G
+    end
+
+    @tag task_id: 2
+    test "0b1000 to ?T" do
+      assert DNA.decode_nucleotide(0b1000) == ?T
+    end
+  end
+
+  # Task 3: a charlist is packed into a bitstring, 4 bits per element.
+  describe "encoding" do
+    @tag task_id: 3
+    test "' '" do
+      assert DNA.encode(~c" ") == <<0b0000::4>>
+    end
+
+    @tag task_id: 3
+    test "'A'" do
+      assert DNA.encode(~c"A") == <<0b0001::4>>
+    end
+
+    @tag task_id: 3
+    test "'C'" do
+      assert DNA.encode(~c"C") == <<0b0010::4>>
+    end
+
+    @tag task_id: 3
+    test "'G'" do
+      assert DNA.encode(~c"G") == <<0b0100::4>>
+    end
+
+    @tag task_id: 3
+    test "'T'" do
+      assert DNA.encode(~c"T") == <<0b1000::4>>
+    end
+
+    @tag task_id: 3
+    test "' ACGT'" do
+      expected = <<0b0000::4, 0b0001::4, 0b0010::4, 0b0100::4, 0b1000::4>>
+      assert DNA.encode(~c" ACGT") == expected
+    end
+
+    @tag task_id: 3
+    test "'TGCA '" do
+      expected = <<0b1000::4, 0b0100::4, 0b0010::4, 0b0001::4, 0b0000::4>>
+      assert DNA.encode(~c"TGCA ") == expected
+    end
+  end
+
+  # Task 4: a packed bitstring is unpacked into a charlist, in order.
+  describe "decoding" do
+    @tag task_id: 4
+    test "' '" do
+      assert DNA.decode(<<0b0000::4>>) == ~c" "
+    end
+
+    @tag task_id: 4
+    test "'A'" do
+      assert DNA.decode(<<0b0001::4>>) == ~c"A"
+    end
+
+    @tag task_id: 4
+    test "'C'" do
+      assert DNA.decode(<<0b0010::4>>) == ~c"C"
+    end
+
+    @tag task_id: 4
+    test "'G'" do
+      assert DNA.decode(<<0b0100::4>>) == ~c"G"
+    end
+
+    @tag task_id: 4
+    test "'T'" do
+      assert DNA.decode(<<0b1000::4>>) == ~c"T"
+    end
+
+    @tag task_id: 4
+    test "' ACGT'" do
+      encoded = <<0b0000::4, 0b0001::4, 0b0010::4, 0b0100::4, 0b1000::4>>
+      assert DNA.decode(encoded) == ~c" ACGT"
+    end
+
+    @tag task_id: 4
+    test "'TGCA '" do
+      encoded = <<0b1000::4, 0b0100::4, 0b0010::4, 0b0001::4, 0b0000::4>>
+      assert DNA.decode(encoded) == ~c"TGCA "
+    end
+  end
+end
--- a/elixir/dna-encoding/test/test_helper.exs
+++ b/elixir/dna-encoding/test/test_helper.exs
@ -0,0 +1,2 @@
+# Start ExUnit before the test modules are loaded.
+ExUnit.start()
+# Skip tests tagged `:pending`, print each test as it runs (`trace: true`),
+# and use seed 0 so tests run in the order they are defined.
+ExUnit.configure(exclude: :pending, trace: true, seed: 0)
				`@ -0,0 +1 @@`
				`{"track":"elixir","exercise":"dna-encoding","id":"cf5029ade3444553984645359b133fcc","url":"https://exercism.org/tracks/elixir/exercises/dna-encoding","handle":"negrienko","is_requester":true,"auto_approve":false}`