dna-encoding
This commit is contained in:
parent
9d03ff01dc
commit
db731118fc
|
@ -0,0 +1,22 @@
|
||||||
|
{
|
||||||
|
"authors": [
|
||||||
|
"neenjaw"
|
||||||
|
],
|
||||||
|
"contributors": [
|
||||||
|
"angelikatyborska",
|
||||||
|
"NobbZ"
|
||||||
|
],
|
||||||
|
"files": {
|
||||||
|
"solution": [
|
||||||
|
"lib/dna.ex"
|
||||||
|
],
|
||||||
|
"test": [
|
||||||
|
"test/dna_test.exs"
|
||||||
|
],
|
||||||
|
"exemplar": [
|
||||||
|
".meta/exemplar.ex"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"language_versions": ">=1.10",
|
||||||
|
"blurb": "Learn about bitstrings and tail call recursion by encoding DNA sequences as binary data."
|
||||||
|
}
|
|
@ -0,0 +1 @@
|
||||||
|
{"track":"elixir","exercise":"dna-encoding","id":"cf5029ade3444553984645359b133fcc","url":"https://exercism.org/tracks/elixir/exercises/dna-encoding","handle":"negrienko","is_requester":true,"auto_approve":false}
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Used by "mix format"
|
||||||
|
[
|
||||||
|
inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
|
||||||
|
]
|
|
@ -0,0 +1,24 @@
|
||||||
|
# The directory Mix will write compiled artifacts to.
|
||||||
|
/_build/
|
||||||
|
|
||||||
|
# If you run "mix test --cover", coverage assets end up here.
|
||||||
|
/cover/
|
||||||
|
|
||||||
|
# The directory Mix downloads your dependencies sources to.
|
||||||
|
/deps/
|
||||||
|
|
||||||
|
# Where third-party dependencies like ExDoc output generated docs.
|
||||||
|
/doc/
|
||||||
|
|
||||||
|
# Ignore .fetch files in case you like to edit your project deps locally.
|
||||||
|
/.fetch
|
||||||
|
|
||||||
|
# If the VM crashes, it generates a dump, let's ignore it too.
|
||||||
|
erl_crash.dump
|
||||||
|
|
||||||
|
# Also ignore archive artifacts (built via "mix archive.build").
|
||||||
|
*.ez
|
||||||
|
|
||||||
|
# Ignore package tarball (built via "mix hex.build").
|
||||||
|
bitstrings-*.tar
|
||||||
|
|
|
@ -0,0 +1,75 @@
|
||||||
|
# Help
|
||||||
|
|
||||||
|
## Running the tests
|
||||||
|
|
||||||
|
From the terminal, change to the base directory of the exercise then execute the tests with:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ mix test
|
||||||
|
```
|
||||||
|
|
||||||
|
This will execute the test file found in the `test` subfolder -- a file ending in `_test.exs`.
|
||||||
|
|
||||||
|
Documentation:
|
||||||
|
|
||||||
|
* [`mix test` - Elixir's test execution tool](https://hexdocs.pm/mix/Mix.Tasks.Test.html)
|
||||||
|
* [`ExUnit` - Elixir's unit test library](https://hexdocs.pm/ex_unit/ExUnit.html)
|
||||||
|
|
||||||
|
## Pending tests
|
||||||
|
|
||||||
|
In test suites of practice exercises, all but the first test have been tagged to be skipped.
|
||||||
|
|
||||||
|
Once you get a test passing, you can unskip the next one by commenting out the relevant `@tag :pending` with a `#` symbol.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```elixir
|
||||||
|
# @tag :pending
|
||||||
|
test "shouting" do
|
||||||
|
assert Bob.hey("WATCH OUT!") == "Whoa, chill out!"
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
If you wish to run all tests at once, you can include all skipped tests by using the `--include` flag on the `mix test` command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ mix test --include pending
|
||||||
|
```
|
||||||
|
|
||||||
|
Or, you can enable all the tests by commenting out the `ExUnit.configure` line in the file `test/test_helper.exs`.
|
||||||
|
|
||||||
|
```elixir
|
||||||
|
# ExUnit.configure(exclude: :pending, trace: true)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Useful `mix test` options
|
||||||
|
|
||||||
|
* `test/<FILE>.exs:LINENUM` - runs only a single test, the test from `<FILE>.exs` whose definition is on line `LINENUM`
|
||||||
|
* `--failed` - runs only tests that failed the last time they ran
|
||||||
|
* `--max-failures` - the suite stops evaluating tests when this number of test failures
|
||||||
|
is reached
|
||||||
|
* `--seed 0` - disables randomization so the tests in a single file will always be run
|
||||||
|
in the same order they were defined in
|
||||||
|
|
||||||
|
## Submitting your solution
|
||||||
|
|
||||||
|
You can submit your solution using the `exercism submit lib/dna.ex` command.
|
||||||
|
This command will upload your solution to the Exercism website and print the solution page's URL.
|
||||||
|
|
||||||
|
It's possible to submit an incomplete solution which allows you to:
|
||||||
|
|
||||||
|
- See how others have completed the exercise
|
||||||
|
- Request help from a mentor
|
||||||
|
|
||||||
|
## Need to get help?
|
||||||
|
|
||||||
|
If you'd like help solving the exercise, check the following pages:
|
||||||
|
|
||||||
|
- The [Elixir track's documentation](https://exercism.org/docs/tracks/elixir)
|
||||||
|
- The [Elixir track's programming category on the forum](https://forum.exercism.org/c/programming/elixir)
|
||||||
|
- [Exercism's programming category on the forum](https://forum.exercism.org/c/programming/5)
|
||||||
|
- The [Frequently Asked Questions](https://exercism.org/docs/using/faqs)
|
||||||
|
|
||||||
|
Should those resources not suffice, you could submit your (incomplete) solution to request mentoring.
|
||||||
|
|
||||||
|
If you're stuck on something, it may help to look at some of the [available resources](https://exercism.org/docs/tracks/elixir/resources) out there where answers might be found.
|
|
@ -0,0 +1,49 @@
|
||||||
|
# Hints
|
||||||
|
|
||||||
|
## General
|
||||||
|
|
||||||
|
- Use `?` to work with the character [code points][codepoint].
|
||||||
|
- `\s` can be used to represent a space.
|
||||||
|
- Use [integer binary notation][integer-literal] for working with the codes.
|
||||||
|
- Try to use the [tail call recursion][recursion-tco] strategy.
|
||||||
|
|
||||||
|
## 1. Encode nucleic acid to binary value
|
||||||
|
|
||||||
|
- This function needs to map one integer to another.
|
||||||
|
- This function doesn't need recursion.
|
||||||
|
- Making use of multiple clause functions may make this easier by breaking it down.
|
||||||
|
|
||||||
|
## 2. Decode the binary value to the nucleic acid
|
||||||
|
|
||||||
|
- This function is the opposite of part 1's function.
|
||||||
|
- This function doesn't need recursion.
|
||||||
|
- Making use of multiple clause functions may make this easier by breaking it down.
|
||||||
|
|
||||||
|
## 3. Encode a DNA charlist
|
||||||
|
|
||||||
|
- Create a tail-recursive function which takes a code point from the charlist and recursively builds the bitstring result.
|
||||||
|
- Tail-recursive functions need an accumulator.
|
||||||
|
- Remember, a [charlist][charlist] is a list of [integer code points][codepoint].
|
||||||
|
- You can get the first and remaining items from a list using a built-in [`Kernel` module][kernel] function.
|
||||||
|
- You can also pattern match on a list using the [`[head | tail]`][list] notation.
|
||||||
|
- Use multiple clause functions to separate the base case from the recursive cases.
|
||||||
|
- Do not forget to specify the types of bitstring segments using the [`::` operator][type-operator].
|
||||||
|
|
||||||
|
## 4. Decode a DNA bitstring
|
||||||
|
|
||||||
|
- Create a tail-recursive function which [matches the first 4 bits][bitstring-matching] from the [bitstring][bitstring] and recursively builds the [charlist][charlist] result.
|
||||||
|
- Tail-recursive functions need an accumulator.
|
||||||
|
- Remember the [bitstring special form][bitstring-form] can be used for matching on bitstrings.
|
||||||
|
- Do not forget to specify the types of bitstring segments using the [`::` operator][type-operator].
|
||||||
|
- You will need to reverse the accumulator at the end. Write a private tail-recursive `reverse` function to do that and use it in the base-case of the `decode` function.
|
||||||
|
|
||||||
|
[integer-literal]: https://hexdocs.pm/elixir/syntax-reference.html#integers-in-other-bases-and-unicode-code-points
|
||||||
|
[codepoint]: https://elixir-lang.org/getting-started/binaries-strings-and-char-lists.html#unicode-and-code-points
|
||||||
|
[charlist]: https://elixir-lang.org/getting-started/binaries-strings-and-char-lists.html#charlists
|
||||||
|
[bitstring]: https://elixir-lang.org/getting-started/binaries-strings-and-char-lists.html#bitstrings
|
||||||
|
[bitstring-form]: https://hexdocs.pm/elixir/Kernel.SpecialForms.html#%3C%3C%3E%3E/1
|
||||||
|
[bitstring-matching]: https://hexdocs.pm/elixir/Kernel.SpecialForms.html#%3C%3C%3E%3E/1-binary-bitstring-matching
|
||||||
|
[type-operator]: https://hexdocs.pm/elixir/Kernel.SpecialForms.html#::/2
|
||||||
|
[recursion-tco]: https://en.wikipedia.org/wiki/Tail_call
|
||||||
|
[list]: https://hexdocs.pm/elixir/List.html#content
|
||||||
|
[kernel]: https://hexdocs.pm/elixir/Kernel.html#functions
|
|
@ -0,0 +1,99 @@
|
||||||
|
# DNA Encoding
|
||||||
|
|
||||||
|
Welcome to DNA Encoding on Exercism's Elixir Track.
|
||||||
|
If you need help running the tests or submitting your code, check out `HELP.md`.
|
||||||
|
If you get stuck on the exercise, check out `HINTS.md`, but try and solve it without using those first :)
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
## Tail Call Recursion
|
||||||
|
|
||||||
|
When [recursing][exercism-recursion] through enumerables (lists, bitstrings, strings), there are often two concerns:
|
||||||
|
|
||||||
|
- how much memory is required to store the trail of recursive function calls
|
||||||
|
- how to build the solution efficiently
|
||||||
|
|
||||||
|
To deal with these concerns an _accumulator_ may be used.
|
||||||
|
|
||||||
|
An accumulator is a variable that is passed along in addition to the data. It is used to pass the current state of the function's execution, from function call to function call, until the _base case_ is reached. In the base case, the accumulator is used to return the final value of the recursive function call.
|
||||||
|
|
||||||
|
Accumulators should be initialized by the function's author, not the function's user. To achieve this, declare two functions - a public function that takes just the necessary data as arguments and initializes the accumulator, and a private function that also takes an accumulator. In Elixir, it is a common pattern to prefix the private function's name with `do_`.
|
||||||
|
|
||||||
|
```elixir
|
||||||
|
# Count the length of a list without an accumulator
|
||||||
|
def count([]), do: 0
|
||||||
|
def count([_head | tail]), do: 1 + count(tail)
|
||||||
|
|
||||||
|
# Count the length of a list with an accumulator
|
||||||
|
def count(list), do: do_count(list, 0)
|
||||||
|
|
||||||
|
defp do_count([], count), do: count
|
||||||
|
defp do_count([_head | tail], count), do: do_count(tail, count + 1)
|
||||||
|
```
|
||||||
|
|
||||||
|
The usage of an accumulator allows us to turn recursive functions into _tail-recursive_ functions. A function is tail-recursive if the _last_ thing executed by the function is a call to itself.
|
||||||
|
|
||||||
|
[exercism-recursion]: https://exercism.org/tracks/elixir/concepts/recursion
|
||||||
|
|
||||||
|
## Instructions
|
||||||
|
|
||||||
|
In your DNA research lab, you have been working through various ways to compress your research data to save storage space. One teammate suggests converting the DNA data to a binary representation:
|
||||||
|
|
||||||
|
| Nucleic Acid | Code |
|
||||||
|
| ------------ | ------ |
|
||||||
|
| a space | `0000` |
|
||||||
|
| A | `0001` |
|
||||||
|
| C | `0010` |
|
||||||
|
| G | `0100` |
|
||||||
|
| T | `1000` |
|
||||||
|
|
||||||
|
You ponder this, as it will potentially halve the required data storage costs, but at the expense of human readability. You decide to write a module to encode and decode your data to benchmark your savings.
|
||||||
|
|
||||||
|
## 1. Encode nucleic acid to binary value
|
||||||
|
|
||||||
|
Implement `encode_nucleotide/1` to accept the code point for the nucleic acid and return the integer value of the encoded code.
|
||||||
|
|
||||||
|
```elixir
|
||||||
|
DNA.encode_nucleotide(?A)
|
||||||
|
# => 1
|
||||||
|
# (which is equal to 0b0001)
|
||||||
|
```
|
||||||
|
|
||||||
|
## 2. Decode the binary value to the nucleic acid
|
||||||
|
|
||||||
|
Implement `decode_nucleotide/1` to accept the integer value of the encoded code and return the code point for the nucleic acid.
|
||||||
|
|
||||||
|
```elixir
|
||||||
|
DNA.decode_nucleotide(0b0001)
|
||||||
|
# => 65
|
||||||
|
# (which is equal to ?A)
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3. Encode a DNA charlist
|
||||||
|
|
||||||
|
Implement `encode/1` to accept a charlist representing nucleic acids and gaps and return a bitstring of the encoded data.
|
||||||
|
|
||||||
|
```elixir
|
||||||
|
DNA.encode(~c"AC GT")
|
||||||
|
# => <<18, 4, 8::size(4)>>
|
||||||
|
```
|
||||||
|
|
||||||
|
## 4. Decode a DNA bitstring
|
||||||
|
|
||||||
|
Implement `decode/1` to accept a bitstring representing nucleic acids and gaps and return the decoded data as a charlist.
|
||||||
|
|
||||||
|
```elixir
|
||||||
|
DNA.decode(<<132, 2, 1::size(4)>>)
|
||||||
|
# => ~c"TG CA"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Source
|
||||||
|
|
||||||
|
### Created by
|
||||||
|
|
||||||
|
- @neenjaw
|
||||||
|
|
||||||
|
### Contributed to by
|
||||||
|
|
||||||
|
- @angelikatyborska
|
||||||
|
- @NobbZ
|
|
@ -0,0 +1,29 @@
|
||||||
|
defmodule DNA do
  @moduledoc """
  Encodes DNA sequences as packed 4-bit codes and decodes them back.

  Each nucleotide (or a gap, written as a space) maps to one 4-bit code:

  | Nucleic acid | Code     |
  | ------------ | -------- |
  | a space      | `0b0000` |
  | A            | `0b0001` |
  | C            | `0b0010` |
  | G            | `0b0100` |
  | T            | `0b1000` |

  Sequences are represented as charlists when decoded and as bitstrings
  (4 bits per nucleotide) when encoded.
  """

  @doc """
  Returns the 4-bit integer code for a nucleotide code point.

  Only `?\\s`, `?A`, `?C`, `?G` and `?T` are accepted; any other argument
  raises `FunctionClauseError`.

      iex> DNA.encode_nucleotide(?A)
      1
  """
  @spec encode_nucleotide(char()) :: non_neg_integer()
  def encode_nucleotide(?\s), do: 0b0000
  def encode_nucleotide(?A), do: 0b0001
  def encode_nucleotide(?C), do: 0b0010
  def encode_nucleotide(?G), do: 0b0100
  def encode_nucleotide(?T), do: 0b1000

  @doc """
  Returns the nucleotide code point for a 4-bit integer code.

  This is the inverse of `encode_nucleotide/1`; any code other than the five
  listed in the module documentation raises `FunctionClauseError`.

      iex> DNA.decode_nucleotide(0b0001)
      65
  """
  @spec decode_nucleotide(non_neg_integer()) :: char()
  def decode_nucleotide(0b0000), do: ?\s
  def decode_nucleotide(0b0001), do: ?A
  def decode_nucleotide(0b0010), do: ?C
  def decode_nucleotide(0b0100), do: ?G
  def decode_nucleotide(0b1000), do: ?T

  @doc """
  Encodes a charlist of nucleotides into a bitstring, 4 bits per nucleotide.

  An empty charlist encodes to the empty bitstring.

      iex> DNA.encode(~c"AC GT")
      <<18, 4, 8::size(4)>>
  """
  @spec encode(charlist()) :: bitstring()
  def encode(dna), do: do_encode(dna, <<>>)

  # Tail-recursive worker: appends one 4-bit segment per code point.
  defp do_encode([], acc), do: acc

  defp do_encode([nucleotide | tail], acc) do
    do_encode(tail, <<acc::bitstring, encode_nucleotide(nucleotide)::size(4)>>)
  end

  @doc """
  Decodes a bitstring of 4-bit codes into a charlist of nucleotides.

  The bitstring length must be a multiple of 4 bits; otherwise no clause of
  the internal worker matches and `FunctionClauseError` is raised.

      iex> DNA.decode(<<132, 2, 1::size(4)>>)
      ~c"TG CA"
  """
  @spec decode(bitstring()) :: charlist()
  def decode(dna), do: do_decode(dna, [])

  # Tail-recursive worker. Decoded code points are prepended to the
  # accumulator (constant time per step) and the list is reversed once in the
  # base case; appending with `++` would copy the accumulator on every step.
  defp do_decode(<<>>, acc), do: reverse(acc, [])

  defp do_decode(<<nucleotide::4, rest::bitstring>>, acc) do
    do_decode(rest, [decode_nucleotide(nucleotide) | acc])
  end

  # Tail-recursive list reversal: moves each head onto the accumulator.
  defp reverse([], acc), do: acc
  defp reverse([head | tail], acc), do: reverse(tail, [head | acc])
end
|
|
@ -0,0 +1,28 @@
|
||||||
|
defmodule DNA.MixProject do
  use Mix.Project

  @app :dna
  @version "0.1.0"

  # Project definition read by Mix. The Elixir version requirement
  # (elixir: "~> 1.10") is intentionally left unset, as in the original file.
  def project do
    permanent? = Mix.env() == :prod

    [
      app: @app,
      version: @version,
      # elixir: "~> 1.10",
      start_permanent: permanent?,
      deps: deps()
    ]
  end

  # OTP application settings; run "mix help compile.app" for details.
  def application, do: [extra_applications: [:logger]]

  # No dependencies. Run "mix help deps" for the entry formats, e.g.
  #   {:dep_from_hexpm, "~> 0.3.0"}
  #   {:dep_from_git, git: "https://github.com/elixir-lang/my_dep.git", tag: "0.1.0"}
  defp deps, do: []
end
|
|
@ -0,0 +1,75 @@
|
||||||
|
defmodule DNATest do
  use ExUnit.Case

  # Task 1: a single nucleotide code point maps to its 4-bit integer code.
  describe "encode to 4-bit encoding" do
    @tag task_id: 1
    test "?\\s to 0b0000" do
      assert DNA.encode_nucleotide(?\s) == 0b0000
    end

    @tag task_id: 1
    test "?A to 0b0001" do
      assert DNA.encode_nucleotide(?A) == 0b0001
    end

    @tag task_id: 1
    test "?C to 0b0010" do
      assert DNA.encode_nucleotide(?C) == 0b0010
    end

    @tag task_id: 1
    test "?G to 0b0100" do
      assert DNA.encode_nucleotide(?G) == 0b0100
    end

    @tag task_id: 1
    test "?T to 0b1000" do
      assert DNA.encode_nucleotide(?T) == 0b1000
    end
  end

  # Task 2: a 4-bit integer code maps back to its nucleotide code point.
  describe "decode to code point" do
    @tag task_id: 2
    test "0b0000 to ?\\s" do
      assert DNA.decode_nucleotide(0b0000) == ?\s
    end

    @tag task_id: 2
    test "0b0001 to ?A" do
      assert DNA.decode_nucleotide(0b0001) == ?A
    end

    @tag task_id: 2
    test "0b0010 to ?C" do
      assert DNA.decode_nucleotide(0b0010) == ?C
    end

    @tag task_id: 2
    test "0b0100 to ?G" do
      assert DNA.decode_nucleotide(0b0100) == ?G
    end

    @tag task_id: 2
    test "0b1000 to ?T" do
      assert DNA.decode_nucleotide(0b1000) == ?T
    end
  end

  # Task 3: a charlist is packed into a bitstring, 4 bits per nucleotide.
  describe "encoding" do
    @tag task_id: 3
    test "' '" do
      assert DNA.encode(~c" ") == <<0b0000::4>>
    end

    @tag task_id: 3
    test "'A'" do
      assert DNA.encode(~c"A") == <<0b0001::4>>
    end

    @tag task_id: 3
    test "'C'" do
      assert DNA.encode(~c"C") == <<0b0010::4>>
    end

    @tag task_id: 3
    test "'G'" do
      assert DNA.encode(~c"G") == <<0b0100::4>>
    end

    @tag task_id: 3
    test "'T'" do
      assert DNA.encode(~c"T") == <<0b1000::4>>
    end

    @tag task_id: 3
    test "' ACGT'" do
      assert DNA.encode(~c" ACGT") == <<0b0000::4, 0b0001::4, 0b0010::4, 0b0100::4, 0b1000::4>>
    end

    @tag task_id: 3
    test "'TGCA '" do
      assert DNA.encode(~c"TGCA ") == <<0b1000::4, 0b0100::4, 0b0010::4, 0b0001::4, 0b0000::4>>
    end
  end

  # Task 4: a bitstring of 4-bit codes is unpacked into a charlist.
  describe "decoding" do
    @tag task_id: 4
    test "' '" do
      assert DNA.decode(<<0b0000::4>>) == ~c" "
    end

    @tag task_id: 4
    test "'A'" do
      assert DNA.decode(<<0b0001::4>>) == ~c"A"
    end

    @tag task_id: 4
    test "'C'" do
      assert DNA.decode(<<0b0010::4>>) == ~c"C"
    end

    @tag task_id: 4
    test "'G'" do
      assert DNA.decode(<<0b0100::4>>) == ~c"G"
    end

    @tag task_id: 4
    test "'T'" do
      assert DNA.decode(<<0b1000::4>>) == ~c"T"
    end

    @tag task_id: 4
    test "' ACGT'" do
      assert DNA.decode(<<0b0000::4, 0b0001::4, 0b0010::4, 0b0100::4, 0b1000::4>>) == ~c" ACGT"
    end

    @tag task_id: 4
    test "'TGCA '" do
      assert DNA.decode(<<0b1000::4, 0b0100::4, 0b0010::4, 0b0001::4, 0b0000::4>>) == ~c"TGCA "
    end
  end
end
|
|
@ -0,0 +1,2 @@
|
||||||
|
# Start ExUnit so the test files in this project are run by `mix test`.
ExUnit.start()
|
||||||
|
# Skip tests tagged `:pending`, print each test as it runs (`trace: true`),
# and fix the seed at 0 so tests run in the order they are defined.
# Comment this line out to run every test, including pending ones.
ExUnit.configure(exclude: :pending, trace: true, seed: 0)
|
Loading…
Reference in New Issue