Add nucleotide-count exercise (#97)

exercism · Nov 1, 2024 · 9a82afe · 9a82afe
1 parent 46b2bce
commit 9a82afe
Show file tree

Hide file tree

Showing 12 changed files with 3,590 additions and 0 deletions.
diff --git a/config.json b/config.json
@@ -130,6 +130,14 @@
         "prerequisites": [],
         "difficulty": 3
       },
+      {
+        "slug": "nucleotide-count",
+        "name": "Nucleotide Count",
+        "uuid": "3bc52375-890a-4133-b6e8-71abb9a4b64c",
+        "practices": [],
+        "prerequisites": [],
+        "difficulty": 3
+      },
       {
         "slug": "pangram",
         "name": "Pangram",

diff --git a/exercises/practice/nucleotide-count/.docs/instructions.md b/exercises/practice/nucleotide-count/.docs/instructions.md
@@ -0,0 +1,23 @@
+# Instructions
+
+Each of us inherits from our biological parents a set of chemical instructions known as DNA that influence how our bodies are constructed.
+All known life depends on DNA!
+
+> Note: You do not need to understand anything about nucleotides or DNA to complete this exercise.
+
+DNA is a long chain of other chemicals and the most important are the four nucleotides, adenine, cytosine, guanine and thymine.
+A single DNA chain can contain billions of these four nucleotides and the order in which they occur is important!
+We call the order of these nucleotides in a bit of DNA a "DNA sequence".
+
+We represent a DNA sequence as an ordered collection of these four nucleotides and a common way to do that is with a string of characters such as "ATTACG" for a DNA sequence of 6 nucleotides.
+'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine.
+
+Given a string representing a DNA sequence, count how many of each nucleotide is present.
+If the string contains characters that aren't A, C, G, or T then it is invalid and you should signal an error.
+
+For example:
+
+```text
+"GATTACA" -> 'A': 3, 'C': 1, 'G': 1, 'T': 2
+"INVALID" -> error
+```
diff --git a/exercises/practice/nucleotide-count/.meta/config.json b/exercises/practice/nucleotide-count/.meta/config.json
@@ -0,0 +1,19 @@
+{
+  "authors": [
+    "keiravillekode"
+  ],
+  "files": {
+    "solution": [
+      "nucleotide_count.s"
+    ],
+    "test": [
+      "nucleotide_count_test.c"
+    ],
+    "example": [
+      ".meta/example.s"
+    ]
+  },
+  "blurb": "Given a DNA string, compute how many times each nucleotide occurs in the string.",
+  "source": "The Calculating DNA Nucleotides problem at Rosalind",
+  "source_url": "https://rosalind.info/problems/dna/"
+}
diff --git a/exercises/practice/nucleotide-count/.meta/example.s b/exercises/practice/nucleotide-count/.meta/example.s
@@ -0,0 +1,55 @@
+.text
+.globl nucleotide_counts
+
+/*
+ * extern void nucleotide_counts(int16_t *counts, const char *strand);
+ *
+ * Tallies the 'A', 'C', 'G' and 'T' bytes of a NUL-terminated strand and
+ * writes the four tallies, in that order, as halfwords to counts[0..3].
+ * Any other byte before the terminator makes all four entries -1.
+ *
+ * In:      x0 = counts, x1 = strand
+ * Scratch: w2 = current byte (or the -1 marker), w4..w7 = A/C/G/T tallies
+ */
+nucleotide_counts:
+        mov     w4, wzr                 /* A */
+        mov     w5, wzr                 /* C */
+        mov     w6, wzr                 /* G */
+        mov     w7, wzr                 /* T */
+
+.Lnext_base:
+        ldrb    w2, [x1], #1            /* w2 = *strand++ */
+        cbz     w2, .Lstore_tallies     /* terminator: strand was valid */
+
+        /* cinc leaves the flags alone, so b.eq still sees the cmp result. */
+        cmp     w2, 'A'
+        cinc    w4, w4, eq
+        b.eq    .Lnext_base
+
+        cmp     w2, 'C'
+        cinc    w5, w5, eq
+        b.eq    .Lnext_base
+
+        cmp     w2, 'G'
+        cinc    w6, w6, eq
+        b.eq    .Lnext_base
+
+        cmp     w2, 'T'
+        cinc    w7, w7, eq
+        b.eq    .Lnext_base
+
+        /* Not a nucleotide: flag every slot as invalid. */
+        mov     w2, -1
+        strh    w2, [x0]
+        strh    w2, [x0, #2]
+        strh    w2, [x0, #4]
+        strh    w2, [x0, #6]
+        ret
+
+.Lstore_tallies:
+        /* Valid strand: write the tallies in A, C, G, T order. */
+        strh    w4, [x0]
+        strh    w5, [x0, #2]
+        strh    w6, [x0, #4]
+        strh    w7, [x0, #6]
+        ret
diff --git a/exercises/practice/nucleotide-count/.meta/tests.toml b/exercises/practice/nucleotide-count/.meta/tests.toml
@@ -0,0 +1,25 @@
+# This is an auto-generated file.
+#
+# Regenerating this file via `configlet sync` will:
+# - Recreate every `description` key/value pair
+# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
+# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
+# - Preserve any other key/value pair
+#
+# As user-added comments (using the # character) will be removed when this file
+# is regenerated, comments can be added via a `comment` key.
+
+[3e5c30a8-87e2-4845-a815-a49671ade970]
+description = "empty strand"
+
+[a0ea42a6-06d9-4ac6-828c-7ccaccf98fec]
+description = "can count one nucleotide in single-character input"
+
+[eca0d565-ed8c-43e7-9033-6cefbf5115b5]
+description = "strand with repeated nucleotide"
+
+[40a45eac-c83f-4740-901a-20b22d15a39f]
+description = "strand with multiple nucleotides"
+
+[b4c47851-ee9e-4b0a-be70-a86e343bd851]
+description = "strand with invalid nucleotides"
diff --git a/exercises/practice/nucleotide-count/Makefile b/exercises/practice/nucleotide-count/Makefile
@@ -0,0 +1,36 @@
+AS = aarch64-linux-gnu-as
+CC = aarch64-linux-gnu-gcc
+
+CFLAGS = -g -Wall -Wextra -pedantic -Werror
+LDFLAGS =
+
+ALL_LDFLAGS = -pie -Wl,--fatal-warnings
+
+ALL_CFLAGS = -std=c99 -fPIE $(CFLAGS)
+ALL_LDFLAGS += $(LDFLAGS)
+
+C_OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
+AS_OBJS = $(patsubst %.s,%.o,$(wildcard *.s))
+ALL_OBJS = $(filter-out example.o,$(C_OBJS) $(AS_OBJS) vendor/unity.o)
+
+CC_CMD = $(CC) $(ALL_CFLAGS) -c -o $@ $<
+
+all: tests  # default goal: build ./tests, then run it under qemu-aarch64
	qemu-aarch64 -L /usr/aarch64-linux-gnu ./$<
+
+tests: $(ALL_OBJS)  # link every object (example.o excluded) into ./tests
	@$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) -o $@ $(ALL_OBJS)
+
+%.o: %.s  # assemble with $(AS)
	@$(AS) -o $@ $<
+
+%.o: %.c  # compile with $(CC) and $(ALL_CFLAGS)
	@$(CC_CMD)
+
+vendor/unity.o: vendor/unity.c vendor/unity.h vendor/unity_internals.h  # rebuilt when a Unity header changes
	@$(CC_CMD)
+
+clean:  # delete objects and the test binary
	@rm -f *.o vendor/*.o tests
+
+.PHONY: all clean
diff --git a/exercises/practice/nucleotide-count/nucleotide_count.s b/exercises/practice/nucleotide-count/nucleotide_count.s
@@ -0,0 +1,5 @@
+.text
+.globl nucleotide_counts
+
+nucleotide_counts:
+    ret                                 /* stub: returns without writing to counts */
diff --git a/exercises/practice/nucleotide-count/nucleotide_count_test.c b/exercises/practice/nucleotide-count/nucleotide_count_test.c
@@ -0,0 +1,79 @@
+#include "vendor/unity.h"
+
+#include <stdint.h>
+
+#define INVALID -1
+
+enum nucleotide {
+    ADENINE,    /* counts[0] */
+    CYTOSINE,   /* counts[1] */
+    GUANINE,    /* counts[2] */
+    THYMINE     /* counts[3] */
+};
+
+extern void nucleotide_counts(int16_t *counts, const char *strand);
+
+void setUp(void) {  /* Unity pre-test hook; nothing to prepare */
+}
+
+void tearDown(void) {  /* Unity post-test hook; nothing to release */
+}
+
+void test_empty_strand(void) {
+    int16_t counts[4];  /* deliberately uninitialized: the callee must write all four */
+    nucleotide_counts(counts, "");  /* empty strand: every count is expected to be 0 */
+    TEST_ASSERT_EQUAL_INT(0, counts[ADENINE]);
+    TEST_ASSERT_EQUAL_INT(0, counts[CYTOSINE]);
+    TEST_ASSERT_EQUAL_INT(0, counts[GUANINE]);
+    TEST_ASSERT_EQUAL_INT(0, counts[THYMINE]);
+}
+
+void test_can_count_one_nucleotide_in_singlecharacter_input(void) {
+    TEST_IGNORE();  /* skips this test; delete the line to enable it */
+    int16_t counts[4];
+    nucleotide_counts(counts, "G");  /* a single guanine */
+    TEST_ASSERT_EQUAL_INT(0, counts[ADENINE]);
+    TEST_ASSERT_EQUAL_INT(0, counts[CYTOSINE]);
+    TEST_ASSERT_EQUAL_INT(1, counts[GUANINE]);
+    TEST_ASSERT_EQUAL_INT(0, counts[THYMINE]);
+}
+
+void test_strand_with_repeated_nucleotide(void) {
+    TEST_IGNORE();  /* skips this test; delete the line to enable it */
+    int16_t counts[4];
+    nucleotide_counts(counts, "GGGGGGG");  /* seven guanines, nothing else */
+    TEST_ASSERT_EQUAL_INT(0, counts[ADENINE]);
+    TEST_ASSERT_EQUAL_INT(0, counts[CYTOSINE]);
+    TEST_ASSERT_EQUAL_INT(7, counts[GUANINE]);
+    TEST_ASSERT_EQUAL_INT(0, counts[THYMINE]);
+}
+
+void test_strand_with_multiple_nucleotides(void) {
+    TEST_IGNORE();  /* skips this test; delete the line to enable it */
+    int16_t counts[4];  /* filled below from a strand mixing all four nucleotides */
+    nucleotide_counts(counts, "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC");
+    TEST_ASSERT_EQUAL_INT(20, counts[ADENINE]);
+    TEST_ASSERT_EQUAL_INT(12, counts[CYTOSINE]);
+    TEST_ASSERT_EQUAL_INT(17, counts[GUANINE]);
+    TEST_ASSERT_EQUAL_INT(21, counts[THYMINE]);
+}
+
+void test_strand_with_invalid_nucleotides(void) {
+    TEST_IGNORE();  /* skips this test; delete the line to enable it */
+    int16_t counts[4];
+    nucleotide_counts(counts, "AGXXACT");  /* 'X' is not one of A, C, G, T */
+    TEST_ASSERT_EQUAL_INT(INVALID, counts[ADENINE]);  /* every slot must read INVALID (-1) */
+    TEST_ASSERT_EQUAL_INT(INVALID, counts[CYTOSINE]);
+    TEST_ASSERT_EQUAL_INT(INVALID, counts[GUANINE]);
+    TEST_ASSERT_EQUAL_INT(INVALID, counts[THYMINE]);
+}
+
+int main(void) {
+    UNITY_BEGIN();
+    RUN_TEST(test_empty_strand);  /* the only test without TEST_IGNORE() */
+    RUN_TEST(test_can_count_one_nucleotide_in_singlecharacter_input);
+    RUN_TEST(test_strand_with_repeated_nucleotide);
+    RUN_TEST(test_strand_with_multiple_nucleotides);
+    RUN_TEST(test_strand_with_invalid_nucleotides);
+    return UNITY_END();  /* whatever UNITY_END() yields becomes the exit status */
+}