Skip to content

Commit

Permalink
Add nucleotide-count exercise (#97)
Browse files Browse the repository at this point in the history
  • Loading branch information
keiravillekode authored Nov 1, 2024
1 parent 46b2bce commit 9a82afe
Show file tree
Hide file tree
Showing 12 changed files with 3,590 additions and 0 deletions.
8 changes: 8 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@
"prerequisites": [],
"difficulty": 3
},
{
"slug": "nucleotide-count",
"name": "Nucleotide Count",
"uuid": "3bc52375-890a-4133-b6e8-71abb9a4b64c",
"practices": [],
"prerequisites": [],
"difficulty": 3
},
{
"slug": "pangram",
"name": "Pangram",
Expand Down
23 changes: 23 additions & 0 deletions exercises/practice/nucleotide-count/.docs/instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Instructions

Each of us inherits from our biological parents a set of chemical instructions known as DNA that influence how our bodies are constructed.
All known life depends on DNA!

> Note: You do not need to understand anything about nucleotides or DNA to complete this exercise.

DNA is a long chain of other chemicals and the most important are the four nucleotides, adenine, cytosine, guanine and thymine.
A single DNA chain can contain billions of these four nucleotides and the order in which they occur is important!
We call the order of these nucleotides in a bit of DNA a "DNA sequence".

We represent a DNA sequence as an ordered collection of these four nucleotides and a common way to do that is with a string of characters such as "ATTACG" for a DNA sequence of 6 nucleotides.
'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine.

Given a string representing a DNA sequence, count how many of each nucleotide are present.
If the string contains characters that aren't A, C, G, or T then it is invalid and you should signal an error.

For example:

```text
"GATTACA" -> 'A': 3, 'C': 1, 'G': 1, 'T': 2
"INVALID" -> error
```
19 changes: 19 additions & 0 deletions exercises/practice/nucleotide-count/.meta/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"authors": [
"keiravillekode"
],
"files": {
"solution": [
"nucleotide_count.s"
],
"test": [
"nucleotide_count_test.c"
],
"example": [
".meta/example.s"
]
},
"blurb": "Given a DNA string, compute how many times each nucleotide occurs in the string.",
"source": "The Calculating DNA Nucleotides_problem at Rosalind",
"source_url": "https://rosalind.info/problems/dna/"
}
55 changes: 55 additions & 0 deletions exercises/practice/nucleotide-count/.meta/example.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
.text
.globl nucleotide_counts

/*
 * void nucleotide_counts(int16_t *counts, const char *strand);
 *
 * Tally the nucleotides of a NUL-terminated strand (AArch64, GNU as syntax).
 *
 * In:     x0 = counts, four consecutive 16-bit slots in the order A, C, G, T
 *         x1 = strand, read one byte at a time until a zero byte
 * Out:    the four slots hold the tallies of 'A', 'C', 'G' and 'T'; if any
 *         other non-zero byte is met, all four slots are set to -1 instead
 * Leaf routine: no stack use, no calls.
 * Clobbers: w2, w9-w12, x1 and the condition flags.
 */
nucleotide_counts:
        mov     w9, wzr                 /* w9  = number of 'A' seen */
        mov     w10, wzr                /* w10 = number of 'C' seen */
        mov     w11, wzr                /* w11 = number of 'G' seen */
        mov     w12, wzr                /* w12 = number of 'T' seen */

.Lnext_byte:
        ldrb    w2, [x1], #1            /* w2 = *strand++ */
        cbz     w2, .Lstore_tallies     /* terminator reached: publish tallies */

        cmp     w2, 'A'
        b.eq    .Lsaw_adenine

        cmp     w2, 'C'
        b.eq    .Lsaw_cytosine

        cmp     w2, 'G'
        b.eq    .Lsaw_guanine

        cmp     w2, 'T'
        b.eq    .Lsaw_thymine

        /* Any other byte invalidates the strand: report -1 in every slot. */
        mov     w2, #-1
        strh    w2, [x0]
        strh    w2, [x0, #2]
        strh    w2, [x0, #4]
        strh    w2, [x0, #6]
        ret

.Lstore_tallies:
        strh    w9, [x0]                /* counts[0] = adenine  */
        strh    w10, [x0, #2]           /* counts[1] = cytosine */
        strh    w11, [x0, #4]           /* counts[2] = guanine  */
        strh    w12, [x0, #6]           /* counts[3] = thymine  */
        ret

.Lsaw_adenine:
        add     w9, w9, #1
        b       .Lnext_byte

.Lsaw_cytosine:
        add     w10, w10, #1
        b       .Lnext_byte

.Lsaw_guanine:
        add     w11, w11, #1
        b       .Lnext_byte

.Lsaw_thymine:
        add     w12, w12, #1
        b       .Lnext_byte
25 changes: 25 additions & 0 deletions exercises/practice/nucleotide-count/.meta/tests.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# This is an auto-generated file.
#
# Regenerating this file via `configlet sync` will:
# - Recreate every `description` key/value pair
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
# - Preserve any other key/value pair
#
# As user-added comments (using the # character) will be removed when this file
# is regenerated, comments can be added via a `comment` key.

[3e5c30a8-87e2-4845-a815-a49671ade970]
description = "empty strand"

[a0ea42a6-06d9-4ac6-828c-7ccaccf98fec]
description = "can count one nucleotide in single-character input"

[eca0d565-ed8c-43e7-9033-6cefbf5115b5]
description = "strand with repeated nucleotide"

[40a45eac-c83f-4740-901a-20b22d15a39f]
description = "strand with multiple nucleotides"

[b4c47851-ee9e-4b0a-be70-a86e343bd851]
description = "strand with invalid nucleotides"
36 changes: 36 additions & 0 deletions exercises/practice/nucleotide-count/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Builds the exercise's Unity test binary with an AArch64 cross toolchain and
# runs it under qemu-aarch64.
# NOTE(review): make requires every recipe line to begin with a tab character;
# confirm the leading tabs are present in the checked-in file.

# Cross assembler and C compiler driver for aarch64-linux-gnu.
AS = aarch64-linux-gnu-as
CC = aarch64-linux-gnu-gcc

# Debug info plus strict warnings; -Werror turns every warning into a failure.
CFLAGS = -g -Wall -Wextra -pedantic -Werror
LDFLAGS =

# Link as a position-independent executable and fail on any linker warning.
ALL_LDFLAGS = -pie -Wl,--fatal-warnings

# Effective flags: the fixed project flags combined with CFLAGS / LDFLAGS.
ALL_CFLAGS = -std=c99 -fPIE $(CFLAGS)
ALL_LDFLAGS += $(LDFLAGS)

# One object per *.c and *.s file in this directory, plus the vendored Unity
# object; example.o is excluded from the link.
C_OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
AS_OBJS = $(patsubst %.s,%.o,$(wildcard *.s))
ALL_OBJS = $(filter-out example.o,$(C_OBJS) $(AS_OBJS) vendor/unity.o)

# Shared command for compiling a single C source ($<) into its object ($@).
CC_CMD = $(CC) $(ALL_CFLAGS) -c -o $@ $<

# Default target: build the test binary, then run it with qemu-aarch64, passing
# /usr/aarch64-linux-gnu via -L. $< is the first prerequisite, i.e. "tests".
all: tests
qemu-aarch64 -L /usr/aarch64-linux-gnu ./$<

# Link every object into the "tests" executable.
tests: $(ALL_OBJS)
@$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) -o $@ $(ALL_OBJS)

# Assemble an assembly source into an object.
%.o: %.s
@$(AS) -o $@ $<

# Compile a C source into an object.
%.o: %.c
@$(CC_CMD)

# Unity gets an explicit rule so that it is rebuilt when its headers change.
vendor/unity.o: vendor/unity.c vendor/unity.h vendor/unity_internals.h
@$(CC_CMD)

# Remove all objects and the test binary.
clean:
@rm -f *.o vendor/*.o tests

# "all" and "clean" name actions, not files.
.PHONY: all clean
5 changes: 5 additions & 0 deletions exercises/practice/nucleotide-count/nucleotide_count.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.text
.globl nucleotide_counts

/*
 * extern void nucleotide_counts(int16_t *counts, const char *strand);
 *
 * Starting-point stub: returns immediately and writes nothing to counts.
 * The test harness expects the four counts slots to receive the number of
 * 'A', 'C', 'G' and 'T' characters in strand, in that order, or -1 in every
 * slot when strand contains any other character.
 * Under AAPCS64 the two arguments arrive in x0 (counts) and x1 (strand).
 */
nucleotide_counts:
ret
79 changes: 79 additions & 0 deletions exercises/practice/nucleotide-count/nucleotide_count_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#include "vendor/unity.h"

#include <stdint.h>

/*
 * Value expected in every slot of the counts array when the strand contains
 * an invalid character. Parenthesized so the macro expands as a single
 * operand wherever it is used.
 */
#define INVALID (-1)

/*
 * Indices into the four-element counts array filled by nucleotide_counts().
 * The values are spelled out because they define the array layout.
 */
enum nucleotide {
    ADENINE = 0,
    CYTOSINE = 1,
    GUANINE = 2,
    THYMINE = 3
};

extern void nucleotide_counts(int16_t *counts, const char *strand);

/*
 * Per-test setup hook (presumably invoked by Unity before each test; confirm
 * against vendor/unity.h). Intentionally empty: the tests share no fixture.
 */
void setUp(void) {
}

/*
 * Per-test teardown hook, the counterpart of setUp(). Intentionally empty:
 * the tests allocate nothing that needs releasing.
 */
void tearDown(void) {
}

/*
 * An empty strand contains no nucleotides, so every count must be 0.
 */
void test_empty_strand(void) {
    /*
     * Pre-fill the output with a value that is neither a valid count nor
     * INVALID. An implementation that never writes to counts (such as the
     * initial stub) then fails deterministically instead of the assertions
     * reading indeterminate stack memory, which could happen to be zero.
     */
    int16_t counts[4] = {INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN};

    nucleotide_counts(counts, "");
    TEST_ASSERT_EQUAL_INT(0, counts[ADENINE]);
    TEST_ASSERT_EQUAL_INT(0, counts[CYTOSINE]);
    TEST_ASSERT_EQUAL_INT(0, counts[GUANINE]);
    TEST_ASSERT_EQUAL_INT(0, counts[THYMINE]);
}

/*
 * A one-character strand yields a count of 1 for that nucleotide and 0 for
 * the other three. Skipped while the TEST_IGNORE() line is present.
 */
void test_can_count_one_nucleotide_in_singlecharacter_input(void) {
    TEST_IGNORE();
    /*
     * Sentinel-fill the output so that a function which leaves any slot
     * unwritten is caught deterministically rather than by reading
     * indeterminate stack memory.
     */
    int16_t counts[4] = {INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN};

    nucleotide_counts(counts, "G");
    TEST_ASSERT_EQUAL_INT(0, counts[ADENINE]);
    TEST_ASSERT_EQUAL_INT(0, counts[CYTOSINE]);
    TEST_ASSERT_EQUAL_INT(1, counts[GUANINE]);
    TEST_ASSERT_EQUAL_INT(0, counts[THYMINE]);
}

/*
 * Seven repetitions of one nucleotide are all counted in that nucleotide's
 * slot; the other slots stay at 0. Skipped while the TEST_IGNORE() line is
 * present.
 */
void test_strand_with_repeated_nucleotide(void) {
    TEST_IGNORE();
    /*
     * Sentinel-fill the output so that a function which leaves any slot
     * unwritten is caught deterministically rather than by reading
     * indeterminate stack memory.
     */
    int16_t counts[4] = {INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN};

    nucleotide_counts(counts, "GGGGGGG");
    TEST_ASSERT_EQUAL_INT(0, counts[ADENINE]);
    TEST_ASSERT_EQUAL_INT(0, counts[CYTOSINE]);
    TEST_ASSERT_EQUAL_INT(7, counts[GUANINE]);
    TEST_ASSERT_EQUAL_INT(0, counts[THYMINE]);
}

/*
 * A longer strand mixing all four nucleotides produces an independent tally
 * for each of them. Skipped while the TEST_IGNORE() line is present.
 */
void test_strand_with_multiple_nucleotides(void) {
    TEST_IGNORE();
    /*
     * Sentinel-fill the output so that a function which leaves any slot
     * unwritten is caught deterministically rather than by reading
     * indeterminate stack memory.
     */
    int16_t counts[4] = {INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN};

    nucleotide_counts(counts, "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC");
    TEST_ASSERT_EQUAL_INT(20, counts[ADENINE]);
    TEST_ASSERT_EQUAL_INT(12, counts[CYTOSINE]);
    TEST_ASSERT_EQUAL_INT(17, counts[GUANINE]);
    TEST_ASSERT_EQUAL_INT(21, counts[THYMINE]);
}

/*
 * A strand containing a character other than A, C, G or T must be reported
 * by setting all four slots to INVALID, including the slots of nucleotides
 * seen before the bad character. Skipped while the TEST_IGNORE() line is
 * present.
 */
void test_strand_with_invalid_nucleotides(void) {
    TEST_IGNORE();
    /*
     * Sentinel-fill the output with a value different from INVALID so that a
     * function which leaves any slot unwritten is caught deterministically
     * rather than by reading indeterminate stack memory.
     */
    int16_t counts[4] = {INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN};

    nucleotide_counts(counts, "AGXXACT");
    TEST_ASSERT_EQUAL_INT(INVALID, counts[ADENINE]);
    TEST_ASSERT_EQUAL_INT(INVALID, counts[CYTOSINE]);
    TEST_ASSERT_EQUAL_INT(INVALID, counts[GUANINE]);
    TEST_ASSERT_EQUAL_INT(INVALID, counts[THYMINE]);
}

/*
 * Test runner entry point. Runs the five tests in the order listed, between
 * UNITY_BEGIN() and UNITY_END(), and uses the value of UNITY_END() as the
 * process exit status (presumably the number of failed tests; confirm against
 * vendor/unity.h).
 */
int main(void) {
UNITY_BEGIN();
RUN_TEST(test_empty_strand);
RUN_TEST(test_can_count_one_nucleotide_in_singlecharacter_input);
RUN_TEST(test_strand_with_repeated_nucleotide);
RUN_TEST(test_strand_with_multiple_nucleotides);
RUN_TEST(test_strand_with_invalid_nucleotides);
return UNITY_END();
}
Loading

0 comments on commit 9a82afe

Please sign in to comment.