diff --git a/pyromaniac/compiler/code/errors.py b/pyromaniac/compiler/code/errors.py new file mode 100644 index 0000000..d9846a1 --- /dev/null +++ b/pyromaniac/compiler/code/errors.py @@ -0,0 +1,5 @@ +from ..errors import CompilerError + + +class CodeError(CompilerError): + pass diff --git a/pyromaniac/compiler/code/segment/__init__.py b/pyromaniac/compiler/code/segment/__init__.py new file mode 100644 index 0000000..45a423e --- /dev/null +++ b/pyromaniac/compiler/code/segment/__init__.py @@ -0,0 +1,11 @@ +from .errors import ( + SegmentError, + UnexpectedTokenError, + InvalidSignatureError, +) +from .segment import segment + +__all__ = [ + "segment", + "SegmentError", "UnexpectedTokenError", "InvalidSignatureError", +] diff --git a/pyromaniac/compiler/code/segment/errors.py b/pyromaniac/compiler/code/segment/errors.py new file mode 100644 index 0000000..3e0080a --- /dev/null +++ b/pyromaniac/compiler/code/segment/errors.py @@ -0,0 +1,46 @@ +from typing import TYPE_CHECKING +from ..errors import CodeError + +if TYPE_CHECKING: + from .token import Token + + +class SegmentError(CodeError): + """Code segmenting error. + + :param token: token at which error occurred + """ + + def __init__(self, token: 'Token'): + self.token = token + + @property + def line(self) -> int: + """Shortcut for token info start line.""" + return self.token.info.start[0] + + +class InvalidSignatureError(SegmentError): + """Invalid signature error. + + :param token: token at which error occurred + """ + + def __str__(self) -> str: + return f"unmatched delimiter in signature in line {self.line}" + + +class UnexpectedTokenError(SegmentError): + """Unexpected token error. 
+ + :param token: token at which error occurred + :param location: string describing where the error occurred + """ + + def __init__(self, token: 'Token', location: str): + super().__init__(token) + self.location = location + + def __str__(self) -> str: + string = repr(self.token.string) + return f"unexpected {string} {self.location} in line {self.line}" diff --git a/pyromaniac/compiler/code/segment/segment.py b/pyromaniac/compiler/code/segment/segment.py new file mode 100644 index 0000000..e7658c8 --- /dev/null +++ b/pyromaniac/compiler/code/segment/segment.py @@ -0,0 +1,27 @@ +from .segmenter import Segmenter + + +def segment( + code: str +) -> tuple[str | None, str | None, str | None, str | None]: + """Segment the source code into doc string, signature, python, and yaml. + + Returns either the segment string or None for each possible segment. Makes + sure that the line numbers stay unaltered for python and yaml code to + enable accurate error messages during parsing. + + :param code: source code to create segments from + :returns: string or None for each segment respectively + """ + doc, sig, python, yaml = Segmenter(code).segment() + return ( + code[doc] if doc is not None else None, + code[sig] if sig is not None else None, + extract(code, python) if python is not None else None, + extract(code, yaml) if yaml is not None else None, + ) + + +# extract code segment without altering line numbers
def extract(code: str, slc: slice): + return "\n" * code[:slc.start].count("\n") + code[slc] diff --git a/pyromaniac/compiler/code/segment/segmenter.py b/pyromaniac/compiler/code/segment/segmenter.py new file mode 100644 index 0000000..d4cb641 --- /dev/null +++ b/pyromaniac/compiler/code/segment/segmenter.py @@ -0,0 +1,116 @@ +import tokenize as t + +from .errors import UnexpectedTokenError, InvalidSignatureError +from .token import Token +from .stream import Stream + +# token types to ignore between meaningful tokens +TYPES = [t.NL, t.NEWLINE, t.COMMENT] + + +class 
Segmenter: + """Source code segmenter. + + :param code: source code to segment""" + + def __init__(self, code: str): + self.tokens = Stream(code) + self.length = len(code) + + def segment( + self + ) -> tuple[slice | None, slice | None, slice | None, slice | None]: + """Segment source code into doc string, signature, python and yaml. + + Returns either a slice or None for each possible segment. If a slice, + it can be used to index the source code to get the according segment. + Raises errors when doc string or signature are followed by unexpected + tokens or the signature isn't finished but leaves all other error + detection to the python and yaml parsers. + + :returns: tuple of optional slices representing source code positions + """ + # initialize result slices + doc, sig, python, yaml = (None,) * 4 + + # consume encoding token + last = self.tokens.consume([t.ENCODING]) + if last is None: + token = self.tokens.get(0) + raise UnexpectedTokenError(token, "at the beginning") + + # get doc string if present + last = self.tokens.consume(TYPES) or last + if self.tokens.match(t.STRING): + doc, last = self.read_doc() + last = self.tokens.consume(TYPES) + if last is None: + token = self.tokens.get(0) + raise UnexpectedTokenError(token, "after the doc string") + + # get signature if present + if self.tokens.match((t.OP, '(')): + sig, last = self.read_signature() + last = self.tokens.consume(TYPES) + if last is None: + token = self.tokens.get(0) + raise UnexpectedTokenError(token, "after the signature") + + # get python code if present + if self.tokens.match( + (t.OP, '-'), (t.OP, '-'), (t.OP, '-'), t.NEWLINE, + ): + last = self.tokens.consume(3) + python, last, end = self.read_python() + else: + end = False + + # get yaml code if present + if not end: + yaml = slice(last.stop, self.length) + + # return slices + return doc, sig, python, yaml + + # read doc string and return the slice and the last consumed token + def read_doc(self) -> tuple[slice, Token]: + last = 
self.tokens.consume(1) + return last.slice, last + + # read the signature and return the slice and the last consumed token + def read_signature(self) -> tuple[slice, Token]: + balance = 0 + + # consume opening parenthesis + last = self.tokens.consume(1) + start = last.start + balance += 1 + + # consume until matching closing parenthesis + while balance > 0: + if self.tokens.match((t.OP, '(')): + balance += 1 + elif self.tokens.match((t.OP, ')')): + balance -= 1 + elif self.tokens.match(t.ERRORTOKEN): + raise InvalidSignatureError(self.tokens.get(0)) + last = self.tokens.consume(1) + + return slice(start, last.stop), last + + # read the python code and return slice, the last token, and whether at end + def read_python(self) -> tuple[slice, Token, bool]: + last = self.tokens.consume(1) + start = self.tokens.get(0).start + + while True: + if self.tokens.match(t.ENDMARKER): + return slice(start, last.stop), last, True + elif self.tokens.match(t.ERRORTOKEN): + return slice(start, self.length), last, True + elif last.type in (t.NL, t.NEWLINE) and self.tokens.match( + (t.OP, '-'), (t.OP, '-'), (t.OP, '-'), t.NEWLINE, + ): + return slice(start, last.stop), self.tokens.consume(4), False + else: + last = self.tokens.consume(1) diff --git a/pyromaniac/compiler/code/segment/stream.py b/pyromaniac/compiler/code/segment/stream.py new file mode 100644 index 0000000..a5c4318 --- /dev/null +++ b/pyromaniac/compiler/code/segment/stream.py @@ -0,0 +1,110 @@ +from collections.abc import Iterable +from io import BytesIO +import tokenize as t + +from .token import Token + + +class Stream: + """Token stream with look ahead and matching. + + Produces an endless stream of ERRORTOKENs after the ENDMARKER or when an + unclosed pair (parenthesis, quotes, etc.) is encountered. 
+ + :param code: source code to parse + """ + + def __init__(self, code: str): + self.buffer = [] + self.stream = generate(code) + + def match(self, *pattern: int | tuple[int, str]) -> bool: + """Check if leading tokens have specified types (and strings). + + Returns True iff the next tokens in the stream match the specified + types (and string contents) in the specified order. + + :param pattern: list of types and optionally strings to match against + :returns: whether the leading tokens match the pattern + """ + for i, pat in enumerate(pattern): + if i > 0 and self.get(i - 1).type == t.ENDMARKER: + return False + + # check type and string + match pat, self.get(i): + case int(type), tok if type != tok.type: + return False + case (pt, ps), tok if (pt, ps) != (tok.type, tok.string): + return False + + # return True if no mismatch occurred + return True + + def consume(self, what: int | list[int]) -> Token | None: + """Remove specified tokens from the start of the stream. + + If *what* is an integer it is interpreted as the number of tokens to + remove. If it is a list of integers it is interpreted as a list of + token types which are removed until the next token in the stream has + a different type. + + :param what: count or token types to remove + :returns: last removed token if any + """ + last = None + + if isinstance(what, int): + for i in range(what): + last = self.get(0, True) + else: + while self.get(0).type in what: + last = self.get(0, True) + + return last + + def get(self, i: int, pop: bool = False) -> Token: + """Get the token at position *i* and remove it if requested. + + Makes sure at least *i + 1* tokens are buffered and returns the + requested token, removing it if requested. 
+ + :param i: index of token to return + :param pop: whether to remove the token + :returns: the requested token + """ + while len(self.buffer) <= i: + self.buffer.append(next(self.stream)) + return self.buffer.pop(i) if pop else self.buffer[i] + + +# generate tokens with position in source code
def generate(code: str) -> Iterable[Token]: + line_start = 0 + end = 0 + try: + for info in t.tokenize(BytesIO(code.encode()).readline): + # get token start and end + start = line_start + info.start[1] + if info.start[0] == info.end[0]: + slc = slice(start, line_start + info.end[1]) + else: + slc = slice(start, start + len(info.string)) + + # keep track of start of line + match info.type: + case t.NL | t.NEWLINE: line_start = slc.stop + case t.STRING: line_start = slc.stop - info.end[1] + end = slc.stop + + # yield token and position + yield Token(info, slc) + except t.TokenError: + pass + + # Keep yielding error token on further reading + message = 'invalid token' + info = t.TokenInfo(t.ERRORTOKEN, message, info.start, info.start, message) + token = Token(info, slice(end, end)) + while True: + yield token diff --git a/pyromaniac/compiler/code/segment/token.py b/pyromaniac/compiler/code/segment/token.py new file mode 100644 index 0000000..5eb7830 --- /dev/null +++ b/pyromaniac/compiler/code/segment/token.py @@ -0,0 +1,33 @@ +from tokenize import TokenInfo + + +class Token: + """Syntactic token with source code position. 
+ + :param info: TokenInfo object + :param slice: position of token in source code + """ + + def __init__(self, info: TokenInfo, slice: slice): + self.info = info + self.slice = slice + + @property + def type(self) -> int: + """Shortcut for token info type.""" + return self.info.type + + @property + def string(self) -> str: + """Shortcut for token info string.""" + return self.info.string + + @property + def start(self) -> int: + """Shortcut for source code position start.""" + return self.slice.start + + @property + def stop(self) -> int: + """Shortcut for source code position stop.""" + return self.slice.stop