diff --git a/malduck/extractor/extractor.py b/malduck/extractor/extractor.py index c15be80..520f6a0 100644 --- a/malduck/extractor/extractor.py +++ b/malduck/extractor/extractor.py @@ -3,6 +3,8 @@ import logging from typing import List, cast +import yara + from ..procmem import ProcessMemory, ProcessMemoryELF, ProcessMemoryPE log = logging.getLogger(__name__) @@ -77,6 +79,7 @@ class Extractor: * :py:attr:`family` (see :py:attr:`extractor.Extractor.family`) * :py:attr:`yara_rules` + * :py:attr:`yara_source` * :py:attr:`overrides` (optional, see :py:attr:`extractor.Extractor.overrides`) Example extractor code for Citadel: @@ -115,6 +118,31 @@ def cit_login(self, p, addr, match): - `@Extractor.rule` methods - `@Extractor.final` methods + .. py:decoratormethod:: Extractor.yara + + Decorator for extractor classes to embed Yara rules and compute the `yara_rules` property. + + The above example can embed the rule as follows: + + .. code-block:: Python + + from malduck import Extractor + + @Extractor.yara(r\"\"\" + rule possible_citadel { + strings: + $briankerbs = ... + $cit_login = ... + condition: + all of them + } + \"\"\") + class Citadel(Extractor): + family = "citadel" + overrides = ("zeus",) + + ... + .. py:decoratormethod:: Extractor.string Decorator for string-based extractor methods. @@ -327,6 +355,7 @@ def is_it_really_evil(self, p): """ yara_rules = () #: Names of Yara rules for which handle_match is called + yara_source = None family = None #: Extracted malware family, automatically added to "family" key for strong extraction methods overrides = [] #: Family match overrides another match e.g. 
@staticmethod
def yara(source):
    """
    Class decorator factory that embeds Yara rules in an extractor class.

    The decorated class gets the rule source stored in ``yara_source`` and the
    identifiers of all rules defined by that source stored in ``yara_rules``.

    :param source: Source code of the Yara rules
    :type source: str
    :return: Decorator that takes an :class:`Extractor` subclass and returns the same class
    :raises TypeError: If ``source`` is not a string or the decorated object
        is not an :class:`Extractor` subclass
    """
    if not isinstance(source, str):
        raise TypeError("Expected string argument")

    def modifier(extractor):
        # issubclass() raises its own, less helpful TypeError when handed a
        # non-class object (e.g. a decorated function), so check for a class first.
        if not (isinstance(extractor, type) and issubclass(extractor, Extractor)):
            raise TypeError("Expected Extractor argument")
        # Compile before touching the class, so a rule that fails to compile
        # does not leave the extractor half-configured.
        # A tuple is used to match the class-level default ``yara_rules = ()``.
        rule_names = tuple(rule.identifier for rule in yara.compile(source=source))
        extractor.yara_source = source
        extractor.yara_rules = rule_names
        return extractor

    return modifier
+ :type modules: list """ - def __init__(self, modules_path: Optional[str] = None) -> None: + def __init__( + self, + modules_path: Optional[str] = None, + modules: Optional[List[Type[Extractor]]] = None, + ) -> None: if modules_path is None: modules_path = os.path.join(os.path.expanduser("~"), ".malduck") - if not os.path.exists(modules_path): - os.makedirs(modules_path) - # Load Yara rules - self.rules: Yara = Yara.from_dir(modules_path) + # Preload modules loaded_modules = load_modules(modules_path, onerror=self.on_error) self.extractors: List[Type[Extractor]] = Extractor.__subclasses__() + if modules: + self.extractors.extend(modules) + loaded_extractors = [x.__module__ for x in self.extractors] for module in loaded_modules.values(): @@ -44,6 +50,16 @@ def __init__(self, modules_path: Optional[str] = None) -> None: ) self.override_paths = make_override_paths(self.extractors) + # Load Yara rules + self.rules: Yara = Yara.from_dir_and_sources( + path=modules_path, + sources={ + extractor.family: extractor.yara_source + for extractor in self.extractors + if extractor.yara_source and extractor.family + }, + ) + def on_error(self, exc: Exception, module_name: str) -> None: """ Handler for all exceptions raised during module load diff --git a/malduck/yara.py b/malduck/yara.py index 06986f3..510024a 100644 --- a/malduck/yara.py +++ b/malduck/yara.py @@ -108,13 +108,25 @@ class Yara: :type strings: dict or str or :class:`YaraString` :param condition: Yara rule condition (default: "any of them") :type condition: str + :param sources: Dictionary of {"namespace": "rule_source"}. See also :py:meth:`Yara.from_sources`. 
@staticmethod
def from_dir_and_sources(path=None, recursive=True, followlinks=True, sources=None):
    """
    Find rules (recursively) in specified path and combine them with rules
    passed directly as source code. Supported extensions: \\*.yar, \\*.yara

    :param path: Root path for searching (optional, no files are searched when omitted)
    :type path: str
    :param recursive: Search recursively (default: enabled)
    :type recursive: bool
    :param followlinks: Follow symbolic links (default: enabled)
    :type followlinks: bool
    :param sources: Dictionary of {"namespace": "rule_source"}
    :type sources: dict
    :rtype: :class:`Yara`
    """
    rule_paths: Dict[str, str] = {}
    if path:
        for root, _, files in os.walk(path, followlinks=followlinks):
            for fname in files:
                # str.endswith accepts a tuple of alternatives
                if not fname.endswith((".yar", ".yara")):
                    continue
                # os.walk yields bare file names, only the extension has to go
                ruleset_name = os.path.splitext(fname)[0]
                ruleset_path = os.path.join(root, fname)
                if ruleset_name in rule_paths:
                    log.warning(
                        f"Yara file name collision - {rule_paths[ruleset_name]} "
                        f"overridden by {ruleset_path}"
                    )
                if sources and ruleset_name in sources:
                    # Yara.__init__ stores the file contents under the same
                    # namespace key, replacing the provided source.
                    log.warning(
                        f"Yara namespace collision - source '{ruleset_name}' "
                        f"overridden by {ruleset_path}"
                    )
                rule_paths[ruleset_name] = ruleset_path
            if not recursive:
                # The first entry produced by os.walk is the root directory itself
                break
    # Hand over a copy: Yara.__init__ adds the file contents to the mapping it
    # receives, which must not leak into the caller's dictionary.
    return Yara(rule_paths=rule_paths, sources=dict(sources) if sources else None)


@staticmethod
def from_dir(path, recursive=True, followlinks=True):
    """
    Find rules (recursively) in specified path. Supported extensions: \\*.yar, \\*.yara

    :param path: Root path for searching
    :type path: str
    :param recursive: Search recursively (default: enabled)
    :type recursive: bool
    :param followlinks: Follow symbolic links (default: enabled)
    :type followlinks: bool
    :rtype: :class:`Yara`
    """
    return Yara.from_dir_and_sources(
        path=path, recursive=recursive, followlinks=followlinks
    )


@staticmethod
def from_sources(sources):
    """
    Loads rules for the specified namespaces.

    :param sources: Dictionary of {"namespace": "rule_source"}
    :type sources: dict
    :rtype: :class:`Yara`
    """
    return Yara.from_dir_and_sources(sources=sources)
# --- tests/files/modules/embedded/embedded.py ---
from malduck.extractor import Extractor
from malduck import base64, procmempe


# The rule below is attached to the class by the decorator instead of being
# shipped as a separate .yar file next to the module.
@Extractor.yara(r"""
rule embedded_test
{
    strings:
        $start = "Start with this and nothing else..."
    condition:
        all of them and $start at 0
}
""")
class Embedded(Extractor):
    """Fixture extractor whose Yara rule is embedded with ``Extractor.yara``."""

    family = "embedded"

    @Extractor.final
    def embedded(self, p):
        """Return the marker value that ``test_embedded`` expects in the result."""
        return dict(embedded=True)


# --- tests/test_extractor.py ---
def test_embedded():
    """Extract from the sample file using the modules directory that holds ``Embedded``."""
    extractor_modules = ExtractorModules("tests/files/modules")
    sample = procmem.from_file("tests/files/embedded.txt")
    expected = [{"embedded": True, "family": "embedded"}]
    assert sample.extract(extractor_modules) == expected