Skip to content

Commit

Permalink
Add support for embedded Yara rules
Browse files Browse the repository at this point in the history
  • Loading branch information
0xThiebaut committed Nov 29, 2023
1 parent 7a801b8 commit 081c7d5
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 26 deletions.
45 changes: 45 additions & 0 deletions malduck/extractor/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import logging
from typing import List, cast

import yara

from ..procmem import ProcessMemory, ProcessMemoryELF, ProcessMemoryPE

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -77,6 +79,7 @@ class Extractor:
* :py:attr:`family` (see :py:attr:`extractor.Extractor.family`)
* :py:attr:`yara_rules`
* :py:attr:`yara_source`
* :py:attr:`overrides` (optional, see :py:attr:`extractor.Extractor.overrides`)
Example extractor code for Citadel:
Expand Down Expand Up @@ -115,6 +118,31 @@ def cit_login(self, p, addr, match):
- `@Extractor.rule` methods
- `@Extractor.final` methods
.. py:decoratormethod:: Extractor.yara
Decorator for extractor classes to embed Yara rules and compute the `yara_rules` property.
The above example can embed the rule as follows:
.. code-block:: Python
from malduck import Extractor
@Extractor.yara(r\"\"\"
rule possible_citadel {
strings:
$briankerbs = ...
$cit_login = ...
condition:
all of them
}
\"\"\")
class Citadel(Extractor):
family = "citadel"
overrides = ("zeus",)
...
.. py:decoratormethod:: Extractor.string
Decorator for string-based extractor methods.
Expand Down Expand Up @@ -327,6 +355,7 @@ def is_it_really_evil(self, p):
"""

yara_rules = () #: Names of Yara rules for which handle_match is called
yara_source = None
family = None #: Extracted malware family, automatically added to "family" key for strong extraction methods
overrides = [] #: Family match overrides another match e.g. citadel overrides zeus

Expand Down Expand Up @@ -572,3 +601,19 @@ def weak(method):
)
method.weak = True
return method

@staticmethod
def yara(source):
    """
    Class decorator embedding Yara rules into an :class:`Extractor` subclass.

    The rule source is stored in ``yara_source``. It is compiled when the
    decorator is applied, and the identifiers of the compiled rules are
    stored in ``yara_rules``.

    :param source: Yara rule source code
    :type source: str
    :raises TypeError: if ``source`` is not a string or the decorated object
        is not an Extractor subclass
    :return: Decorator that returns the same (modified) class
    """
    if not isinstance(source, str):
        raise TypeError("Expected string argument")

    def modifier(extractor):
        # Check for a class first so that non-class arguments get the same
        # error message instead of the generic one raised by issubclass().
        if not (isinstance(extractor, type) and issubclass(extractor, Extractor)):
            raise TypeError("Expected Extractor argument")
        extractor.yara_source = source
        # Keep the attribute a tuple, like the class-level default
        # ``yara_rules = ()``.
        extractor.yara_rules = tuple(
            rule.identifier for rule in yara.compile(source=source)
        )
        return extractor

    return modifier
3 changes: 3 additions & 0 deletions malduck/extractor/extractor.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class FinalExtractorMethod(ExtractorMethod[T, U]):

class Extractor:
yara_rules: Tuple[str, ...]
yara_source: Optional[str]
family: Optional[str]
overrides: List[str]
parent: ExtractionContext
Expand Down Expand Up @@ -151,3 +152,5 @@ class Extractor:
) -> ExtractorMethod[T, ProcessMemoryELF]: ...
@staticmethod
def weak(method: ExtractorMethod[T, U]) -> ExtractorMethod[T, U]: ...
# Class decorator factory: the returned callable is applied to an Extractor
# subclass (the class object itself) and returns that same class.
# NOTE(review): the annotation below reads as "instance in, instance out";
# Type[Extractor] would describe the decorated class more precisely - confirm
# and adjust if Type is available in this stub.
@staticmethod
def yara(source: str) -> Callable[[Extractor], Extractor]: ...
26 changes: 21 additions & 5 deletions malduck/extractor/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,25 @@ class ExtractorModules:
:param modules_path: Path with module files (Extractor classes and Yara files, default '~/.malduck')
:type modules_path: str
:param modules: List of already loaded extractor modules.
:type modules: list
"""

def __init__(self, modules_path: Optional[str] = None) -> None:
def __init__(
self,
modules_path: Optional[str] = None,
modules: Optional[List[Type[Extractor]]] = None,
) -> None:
if modules_path is None:
modules_path = os.path.join(os.path.expanduser("~"), ".malduck")
if not os.path.exists(modules_path):
os.makedirs(modules_path)
# Load Yara rules
self.rules: Yara = Yara.from_dir(modules_path)

# Preload modules
loaded_modules = load_modules(modules_path, onerror=self.on_error)
self.extractors: List[Type[Extractor]] = Extractor.__subclasses__()

if modules:
self.extractors.extend(modules)

loaded_extractors = [x.__module__ for x in self.extractors]

for module in loaded_modules.values():
Expand All @@ -44,6 +50,16 @@ def __init__(self, modules_path: Optional[str] = None) -> None:
)
self.override_paths = make_override_paths(self.extractors)

# Load Yara rules
self.rules: Yara = Yara.from_dir_and_sources(
path=modules_path,
sources={
extractor.family: extractor.yara_source
for extractor in self.extractors
if extractor.yara_source and extractor.family
},
)

def on_error(self, exc: Exception, module_name: str) -> None:
"""
Handler for all exceptions raised during module load
Expand Down
81 changes: 62 additions & 19 deletions malduck/yara.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,25 @@ class Yara:
:type strings: dict or str or :class:`YaraString`
:param condition: Yara rule condition (default: "any of them")
:type condition: str
:param sources: Dictionary of {"namespace": "rule_source"}. See also :py:meth:`Yara.from_source`.
:type sources: dict
"""

def __init__(
self, rule_paths=None, name="r", strings=None, condition="any of them"
self,
rule_paths=None,
name="r",
strings=None,
condition="any of them",
sources=None,
):
if rule_paths:
self.rules = yara.compile(filepaths=rule_paths)
if rule_paths or sources:
if not sources:
sources = {}
for namespace in rule_paths:
with open(rule_paths[namespace], "r") as source:
sources[namespace] = source.read()
self.rules = yara.compile(sources=sources)
return

if not strings:
Expand Down Expand Up @@ -143,7 +155,7 @@ def __init__(
self.rules = yara.compile(source=yara_source)

@staticmethod
def from_dir(path, recursive=True, followlinks=True):
def from_dir_and_sources(path=None, recursive=True, followlinks=True, sources=None):
"""
Find rules (recursively) in specified path. Supported extensions: \\*.yar, \\*.yara
Expand All @@ -153,24 +165,55 @@ def from_dir(path, recursive=True, followlinks=True):
:type recursive: bool
:param followlinks: Follow symbolic links (default: enabled)
:type followlinks: bool
:param sources: Dictionary of {"namespace": "rule_source"}
:type sources: dict
:rtype: :class:`Yara`
"""
rule_paths: Dict[str, str] = {}
for root, _, files in os.walk(path, followlinks=followlinks):
for fname in files:
if not fname.endswith(".yar") and not fname.endswith(".yara"):
continue
ruleset_name = os.path.splitext(os.path.basename(fname))[0]
ruleset_path = os.path.join(root, fname)
if ruleset_name in rule_paths:
log.warning(
f"Yara file name collision - {rule_paths[ruleset_name]} "
f"overridden by {ruleset_path}"
)
rule_paths[ruleset_name] = ruleset_path
if not recursive:
break
return Yara(rule_paths=rule_paths)
if path:
for root, _, files in os.walk(path, followlinks=followlinks):
for fname in files:
if not fname.endswith(".yar") and not fname.endswith(".yara"):
continue
ruleset_name = os.path.splitext(os.path.basename(fname))[0]
ruleset_path = os.path.join(root, fname)
if ruleset_name in rule_paths:
log.warning(
f"Yara file name collision - {rule_paths[ruleset_name]} "
f"overridden by {ruleset_path}"
)
rule_paths[ruleset_name] = ruleset_path
if not recursive:
break
return Yara(rule_paths=rule_paths, sources=sources)

@staticmethod
def from_dir(path, recursive=True, followlinks=True):
    """
    Build a :class:`Yara` object from rule files located under a directory.
    Supported extensions: \\*.yar, \\*.yara

    Thin wrapper over :py:meth:`Yara.from_dir_and_sources` that passes no
    additional in-memory sources.

    :param path: Root path for searching
    :type path: str
    :param recursive: Search recursively (default: enabled)
    :type recursive: bool
    :param followlinks: Follow symbolic links (default: enabled)
    :type followlinks: bool
    :rtype: :class:`Yara`
    """
    walk_options = dict(recursive=recursive, followlinks=followlinks)
    return Yara.from_dir_and_sources(path=path, **walk_options)

@staticmethod
def from_sources(sources):
    """
    Build a :class:`Yara` object from in-memory rule sources only.

    No directory is searched: the path is left unset when delegating to
    :py:meth:`Yara.from_dir_and_sources`.

    :param sources: Dictionary of {"namespace": "rule_source"}
    :type sources: dict
    :rtype: :class:`Yara`
    """
    return Yara.from_dir_and_sources(path=None, sources=sources)

def match(self, offset_mapper=None, extended=False, **kwargs):
"""
Expand Down
10 changes: 10 additions & 0 deletions malduck/yara.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,21 @@ class Yara:
str, "YaraString", Dict[str, Union[str, "YaraString"]], None
] = None,
condition: str = "any of them",
sources: Optional[Dict[str, str]] = None,
) -> None: ...
# Combines rule files found under ``path`` (when a path is given) with the
# in-memory ``sources`` mapping of {"namespace": "rule_source"}.
@staticmethod
def from_dir_and_sources(
path: Optional[str] = None,
recursive: bool = True,
followlinks: bool = True,
sources: Optional[Dict[str, str]] = None,
) -> "Yara": ...
# Directory-only variant: delegates to from_dir_and_sources without sources.
@staticmethod
def from_dir(
path: str, recursive: bool = True, followlinks: bool = True
) -> "Yara": ...
# Sources-only variant: delegates to from_dir_and_sources without a path.
@staticmethod
def from_sources(sources: Dict[str, str]) -> "Yara": ...
# match(...)
# match(offset_mapper, ...)
# match(offset_mapper, extended=False, ...)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
name="malduck",
version="4.4.0",
description="Malduck is your ducky companion in malware analysis journeys",
long_description=open("README.md").read(),
long_description=open("README.md", encoding="utf8").read(),
long_description_content_type="text/markdown",
author="CERT Polska",
author_email="[email protected]",
Expand All @@ -20,7 +20,7 @@
},
license="GPLv3",
include_package_data=True,
install_requires=open("requirements.txt").read().splitlines(),
install_requires=open("requirements.txt", encoding="utf8").read().splitlines(),
url="https://github.com/CERT-Polska/malduck",
classifiers=[
"Programming Language :: Python :: 3",
Expand Down
1 change: 1 addition & 0 deletions tests/files/embedded.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Start with this and nothing else...
1 change: 1 addition & 0 deletions tests/files/modules/embedded/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .embedded import Embedded
18 changes: 18 additions & 0 deletions tests/files/modules/embedded/embedded.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from malduck.extractor import Extractor
from malduck import base64, procmempe

@Extractor.yara(r"""
rule embedded_test
{
strings:
$start = "Start with this and nothing else..."
condition:
all of them and $start at 0
}
""")
class Embedded(Extractor):
    """Test extractor whose Yara rule is embedded with ``Extractor.yara``."""

    family = "embedded"

    @Extractor.final
    def embedded(self, p):
        """Return the static config fragment ``{"embedded": True}``."""
        config = {"embedded": True}
        return config
9 changes: 9 additions & 0 deletions tests/test_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,12 @@ def test_multirules():
'matched': ['v2'],
'third': ['ThIrD string']
}]


def test_embedded():
    """Extraction of embedded.txt yields the config of the embedded-rule extractor."""
    expected_config = {
        "embedded": True,
        "family": "embedded",
    }
    extractor_modules = ExtractorModules("tests/files/modules")
    sample = procmem.from_file("tests/files/embedded.txt")
    assert sample.extract(extractor_modules) == [expected_config]

0 comments on commit 081c7d5

Please sign in to comment.