Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for signing non XML data #238

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 22 additions & 11 deletions signxml/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,18 @@ def parser(self):
return self._default_parser
return self._parser

def _fromstring(self, xml_string, **kwargs):
xml_node = etree.fromstring(xml_string, parser=self.parser, **kwargs)
for entity in xml_node.iter(etree.Entity):
raise InvalidInput("Entities are not supported in XML input")
def _fromstring(self, data, **kwargs):
try:
xml_node = etree.fromstring(data, parser=self.parser, **kwargs)
for entity in xml_node.iter(etree.Entity):
raise InvalidInput("Entities are not supported in XML input")
except etree.XMLSyntaxError as e:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not safe. In general, we should not ignore XML syntax errors, should not try to parse arbitrary binary data as XML, and should not return from an except block.

Any configuration that references a binary data blob should be direct and explicit, not implicit.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get it. But I'm struggling to find a way to mark the data as a particular content type, so it would be treated properly in various places of the code.

So what would you think about adding a non_xml boolean parameter to sign method and propagating it through _unpack, get_root, etc. methods to _fromstring invocations? I don't see a clear way to attach that info to the data variable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I'll think about the best way to organize this more over the weekend and come back with a recommendation here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just thought about another way of solving this problem. What would you think about adding digest parameter? You would sign with either data or digest filled out. With digest set, code would skip all the canonicalization and digesting logic and just use the passed value in reference.

That way it would be feasible to sign even huge documents, with digests obtained externally.

logger.debug("Data to be signed is not well-formed XML: %s", data)
if isinstance(data, str):
# We have to convert string to bytes
data = data.encode("utf-8")
return data

return xml_node

def _tostring(self, xml_node, **kwargs):
Expand Down Expand Up @@ -117,13 +125,16 @@ def _c14n(self, nodes, algorithm: CanonicalizationMethod, inclusive_ns_prefixes=

c14n = b""
for node in nodes:
c14n += etree.tostring(
node,
method="c14n",
exclusive=exclusive,
with_comments=with_comments,
inclusive_ns_prefixes=inclusive_ns_prefixes,
)
if isinstance(node, bytes):
c14n += node
else:
c14n += etree.tostring(
node,
method="c14n",
exclusive=exclusive,
with_comments=with_comments,
inclusive_ns_prefixes=inclusive_ns_prefixes,
)
if exclusive is False and self.excise_empty_xmlns_declarations is True:
# Incorrect legacy behavior. See also:
# - https://github.com/XML-Security/signxml/issues/193
Expand Down
32 changes: 24 additions & 8 deletions signxml/signer.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,13 @@ def _get_c14n_inputs_from_references(self, doc_root, references: List[SignatureR

def _unpack(self, data, references: List[SignatureReference]):
sig_root = Element(ds_tag("Signature"), nsmap=self.namespaces)
if isinstance(data, str):
data = data.encode("utf-8")
data_is_not_xml = True
elif isinstance(data, bytes):
data_is_not_xml = True
else:
data_is_not_xml = False
if self.construction_method == SignatureConstructionMethod.enveloped:
if isinstance(data, (str, bytes)):
raise InvalidInput("When using enveloped signature, **data** must be an XML element")
Expand Down Expand Up @@ -357,11 +364,16 @@ def _unpack(self, data, references: List[SignatureReference]):
elif self.construction_method == SignatureConstructionMethod.detached:
doc_root = self.get_root(data)
if references is None:
if data_is_not_xml:
raise InvalidInput("When using detached signature and data is not XML, **reference** has to be provided")
uri = "#{}".format(data.get("Id", data.get("ID", "object")))
references = [SignatureReference(URI=uri)]
c14n_inputs = [self.get_root(data)]
try:
c14n_inputs, references = self._get_c14n_inputs_from_references(doc_root, references)
try:
if data_is_not_xml:
c14n_inputs = [doc_root]
else:
c14n_inputs, references = self._get_c14n_inputs_from_references(doc_root, references)
except InvalidInput: # Dummy reference URI
c14n_inputs = [self.get_root(data)]
elif self.construction_method == SignatureConstructionMethod.enveloping:
Expand Down Expand Up @@ -400,13 +412,17 @@ def _build_sig(self, sig_root, references, c14n_inputs, inclusive_ns_prefixes):
if reference.inclusive_ns_prefixes is None:
reference = replace(reference, inclusive_ns_prefixes=inclusive_ns_prefixes)
reference_node = SubElement(signed_info, ds_tag("Reference"), URI=reference.URI)
transforms = SubElement(reference_node, ds_tag("Transforms"))
self._build_transforms_for_reference(transforms_node=transforms, reference=reference)
SubElement(reference_node, ds_tag("DigestMethod"), Algorithm=self.digest_alg.value)
if not isinstance(c14n_inputs[i], bytes):
transforms = SubElement(reference_node, ds_tag("Transforms"))
self._build_transforms_for_reference(transforms_node=transforms, reference=reference)
SubElement(reference_node, ds_tag("DigestMethod"), Algorithm=self.digest_alg.value)
digest_value = SubElement(reference_node, ds_tag("DigestValue"))
payload_c14n = self._c14n(
c14n_inputs[i], algorithm=reference.c14n_method, inclusive_ns_prefixes=reference.inclusive_ns_prefixes
)
if not isinstance(c14n_inputs[i], bytes):
payload_c14n = self._c14n(
c14n_inputs[i], algorithm=reference.c14n_method, inclusive_ns_prefixes=reference.inclusive_ns_prefixes
)
else:
payload_c14n = c14n_inputs[i]
digest = self._get_digest(payload_c14n, algorithm=self.digest_alg)
digest_value.text = b64encode(digest).decode()
signature_value = SubElement(sig_root, ds_tag("SignatureValue"))
Expand Down