Skip to content

Commit

Permalink
Merge pull request #17330 from michaelnebel/java/modelgenfieldbased
Browse files Browse the repository at this point in the history
Java/C#: Field based model generator (Experimental).
  • Loading branch information
michaelnebel authored Sep 6, 2024
2 parents b73b868 + e1048cf commit a5b4622
Show file tree
Hide file tree
Showing 27 changed files with 575 additions and 33 deletions.
113 changes: 104 additions & 9 deletions csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,35 @@ private class ReturnNodeExt extends DataFlow::Node {
kind = DataFlowImplCommon::getParamReturnPosition(this, _).getKind()
}

string getOutput() {
kind instanceof DataFlowImplCommon::ValueReturnKind and
/**
* Gets the kind of the return node.
*/
DataFlowImplCommon::ReturnKindExt getKind() { result = kind }
}

/**
 * Signature of a predicate that renders the parameter at position `p` of
 * callable `c` as a MaD access string.
 */
bindingset[c]
private signature string printCallableParamSig(Callable c, ParameterPosition p);

/**
 * Provides the MaD output rendering of return nodes, parameterized by the
 * predicate `printCallableParam` used to print "return through parameter" positions.
 */
private module PrintReturnNodeExt<printCallableParamSig/2 printCallableParam> {
  /**
   * Gets the MaD output string of `node`: `ReturnValue` when the node has a
   * value return kind, otherwise the result of `printCallableParam` for the
   * updated parameter position in the callable enclosing `node`.
   */
  string getOutput(ReturnNodeExt node) {
    node.getKind() instanceof DataFlowImplCommon::ValueReturnKind and
    result = "ReturnValue"
    or
    // Only the `node`-based form is valid here: this predicate lives in a module,
    // not in the `ReturnNodeExt` class, so neither `this` nor the field `kind` is in scope.
    exists(ParameterPosition pos |
      pos = node.getKind().(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and
      result = printCallableParam(returnNodeEnclosingCallable(node), pos)
    )
  }
}

/**
 * Gets the MaD output string of `node`, where return-through-parameter
 * positions are printed using `paramReturnNodeAsOutput`.
 */
string getOutput(ReturnNodeExt node) {
  PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node) = result
}

/**
 * Gets the MaD output string of `node` for content flow, where
 * return-through-parameter positions are printed using `paramReturnNodeAsContentOutput`.
 */
string getContentOutput(ReturnNodeExt node) {
  PrintReturnNodeExt<paramReturnNodeAsContentOutput/2>::getOutput(node) = result
}

/**
 * A summary target API for which `isUninterestingForDataFlowModels` does not hold.
 */
class DataFlowSummaryTargetApi extends SummaryTargetApi {
  DataFlowSummaryTargetApi() {
    // Keep only the APIs that are not filtered out for data flow models.
    not isUninterestingForDataFlowModels(this)
  }
}
Expand Down Expand Up @@ -71,7 +89,8 @@ private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2
* Holds if content `c` is either a field, a synthetic field or language specific
* content of a relevant type or a container like content.
*/
pragma[nomagic]
private predicate isRelevantContent0(DataFlow::ContentSet c) {
  // The `0` suffix keeps this helper distinct from the `isRelevantContent`
  // member of `PropagateContentFlowConfig`, which delegates to it.
  isRelevantTypeInContent(c) or
  containerContent(c)
}
Expand All @@ -85,6 +104,16 @@ string parameterNodeAsInput(DataFlow::ParameterNode p) {
result = qualifierString() and p instanceof InstanceParameterNode
}

/**
 * Gets the MaD string representation of the parameter node `p` when it is
 * used as input in content flow: the qualifier string for an instance
 * parameter node, or the content access string of the underlying parameter.
 */
string parameterNodeAsContentInput(DataFlow::ParameterNode p) {
  p instanceof InstanceParameterNode and
  result = qualifierString()
  or
  result = parameterContentAccess(p.asParameter())
}

/**
* Gets the MaD input string representation of `source`.
*/
Expand Down Expand Up @@ -170,7 +199,7 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {
) {
exists(DataFlow::ContentSet c |
DataFlowImplCommon::store(node1, c.getAStoreContent(), node2, _, _) and
isRelevantContent(c) and
isRelevantContent0(c) and
(
state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1
or
Expand All @@ -180,7 +209,7 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {
or
exists(DataFlow::ContentSet c |
DataFlowPrivate::readStep(node1, c, node2) and
isRelevantContent(c) and
isRelevantContent0(c) and
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
)
}
Expand All @@ -196,14 +225,17 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {

/** The `TaintTracking::GlobalWithState` instantiation for `PropagateFlowConfig`. */
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;

/**
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
*/
string captureThroughFlow0(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
) {
exists(string input, string output |
p.getEnclosingCallable() = api and
returnNodeExt.(DataFlow::Node).getEnclosingCallable() = api and
input = parameterNodeAsInput(p) and
output = returnNodeExt.getOutput() and
output = getOutput(returnNodeExt) and
input != output and
result = Printing::asTaintModel(api, input, output)
)
Expand All @@ -219,6 +251,69 @@ string captureThroughFlow(DataFlowSummaryTargetApi api) {
)
}

/**
 * A content data flow configuration with parameter nodes of summary target
 * APIs as sources and return nodes of summary target APIs as sinks.
 */
private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig {
  /** Holds if `source` is a parameter node enclosed by a `DataFlowSummaryTargetApi`. */
  predicate isSource(DataFlow::Node source) {
    source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi and
    source instanceof DataFlow::ParameterNode
  }

  /** Holds if `sink` is a `ReturnNodeExt` enclosed by a `DataFlowSummaryTargetApi`. */
  predicate isSink(DataFlow::Node sink) {
    sink.getEnclosingCallable() instanceof DataFlowSummaryTargetApi and
    sink instanceof ReturnNodeExt
  }

  predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2;

  /** Holds if `n` has a type that is not a relevant type. */
  predicate isBarrier(DataFlow::Node n) {
    // The explicit `exists` matters: a node without a type is not a barrier.
    exists(Type nodeType | nodeType = n.getType() | not isRelevantType(nodeType))
  }

  /** Gets the access path limit used by this configuration (2). */
  int accessPathLimit() { result = 2 }

  /** Holds if `s` is relevant content according to `isRelevantContent0`. */
  predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) }

  /** Gets the `FeatureEqualSourceSinkCallContext` flow feature. */
  DataFlow::FlowFeature getAFeature() {
    result instanceof DataFlow::FeatureEqualSourceSinkCallContext
  }
}

/** The `ContentDataFlow::Global` instantiation for `PropagateContentFlowConfig`. */
private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>;

/**
 * Gets the printed form (a `.` followed by `printContent` of the content) of
 * the `i`th content of the access path `ap`, where index 0 is the head of `ap`.
 */
private string getContent(PropagateContentFlow::AccessPath ap, int i) {
  exists(ContentSet first, PropagateContentFlow::AccessPath rest |
    first = ap.getHead() and
    rest = ap.getTail() and
    (
      // Later positions are looked up recursively in the tail.
      i > 0 and
      result = getContent(rest, i - 1)
      or
      i = 0 and
      result = "." + printContent(first)
    )
  )
}

/**
 * Gets the concatenation of the printed contents of `ap`, ordered by
 * increasing index (head first).
 */
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
  result = concat(int idx | | getContent(ap, idx), "" order by idx asc)
}

/**
 * Gets the concatenation of the printed contents of `ap`, ordered by
 * decreasing index (head last).
 */
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
  result = concat(int idx | | getContent(ap, idx), "" order by idx desc)
}

/**
 * Gets the content based summary model(s) of `api`, if `PropagateContentFlow`
 * finds flow from a parameter node to a return node enclosed by `api`.
 *
 * The input is the content input string of the parameter followed by the read
 * access path, and the output is the content output string of the return node
 * followed by the store access path. Models with identical input and output
 * are excluded.
 */
string captureContentFlow(DataFlowSummaryTargetApi api) {
  exists(
    DataFlow::ParameterNode param, ReturnNodeExt ret, PropagateContentFlow::AccessPath readAp,
    PropagateContentFlow::AccessPath storeAp, boolean valuePreserving, string input,
    string output
  |
    ret.getEnclosingCallable() = api and
    PropagateContentFlow::flow(param, readAp, ret, storeAp, valuePreserving) and
    input = parameterNodeAsContentInput(param) + printReadAccessPath(readAp) and
    output = getContentOutput(ret) + printStoreAccessPath(storeAp) and
    input != output and
    result = Printing::asModel(api, input, output, valuePreserving)
  )
}

/**
* A dataflow configuration used for finding new sources.
* The sources are the already known existing sources and the sinks are the API return nodes.
Expand Down Expand Up @@ -261,7 +356,7 @@ string captureSource(DataFlowSourceTargetApi api) {
ExternalFlow::sourceNode(source, kind) and
api = sink.getEnclosingCallable() and
not irrelevantSourceSinkApi(source.getEnclosingCallable(), api) and
result = Printing::asSourceModel(api, sink.getOutput(), kind)
result = Printing::asSourceModel(api, getOutput(sink), kind)
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
private import csharp as CS
private import semmle.code.csharp.commons.Util as Util
private import semmle.code.csharp.commons.Collections as Collections
private import semmle.code.csharp.commons.QualifiedName as QualifiedName
private import semmle.code.csharp.dataflow.internal.DataFlowDispatch
private import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.csharp.frameworks.system.linq.Expressions
private import semmle.code.csharp.frameworks.System
private import semmle.code.csharp.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
import semmle.code.csharp.dataflow.internal.ExternalFlow as ExternalFlow
import semmle.code.csharp.dataflow.internal.ContentDataFlow as ContentDataFlow
import semmle.code.csharp.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
import semmle.code.csharp.dataflow.internal.DataFlowPrivate as DataFlowPrivate
import semmle.code.csharp.dataflow.internal.DataFlowDispatch as DataFlowDispatch
Expand All @@ -22,6 +25,8 @@ class Type = CS::Type;

/** An alias for `CS::Callable`. */
class Callable = CS::Callable;

/** An alias for `DataFlow::ContentSet`. */
class ContentSet = DataFlow::ContentSet;

/**
* Holds if any of the parameters of `api` are `System.Func<>`.
*/
Expand Down Expand Up @@ -241,20 +246,40 @@ string parameterAccess(CS::Parameter p) {
else result = "Argument[" + p.getPosition() + "]"
}

/**
 * Gets the MaD string representation of the parameter `p` when used in
 * content flow, that is, `Argument[i]` where `i` is the position of `p`.
 */
string parameterContentAccess(CS::Parameter p) {
  exists(int idx | idx = p.getPosition() | result = "Argument[" + idx + "]")
}

/** An alias for `DataFlowPrivate::InstanceParameterNode`. */
class InstanceParameterNode = DataFlowPrivate::InstanceParameterNode;

/** An alias for `DataFlowDispatch::ParameterPosition`. */
class ParameterPosition = DataFlowDispatch::ParameterPosition;

/** Signature of a predicate that renders the parameter `p` as a MaD access string. */
private signature string parameterAccessSig(Parameter p);

/**
 * Provides the MaD rendering of "return through parameter" positions,
 * parameterized by the predicate `getParamAccess` used to print a parameter.
 */
module ParamReturnNodeAsOutput<parameterAccessSig/1 getParamAccess> {
  /**
   * Gets the MaD string representation of return through parameter at
   * position `pos` of callable `c`: the qualifier string for the `this`
   * position, or `getParamAccess` applied to the parameter at that position.
   */
  bindingset[c]
  string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) {
    pos.isThisParameter() and
    result = qualifierString()
    or
    exists(CS::Parameter param | param = c.getParameter(pos.getPosition()) |
      result = getParamAccess(param)
    )
  }
}

/**
 * Gets the MaD string representation of return through parameter at position
 * `pos` of callable `c`.
 *
 * Parameters are printed using `parameterAccess`.
 */
bindingset[c]
string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) {
  // Delegate to the shared parameterized implementation instead of repeating the
  // parameter/qualifier disjunction inline.
  result = ParamReturnNodeAsOutput<parameterAccess/1>::paramReturnNodeAsOutput(c, pos)
}

/**
 * Gets the MaD string representation of return through parameter at position
 * `pos` of callable `c` when used in content flow.
 *
 * Parameters are printed using `parameterContentAccess`.
 */
bindingset[c]
string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) {
  ParamReturnNodeAsOutput<parameterContentAccess/1>::paramReturnNodeAsOutput(c, pos) = result
}

/**
Expand Down Expand Up @@ -344,3 +369,44 @@ predicate isRelevantSourceKind(string kind) { any() }
* Holds if the content `c` is a container.
*/
predicate containerContent(DataFlow::ContentSet c) { c.isElement() }

/**
 * Holds if there is a taint step from `nodeFrom` to `nodeTo` in content flow.
 *
 * This is a default additional taint step, excluding steps whose target is an
 * element access expression and steps that are also read steps of container content.
 */
predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
  not exists(DataFlow::ContentSet container |
    containerContent(container) and
    DataFlowPrivate::readStep(nodeFrom, container, nodeTo)
  ) and
  not nodeTo.asExpr() instanceof CS::ElementAccess and
  TaintTrackingPrivate::defaultAdditionalTaintStep(nodeFrom, nodeTo, _)
}

/**
 * Gets the qualified name of `d`, built by `QualifiedName::getQualifiedName`
 * from the qualifier and name reported by `hasFullyQualifiedName`.
 */
bindingset[d]
private string getFullyQualifiedName(Declaration d) {
  exists(string qual, string shortName | d.hasFullyQualifiedName(qual, shortName) |
    result = QualifiedName::getQualifiedName(qual, shortName)
  )
}

/**
 * Gets the MaD string representation of the content set `c`.
 *
 * - A field is printed as `Field[name]` if it is effectively public, and as
 *   `SyntheticField[name]` otherwise.
 * - A property is printed as `Property[name]` if it is effectively public, and
 *   as `SyntheticField[name]` otherwise.
 * - Element content is printed as `Element`.
 *
 * Here `name` is the fully qualified name of the field or property.
 */
string printContent(DataFlow::ContentSet c) {
  c.isElement() and
  result = "Element"
  or
  exists(CS::Field fld, string kind |
    c.isField(fld) and
    (if fld.isEffectivelyPublic() then kind = "Field" else kind = "SyntheticField") and
    result = kind + "[" + getFullyQualifiedName(fld) + "]"
  )
  or
  exists(CS::Property prop, string kind |
    c.isProperty(prop) and
    (if prop.isEffectivelyPublic() then kind = "Property" else kind = "SyntheticField") and
    result = kind + "[" + getFullyQualifiedName(prop) + "]"
  )
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
unexpectedModel
expectedModel
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
extensions:
- addsTo:
pack: codeql/csharp-all
extensible: summaryModel
data:
- [ "Models", "ManuallyModelled", False, "HasSummary", "(System.Object)", "", "Argument[0]", "ReturnValue", "value", "manual"]

- addsTo:
pack: codeql/csharp-all
extensible: neutralModel
data:
- [ "Models", "ManuallyModelled", "HasNeutralSummary", "(System.Object)", "summary", "manual"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import csharp
import utils.modelgenerator.internal.CaptureModels
import TestUtilities.InlineMadTest

/**
 * Inline MaD test configuration that checks the models produced by
 * `captureContentFlow` under the kind `contentbased-summary`.
 */
module InlineMadTestConfig implements InlineMadTestConfigSig {
  /** Gets a model captured for `c` by `captureContentFlow`. */
  string getCapturedModel(Callable c) { captureContentFlow(c) = result }

  /** Gets the kind reported by this test configuration. */
  string getKind() { "contentbased-summary" = result }
}

import InlineMadTest<InlineMadTestConfig>
Loading

0 comments on commit a5b4622

Please sign in to comment.