Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ruby: Track types in data flow #18315

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions ruby/ql/consistency-queries/DataFlowConsistency.ql
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ private module Input implements InputSig<Location, RubyDataFlow> {
n.getASplit() instanceof Split::ConditionalCompletionSplit
)
}

predicate uniqueTypeExclude(Node n) {
n =
any(DataFlow::CallNode call |
Private::isStandardNewCall(call.getExprNode(), _, _) and
not call.getReceiver().asExpr().getExpr() instanceof ConstantReadAccess
)
}
}

import MakeConsistency<Location, RubyDataFlow, RubyTaintTracking, Input>
4 changes: 4 additions & 0 deletions ruby/ql/lib/change-notes/2024-12-20-data-flow-types.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Types are now being tracked in data flow, but only when the type of an object is obvious from the context. For example, `C.new` has guaranteed type `C`, while in `def add(x, y) { x + y }` we cannot assign a type to `x + y` (it could, for instance, be both `String` and `Integer`). Tracking types allows us to remove false-positive results when type incompatibility can be established.
173 changes: 11 additions & 162 deletions ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ class NormalCall extends DataFlowCall, TNormalCall {
* need to track the `View` instance (2) into the receiver of the adjusted method
* call, in order to figure out that the call target is in fact `view.html.erb`.
*/
private module ViewComponentRenderModeling {
module ViewComponentRenderModeling {
private import codeql.ruby.frameworks.ViewComponent

private class RenderMethod extends SummarizedCallable, LibraryCallableToIncludeInTypeTracking {
Expand Down Expand Up @@ -333,7 +333,7 @@ private predicate selfInModule(SelfVariable self, Module m) {

/** Holds if `self` belongs to method `method` inside module `m`. */
pragma[nomagic]
private predicate selfInMethod(SelfVariable self, MethodBase method, Module m) {
predicate selfInMethod(SelfVariable self, MethodBase method, Module m) {
exists(ModuleBase encl |
method = self.getDeclaringScope() and
encl = method.getEnclosingModule() and
Expand All @@ -343,67 +343,6 @@ private predicate selfInMethod(SelfVariable self, MethodBase method, Module m) {
)
}

/** Holds if `self` belongs to the top-level. */
pragma[nomagic]
private predicate selfInToplevel(SelfVariable self, Module m) {
ViewComponentRenderModeling::selfInErbToplevel(self, m)
or
not ViewComponentRenderModeling::selfInErbToplevel(self, _) and
self.getDeclaringScope() instanceof Toplevel and
m = Module::TResolved("Object")
}

/**
* Holds if SSA definition `def` belongs to a variable introduced via pattern
* matching on type `m`. For example, in
*
* ```rb
* case object
* in C => c then c.foo
* end
* ```
*
* the SSA definition for `c` is introduced by matching on `C`.
*/
private predicate asModulePattern(SsaDefinitionExtNode def, Module m) {
exists(AsPattern ap |
m = Module::resolveConstantReadAccess(ap.getPattern()) and
def.getDefinitionExt().(Ssa::WriteDefinition).getWriteAccess().getAstNode() =
ap.getVariableAccess()
)
}

/**
* Holds if `read1` and `read2` are adjacent reads of SSA definition `def`,
* and `read2` is checked to have type `m`. For example, in
*
* ```rb
* case object
* when C then object.foo
* end
* ```
*
* the two reads of `object` are adjacent, and the second is checked to have type `C`.
*/
private predicate hasAdjacentTypeCheckedReads(
Ssa::Definition def, CfgNodes::ExprCfgNode read1, CfgNodes::ExprCfgNode read2, Module m
) {
exists(
CfgNodes::ExprCfgNode pattern, ConditionBlock cb, CfgNodes::ExprNodes::CaseExprCfgNode case
|
m = Module::resolveConstantReadAccess(pattern.getExpr()) and
cb.getLastNode() = pattern and
cb.controls(read2.getBasicBlock(),
any(SuccessorTypes::MatchingSuccessor match | match.getValue() = true)) and
def.hasAdjacentReads(read1, read2) and
case.getValue() = read1
|
pattern = case.getBranch(_).(CfgNodes::ExprNodes::WhenClauseCfgNode).getPattern(_)
or
pattern = case.getBranch(_).(CfgNodes::ExprNodes::InClauseCfgNode).getPattern()
)
}

/** Holds if `new` is a user-defined `self.new` method. */
predicate isUserDefinedNew(SingletonMethod new) {
exists(Expr object | singletonMethod(new, "new", object) |
Expand Down Expand Up @@ -638,7 +577,7 @@ private predicate hasUserDefinedNew(Module m) {
* `self.new` on `m`.
*/
pragma[nomagic]
private predicate isStandardNewCall(RelevantCall new, Module m, boolean exact) {
predicate isStandardNewCall(RelevantCall new, Module m, boolean exact) {
exists(DataFlow::LocalSourceNode sourceNode |
flowsToMethodCallReceiver(TNormalCall(new), sourceNode, "new") and
// `m` should not have a user-defined `self.new` method
Expand Down Expand Up @@ -667,106 +606,11 @@ private predicate localFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo,
}

private module TrackInstanceInput implements CallGraphConstruction::InputSig {
pragma[nomagic]
private predicate isInstanceNoCall(DataFlow::Node n, Module tp, boolean exact) {
n.asExpr().getExpr() instanceof NilLiteral and
tp = Module::TResolved("NilClass") and
exact = true
or
n.asExpr().getExpr().(BooleanLiteral).isFalse() and
tp = Module::TResolved("FalseClass") and
exact = true
or
n.asExpr().getExpr().(BooleanLiteral).isTrue() and
tp = Module::TResolved("TrueClass") and
exact = true
or
n.asExpr().getExpr() instanceof IntegerLiteral and
tp = Module::TResolved("Integer") and
exact = true
or
n.asExpr().getExpr() instanceof FloatLiteral and
tp = Module::TResolved("Float") and
exact = true
or
n.asExpr().getExpr() instanceof RationalLiteral and
tp = Module::TResolved("Rational") and
exact = true
or
n.asExpr().getExpr() instanceof ComplexLiteral and
tp = Module::TResolved("Complex") and
exact = true
or
n.asExpr().getExpr() instanceof StringlikeLiteral and
tp = Module::TResolved("String") and
exact = true
or
n.asExpr() instanceof CfgNodes::ExprNodes::ArrayLiteralCfgNode and
tp = Module::TResolved("Array") and
exact = true
or
n.asExpr() instanceof CfgNodes::ExprNodes::HashLiteralCfgNode and
tp = Module::TResolved("Hash") and
exact = true
or
n.asExpr().getExpr() instanceof MethodBase and
tp = Module::TResolved("Symbol") and
exact = true
or
n.asParameter() instanceof BlockParameter and
tp = Module::TResolved("Proc") and
exact = true
or
n.asExpr().getExpr() instanceof Lambda and
tp = Module::TResolved("Proc") and
exact = true
or
// `self` reference in method or top-level (but not in module or singleton method,
// where instance methods cannot be called; only singleton methods)
n =
any(SelfLocalSourceNode self |
exists(MethodBase m |
selfInMethod(self.getVariable(), m, tp) and
not m instanceof SingletonMethod and
if m.getEnclosingModule() instanceof Toplevel then exact = true else exact = false
)
or
selfInToplevel(self.getVariable(), tp) and
exact = true
)
or
// `in C => c then c.foo`
asModulePattern(n, tp) and
exact = false
or
// `case object when C then object.foo`
hasAdjacentTypeCheckedReads(_, _, n.asExpr(), tp) and
exact = false
}

pragma[nomagic]
private predicate isInstanceCall(DataFlow::Node n, Module tp, boolean exact) {
isStandardNewCall(n.asExpr(), tp, exact)
}

/** Holds if `n` is an instance of type `tp`. */
pragma[inline]
private predicate isInstance(DataFlow::Node n, Module tp, boolean exact) {
isInstanceNoCall(n, tp, exact)
or
isInstanceCall(n, tp, exact)
}

pragma[nomagic]
private predicate hasAdjacentTypeCheckedReads(DataFlow::Node node) {
hasAdjacentTypeCheckedReads(_, _, node.asExpr(), _)
}

newtype State = additional MkState(Module m, Boolean exact)

predicate start(DataFlow::Node start, State state) {
exists(Module tp, boolean exact | state = MkState(tp, exact) |
isInstance(start, tp, exact)
TypeInference::hasType(start, tp, exact)
or
exists(Module m |
(if m.isClass() then tp = Module::TResolved("Class") else tp = Module::TResolved("Module")) and
Expand All @@ -784,15 +628,20 @@ private module TrackInstanceInput implements CallGraphConstruction::InputSig {
)
}

pragma[nomagic]
private predicate hasAdjacentTypeCheckedRead(DataFlow::Node node) {
TypeInference::hasAdjacentTypeCheckedRead(node.asExpr(), _)
}

pragma[nomagic]
predicate stepNoCall(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, StepSummary summary) {
smallStepNoCall(nodeFrom, nodeTo, summary)
or
// We exclude steps into type checked variables. For those, we instead rely on the
// type being checked against
localFlowStep(nodeFrom, nodeTo, summary) and
not hasAdjacentTypeCheckedReads(nodeTo) and
not asModulePattern(nodeTo, _)
not hasAdjacentTypeCheckedRead(nodeTo) and
not TypeInference::asModulePattern(nodeTo.(SsaDefinitionExtNode).getDefinitionExt(), _)
}

predicate stepCall(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, StepSummary summary) {
Expand Down
Loading
Loading