Skip to content

Commit

Permalink
Merge pull request #15386 from asgerf/js/graph-export
Browse files Browse the repository at this point in the history
JS: Add library for exporting graphs as type models
  • Loading branch information
asgerf authored Apr 18, 2024
2 parents 622f69e + 3c885f3 commit decd576
Show file tree
Hide file tree
Showing 27 changed files with 778 additions and 20 deletions.
37 changes: 25 additions & 12 deletions javascript/ql/lib/semmle/javascript/ApiGraphs.qll
Original file line number Diff line number Diff line change
Expand Up @@ -501,16 +501,25 @@ module API {
}

/**
 * Gets the location of this API node, if it corresponds to a program element with a source location.
 *
 * The result is the location of the node returned by `getInducingNode()`, so this predicate
 * has no result for API nodes that have no inducing node.
 */
final Location getLocation() { result = this.getInducingNode().getLocation() }

/**
* DEPRECATED: Use `getLocation().hasLocationInfo()` instead.
*
* Holds if this node is located in file `path` between line `startline`, column `startcol`,
* and line `endline`, column `endcol`.
*
* For nodes that do not have a meaningful location, `path` is the empty string and all other
* parameters are zero.
*/
predicate hasLocationInfo(string path, int startline, int startcol, int endline, int endcol) {
this.getInducingNode().hasLocationInfo(path, startline, startcol, endline, endcol)
deprecated predicate hasLocationInfo(
string path, int startline, int startcol, int endline, int endcol
) {
this.getLocation().hasLocationInfo(path, startline, startcol, endline, endcol)
or
not exists(this.getInducingNode()) and
not exists(this.getLocation()) and
path = "" and
startline = 0 and
startcol = 0 and
Expand Down Expand Up @@ -696,14 +705,7 @@ module API {
or
any(Type t).hasUnderlyingType(m, _)
} or
MkClassInstance(DataFlow::ClassNode cls) {
hasSemantics(cls) and
(
cls = trackDefNode(_)
or
cls.getAnInstanceReference() = trackDefNode(_)
)
} or
MkClassInstance(DataFlow::ClassNode cls) { needsDefNode(cls) } or
MkDef(DataFlow::Node nd) { rhs(_, _, nd) } or
MkUse(DataFlow::Node nd) { use(_, _, nd) } or
/** A use of a TypeScript type. */
Expand All @@ -716,6 +718,17 @@ module API {
trackUseNode(src, true, bound, "").flowsTo(nd.getCalleeNode())
}

/**
 * Holds if `cls` satisfies `hasSemantics` and at least one of the following is true:
 * - `cls` itself is reached by `trackDefNode`,
 * - an instance reference of `cls` is reached by `trackDefNode`, or
 * - a direct subclass of `cls` needs a definition node.
 */
private predicate needsDefNode(DataFlow::ClassNode cls) {
  hasSemantics(cls) and
  (
    // Propagate upwards through the class hierarchy, one direct subclass at a time.
    needsDefNode(cls.getADirectSubClass())
    or
    exists(DataFlow::Node tracked | tracked = trackDefNode(_) |
      tracked = cls
      or
      tracked = cls.getAnInstanceReference()
    )
  )
}

class TDef = MkModuleDef or TNonModuleDef;

class TNonModuleDef = MkModuleExport or MkClassInstance or MkDef or MkSyntheticCallbackArg;
Expand Down Expand Up @@ -1306,7 +1319,7 @@ module API {
succ = MkDef(rhs)
or
exists(DataFlow::ClassNode cls |
cls.getAnInstanceReference() = rhs and
cls.getAnInstanceReference().flowsTo(rhs) and
succ = MkClassInstance(cls)
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,11 @@ private predicate isPrivateAssignment(DataFlow::Node node) {
)
}

private predicate isPrivateLike(API::Node node) { isPrivateAssignment(node.asSink()) }
/**
 * Holds if `node` is a sink node whose underlying data-flow node is the right-hand side of a
 * private declaration, such as a private field (`#field`) or a class member with the
 * `private` modifier.
 */
predicate isPrivateLike(API::Node node) {
  exists(DataFlow::Node rhs | rhs = node.asSink() | isPrivateAssignment(rhs))
}

bindingset[name]
private int getNameBadness(string name) {
Expand Down
104 changes: 104 additions & 0 deletions javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
private import javascript
private import internal.ApiGraphModels as Shared
private import internal.ApiGraphModelsSpecific as Specific
private import semmle.javascript.endpoints.EndpointNaming as EndpointNaming
import Shared::ModelInput as ModelInput
import Shared::ModelOutput as ModelOutput

Expand Down Expand Up @@ -55,3 +56,106 @@ private class TaintStepFromSummary extends TaintTracking::SharedTaintStep {
summaryStepNodes(pred, succ, "taint")
}
}

/**
 * Specifies which parts of the API graph to export in `ModelExport`.
 *
 * Only `shouldContain` has to be implemented; `mustBeNamed` and `shouldContainType`
 * default to `none()`.
 */
signature module ModelExportSig {
  /**
   * Holds if the exported model should contain `node`, if it is publicly accessible.
   *
   * This ensures that all ways to access `node` will be exported in type models.
   */
  predicate shouldContain(API::Node node);

  /**
   * Holds if `node` must be named if it is part of the exported graph.
   *
   * Defaults to `none()`, that is, no additional nodes are forced to be named.
   */
  default predicate mustBeNamed(API::Node node) { none() }

  /**
   * Holds if the exported model should preserve all paths leading to an instance of `type`,
   * including partial ones. It does not need to be closed transitively, `ModelExport` will
   * extend this to include type models from which `type` can be derived.
   *
   * Defaults to `none()`, that is, no types are re-exported.
   */
  default predicate shouldContainType(string type) { none() }
}

/**
 * Module for exporting type models for a given set of nodes in the API graph.
 *
 * The configuration `S` is adapted to a `GraphExportSig` implementation, which is passed to
 * `TypeGraphExport` together with `S::shouldContainType`. The members of the resulting
 * module are re-exported from this module.
 */
module ModelExport<ModelExportSig S> {
  private import codeql.mad.dynamic.GraphExport
  private import internal.ApiGraphModelsExport

  /** Adapter turning the user-supplied `S` into a graph-export configuration. */
  private module GraphExportConfig implements GraphExportSig<Location, API::Node> {
    predicate edge = Specific::apiGraphHasEdge/3;

    predicate shouldContain = S::shouldContain/1;

    /** Holds if `node` is a use-node, or is private-like according to `EndpointNaming`. */
    predicate shouldNotContain(API::Node node) {
      node instanceof API::Use
      or
      EndpointNaming::isPrivateLike(node)
    }

    /**
     * Holds if `node` must be named: either `S` requests it, `node` is a class-instance
     * node, or a class value reaches the sink of `node`.
     */
    predicate mustBeNamed(API::Node node) {
      S::mustBeNamed(node)
      or
      node = API::Internal::getClassInstance(_)
      or
      node.getAValueReachingSink() instanceof DataFlow::ClassNode
    }

    /** Holds if `node` is the module export of `type`, reached by the empty access path. */
    predicate exposedName(API::Node node, string type, string path) {
      path = "" and
      node = API::moduleExport(type)
    }

    /**
     * Holds if `type` is the rendered name suggested for `node`. A name from an alias
     * definition is used when one exists for `node`; otherwise the primary name of the
     * sink is used.
     */
    predicate suggestedName(API::Node node, string type) {
      exists(string pkg, string qualifiedName |
        type = EndpointNaming::renderName(pkg, qualifiedName)
      |
        EndpointNaming::aliasDefinition(_, _, pkg, qualifiedName, node)
        or
        not EndpointNaming::aliasDefinition(_, _, _, _, node) and
        EndpointNaming::sinkHasPrimaryName(node, pkg, qualifiedName)
      )
    }

    /**
     * Holds if `host` has a member whose function value returns its own receiver, where
     * `path` is the access path `Member[<name>].ReturnValue` for that member.
     */
    bindingset[host]
    predicate hasTypeSummary(API::Node host, string path) {
      exists(string member |
        path = "Member[" + member + "].ReturnValue" and
        functionReturnsReceiver(host.getMember(member).getAValueReachingSink())
      )
    }

    /** Holds if a reference to the receiver of `func` flows to a return node of `func`. */
    pragma[nomagic]
    private predicate functionReturnsReceiver(DataFlow::FunctionNode func) {
      exists(DataFlow::SourceNode receiver | receiver = getAReceiverRef(func) |
        receiver.flowsTo(func.getReturnNode())
      )
    }

    /** Gets a method call made on a reference to the receiver of `func`. */
    pragma[nomagic]
    private DataFlow::MethodCallNode getAReceiverCall(DataFlow::FunctionNode func) {
      getAReceiverRef(func).getAMethodCall() = result
    }

    /** Holds if some callee of `call` returns its receiver. */
    pragma[nomagic]
    private predicate callReturnsReceiver(DataFlow::MethodCallNode call) {
      exists(DataFlow::FunctionNode callee | callee = call.getACallee().flow() |
        functionReturnsReceiver(callee)
      )
    }

    /**
     * Gets a reference to the receiver of `func`: either the receiver node itself, or a
     * method call on such a reference whose callee returns its receiver.
     */
    pragma[nomagic]
    private DataFlow::SourceNode getAReceiverRef(DataFlow::FunctionNode func) {
      result = getAReceiverCall(func) and
      callReturnsReceiver(result)
      or
      result = func.getReceiver()
    }
  }

  private module ExportedGraph = TypeGraphExport<GraphExportConfig, S::shouldContainType/1>;

  import ExportedGraph
}
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ private predicate summaryModel(
}

/** Holds if a type model exists for the given parameters. */
private predicate typeModel(string type1, string type2, string path) {
predicate typeModel(string type1, string type2, string path) {
any(DeprecationAdapter a).typeModel(type1, type2, path)
or
Extensions::typeModel(type1, type2, path)
Expand Down Expand Up @@ -500,7 +500,7 @@ private API::Node getNodeFromType(string type) {
* Gets the API node identified by the first `n` tokens of `path` in the given `(type, path)` tuple.
*/
pragma[nomagic]
private API::Node getNodeFromPath(string type, AccessPath path, int n) {
API::Node getNodeFromPath(string type, AccessPath path, int n) {
isRelevantFullPath(type, path) and
(
n = 0 and
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/**
* Contains an extension of `GraphExport` that relies on API graph specific functionality.
*/

private import ApiGraphModels as Shared
private import codeql.mad.dynamic.GraphExport
private import ApiGraphModelsSpecific as Specific

private module API = Specific::API;

private import Shared

/**
 * Holds if evaluating a non-empty proper prefix of `(type, path)` yields `node`, and
 * `remainingPath` is the dot-separated rendering of the tokens of `path` that were not
 * consumed by that prefix.
 *
 * See concrete examples in `TypeGraphExport`.
 */
bindingset[type, path]
private predicate partiallyEvaluatedModel(
  string type, AccessPath path, API::Node node, string remainingPath
) {
  exists(int consumed |
    consumed > 0 and
    node = getNodeFromPath(type, path, consumed) and
    // `strictconcat` has no result for an empty range, which implies
    // `consumed < path.getNumToken()` without stating it explicitly.
    remainingPath =
      strictconcat(int i |
        i in [consumed .. path.getNumToken() - 1]
      |
        path.getToken(i), "." order by i
      )
  )
}

/**
 * Holds if `type` and all types leading to `type` should be re-exported.
 *
 * This is the signature of the predicate parameter accepted by `TypeGraphExport`.
 */
signature predicate shouldContainTypeSig(string type);

/**
 * Wrapper around `GraphExport` that also exports information about re-exported types.
 *
 * ### JavaScript example 1
 * For example, suppose `shouldContainType("foo")` holds, and the following is the entry point for a package `bar`:
 * ```js
 * // bar.js
 * module.exports.xxx = require('foo');
 * ```
 * then this would generate the following type model:
 * ```
 * foo; bar; Member[xxx]
 * ```
 *
 * ### JavaScript example 2
 * For a more complex case, suppose the following type model exists:
 * ```
 * foo.XYZ; foo; Member[x].Member[y].Member[z]
 * ```
 * And the package exports something that matches a prefix of the access path above:
 * ```js
 * module.exports.blah = require('foo').x.y;
 * ```
 * This would result in the following type model:
 * ```
 * foo.XYZ; bar; Member[blah].Member[z]
 * ```
 * Notice that the access path `Member[blah].Member[z]` consists of an access path generated from the API
 * graph, with pieces of the access path from the original type model appended to it.
 */
module TypeGraphExport<
  GraphExportSig<Specific::Location, API::Node> S, shouldContainTypeSig/1 shouldContainType>
{
  /**
   * Like `shouldContainType` but also includes every type that leads to such a type via
   * type models.
   *
   * The closure is transitive: if `(type, path)` leads to an instance of a type that is
   * already included, then `type` is included as well. This is what allows
   * `shouldContainType` itself to be specified without being transitively closed.
   */
  private predicate shouldContainTypeEx(string type) {
    shouldContainType(type)
    or
    exists(string prevType |
      // Recurse on the extended predicate (not on `shouldContainType`) so that chains of
      // type models of any length are followed, rather than only a single step.
      shouldContainTypeEx(prevType) and
      Shared::typeModel(prevType, type, _)
    )
  }

  /**
   * The configuration `S`, with `shouldContain` widened to also cover sink nodes that are
   * reached by values of the types selected by `shouldContainTypeEx`.
   */
  private module Config implements GraphExportSig<Specific::Location, API::Node> {
    import S

    /**
     * Holds if `S` requests `node`, or if the sink of `node` is reachable from a node of a
     * relevant type, or from a node obtained by evaluating any prefix of a type model
     * leading to a relevant type.
     */
    predicate shouldContain(API::Node node) {
      S::shouldContain(node)
      or
      exists(string type1 | shouldContainTypeEx(type1) |
        ModelOutput::getATypeNode(type1).getAValueReachableFromSource() = node.asSink()
        or
        exists(string type2, string path |
          Shared::typeModel(type1, type2, path) and
          getNodeFromPath(type2, path, _).getAValueReachableFromSource() = node.asSink()
        )
      )
    }
  }

  private module ExportedGraph = GraphExport<Specific::Location, API::Node, Config>;

  import ExportedGraph

  /**
   * Holds if `type1, type2, path` should be emitted as a type model, that is `(type2, path)` leads to an instance of `type1`.
   */
  predicate typeModel(string type1, string type2, string path) {
    ExportedGraph::typeModel(type1, type2, path)
    or
    shouldContainTypeEx(type1) and
    exists(API::Node node |
      // A relevant type is exported directly
      Specific::sourceFlowsToSink(ModelOutput::getATypeNode(type1), node) and
      ExportedGraph::pathToNode(type2, path, node)
      or
      // Something that leads to a relevant type, but didn't finish its access path, is exported
      exists(string midType, string midPath, string remainingPath, string prefix, API::Node source |
        Shared::typeModel(type1, midType, midPath) and
        partiallyEvaluatedModel(midType, midPath, source, remainingPath) and
        Specific::sourceFlowsToSink(source, node) and
        ExportedGraph::pathToNode(type2, prefix, node) and
        path = join(prefix, remainingPath)
      )
    )
  }
}
Loading

0 comments on commit decd576

Please sign in to comment.