Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JS: Add library for exporting graphs as type models #15386

Merged
merged 27 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
acef9b7
Dynamic/JS: Add library for exporting models
asgerf Apr 4, 2024
c55e03c
Dynamic/JS: Add support for re-exporting type models
asgerf Apr 4, 2024
348c95e
JS: Add a test case with fluent flow
asgerf Apr 5, 2024
946f0b4
JS: Add test for class with aliases
asgerf Apr 5, 2024
f4e05cc
JS: Add tests with semi-internal class problem
asgerf Apr 5, 2024
ab3c03d
JS: Add test where root export object is a function
asgerf Apr 5, 2024
3022c59
JS: Add access path alias test
asgerf Apr 5, 2024
ef7767b
JS: Add partial test for subclassing
asgerf Apr 5, 2024
9313564
JS: Add subclassing test and fix lack of subclassing handling
asgerf Apr 5, 2024
f2ea88a
JS: Add test showing missing re-export of base class relationship
asgerf Apr 5, 2024
56ebe6c
JS: More re-export logic to handle subclass export
asgerf Apr 5, 2024
29a6145
JS: Add test case showing problem with chains going through internal …
asgerf Apr 5, 2024
81b96a8
JS: Ensure MkClassInstance exists for base classes
asgerf Apr 5, 2024
8cb80d6
JS: Switch from hasLocationInfo to Location
asgerf Apr 8, 2024
8210143
Dynamic: Add hasPrettyName()
asgerf Apr 8, 2024
f5355cf
Dynamic: Sync ApiGraphModels.qll
asgerf Apr 9, 2024
15eabb4
JS: Address review comments
asgerf Apr 12, 2024
330229c
Update javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsDa…
asgerf Apr 12, 2024
3949ae4
Update shared/mad/codeql/mad/dynamic/GraphExport.qll
asgerf Apr 12, 2024
844b29b
Update shared/mad/codeql/mad/dynamic/GraphExport.qll
asgerf Apr 16, 2024
ee5cb6f
Update shared/mad/codeql/mad/dynamic/GraphExport.qll
asgerf Apr 16, 2024
be64daf
Merge branch 'main' into js/graph-export
asgerf Apr 16, 2024
c0db40d
Merge branch 'js/graph-export' of github.com:asgerf/codeql into js/gr…
asgerf Apr 16, 2024
3335d48
Sync files
asgerf Apr 16, 2024
93a9c62
Merge branch 'main' into js/graph-export
asgerf Apr 17, 2024
5e7026c
JS: Use AccessPath as parameter type
asgerf Apr 17, 2024
3c885f3
JS: Fix bug in MkClassInstance use-nodes
asgerf Apr 18, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 24 additions & 11 deletions javascript/ql/lib/semmle/javascript/ApiGraphs.qll
Original file line number Diff line number Diff line change
Expand Up @@ -501,16 +501,25 @@ module API {
}

/**
* Gets the location of this API node, if it corresponds to a program element with a source location.
*/
final Location getLocation() { result = this.getInducingNode().getLocation() }

/**
* DEPRECATED: Use `getLocation().hasLocationInfo()` instead.
*
* Holds if this node is located in file `path` between line `startline`, column `startcol`,
* and line `endline`, column `endcol`.
*
* For nodes that do not have a meaningful location, `path` is the empty string and all other
* parameters are zero.
*/
predicate hasLocationInfo(string path, int startline, int startcol, int endline, int endcol) {
this.getInducingNode().hasLocationInfo(path, startline, startcol, endline, endcol)
deprecated predicate hasLocationInfo(
string path, int startline, int startcol, int endline, int endcol
) {
this.getLocation().hasLocationInfo(path, startline, startcol, endline, endcol)
or
not exists(this.getInducingNode()) and
not exists(this.getLocation()) and
path = "" and
startline = 0 and
startcol = 0 and
Expand Down Expand Up @@ -696,14 +705,7 @@ module API {
or
any(Type t).hasUnderlyingType(m, _)
} or
MkClassInstance(DataFlow::ClassNode cls) {
hasSemantics(cls) and
(
cls = trackDefNode(_)
or
cls.getAnInstanceReference() = trackDefNode(_)
)
} or
MkClassInstance(DataFlow::ClassNode cls) { needsDefNode(cls) } or
MkDef(DataFlow::Node nd) { rhs(_, _, nd) } or
MkUse(DataFlow::Node nd) { use(_, _, nd) } or
/** A use of a TypeScript type. */
Expand All @@ -716,6 +718,17 @@ module API {
trackUseNode(src, true, bound, "").flowsTo(nd.getCalleeNode())
}

private predicate needsDefNode(DataFlow::ClassNode cls) {
hasSemantics(cls) and
(
cls = trackDefNode(_)
or
cls.getAnInstanceReference() = trackDefNode(_)
or
needsDefNode(cls.getADirectSubClass())
)
}

class TDef = MkModuleDef or TNonModuleDef;

class TNonModuleDef = MkModuleExport or MkClassInstance or MkDef or MkSyntheticCallbackArg;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,11 @@ private predicate isPrivateAssignment(DataFlow::Node node) {
)
}

private predicate isPrivateLike(API::Node node) { isPrivateAssignment(node.asSink()) }
/**
* Holds if `node` is the sink node corresponding to the right-hand side of a private declaration,
* like a private field (`#field`) or class member with the `private` modifier.
*/
predicate isPrivateLike(API::Node node) { isPrivateAssignment(node.asSink()) }

bindingset[name]
private int getNameBadness(string name) {
Expand Down
104 changes: 104 additions & 0 deletions javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
private import javascript
private import internal.ApiGraphModels as Shared
private import internal.ApiGraphModelsSpecific as Specific
private import semmle.javascript.endpoints.EndpointNaming as EndpointNaming
import Shared::ModelInput as ModelInput
import Shared::ModelOutput as ModelOutput

Expand Down Expand Up @@ -55,3 +56,106 @@ private class TaintStepFromSummary extends TaintTracking::SharedTaintStep {
summaryStepNodes(pred, succ, "taint")
}
}

/**
* Specifies which parts of the API graph to export in `ModelExport`.
*/
signature module ModelExportSig {
/**
* Holds if the exported model should contain `node`, if it is publicly accessible.
*
* This ensures that all ways to access `node` will be exported in type models.
*/
predicate shouldContain(API::Node node);

/**
* Holds if a named must be generated for `node` if it is to be included in the exported graph.
asgerf marked this conversation as resolved.
Show resolved Hide resolved
*/
default predicate mustBeNamed(API::Node node) { none() }

/**
* Holds if the exported model should preserve all paths leading to an instance of `type`,
* including partial ones. It does not need to be closed transitively, `ModelExport` will
* extend this to include type models from which `type` can be derived.
*/
default predicate shouldContainType(string type) { none() }
}

/**
* Module for exporting type models for a given set of nodes in the API graph.
*/
module ModelExport<ModelExportSig S> {
private import codeql.mad.dynamic.GraphExport
private import internal.ApiGraphModelsExport

private module GraphExportConfig implements GraphExportSig<Location, API::Node> {
predicate edge = Specific::apiGraphHasEdge/3;

predicate shouldContain = S::shouldContain/1;

predicate shouldNotContain(API::Node node) {
EndpointNaming::isPrivateLike(node)
or
node instanceof API::Use
}

predicate mustBeNamed(API::Node node) {
node.getAValueReachingSink() instanceof DataFlow::ClassNode
or
node = API::Internal::getClassInstance(_)
or
S::mustBeNamed(node)
}

predicate exposedName(API::Node node, string type, string path) {
node = API::moduleExport(type) and path = ""
}

predicate suggestedName(API::Node node, string type) {
exists(string package, string name |
(
EndpointNaming::sinkHasPrimaryName(node, package, name) and
not EndpointNaming::aliasDefinition(_, _, _, _, node)
or
EndpointNaming::aliasDefinition(_, _, package, name, node)
) and
type = EndpointNaming::renderName(package, name)
)
}

bindingset[host]
predicate hasTypeSummary(API::Node host, string path) {
exists(string methodName |
functionReturnsReceiver(host.getMember(methodName).getAValueReachingSink()) and
path = "Member[" + methodName + "].ReturnValue"
)
}

pragma[nomagic]
private predicate functionReturnsReceiver(DataFlow::FunctionNode func) {
getAReceiverRef(func).flowsTo(func.getReturnNode())
}

pragma[nomagic]
private DataFlow::MethodCallNode getAReceiverCall(DataFlow::FunctionNode func) {
result = getAReceiverRef(func).getAMethodCall()
}

pragma[nomagic]
private predicate callReturnsReceiver(DataFlow::MethodCallNode call) {
functionReturnsReceiver(call.getACallee().flow())
}

pragma[nomagic]
private DataFlow::SourceNode getAReceiverRef(DataFlow::FunctionNode func) {
result = func.getReceiver()
or
result = getAReceiverCall(func) and
callReturnsReceiver(result)
}
}

private module ExportedGraph = TypeGraphExport<GraphExportConfig, S::shouldContainType/1>;

import ExportedGraph
}
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ private predicate summaryModel(string type, string path, string input, string ou
}

/** Holds if a type model exists for the given parameters. */
private predicate typeModel(string type1, string type2, string path) {
predicate typeModel(string type1, string type2, string path) {
exists(string row |
typeModel(row) and
row.splitAt(";", 0) = type1 and
Expand Down Expand Up @@ -435,7 +435,7 @@ private API::Node getNodeFromType(string type) {
* Gets the API node identified by the first `n` tokens of `path` in the given `(type, path)` tuple.
*/
pragma[nomagic]
private API::Node getNodeFromPath(string type, AccessPath path, int n) {
API::Node getNodeFromPath(string type, AccessPath path, int n) {
isRelevantFullPath(type, path) and
(
n = 0 and
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/**
* Contains an extension of `GraphExport` that relies on API graph specific functionality.
*/

private import ApiGraphModels as Shared
private import codeql.mad.dynamic.GraphExport
private import ApiGraphModelsSpecific as Specific

private module API = Specific::API;

private import Shared

/**
* Holds if some proper prefix of `(type, path)` evaluated to `node`, where `remainingPath`
* is bound to the suffix of `path` that was not evaluated yet.
*/
asgerf marked this conversation as resolved.
Show resolved Hide resolved
bindingset[type, path]
predicate partiallyEvaluatedModel(string type, string path, API::Node node, string remainingPath) {
asgerf marked this conversation as resolved.
Show resolved Hide resolved
exists(int n, AccessPath accessPath |
accessPath = path and
asgerf marked this conversation as resolved.
Show resolved Hide resolved
getNodeFromPath(type, accessPath, n) = node and
n > 0 and
// Note that `n < accessPath.getNumToken()` is implied by the use of strictconcat()
remainingPath =
strictconcat(int k |
k = [n .. accessPath.getNumToken() - 1]
|
accessPath.getToken(k), "." order by k
)
)
}

/**
* Holds if `type` and all types leading to `type` should be re-exported.
*/
signature predicate shouldContainTypeSig(string type);

/**
* Wrapper around `GraphExport` that also exports information about re-exported types.
*
* ### JavaScript example 1
* For example, suppose `shouldContainType("foo")` holds, and the following is the entry point for a package `bar`:
* ```js
* // bar.js
* module.exports.xxx = require('foo');
* ```
* then this would generate the following type model:
* ```
* foo; bar; Member[xxx]
* ```
*
* ### JavaScript example 2
* For a more complex case, suppose the following type model exists:
* ```
* foo.XYZ; foo; Member[x].Member[y].Member[z]
* ```
* And the package exports something that matches a prefix of the access path above:
* ```js
* module.exports.blah = require('foo').x.y;
* ```
* This would result in the following type model:
* ```
* foo.XYZ; bar; Member[blah].Member[z]
* ```
* Notice that the access path `Member[blah].Member[z]` consists of an access path generated from the API
* graph, with pieces of the access path from the original type model appended to it.
*/
module TypeGraphExport<
GraphExportSig<Specific::Location, API::Node> S, shouldContainTypeSig/1 shouldContainType>
{
/** Like `shouldContainType` but includes types that lead to `type` via type models. */
private predicate shouldContainTypeEx(string type) {
shouldContainType(type)
or
exists(string prevType |
shouldContainType(prevType) and
Shared::typeModel(prevType, type, _)
)
}

private module Config implements GraphExportSig<Specific::Location, API::Node> {
import S

predicate shouldContain(API::Node node) {
S::shouldContain(node)
or
exists(string type1 | shouldContainTypeEx(type1) |
ModelOutput::getATypeNode(type1).getAValueReachableFromSource() = node.asSink()
or
exists(string type2, string path |
Shared::typeModel(type1, type2, path) and
getNodeFromPath(type2, path, _).getAValueReachableFromSource() = node.asSink()
)
)
}
}

private module ExportedGraph = GraphExport<Specific::Location, API::Node, Config>;

import ExportedGraph

/**
* Holds if `type1, type2, path` should be emitted as a type model, that is `(type2, path)` leads to an instance of `type1`.
*/
predicate typeModel(string type1, string type2, string path) {
ExportedGraph::typeModel(type1, type2, path)
or
shouldContainTypeEx(type1) and
exists(API::Node node |
// A relevant type is exported directly
Specific::sourceFlowsToSink(ModelOutput::getATypeNode(type1), node) and
ExportedGraph::pathToNode(type2, path, node)
or
// Something that leads to a relevant type, but didn't finish its access path, is exported
exists(string midType, string midPath, string remainingPath, string prefix, API::Node source |
Shared::typeModel(type1, midType, midPath) and
partiallyEvaluatedModel(midType, midPath, source, remainingPath) and
Specific::sourceFlowsToSink(source, node) and
ExportedGraph::pathToNode(type2, prefix, node) and
path = join(prefix, remainingPath)
)
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ module API = JS::API;

import JS::DataFlow as DataFlow

class Location = JS::Location;

/**
* Holds if `rawType` represents the JavaScript type `qualifiedName` from the given NPM `package`.
*
Expand Down Expand Up @@ -353,3 +355,54 @@ module ModelOutputSpecific {
)
}
}

/**
* Holds if the edge `pred -> succ` labelled with `path` exists in the API graph.
*/
bindingset[pred]
predicate apiGraphHasEdge(API::Node pred, string path, API::Node succ) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have any guarantees (or tests) to show that the edge names we generate here are valid MaD access-paths?

Say we changed Member[xxx] to be Property[xxx] in the rest of our MaD modeling, would the whole type-model export just silently produce models that couldn't be parsed until a human realized there was a problem? 🤔

I can imagine scenarios where we try to parse the access-path generated as if it had been in a YAML file, maybe even checking that if we start from a node that we generate an MaD access-path for, we will reach the same node from parsing and following that access-path

(I realize it's not so much a question about this specific predicate, as a thing in whole... and the question might be answered from reading some of the next commits)

Copy link
Contributor Author

@asgerf asgerf Apr 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No guarantees and no currently no tests. I spent a bit of time refactoring the ModelOutput::getAWarning into a parameterised module so we could check these as well, but it gets a bit fiddly. I backed out of it for now.

Some tests are going to exist in an internal repo where we use model generation, in the sense that those tests would fail if there was a mismatch here.

Copy link
Member

@RasmusWL RasmusWL Apr 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be nice with these kinds of tests, however I'm not going to block this PR to get them in NOW.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

however I'm going to block this PR to get them in NOW.

I assume a "not" was accidentally left out of that sentence 😅

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indeed 🙈 (fixed by edit)

exists(string name | succ = pred.getMember(name) and path = "Member[" + name + "]")
or
succ = pred.getUnknownMember() and path = "AnyMember"
or
succ = pred.getInstance() and path = "Instance"
or
succ = pred.getReturn() and path = "ReturnValue"
or
exists(int n | succ = pred.getParameter(n) |
if pred instanceof API::Use then path = "Argument[" + n + "]" else path = "Parameter[" + n + "]"
)
or
succ = pred.getPromised() and path = "Awaited"
or
exists(DataFlow::ClassNode cls |
pred = API::Internal::getClassInstance(cls.getADirectSubClass()) and
succ = API::Internal::getClassInstance(cls) and
path = ""
)
}

/**
* Holds if the value of `source` is exposed at `sink`.
*/
bindingset[source]
predicate sourceFlowsToSink(API::Node source, API::Node sink) {
source.getAValueReachableFromSource() = sink.asSink()
or
// Handle the case of an upstream class being the base class of an exposed own class
//
// class Foo extends external.BaseClass {}
//
// Here we want to ensure that `Instance(Foo)` is seen as subtype of `Instance(external.BaseClass)`.
//
// Although we have a dedicated sink node for `Instance(Foo)` we don't have dedicate source node for `Instance(external.BaseClass)`.
//
// However, there is always an `Instance` edge from the base class expression (`external.BaseClass`)
// to the receiver node in subclass constructor (the implicit constructor of `Foo`), which always exists.
// So we use the constructor receiver as the representative for `Instance(external.BaseClass)`.
// (This will get simplified when migrating to Ruby-style API graphs, as both sides will have explicit API nodes).
exists(DataFlow::ClassNode cls |
source.asSource() = cls.getConstructor().getReceiver() and
sink = API::Internal::getClassInstance(cls)
)
}
Loading
Loading