github · asgerf · Apr 18, 2024 · Apr 4, 2024 · Apr 4, 2024 · Apr 5, 2024
@@ -501,16 +501,25 @@ module API {
     }
 
     /**
+     * Gets the source location of the program element that induces this API node, if any.
+     */
+    final Location getLocation() { this.getInducingNode().getLocation() = result }
+
+    /**
+     * DEPRECATED: Use `getLocation().hasLocationInfo()` instead.
+     *
      * Holds if this node is located in file `path` between line `startline`, column `startcol`,
      * and line `endline`, column `endcol`.
      *
      * For nodes that do not have a meaningful location, `path` is the empty string and all other
      * parameters are zero.
      */
-    predicate hasLocationInfo(string path, int startline, int startcol, int endline, int endcol) {
-      this.getInducingNode().hasLocationInfo(path, startline, startcol, endline, endcol)
+    deprecated predicate hasLocationInfo(
+      string path, int startline, int startcol, int endline, int endcol
+    ) {
+      this.getLocation().hasLocationInfo(path, startline, startcol, endline, endcol)
       or
-      not exists(this.getInducingNode()) and
+      not exists(this.getLocation()) and
       path = "" and
       startline = 0 and
       startcol = 0 and
@@ -696,14 +705,7 @@ module API {
         or
         any(Type t).hasUnderlyingType(m, _)
       } or
-      MkClassInstance(DataFlow::ClassNode cls) {
-        hasSemantics(cls) and
-        (
-          cls = trackDefNode(_)
-          or
-          cls.getAnInstanceReference() = trackDefNode(_)
-        )
-      } or
+      MkClassInstance(DataFlow::ClassNode cls) { needsDefNode(cls) } or
       MkDef(DataFlow::Node nd) { rhs(_, _, nd) } or
       MkUse(DataFlow::Node nd) { use(_, _, nd) } or
       /** A use of a TypeScript type. */
@@ -716,6 +718,17 @@ module API {
         trackUseNode(src, true, bound, "").flowsTo(nd.getCalleeNode())
       }
 
+    /**
+     * Holds if `cls` has semantics and either `cls` itself or one of its instance
+     * references is a result of `trackDefNode`, or one of its direct subclasses
+     * satisfies this predicate.
+     */
+    private predicate needsDefNode(DataFlow::ClassNode cls) {
+      hasSemantics(cls) and
+      (
+        // Propagate upwards: a direct subclass qualifies, so this class does too.
+        needsDefNode(cls.getADirectSubClass())
+        or
+        trackDefNode(_) = cls.getAnInstanceReference()
+        or
+        trackDefNode(_) = cls
+      )
+    }
+
     class TDef = MkModuleDef or TNonModuleDef;
 
     class TNonModuleDef = MkModuleExport or MkClassInstance or MkDef or MkSyntheticCallbackArg;

@@ -147,7 +147,11 @@ private predicate isPrivateAssignment(DataFlow::Node node) {
   )
 }
 
-private predicate isPrivateLike(API::Node node) { isPrivateAssignment(node.asSink()) }
+/**
+ * Holds if the sink of `node` is the right-hand side of a private declaration,
+ * such as a private field (`#field`) or a class member with the `private` modifier.
+ */
+predicate isPrivateLike(API::Node node) {
+  exists(DataFlow::Node sink | sink = node.asSink() | isPrivateAssignment(sink))
+}
 
 bindingset[name]
 private int getNameBadness(string name) {

@@ -19,6 +19,7 @@
 private import javascript
 private import internal.ApiGraphModels as Shared
 private import internal.ApiGraphModelsSpecific as Specific
+private import semmle.javascript.endpoints.EndpointNaming as EndpointNaming
 import Shared::ModelInput as ModelInput
 import Shared::ModelOutput as ModelOutput
 
@@ -55,3 +56,106 @@ private class TaintStepFromSummary extends TaintTracking::SharedTaintStep {
     summaryStepNodes(pred, succ, "taint")
   }
 }
+
+/**
+ * Specifies which parts of the API graph to export in `ModelExport`.
+ *
+ * Only `shouldContain` has to be provided; `mustBeNamed` and `shouldContainType`
+ * default to `none()`.
+ */
+signature module ModelExportSig {
+  /**
+   * Holds if the exported model should contain `node`, if it is publicly accessible.
+   *
+   * This ensures that all ways to access `node` will be exported in type models.
+   */
+  predicate shouldContain(API::Node node);
+
+  /**
+   * Holds if a name must be generated for `node` if it is to be included in the exported graph.
+   */
+  default predicate mustBeNamed(API::Node node) { none() }
+
+  /**
+   * Holds if the exported model should preserve all paths leading to an instance of `type`,
+   * including partial ones. It does not need to be closed transitively, `ModelExport` will
+   * extend this to include type models from which `type` can be derived.
+   */
+  default predicate shouldContainType(string type) { none() }
+}
+
+/**
+ * Module for exporting type models for a given set of nodes in the API graph.
+ *
+ * The nodes and types to export are selected by the `ModelExportSig` parameter `S`.
+ * The resulting predicates are those of `TypeGraphExport`, re-exported via `import ExportedGraph`.
+ */
+module ModelExport<ModelExportSig S> {
+  private import codeql.mad.dynamic.GraphExport
+  private import internal.ApiGraphModelsExport
+
+  /** The graph-export configuration derived from `S` and the JavaScript API graph. */
+  private module GraphExportConfig implements GraphExportSig<Location, API::Node> {
+    /** The edge relation of the exported graph, taken from `Specific::apiGraphHasEdge`. */
+    predicate edge = Specific::apiGraphHasEdge/3;
+
+    /** Holds if `node` should be contained in the export, as requested by `S`. */
+    predicate shouldContain = S::shouldContain/1;
+
+    /**
+     * Holds if `node` must not be part of the exported graph: it is private-like
+     * (see `EndpointNaming::isPrivateLike`) or it is an `API::Use` node.
+     */
+    predicate shouldNotContain(API::Node node) {
+      EndpointNaming::isPrivateLike(node)
+      or
+      node instanceof API::Use
+    }
+
+    /**
+     * Holds if a name must be generated for `node`: a class reaches its sink,
+     * it is a class-instance node, or `S` requests it.
+     */
+    predicate mustBeNamed(API::Node node) {
+      node.getAValueReachingSink() instanceof DataFlow::ClassNode
+      or
+      node = API::Internal::getClassInstance(_)
+      or
+      S::mustBeNamed(node)
+    }
+
+    /**
+     * Holds if `node` is `API::moduleExport(type)`, in which case it is exposed
+     * as `type` with the empty access path.
+     */
+    predicate exposedName(API::Node node, string type, string path) {
+      node = API::moduleExport(type) and path = ""
+    }
+
+    /**
+     * Holds if `type` is a suggested type name for `node`, rendered with
+     * `EndpointNaming::renderName` from a `(package, name)` pair.
+     *
+     * The pair comes from `EndpointNaming::aliasDefinition` when one exists for `node`;
+     * otherwise it is the primary name of `node`.
+     */
+    predicate suggestedName(API::Node node, string type) {
+      exists(string package, string name |
+        (
+          EndpointNaming::sinkHasPrimaryName(node, package, name) and
+          not EndpointNaming::aliasDefinition(_, _, _, _, node)
+          or
+          EndpointNaming::aliasDefinition(_, _, package, name, node)
+        ) and
+        type = EndpointNaming::renderName(package, name)
+      )
+    }
+
+    /**
+     * Holds if `path` has the form `Member[m].ReturnValue`, where a function reaching
+     * the sink of member `m` of `host` returns its own receiver.
+     */
+    bindingset[host]
+    predicate hasTypeSummary(API::Node host, string path) {
+      exists(string methodName |
+        functionReturnsReceiver(host.getMember(methodName).getAValueReachingSink()) and
+        path = "Member[" + methodName + "].ReturnValue"
+      )
+    }
+
+    /** Holds if a receiver reference of `func` (see `getAReceiverRef`) flows to its return node. */
+    pragma[nomagic]
+    private predicate functionReturnsReceiver(DataFlow::FunctionNode func) {
+      getAReceiverRef(func).flowsTo(func.getReturnNode())
+    }
+
+    /** Gets a method call made on a receiver reference of `func`. */
+    pragma[nomagic]
+    private DataFlow::MethodCallNode getAReceiverCall(DataFlow::FunctionNode func) {
+      result = getAReceiverRef(func).getAMethodCall()
+    }
+
+    /** Holds if some callee of `call` returns its receiver. */
+    pragma[nomagic]
+    private predicate callReturnsReceiver(DataFlow::MethodCallNode call) {
+      functionReturnsReceiver(call.getACallee().flow())
+    }
+
+    /**
+     * Gets a node that refers to the receiver of `func`: either the receiver itself,
+     * or a method call on such a reference whose callee returns its receiver.
+     */
+    pragma[nomagic]
+    private DataFlow::SourceNode getAReceiverRef(DataFlow::FunctionNode func) {
+      result = func.getReceiver()
+      or
+      result = getAReceiverCall(func) and
+      callReturnsReceiver(result)
+    }
+  }
+
+  private module ExportedGraph = TypeGraphExport<GraphExportConfig, S::shouldContainType/1>;
+
+  import ExportedGraph
+}
@@ -267,7 +267,7 @@ private predicate summaryModel(string type, string path, string input, string ou
 }
 
 /** Holds if a type model exists for the given parameters. */
-private predicate typeModel(string type1, string type2, string path) {
+predicate typeModel(string type1, string type2, string path) {
   exists(string row |
     typeModel(row) and
     row.splitAt(";", 0) = type1 and
@@ -435,7 +435,7 @@ private API::Node getNodeFromType(string type) {
  * Gets the API node identified by the first `n` tokens of `path` in the given `(type, path)` tuple.
  */
 pragma[nomagic]
-private API::Node getNodeFromPath(string type, AccessPath path, int n) {
+API::Node getNodeFromPath(string type, AccessPath path, int n) {
   isRelevantFullPath(type, path) and
   (
     n = 0 and

@@ -0,0 +1,124 @@
+/**
+ * Contains an extension of `GraphExport` that relies on API graph specific functionality.
+ */
+
+private import ApiGraphModels as Shared
+private import codeql.mad.dynamic.GraphExport
+private import ApiGraphModelsSpecific as Specific
+
+private module API = Specific::API;
+
+private import Shared
+
+/**
+ * Holds if evaluating a proper, non-empty prefix of the access path in `(type, path)`
+ * yields `node`, and `remainingPath` is the not-yet-evaluated suffix of `path`,
+ * with its tokens joined by `.`.
+ */
+bindingset[type, path]
+predicate partiallyEvaluatedModel(string type, string path, API::Node node, string remainingPath) {
+  exists(AccessPath ap, int consumed |
+    ap = path and
+    consumed > 0 and
+    node = getNodeFromPath(type, ap, consumed) and
+    // `strictconcat` has no result for an empty range, so `consumed < ap.getNumToken()` is implied.
+    remainingPath =
+      strictconcat(int i | i in [consumed .. ap.getNumToken() - 1] | ap.getToken(i), "." order by i)
+  )
+}
+
+/**
+ * Holds if `type` and all types leading to `type` should be re-exported.
+ *
+ * This is the signature of the `shouldContainType` parameter of `TypeGraphExport`.
+ */
+signature predicate shouldContainTypeSig(string type);
+
+/**
+ * Wrapper around `GraphExport` that also exports information about re-exported types.
+ *
+ * ### JavaScript example 1
+ * For example, suppose `shouldContainType("foo")` holds, and the following is the entry point for a package `bar`:
+ * ```js
+ * // bar.js
+ * module.exports.xxx = require('foo');
+ * ```
+ * then this would generate the following type model:
+ * ```
+ * foo; bar; Member[xxx]
+ * ```
+ *
+ * ### JavaScript example 2
+ * For a more complex case, suppose the following type model exists:
+ * ```
+ * foo.XYZ; foo; Member[x].Member[y].Member[z]
+ * ```
+ * And the package exports something that matches a prefix of the access path above:
+ * ```js
+ * module.exports.blah = require('foo').x.y;
+ * ```
+ * This would result in the following type model:
+ * ```
+ * foo.XYZ; bar; Member[blah].Member[z]
+ * ```
+ * Notice that the access path `Member[blah].Member[z]` consists of an access path generated from the API
+ * graph, with pieces of the access path from the original type model appended to it.
+ */
+module TypeGraphExport<
+  GraphExportSig<Specific::Location, API::Node> S, shouldContainTypeSig/1 shouldContainType>
+{
+  /**
+   * Like `shouldContainType` but closed transitively over type models: holds for `type`
+   * if it is requested directly, or if some type model derives an already included
+   * type from `type`.
+   */
+  private predicate shouldContainTypeEx(string type) {
+    shouldContainType(type)
+    or
+    exists(string prevType |
+      // Recurse so that chains of type models are followed all the way,
+      // not just a single step from `shouldContainType`.
+      shouldContainTypeEx(prevType) and
+      Shared::typeModel(prevType, type, _)
+    )
+  }
+
+  /** The configuration `S`, with `shouldContain` widened to cover nodes relevant for re-exported types. */
+  private module Config implements GraphExportSig<Specific::Location, API::Node> {
+    import S
+
+    /**
+     * Holds if `S` requests `node`, or if the sink of `node` is reachable from a node of an
+     * included type or from a node on a (possibly partial) type-model path to such a type.
+     */
+    predicate shouldContain(API::Node node) {
+      S::shouldContain(node)
+      or
+      exists(string type1 | shouldContainTypeEx(type1) |
+        ModelOutput::getATypeNode(type1).getAValueReachableFromSource() = node.asSink()
+        or
+        exists(string type2, string path |
+          Shared::typeModel(type1, type2, path) and
+          getNodeFromPath(type2, path, _).getAValueReachableFromSource() = node.asSink()
+        )
+      )
+    }
+  }
+
+  private module ExportedGraph = GraphExport<Specific::Location, API::Node, Config>;
+
+  import ExportedGraph
+
+  /**
+   * Holds if `type1, type2, path` should be emitted as a type model, that is `(type2, path)` leads to an instance of `type1`.
+   */
+  predicate typeModel(string type1, string type2, string path) {
+    ExportedGraph::typeModel(type1, type2, path)
+    or
+    shouldContainTypeEx(type1) and
+    exists(API::Node node |
+      // A relevant type is exported directly
+      Specific::sourceFlowsToSink(ModelOutput::getATypeNode(type1), node) and
+      ExportedGraph::pathToNode(type2, path, node)
+      or
+      // Something that leads to a relevant type, but didn't finish its access path, is exported
+      exists(string midType, string midPath, string remainingPath, string prefix, API::Node source |
+        Shared::typeModel(type1, midType, midPath) and
+        partiallyEvaluatedModel(midType, midPath, source, remainingPath) and
+        Specific::sourceFlowsToSink(source, node) and
+        ExportedGraph::pathToNode(type2, prefix, node) and
+        path = join(prefix, remainingPath)
+      )
+    )
+  }
+}
@@ -27,6 +27,8 @@ module API = JS::API;
 
 import JS::DataFlow as DataFlow
 
+/** The location type exposed by this module; an alias for `JS::Location`. */
+class Location = JS::Location;
+
 /**
  * Holds if `rawType` represents the JavaScript type `qualifiedName` from the given NPM `package`.
  *
@@ -353,3 +355,54 @@ module ModelOutputSpecific {
     )
   }
 }
+
+/**
+ * Holds if the API graph has an edge from `pred` to `succ` whose access path token is `path`.
+ *
+ * The edge from a subclass instance to the instance of its direct base class is
+ * labelled with the empty string.
+ */
+bindingset[pred]
+predicate apiGraphHasEdge(API::Node pred, string path, API::Node succ) {
+  path = "AnyMember" and succ = pred.getUnknownMember()
+  or
+  path = "Instance" and succ = pred.getInstance()
+  or
+  path = "ReturnValue" and succ = pred.getReturn()
+  or
+  path = "Awaited" and succ = pred.getPromised()
+  or
+  exists(string member | path = "Member[" + member + "]" and succ = pred.getMember(member))
+  or
+  exists(int index | succ = pred.getParameter(index) |
+    // Parameters of a use node are arguments passed to it; otherwise they are parameters.
+    pred instanceof API::Use and path = "Argument[" + index + "]"
+    or
+    not pred instanceof API::Use and path = "Parameter[" + index + "]"
+  )
+  or
+  exists(DataFlow::ClassNode base, DataFlow::ClassNode derived |
+    derived = base.getADirectSubClass() and
+    pred = API::Internal::getClassInstance(derived) and
+    succ = API::Internal::getClassInstance(base) and
+    path = ""
+  )
+}
+
+/**
+ * Holds if the value of `source` is exposed at `sink`.
+ */
+bindingset[source]
+predicate sourceFlowsToSink(API::Node source, API::Node sink) {
+  sink.asSink() = source.getAValueReachableFromSource()
+  or
+  // Handle an upstream class that is the base class of an exposed class of our own:
+  //
+  //   class Foo extends external.BaseClass {}
+  //
+  // `Instance(Foo)` should be seen as a subtype of `Instance(external.BaseClass)`.
+  //
+  // There is a dedicated sink node for `Instance(Foo)`, but no dedicated source node for
+  // `Instance(external.BaseClass)`.
+  //
+  // However, there is always an `Instance` edge from the base class expression (`external.BaseClass`)
+  // to the receiver node of the subclass constructor (the implicit constructor of `Foo`), which always
+  // exists. That constructor receiver therefore represents `Instance(external.BaseClass)`.
+  // (This will get simplified when migrating to Ruby-style API graphs, as both sides will have explicit API nodes).
+  exists(DataFlow::ClassNode ownClass |
+    sink = API::Internal::getClassInstance(ownClass) and
+    ownClass.getConstructor().getReceiver() = source.asSource()
+  )
+}