Skip to content

Commit

Permalink
Merge pull request #57 from awslabs/implicit-dataflow
Browse files Browse the repository at this point in the history
Error on implicit flow in taint analysis
  • Loading branch information
samarth-aws authored Feb 20, 2024
2 parents 5ede2e2 + e78c146 commit 3e3d32a
Show file tree
Hide file tree
Showing 42 changed files with 484 additions and 27 deletions.
4 changes: 4 additions & 0 deletions analysis/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ type TaintSpec struct {

// Filters contains a list of filters that can be used by analyses
Filters []CodeIdentifier

// ExplicitFlowOnly indicates whether the taint analysis should only consider explicit data flows.
// This should be set to true when proving a data flow property instead of an information flow property.
ExplicitFlowOnly bool `yaml:"explicit-flow-only" json:"explicit-flow-only"`
}

// SlicingSpec contains code identifiers that identify a specific program slicing / backwards dataflow analysis spec.
Expand Down
1 change: 1 addition & 0 deletions analysis/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ func TestLoadMisc(t *testing.T) {
{"", "some/other/package", "", "", "", "OneField", "ThatStruct", "", "", nil},
{"", "some/other/package", "Interface", "", "", "", "", "", "", nil},
},
ExplicitFlowOnly: true,
},
},
Options: Options{
Expand Down
46 changes: 46 additions & 0 deletions analysis/dataflow/function_summary_graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ type SummaryGraph struct {
// the return instructions are linked to ReturnNode, one per Value in a tuple returned
Returns map[ssa.Instruction][]*ReturnValNode

// the if statements of the function
Ifs map[ssa.Instruction]*IfNode

// errors can be used to accumulate errors that were encountered while building the summary graph
errors map[error]bool

Expand Down Expand Up @@ -133,6 +136,7 @@ func NewSummaryGraph(s *AnalyzerState, f *ssa.Function, id uint32,
AccessGlobalNodes: make(map[ssa.Instruction]map[ssa.Value]*AccessGlobalNode),
SyntheticNodes: make(map[ssa.Instruction]*SyntheticNode),
BoundLabelNodes: make(map[ssa.Instruction]*BoundLabelNode),
Ifs: make(map[ssa.Instruction]*IfNode),
errors: map[error]bool{},
lastNodeID: &lastNodeID,
shouldTrack: shouldTrack,
Expand Down Expand Up @@ -195,6 +199,8 @@ func (g *SummaryGraph) initializeInnerNodes(s *AnalyzerState, shouldTrack func(*
}
case *ssa.MakeClosure:
g.addClosure(x)
case *ssa.If:
g.addIfNode(x)

// Other types of sources that may be used in config
case *ssa.Alloc, *ssa.FieldAddr, *ssa.Field, *ssa.UnOp:
Expand Down Expand Up @@ -457,6 +463,19 @@ func (g *SummaryGraph) addBoundLabelNode(instr ssa.Instruction, label *pointer.L
}
}

func (g *SummaryGraph) addIfNode(x *ssa.If) {
if _, ok := g.Ifs[x]; !ok {
node := &IfNode{
id: g.newNodeID(),
parent: g,
ssaNode: x,
out: make(map[GraphNode]EdgeInfo),
in: make(map[GraphNode]EdgeInfo),
}
g.Ifs[x] = node
}
}

func (g *SummaryGraph) addSyntheticEdge(mark MarkWithAccessPath, info *ConditionInfo, instr ssa.Instruction, _ string) {

node, ok := g.SyntheticNodes[instr]
Expand All @@ -475,6 +494,14 @@ func (g *SummaryGraph) addBoundLabelEdge(mark MarkWithAccessPath, info *Conditio
g.addEdge(mark, node, info)
}

func (g *SummaryGraph) addIfEdge(mark MarkWithAccessPath, info *ConditionInfo, n *ssa.If) {
node, ok := g.Ifs[n]
if !ok {
return
}
g.addEdge(mark, node, info)
}

// selectNodesFromMark returns a slice of nodes that correspond to the mark. In most cases this slice should have only
// one element.
//
Expand Down Expand Up @@ -644,6 +671,15 @@ func (g *SummaryGraph) addEdge(source MarkWithAccessPath, dest GraphNode, cond *
}
}
}

if source.Mark.IsIf() {
sourceInstr := source.Mark.Node.(ssa.Instruction)
if sourceNode, ok := g.Ifs[sourceInstr]; ok {
if sourceNode != dest {
updateEdgeInfo(source, dest, cond, sourceNode)
}
}
}
}

func isDiffNode(mark MarkWithAccessPath, source GraphNode, dest GraphNode) bool {
Expand Down Expand Up @@ -794,6 +830,8 @@ func addInEdge(dest GraphNode, source GraphNode, path EdgeInfo) {
node.in[source] = path
case *BoundLabelNode:
node.in[source] = path
case *IfNode:
node.in[source] = path
default:
panic(fmt.Sprintf("invalid dest node type: %T", dest))
}
Expand Down Expand Up @@ -996,6 +1034,14 @@ func (a *BoundLabelNode) String() string {
a.targetInfo.BoundIndex)
}

func (a *IfNode) String() string {
if a == nil {
return ""
}

return fmt.Sprintf("\"[#%d] %s\"", a.id, ftu.SanitizeRepr(a.ssaNode))
}

// Print the summary graph to w in the graphviz format.
// If g is nil, then prints the empty graph "subgraph {}"
//
Expand Down
78 changes: 78 additions & 0 deletions analysis/dataflow/function_summary_graph_nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ func Instr(node GraphNode) ssa.Instruction {
return node.ParentNode().CallSite()
case *SyntheticNode:
return node.Instr()
case *IfNode:
return node.SsaNode()
}

return nil
Expand Down Expand Up @@ -1013,3 +1015,79 @@ func (a *BoundLabelNode) ParentName() string {
func (a *BoundLabelNode) LongID() string {
return "#" + strconv.Itoa(int(a.parent.ID)) + "." + strconv.Itoa(int(a.id))
}

// IfNode is used to track dataflow to if-statements (includes all types of branching).
// For now, this just contains the value that is being branched on (the if-condition).
type IfNode struct {
id uint32
parent *SummaryGraph // the parent of an IfNode is the summary of the function in which it appears
ssaNode *ssa.If // an IfNode must correspond to a specific instruction
out map[GraphNode]EdgeInfo // the out maps the node to other nodes to which data flows
in map[GraphNode]EdgeInfo // the in maps the node to other nodes from which data flows
marks LocSet
}

// ID returns the integer id of the node in its parent graph.
func (a *IfNode) ID() uint32 { return a.id }

// Graph returns the parent summary graph of the node.
func (a *IfNode) Graph() *SummaryGraph { return a.parent }

// Out returns the nodes the graph node's data flows to, with their object path.
func (a *IfNode) Out() map[GraphNode]EdgeInfo { return a.out }

// In returns the nodes with incoming edges to the current node, with their object path.
func (a *IfNode) In() map[GraphNode]EdgeInfo { return a.in }

// Marks returns the location information of the node.
func (a *IfNode) Marks() LocSet { return a.marks }

// SetLocs sets the locations information of the node.
func (a *IfNode) SetLocs(set LocSet) {
if a.marks == nil {
a.marks = map[ssa.Instruction]bool{}
}
funcutil.Merge(a.marks, set, funcutil.First[bool])
}

// Type returns the type of the value in the if-condition.
func (a *IfNode) Type() types.Type { return a.ssaNode.Cond.Type() }

// Position returns the position of the node.
func (a *IfNode) Position(c *AnalyzerState) token.Position {
cond := a.ssaNode
if cond == nil {
return lang.DummyPos
}

pos := c.Program.Fset.Position(cond.Pos())
if !pos.IsValid() {
pos = c.Program.Fset.Position(cond.Cond.Pos())
}

return pos
}

// Equal implements equality checking between nodes.
func (a *IfNode) Equal(node GraphNode) bool {
if a2, ok := node.(*IfNode); ok {
return a == a2
}
return false
}

// SsaNode returns the if-instruction.
func (a *IfNode) SsaNode() *ssa.If { return a.ssaNode }

// ParentName returns the name of the parent function.
func (a *IfNode) ParentName() string {
if a.parent != nil && a.parent.Parent != nil {
return a.parent.Parent.Name()
}
return "IfNode"
}

// LongID returns a string identifier for the node.
func (a *IfNode) LongID() string {
return "#" + strconv.Itoa(int(a.parent.ID)) + "." + strconv.Itoa(int(a.id))
}
8 changes: 8 additions & 0 deletions analysis/dataflow/intra_procedural.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ func (state *IntraAnalysisState) makeEdgesAtInstruction(_ int, instr ssa.Instruc
state.makeEdgesAtReturn(typedInstr)
case *ssa.Store:
state.makeEdgesAtStoreInCapturedLabel(typedInstr)
case *ssa.If:
state.makeEdgesAtIf(typedInstr)
}
// Always check if it's a synthetic node
state.makeEdgesSyntheticNodes(instr)
Expand Down Expand Up @@ -310,6 +312,12 @@ func (state *IntraAnalysisState) makeEdgesAtStoreInCapturedLabel(x *ssa.Store) {
}
}

func (state *IntraAnalysisState) makeEdgesAtIf(x *ssa.If) {
for _, origin := range state.getMarks(x, x.Cond, "", false) {
state.summary.addIfEdge(origin, nil, x)
}
}

// makeEdgesSyntheticNodes analyzes the synthetic
func (state *IntraAnalysisState) makeEdgesSyntheticNodes(instr ssa.Instruction) {
aState := state.parentAnalyzerState
Expand Down
2 changes: 1 addition & 1 deletion analysis/dataflow/intra_procedural_instruction_ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ func (state *IntraAnalysisState) DoStore(x *ssa.Store) {
}

// DoIf is a no-op
func (state *IntraAnalysisState) DoIf(*ssa.If) {
func (state *IntraAnalysisState) DoIf(x *ssa.If) {

}

Expand Down
38 changes: 37 additions & 1 deletion analysis/dataflow/intra_procedural_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func TestFunctionSummaries(t *testing.T) {
if function.Name() == "main" {
ok := len(summary.Returns) == 0 // main does not return any data
ok = ok && len(summary.Params) == 0
ok = ok && len(summary.Callees) == 7 // 6 regular function calls + 1 closure
ok = ok && len(summary.Callees) == 8 // 7 regular function calls + 1 closure
if !ok {
t.Errorf("main graph is not as expected")
}
Expand Down Expand Up @@ -506,6 +506,40 @@ func TestFunctionSummaries(t *testing.T) {
t.Errorf("in Baz, a closure freevar outgoing edge should be a return")
}
}

if function.Name() == "ImplicitFlow" {
for _, callee := range summary.Callees {
for _, call := range callee {
if call.FuncName() == "Source" {
hasIf := false
for out := range call.Out() {
if _, ok := out.(*dataflow.IfNode); ok {
hasIf = true
}
}
if !hasIf {
t.Errorf("in ImplicitFlow, a call to Source() outgoing edge should be an if node")
}
}
}
}

for _, ifn := range summary.Ifs {
hasCallNodeIn := false
for in := range ifn.In() {
if call, ok := in.(*dataflow.CallNode); ok {
hasCallNodeIn = call.FuncName() == "Source"
}
}
if !hasCallNodeIn {
t.Errorf("in ImplicitFlow, an if incoming edge should be a call to Source()")
}

if len(ifn.Out()) != 0 {
t.Errorf("in ImplicitFlow, an if node should have no outgoing edges")
}
}
}
}
}

Expand Down Expand Up @@ -534,4 +568,6 @@ func TestStringNilSafety(t *testing.T) {
_ = g.String()
var c *dataflow.ClosureNode
_ = c.String()
var i *dataflow.IfNode
_ = i.String()
}
9 changes: 9 additions & 0 deletions analysis/dataflow/mark.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ const (
Global
// Synthetic node type for any other node.
Synthetic
// If is an if statement.
If
)

func (m MarkType) String() string {
Expand All @@ -67,6 +69,8 @@ func (m MarkType) String() string {
return "global"
case Synthetic:
return "synthetic"
case If:
return "if"
default:
return "multiple"
}
Expand Down Expand Up @@ -151,6 +155,11 @@ func (m Mark) IsSynthetic() bool {
return m.Type&Synthetic != 0
}

// IsIf returns true if the source is an if condition.
func (m Mark) IsIf() bool {
return m.Type&If != 0
}

func (m Mark) String() string {
str := m.Type.String() + ": "
if m.Qualifier != nil {
Expand Down
29 changes: 29 additions & 0 deletions analysis/taint/dataflow_visitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,18 @@ func (v *Visitor) Reset() {
v.seen = make(map[df.KeyType]bool)
}

// CondError represents an error where taint flows to a conditional statement.
type CondError struct {
Cond *ssa.If // Cond is the conditional statement
ParentName string // ParentName is the name of the function in which the conditional statement occurs
Trace string // Trace is a string representing the taint trace
Pos token.Position // Pos is the position
}

func (e *CondError) Error() string {
return fmt.Sprintf("taint flows to conditional statement %v in function %v: %v\n\tat %v", e.Cond, e.ParentName, e.Trace, e.Pos)
}

// Visit runs an inter-procedural analysis to add any detected taint flow from currentSource to a sink. This implements
// the visitor interface of the dataflow package.
//
Expand Down Expand Up @@ -628,6 +640,23 @@ func (v *Visitor) Visit(s *df.AnalyzerState, source df.NodeWithTrace) {
TracingInfo: cur.Status.TracingInfo.Next(closureNode.ClosureSummary, graphNode.Index()),
},
df.EdgeInfo{})
case *df.IfNode:
// If only explicit taint flows should be tracked,
// then don't track flow inside of conditionals (information flow)
if v.taintSpec.ExplicitFlowOnly {
break
}

// taint is expected to flow to validators
if isValidatorCondition(v.taintSpec, graphNode.SsaNode().Cond, true) {
break
}

cond := graphNode.SsaNode()
pos := graphNode.Position(s)
err := &CondError{Cond: cond, ParentName: cur.Node.ParentName(), Trace: cur.Trace.SummaryString(), Pos: pos}
logger.Warnf("%v\n", err)
s.AddError("cond", err)
}
}

Expand Down
Loading

0 comments on commit 3e3d32a

Please sign in to comment.