From fdb85488aa37bf47a17c40e14a1c58409bd3d420 Mon Sep 17 00:00:00 2001 From: Samarth Kishor Date: Tue, 13 Feb 2024 15:22:28 -0500 Subject: [PATCH 1/3] Handle implicit taint flows --- analysis/config/config.go | 4 + analysis/config/config_test.go | 1 + analysis/dataflow/function_summary_graph.go | 46 +++++++++++ .../dataflow/function_summary_graph_nodes.go | 71 +++++++++++++++++ analysis/dataflow/intra_procedural.go | 8 ++ .../intra_procedural_instruction_ops.go | 2 +- analysis/dataflow/intra_procedural_test.go | 38 ++++++++- analysis/dataflow/mark.go | 9 +++ analysis/taint/dataflow_visitor.go | 26 +++++++ analysis/taint/taint_test.go | 60 ++++++++++++--- analysis/taint/taint_utils_test.go | 28 ++++++- cmd/taint/main.go | 5 +- doc/01_taint.md | 41 +++++++++- testdata/config-examples/config3.yaml | 2 + testdata/src/dataflow/closures/main.go | 1 + testdata/src/dataflow/summaries/main.go | 8 ++ testdata/src/taint/agent-example/config.yaml | 1 + .../agent-example/messaging/messageutil.go | 4 +- testdata/src/taint/basic/config.yaml | 1 + testdata/src/taint/benchmark/config.yaml | 3 +- testdata/src/taint/closures/config.yaml | 1 + .../taint/closures_flowprecise/config.yaml | 1 + .../src/taint/escape-integration/config.yaml | 3 +- testdata/src/taint/example0/main.go | 4 - testdata/src/taint/example1/main.go | 1 + testdata/src/taint/example2/main.go | 3 + testdata/src/taint/fields/config.yaml | 3 +- .../taint/fromlevee/callorder/multiblock.go | 1 + .../src/taint/fromlevee/collections/chans.go | 1 + .../src/taint/fromlevee/collections/maps.go | 2 + .../src/taint/fromlevee/collections/slices.go | 2 + testdata/src/taint/globals/config.yaml | 3 +- testdata/src/taint/implicit-flow/config.yaml | 13 ++++ testdata/src/taint/implicit-flow/go.mod | 3 + testdata/src/taint/implicit-flow/main.go | 77 +++++++++++++++++++ .../src/taint/interface-summaries/config.yaml | 1 + .../src/taint/intra-procedural/config.yaml | 1 + testdata/src/taint/parameters/config.yaml | 1 + testdata/src/taint/selects/config.yaml | 3 +- testdata/src/taint/stdlib_121/config.yaml | 1 + testdata/src/taint/validators/main.go | 2 + testdata/src/taint/with-context/config.yaml | 1 + 42 files changed, 460 insertions(+), 27 deletions(-) create mode 100644 testdata/src/taint/implicit-flow/config.yaml create mode 100644 testdata/src/taint/implicit-flow/go.mod create mode 100644 testdata/src/taint/implicit-flow/main.go diff --git a/analysis/config/config.go b/analysis/config/config.go index e028db28..6685dd06 100644 --- a/analysis/config/config.go +++ b/analysis/config/config.go @@ -143,6 +143,10 @@ type TaintSpec struct { // Filters contains a list of filters that can be used by analyses Filters []CodeIdentifier + + // ExplicitFlowOnly indicates whether the taint analysis should only consider explicit data flows. + // This should be set to true when proving a data flow property instead of an information flow property. + ExplicitFlowOnly bool `yaml:"explicit-flow-only" json:"explicit-flow-only"` } // SlicingSpec contains code identifiers that identify a specific program slicing / backwards dataflow analysis spec. diff --git a/analysis/config/config_test.go b/analysis/config/config_test.go index 3291ce96..71f4a059 100644 --- a/analysis/config/config_test.go +++ b/analysis/config/config_test.go @@ -304,6 +304,7 @@ func TestLoadMisc(t *testing.T) { {"", "some/other/package", "", "", "", "OneField", "ThatStruct", "", "", nil}, {"", "some/other/package", "Interface", "", "", "", "", "", "", nil}, }, + ExplicitFlowOnly: true, }, }, Options: Options{ diff --git a/analysis/dataflow/function_summary_graph.go b/analysis/dataflow/function_summary_graph.go index 76a47c7d..9e13f752 100644 --- a/analysis/dataflow/function_summary_graph.go +++ b/analysis/dataflow/function_summary_graph.go @@ -82,6 +82,9 @@ type SummaryGraph struct { // the return instructions are linked to ReturnNode, one per Value in a tuple returned Returns map[ssa.Instruction][]*ReturnValNode + // the if statements of the function + Ifs map[ssa.Instruction]*IfNode + // errors can be used to accumulate errors that were encountered while building the summary graph errors map[error]bool @@ -133,6 +136,7 @@ func NewSummaryGraph(s *AnalyzerState, f *ssa.Function, id uint32, AccessGlobalNodes: make(map[ssa.Instruction]map[ssa.Value]*AccessGlobalNode), SyntheticNodes: make(map[ssa.Instruction]*SyntheticNode), BoundLabelNodes: make(map[ssa.Instruction]*BoundLabelNode), + Ifs: make(map[ssa.Instruction]*IfNode), errors: map[error]bool{}, lastNodeID: &lastNodeID, shouldTrack: shouldTrack, @@ -195,6 +199,8 @@ func (g *SummaryGraph) initializeInnerNodes(s *AnalyzerState, shouldTrack func(* } case *ssa.MakeClosure: g.addClosure(x) + case *ssa.If: + g.addIfNode(x) // Other types of sources that may be used in config case *ssa.Alloc, *ssa.FieldAddr, *ssa.Field, *ssa.UnOp: @@ -457,6 +463,19 @@ func (g *SummaryGraph) addBoundLabelNode(instr ssa.Instruction, label *pointer.L } } +func (g *SummaryGraph) addIfNode(x *ssa.If) { + if _, ok := g.Ifs[x]; !ok { + node := &IfNode{ + id: g.newNodeID(), + parent: g, + ssaNode: x, + out: make(map[GraphNode]EdgeInfo), + in: make(map[GraphNode]EdgeInfo), + } + g.Ifs[x] = node + } +} + func (g *SummaryGraph) addSyntheticEdge(mark MarkWithAccessPath, info *ConditionInfo, instr ssa.Instruction, _ string) { node, ok := g.SyntheticNodes[instr] @@ -475,6 +494,14 @@ func (g *SummaryGraph) addBoundLabelEdge(mark MarkWithAccessPath, info *Conditio g.addEdge(mark, node, info) } +func (g *SummaryGraph) addIfEdge(mark MarkWithAccessPath, info *ConditionInfo, n *ssa.If) { + node, ok := g.Ifs[n] + if !ok { + return + } + g.addEdge(mark, node, info) +} + // selectNodesFromMark returns a slice of nodes that correspond to the mark. In most cases this slice should have only // one element. // @@ -644,6 +671,15 @@ func (g *SummaryGraph) addEdge(source MarkWithAccessPath, dest GraphNode, cond * } } } + + if source.Mark.IsIf() { + sourceInstr := source.Mark.Node.(ssa.Instruction) + if sourceNode, ok := g.Ifs[sourceInstr]; ok { + if sourceNode != dest { + updateEdgeInfo(source, dest, cond, sourceNode) + } + } + } } func isDiffNode(mark MarkWithAccessPath, source GraphNode, dest GraphNode) bool { @@ -794,6 +830,8 @@ func addInEdge(dest GraphNode, source GraphNode, path EdgeInfo) { node.in[source] = path case *BoundLabelNode: node.in[source] = path + case *IfNode: + node.in[source] = path default: panic(fmt.Sprintf("invalid dest node type: %T", dest)) } @@ -996,6 +1034,14 @@ func (a *BoundLabelNode) String() string { a.targetInfo.BoundIndex) } +func (a *IfNode) String() string { + if a == nil { + return "" + } + + return fmt.Sprintf("\"[#%d] %s\"", a.id, ftu.SanitizeRepr(a.ssaNode)) +} + // Print the summary graph to w in the graphviz format. // If g is nil, then prints the empty graph "subgraph {}" // diff --git a/analysis/dataflow/function_summary_graph_nodes.go b/analysis/dataflow/function_summary_graph_nodes.go index c390f571..226c80f8 100644 --- a/analysis/dataflow/function_summary_graph_nodes.go +++ b/analysis/dataflow/function_summary_graph_nodes.go @@ -113,6 +113,8 @@ func Instr(node GraphNode) ssa.Instruction { return node.ParentNode().CallSite() case *SyntheticNode: return node.Instr() + case *IfNode: + return node.SsaNode() } return nil @@ -1013,3 +1015,72 @@ func (a *BoundLabelNode) ParentName() string { func (a *BoundLabelNode) LongID() string { return "#" + strconv.Itoa(int(a.parent.ID)) + "." + strconv.Itoa(int(a.id)) } + +// IfNode is used to track dataflow to if-statements (includes all types of branching). +// For now, this just contains the value that is being branched on (the if-condition). +type IfNode struct { + id uint32 + parent *SummaryGraph // the parent of an IfNode is the summary of the function in which it appears + ssaNode *ssa.If // an IfNode must correspond to a specific instruction + out map[GraphNode]EdgeInfo // the out maps the node to other nodes to which data flows + in map[GraphNode]EdgeInfo // the in maps the node to other nodes from which data flows + marks LocSet +} + +// ID returns the integer id of the node in its parent graph. +func (a *IfNode) ID() uint32 { return a.id } + +// Graph returns the parent summary graph of the node. +func (a *IfNode) Graph() *SummaryGraph { return a.parent } + +// Out returns the nodes the graph node's data flows to, with their object path. +func (a *IfNode) Out() map[GraphNode]EdgeInfo { return a.out } + +// In returns the nodes with incoming edges to the current node, with their object path. +func (a *IfNode) In() map[GraphNode]EdgeInfo { return a.in } + +// Marks returns the location information of the node. +func (a *IfNode) Marks() LocSet { return a.marks } + +// SetLocs sets the locations information of the node. +func (a *IfNode) SetLocs(set LocSet) { + if a.marks == nil { + a.marks = map[ssa.Instruction]bool{} + } + funcutil.Merge(a.marks, set, funcutil.First[bool]) +} + +// Type returns the type of the value in the if-condition. +func (a *IfNode) Type() types.Type { return a.ssaNode.Cond.Type() } + +// Position returns the position of the node. +func (a *IfNode) Position(c *AnalyzerState) token.Position { + if a.ssaNode != nil { + return c.Program.Fset.Position(a.ssaNode.Pos()) + } + return lang.DummyPos +} + +// Equal implements equality checking between nodes. +func (a *IfNode) Equal(node GraphNode) bool { + if a2, ok := node.(*IfNode); ok { + return a == a2 + } + return false +} + +// SsaNode returns the if-instruction. +func (a *IfNode) SsaNode() *ssa.If { return a.ssaNode } + +// ParentName returns the name of the parent function. +func (a *IfNode) ParentName() string { + if a.parent != nil && a.parent.Parent != nil { + return a.parent.Parent.Name() + } + return "IfNode" +} + +// LongID returns a string identifier for the node. +func (a *IfNode) LongID() string { + return "#" + strconv.Itoa(int(a.parent.ID)) + "." + strconv.Itoa(int(a.id)) +} diff --git a/analysis/dataflow/intra_procedural.go b/analysis/dataflow/intra_procedural.go index 0f838220..2f0d3e7f 100644 --- a/analysis/dataflow/intra_procedural.go +++ b/analysis/dataflow/intra_procedural.go @@ -152,6 +152,8 @@ func (state *IntraAnalysisState) makeEdgesAtInstruction(_ int, instr ssa.Instruc state.makeEdgesAtReturn(typedInstr) case *ssa.Store: state.makeEdgesAtStoreInCapturedLabel(typedInstr) + case *ssa.If: + state.makeEdgesAtIf(typedInstr) } // Always check if it's a synthetic node state.makeEdgesSyntheticNodes(instr) @@ -310,6 +312,12 @@ func (state *IntraAnalysisState) makeEdgesAtStoreInCapturedLabel(x *ssa.Store) { } } +func (state *IntraAnalysisState) makeEdgesAtIf(x *ssa.If) { + for _, origin := range state.getMarks(x, x.Cond, "", false) { + state.summary.addIfEdge(origin, nil, x) + } +} + // makeEdgesSyntheticNodes analyzes the synthetic func (state *IntraAnalysisState) makeEdgesSyntheticNodes(instr ssa.Instruction) { aState := state.parentAnalyzerState diff --git a/analysis/dataflow/intra_procedural_instruction_ops.go b/analysis/dataflow/intra_procedural_instruction_ops.go index ebf638bc..407758ae 100644 --- a/analysis/dataflow/intra_procedural_instruction_ops.go +++ b/analysis/dataflow/intra_procedural_instruction_ops.go @@ -161,7 +161,7 @@ func (state *IntraAnalysisState) DoStore(x *ssa.Store) { } // DoIf is a no-op -func (state *IntraAnalysisState) DoIf(*ssa.If) { +func (state *IntraAnalysisState) DoIf(x *ssa.If) { } diff --git a/analysis/dataflow/intra_procedural_test.go b/analysis/dataflow/intra_procedural_test.go index cab07577..ef7ea96b 100644 --- a/analysis/dataflow/intra_procedural_test.go +++ b/analysis/dataflow/intra_procedural_test.go @@ -67,7 +67,7 @@ func TestFunctionSummaries(t *testing.T) { if function.Name() == "main" { ok := len(summary.Returns) == 0 // main does not return any data ok = ok && len(summary.Params) == 0 - ok = ok && len(summary.Callees) == 7 // 6 regular function calls + 1 closure + ok = ok && len(summary.Callees) == 8 // 7 regular function calls + 1 closure if !ok { t.Errorf("main graph is not as expected") } @@ -506,6 +506,40 @@ func TestFunctionSummaries(t *testing.T) { t.Errorf("in Baz, a closure freevar outgoing edge should be a return") } } + + if function.Name() == "ImplicitFlow" { + for _, callee := range summary.Callees { + for _, call := range callee { + if call.FuncName() == "Source" { + hasIf := false + for out := range call.Out() { + if _, ok := out.(*dataflow.IfNode); ok { + hasIf = true + } + } + if !hasIf { + t.Errorf("in ImplicitFlow, a call to Source() outgoing edge should be an if node") + } + } + } + } + + for _, ifn := range summary.Ifs { + hasCallNodeIn := false + for in := range ifn.In() { + if call, ok := in.(*dataflow.CallNode); ok { + hasCallNodeIn = call.FuncName() == "Source" + } + } + if !hasCallNodeIn { + t.Errorf("in ImplicitFlow, an if incoming edge should be a call to Source()") + } + + if len(ifn.Out()) != 0 { + t.Errorf("in ImplicitFlow, an if node should have no outgoing edges") + } + } + } } } @@ -534,4 +568,6 @@ func TestStringNilSafety(t *testing.T) { _ = g.String() var c *dataflow.ClosureNode _ = c.String() + var i *dataflow.IfNode + _ = i.String() } diff --git a/analysis/dataflow/mark.go b/analysis/dataflow/mark.go index bc9c06ac..6eaa5c12 100644 --- a/analysis/dataflow/mark.go +++ b/analysis/dataflow/mark.go @@ -45,6 +45,8 @@ const ( Global // Synthetic node type for any other node. Synthetic + // If is an if statement. + If ) func (m MarkType) String() string { @@ -67,6 +69,8 @@ func (m MarkType) String() string { return "global" case Synthetic: return "synthetic" + case If: + return "if" default: return "multiple" } @@ -151,6 +155,11 @@ func (m Mark) IsSynthetic() bool { return m.Type&Synthetic != 0 } +// IsIf returns true if the source is an if condition. +func (m Mark) IsIf() bool { + return m.Type&If != 0 +} + func (m Mark) String() string { str := m.Type.String() + ": " if m.Qualifier != nil { diff --git a/analysis/taint/dataflow_visitor.go b/analysis/taint/dataflow_visitor.go index 76109595..51d6af78 100644 --- a/analysis/taint/dataflow_visitor.go +++ b/analysis/taint/dataflow_visitor.go @@ -83,6 +83,17 @@ func (v *Visitor) Reset() { v.seen = make(map[df.KeyType]bool) } +// CondError represents an error where taint flows to a conditional statement. +type CondError struct { + Cond *ssa.If // Cond is the conditional statement + ParentName string // ParentName is the name of the function in which the conditional statement occurs + Trace string // Trace is a string representing the taint trace +} + +func (e *CondError) Error() string { + return fmt.Sprintf("taint flows to conditional statement %v in function %v: %v", e.Cond, e.ParentName, e.Trace) +} + // Visit runs an inter-procedural analysis to add any detected taint flow from currentSource to a sink. This implements // the visitor interface of the dataflow package. // @@ -628,6 +639,21 @@ func (v *Visitor) Visit(s *df.AnalyzerState, source df.NodeWithTrace) { TracingInfo: cur.Status.TracingInfo.Next(closureNode.ClosureSummary, graphNode.Index()), }, df.EdgeInfo{}) + case *df.IfNode: + // If only explicit taint flows should be tracked, + // then don't track flow inside of conditionals (information flow) + if v.taintSpec.ExplicitFlowOnly { + break + } + + // taint is expected to flow to validators + if isValidatorCondition(v.taintSpec, graphNode.SsaNode().Cond, true) { + break + } + + err := &CondError{Cond: graphNode.SsaNode(), ParentName: cur.Node.ParentName(), Trace: cur.Trace.SummaryString()} + logger.Warnf("%v\n", err) + s.AddError("cond", err) } } diff --git a/analysis/taint/taint_test.go b/analysis/taint/taint_test.go index 6ed7be78..3ad48ac5 100644 --- a/analysis/taint/taint_test.go +++ b/analysis/taint/taint_test.go @@ -38,7 +38,8 @@ func TestTaint(t *testing.T) { }, { name: "basic", - args: args{"basic", + args: args{ + "basic", []string{ "bar.go", "example.go", @@ -47,8 +48,10 @@ func TestTaint(t *testing.T) { "fields.go", "sanitizers.go", "memory.go", - "channels.go"}, - noErrorExpected}, + "channels.go", + }, + noErrorExpected, + }, }, { name: "builtins", @@ -84,11 +87,19 @@ func TestTaint(t *testing.T) { }, { name: "example1", - args: args{"example1", []string{}, noErrorExpected}, + args: args{ + "example1", + []string{}, + expectTaintedCondInFuncs("source"), + }, }, { name: "example2", - args: args{"example2", []string{}, noErrorExpected}, + args: args{ + "example2", + []string{}, + expectTaintedCondInFuncs("rec", "extract", "main"), + }, }, { name: "closures", @@ -108,7 +119,11 @@ func TestTaint(t *testing.T) { }, { name: "validators", - args: args{"validators", []string{"values.go"}, noErrorExpected}, + args: args{ + "validators", + []string{"values.go"}, + expectTaintedCondInFuncs("validatorExample4", "Validate2"), + }, }, { name: "taint with filters", @@ -124,7 +139,18 @@ func TestTaint(t *testing.T) { }, { name: "fromlevee", - args: args{"fromlevee", []string{}, noErrorExpected}, + args: args{ + "fromlevee", + []string{}, + expectTaintedCondInFuncs( + "TestRangeOverMapWithSourceAsKey", + "TestRangeOverMapWithSourceAsValue", + "TestSinkAfterTaintInFor", + "TestRangeOverChan", + "TestRangeOverSlice", + "TestRangeOverInterfaceSlice", + ), + }, }, { name: "globals", @@ -132,11 +158,27 @@ func TestTaint(t *testing.T) { }, { name: "complex functionality example", - args: args{"agent-example", []string{}, noErrorExpected}, + args: args{ + "agent-example", + []string{}, + noErrorExpected, // config specifies explicit flow only + }, }, { name: "benchmarking example", - args: args{"benchmark", []string{}, noErrorExpected}, + args: args{ + "benchmark", + []string{}, + noErrorExpected, // config specifies explicit flow only + }, + }, + { + name: "implicit flow", + args: args{ + "implicit-flow", + []string{}, + expectTaintedCondInFuncs("example1", "example2", "switchByte"), + }, }, } diff --git a/analysis/taint/taint_utils_test.go b/analysis/taint/taint_utils_test.go index 4f776796..e4739e95 100644 --- a/analysis/taint/taint_utils_test.go +++ b/analysis/taint/taint_utils_test.go @@ -15,6 +15,7 @@ package taint import ( + "errors" "fmt" "os" "path/filepath" @@ -213,6 +214,27 @@ func noErrorExpected(_ error) bool { return false } +// expectTaintCondInFuncs returns a function that returns true when the supplied +// CondError's callee is in funcNames. +// +// Note: the tests are implemented this way because *ssa.If does not store any position data +func expectTaintedCondInFuncs(funcNames ...string) func(error) bool { + return func(err error) bool { + var e *CondError + if !errors.As(err, &e) { + return false + } + + for _, calleeName := range funcNames { + if e.ParentName == calleeName { + return true + } + } + + return false + } +} + // runTest runs a test instance by building the program from all the files in files plus a file "main.go", relative // to the directory dirName func runTest(t *testing.T, dirName string, files []string, summarizeOnDemand bool, errorExpected func(e error) bool) { @@ -231,9 +253,9 @@ func runTest(t *testing.T, dirName string, files []string, summarizeOnDemand boo cfg.SummarizeOnDemand = summarizeOnDemand result, err := Analyze(cfg, program) if err != nil { - for errs := result.State.CheckError(); len(errs) > 0; errs = result.State.CheckError() { - if !errorExpected(errs[0]) { - t.Fatalf("taint analysis returned error: %v", errs[0]) + for _, err := range result.State.CheckError() { + if !errorExpected(err) { + t.Errorf("taint analysis returned error: %v", err) } } } diff --git a/cmd/taint/main.go b/cmd/taint/main.go index 601467d0..418a21d4 100644 --- a/cmd/taint/main.go +++ b/cmd/taint/main.go @@ -93,7 +93,10 @@ func main() { result, err := taint.Analyze(taintConfig, program) duration := time.Since(start) if err != nil { - fmt.Fprintf(os.Stderr, "analysis failed: %v\n", err) + fmt.Fprintf(os.Stderr, "analysis failed:\n") + for _, err := range result.State.CheckError() { + fmt.Fprintf(os.Stderr, "\t%v\n", err) + } return } result.State.Logger.Infof("") diff --git a/doc/01_taint.md b/doc/01_taint.md index 645f296c..b12e2f46 100644 --- a/doc/01_taint.md +++ b/doc/01_taint.md @@ -113,7 +113,17 @@ This implies that any method whose receiver implements the `mypackage.interfaceN ### Controlling The Data Flow Search -The configuration contains some specific fields that allow users to tune how the analysis searches for data flows. Those options, if not set to their default value, will cause the analysis to possible ignore some tainted flows. However, this can be useful when the analysis reports false positives and the user wants to trade soundness for precision. +The configuration contains some specific fields that allow users to tune how the analysis searches for data flows. Those options, if not set to their default value, will cause the analysis to possibly ignore some tainted flows. However, this can be useful when the analysis reports false positives and the user wants to trade soundness for precision. + +#### Implicit flows +Some flows can be implicit: for example, branching on tainted data. See the `implicit-flow` test for an example. +By default, the taint analysis will produce an error if tainted data is branched on. +If the tool should only track explicit taint flows, set the `explicit-flow-only` option to `true`: +```yaml +taint-tracking-problems: + - + explicit-flow-only: true +``` #### Filters By default, the analysis considers that any type can carry tainted data. In some cases, this can be excessive, as one might not see boolean values as tainted data (for example, a boolean cannot store a user's password). In order to ignore flows that pass through variables of certain types, one add filters. Filters are either a *type* or a *method*, optionally within a *package*. A type filter will cause the tool to ignore data flows through objects of that type, and a method filter will cause the tool to ignore data flows through that method (or function). @@ -249,6 +259,35 @@ The taint analysis tool can detect such flows with many intermediate calls. > 📝 To limit the size of the traces reported by the tool, one can limit how many functions deep the trace can be using the `max-depth: [some integer]` option in the configuration file. Note that if this option is used, then the tool may not report some taint flows. In the previous example, the trace would not be reported if the configuration file sets `maxdepth: 2`. +### Implicit Information Flow + +Consider the following example: + +```go +func main() { + data := source() + newData := make([]byte, len(data)) + // loop over tainted data + for i, b := range data { + switch b { + case 0x62: // 'b' + newData[i] = 0x62 + case 0x61: // 'a' + newData[i] = 0x61 + case 0x64: // 'd' + newData[i] = 0x64 + default: + newData[i] = 0x0 + } + } + + sink(newData) +} +``` + +There is an "information leak" from `source` to `sink` because the individual bytes from `data` are copied over to `newData` without explicitly assigning any data from `data` to `newData`. To detect such implicit information flows, the taint analysis logs a warning when tainted data flows to a branch statement (`if`, `switch`, `select`, etc.) or loop (which involves a branch in SSA form). The analysis also returns an error. + +> 📝 The taint analysis is unsound in the presence of other types of implicit data flows called [side channels](https://en.wikipedia.org/wiki/Side-channel_attack). ### Field Sensitivity diff --git a/testdata/config-examples/config3.yaml b/testdata/config-examples/config3.yaml index 91f90a80..56e5e6ba 100644 --- a/testdata/config-examples/config3.yaml +++ b/testdata/config-examples/config3.yaml @@ -22,3 +22,5 @@ taint-tracking-problems: field: OneField - package: some/other/package interface: Interface + + explicit-flow-only: true diff --git a/testdata/src/dataflow/closures/main.go b/testdata/src/dataflow/closures/main.go index dd62f528..b7d7f4eb 100644 --- a/testdata/src/dataflow/closures/main.go +++ b/testdata/src/dataflow/closures/main.go @@ -366,6 +366,7 @@ func example17() { data := "ok" a := func() { data = "ok" } b := func() { data = source() } // @Source(example17) + // loop over tainted data for _, f := range []func(){b, a} { f() } diff --git a/testdata/src/dataflow/summaries/main.go b/testdata/src/dataflow/summaries/main.go index c096ee6e..fe0af6fb 100644 --- a/testdata/src/dataflow/summaries/main.go +++ b/testdata/src/dataflow/summaries/main.go @@ -90,6 +90,13 @@ func Baz(x string) { Sink(s5) } +func ImplicitFlow() { + s := Source() + if len(s) == 0 { + Sink("") + } +} + func main() { s := fmt.Sprintf("bad-%s", "data") s2 := "example2" @@ -102,4 +109,5 @@ func main() { go ChannelConsumer(c) x := Source() Baz(x) + ImplicitFlow() } diff --git a/testdata/src/taint/agent-example/config.yaml b/testdata/src/taint/agent-example/config.yaml index 2817388b..3dd66768 100644 --- a/testdata/src/taint/agent-example/config.yaml +++ b/testdata/src/taint/agent-example/config.yaml @@ -7,6 +7,7 @@ taint-tracking-problems: interface: "Logger" - method: "sink" package: "((main)|(command-line-arguments)|(agent-example))" + explicit-flow-only: true options: log-level: 4 diff --git a/testdata/src/taint/agent-example/messaging/messageutil.go b/testdata/src/taint/agent-example/messaging/messageutil.go index f952a7ae..78e37fac 100644 --- a/testdata/src/taint/agent-example/messaging/messageutil.go +++ b/testdata/src/taint/agent-example/messaging/messageutil.go @@ -73,8 +73,8 @@ func ParseSendCommandMessage(context context.Context, msg InstanceMessage, messa logger := log.Default() commandID, _ := GetCommandID(msg.MessageId) - logger.Printf("Processing send command message ", msg.MessageId) - logger.Printf("Processing send command payload: ", msg.Payload) + logger.Printf("Processing send command message: %v\n", msg.MessageId) + logger.Printf("Processing send command payload: %v\n", msg.Payload) // parse message to retrieve parameters var parsedMessage SendCommandPayload diff --git a/testdata/src/taint/basic/config.yaml b/testdata/src/taint/basic/config.yaml index 210610e2..0500ec86 100644 --- a/testdata/src/taint/basic/config.yaml +++ b/testdata/src/taint/basic/config.yaml @@ -16,6 +16,7 @@ taint-tracking-problems: sanitizers: - package: "(main)|(command-line-arguments)" method: ".*(s|S)anitize[1-9]?" + explicit-flow-only: true options: verbose: true diff --git a/testdata/src/taint/benchmark/config.yaml b/testdata/src/taint/benchmark/config.yaml index b4735359..4e83b440 100644 --- a/testdata/src/taint/benchmark/config.yaml +++ b/testdata/src/taint/benchmark/config.yaml @@ -11,7 +11,8 @@ taint-tracking-problems: - package: "(main)|(command-line-arguments)|(git.amazon.com[[:graph:]]*)$" # Similarly, sinks are sink1 sink2 sink2 ... method: "sink[1-9]?" + explicit-flow-only: true options: report-paths: true - log-level: 4 \ No newline at end of file + log-level: 4 diff --git a/testdata/src/taint/closures/config.yaml b/testdata/src/taint/closures/config.yaml index e2bd0ed8..c5a3328b 100644 --- a/testdata/src/taint/closures/config.yaml +++ b/testdata/src/taint/closures/config.yaml @@ -9,6 +9,7 @@ taint-tracking-problems: - package: "(main)|(command-line-arguments)|(git.amazon.com[[:graph:]]*)$" # Similarly, sinks are sink1 sink2 sink2 ... method: "sink[1-9]?" + explicit-flow-only: true # Same as sinks diff --git a/testdata/src/taint/closures_flowprecise/config.yaml b/testdata/src/taint/closures_flowprecise/config.yaml index 659818c7..6701cc5b 100644 --- a/testdata/src/taint/closures_flowprecise/config.yaml +++ b/testdata/src/taint/closures_flowprecise/config.yaml @@ -9,6 +9,7 @@ taint-tracking-problems: - package: "(main)|(command-line-arguments)|(git.amazon.com[[:graph:]]*)$" # Similarly, sinks are sink1 sink2 sink2 ... method: "sink[1-9]?" + explicit-flow-only: true # Same as sinks slicing-problems: diff --git a/testdata/src/taint/escape-integration/config.yaml b/testdata/src/taint/escape-integration/config.yaml index e6345d69..f434cef0 100644 --- a/testdata/src/taint/escape-integration/config.yaml +++ b/testdata/src/taint/escape-integration/config.yaml @@ -9,8 +9,9 @@ taint-tracking-problems: - package: "(main)|(command-line-arguments)|(git.amazon.com[[:graph:]]*)$" # Similarly, sinks are sink1 sink2 sink2 ... method: "sink[1-9]?" + explicit-flow-only: true options: report-paths: true use-escape-analysis: true - escape-config: "escape-config.json" \ No newline at end of file + escape-config: "escape-config.json" diff --git a/testdata/src/taint/example0/main.go b/testdata/src/taint/example0/main.go index 2b33a3ce..9cb7ee47 100644 --- a/testdata/src/taint/example0/main.go +++ b/testdata/src/taint/example0/main.go @@ -25,10 +25,6 @@ type nestedStruct struct { A string } -func source() string { - return "tainted data" -} - func source2() nestedStruct { return nestedStruct{ Ex: example{}, diff --git a/testdata/src/taint/example1/main.go b/testdata/src/taint/example1/main.go index b6c0000a..49fb1be0 100644 --- a/testdata/src/taint/example1/main.go +++ b/testdata/src/taint/example1/main.go @@ -35,6 +35,7 @@ type fooProducer struct { } func (f fooProducer) source() wrappedString { + // branch on tainted data if f.I > 0 && f.I < 10 { return wrappedString{Content: strings.Repeat("x", f.I)} } else { diff --git a/testdata/src/taint/example2/main.go b/testdata/src/taint/example2/main.go index 760ddbfd..15950e20 100644 --- a/testdata/src/taint/example2/main.go +++ b/testdata/src/taint/example2/main.go @@ -20,6 +20,7 @@ import ( ) func extract(name string) string { + // branch on tainted data if !strings.HasSuffix(name, ").Error") { return "" } @@ -47,6 +48,7 @@ func sink1(x any) { } func rec(x string) string { + // branch on tainted data switch x { case "": return rec("b") @@ -64,6 +66,7 @@ func rec(x string) string { func main() { x := fmt.Sprintf("%s", rec(rec(source1()))) // @Source(source1) y := extract(x) + // branch on tainted data if len(y) > 2 { return } diff --git a/testdata/src/taint/fields/config.yaml b/testdata/src/taint/fields/config.yaml index d34afa5e..c335f751 100644 --- a/testdata/src/taint/fields/config.yaml +++ b/testdata/src/taint/fields/config.yaml @@ -11,7 +11,8 @@ taint-tracking-problems: - package: "(main)|(command-line-arguments)" # Similarly, sinks are sink1 sink2 sink2 ... method: "sink[1-9]?" + explicit-flow-only: true options: report-paths: true - field-sensitive: true \ No newline at end of file + field-sensitive: true diff --git a/testdata/src/taint/fromlevee/callorder/multiblock.go b/testdata/src/taint/fromlevee/callorder/multiblock.go index 9f2c0df9..e62ea713 100644 --- a/testdata/src/taint/fromlevee/callorder/multiblock.go +++ b/testdata/src/taint/fromlevee/callorder/multiblock.go @@ -145,6 +145,7 @@ func TestSinkAfterTaintInFor() { sources[i] = core.Source() // @Source(mb6) } + // branch on tainted data for i := 0; i < len(sources); i++ { fmt.Fprintf(w, "%v", sources[i]) } diff --git a/testdata/src/taint/fromlevee/collections/chans.go b/testdata/src/taint/fromlevee/collections/chans.go index 8e530c56..cea6799d 100644 --- a/testdata/src/taint/fromlevee/collections/chans.go +++ b/testdata/src/taint/fromlevee/collections/chans.go @@ -42,6 +42,7 @@ func TestChannelIsNoLongerTaintedWhenNilledOut(sources chan core.SourceT) { } func TestRangeOverChan(sources chan core.SourceT) { + // loop over tainted channel for s := range sources { core.Sink(s) // @Sink(c) } diff --git a/testdata/src/taint/fromlevee/collections/maps.go b/testdata/src/taint/fromlevee/collections/maps.go index 7eb265ed..e7015613 100644 --- a/testdata/src/taint/fromlevee/collections/maps.go +++ b/testdata/src/taint/fromlevee/collections/maps.go @@ -73,6 +73,7 @@ func TestMapUpdateWithTaintedKeyDoesNotTaintTheValue(key core.SourceT, value str func TestRangeOverMapWithSourceAsValue() { m := map[string]core.SourceT{"secret": core.Source2()} // @Source(map8) + // loop over tainted data for k, s := range m { core.Sink(s) // @Sink(map8) core.Sink(k) // @Sink(map8) TODO: our analysis is not sensitive to key/values, but levee is @@ -81,6 +82,7 @@ func TestRangeOverMapWithSourceAsValue() { func TestRangeOverMapWithSourceAsKey() { m := map[core.SourceT]string{core.Source2(): "don't sink me"} // @Source(map9) + // branch over tainted data for src, str := range m { core.Sink(src) // @Sink(map9) core.Sink(str) // @Sink(map9) TODO: our analysis is not sensitive to key/values, but levee is diff --git a/testdata/src/taint/fromlevee/collections/slices.go b/testdata/src/taint/fromlevee/collections/slices.go index c4f093d4..5db071c6 100644 --- a/testdata/src/taint/fromlevee/collections/slices.go +++ b/testdata/src/taint/fromlevee/collections/slices.go @@ -30,6 +30,7 @@ func TestSlices(s core.SourceT) { func TestRangeOverSlice() { sources := []core.SourceT{core.Source2()} // @Source(sl2) + // loop over tainted data for i, s := range sources { core.Sink(s) // @Sink(sl2) core.Sink(i) @@ -37,6 +38,7 @@ func TestRangeOverSlice() { } func TestRangeOverInterfaceSlice() { + // loop over tainted data for i, s := range []interface{}{core.Source2()} { // @Source(sl3) core.Sink(s) // @Sink(sl3) core.Sink(i) diff --git a/testdata/src/taint/globals/config.yaml b/testdata/src/taint/globals/config.yaml index 41be4e89..b3af6f48 100644 --- a/testdata/src/taint/globals/config.yaml +++ b/testdata/src/taint/globals/config.yaml @@ -7,6 +7,7 @@ taint-tracking-problems: method: "source[1-9]?" sinks: - method: "sink[1-9]?" + explicit-flow-only: true options: - report-summaries: true \ No newline at end of file + report-summaries: true diff --git a/testdata/src/taint/implicit-flow/config.yaml b/testdata/src/taint/implicit-flow/config.yaml new file mode 100644 index 00000000..0f6efef8 --- /dev/null +++ b/testdata/src/taint/implicit-flow/config.yaml @@ -0,0 +1,13 @@ +taint-tracking-problems: + - # The package regex matches all possible ways the package name might appear depending on how the program is loaded + sources: + - package: "(main)|(command-line-arguments)|(git.amazon.com[[:graph:]]*)$" + # Sources can be source1, source2, etc. + method: "source[1-9]?" + sinks: + - package: "(main)|(command-line-arguments)|(git.amazon.com[[:graph:]]*)$" + # Similarly, sinks are sink1 sink2 sink2 ... + method: "sink[1-9]?" + +options: + # report-paths: true diff --git a/testdata/src/taint/implicit-flow/go.mod b/testdata/src/taint/implicit-flow/go.mod new file mode 100644 index 00000000..4683887b --- /dev/null +++ b/testdata/src/taint/implicit-flow/go.mod @@ -0,0 +1,3 @@ +module implicit-flow + +go 1.19 diff --git a/testdata/src/taint/implicit-flow/main.go b/testdata/src/taint/implicit-flow/main.go new file mode 100644 index 00000000..6871c78a --- /dev/null +++ b/testdata/src/taint/implicit-flow/main.go @@ -0,0 +1,77 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" +) + +func example1() { + data := source() // @Source(ex1) + newData := make([]byte, len(data)) + // loop over tainted data + for i, b := range data { + switch b { + case 0x62: // 'b' + newData[i] = 0x62 + case 0x61: // 'a' + newData[i] = 0x61 + case 0x64: // 'd' + newData[i] = 0x64 + default: + newData[i] = 0x0 + } + } + + sink(newData) +} + +func example2() { + data := source() // @Source(ex2) + newData := make([]byte, len(data)) + // loop over tainted data + for i, b := range data { + switchByte(b, newData, i) + } + + sink(newData) +} + +func switchByte(b byte, newData []byte, i int) { + // branch on tainted data + switch b { + case 0x62: // 'b' + newData[i] = 0x62 + case 0x61: // 'a' + newData[i] = 0x61 + case 0x64: // 'd' + newData[i] = 0x64 + default: + newData[i] = 0x0 + } +} + +func main() { + example1() + example2() +} + +func source() []byte { + return []byte("bad") +} + +func sink(b []byte) { + fmt.Println(string(b)) +} diff --git a/testdata/src/taint/interface-summaries/config.yaml b/testdata/src/taint/interface-summaries/config.yaml index 65c5641a..8431538b 100644 --- a/testdata/src/taint/interface-summaries/config.yaml +++ b/testdata/src/taint/interface-summaries/config.yaml @@ -13,6 +13,7 @@ taint-tracking-problems: - package: "(main)|(command-line-arguments)|(git.amazon.com[[:graph:]]*)$" # Similarly, sinks are sink1 sink2 sink2 ... method: "sink[1-9]?" + explicit-flow-only: true dataflow-specs: - "dataflows.json" diff --git a/testdata/src/taint/intra-procedural/config.yaml b/testdata/src/taint/intra-procedural/config.yaml index 775ee1dd..f358e9f9 100644 --- a/testdata/src/taint/intra-procedural/config.yaml +++ b/testdata/src/taint/intra-procedural/config.yaml @@ -8,6 +8,7 @@ taint-tracking-problems: sinks: - package: "((main)|(command-line-arguments))" method: sink[1-9] + explicit-flow-only: true options: skip-interprocedural: false diff --git a/testdata/src/taint/parameters/config.yaml b/testdata/src/taint/parameters/config.yaml index d2b0fe47..e16b1202 100644 --- a/testdata/src/taint/parameters/config.yaml +++ b/testdata/src/taint/parameters/config.yaml @@ -10,6 +10,7 @@ taint-tracking-problems: # Similarly, sinks are sink1 sink2 sink2 ... method: "sink[1-9]?" - method: "Start" + explicit-flow-only: true options: verbose: true diff --git a/testdata/src/taint/selects/config.yaml b/testdata/src/taint/selects/config.yaml index f83c9c70..d2c219fb 100644 --- a/testdata/src/taint/selects/config.yaml +++ b/testdata/src/taint/selects/config.yaml @@ -7,7 +7,8 @@ taint-tracking-problems: method: "source[1-9]?" sinks: - method: "sink[1-9]?" + explicit-flow-only: true options: report-summaries: true - log-level: 4 \ No newline at end of file + log-level: 4 diff --git a/testdata/src/taint/stdlib_121/config.yaml b/testdata/src/taint/stdlib_121/config.yaml index 3b316223..b35ba65a 100644 --- a/testdata/src/taint/stdlib_121/config.yaml +++ b/testdata/src/taint/stdlib_121/config.yaml @@ -12,6 +12,7 @@ taint-tracking-problems: sanitizers: - package: "(main)|(command-line-arguments)|(git.amazon.com[[:graph:]]*)$" method: ".*(s|S)anitize[1-9]?" + explicit-flow-only: true options: verbose: true diff --git a/testdata/src/taint/validators/main.go b/testdata/src/taint/validators/main.go index a9bdfbee..f390450a 100644 --- a/testdata/src/taint/validators/main.go +++ b/testdata/src/taint/validators/main.go @@ -118,6 +118,7 @@ func pass(i int, x string) (bool, string) { func validatorExample4() { a := source1() // @Source(ex4) ok, b := pass(2, a) + // branch on tainted data TODO: validators on part of the argument if ok { sink1(b) // @Sink(ex4) TODO: validators on part of the argument } @@ -147,6 +148,7 @@ type A struct { } func Validate2(a A) bool { + // branch on tainted data return a.X > 0 && Validate(a.Y) } diff --git a/testdata/src/taint/with-context/config.yaml b/testdata/src/taint/with-context/config.yaml index 30dccdd3..0158b10a 100644 --- a/testdata/src/taint/with-context/config.yaml +++ b/testdata/src/taint/with-context/config.yaml @@ -10,3 +10,4 @@ taint-tracking-problems: - package: "(main)|(command-line-arguments)|(example1)$" # Similarly, sinks are sink1 sink2 sink2 ... method: "sink[1-9]?" + explicit-flow-only: true From b5ba78447a33fa4a609e62e594816328be3d4d91 Mon Sep 17 00:00:00 2001 From: Samarth Kishor Date: Fri, 16 Feb 2024 14:09:02 -0500 Subject: [PATCH 2/3] Add position information to taint.CondError --- analysis/dataflow/function_summary_graph_nodes.go | 13 ++++++++++--- analysis/taint/dataflow_visitor.go | 13 ++++++++----- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/analysis/dataflow/function_summary_graph_nodes.go b/analysis/dataflow/function_summary_graph_nodes.go index 226c80f8..5c63745e 100644 --- a/analysis/dataflow/function_summary_graph_nodes.go +++ b/analysis/dataflow/function_summary_graph_nodes.go @@ -1055,10 +1055,17 @@ func (a *IfNode) Type() types.Type { return a.ssaNode.Cond.Type() } // Position returns the position of the node. func (a *IfNode) Position(c *AnalyzerState) token.Position { - if a.ssaNode != nil { - return c.Program.Fset.Position(a.ssaNode.Pos()) + cond := a.ssaNode + if cond == nil { + return lang.DummyPos } - return lang.DummyPos + + pos := c.Program.Fset.Position(cond.Pos()) + if !pos.IsValid() { + pos = c.Program.Fset.Position(cond.Cond.Pos()) + } + + return pos } // Equal implements equality checking between nodes. diff --git a/analysis/taint/dataflow_visitor.go b/analysis/taint/dataflow_visitor.go index 51d6af78..d8ec36cb 100644 --- a/analysis/taint/dataflow_visitor.go +++ b/analysis/taint/dataflow_visitor.go @@ -85,13 +85,14 @@ func (v *Visitor) Reset() { // CondError represents an error where taint flows to a conditional statement. type CondError struct { - Cond *ssa.If // Cond is the conditional statement - ParentName string // ParentName is the name of the function in which the conditional statement occurs - Trace string // Trace is a string representing the taint trace + Cond *ssa.If // Cond is the conditional statement + ParentName string // ParentName is the name of the function in which the conditional statement occurs + Trace string // Trace is a string representing the taint trace + Pos token.Position // Pos is the position } func (e *CondError) Error() string { - return fmt.Sprintf("taint flows to conditional statement %v in function %v: %v", e.Cond, e.ParentName, e.Trace) + return fmt.Sprintf("taint flows to conditional statement %v in function %v: %v\n\tat %v", e.Cond, e.ParentName, e.Trace, e.Pos) } // Visit runs an inter-procedural analysis to add any detected taint flow from currentSource to a sink. This implements @@ -651,7 +652,9 @@ func (v *Visitor) Visit(s *df.AnalyzerState, source df.NodeWithTrace) { break } - err := &CondError{Cond: graphNode.SsaNode(), ParentName: cur.Node.ParentName(), Trace: cur.Trace.SummaryString()} + cond := graphNode.SsaNode() + pos := graphNode.Position(s) + err := &CondError{Cond: cond, ParentName: cur.Node.ParentName(), Trace: cur.Trace.SummaryString(), Pos: pos} logger.Warnf("%v\n", err) s.AddError("cond", err) } From e78c146e42760b041d4963175e5075071afbc6d5 Mon Sep 17 00:00:00 2001 From: Samarth Kishor Date: Mon, 19 Feb 2024 16:16:24 -0500 Subject: [PATCH 3/3] Add test for implicit flow via select --- analysis/taint/taint_test.go | 2 +- testdata/src/taint/implicit-flow/main.go | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/analysis/taint/taint_test.go b/analysis/taint/taint_test.go index 3ad48ac5..3d4f9c9b 100644 --- a/analysis/taint/taint_test.go +++ b/analysis/taint/taint_test.go @@ -177,7 +177,7 @@ func TestTaint(t *testing.T) { args: args{ "implicit-flow", []string{}, - expectTaintedCondInFuncs("example1", "example2", "switchByte"), + expectTaintedCondInFuncs("example1", "example2", "example3", "switchByte"), }, }, } diff --git a/testdata/src/taint/implicit-flow/main.go b/testdata/src/taint/implicit-flow/main.go index 6871c78a..20743c92 100644 --- a/testdata/src/taint/implicit-flow/main.go +++ b/testdata/src/taint/implicit-flow/main.go @@ -49,6 +49,19 @@ func example2() { sink(newData) } +func example3() { + c1 := make(chan []byte) + c1 <- source() + c2 := make(chan []byte) + // select tainted channel + select { + case <-c1: + fmt.Println("tainted channel") + case <-c2: + fmt.Println("safe channel") + } +} + func switchByte(b byte, newData []byte, i int) { // branch on tainted data switch b { @@ -66,6 +79,7 @@ func switchByte(b byte, newData []byte, i int) { func main() { example1() example2() + example3() } func source() []byte {