From 1d0446aa8d453af35bbacb9303b41fad3c176b2c Mon Sep 17 00:00:00 2001 From: Victor Nicolet Date: Thu, 24 Oct 2024 09:41:51 -0400 Subject: [PATCH] Using argot:ignore annotations in the taint analysis. --- analysis/annotations/annotations.go | 59 ++++++++++++++++++--- analysis/annotations/annotations_test.go | 26 +++++++++ analysis/annotations/testdata/main.go | 4 +- analysis/dataflow/inter_procedural.go | 5 +- analysis/dataflow/state.go | 3 +- analysis/taint/dataflow_visitor.go | 25 +++++---- analysis/taint/report.go | 3 +- analysis/taint/taint.go | 1 + analysis/taint/testdata/annotations/main.go | 4 +- internal/funcutil/collections.go | 10 ++++ 10 files changed, 117 insertions(+), 23 deletions(-) diff --git a/analysis/annotations/annotations.go b/analysis/annotations/annotations.go index d3ad4eca..40bc5b1b 100644 --- a/analysis/annotations/annotations.go +++ b/analysis/annotations/annotations.go @@ -17,7 +17,9 @@ package annotations import ( "fmt" "go/ast" + "go/token" "regexp" + "strconv" "strings" "github.com/awslabs/ar-go-tools/analysis/config" @@ -41,6 +43,8 @@ const ( Sanitizer // SetOptions is the kind of SetOptions(...) annotations SetOptions + // Ignore is an empty annotation for a line + Ignore ) // AnyTag is the special tag used to match any other tag @@ -98,6 +102,23 @@ func (a Annotation) IsMatchingAnnotation(kind AnnotationKind, tag string) bool { return a.Kind == kind && (tag == AnyTag || (len(a.Tags) > 0 && a.Tags[0] == AnyTag) || slices.Contains(a.Tags, tag)) } +type LinePos struct { + Line int + File string +} + +// NewLinePos returns a LinePos from a token position. The column and offset are abstracted away. +func NewLinePos(pos token.Position) LinePos { + return LinePos{ + Line: pos.Line, + File: pos.Filename, + } +} + +func (l LinePos) String() string { + return l.File + ":" + strconv.Itoa(l.Line) +} + // A FunctionAnnotation groups the annotations relative to a function into main annotations for the entire function // and parameter annotations for each parameter type FunctionAnnotation struct { @@ -152,6 +173,17 @@ type ProgramAnnotations struct { Consts map[*ssa.NamedConst][]Annotation // Globals is the map of global variable annotations (TODO: implementation) Globals map[*ssa.Global][]Annotation + // Positional is the map of line-file location to annotations that are not attached to a specific construct. + // There can be only one annotation per line. + Positional map[LinePos]Annotation +} + +// IsIgnoredPos returns true when the given position is on the same line as an //argot:ignore annotation. +func (pa ProgramAnnotations) IsIgnoredPos(pos token.Position, tag string) bool { + if posAnnot, hasPosAnnot := pa.Positional[NewLinePos(pos)]; hasPosAnnot { + return posAnnot.Kind == Ignore && (posAnnot.Tags[0] == tag || posAnnot.Tags[0] == AnyTag) + } + return false } // Count returns the total number of annotations in the program @@ -189,7 +221,7 @@ func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkgs [] for _, comments := range astFile.Comments { for _, comment := range comments.List { if annotationContents := extractAnnotation(comment); annotationContents != nil { - pa.loadFileAnnotations(logger, annotationContents, pkg.Fset.Position(comment.Pos()).String()) + pa.loadFileAnnotations(logger, annotationContents, pkg.Fset.Position(comment.Pos())) } } } @@ -205,11 +237,12 @@ func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkgs [] // will also print warnings when some syntactic components of the comments look like they should be an annotation. func LoadAnnotations(logger *config.LogGroup, packages []*ssa.Package) (ProgramAnnotations, error) { annotations := ProgramAnnotations{ - Configs: map[string]map[string]string{}, - Funcs: map[*ssa.Function]FunctionAnnotation{}, - Types: map[*ssa.Type][]Annotation{}, - Consts: map[*ssa.NamedConst][]Annotation{}, - Globals: map[*ssa.Global][]Annotation{}, + Configs: map[string]map[string]string{}, + Funcs: map[*ssa.Function]FunctionAnnotation{}, + Types: map[*ssa.Type][]Annotation{}, + Consts: map[*ssa.NamedConst][]Annotation{}, + Globals: map[*ssa.Global][]Annotation{}, + Positional: map[LinePos]Annotation{}, } for _, pkg := range packages { for _, m := range pkg.Members { @@ -287,7 +320,7 @@ func (pa ProgramAnnotations) loadPackageDocAnnotations(doc *ast.CommentGroup) { // loadFileAnnotations loads the annotation that are not tied to a specific ssa node. This includes: // - config annotations // - positional annotations -func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annotationContents []string, position string) { +func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annotationContents []string, position token.Position) { if len(annotationContents) <= 1 { logger.Warnf("ignoring argot annotation with no arguments at %s", position) return @@ -295,13 +328,23 @@ func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annota switch annotationContents[0] { case ConfigTarget: pa.loadConfigTargetAnnotation(logger, annotationContents, position) + case IgnoreTarget: + pa.addIgnoreLineAnnotation(position, annotationContents) + } +} + +// addIgnoreLineAnnotation adds a Ignore annotation at the position's line +func (pa ProgramAnnotations) addIgnoreLineAnnotation(position token.Position, annotationContents []string) { + pa.Positional[NewLinePos(position)] = Annotation{ + Kind: Ignore, + Tags: []string{annotationContents[1]}, // only the tag is used } } // loadConfigTargetAnnotation loads a config annotation. Config annotations look like // "//argot:config tag SetOptions(option-name-1=value1,option-name-2=value2)" and are always linked to a specific problem // tag. -func (pa ProgramAnnotations) loadConfigTargetAnnotation(logger *config.LogGroup, annotationContents []string, position string) { +func (pa ProgramAnnotations) loadConfigTargetAnnotation(logger *config.LogGroup, annotationContents []string, position token.Position) { if len(annotationContents) <= 2 { logger.Warnf("argot:config expects a target tag and one or more SetOptions") logger.Warnf("a comment is likely missing something at %s", position) diff --git a/analysis/annotations/annotations_test.go b/analysis/annotations/annotations_test.go index b7173886..b45e1e1a 100644 --- a/analysis/annotations/annotations_test.go +++ b/analysis/annotations/annotations_test.go @@ -36,9 +36,35 @@ func TestLoadAnnotations(t *testing.T) { testProgram.Prog.Build() logger := config.NewLogGroup(testProgram.Config) a, err := annotations.LoadAnnotations(logger, testProgram.Prog.AllPackages()) + a.CompleteFromSyntax(logger, testProgram.Pkgs) if err != nil { t.Errorf("error loading annotations: %s", err) } + + // Positional annotations like //argot:ignore + t.Logf("%d positional annotations", len(a.Positional)) + if !funcutil.ExistsInMap(a.Positional, func(k annotations.LinePos, _ annotations.Annotation) bool { + return k.Line == 43 + }) { + t.Errorf("Expected annotation at line 43.") + } + + for pos, annot := range a.Positional { + if pos.Line == 43 && (annot.Kind != annotations.Ignore || !funcutil.Contains(annot.Tags, "_")) { + t.Errorf("Expected annotation at line 43 to be //argot:ignore _ but its %v", annot) + } + } + + // Config annotations like //argot:config + t.Logf("%d config annotations", len(a.Configs)) + if _, hasOpt := a.Configs["_"]; !hasOpt { + t.Errorf("expected a config option for _") + } + if a.Configs["_"]["log-level"] != "3" { + t.Errorf("Expected config option log-level set to 3 by annotation.") + } + + // Function annotations like //argot:function and //argot:param t.Logf("%d functions scanned", len(a.Funcs)) for ssaFunc, functionAnnotation := range a.Funcs { switch ssaFunc.Name() { diff --git a/analysis/annotations/testdata/main.go b/analysis/annotations/testdata/main.go index 2fb65f47..c769c2eb 100644 --- a/analysis/annotations/testdata/main.go +++ b/analysis/annotations/testdata/main.go @@ -16,6 +16,8 @@ package main import "fmt" +//argot:config _ @SetOptions(log-level=3) configures log-level for all problems + // sink and source annotation are relative to data categories (e.g. in this example bar, io and html) // Sink(_) means it is always a sink @@ -38,7 +40,7 @@ func foo() string { // //argot:function Sink(_) func superSensitiveFunction(iWillPrintYouUnencrypted string) { - fmt.Println(iWillPrintYouUnencrypted) + fmt.Println(iWillPrintYouUnencrypted) //argot:ignore _ } // sanitizerOfIo diff --git a/analysis/dataflow/inter_procedural.go b/analysis/dataflow/inter_procedural.go index 49a4e480..5331bddb 100644 --- a/analysis/dataflow/inter_procedural.go +++ b/analysis/dataflow/inter_procedural.go @@ -312,12 +312,15 @@ func (g *InterProceduralFlowGraph) RunVisitorOnEntryPoints(visitor Visitor, // entrypoints at once, but this would require a finer context-tracking mechanism than what the NodeWithCallStack // implements. // If the maximum number of alarms has been reached, stop early. + i := 0 for _, entry := range entryPoints { if !g.AnalyzerState.TestAlarmCount() { - g.AnalyzerState.Logger.Warnf("Some entrypoints are skipped, max number of alarms reached.") + g.AnalyzerState.Logger.Warnf("%d entrypoints are skipped, max number of alarms reached.", + len(entryPoints)-i) return } visitor.Visit(g.AnalyzerState, entry) + i++ } } diff --git a/analysis/dataflow/state.go b/analysis/dataflow/state.go index 3ab8a295..dc788639 100644 --- a/analysis/dataflow/state.go +++ b/analysis/dataflow/state.go @@ -38,7 +38,7 @@ type AnalyzerState struct { // Annotations contains all the annotations of the program Annotations annotations.ProgramAnnotations - // The logger used during the analysis (can be used to control output. + // The logger used during the analysis (can be used to control output). Logger *config.LogGroup // The configuration file for the analysis @@ -217,6 +217,7 @@ func NewDefaultAnalyzer(p *ssa.Program, pkgs []*packages.Package) (*AnalyzerStat // CopyTo copies pointers in receiver into argument (shallow copy of everything except mutex). // Do not use two copies in separate routines. func (s *AnalyzerState) CopyTo(b *AnalyzerState) { + b.Annotations = s.Annotations b.BoundingInfo = s.BoundingInfo b.Config = s.Config b.EscapeAnalysisState = s.EscapeAnalysisState diff --git a/analysis/taint/dataflow_visitor.go b/analysis/taint/dataflow_visitor.go index 53afa142..2214cad7 100644 --- a/analysis/taint/dataflow_visitor.go +++ b/analysis/taint/dataflow_visitor.go @@ -140,21 +140,28 @@ func (v *Visitor) Visit(s *df.AnalyzerState, source df.NodeWithTrace) { // you are tracking flows to logging but don't care about integers and booleans for example). if isFiltered(s, v.taintSpec, cur.Node) { if _, isIf := cur.Node.(*df.IfNode); !isIf || v.taintSpec.FailOnImplicitFlow { - logger.Infof("Filtered value: %s\n", cur.Node.String()) - logger.Infof("At: %s\n", cur.Node.Position(s)) + // Filtered values logged at debug level -- there can be many of those. + logger.Debugf("Filtered value: %s\n", cur.Node.String()) + logger.Debugf("At: %s\n", cur.Node.Position(s)) } continue } // If node is sink, then we reached a sink from a source, and we must log the taint flow. if isSink(s, v.taintSpec, cur.Node) && cur.Status.Kind == df.DefaultTracing { - if v.taints.addNewPathCandidate(NewFlowNode(v.currentSource), NewFlowNode(cur.NodeWithTrace)) { - reportTaintFlow(s, v.currentSource, cur) - } - // Stop if there is a limit on number of alarms, and it has been reached. - if !s.IncrementAndTestAlarms() { - logger.Warnf("Reached the limit of %d alarms.", s.Config.MaxAlarms) - return + // Don't report taint flow if the sink location is annotated with //argot:ignore + if s.Annotations.IsIgnoredPos(cur.Node.Position(s), v.taintSpec.Tag) { + s.Logger.Infof("//argot:ignore taint flow to %s", + cur.Node.Position(s)) + } else { + if v.taints.addNewPathCandidate(NewFlowNode(v.currentSource), NewFlowNode(cur.NodeWithTrace)) { + reportTaintFlow(s, v.currentSource, cur) + } + // Stop if there is a limit on number of alarms, and it has been reached. + if !s.IncrementAndTestAlarms() { + logger.Warnf("Reached the limit of %d alarms.", s.Config.MaxAlarms) + return + } } // A sink does not have successors in the taint flow analysis (but other sinks can be reached // as there are still values flowing). diff --git a/analysis/taint/report.go b/analysis/taint/report.go index 0dc1bc35..133a0921 100644 --- a/analysis/taint/report.go +++ b/analysis/taint/report.go @@ -76,7 +76,8 @@ func reportCoverage(coverage map[string]bool, coverageWriter io.StringWriter) { } // reportTaintFlow reports a taint flow by writing to a file if the configuration has the ReportPaths flag set, -// and writing in the logger +// and writing in the logger. +// The function checks the annotations to suppress reports where there are //argot:ignore annotations. func reportTaintFlow(c *dataflow.AnalyzerState, source dataflow.NodeWithTrace, sink *dataflow.VisitorNode) { c.Logger.Infof(" !!!! TAINT FLOW !!!!") c.Logger.Infof(" 💀 Sink reached at %s\n", formatutil.Red(sink.Node.Position(c))) diff --git a/analysis/taint/taint.go b/analysis/taint/taint.go index 14ec16bb..27189a14 100644 --- a/analysis/taint/taint.go +++ b/analysis/taint/taint.go @@ -140,6 +140,7 @@ func Analyze(cfg *config.Config, prog *ssa.Program, pkgs []*packages.Package) (A } } visitor := NewVisitor(&taintSpec) + state.Logger.Infof("Analyzing taint-tracking problem %s", taintSpec.Tag) analysis.RunInterProcedural(state, visitor, analysis.InterProceduralParams{ // The entry points are specific to each taint tracking problem (unlike in the intra-procedural pass) IsEntrypoint: func(node ssa.Node) bool { return IsSourceNode(state, &taintSpec, node) }, diff --git a/analysis/taint/testdata/annotations/main.go b/analysis/taint/testdata/annotations/main.go index ccd37c8f..a0a85c3b 100644 --- a/analysis/taint/testdata/annotations/main.go +++ b/analysis/taint/testdata/annotations/main.go @@ -22,7 +22,7 @@ import ( ) // Specific problem-level config options can be set in annotations -//argot:config lim SetOptions(max-alarms=2,unsafe-max-depth=2) +//argot:config lim SetOptions(max-alarms=12,unsafe-max-depth=2) // bar // It is also a source for the problem hybridDef also defined in the config.json @@ -79,7 +79,7 @@ func main() { sink(s) // @Sink(ex1) sinkOnSecondArg(s, "ok") // only second argument of this is a sink sinkOnSecondArg("ok", s) // @Sink(ex2) - sink(sanitizeSecondArg(s, "ok")) // @Sink(ex1) + sink(sanitizeSecondArg(s, "ok")) //argot:ignore _ (but should be an ex1 sink) sink(sanitizeSecondArg("ok", s)) sink(sanitizer(s)) fmt.Println(s) // @Sink(hybridDef) diff --git a/internal/funcutil/collections.go b/internal/funcutil/collections.go index aa7bcec3..fa49aa71 100644 --- a/internal/funcutil/collections.go +++ b/internal/funcutil/collections.go @@ -134,6 +134,16 @@ func Exists[T any](a []T, f func(T) bool) bool { return false } +// ExistsInMap returns true when there exists some k,x in map a such that f(k,x), otherwise false. +func ExistsInMap[K comparable, T any](a map[K]T, f func(K, T) bool) bool { + for k, x := range a { + if f(k, x) { + return true + } + } + return false +} + // FindMap returns Some(f(x)) when there exists some x in slice a such that p(f(x)), otherwise None. func FindMap[T any, R any](a []T, f func(T) R, p func(R) bool) Optional[R] { for _, x := range a {