From 3ef7a0932ad9249539bb90dc3f4f1e3bb46fae6e Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 20 Mar 2024 10:50:59 +0000 Subject: [PATCH 1/6] Add flow through string concatenation --- go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql b/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql index 48731179127e..0392f09e801d 100644 --- a/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql +++ b/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql @@ -101,6 +101,10 @@ module IncompleteHostNameRegexpConfig implements DataFlow::ConfigSig { ) and not regexpGuardsError(sink) } + + predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + StringOps::Concatenation::taintStep(node1, node2) + } } module Flow = DataFlow::Global; From 0000c72329e778f15542c3764cdad8b6f3a8beb1 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 20 Mar 2024 11:01:50 +0000 Subject: [PATCH 2/6] Remove attempt at avoiding duplicate alerts --- go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql | 5 ----- 1 file changed, 5 deletions(-) diff --git a/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql b/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql index 0392f09e801d..e114d924475c 100644 --- a/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql +++ b/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql @@ -84,11 +84,6 @@ module IncompleteHostNameRegexpConfig implements DataFlow::ConfigSig { exists(Expr e | e = source.asExpr() and isIncompleteHostNameRegexpPattern(e.getStringValue(), hostPart) - | - e instanceof StringLit - or - e instanceof AddExpr and - not isIncompleteHostNameRegexpPattern(e.(AddExpr).getAnOperand().getStringValue(), _) ) } From 89623072916531254942b03cda2491539f178bb1 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Tue, 23 Apr 2024 21:13:55 +0100 Subject: [PATCH 3/6] Add second good go file to tests --- .../CWE-020/IncompleteHostnameRegexpGood2.go | 2 +- .../IncompleteHostnameRegexpGood2.go | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/IncompleteHostnameRegexpGood2.go diff --git a/go/ql/src/Security/CWE-020/IncompleteHostnameRegexpGood2.go b/go/ql/src/Security/CWE-020/IncompleteHostnameRegexpGood2.go index 7c5df3f67426..c6c3fc069815 100644 --- a/go/ql/src/Security/CWE-020/IncompleteHostnameRegexpGood2.go +++ b/go/ql/src/Security/CWE-020/IncompleteHostnameRegexpGood2.go @@ -6,7 +6,7 @@ import ( "regexp" ) -func checkRedirectGood(req *http.Request, via []*http.Request) error { +func checkRedirectGood2(req *http.Request, via []*http.Request) error { // GOOD: the host of `req.URL` must be `example.com`, `www.example.com` or `beta.example.com` re := `^((www|beta)\.)?example\.com/` if matched, _ := regexp.MatchString(re, req.URL.Host); matched { diff --git a/go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/IncompleteHostnameRegexpGood2.go b/go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/IncompleteHostnameRegexpGood2.go new file mode 100644 index 000000000000..c6c3fc069815 --- /dev/null +++ b/go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/IncompleteHostnameRegexpGood2.go @@ -0,0 +1,16 @@ +package main + +import ( + "errors" + "net/http" + "regexp" +) + +func checkRedirectGood2(req *http.Request, via []*http.Request) error { + // GOOD: the host of `req.URL` must be `example.com`, `www.example.com` or `beta.example.com` + re := `^((www|beta)\.)?example\.com/` + if matched, _ := regexp.MatchString(re, req.URL.Host); matched { + return nil + } + return errors.New("Invalid redirect") +} From fd306ed79be81f5305fbba653f8e88e967dce552 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 24 Apr 2024 12:19:11 +0100 Subject: [PATCH 4/6] Exclude constant names from sources to avoid duplicate results --- go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql b/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql index e114d924475c..03018ee1c32d 100644 --- a/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql +++ b/go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql @@ -81,9 +81,12 @@ predicate regexpGuardsError(RegexpPattern regexp) { module IncompleteHostNameRegexpConfig implements DataFlow::ConfigSig { additional predicate isSourceString(DataFlow::Node source, string hostPart) { - exists(Expr e | - e = source.asExpr() and - isIncompleteHostNameRegexpPattern(e.getStringValue(), hostPart) + exists(Expr e | e = source.asExpr() | + isIncompleteHostNameRegexpPattern(e.getStringValue(), hostPart) and + // Exclude constant names to avoid duplicate results, because the string + // literals which they are initialised with are also considered as + // sources. + not e instanceof ConstantName ) } From 41409424795de9b79bc8ddbd4c64e73b50578059 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 24 Apr 2024 12:20:18 +0100 Subject: [PATCH 5/6] Update tests --- .../IncompleteHostnameRegexp.expected | 18 +++++++++++---- .../CWE-020/IncompleteHostnameRegexp/main.go | 22 ++++++++++++++++++- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/IncompleteHostnameRegexp.expected b/go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/IncompleteHostnameRegexp.expected index 4486b4e0962f..c9ba782fd56b 100644 --- a/go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/IncompleteHostnameRegexp.expected +++ b/go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/IncompleteHostnameRegexp.expected @@ -1,12 +1,22 @@ edges | IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:12:38:12:39 | re | provenance | | +| main.go:49:21:49:45 | `https://www.example.com` | main.go:62:15:62:25 | sourceConst | provenance | | +| main.go:62:15:62:25 | sourceConst | main.go:65:15:65:23 | localVar3 | provenance | | nodes | IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | semmle.label | "^((www\|beta).)?example.com/" | | IncompleteHostnameRegexp.go:12:38:12:39 | re | semmle.label | re | -| main.go:39:60:39:79 | "^test2.github.com$" | semmle.label | "^test2.github.com$" | -| main.go:44:15:44:39 | `https://www.example.com` | semmle.label | `https://www.example.com` | +| main.go:40:60:40:79 | "^test2.github.com$" | semmle.label | "^test2.github.com$" | +| main.go:45:15:45:39 | `https://www.example.com` | semmle.label | `https://www.example.com` | +| main.go:49:21:49:45 | `https://www.example.com` | semmle.label | `https://www.example.com` | +| main.go:56:15:56:34 | ...+... | semmle.label | ...+... | +| main.go:58:15:58:42 | ...+... | semmle.label | ...+... | +| main.go:62:15:62:25 | sourceConst | semmle.label | sourceConst | +| main.go:65:15:65:23 | localVar3 | semmle.label | localVar3 | subpaths #select | IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:12:38:12:39 | re | This regular expression has an unescaped dot before ')?example.com', so it might match more hosts than expected when $@. | IncompleteHostnameRegexp.go:12:38:12:39 | re | the regular expression is used | -| main.go:39:60:39:79 | "^test2.github.com$" | main.go:39:60:39:79 | "^test2.github.com$" | main.go:39:60:39:79 | "^test2.github.com$" | This regular expression has an unescaped dot before 'github.com', so it might match more hosts than expected when $@. | main.go:39:60:39:79 | "^test2.github.com$" | the regular expression is used | -| main.go:44:15:44:39 | `https://www.example.com` | main.go:44:15:44:39 | `https://www.example.com` | main.go:44:15:44:39 | `https://www.example.com` | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:44:15:44:39 | `https://www.example.com` | the regular expression is used | +| main.go:40:60:40:79 | "^test2.github.com$" | main.go:40:60:40:79 | "^test2.github.com$" | main.go:40:60:40:79 | "^test2.github.com$" | This regular expression has an unescaped dot before 'github.com', so it might match more hosts than expected when $@. | main.go:40:60:40:79 | "^test2.github.com$" | the regular expression is used | +| main.go:45:15:45:39 | `https://www.example.com` | main.go:45:15:45:39 | `https://www.example.com` | main.go:45:15:45:39 | `https://www.example.com` | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:45:15:45:39 | `https://www.example.com` | the regular expression is used | +| main.go:49:21:49:45 | `https://www.example.com` | main.go:49:21:49:45 | `https://www.example.com` | main.go:65:15:65:23 | localVar3 | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:65:15:65:23 | localVar3 | the regular expression is used | +| main.go:56:15:56:34 | ...+... | main.go:56:15:56:34 | ...+... | main.go:56:15:56:34 | ...+... | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:56:15:56:34 | ...+... | the regular expression is used | +| main.go:58:15:58:42 | ...+... | main.go:58:15:58:42 | ...+... | main.go:58:15:58:42 | ...+... | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:58:15:58:42 | ...+... | the regular expression is used | diff --git a/go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/main.go b/go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/main.go index fb1e5b77b516..7eda0d7255a2 100644 --- a/go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/main.go +++ b/go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/main.go @@ -3,10 +3,11 @@ package main import ( - "github.com/elazarl/goproxy" "net/http" "regexp" "time" + + "github.com/elazarl/goproxy" ) func Match(notARegex string) bool { @@ -44,3 +45,22 @@ func main() { regexp.Match(`https://www.example.com`, []byte("")) // NOT OK regexp.Match(`https://www\.example\.com`, []byte("")) // OK } + +const sourceConst = `https://www.example.com` +const firstHalfConst = `https://www.example.` + +func concatenateStrings() { + firstHalf := `https://www.example.` + regexp.Match(firstHalf+`com`, []byte("")) // MISSING: NOT OK + + regexp.Match(firstHalfConst+`com`, []byte("")) // NOT OK + + regexp.Match(`https://www.example.`+`com`, []byte("")) // NOT OK +} + +func avoidDuplicateResults() { + localVar1 := sourceConst + localVar2 := localVar1 + localVar3 := localVar2 + regexp.Match(localVar3, []byte("")) // NOT OK +} From c61177cf42d45784910d6999b625592f3f2fbb2d Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 24 Apr 2024 14:18:51 +0100 Subject: [PATCH 6/6] Add change note --- .../src/change-notes/2024-04-24-incomplete-hostname-regexp.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 go/ql/src/change-notes/2024-04-24-incomplete-hostname-regexp.md diff --git a/go/ql/src/change-notes/2024-04-24-incomplete-hostname-regexp.md b/go/ql/src/change-notes/2024-04-24-incomplete-hostname-regexp.md new file mode 100644 index 000000000000..3e7f0d593ec1 --- /dev/null +++ b/go/ql/src/change-notes/2024-04-24-incomplete-hostname-regexp.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The query `go/incomplete-hostname-regexp` now recognizes more sources involving concatenation of string literals and also follows flow through string concatenation. This may lead to more alerts.