Skip to content

Commit

Permalink
Merge pull request #16307 from owen-mc/go/fix/incomplete-hostname-regex
Browse files Browse the repository at this point in the history
Go: fix flow through string concatenation in `go/incomplete-hostname-regex`
  • Loading branch information
owen-mc authored Apr 25, 2024
2 parents 13ff941 + c61177c commit 82bbecc
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 14 deletions.
18 changes: 10 additions & 8 deletions go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,12 @@ predicate regexpGuardsError(RegexpPattern regexp) {

module IncompleteHostNameRegexpConfig implements DataFlow::ConfigSig {
additional predicate isSourceString(DataFlow::Node source, string hostPart) {
exists(Expr e |
e = source.asExpr() and
isIncompleteHostNameRegexpPattern(e.getStringValue(), hostPart)
|
e instanceof StringLit
or
e instanceof AddExpr and
not isIncompleteHostNameRegexpPattern(e.(AddExpr).getAnOperand().getStringValue(), _)
exists(Expr e | e = source.asExpr() |
isIncompleteHostNameRegexpPattern(e.getStringValue(), hostPart) and
// Exclude constant names to avoid duplicate results, because the string
// literals which they are initialised with are also considered as
// sources.
not e instanceof ConstantName
)
}

Expand All @@ -101,6 +99,10 @@ module IncompleteHostNameRegexpConfig implements DataFlow::ConfigSig {
) and
not regexpGuardsError(sink)
}

predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
StringOps::Concatenation::taintStep(node1, node2)
}
}

module Flow = DataFlow::Global<IncompleteHostNameRegexpConfig>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"regexp"
)

func checkRedirectGood(req *http.Request, via []*http.Request) error {
func checkRedirectGood2(req *http.Request, via []*http.Request) error {
// GOOD: the host of `req.URL` must be `example.com`, `www.example.com` or `beta.example.com`
re := `^((www|beta)\.)?example\.com/`
if matched, _ := regexp.MatchString(re, req.URL.Host); matched {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The query `go/incomplete-hostname-regexp` now recognizes more sources involving concatenation of string literals and also follows flow through string concatenation. This may lead to more alerts.
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
edges
| IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:12:38:12:39 | re | provenance | |
| main.go:49:21:49:45 | `https://www.example.com` | main.go:62:15:62:25 | sourceConst | provenance | |
| main.go:62:15:62:25 | sourceConst | main.go:65:15:65:23 | localVar3 | provenance | |
nodes
| IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | semmle.label | "^((www\|beta).)?example.com/" |
| IncompleteHostnameRegexp.go:12:38:12:39 | re | semmle.label | re |
| main.go:39:60:39:79 | "^test2.github.com$" | semmle.label | "^test2.github.com$" |
| main.go:44:15:44:39 | `https://www.example.com` | semmle.label | `https://www.example.com` |
| main.go:40:60:40:79 | "^test2.github.com$" | semmle.label | "^test2.github.com$" |
| main.go:45:15:45:39 | `https://www.example.com` | semmle.label | `https://www.example.com` |
| main.go:49:21:49:45 | `https://www.example.com` | semmle.label | `https://www.example.com` |
| main.go:56:15:56:34 | ...+... | semmle.label | ...+... |
| main.go:58:15:58:42 | ...+... | semmle.label | ...+... |
| main.go:62:15:62:25 | sourceConst | semmle.label | sourceConst |
| main.go:65:15:65:23 | localVar3 | semmle.label | localVar3 |
subpaths
#select
| IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:12:38:12:39 | re | This regular expression has an unescaped dot before ')?example.com', so it might match more hosts than expected when $@. | IncompleteHostnameRegexp.go:12:38:12:39 | re | the regular expression is used |
| main.go:39:60:39:79 | "^test2.github.com$" | main.go:39:60:39:79 | "^test2.github.com$" | main.go:39:60:39:79 | "^test2.github.com$" | This regular expression has an unescaped dot before 'github.com', so it might match more hosts than expected when $@. | main.go:39:60:39:79 | "^test2.github.com$" | the regular expression is used |
| main.go:44:15:44:39 | `https://www.example.com` | main.go:44:15:44:39 | `https://www.example.com` | main.go:44:15:44:39 | `https://www.example.com` | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:44:15:44:39 | `https://www.example.com` | the regular expression is used |
| main.go:40:60:40:79 | "^test2.github.com$" | main.go:40:60:40:79 | "^test2.github.com$" | main.go:40:60:40:79 | "^test2.github.com$" | This regular expression has an unescaped dot before 'github.com', so it might match more hosts than expected when $@. | main.go:40:60:40:79 | "^test2.github.com$" | the regular expression is used |
| main.go:45:15:45:39 | `https://www.example.com` | main.go:45:15:45:39 | `https://www.example.com` | main.go:45:15:45:39 | `https://www.example.com` | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:45:15:45:39 | `https://www.example.com` | the regular expression is used |
| main.go:49:21:49:45 | `https://www.example.com` | main.go:49:21:49:45 | `https://www.example.com` | main.go:65:15:65:23 | localVar3 | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:65:15:65:23 | localVar3 | the regular expression is used |
| main.go:56:15:56:34 | ...+... | main.go:56:15:56:34 | ...+... | main.go:56:15:56:34 | ...+... | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:56:15:56:34 | ...+... | the regular expression is used |
| main.go:58:15:58:42 | ...+... | main.go:58:15:58:42 | ...+... | main.go:58:15:58:42 | ...+... | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:58:15:58:42 | ...+... | the regular expression is used |
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package main

import (
"errors"
"net/http"
"regexp"
)

func checkRedirectGood2(req *http.Request, via []*http.Request) error {
// GOOD: the host of `req.URL` must be `example.com`, `www.example.com` or `beta.example.com`
re := `^((www|beta)\.)?example\.com/`
if matched, _ := regexp.MatchString(re, req.URL.Host); matched {
return nil
}
return errors.New("Invalid redirect")
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
package main

import (
"github.com/elazarl/goproxy"
"net/http"
"regexp"
"time"

"github.com/elazarl/goproxy"
)

func Match(notARegex string) bool {
Expand Down Expand Up @@ -44,3 +45,22 @@ func main() {
regexp.Match(`https://www.example.com`, []byte("")) // NOT OK
regexp.Match(`https://www\.example\.com`, []byte("")) // OK
}

const sourceConst = `https://www.example.com`
const firstHalfConst = `https://www.example.`

func concatenateStrings() {
firstHalf := `https://www.example.`
regexp.Match(firstHalf+`com`, []byte("")) // MISSING: NOT OK

regexp.Match(firstHalfConst+`com`, []byte("")) // NOT OK

regexp.Match(`https://www.example.`+`com`, []byte("")) // NOT OK
}

func avoidDuplicateResults() {
localVar1 := sourceConst
localVar2 := localVar1
localVar3 := localVar2
regexp.Match(localVar3, []byte("")) // NOT OK
}

0 comments on commit 82bbecc

Please sign in to comment.