Skip to content

Commit

Permalink
Merge pull request #87 from CompassSecurity/71-allow-disabling-truffl…
Browse files Browse the repository at this point in the history
…ehog-verification

allow disabling the trufflehog verification
  • Loading branch information
frjcomp authored Dec 18, 2024
2 parents 3d22395 + f37cd96 commit 9d496a6
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 29 deletions.
1 change: 1 addition & 0 deletions src/pipeleak/cmd/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ func NewScanCmd() *cobra.Command {
scanCmd.PersistentFlags().StringVarP(&maxArtifactSize, "max-artifact-size", "", "500Mb", "Max file size of an artifact to be included in scanning. Larger files are skipped. Format: https://pkg.go.dev/github.com/docker/go-units#FromHumanSize")
scanCmd.PersistentFlags().IntVarP(&options.MaxScanGoRoutines, "threads", "", 4, "Nr of threads used to scan")
scanCmd.PersistentFlags().StringVarP(&options.QueueFolder, "queue", "q", "", "Relative folderpath where the queue files will be stored. Defaults to system tmp")
scanCmd.PersistentFlags().BoolVarP(&options.TruffleHogVerification, "truffleHogVerification", "", true, "Enable the TruffleHog credential verification, will actively test the found credentials and only report those. Disable with --truffleHogVerification=false")

scanCmd.PersistentFlags().BoolVarP(&options.Verbose, "verbose", "v", false, "Verbose logging")

Expand Down
30 changes: 17 additions & 13 deletions src/pipeleak/scanner/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,20 @@ var queueFileName string
var queueDB *sql.DB

type ScanOptions struct {
GitlabUrl string
GitlabApiToken string
GitlabCookie string
ProjectSearchQuery string
Artifacts bool
Owned bool
Member bool
JobLimit int
Verbose bool
ConfidenceFilter []string
MaxArtifactSize int64
MaxScanGoRoutines int
QueueFolder string
GitlabUrl string
GitlabApiToken string
GitlabCookie string
ProjectSearchQuery string
Artifacts bool
Owned bool
Member bool
JobLimit int
Verbose bool
ConfidenceFilter []string
MaxArtifactSize int64
MaxScanGoRoutines int
QueueFolder string
TruffleHogVerification bool
}

func ScanGitLabPipelines(options *ScanOptions) {
Expand All @@ -50,6 +51,9 @@ func ScanGitLabPipelines(options *ScanOptions) {
})

InitRules(options.ConfidenceFilter)
if !options.TruffleHogVerification {
log.Info().Msg("TruffleHog verification is disabled")
}

git, err := helper.GetGitlabClient(options.GitlabApiToken, options.GitlabUrl)
if err != nil {
Expand Down
12 changes: 6 additions & 6 deletions src/pipeleak/scanner/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ func analyzeJobTrace(git *gitlab.Client, item QueueItem, options *ScanOptions) {
return
}

findings := DetectHits(trace, options.MaxScanGoRoutines)
findings := DetectHits(trace, options.MaxScanGoRoutines, options.TruffleHogVerification)
for _, finding := range findings {
log.Warn().Str("confidence", finding.Pattern.Pattern.Confidence).Str("ruleName", finding.Pattern.Pattern.Name).Str("value", finding.Text).Str("url", item.Meta.JobWebUrl).Str("jobName", item.Meta.JobName).Msg("HIT")
}
Expand Down Expand Up @@ -140,9 +140,9 @@ func analyzeJobArtifact(git *gitlab.Client, item QueueItem, options *ScanOptions
kind, _ := filetype.Match(content)
// do not scan https://pkg.go.dev/github.com/h2non/filetype#readme-supported-types
if kind == filetype.Unknown {
DetectFileHits(content, item.Meta.JobWebUrl, item.Meta.JobName, file.Name, "")
DetectFileHits(content, item.Meta.JobWebUrl, item.Meta.JobName, file.Name, "", options.TruffleHogVerification)
} else if filetype.IsArchive(content) {
handleArchiveArtifact(file.Name, content, item.Meta.JobWebUrl, item.Meta.JobName)
handleArchiveArtifact(file.Name, content, item.Meta.JobWebUrl, item.Meta.JobName, options.TruffleHogVerification)
}
fc.Close()
})
Expand All @@ -157,7 +157,7 @@ func analyzeDotenvArtifact(git *gitlab.Client, item QueueItem, options *ScanOpti
return
}

findings := DetectHits(dotenvText, options.MaxScanGoRoutines)
findings := DetectHits(dotenvText, options.MaxScanGoRoutines, options.TruffleHogVerification)
for _, finding := range findings {
artifactsBaseUrl, _ := url.JoinPath(item.Meta.JobWebUrl, "/-/artifacts")
log.Warn().Str("confidence", finding.Pattern.Pattern.Confidence).Str("ruleName", finding.Pattern.Pattern.Name).Str("value", finding.Text).Str("artifactUrl", artifactsBaseUrl).Int("jobId", item.Meta.JobId).Str("jobName", item.Meta.JobName).Msg("HIT DOTENV: Check artifacts page which is the only place to download the dotenv file")
Expand Down Expand Up @@ -286,7 +286,7 @@ func DownloadEnvArtifact(cookieVal string, gitlabUrl string, prjectPath string,
// https://docs.gitlab.com/ee/ci/caching/#common-use-cases-for-caches
var skippableDirectoryNames = []string{"node_modules", ".yarn", ".yarn-cache", ".npm", "venv", "vendor", ".go/pkg/mod/"}

func handleArchiveArtifact(archivefileName string, content []byte, jobWebUrl string, jobName string) {
func handleArchiveArtifact(archivefileName string, content []byte, jobWebUrl string, jobName string, enableTruffleHogVerification bool) {
for _, skipKeyword := range skippableDirectoryNames {
if strings.Contains(archivefileName, skipKeyword) {
log.Debug().Str("file", archivefileName).Str("keyword", skipKeyword).Msg("Skipped archive due to blocklist entry")
Expand Down Expand Up @@ -342,7 +342,7 @@ func handleArchiveArtifact(archivefileName string, content []byte, jobWebUrl str

kind, _ := filetype.Match(fileBytes)
if kind == filetype.Unknown {
DetectFileHits(fileBytes, jobWebUrl, jobName, path.Base(fPath), archivefileName)
DetectFileHits(fileBytes, jobWebUrl, jobName, path.Base(fPath), archivefileName, enableTruffleHogVerification)
}
}
}
Expand Down
28 changes: 18 additions & 10 deletions src/pipeleak/scanner/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ func AppendPipeleakRules(rules []PatternElement) []PatternElement {
return slices.Concat(rules, customRules)
}

func DetectHits(text []byte, maxThreads int) []Finding {
func DetectHits(text []byte, maxThreads int, enableTruffleHogVerification bool) []Finding {
ctx := context.Background()
group := parallel.Collect[[]Finding](parallel.Limited(ctx, maxThreads))

Expand Down Expand Up @@ -179,21 +179,29 @@ func DetectHits(text []byte, maxThreads int) []Finding {
for _, detector := range defaults.DefaultDetectors() {
trGroup.Go(func(ctx context.Context) ([]Finding, error) {
findingsTr := []Finding{}
trHits, err := detector.FromData(ctx, true, text)
trHits, err := detector.FromData(ctx, enableTruffleHogVerification, text)
if err != nil {
log.Error().Msg("Truffelhog Detector Failed " + err.Error())
return []Finding{}, err
}

for _, result := range trHits {
// only report verified
secret := result.Raw
if len(result.RawV2) > 0 {
secret = result.RawV2
}
finding := Finding{Pattern: PatternElement{Pattern: PatternPattern{Name: result.DetectorType.String(), Confidence: "high-verified"}}, Text: string(secret)}

// if trufflehog verification is enabled, ONLY verified findings are reported
if result.Verified {
secret := result.Raw
if len(result.RawV2) > 0 {
secret = result.Raw
}
findingsTr = append(findingsTr, finding)
}

findingsTr = append(findingsTr, Finding{Pattern: PatternElement{Pattern: PatternPattern{Name: result.DetectorType.String(), Confidence: "high-verified"}}, Text: string(secret)})
// if trufflehog verification is disabled all rules are reported
if !enableTruffleHogVerification {
// trufflehog itself does not have confidence information
finding.Pattern.Pattern.Confidence = "trufflehog-unverified"
findingsTr = append(findingsTr, finding)
}
}
return findingsTr, nil
Expand Down Expand Up @@ -231,9 +239,9 @@ func deduplicateFindings(totalFindings []Finding) []Finding {
return dedupedFindings
}

func DetectFileHits(content []byte, jobWebUrl string, jobName string, fileName string, archiveName string) {
func DetectFileHits(content []byte, jobWebUrl string, jobName string, fileName string, archiveName string, enableTruffleHogVerification bool) {
// 1 goroutine to prevent maxThreads^2 which thrashes memory
findings := DetectHits(content, 1)
findings := DetectHits(content, 1, enableTruffleHogVerification)
for _, finding := range findings {
baseLog := log.Warn().Str("confidence", finding.Pattern.Pattern.Confidence).Str("ruleName", finding.Pattern.Pattern.Name).Str("value", finding.Text).Str("url", jobWebUrl).Str("jobName", jobName).Str("file", fileName)
if len(archiveName) > 0 {
Expand Down

0 comments on commit 9d496a6

Please sign in to comment.