diff --git a/src/pipeleak/go.mod b/src/pipeleak/go.mod index 2d34075..d987353 100644 --- a/src/pipeleak/go.mod +++ b/src/pipeleak/go.mod @@ -145,6 +145,7 @@ require ( github.com/joho/godotenv v1.5.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 // indirect + github.com/kdomanski/iso9660 v0.3.3 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect github.com/kjk/lzma v0.0.0-20161016003348-3fd93898850d // indirect github.com/klauspost/compress v1.17.9 // indirect @@ -162,6 +163,7 @@ require ( github.com/montanaflynn/stats v0.7.1 // indirect github.com/mtibben/percent v0.2.1 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/nwaples/rardecode v1.1.3 // indirect github.com/nwaples/rardecode/v2 v2.0.0-beta.2 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.0 // indirect @@ -219,6 +221,7 @@ require ( golang.org/x/text v0.18.0 // indirect golang.org/x/tools v0.24.0 // indirect golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect + golift.io/xtractr v0.2.2 // indirect google.golang.org/api v0.193.0 // indirect google.golang.org/genproto v0.0.0-20240814211410-ddb44dafa142 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect diff --git a/src/pipeleak/go.sum b/src/pipeleak/go.sum index 36ac91b..8bac1a3 100644 --- a/src/pipeleak/go.sum +++ b/src/pipeleak/go.sum @@ -477,6 +477,8 @@ github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1 github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 h1:qGQQKEcAR99REcMpsXCp3lJ03zYT1PkRd3kQGPn9GVg= github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= +github.com/kdomanski/iso9660 v0.3.3 
h1:cNwM9L2L1Hzc5hZWGy6fPJ92UyWDccaY69DmEPlfDNY= +github.com/kdomanski/iso9660 v0.3.3/go.mod h1:K+UlIGxKgtrdAWyoigPnFbeQLVs/Xudz4iztWFThBwo= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= @@ -558,6 +560,8 @@ github.com/mtibben/percent v0.2.1/go.mod h1:KG9uO+SZkUp+VkRHsCdYQV3XSZrrSpR3O9ib github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc= +github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk= github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY= @@ -986,6 +990,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +golift.io/xtractr v0.2.2 h1:MvujxeuX629d1rQs2VJbbcvYMvMmN5SzIkEflU5ryOc= +golift.io/xtractr v0.2.2/go.mod h1:30CvLMUY3yOS2VoKZTTMtzeeljCzBcWkr8dU6EHqfh8= gonum.org/v1/gonum v0.12.0 h1:xKuo6hzt+gMav00meVPUlXwSdoEJP46BR+wdxQEFK2o= gonum.org/v1/gonum 
// IsDirectory reports whether path refers to a directory.
//
// A path that cannot be stat'ed (for example because it does not exist) is
// reported as a directory as well, so callers that skip directories also skip
// paths they would not be able to read.
func IsDirectory(path string) bool {
	if info, statErr := os.Stat(path); statErr == nil {
		return info.IsDir()
	}

	return true
}
finding.Pattern.Pattern.Confidence).Str("name", finding.Pattern.Pattern.Name).Str("value", finding.Text).Str("url", item.Meta.JobWebUrl).Str("file", file.Name).Msg("HIT Artifact") - } + DetectFileHits(content, item.Meta.JobWebUrl, file.Name, "") + } else if filetype.IsArchive(content) { + handleArchiveArtifact(file.Name, content, item.Meta.JobWebUrl) } fc.Close() }) @@ -176,31 +178,31 @@ func getJobTrace(git *gitlab.Client, projectId int, jobId int) []byte { return trace } -func getJobArtifacts(git *gitlab.Client, projectId int, jobId int, options *ScanOptions) []byte { +func getJobArtifacts(git *gitlab.Client, projectId int, jobId int, jobWebUrl string, options *ScanOptions) []byte { artifactsReader, resp, err := git.Jobs.GetJobArtifacts(projectId, jobId) if resp.StatusCode == 404 { return nil } if err != nil { - log.Error().Err(err).Int("project", projectId).Int("job", jobId).Msg("Failed downloading job artifacts zip") + log.Error().Err(err).Str("url", jobWebUrl).Msg("Failed downloading job artifacts zip") return nil } if artifactsReader.Size() > options.MaxArtifactSize { - log.Debug().Int("project", projectId).Int("job", jobId).Int64("bytes", artifactsReader.Size()).Int64("maxBytes", options.MaxArtifactSize).Msg("Skipped large artifact Zip") + log.Debug().Int64("bytes", artifactsReader.Size()).Int64("maxBytes", options.MaxArtifactSize).Str("url", jobWebUrl).Msg("Skipped large artifact Zip") return nil } data, err := io.ReadAll(artifactsReader) if err != nil { - log.Error().Err(err).Int("project", projectId).Int("job", jobId).Msg("Failed reading artifacts stream") + log.Error().Err(err).Str("url", jobWebUrl).Msg("Failed reading artifacts stream") return nil } extractedZipSize := helper.CalculateZipFileSize(data) if extractedZipSize > uint64(options.MaxArtifactSize) { - log.Debug().Int("project", projectId).Int("job", jobId).Int64("zipBytes", artifactsReader.Size()).Uint64("bytesExtracted", extractedZipSize).Int64("maxBytes", options.MaxArtifactSize).Msg("Skipped 
large extracted Zip artifact") + log.Debug().Str("url", jobWebUrl).Int64("zipBytes", artifactsReader.Size()).Uint64("bytesExtracted", extractedZipSize).Int64("maxBytes", options.MaxArtifactSize).Msg("Skipped large extracted Zip artifact") return nil } @@ -279,3 +281,68 @@ func DownloadEnvArtifact(cookieVal string, gitlabUrl string, prjectPath string, return envText } + +// https://docs.gitlab.com/ee/ci/caching/#common-use-cases-for-caches +var skippableDirectoryNames = []string{"node_modules", ".yarn", ".yarn-cache", ".npm", "venv", "vendor", ".go/pkg/mod/"} + +func handleArchiveArtifact(archivefileName string, content []byte, jobWebUrl string) { + for _, skipKeyword := range skippableDirectoryNames { + if strings.Contains(archivefileName, skipKeyword) { + log.Debug().Str("file", archivefileName).Str("keyword", skipKeyword).Msg("Skipped archive due to blocklist entry") + return + } + } + + fileType, err := filetype.Get(content) + if err != nil { + log.Error().Stack().Err(err).Msg("Cannot determine file type") + return + } + + tmpArchiveFile, err := os.CreateTemp("", "pipeleak-artifact-archive-*."+fileType.Extension) + if err != nil { + log.Error().Stack().Err(err).Msg("Cannot create artifact archive temp file") + return + } + + err = os.WriteFile(tmpArchiveFile.Name(), content, 0666) + if err != nil { + log.Error().Stack().Err(err).Msg("Failed writing archive to disk") + return + } + defer os.Remove(tmpArchiveFile.Name()) + + tmpArchiveFilesDirectory, err := os.MkdirTemp("", "pipeleak-artifact-archive-out-") + if err != nil { + log.Error().Stack().Err(err).Msg("Cannot create artifact archive temp directory") + return + } + defer os.RemoveAll(tmpArchiveFilesDirectory) + + x := &xtractr.XFile{ + FilePath: tmpArchiveFile.Name(), + OutputDir: tmpArchiveFilesDirectory, + FileMode: 0o600, + DirMode: 0o700, + } + + _, files, _, err := xtractr.ExtractFile(x) + if err != nil || files == nil { + log.Debug().Str("err", err.Error()).Msg("Unable to handle archive in artifacts") 
+ return + } + + for _, fPath := range files { + if !helper.IsDirectory(fPath) { + fileBytes, err := os.ReadFile(fPath) + if err != nil { + log.Debug().Str("file", fPath).Stack().Str("err", err.Error()).Msg("Cannot read temp artifact archive file content") + } + + kind, _ := filetype.Match(fileBytes) + if kind == filetype.Unknown { + DetectFileHits(fileBytes, jobWebUrl, path.Base(fPath), archivefileName) + } + } + } +} diff --git a/src/pipeleak/scanner/rules.go b/src/pipeleak/scanner/rules.go index 7813aca..a25b2cd 100644 --- a/src/pipeleak/scanner/rules.go +++ b/src/pipeleak/scanner/rules.go @@ -193,6 +193,19 @@ func DetectHits(text []byte, maxThreads int) []Finding { return slices.Concat(findingsCombined, findingsTr) } +func DetectFileHits(content []byte, jobWebUrl string, fileName string, archiveName string) { + // 1 goroutine to prevent maxThreads^2 which trashes memory + findings := DetectHits(content, 1) + for _, finding := range findings { + baseLog := log.Warn().Str("confidence", finding.Pattern.Pattern.Confidence).Str("name", finding.Pattern.Pattern.Name).Str("value", finding.Text).Str("url", jobWebUrl).Str("file", fileName) + if len(archiveName) > 0 { + baseLog.Str("archive", archiveName).Msg("HIT Artifact (in archive)") + } else { + baseLog.Msg("HIT Artifact") + } + } +} + func extractHitWithSurroundingText(text []byte, hitIndex []int, additionalBytes int) string { startIndex := hitIndex[0] endIndex := hitIndex[1]