Skip to content

Commit

Permalink
implement archive in archive scanning (#33)
Browse files Browse the repository at this point in the history
Extract and scan archives which are included in the artifacts.zip file
  • Loading branch information
frjcomp authored Oct 10, 2024
1 parent 1a3a60b commit 4ef0a4b
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 11 deletions.
3 changes: 3 additions & 0 deletions src/pipeleak/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ require (
github.com/joho/godotenv v1.5.1 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 // indirect
github.com/kdomanski/iso9660 v0.3.3 // indirect
github.com/kevinburke/ssh_config v1.2.0 // indirect
github.com/kjk/lzma v0.0.0-20161016003348-3fd93898850d // indirect
github.com/klauspost/compress v1.17.9 // indirect
Expand All @@ -162,6 +163,7 @@ require (
github.com/montanaflynn/stats v0.7.1 // indirect
github.com/mtibben/percent v0.2.1 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/nwaples/rardecode v1.1.3 // indirect
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.0 // indirect
Expand Down Expand Up @@ -219,6 +221,7 @@ require (
golang.org/x/text v0.18.0 // indirect
golang.org/x/tools v0.24.0 // indirect
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
golift.io/xtractr v0.2.2 // indirect
google.golang.org/api v0.193.0 // indirect
google.golang.org/genproto v0.0.0-20240814211410-ddb44dafa142 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect
Expand Down
6 changes: 6 additions & 0 deletions src/pipeleak/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,8 @@ github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 h1:qGQQKEcAR99REcMpsXCp3lJ03zYT1PkRd3kQGPn9GVg=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/kdomanski/iso9660 v0.3.3 h1:cNwM9L2L1Hzc5hZWGy6fPJ92UyWDccaY69DmEPlfDNY=
github.com/kdomanski/iso9660 v0.3.3/go.mod h1:K+UlIGxKgtrdAWyoigPnFbeQLVs/Xudz4iztWFThBwo=
github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4=
github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
Expand Down Expand Up @@ -558,6 +560,8 @@ github.com/mtibben/percent v0.2.1/go.mod h1:KG9uO+SZkUp+VkRHsCdYQV3XSZrrSpR3O9ib
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk=
github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY=
github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY=
Expand Down Expand Up @@ -986,6 +990,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
golift.io/xtractr v0.2.2 h1:MvujxeuX629d1rQs2VJbbcvYMvMmN5SzIkEflU5ryOc=
golift.io/xtractr v0.2.2/go.mod h1:30CvLMUY3yOS2VoKZTTMtzeeljCzBcWkr8dU6EHqfh8=
gonum.org/v1/gonum v0.12.0 h1:xKuo6hzt+gMav00meVPUlXwSdoEJP46BR+wdxQEFK2o=
gonum.org/v1/gonum v0.12.0/go.mod h1:73TDxJfAAHeA8Mk9mf8NlIppyhQNo5GLTcYeqgo2lvY=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
Expand Down
9 changes: 9 additions & 0 deletions src/pipeleak/helper/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,12 @@ func GetGitlabClient(token string, url string) (*gitlab.Client, error) {
client, err := gitlab.NewClient(token, gitlab.WithBaseURL(url), gitlab.WithHTTPClient(GetNonVerifyingHTTPClient()))
return client, err
}

// IsDirectory reports whether path refers to a directory.
//
// Note: a path that cannot be stat'ed (for example because it does not exist)
// is also reported as true, so the result is only a reliable "regular entry"
// signal when it is false.
func IsDirectory(path string) bool {
	info, statErr := os.Stat(path)
	if statErr == nil {
		return info.IsDir()
	}

	// Stat failed: treat the path like a directory.
	return true
}
89 changes: 78 additions & 11 deletions src/pipeleak/scanner/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@ import (
"io"
"net/http"
"net/url"
"os"
"path"
"runtime"
"strconv"
"strings"
"sync"

"github.com/CompassSecurity/pipeleak/helper"
Expand All @@ -20,6 +23,7 @@ import (
"github.com/rs/zerolog/log"
"github.com/wandb/parallel"
"github.com/xanzy/go-gitlab"
"golift.io/xtractr"
)

type QueueItemType string
Expand Down Expand Up @@ -104,7 +108,7 @@ func analyzeJobTrace(git *gitlab.Client, item QueueItem, options *ScanOptions) {
}

func analyzeJobArtifact(git *gitlab.Client, item QueueItem, options *ScanOptions) {
data := getJobArtifacts(git, item.Meta.ProjectId, item.Meta.JobId, options)
data := getJobArtifacts(git, item.Meta.ProjectId, item.Meta.JobId, item.Meta.JobWebUrl, options)
if data == nil {
return
}
Expand Down Expand Up @@ -135,11 +139,9 @@ func analyzeJobArtifact(git *gitlab.Client, item QueueItem, options *ScanOptions
kind, _ := filetype.Match(content)
// do not scan https://pkg.go.dev/github.com/h2non/filetype#readme-supported-types
if kind == filetype.Unknown {
// use one to prevent maxThreads^2 which trashes memory
findings := DetectHits(content, 1)
for _, finding := range findings {
log.Warn().Str("confidence", finding.Pattern.Pattern.Confidence).Str("name", finding.Pattern.Pattern.Name).Str("value", finding.Text).Str("url", item.Meta.JobWebUrl).Str("file", file.Name).Msg("HIT Artifact")
}
DetectFileHits(content, item.Meta.JobWebUrl, file.Name, "")
} else if filetype.IsArchive(content) {
handleArchiveArtifact(file.Name, content, item.Meta.JobWebUrl)
}
fc.Close()
})
Expand Down Expand Up @@ -176,31 +178,31 @@ func getJobTrace(git *gitlab.Client, projectId int, jobId int) []byte {
return trace
}

func getJobArtifacts(git *gitlab.Client, projectId int, jobId int, options *ScanOptions) []byte {
func getJobArtifacts(git *gitlab.Client, projectId int, jobId int, jobWebUrl string, options *ScanOptions) []byte {
artifactsReader, resp, err := git.Jobs.GetJobArtifacts(projectId, jobId)
if resp.StatusCode == 404 {
return nil
}

if err != nil {
log.Error().Err(err).Int("project", projectId).Int("job", jobId).Msg("Failed downloading job artifacts zip")
log.Error().Err(err).Str("url", jobWebUrl).Msg("Failed downloading job artifacts zip")
return nil
}

if artifactsReader.Size() > options.MaxArtifactSize {
log.Debug().Int("project", projectId).Int("job", jobId).Int64("bytes", artifactsReader.Size()).Int64("maxBytes", options.MaxArtifactSize).Msg("Skipped large artifact Zip")
log.Debug().Int64("bytes", artifactsReader.Size()).Int64("maxBytes", options.MaxArtifactSize).Str("url", jobWebUrl).Msg("Skipped large artifact Zip")
return nil
}

data, err := io.ReadAll(artifactsReader)
if err != nil {
log.Error().Err(err).Int("project", projectId).Int("job", jobId).Msg("Failed reading artifacts stream")
log.Error().Err(err).Str("url", jobWebUrl).Msg("Failed reading artifacts stream")
return nil
}

extractedZipSize := helper.CalculateZipFileSize(data)
if extractedZipSize > uint64(options.MaxArtifactSize) {
log.Debug().Int("project", projectId).Int("job", jobId).Int64("zipBytes", artifactsReader.Size()).Uint64("bytesExtracted", extractedZipSize).Int64("maxBytes", options.MaxArtifactSize).Msg("Skipped large extracted Zip artifact")
log.Debug().Str("url", jobWebUrl).Int64("zipBytes", artifactsReader.Size()).Uint64("bytesExtracted", extractedZipSize).Int64("maxBytes", options.MaxArtifactSize).Msg("Skipped large extracted Zip artifact")
return nil
}

Expand Down Expand Up @@ -279,3 +281,68 @@ func DownloadEnvArtifact(cookieVal string, gitlabUrl string, prjectPath string,

return envText
}

// skippableDirectoryNames lists name fragments that cause an archive to be
// skipped: handleArchiveArtifact ignores any archive whose file name contains
// one of these entries. The entries appear to be taken from GitLab's common
// cache locations, see
// https://docs.gitlab.com/ee/ci/caching/#common-use-cases-for-caches
var skippableDirectoryNames = []string{"node_modules", ".yarn", ".yarn-cache", ".npm", "venv", "vendor", ".go/pkg/mod/"}

// handleArchiveArtifact extracts an archive found inside a job's artifacts and
// scans every extracted file of unknown file type for secrets.
//
// archivefileName is the name of the archive inside the artifacts zip; archives
// whose name contains an entry of skippableDirectoryNames are skipped. content
// holds the raw archive bytes and jobWebUrl is attached to reported findings.
//
// The archive is written to a temporary file and extracted into a temporary
// directory; both are removed again before the function returns. All failures
// are logged and abort the handling of this archive only.
func handleArchiveArtifact(archivefileName string, content []byte, jobWebUrl string) {
	for _, skipKeyword := range skippableDirectoryNames {
		if strings.Contains(archivefileName, skipKeyword) {
			log.Debug().Str("file", archivefileName).Str("keyword", skipKeyword).Msg("Skipped archive due to blocklist entry")
			return
		}
	}

	fileType, err := filetype.Get(content)
	if err != nil {
		log.Error().Stack().Err(err).Msg("Cannot determine file type")
		return
	}

	tmpArchiveFile, err := os.CreateTemp("", "pipeleak-artifact-archive-*."+fileType.Extension)
	if err != nil {
		log.Error().Stack().Err(err).Msg("Cannot create artifact archive temp file")
		return
	}
	// Register the cleanup right away so the temp file is also removed when
	// writing fails.
	defer os.Remove(tmpArchiveFile.Name())

	// Write through the handle returned by CreateTemp and always close it,
	// otherwise one file descriptor leaks per scanned archive.
	_, err = tmpArchiveFile.Write(content)
	if closeErr := tmpArchiveFile.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		log.Error().Stack().Err(err).Msg("Failed writing archive to disk")
		return
	}

	tmpArchiveFilesDirectory, err := os.MkdirTemp("", "pipeleak-artifact-archive-out-")
	if err != nil {
		log.Error().Stack().Err(err).Msg("Cannot create artifact archive temp directory")
		return
	}
	defer os.RemoveAll(tmpArchiveFilesDirectory)

	x := &xtractr.XFile{
		FilePath:  tmpArchiveFile.Name(),
		OutputDir: tmpArchiveFilesDirectory,
		FileMode:  0o600,
		DirMode:   0o700,
	}

	_, files, _, err := xtractr.ExtractFile(x)
	if err != nil || files == nil {
		// Err tolerates a nil error; calling err.Error() here would panic
		// when extraction succeeded but yielded no files.
		log.Debug().Err(err).Msg("Unable to handle archive in artifacts")
		return
	}

	for _, fPath := range files {
		if helper.IsDirectory(fPath) {
			continue
		}

		fileBytes, err := os.ReadFile(fPath)
		if err != nil {
			log.Debug().Str("file", fPath).Stack().Str("err", err.Error()).Msg("Cannot read temp artifact archive file content")
			// Nothing to scan for this entry, move on to the next one.
			continue
		}

		// Only scan content that does not match a known (binary) file type.
		kind, _ := filetype.Match(fileBytes)
		if kind == filetype.Unknown {
			DetectFileHits(fileBytes, jobWebUrl, path.Base(fPath), archivefileName)
		}
	}
}
13 changes: 13 additions & 0 deletions src/pipeleak/scanner/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,19 @@ func DetectHits(text []byte, maxThreads int) []Finding {
return slices.Concat(findingsCombined, findingsTr)
}

// DetectFileHits scans content for secrets and logs a warning for every
// finding. jobWebUrl and fileName are attached to each log entry. When
// archiveName is non-empty the finding is additionally tagged with the archive
// it was extracted from.
func DetectFileHits(content []byte, jobWebUrl string, fileName string, archiveName string) {
	inArchive := len(archiveName) > 0

	// A single goroutine is requested here to avoid maxThreads^2 workers,
	// which thrashes memory.
	for _, hit := range DetectHits(content, 1) {
		event := log.Warn().
			Str("confidence", hit.Pattern.Pattern.Confidence).
			Str("name", hit.Pattern.Pattern.Name).
			Str("value", hit.Text).
			Str("url", jobWebUrl).
			Str("file", fileName)

		if !inArchive {
			event.Msg("HIT Artifact")
			continue
		}

		event.Str("archive", archiveName).Msg("HIT Artifact (in archive)")
	}
}

func extractHitWithSurroundingText(text []byte, hitIndex []int, additionalBytes int) string {
startIndex := hitIndex[0]
endIndex := hitIndex[1]
Expand Down

0 comments on commit 4ef0a4b

Please sign in to comment.