Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 49 additions & 11 deletions pkg/sources/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package github
import (
"errors"
"fmt"
"io"
"math/rand/v2"
"net/http"
"net/url"
Expand Down Expand Up @@ -1531,6 +1532,18 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget,
}
meta := metaType.Github

chunkSkel := sources.Chunk{
SourceType: s.Type(),
SourceName: s.name,
SourceID: s.SourceID(),
JobID: s.JobID(),
SecretID: target.SecretID,
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Github{Github: meta},
},
Verify: s.verify,
}

u, err := url.Parse(meta.GetLink())
if err != nil {
return fmt.Errorf("unable to parse GitHub URL: %w", err)
Expand All @@ -1543,6 +1556,14 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget,
return fmt.Errorf("invalid GitHub URL")
}

if meta.GetFile() == "" && meta.GetCommit() != "" {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a log message (at level 2) explaining what's happening. (If you do this, I think you can eliminate the comment - the log message text should explain to readers what's happening.)

You might as well add the commit hash as a context value here, instead of right before calling HandleFile in scanCommitMetadata. That way, if someone adds more logging later, they'll get the commit hash without having to think about it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done ✔️

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, sorry, I wasn't clear about the context thing. I meant something like this:

ctx := context.WithValues(ctx, "commit_hash", meta.GetCommit())
ctx.Logger().V(2).Info("secret metadata has no file; scanning commit metadata instead")
return s.scanCommitMetadata(ctx, segments[1], segments[2], meta, &chunkSkel, reporter)

Note how the commit hash is now embedded into the context so all future log messages that use this context will inherit it. (I also put the message at level 2, so it will only show up with the --debug or --trace flags.)

ctx := context.WithValues(ctx, "commit_hash", meta.GetCommit())
ctx.Logger().V(2).Info("secret metadata has no file; scanning commit metadata instead")

return s.scanCommitMetadata(ctx, segments[1], segments[2], meta, &chunkSkel, reporter)
}

// else try downloading the file content to scan
readCloser, resp, err := s.connector.APIClient().Repositories.DownloadContents(
ctx,
segments[1],
Expand All @@ -1561,21 +1582,38 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget,
return fmt.Errorf("unexpected HTTP response status when trying to download file for scan: %v", resp.Status)
}

chunkSkel := sources.Chunk{
SourceType: s.Type(),
SourceName: s.name,
SourceID: s.SourceID(),
JobID: s.JobID(),
SecretID: target.SecretID,
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Github{Github: meta},
},
Verify: s.verify,
}
fileCtx := context.WithValues(ctx, "path", meta.GetFile())
return handlers.HandleFile(fileCtx, readCloser, &chunkSkel, reporter)
}

func (s *Source) scanCommitMetadata(ctx context.Context, owner, repo string, meta *source_metadatapb.Github, chunkSkel *sources.Chunk, reporter sources.ChunkReporter) error {
// fetch the commit
commit, resp, err := s.connector.APIClient().Repositories.GetCommit(ctx, owner, repo, meta.GetCommit(), nil)
if resp != nil && resp.Body != nil {
defer resp.Body.Close()
}
if err != nil {
return fmt.Errorf("could not fetch commit for metadata scan: %w", err)
}

if resp.StatusCode != http.StatusOK {
return fmt.Errorf("unexpected HTTP response status when fetching commit: %v", resp.Status)
}

// create the string with the exact format we use in Git.ScanCommits()
// author email + "\n" + committer + "\n" + commit message
var sb strings.Builder

sb.WriteString(commit.GetCommit().Author.GetEmail())
sb.WriteString("\n")
sb.WriteString(commit.GetCommitter().GetEmail())
sb.WriteString("\n")
sb.WriteString(commit.GetCommit().GetMessage())

content := strings.NewReader(sb.String())
return handlers.HandleFile(ctx, io.NopCloser(content), chunkSkel, reporter)
}

func (s *Source) ChunkUnit(ctx context.Context, unit sources.SourceUnit, reporter sources.ChunkReporter) error {
repoURL, _ := unit.SourceUnitID()
ctx = context.WithValue(ctx, "repo", repoURL)
Expand Down
18 changes: 18 additions & 0 deletions pkg/sources/github/github_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,24 @@ func TestSource_Chunks_TargetedScan(t *testing.T) {
},
wantChunks: 607,
},
{
name: "targeted scan, commit metadata",
init: init{
name: "test source",
connection: &sourcespb.GitHub{Credential: &sourcespb.GitHub_Token{Token: githubToken}},
queryCriteria: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Github{
Github: &source_metadatapb.Github{
Repository: "https://github.com/trufflesecurity/trufflehog.git",
Link: "https://github.com/trufflesecurity/trufflehog/commit/1c51106e35c3b3c327fe12e358177c03079bb771",
Commit: "1c51106e35c3b3c327fe12e358177c03079bb771",
File: "", // no file
},
},
},
},
wantChunks: 1,
},
{
name: "no file in commit",
init: init{
Expand Down