Skip to content

Commit a95f8bc

Browse files
Scan GitLab Groups (#4320)
* gitlab groups init * added list group projects api * list group projects updated * added duplicate repo scan check * comments addressed * added error when repo and group id flags are provided at the same time * added test case for gitlab group projects
1 parent 05e2328 commit a95f8bc

File tree

7 files changed

+706
-532
lines changed

7 files changed

+706
-532
lines changed

main.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ var (
136136
gitlabScanEndpoint = gitlabScan.Flag("endpoint", "GitLab endpoint.").Default("https://gitlab.com").String()
137137
gitlabScanRepos = gitlabScan.Flag("repo", "GitLab repo url. You can repeat this flag. Leave empty to scan all repos accessible with provided credential. Example: https://gitlab.com/org/repo.git").Strings()
138138
gitlabScanToken = gitlabScan.Flag("token", "GitLab token. Can be provided with environment variable GITLAB_TOKEN.").Envar("GITLAB_TOKEN").Required().String()
139+
gitlabScanGroupIds = gitlabScan.Flag("group-id", "GitLab group ID. If provided, it will scan the group and its subgroups. You can repeat this flag.").Strings()
139140
gitlabScanIncludePaths = gitlabScan.Flag("include-paths", "Path to file with newline separated regexes for files to include in scan.").Short('i').String()
140141
gitlabScanExcludePaths = gitlabScan.Flag("exclude-paths", "Path to file with newline separated regexes for files to exclude in scan.").Short('x').String()
141142
gitlabScanIncludeRepos = gitlabScan.Flag("include-repos", `Repositories to include in an org scan. This can also be a glob pattern. You can repeat this flag. Must use Gitlab repo full name. Example: "trufflesecurity/trufflehog", "trufflesecurity/t*"`).Strings()
@@ -788,10 +789,15 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
788789
return scanMetrics, fmt.Errorf("could not create filter: %v", err)
789790
}
790791

792+
if len(*gitlabScanRepos) > 0 && len(*gitlabScanGroupIds) > 0 {
793+
return scanMetrics, fmt.Errorf("invalid config: you cannot specify both repositories and groups at the same time")
794+
}
795+
791796
cfg := sources.GitlabConfig{
792797
Endpoint: *gitlabScanEndpoint,
793798
Token: *gitlabScanToken,
794799
Repos: *gitlabScanRepos,
800+
GroupIds: *gitlabScanGroupIds,
795801
IncludeRepos: *gitlabScanIncludeRepos,
796802
ExcludeRepos: *gitlabScanExcludeRepos,
797803
Filter: filter,

pkg/engine/gitlab.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ func (e *Engine) ScanGitLab(ctx context.Context, c sources.GitlabConfig) (source
4646
connection.Repositories = c.Repos
4747
}
4848

49+
if len(c.GroupIds) > 0 {
50+
connection.GroupIds = c.GroupIds
51+
}
52+
4953
if len(c.IncludeRepos) > 0 {
5054
connection.IncludeRepos = c.IncludeRepos
5155
}

pkg/pb/sourcespb/sources.pb.go

Lines changed: 519 additions & 509 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/sources/gitlab/gitlab.go

Lines changed: 139 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ type Source struct {
4444
token string
4545
url string
4646
repos []string
47+
groupIds []string
4748
ignoreRepos []string
4849
includeRepos []string
4950

@@ -158,6 +159,7 @@ func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sou
158159
}
159160

160161
s.repos = conn.GetRepositories()
162+
s.groupIds = conn.GetGroupIds()
161163
s.ignoreRepos = conn.GetIgnoreRepos()
162164
s.includeRepos = conn.GetIncludeRepos()
163165
s.enumerateSharedProjects = !conn.ExcludeProjectsSharedIntoGroups
@@ -266,14 +268,9 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, tar
266268
return ctx.Err()
267269
},
268270
}
269-
if feature.UseSimplifiedGitlabEnumeration.Load() {
270-
if err := s.getAllProjectReposV2(ctx, apiClient, ignoreRepo, reporter); err != nil {
271-
return err
272-
}
273-
} else {
274-
if err := s.getAllProjectRepos(ctx, apiClient, ignoreRepo, reporter); err != nil {
275-
return err
276-
}
271+
272+
if err := s.listProjects(ctx, apiClient, ignoreRepo, reporter); err != nil {
273+
return err
277274
}
278275

279276
} else {
@@ -287,6 +284,21 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, tar
287284
return s.scanRepos(ctx, chunksChan)
288285
}
289286

287+
func (s *Source) listProjects(ctx context.Context,
288+
apiClient *gitlab.Client,
289+
ignoreProject func(string) bool,
290+
visitor sources.UnitReporter) error {
291+
if len(s.groupIds) > 0 {
292+
return s.getAllProjectReposInGroups(ctx, apiClient, ignoreProject, visitor)
293+
}
294+
295+
if feature.UseSimplifiedGitlabEnumeration.Load() {
296+
return s.getAllProjectReposV2(ctx, apiClient, ignoreProject, visitor)
297+
}
298+
299+
return s.getAllProjectRepos(ctx, apiClient, ignoreProject, visitor)
300+
}
301+
290302
func (s *Source) scanTargets(ctx context.Context, client *gitlab.Client, targets []sources.ChunkingTarget, chunksChan chan *sources.Chunk) error {
291303
ctx = context.WithValues(ctx, "scan_type", "targeted")
292304
for _, tgt := range targets {
@@ -401,16 +413,9 @@ func (s *Source) Validate(ctx context.Context) []error {
401413
},
402414
}
403415

404-
if feature.UseSimplifiedGitlabEnumeration.Load() {
405-
if err := s.getAllProjectReposV2(ctx, apiClient, ignoreProject, visitor); err != nil {
406-
errs = append(errs, err)
407-
return errs
408-
}
409-
} else {
410-
if err := s.getAllProjectRepos(ctx, apiClient, ignoreProject, visitor); err != nil {
411-
errs = append(errs, err)
412-
return errs
413-
}
416+
if err := s.listProjects(ctx, apiClient, ignoreProject, visitor); err != nil {
417+
errs = append(errs, err)
418+
return errs
414419
}
415420

416421
if len(repos) == 0 {
@@ -478,7 +483,6 @@ func (s *Source) getAllProjectRepos(
478483
reporter sources.UnitReporter,
479484
) error {
480485
gitlabReposEnumerated.WithLabelValues(s.name).Set(0)
481-
482486
// Projects without repo will get user projects, groups projects, and subgroup projects.
483487
user, _, err := apiClient.Users.CurrentUser()
484488
if err != nil {
@@ -728,6 +732,118 @@ func (s *Source) getAllProjectReposV2(
728732
return nil
729733
}
730734

735+
// getAllProjectReposInGroups fetches all projects in a GitLab group and its subgroups.
736+
// It uses the group projects API with include_subgroups=true parameter.
737+
func (s *Source) getAllProjectReposInGroups(
738+
ctx context.Context,
739+
apiClient *gitlab.Client,
740+
ignoreRepo func(string) bool,
741+
reporter sources.UnitReporter,
742+
) error {
743+
gitlabReposEnumerated.WithLabelValues(s.name).Set(0)
744+
gitlabGroupsEnumerated.WithLabelValues(s.name).Set(float64(len(s.groupIds)))
745+
746+
processedProjects := make(map[string]bool)
747+
748+
var projectsWithNamespace []string
749+
const (
750+
orderBy = "id"
751+
paginationLimit = 100
752+
)
753+
754+
listOpts := gitlab.ListOptions{PerPage: paginationLimit}
755+
projectOpts := &gitlab.ListGroupProjectsOptions{
756+
ListOptions: listOpts,
757+
OrderBy: gitlab.Ptr(orderBy),
758+
IncludeSubGroups: gitlab.Ptr(true),
759+
WithShared: gitlab.Ptr(true),
760+
}
761+
762+
// For non gitlab.com instances, you might want to adjust access levels
763+
if s.url != gitlabBaseURL {
764+
projectOpts.MinAccessLevel = gitlab.Ptr(gitlab.GuestPermissions)
765+
}
766+
767+
ctx.Logger().Info("starting group projects enumeration",
768+
"group_ids", s.groupIds,
769+
"include_subgroups", true,
770+
"list_options", listOpts)
771+
772+
for _, groupID := range s.groupIds {
773+
groupCtx := context.WithValues(ctx, "group_id", groupID)
774+
775+
projectOpts.Page = 0
776+
groupCtx.Logger().V(2).Info("processing group", "group_id", groupID)
777+
778+
for {
779+
projects, res, err := apiClient.Groups.ListGroupProjects(groupID, projectOpts)
780+
if err != nil {
781+
err = fmt.Errorf("received error on listing projects for group %s: %w", groupID, err)
782+
if err := reporter.UnitErr(ctx, err); err != nil {
783+
return err
784+
}
785+
break
786+
}
787+
788+
groupCtx.Logger().V(3).Info("listed group projects", "count", len(projects))
789+
790+
for _, proj := range projects {
791+
projCtx := context.WithValues(ctx,
792+
"project_id", proj.ID,
793+
"project_name", proj.NameWithNamespace,
794+
"group_id", groupID)
795+
796+
if processedProjects[proj.HTTPURLToRepo] {
797+
projCtx.Logger().V(3).Info("skipping project", "reason", "already processed")
798+
continue
799+
}
800+
processedProjects[proj.HTTPURLToRepo] = true
801+
802+
// skip projects configured to be ignored.
803+
if ignoreRepo(proj.PathWithNamespace) {
804+
projCtx.Logger().V(3).Info("skipping project", "reason", "ignored in config")
805+
continue
806+
}
807+
808+
// report an error if we could not convert the project into a URL.
809+
if _, err := url.Parse(proj.HTTPURLToRepo); err != nil {
810+
projCtx.Logger().V(3).Info("skipping project",
811+
"reason", "URL parse failure",
812+
"url", proj.HTTPURLToRepo,
813+
"parse_error", err)
814+
815+
err = fmt.Errorf("could not parse url %q given by project: %w", proj.HTTPURLToRepo, err)
816+
if err := reporter.UnitErr(ctx, err); err != nil {
817+
return err
818+
}
819+
continue
820+
}
821+
822+
// report the unit.
823+
projCtx.Logger().V(3).Info("accepting project")
824+
825+
unit := git.SourceUnit{Kind: git.UnitRepo, ID: proj.HTTPURLToRepo}
826+
gitlabReposEnumerated.WithLabelValues(s.name).Inc()
827+
projectsWithNamespace = append(projectsWithNamespace, proj.NameWithNamespace)
828+
829+
if err := reporter.UnitOk(ctx, unit); err != nil {
830+
return err
831+
}
832+
}
833+
834+
// handle pagination.
835+
projectOpts.Page = res.NextPage
836+
if res.NextPage == 0 {
837+
break
838+
}
839+
}
840+
}
841+
842+
ctx.Logger().Info("Enumerated GitLab group projects", "count", len(projectsWithNamespace))
843+
844+
return nil
845+
}
846+
731847
func (s *Source) scanRepos(ctx context.Context, chunksChan chan *sources.Chunk) error {
732848
// If there is resume information available, limit this scan to only the repos that still need scanning.
733849
reposToScan, progressIndexOffset := sources.FilterReposToResume(s.repos, s.GetProgress().EncodedResumeInfo)
@@ -937,11 +1053,11 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e
9371053
_ = reporter.UnitErr(ctx, fmt.Errorf("could not compile include/exclude repo glob: %w", err))
9381054
})
9391055

940-
if feature.UseSimplifiedGitlabEnumeration.Load() {
941-
return s.getAllProjectReposV2(ctx, apiClient, ignoreRepo, reporter)
942-
} else {
943-
return s.getAllProjectRepos(ctx, apiClient, ignoreRepo, reporter)
1056+
if err := s.listProjects(ctx, apiClient, ignoreRepo, reporter); err != nil {
1057+
return err
9441058
}
1059+
1060+
return nil
9451061
}
9461062

9471063
// ChunkUnit downloads and reports chunks for the given GitLab repository unit.

pkg/sources/gitlab/gitlab_integration_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,41 @@ func TestSource_Scan(t *testing.T) {
136136
},
137137
wantReposScanned: 1,
138138
},
139+
{
140+
name: "token auth, group projects enumeration with include_subgroups",
141+
init: init{
142+
name: "test source group enumeration",
143+
connection: &sourcespb.GitLab{
144+
Credential: &sourcespb.GitLab_Token{
145+
Token: token,
146+
},
147+
GroupIds: []string{"15013490"},
148+
},
149+
},
150+
wantChunk: &sources.Chunk{
151+
SourceType: sourcespb.SourceType_SOURCE_TYPE_GITLAB,
152+
SourceName: "test source group enumeration",
153+
},
154+
wantReposScanned: 5,
155+
},
156+
{
157+
name: "token auth, group projects enumeration with include_subgroups and exclude repositories",
158+
init: init{
159+
name: "test source group enumeration with exclude repos",
160+
connection: &sourcespb.GitLab{
161+
Credential: &sourcespb.GitLab_Token{
162+
Token: token,
163+
},
164+
GroupIds: []string{"15013490"},
165+
IgnoreRepos: []string{"tes1188/test-user-count"},
166+
},
167+
},
168+
wantChunk: &sources.Chunk{
169+
SourceType: sourcespb.SourceType_SOURCE_TYPE_GITLAB,
170+
SourceName: "test source group enumeration with exclude repos",
171+
},
172+
wantReposScanned: 4,
173+
},
139174
}
140175

141176
for _, tt := range tests {

pkg/sources/sources.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,8 @@ type GitlabConfig struct {
342342
Token string
343343
// Repos is the list of repositories to scan.
344344
Repos []string
345+
// GroupIds is the list of groups to scan.
346+
GroupIds []string
345347
// Filter is the filter to use to scan the source.
346348
Filter *common.Filter
347349
// SkipBinaries allows skipping binary files from the scan.

proto/sources.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ message GitLab {
233233
repeated string include_repos = 9;
234234
bool exclude_projects_shared_into_groups = 10;
235235
bool remove_auth_in_url = 11;
236+
repeated string group_ids = 12;
236237
}
237238

238239
message GitHub {

0 commit comments

Comments
 (0)