Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.13
55 changes: 53 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ $ virtualenv .venv && source .venv/bin/activate
|`GITLAB_GROUPS_IGNORE_LIST` |Groups to ignore (separated by commas, default value is "lost-and-found"). |lost-and-found |
|`GITLAB_GROUPS_SEARCH` |Limit to those groups (separated by commas, empty means all groups). |gitlab2rbac |
|`GITLAB_NAMESPACE_GRANULARITY` |Whether to get permissions from GitLab projects or groups. |project |
|`GITLAB_NAMESPACE_MAPPING` |JSON mapping of GitLab paths to custom K8s namespace names (see below). |{} |
|`GITLAB_PRIVATE_TOKEN` |Configure gitlab API token. | |
|`GITLAB_USERNAME_IGNORE_LIST` |Gitlab users to ignore for the synchronisation | |
|`GITLAB_TIMEOUT` |Timeout for GitLab operations, in seconds. |10 |
Expand All @@ -101,6 +102,56 @@ $ virtualenv .venv && source .venv/bin/activate
|`KUBERNETES_LOAD_INCLUSTER_CONFIG` |Load configuration inside Kubernetes when gitlab2rbac runs as a pod. |False |
|`KUBERNETES_TIMEOUT` |Timeout for Kubernetes operations, in seconds. |10 |

### Custom Namespace Mapping

The `GITLAB_NAMESPACE_MAPPING` environment variable allows you to map specific GitLab groups or projects to custom Kubernetes namespace names. This is useful when:
- You want to map sub-projects to specific namespaces
- Your Kubernetes namespace naming doesn't match your GitLab structure
- You need to map one GitLab project to multiple Kubernetes namespaces

#### Example Usage

All values must be arrays (even for single namespace mappings):

```sh
# Single namespace mapping (must use array)
export GITLAB_NAMESPACE_MAPPING='{"team-data/airflow": ["airflow-system"]}'

# Multiple namespace mapping (one GitLab project to many K8s namespaces)
export GITLAB_NAMESPACE_MAPPING='{"team-data/spark": ["spark-operator", "spark"]}'

# Complete example
export GITLAB_NAMESPACE_MAPPING='{
"team-data/spark": ["spark-operator", "spark"],
"team-data/airflow": ["airflow-system"],
"team-infrastructure/kubernetes": ["kube-ops", "infrastructure"]
}'
```

Or in a Kubernetes ConfigMap:

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: gitlab2rbac
namespace: gitlab2rbac
data:
GITLAB_URL: https://gitlab.example.com
GITLAB_PRIVATE_TOKEN: your-token
GITLAB_NAMESPACE_MAPPING: |
{
"team-data/spark": ["spark-operator", "spark"],
"team-data/airflow": ["airflow-system"],
"team-infrastructure/monitoring": ["prometheus", "grafana", "alertmanager"]
}
```

With this configuration:
- Users from `team-data/spark` GitLab project will get permissions in both `spark-operator` AND `spark` Kubernetes namespaces
- Users from `team-data/airflow` will get permissions in the `airflow-system` namespace
- Users from `team-infrastructure/monitoring` will get permissions in `prometheus`, `grafana`, AND `alertmanager` namespaces

## Kubernetes cluster compatibility

The following table outlines the compatibility between gitlab2rbac versions and Kubernetes cluster versions. Ensure that you are using the correct version of gitlab2rbac for your Kubernetes cluster to maintain stability and functionality.
Expand All @@ -109,9 +160,9 @@ The following table outlines the compatibility between gitlab2rbac versions and

:green_circle: ok

| GitLab2rbac Version | k8s 1.25 | k8s 1.26 | k8s 1.27 | k8s 1.28 | k8s 1.29 | k8s 1.30 | k8s 1.31 |
| GitLab2rbac Version | k8s 1.25 | k8s 1.26 | k8s 1.27 | k8s 1.28 | k8s 1.29 | k8s 1.30 | k8s 1.31 | k8s 1.32 | k8s 1.33 |
|-------------------|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|
| **0.2.4** | :green_circle: | :green_circle: | :green_circle: | :green_circle: | :green_circle: | :green_circle: | :green_circle: |
| **0.2.6** | :green_circle: | :green_circle: | :green_circle: | :green_circle: | :green_circle: | :green_circle: | :green_circle: | :green_circle: | :green_circle: |

## License
MIT
165 changes: 139 additions & 26 deletions gitlab2rbac.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import logging
from collections import defaultdict
from contextlib import suppress
from os import environ
from time import sleep, time
from typing import Any
Expand Down Expand Up @@ -40,6 +42,7 @@ def __init__(
admins_group: str | None,
username_ignore_list: list[str],
groups_ignore_list: list[str],
namespace_mapping: dict[str, list[str]] | None = None,
) -> None:
self.client: Gitlab | None = None
self.gitlab_users: list[dict[str, Any]] = []
Expand All @@ -49,9 +52,11 @@ def __init__(
self.url = url
self.namespace_granularity = namespace_granularity
self.admins_group = admins_group
self.namespaces: list[Group | Project] = []
self.namespaces: list[tuple[Group | Project, str]] = []
self.username_ignore_list = username_ignore_list
self.groups_ignore_list = groups_ignore_list
self.namespace_mapping = namespace_mapping or {}
self.namespace_name_mapping: dict[str, list[str]] = {}

def connect(self) -> None:
"""Performs an authentication via private token.
Expand All @@ -68,10 +73,28 @@ def connect(self) -> None:
raise Exception("unable to connect on gitlab :: {}".format(e))

try:
mapped_gitlab_ids = set()
for gitlab_path, k8s_namespaces in self.namespace_mapping.items():
namespace_obj = self.get_specific_group(gitlab_path)
if namespace_obj:
mapped_gitlab_ids.add(namespace_obj.id)
for k8s_namespace in k8s_namespaces:
self.namespaces.append((namespace_obj, k8s_namespace))

self.namespace_name_mapping[namespace_obj.name] = k8s_namespaces
self.namespace_name_mapping[gitlab_path] = k8s_namespaces

if self.namespace_granularity == "group":
self.namespaces = self.get_groups()
regular_namespaces = self.get_groups()
else:
self.namespaces = self.get_projects()
regular_namespaces = self.get_projects()

for ns in regular_namespaces:
if ns.id not in mapped_gitlab_ids:
k8s_name = slugify(ns.name)
self.namespaces.append((ns, k8s_name))
self.namespace_name_mapping[ns.name] = [k8s_name]

except Exception as e:
raise Exception("unable to define namespaces :: {}".format(e))

Expand Down Expand Up @@ -116,8 +139,9 @@ def get_admins(self) -> list[dict[str, str]]:
if self.client is None:
logging.error("Gitlab client is not connected.")
return []
ns = self.client.groups.list(search=self.admins_group)
return self.get_users(from_namespaces=ns) or []
ns_list = self.client.groups.list(search=self.admins_group)
ns_tuples = [(ns, "admin") for ns in ns_list]
return self.get_users(from_namespaces=ns_tuples) or []

admins: list[dict[str, str]] = []
if self.client is None:
Expand Down Expand Up @@ -183,27 +207,29 @@ def _get_users_query_paginated(
return nodes

def get_users(
self, from_namespaces: list[Group | Project] | None = None
self, from_namespaces: list[tuple[Group | Project, str]] | None = None
) -> list[dict[str, Any]]:
"""Returns all users from groups/projects.
We use a GraphQL to minimize the queries made to Gitlab API

Args:
from_namespaces: Retrieve users from this namespaces.
Namespaces should be given as a list of 2-tuples with the
namespace object and the corresponding k8s namespace
e.g. [(group_obj, "spark-operator"), (project_obj, "data-platform")]

e.g. user {
Returns:
list[dict[str, Any]]: list for success, empty otherwise.
e.g. user {
'access_level': 'reporter',
'email': '[email protected]',
'id': '123',
'namespace': 'default'
}

Returns:
list[dict[str, Any]]: list for success, empty otherwise.
"""
try:
users: list[dict[str, Any]] = []
namespaces = from_namespaces or self.namespaces
namespace_tuples = from_namespaces or self.namespaces
query = gql(
"""
query ($first: Int, $after: String, $namespace : ID!) {
Expand Down Expand Up @@ -254,21 +280,22 @@ def get_users(
client = Client(
transport=transport, fetch_schema_from_transport=True
)
for namespace in namespaces:

for namespace_obj, k8s_namespace in namespace_tuples:
_start = time()
variable_values = {"namespace": namespace.name}
variable_values = {"namespace": namespace_obj.name}
members = self._get_users_query_paginated(
client, query, variable_values
)
timespent = time() - _start
logging.debug(
f"Fetched members of group {namespace.name} in {timespent} seconds"
f"Fetched members of group {namespace_obj.name} for k8s namespace {k8s_namespace} in {timespent} seconds"
)
for member in members:
# ignore user if it doesn't pass some checks
if not self.check_user(member["user"]):
continue

user = {
"access_level": member["accessLevel"]["integerValue"],
"email": member["user"]["emails"]["edges"][0]["node"][
Expand All @@ -277,13 +304,14 @@ def get_users(
"id": member["user"]["id"].replace(
"gid://gitlab/User/", ""
),
"namespace": slugify(namespace.name),
"namespace": k8s_namespace,
"username": member["user"]["username"],
}
users.append(user)
logging.info(
"|namespace={} user={} email={} access_level={}".format(
namespace.name,
"|gitlab={} k8s_namespace={} user={} email={} access_level={}".format(
namespace_obj.name,
k8s_namespace,
user["username"],
user["email"],
user["access_level"],
Expand All @@ -295,6 +323,33 @@ def get_users(
exit(1)
return []

def get_specific_group(self, full_path: str) -> Group | Project | None:
    """Get a specific group or project by its full path.

    The path is looked up as a group first and, if that fails, as a
    project. A failed lookup is deliberately non-fatal: this method never
    raises, it returns None instead. The error raised by each failed
    lookup is kept and included in the final warning, so that a failure
    other than "not found" is visible in the logs.

    Args:
        full_path: Full path to the group or project (e.g., "project/kubernetes/spark")

    Returns:
        Group or Project object if found, None otherwise.
    """
    if self.client is None:
        logging.error("Gitlab client is not connected.")
        return None

    # An empty path cannot designate a group or a project; skip the API calls.
    if not full_path:
        logging.warning(f"Unable to find group or project at path: {full_path}")
        return None

    # Try to get `full_path` as either a group or a project, in that order.
    lookup_errors: list[str] = []
    for kind, manager_name in (("group", "groups"), ("project", "projects")):
        try:
            found = getattr(self.client, manager_name).get(full_path)
        except Exception as e:
            # Best effort: remember why this lookup failed and try the next
            # kind instead of aborting.
            lookup_errors.append(f"{kind}: {e}")
            continue
        logging.info(f"|found mapped {kind}={full_path}")
        return found

    logging.warning(
        f"Unable to find group or project at path: {full_path} "
        f"({'; '.join(lookup_errors)})"
    )
    return None

def get_groups(self) -> list[Group]:
groups: list[Group] = []
if self.client is None:
Expand Down Expand Up @@ -367,27 +422,28 @@ def get_namespaces(self) -> list[str]:
logging.error("unable to retrieve namespaces :: {}".format(e))
return []

def auto_create(self, namespaces: list[Group | Project]) -> list[Any]:
def auto_create(self, namespaces: list[tuple[Group | Project, str]]) -> list[Any]:
try:
if self.client_core is None:
logging.error("Kubernetes CoreV1Api client is not connected.")
return []
for namespace in namespaces:
slug_namespace = slugify(namespace.name)

for namespace_obj, k8s_namespace in namespaces:
labels = {
"app.kubernetes.io/name": slug_namespace,
"app.kubernetes.io/name": k8s_namespace,
"app.kubernetes.io/managed-by": "gitlab2rbac",
"gitlab2rbac.kubernetes.io/gitlab-name": namespace_obj.name,
}
if self.check_namespace(name=slug_namespace):
if self.check_namespace(name=k8s_namespace):
continue
metadata = kubernetes.client.V1ObjectMeta(
name=slug_namespace, labels=labels
name=k8s_namespace, labels=labels
)
namespace_body = kubernetes.client.V1Namespace(
metadata=metadata
)
self.client_core.create_namespace(body=namespace_body)
logging.info("auto create namespace={}".format(slug_namespace))
logging.info("auto create namespace={} (gitlab={})".format(k8s_namespace, namespace_obj.name))
except ApiException as e:
error = "unable to auto create :: {}".format(
eval(e.body)["message"]
Expand Down Expand Up @@ -625,9 +681,47 @@ def __init__(

def __call__(self) -> None:
if self.kubernetes_auto_create:
# When auto-creating, create namespaces first, then fetch all users
self.kubernetes.auto_create(namespaces=self.gitlab.namespaces)
gitlab_users = self.gitlab.get_users()
else:
# When not auto-creating, filter namespaces first, then fetch users only from existing ones
existing_k8s_namespaces = set(self.kubernetes.get_namespaces())

filtered_namespaces = []
missing_namespaces = set()
skipped_gitlab_groups = set()

for gitlab_obj, k8s_namespace in self.gitlab.namespaces:
if k8s_namespace in existing_k8s_namespaces:
filtered_namespaces.append((gitlab_obj, k8s_namespace))
else:
missing_namespaces.add(k8s_namespace)
skipped_gitlab_groups.add(gitlab_obj.name)

if missing_namespaces:
logging.warning(
f"Found {len(missing_namespaces)} non-existent Kubernetes namespace(s). "
f"Skipping user fetch from {len(skipped_gitlab_groups)} GitLab group(s)/project(s)."
)
for ns in sorted(missing_namespaces):
logging.info(
f" - Namespace '{ns}' does not exist. "
f"Enable KUBERNETES_AUTO_CREATE or create it manually."
)
logging.info(f"Skipped GitLab groups/projects: {', '.join(sorted(skipped_gitlab_groups))}")

if filtered_namespaces:
gitlab_users = self.gitlab.get_users(from_namespaces=filtered_namespaces)
logging.info(
f"Fetched users from {len(filtered_namespaces)} GitLab group(s)/project(s) "
f"with existing Kubernetes namespaces"
)
else:
gitlab_users = []
logging.warning("No GitLab groups/projects have corresponding Kubernetes namespaces")

gitlab_users = self.gitlab.get_users()
# Fetch admins separately (they don't depend on namespaces)
gitlab_admins = self.gitlab.get_admins()

self.create_admin_role_bindings(admins=gitlab_admins)
Expand Down Expand Up @@ -710,6 +804,24 @@ def main() -> None:
GITLAB_GROUPS_IGNORE_LIST = environ.get(
"GITLAB_GROUPS_IGNORE_LIST", "lost-and-found"
).split(",")

GITLAB_NAMESPACE_MAPPING = {}
namespace_mapping_str = environ.get("GITLAB_NAMESPACE_MAPPING", "")
if namespace_mapping_str:
try:
raw_mapping = json.loads(namespace_mapping_str)
for gitlab_path, k8s_namespaces in raw_mapping.items():
if isinstance(k8s_namespaces, list):
GITLAB_NAMESPACE_MAPPING[gitlab_path] = k8s_namespaces
else:
logging.error(f"Invalid value type for {gitlab_path}: expected list of strings, got {type(k8s_namespaces)}")
raise ValueError(f"All values in GITLAB_NAMESPACE_MAPPING must be arrays")
logging.info(f"Loaded namespace mapping: {GITLAB_NAMESPACE_MAPPING}")
except json.JSONDecodeError as e:
logging.error(f"Failed to parse GITLAB_NAMESPACE_MAPPING: {e}")
logging.error("Expected JSON format, e.g.: '{\"team-data/spark\": [\"spark-operator\", \"spark\"]}'")
except ValueError as e:
logging.error(f"Invalid GITLAB_NAMESPACE_MAPPING format: {e}")

if not GITLAB_URL or not GITLAB_PRIVATE_TOKEN:
raise Exception(
Expand All @@ -726,6 +838,7 @@ def main() -> None:
admins_group=GITLAB_ADMINS_GROUP,
username_ignore_list=GITLAB_USERNAME_IGNORE_LIST,
groups_ignore_list=GITLAB_GROUPS_IGNORE_LIST,
namespace_mapping=GITLAB_NAMESPACE_MAPPING,
)
gitlab_helper.connect()

Expand Down
12 changes: 12 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
[project]
name = "gitlab2rbac"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"gql>=3.5.3",
"kubernetes>=33.1.0",
"python-gitlab>=6.2.0",
"slugify>=0.0.1",
]
[tool.black]
line-length = 79
include = '\.pyi?$'
Expand Down
Loading