Skip to content

Commit d228c47

Browse files
committed
idk
1 parent 4561fc1 commit d228c47

File tree

2 files changed

+88
-69
lines changed

2 files changed

+88
-69
lines changed

src/gitingest/clone.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99

1010
from gitingest.config import DEFAULT_TIMEOUT
1111
from gitingest.utils.git_utils import (
12-
_add_token_to_url,
1312
check_repo_exists,
1413
checkout_partial_clone,
1514
create_git_repo,
1615
ensure_git_installed,
16+
git_auth_context,
1717
is_github_host,
1818
resolve_commit,
1919
)
@@ -86,12 +86,7 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
8686
commit = await resolve_commit(config, token=token)
8787
logger.debug("Resolved commit", extra={"commit": commit})
8888

89-
# Prepare URL with authentication if needed
90-
clone_url = url
91-
if token and is_github_host(url):
92-
clone_url = _add_token_to_url(url, token)
93-
94-
# Clone the repository using GitPython
89+
# Clone the repository using GitPython with proper authentication
9590
logger.info("Executing git clone operation", extra={"url": "<redacted>", "local_path": local_path})
9691
try:
9792
clone_kwargs = {
@@ -100,17 +95,20 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
10095
"depth": 1,
10196
}
10297

103-
if partial_clone:
104-
# GitPython doesn't directly support --filter and --sparse in clone
105-
# We'll need to use git.Git() for the initial clone with these options
106-
git_cmd = git.Git()
107-
cmd_args = ["--single-branch", "--no-checkout", "--depth=1"]
98+
with git_auth_context(url, token) as (git_cmd, auth_url):
10899
if partial_clone:
100+
# For partial clones, use git.Git() with filter and sparse options
101+
cmd_args = ["--single-branch", "--no-checkout", "--depth=1"]
109102
cmd_args.extend(["--filter=blob:none", "--sparse"])
110-
cmd_args.extend([clone_url, local_path])
111-
git_cmd.clone(*cmd_args)
112-
else:
113-
git.Repo.clone_from(clone_url, local_path, **clone_kwargs)
103+
cmd_args.extend([auth_url, local_path])
104+
git_cmd.clone(*cmd_args)
105+
elif token and is_github_host(url):
106+
# For authenticated GitHub repos, use git_cmd with auth URL
107+
cmd_args = ["--single-branch", "--no-checkout", "--depth=1", auth_url, local_path]
108+
git_cmd.clone(*cmd_args)
109+
else:
110+
# For non-authenticated repos, use the standard GitPython method
111+
git.Repo.clone_from(url, local_path, **clone_kwargs)
114112

115113
logger.info("Git clone completed successfully")
116114
except git.GitCommandError as exc:

src/gitingest/utils/git_utils.py

Lines changed: 74 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
import os
88
import re
99
import sys
10+
from contextlib import contextmanager
1011
from pathlib import Path
11-
from typing import TYPE_CHECKING, Final, Iterable
12+
from typing import TYPE_CHECKING, Final, Generator, Iterable
1213
from urllib.parse import urlparse, urlunparse
1314

1415
import git
@@ -217,13 +218,6 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
217218

218219
# Use GitPython to get remote references
219220
try:
220-
git_cmd = git.Git()
221-
222-
# Prepare authentication if needed
223-
if token and is_github_host(url):
224-
auth_url = _add_token_to_url(url, token)
225-
url = auth_url
226-
227221
fetch_tags = ref_type == "tags"
228222
to_fetch = "tags" if fetch_tags else "heads"
229223

@@ -233,8 +227,11 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
233227
cmd_args.append("--refs") # Filter out peeled tag objects
234228
cmd_args.append(url)
235229

236-
# Run the command using git_cmd.ls_remote() method
237-
output = git_cmd.ls_remote(*cmd_args)
230+
# Run the command with proper authentication
231+
with git_auth_context(url, token) as (git_cmd, auth_url):
232+
# Replace the URL in cmd_args with the authenticated URL
233+
cmd_args[-1] = auth_url # URL is the last argument
234+
output = git_cmd.ls_remote(*cmd_args)
238235

239236
# Parse output
240237
return [
@@ -318,6 +315,70 @@ def create_git_auth_header(token: str, url: str = "https://github.com") -> str:
318315
return f"http.https://{hostname}/.extraheader=Authorization: Basic {basic}"
319316

320317

318+
def create_authenticated_url(url: str, token: str | None = None) -> str:
    """Create an authenticated URL for Git operations.

    The credentials are carried only in the returned string; this function
    does not modify any Git configuration or other shared state.

    Parameters
    ----------
    url : str
        The repository URL.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    Returns
    -------
    str
        The URL with credentials embedded when a token is supplied and
        ``is_github_host(url)`` is true; otherwise ``url`` unchanged.

    """
    # Function-scope import: ``quote`` is only needed here.
    from urllib.parse import quote

    # Guard clause: without a token, or for a non-GitHub host, return the URL untouched.
    if not (token and is_github_host(url)):
        return url

    parsed = urlparse(url)

    # The userinfo part is "x-oauth-basic:<token>": a fixed username with the
    # token in the password position. Percent-encode the token so characters
    # such as "@", ":", "/" or "#" cannot terminate the userinfo early and
    # corrupt the host or path. Plain alphanumeric/underscore tokens are
    # left unchanged by the encoding.
    encoded_token = quote(token, safe="")
    netloc = f"x-oauth-basic:{encoded_token}@{parsed.hostname}"
    if parsed.port:
        netloc += f":{parsed.port}"

    # Rebuild the URL with only the network location swapped out.
    return urlunparse(
        (
            parsed.scheme,
            netloc,
            parsed.path,
            parsed.params,
            parsed.query,
            parsed.fragment,
        ),
    )
355+
356+
357+
@contextmanager
def git_auth_context(url: str, token: str | None = None) -> Generator[tuple[git.Git, str], None, None]:
    """Context manager that provides a Git command object and the URL to use with it.

    A fresh ``git.Git()`` object is created for every call, and the URL is
    produced by ``create_authenticated_url(url, token)``. No work is performed
    when the ``with`` block exits.

    Parameters
    ----------
    url : str
        The repository URL to check if authentication is needed.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.

    Yields
    ------
    tuple[git.Git, str]
        Tuple of (Git command object, authenticated URL to use).

    """
    # All three Generator parameters are spelled out: the single-argument
    # form is only accepted by ``typing.Generator`` from Python 3.13 onwards.
    git_cmd = git.Git()
    auth_url = create_authenticated_url(url, token)
    yield git_cmd, auth_url
380+
381+
321382
def validate_github_token(token: str) -> None:
322383
"""Validate the format of a GitHub Personal Access Token.
323384
@@ -419,15 +480,9 @@ async def _resolve_ref_to_sha(url: str, pattern: str, token: str | None = None)
419480
420481
"""
421482
try:
422-
git_cmd = git.Git()
423-
424-
# Prepare authentication if needed
425-
auth_url = url
426-
if token and is_github_host(url):
427-
auth_url = _add_token_to_url(url, token)
428-
429-
# Execute ls-remote command
430-
output = git_cmd.ls_remote(auth_url, pattern)
483+
# Execute ls-remote command with proper authentication
484+
with git_auth_context(url, token) as (git_cmd, auth_url):
485+
output = git_cmd.ls_remote(auth_url, pattern)
431486
lines = output.splitlines()
432487

433488
sha = _pick_commit_sha(lines)
@@ -475,37 +530,3 @@ def _pick_commit_sha(lines: Iterable[str]) -> str | None:
475530
first_non_peeled = sha
476531

477532
return first_non_peeled # branch or lightweight tag (or None)
478-
479-
480-
def _add_token_to_url(url: str, token: str) -> str:
481-
"""Add authentication token to GitHub URL.
482-
483-
Parameters
484-
----------
485-
url : str
486-
The original GitHub URL.
487-
token : str
488-
The GitHub token to add.
489-
490-
Returns
491-
-------
492-
str
493-
The URL with embedded authentication.
494-
495-
"""
496-
parsed = urlparse(url)
497-
# Add token as username in URL (GitHub supports this)
498-
netloc = f"x-oauth-basic:{token}@{parsed.hostname}"
499-
if parsed.port:
500-
netloc += f":{parsed.port}"
501-
502-
return urlunparse(
503-
(
504-
parsed.scheme,
505-
netloc,
506-
parsed.path,
507-
parsed.params,
508-
parsed.query,
509-
parsed.fragment,
510-
),
511-
)

0 commit comments

Comments
 (0)