|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import json |
| 4 | +import subprocess |
| 5 | +import tempfile |
| 6 | +import time |
| 7 | +from functools import lru_cache |
| 8 | +from pathlib import Path |
| 9 | +from typing import TYPE_CHECKING, Optional |
| 10 | + |
| 11 | +import git |
| 12 | +from filelock import FileLock |
| 13 | + |
| 14 | +from codeflash.cli_cmds.console import logger |
| 15 | +from codeflash.code_utils.compat import codeflash_cache_dir |
| 16 | +from codeflash.code_utils.git_utils import check_running_in_git_repo, git_root_dir |
| 17 | + |
| 18 | +if TYPE_CHECKING: |
| 19 | + from typing import Any |
| 20 | + |
| 21 | + from git import Repo |
| 22 | + |
| 23 | + |
# Root directory under which detached worktrees are created (one subdirectory per worktree).
worktree_dirs = codeflash_cache_dir / "worktrees"
# Root directory for saved patches; each project uses a subdirectory named after its project id.
patches_dir = codeflash_cache_dir / "patches"
| 26 | + |
| 27 | +if TYPE_CHECKING: |
| 28 | + from git import Repo |
| 29 | + |
| 30 | + |
@lru_cache(maxsize=1)
def get_git_project_id() -> str:
    """Identify the current project by the hex SHA of a root commit reachable from HEAD.

    No explicit path is passed to ``git.Repo``; parent directories are searched
    for the repository. The result is cached after the first successful call.
    """
    current_repo: Repo = git.Repo(search_parent_directories=True)
    # ``max_parents=0`` limits the walk from HEAD to commits that have no parent.
    parentless_commits = list(current_repo.iter_commits(rev="HEAD", max_parents=0))
    first_root = parentless_commits[0]
    return first_root.hexsha
| 37 | + |
| 38 | + |
def create_worktree_snapshot_commit(worktree_dir: Path, commit_message: str) -> None:
    """Stage all changes in the worktree's repository and record them as one commit.

    The commit is created with ``--no-verify`` and uses ``commit_message`` as its message.
    """
    worktree_repo = git.Repo(worktree_dir, search_parent_directories=True)
    git_cmd = worktree_repo.git
    git_cmd.add(".")
    git_cmd.commit("-m", commit_message, "--no-verify")
| 43 | + |
| 44 | + |
def create_detached_worktree(module_root: Path) -> Optional[Path]:
    """Create a detached git worktree that mirrors the current checkout.

    A new worktree is added under ``worktree_dirs`` (named after the repository
    root and the current time). Uncommitted changes of the original checkout,
    including untracked files but excluding a list of common binary file types,
    are exported as a diff against ``HEAD``, applied inside the new worktree and
    committed there as "Initial Snapshot".

    Args:
        module_root: Path used to check whether we are inside a git repository.

    Returns:
        The path of the new worktree, or ``None`` when ``module_root`` is not in
        a git repository. The path is also returned when ``git apply`` fails;
        that failure is only logged.
    """
    if not check_running_in_git_repo(module_root):
        logger.warning("Module is not in a git repository. Skipping worktree creation.")
        return None
    git_root = git_root_dir()
    current_time_str = time.strftime("%Y%m%d-%H%M%S")
    worktree_dir = worktree_dirs / f"{git_root.name}-{current_time_str}"

    repository = git.Repo(git_root, search_parent_directories=True)

    repository.git.worktree("add", "-d", str(worktree_dir))

    # Get uncommitted diff from the original repo
    repository.git.add("-N", ".")  # add the index for untracked files to be included in the diff
    binary_extensions = (
        "pyc", "pyo", "pyd", "so", "dll", "whl", "egg", "egg-info", "pyz",
        "pkl", "pickle", "joblib", "npy", "npz", "h5", "hdf5", "pth", "pt", "pb", "onnx",
        "db", "sqlite", "sqlite3", "feather", "parquet",
        "jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp",
        "wav", "mp3", "ogg", "flac", "mp4", "avi", "mov", "mkv",
        "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
        "zip", "rar", "tar", "tar.gz", "tgz", "bz2", "xz",
    )  # fmt: skip
    # ":!<pattern>" is git's "exclude" pathspec syntax.
    exclude_binary_files = [f":!*.{extension}" for extension in binary_extensions]
    uni_diff_text = repository.git.diff(
        None, "HEAD", "--", *exclude_binary_files, ignore_blank_lines=True, ignore_space_at_eol=True
    )

    if not uni_diff_text.strip():
        logger.info("No uncommitted changes to copy to worktree.")
        return worktree_dir

    # Write the diff to a temporary file. UTF-8 is used explicitly (as for the
    # saved patches elsewhere in this module) so the result does not depend on
    # the platform's locale encoding.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".codeflash.patch", delete=False, encoding="utf-8"
    ) as tmp_patch_file:
        tmp_patch_file.write(uni_diff_text + "\n")  # the new line here is a must otherwise the last hunk won't be valid
    patch_path = Path(tmp_patch_file.name).resolve()

    # Apply the patch inside the worktree
    try:
        subprocess.run(
            ["git", "apply", "--ignore-space-change", "--ignore-whitespace", "--whitespace=nowarn", patch_path],
            cwd=worktree_dir,
            check=True,
        )
        create_worktree_snapshot_commit(worktree_dir, "Initial Snapshot")
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to apply patch to worktree: {e}")
    finally:
        # The file was created with delete=False, so it must be removed by hand;
        # it is only needed for the `git apply` call above.
        try:
            patch_path.unlink()
        except OSError:
            logger.warning(f"Could not remove temporary patch file: {patch_path}")

    return worktree_dir
| 87 | + |
| 88 | + |
def remove_worktree(worktree_dir: Path) -> None:
    """Force-remove the given git worktree; any failure is logged instead of raised."""
    try:
        owning_repo = git.Repo(worktree_dir, search_parent_directories=True)
        worktree_cmd = owning_repo.git.worktree
        worktree_cmd("remove", "--force", worktree_dir)
    except Exception:
        logger.exception(f"Failed to remove worktree: {worktree_dir}")
| 95 | + |
| 96 | + |
@lru_cache(maxsize=1)
def get_patches_dir_for_project() -> Path:
    """Return the patch directory of the current project.

    This is ``patches_dir`` joined with the project id (an empty string is used
    when the id is falsy). The result is cached after the first call.
    """
    root_commit_sha = get_git_project_id()
    if not root_commit_sha:
        root_commit_sha = ""
    project_subdir = patches_dir / root_commit_sha
    return Path(project_subdir)
| 101 | + |
| 102 | + |
def get_patches_metadata() -> dict[str, Any]:
    """Load the project's patch metadata, or build an empty record when none is stored.

    ``metadata.json`` in the project's patches directory is parsed and returned
    if it exists; otherwise a new dict holding the project id and an empty
    ``"patches"`` list is returned.
    """
    metadata_path = get_patches_dir_for_project() / "metadata.json"
    if not metadata_path.exists():
        return {"id": get_git_project_id() or "", "patches": []}
    with metadata_path.open("r", encoding="utf-8") as handle:
        stored = json.load(handle)
    return stored
| 110 | + |
| 111 | + |
def save_patches_metadata(patch_metadata: dict) -> dict:
    """Append one patch record to the project's ``metadata.json``.

    The record receives a timestamp-based ``"id"`` (the dict passed in is
    mutated), is appended to the stored ``"patches"`` list, and the whole
    metadata document is written back while holding a file lock.

    Args:
        patch_metadata: The record to store; its ``"id"`` key is overwritten.

    Returns:
        The same ``patch_metadata`` dict, now including its ``"id"``.
    """
    project_patches_dir = get_patches_dir_for_project()
    # Both the lock file and the metadata file live in this directory, so make
    # sure it exists rather than relying on the caller having created it.
    project_patches_dir.mkdir(parents=True, exist_ok=True)
    meta_file = project_patches_dir / "metadata.json"
    lock_file = project_patches_dir / "metadata.json.lock"

    # we are not supporting multiple concurrent optimizations within the same process, but keep that in case we decide to do so in the future.
    with FileLock(lock_file, timeout=10):
        metadata = get_patches_metadata()

        patch_metadata["id"] = time.strftime("%Y%m%d-%H%M%S")
        metadata["patches"].append(patch_metadata)

        # Written as UTF-8 to match how get_patches_metadata reads the file.
        meta_file.write_text(json.dumps(metadata, indent=2), encoding="utf-8")

    return patch_metadata
| 127 | + |
| 128 | + |
def overwrite_patch_metadata(patches: list[dict]) -> bool:
    """Replace the stored ``"patches"`` list of the project's ``metadata.json``.

    The existing metadata is loaded (or a fresh record is built when none
    exists), its ``"patches"`` entry is replaced with ``patches``, and the
    document is written back while holding a file lock.

    Args:
        patches: The complete list of patch records to store.

    Returns:
        ``True`` once the file has been written.
    """
    project_patches_dir = get_patches_dir_for_project()
    # Both the lock file and the metadata file live in this directory, so make
    # sure it exists before trying to create either of them.
    project_patches_dir.mkdir(parents=True, exist_ok=True)
    meta_file = project_patches_dir / "metadata.json"
    lock_file = project_patches_dir / "metadata.json.lock"

    with FileLock(lock_file, timeout=10):
        metadata = get_patches_metadata()
        metadata["patches"] = patches
        # Written as UTF-8 to match how get_patches_metadata reads the file.
        meta_file.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
    return True
| 139 | + |
| 140 | + |
def create_diff_patch_from_worktree(
    worktree_dir: Path,
    files: list[str],
    fto_name: Optional[str] = None,
    metadata_input: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """Save the worktree's changes to ``files`` as a patch and register it in the metadata.

    The diff of the given files against ``HEAD`` is written to
    ``<project patches dir>/<worktree name>.<function name>.patch`` and a
    metadata record pointing at that file is stored via ``save_patches_metadata``.

    Args:
        worktree_dir: The worktree whose changes should be captured.
        files: Paths to include in the diff.
        fto_name: Function name used in the patch file name. When omitted, the
            ``"fto_name"`` entry of ``metadata_input`` is used, falling back to
            ``"unknown"``.
        metadata_input: Extra key/value pairs merged into the stored record.

    Returns:
        The stored metadata record, or an empty dict when there is no diff.
    """
    repository = git.Repo(worktree_dir, search_parent_directories=True)
    # "--" separates the revision from the paths so that a file name can never
    # be mistaken for a revision (same form as the diff in create_detached_worktree).
    uni_diff_text = repository.git.diff(
        None, "HEAD", "--", *files, ignore_blank_lines=True, ignore_space_at_eol=True
    )

    if not uni_diff_text:
        logger.warning("No changes found in worktree.")
        return {}

    if not uni_diff_text.endswith("\n"):
        uni_diff_text += "\n"

    project_patches_dir = get_patches_dir_for_project()
    project_patches_dir.mkdir(parents=True, exist_ok=True)

    # metadata_input defaults to None, so guard before looking up the fallback name.
    final_function_name = fto_name or (metadata_input or {}).get("fto_name", "unknown")
    patch_path = project_patches_dir / f"{worktree_dir.name}.{final_function_name}.patch"
    with patch_path.open("w", encoding="utf8") as f:
        f.write(uni_diff_text)

    final_metadata = {"patch_path": str(patch_path)}
    if metadata_input:
        final_metadata.update(metadata_input)
    final_metadata = save_patches_metadata(final_metadata)

    return final_metadata
0 commit comments