Skip to content

Commit 6c2c6a4

Browse files
committed
Parallelize postgres build make invocations
- Add ThreadPoolExecutor to parallelize work_dirs builds in make_postgres()
- Extract build_work_dir() method for parallel execution
- Use CPU core count to determine optimal worker count
- Add comprehensive error handling and logging
- Maintain backward compatibility with existing functionality

Fixes #27196
1 parent d3bc257 commit 6c2c6a4

File tree

1 file changed

+81
-43
lines changed

1 file changed

+81
-43
lines changed

python/yugabyte/build_postgres.py

Lines changed: 81 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import subprocess
2929
import sys
3030
import time
31+
from concurrent.futures import ThreadPoolExecutor, as_completed
3132

3233
from contextlib import contextmanager
3334
from overrides import overrides
@@ -810,8 +811,6 @@ def make_postgres(self) -> None:
810811

811812
env_script_content = self.get_env_script_content()
812813

813-
pg_compile_commands_paths = []
814-
815814
external_extension_dirs = [os.path.join(self.pg_build_root, d) for d
816815
in ('third-party-extensions', 'yb-extensions')]
817816
work_dirs = [
@@ -821,47 +820,36 @@ def make_postgres(self) -> None:
821820
os.path.join(self.pg_build_root, 'src/tools/pg_bsd_indent'),
822821
] + external_extension_dirs
823822

824-
# TODO(#27196): parallelize this for loop.
825-
for work_dir in work_dirs:
826-
# Postgresql requires MAKELEVEL to be 0 or non-set when calling its make.
827-
# But in the case where the YB project is built with make,
828-
# MAKELEVEL is not 0 at this point. We temporarily unset MAKELEVEL to
829-
# deal with this.
830-
with WorkDirContext(work_dir), SavedEnviron('MAKELEVEL'):
831-
self.write_debug_scripts(env_script_content)
832-
833-
make_cmd_suffix = []
834-
if work_dir in external_extension_dirs:
835-
make_cmd_suffix = ['PG_CONFIG=' + self.pg_config_path]
836-
837-
# Actually run Make.
838-
if is_verbose_mode():
839-
logging.info("Running make in the %s directory", work_dir)
840-
841-
complete_make_cmd = make_cmd + make_cmd_suffix
842-
complete_make_cmd_str = shlex_join(complete_make_cmd)
843-
self.run_make_with_retries(work_dir, complete_make_cmd_str)
844-
845-
if self.build_type != 'compilecmds' or work_dir == self.pg_build_root:
846-
self.run_make_install(make_cmd, make_cmd_suffix)
847-
else:
848-
logging.info(
849-
"Not running 'make install' in the %s directory since we are only "
850-
"generating the compilation database", work_dir)
851-
852-
if self.export_compile_commands and not self.skip_pg_compile_commands:
853-
logging.info("Generating the compilation database in directory '%s'", work_dir)
854-
855-
compile_commands_path = os.path.join(work_dir, 'compile_commands.json')
856-
with SavedEnviron(YB_PG_SKIP_CONFIG_STATUS='1'):
857-
if not os.path.exists(compile_commands_path):
858-
run_program(
859-
['compiledb', 'make', '-n'] + make_cmd_suffix, capture_output=False)
860-
861-
if not os.path.exists(compile_commands_path):
862-
raise RuntimeError("Failed to generate compilation database at: %s" %
863-
compile_commands_path)
864-
pg_compile_commands_paths.append(compile_commands_path)
823+
# Parallelize the build process for work_dirs
824+
# Use ThreadPoolExecutor to run builds in parallel
825+
max_workers = min(len(work_dirs), multiprocessing.cpu_count())
826+
logging.info("Starting parallel build with %d workers for %d work directories",
827+
max_workers, len(work_dirs))
828+
829+
pg_compile_commands_paths = []
830+
start_time_sec = time.time()
831+
832+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
833+
# Submit all work directory builds to the executor
834+
future_to_work_dir = {
835+
executor.submit(self.build_work_dir, work_dir, make_cmd, env_script_content,
836+
external_extension_dirs): work_dir
837+
for work_dir in work_dirs
838+
}
839+
840+
# Collect results as they complete
841+
for future in as_completed(future_to_work_dir):
842+
work_dir = future_to_work_dir[future]
843+
try:
844+
compile_commands_paths = future.result()
845+
pg_compile_commands_paths.extend(compile_commands_paths)
846+
logging.info("Completed build for work directory: %s", work_dir)
847+
except Exception as exc:
848+
logging.error("Build failed for work directory %s: %s", work_dir, exc)
849+
raise RuntimeError(f"PostgreSQL build failed in directory {work_dir}: {exc}")
850+
851+
elapsed_time_sec = time.time() - start_time_sec
852+
logging.info("Parallel build completed in %.2f sec", elapsed_time_sec)
865853

866854
if self.export_compile_commands:
867855
self.write_compile_commands_files(pg_compile_commands_paths)
@@ -911,6 +899,56 @@ def run_make_with_retries(self, work_dir: str, complete_make_cmd_str: str) -> No
911899
raise RuntimeError(
912900
f"Maximum build attempts reached ({TRANSIENT_BUILD_RETRIES} attempts).")
913901

902+
def build_work_dir(self, work_dir: str, make_cmd: List[str], env_script_content: str,
                   external_extension_dirs: List[str]) -> List[str]:
    """
    Run the make-based build steps for a single work directory.

    In order, this writes the debug scripts, runs make via run_make_with_retries(),
    runs 'make install' (skipped only when build_type is 'compilecmds' and the
    directory is not the Postgres build root), and, if the compilation database was
    requested, makes sure compile_commands.json exists in the directory.

    make_postgres() submits one call of this method per work directory to a
    ThreadPoolExecutor.

    NOTE(review): WorkDirContext and SavedEnviron look like they change the
    process-wide working directory and environment. If so, concurrent calls from
    several threads would interfere with one another -- confirm before relying on
    parallel invocation.

    :param work_dir: the directory to build in.
    :param make_cmd: the base make command line, shared by all work directories.
    :param env_script_content: passed through to write_debug_scripts().
    :param external_extension_dirs: work directories whose make invocations get an
        extra PG_CONFIG=<self.pg_config_path> argument.
    :return: a one-element list with the path to this directory's
        compile_commands.json, or an empty list when the compilation database was
        not requested.
    :raises RuntimeError: if compile_commands.json is still missing after the
        generation step.
    """
    # Postgres insists that MAKELEVEL is either unset or 0 when its make is invoked.
    # When the YB build itself is driven by make, MAKELEVEL is already non-zero here,
    # so it is cleared for the duration of this block.
    with WorkDirContext(work_dir), SavedEnviron('MAKELEVEL'):
        self.write_debug_scripts(env_script_content)

        # Only the external extension directories get the PG_CONFIG argument.
        extra_make_args = (
            ['PG_CONFIG=' + self.pg_config_path]
            if work_dir in external_extension_dirs
            else []
        )

        if is_verbose_mode():
            logging.info("Running make in the %s directory", work_dir)

        # The main make step.
        make_cmd_str = shlex_join(make_cmd + extra_make_args)
        self.run_make_with_retries(work_dir, make_cmd_str)

        if self.build_type == 'compilecmds' and work_dir != self.pg_build_root:
            logging.info(
                "Not running 'make install' in the %s directory since we are only "
                "generating the compilation database", work_dir)
        else:
            self.run_make_install(make_cmd, extra_make_args)

        if not self.export_compile_commands or self.skip_pg_compile_commands:
            return []

        logging.info("Generating the compilation database in directory '%s'", work_dir)
        compdb_path = os.path.join(work_dir, 'compile_commands.json')

        with SavedEnviron(YB_PG_SKIP_CONFIG_STATUS='1'):
            # An existing compile_commands.json is reused rather than regenerated.
            if not os.path.exists(compdb_path):
                compiledb_cmd = ['compiledb', 'make', '-n'] + extra_make_args
                run_program(compiledb_cmd, capture_output=False)

        if not os.path.exists(compdb_path):
            raise RuntimeError("Failed to generate compilation database at: %s" %
                               compdb_path)

        return [compdb_path]
951+
914952
def get_env_script_content(self) -> str:
915953
"""
916954
Returns a Bash script that sets all variables necessary to easily rerun the "make" step

0 commit comments

Comments
 (0)