Skip to content

Commit ed688a2

Browse files
committed
Add retries for external dependencies
1 parent 5c33bf9 commit ed688a2

File tree

3 files changed

+64
-4
lines changed

3 files changed

+64
-4
lines changed

01_install_requirements.sh

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,30 @@ sudo dnf -y clean all
4040
old_version=$(sudo dnf info NetworkManager | grep Version | cut -d ':' -f 2)
4141

4242
# Update to latest packages first
43-
sudo dnf -y upgrade --nobest
43+
# Upgrade all packages, retrying on failure.
#
# Each failed attempt (except the last) wipes the dnf cache and waits
# before trying again. The wait grows linearly with the attempt number
# (_YUM_RETRY_BACKOFF * attempt). If every attempt fails the script exits 1.

# Number of attempts
MAX_RETRIES=5
# Base delay between attempts (in seconds); multiplied by the attempt number
_YUM_RETRY_BACKOFF=15

attempt=1
while (( attempt <= MAX_RETRIES )); do
  if sudo dnf -y upgrade --nobest; then
    echo "System upgraded successfully."
    break
  fi

  if (( attempt < MAX_RETRIES )); then
    echo "Upgrade failed (attempt $attempt). Cleaning cache and retrying..."
    sudo dnf clean all
    sudo rm -rf /var/cache/dnf/*
    sleep $(( _YUM_RETRY_BACKOFF * attempt ))
  else
    # Last attempt: there is no retry left, so do not clean the cache or
    # sleep through the longest backoff just to report the failure.
    echo "Upgrade failed (attempt $attempt)."
  fi

  # attempt only advances after a failure, so attempt > MAX_RETRIES below
  # means that every attempt failed.
  attempt=$(( attempt + 1 ))
done

if (( attempt > MAX_RETRIES )); then
  echo "ERROR: Failed to upgrade system after $MAX_RETRIES attempts." >&2
  exit 1
fi
4467

4568
new_version=$(sudo dnf info NetworkManager | grep Version | cut -d ':' -f 2)
4669
# If NetworkManager was upgraded it needs to be restarted

ocp_install_env.sh

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,33 @@ function extract_command() {
1919
local cmd
2020
local outdir
2121
local extract_dir
22+
local MAX_RETRIES=5
23+
local SLEEP_BETWEEN=10
2224

2325
cmd="$1"
2426
release_image="$2"
2527
outdir="$3"
2628

27-
extract_dir=$(mktemp --tmpdir -d "installer--XXXXXXXXXX")
28-
_tmpfiles="$_tmpfiles $extract_dir"
29+
# Retry loop for oc adm release extract to handle quay.io blips.
#
# Every attempt extracts into a fresh temporary directory. A failed
# attempt's directory is removed straight away, including on the final
# attempt, so a failure never leaves a temp dir that is neither deleted
# nor recorded in _tmpfiles.
local attempt
for (( attempt = 1; attempt <= MAX_RETRIES; attempt++ )); do
  extract_dir=$(mktemp --tmpdir -d "installer--XXXXXXXXXX")

  if oc adm release extract --registry-config "${PULL_SECRET_FILE}" --command="$cmd" --to "${extract_dir}" "${release_image}"; then
    echo "Successfully extracted $cmd"
    break
  fi

  # Discard whatever the failed extraction left behind.
  rm -rf -- "${extract_dir}"

  if (( attempt >= MAX_RETRIES )); then
    echo "Failed to extract $cmd from ${release_image} after $MAX_RETRIES attempts"
    return 1
  fi

  echo "Extraction failed, retrying in ${SLEEP_BETWEEN}s..."
  sleep "${SLEEP_BETWEEN}"
done

# Only the directory of the successful attempt is recorded in _tmpfiles.
_tmpfiles="$_tmpfiles $extract_dir"
3149

3250
if [[ $cmd == "oc.rhel8" ]]; then
3351
cmd="oc"

utils.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,25 @@ EOF
617617
if [[ "$reg_state" != "running" || $restart_registry -eq 1 ]]; then
618618
sudo podman rm registry -f || true
619619

620+
# Pull the registry image before it is run, so that quay.io errors such as
# 504s are tolerated. Up to MAX_RETRIES attempts are made, with a fixed
# pause of _PULL_RETRY_DELAY seconds between them; the script exits 1 once
# the last attempt has failed.
MAX_RETRIES=5
_PULL_RETRY_DELAY=10

attempt=1
until sudo podman pull "${DOCKER_REGISTRY_IMAGE}"; do
  # No attempts left: give up.
  if (( attempt >= MAX_RETRIES )); then
    echo "Failed to pull ${DOCKER_REGISTRY_IMAGE} after $MAX_RETRIES attempts"
    exit 1
  fi

  echo "Pull failed, retrying in ${_PULL_RETRY_DELAY}s..."
  sleep "${_PULL_RETRY_DELAY}"
  attempt=$(( attempt + 1 ))
done
echo "Successfully pulled ${DOCKER_REGISTRY_IMAGE}"
638+
620639
sudo podman run -d --name registry --net=host --privileged \
621640
-v ${REGISTRY_DIR}/data:/var/lib/registry:z \
622641
-v ${REGISTRY_DIR}/auth:/auth:z \

0 commit comments

Comments
 (0)