sync: add retry to fetch operations
Add retries with exponential backoff and jitter to the fetch
operations. The default behavior is unchanged; retries are only
enabled via the new '--retry-fetches' flag.
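
For reference, the sleep schedule used between retries has the shape
sketched below. This is a minimal standalone sketch that mirrors the
loop added to project.py; the helper name backoff_delays is
illustrative and not part of the change.

    import random

    MAXIMUM_RETRY_SLEEP_SEC = 3600.0  # cap on any single sleep
    RETRY_JITTER_PERCENT = 0.1        # +/-10% jitter per retry

    def backoff_delays(retries, initial_sec=4.0, factor=2.0):
      """Yield the sleep duration used before each successive retry."""
      sleep = initial_sec
      for _ in range(retries):
        yield sleep
        # Grow exponentially, cap at the maximum, then apply +/-10% jitter
        # so many clients hitting the same server do not retry in lockstep.
        sleep = min(factor * sleep, MAXIMUM_RETRY_SLEEP_SEC)
        sleep *= (1 - random.uniform(-RETRY_JITTER_PERCENT,
                                     RETRY_JITTER_PERCENT))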
Bug: https://crbug.com/1061473
Change-Id: I492710843985d00f81cbe3402dc56f2d21a45b35
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/261576
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: George Engelbrecht <engeg@google.com>
diff --git a/project.py b/project.py
index d35ad52..691e0d9 100644
--- a/project.py
+++ b/project.py
@@ -55,6 +55,12 @@
input = raw_input # noqa: F821
+# Maximum sleep time allowed during retries.
+MAXIMUM_RETRY_SLEEP_SEC = 3600.0
+# +-10% random jitter is added to each fetch retry sleep duration.
+RETRY_JITTER_PERCENT = 0.1
+
+
def _lwrite(path, content):
lock = '%s.lock' % path
@@ -875,6 +881,7 @@
is_derived=False,
dest_branch=None,
optimized_fetch=False,
+ retry_fetches=0,
old_revision=None):
"""Init a Project object.
@@ -901,6 +908,8 @@
dest_branch: The branch to which to push changes for review by default.
optimized_fetch: If True, when a project is set to a sha1 revision, only
fetch from the remote if the sha1 is not present locally.
+ retry_fetches: Retry remote fetches n times upon receiving transient errors,
+ with exponential backoff and jitter.
old_revision: saved git commit id for open GITC projects.
"""
self.manifest = manifest
@@ -936,6 +945,7 @@
self.use_git_worktrees = use_git_worktrees
self.is_derived = is_derived
self.optimized_fetch = optimized_fetch
+ self.retry_fetches = max(0, retry_fetches)
self.subprojects = []
self.snapshots = {}
@@ -1449,6 +1459,7 @@
tags=True,
archive=False,
optimized_fetch=False,
+ retry_fetches=0,
prune=False,
submodules=False,
clone_filter=None):
@@ -1532,7 +1543,7 @@
current_branch_only=current_branch_only,
tags=tags, prune=prune, depth=depth,
submodules=submodules, force_sync=force_sync,
- clone_filter=clone_filter):
+ clone_filter=clone_filter, retry_fetches=retry_fetches):
return False
mp = self.manifest.manifestProject
@@ -2334,8 +2345,10 @@
depth=None,
submodules=False,
force_sync=False,
- clone_filter=None):
-
+ clone_filter=None,
+ retry_fetches=2,
+ retry_sleep_initial_sec=4.0,
+ retry_exp_factor=2.0):
is_sha1 = False
tag_name = None
# The depth should not be used when fetching to a mirror because
@@ -2497,18 +2510,37 @@
cmd.extend(spec)
- ok = False
- for _i in range(2):
+ # Always allow at least one retry so the 'git remote prune' recovery can run.
+ retry_fetches = max(retry_fetches, 2)
+ retry_cur_sleep = retry_sleep_initial_sec
+ ok = prune_tried = False
+ for try_n in range(retry_fetches):
gitcmd = GitCommand(self, cmd, bare=True, ssh_proxy=ssh_proxy,
merge_output=True, capture_stdout=quiet)
ret = gitcmd.Wait()
if ret == 0:
ok = True
break
- # If needed, run the 'git remote prune' the first time through the loop
- elif (not _i and
- "error:" in gitcmd.stderr and
- "git remote prune" in gitcmd.stderr):
+
+ # Back off and retry after an HTTP 429 Too Many Requests response.
+ elif ('error:' in gitcmd.stderr and
+ 'HTTP 429' in gitcmd.stderr):
+ if not quiet:
+ print('429 received, sleeping: %s sec' % retry_cur_sleep,
+ file=sys.stderr)
+ time.sleep(retry_cur_sleep)
+ retry_cur_sleep = min(retry_exp_factor * retry_cur_sleep,
+ MAXIMUM_RETRY_SLEEP_SEC)
+ retry_cur_sleep *= (1 - random.uniform(-RETRY_JITTER_PERCENT,
+ RETRY_JITTER_PERCENT))
+ continue
+
+ # If this is not the last attempt, try 'git remote prune'.
+ elif (try_n < retry_fetches - 1 and
+ 'error:' in gitcmd.stderr and
+ 'git remote prune' in gitcmd.stderr and
+ not prune_tried):
+ prune_tried = True
prunecmd = GitCommand(self, ['remote', 'prune', name], bare=True,
ssh_proxy=ssh_proxy)
ret = prunecmd.Wait()
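
Condensed, the retry policy in the hunk above works as sketched below:
back off with jitter when the server answers HTTP 429, run
'git remote prune' at most once when git suggests it, and otherwise
stop (the real loop has further recovery branches outside this hunk).
run_fetch and run_prune are hypothetical stand-ins for the GitCommand
invocations; the constants repeat the ones defined at the top of
project.py.

    import random
    import time

    MAXIMUM_RETRY_SLEEP_SEC = 3600.0
    RETRY_JITTER_PERCENT = 0.1

    def fetch_with_retries(run_fetch, run_prune, retry_fetches=2,
                           retry_sleep_initial_sec=4.0,
                           retry_exp_factor=2.0):
      """Sketch of the fetch retry loop.

      run_fetch() returns (exit_code, stderr); run_prune() reruns
      'git remote prune' for the same remote.
      """
      # Keep at least two attempts so the prune recovery still gets a retry.
      retry_fetches = max(retry_fetches, 2)
      retry_cur_sleep = retry_sleep_initial_sec
      prune_tried = False
      for try_n in range(retry_fetches):
        ret, stderr = run_fetch()
        if ret == 0:
          return True
        if 'error:' in stderr and 'HTTP 429' in stderr:
          # Transient throttling: sleep, then grow the next delay with jitter.
          time.sleep(retry_cur_sleep)
          retry_cur_sleep = min(retry_exp_factor * retry_cur_sleep,
                                MAXIMUM_RETRY_SLEEP_SEC)
          retry_cur_sleep *= (1 - random.uniform(-RETRY_JITTER_PERCENT,
                                                 RETRY_JITTER_PERCENT))
          continue
        if (try_n < retry_fetches - 1 and not prune_tried and
            'error:' in stderr and 'git remote prune' in stderr):
          # Stale remote-tracking refs: prune once, then retry the fetch.
          prune_tried = True
          run_prune()
          continue
        return False
      return False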
diff --git a/subcmds/sync.py b/subcmds/sync.py
index de6deec..efd3961 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -265,6 +265,9 @@
p.add_option('--optimized-fetch',
dest='optimized_fetch', action='store_true',
help='only fetch projects fixed to sha1 if revision does not exist locally')
+ p.add_option('--retry-fetches',
+ default=0, action='store', type='int',
+ help='number of times to retry fetches on transient errors')
p.add_option('--prune', dest='prune', action='store_true',
help='delete refs that no longer exist on the remote')
if show_smart:
@@ -342,6 +345,7 @@
clone_bundle=opt.clone_bundle,
tags=opt.tags, archive=self.manifest.IsArchive,
optimized_fetch=opt.optimized_fetch,
+ retry_fetches=opt.retry_fetches,
prune=opt.prune,
clone_filter=clone_filter)
self._fetch_times.Set(project, time.time() - start)
@@ -777,6 +781,7 @@
current_branch_only=opt.current_branch_only,
tags=opt.tags,
optimized_fetch=opt.optimized_fetch,
+ retry_fetches=opt.retry_fetches,
submodules=self.manifest.HasSubmodules,
clone_filter=self.manifest.CloneFilter)
finish = time.time()
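
With the option wired through, a user opts in with e.g.
'repo sync --retry-fetches=3'. The flag defaults to 0, and since the
fetch helper in project.py clamps the count to a minimum of two
attempts, leaving it unset keeps the previous behavior of one fetch
plus the single 'git remote prune' retry.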