Handle rate limit better

Antonio J. Delgado 2025-02-24 21:19:41 +02:00
parent d6a5dc4166
commit 73fd484eb2


@@ -51,6 +51,7 @@ class GithubPackages:
"orgs_without_assets": [],
# "assets": [],
"packages_urls": [],
'organizations': [],
}
self._default_payload = {
'per_page': 100,
@@ -89,6 +90,10 @@ class GithubPackages:
self.config['max_organizations'],
)
organizations = []
self._log.debug(
"Obtained %s organizations",
len(full_organizations)
)
for organization in full_organizations:
if 'login' in organization:
organizations.append(organization['login'])
@@ -102,7 +107,7 @@ class GithubPackages:
"Organization is not a dictionay? %s",
organization,
)
sys.exit(2)
# sys.exit(2)
self._log.debug(
"A total of %s organizations fetched",
len(organizations)
@@ -120,9 +125,11 @@ class GithubPackages:
error
)
def close(self):
def close(self, error=0):
'''Close class and save data'''
self._save_cached_data(self.data)
if error > 0:
sys.exit(error)
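
With close() now taking an optional error code, fatal paths can persist the cached data before terminating instead of calling sys.exit() directly. A minimal sketch of the intended call pattern (names taken from the diff):

    # before this commit, a fatal path exited directly:
    #     sys.exit(1)
    # now the same path goes through close(), which writes self.data to the cache first:
    self.close(1)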
def _process_organization(self, organization):
self._log.debug(
@@ -234,14 +241,17 @@ class GithubPackages:
json.dump(data, cache_file, indent=2)
def _new_request(self, headers):
if (
'X-RateLimit-Remaining' in headers and
int(headers['X-RateLimit-Remaining']) < 50
):
if 'X-RateLimit-Remaining' in headers:
if int(headers['X-RateLimit-Remaining']) % 1000 == 0:
self._log.debug(
"Rate limit remaining requests %s",
headers['X-RateLimit-Remaining']
)
if int(headers['X-RateLimit-Remaining']) < 50:
if 'X-RateLimit-Reset' in headers:
wait = int(headers['X-RateLimit-Reset']) - time.time() + 5
str_reset_time = time.strftime(
"%Y/%m/%d %H:%M:%S",
"%Y/%m/%d %H:%M:%S %z",
time.gmtime(int(headers['X-RateLimit-Reset']))
)
if wait > 60:
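
The wait above comes from GitHub's X-RateLimit-Reset header, which holds the UTC epoch second at which the quota resets. A self-contained sketch of that arithmetic (the 5-second margin mirrors the diff; the function name is illustrative):

    import time

    def seconds_until_reset(headers, margin=5):
        '''Seconds to sleep until the rate-limit window resets.'''
        reset_epoch = int(headers['X-RateLimit-Reset'])   # UTC epoch seconds
        return max(reset_epoch - time.time() + margin, 0.0)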
@@ -257,11 +267,12 @@ class GithubPackages:
if self.request_count['time_start'] + 60*60 > time.time():
self.request_count['max_per_minute'] -= 1
if self.request_count['max_per_minute'] < 1:
self._save_cached_data(self.data)
self._log.warning(
"Maximun number of request per minute (%s) reached, waiting one minute",
self.config['rate_limit']
"Maximun number of request per minute (%s) reached, waiting 30 seconds",
round(self.config['rate_limit'], 2)
)
time.sleep(60)
time.sleep(30)
self.request_count['max_per_minute'] = self.config['rate_limit']
self.request_count['time_start'] = time.time()
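
Independently of the server-side headers, the client also spends from its own request budget per time window. A rough standalone sketch of that bookkeeping (attribute and config names follow the diff; the one-hour window is taken from the 60*60 check above):

    import time

    def throttle(self):
        '''Spend one request from the window budget; pause and refill when exhausted.'''
        if self.request_count['time_start'] + 60 * 60 > time.time():
            self.request_count['max_per_minute'] -= 1
            if self.request_count['max_per_minute'] < 1:
                self._save_cached_data(self.data)   # persist progress before pausing
                time.sleep(30)                      # back off briefly
                self.request_count['max_per_minute'] = self.config['rate_limit']
                self.request_count['time_start'] = time.time()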
@@ -282,7 +293,13 @@ class GithubPackages:
payload = self._default_payload
result = self.session.get(url, params=payload)
self._new_request(result.headers)
items = result.json()
data = result.json()
if isinstance(data, dict):
items = [
data
]
else:
items = data
if 'status' in items:
if int(items['status']) > 299 and items['status'] != '404':
self._log.error(
@@ -292,7 +309,7 @@ class GithubPackages:
items,
result.headers
)
sys.exit(1)
self.close(1)
elif items['status'] == '404':
self._log.debug(
"Not found any resource (404) in the endpoint '%s'.",
@@ -307,13 +324,21 @@ class GithubPackages:
next_link = self._get_next_pagination_link(result.headers)
while next_link and len(items) < max_items:
page += 1
# self._log.debug(
# "Getting page %s (%s items)",
# page,
# len(items)
# )
result = self.session.get(next_link)
items += result.json()
self._log.debug(
"Obtained %s items so far",
len(items)
)
result = self.session.get(next_link, params=payload)
self._new_request(result.headers)
result_data = result.json()
if 'status' not in result_data:
items += list(result_data)
else:
self._log.warning(
"Didn't obtain a successful response. Stopping fetching pages. %s",
result_data
)
return items
next_link = self._get_next_pagination_link(result.headers)
return items
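
_get_next_pagination_link itself is not shown in this diff; GitHub advertises the next page through the standard Link response header, so a helper along these lines is assumed:

    def get_next_pagination_link(headers):
        '''Return the rel="next" URL from a Link header, or None on the last page.'''
        for part in headers.get('Link', '').split(','):
            if 'rel="next"' in part:
                return part.split(';')[0].strip().strip('<>')
        return None

requests pre-parses the same header, so result.links.get('next', {}).get('url') would return the same URL.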
@@ -345,10 +370,6 @@ class GithubPackages:
if 'log_file' in self.config:
log_file = self.config['log_file']
else:
HOME_FOLDER = os.environ.get(
'HOME', os.environ.get('USERPROFILE', '')
)
LOG_FOLDER = os.path.join(HOME_FOLDER, "log")
log_file = os.path.join(LOG_FOLDER, "github_packages.log")
if not os.path.exists(os.path.dirname(log_file)):
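
When no log_file is configured, the default location is built from the user's home directory. A minimal sketch of that fallback (paths are illustrative):

    import os

    home = os.environ.get('HOME', os.environ.get('USERPROFILE', ''))
    log_file = os.path.join(home, 'log', 'github_packages.log')
    os.makedirs(os.path.dirname(log_file), exist_ok=True)   # make sure the folder exists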