Handle rate limit better

This commit is contained in:
Antonio J. Delgado 2025-02-24 21:19:41 +02:00
parent d6a5dc4166
commit 73fd484eb2

View file

@ -51,6 +51,7 @@ class GithubPackages:
"orgs_without_assets": [], "orgs_without_assets": [],
# "assets": [], # "assets": [],
"packages_urls": [], "packages_urls": [],
'organizations': [],
} }
self._default_payload = { self._default_payload = {
'per_page': 100, 'per_page': 100,
@ -89,6 +90,10 @@ class GithubPackages:
self.config['max_organizations'], self.config['max_organizations'],
) )
organizations = [] organizations = []
self._log.debug(
"Obtained %s organizations",
len(full_organizations)
)
for organization in full_organizations: for organization in full_organizations:
if 'login' in organization: if 'login' in organization:
organizations.append(organization['login']) organizations.append(organization['login'])
@ -102,7 +107,7 @@ class GithubPackages:
"Organization is not a dictionay? %s", "Organization is not a dictionay? %s",
organization, organization,
) )
sys.exit(2) # sys.exit(2)
self._log.debug( self._log.debug(
"A total of %s organizations fetched", "A total of %s organizations fetched",
len(organizations) len(organizations)
@ -120,9 +125,11 @@ class GithubPackages:
error error
) )
def close(self): def close(self, error=0):
'''Close class and save data''' '''Close class and save data'''
self._save_cached_data(self.data) self._save_cached_data(self.data)
if error > 0:
sys.exit(error)
def _process_organization(self, organization): def _process_organization(self, organization):
self._log.debug( self._log.debug(
@ -234,34 +241,38 @@ class GithubPackages:
json.dump(data, cache_file, indent=2) json.dump(data, cache_file, indent=2)
def _new_request(self, headers): def _new_request(self, headers):
if ( if 'X-RateLimit-Remaining' in headers:
'X-RateLimit-Remaining' in headers and if int(headers['X-RateLimit-Remaining']) % 1000 == 0:
int(headers['X-RateLimit-Remaining']) < 50 self._log.debug(
): "Rate limit remaining requests %s",
if 'X-RateLimit-Reset' in headers: headers['X-RateLimit-Remaining']
wait = int(headers['X-RateLimit-Reset']) - time.time() + 5
str_reset_time = time.strftime(
"%Y/%m/%d %H:%M:%S",
time.gmtime(int(headers['X-RateLimit-Reset']))
) )
if wait > 60: if int(headers['X-RateLimit-Remaining']) < 50:
pretty_wait = f"{round(wait / 60, 2)} minutes (until {str_reset_time})" if 'X-RateLimit-Reset' in headers:
else: wait = int(headers['X-RateLimit-Reset']) - time.time() + 5
pretty_wait = f"{wait} seconds (until {str_reset_time})" str_reset_time = time.strftime(
self._log.warning( "%Y/%m/%d %H:%M:%S %z",
"Rate limit too close to be consumed (%s requests remaining). Waiting %s until it's reset", time.gmtime(int(headers['X-RateLimit-Reset']))
headers['X-RateLimit-Remaining'], )
pretty_wait if wait > 60:
) pretty_wait = f"{round(wait / 60, 2)} minutes (until {str_reset_time})"
time.sleep(wait) else:
pretty_wait = f"{wait} seconds (until {str_reset_time})"
self._log.warning(
"Rate limit too close to be consumed (%s requests remaining). Waiting %s until it's reset",
headers['X-RateLimit-Remaining'],
pretty_wait
)
time.sleep(wait)
if self.request_count['time_start'] + 60*60 > time.time(): if self.request_count['time_start'] + 60*60 > time.time():
self.request_count['max_per_minute'] -= 1 self.request_count['max_per_minute'] -= 1
if self.request_count['max_per_minute'] < 1: if self.request_count['max_per_minute'] < 1:
self._save_cached_data(self.data)
self._log.warning( self._log.warning(
"Maximun number of request per minute (%s) reached, waiting one minute", "Maximun number of request per minute (%s) reached, waiting 30 seconds",
self.config['rate_limit'] round(self.config['rate_limit'], 2)
) )
time.sleep(60) time.sleep(30)
self.request_count['max_per_minute'] = self.config['rate_limit'] self.request_count['max_per_minute'] = self.config['rate_limit']
self.request_count['time_start'] = time.time() self.request_count['time_start'] = time.time()
@ -282,7 +293,13 @@ class GithubPackages:
payload = self._default_payload payload = self._default_payload
result = self.session.get(url, params=payload) result = self.session.get(url, params=payload)
self._new_request(result.headers) self._new_request(result.headers)
items = result.json() data = result.json()
if isinstance(data, dict):
items = [
data
]
else:
items = data
if 'status' in items: if 'status' in items:
if int(items['status']) > 299 and items['status'] != '404': if int(items['status']) > 299 and items['status'] != '404':
self._log.error( self._log.error(
@ -292,7 +309,7 @@ class GithubPackages:
items, items,
result.headers result.headers
) )
sys.exit(1) self.close(1)
elif items['status'] == '404': elif items['status'] == '404':
self._log.debug( self._log.debug(
"Not found any resource (404) in the endpoint '%s'.", "Not found any resource (404) in the endpoint '%s'.",
@ -307,13 +324,21 @@ class GithubPackages:
next_link = self._get_next_pagination_link(result.headers) next_link = self._get_next_pagination_link(result.headers)
while next_link and len(items) < max_items: while next_link and len(items) < max_items:
page += 1 page += 1
# self._log.debug( self._log.debug(
# "Getting page %s (%s items)", "Obtained %s items so far",
# page, len(items)
# len(items) )
# ) result = self.session.get(next_link, params=payload)
result = self.session.get(next_link) self._new_request(result.headers)
items += result.json() result_data = result.json()
if 'status' not in result_data:
items += list(result_data)
else:
self._log.warning(
"Didn't obtain a successful response. Stopping fetching pages. %s",
result_data
)
return items
next_link = self._get_next_pagination_link(result.headers) next_link = self._get_next_pagination_link(result.headers)
return items return items
@ -345,10 +370,6 @@ class GithubPackages:
if 'log_file' in self.config: if 'log_file' in self.config:
log_file = self.config['log_file'] log_file = self.config['log_file']
else: else:
HOME_FOLDER = os.environ.get(
'HOME', os.environ.get('USERPROFILE', '')
)
LOG_FOLDER = os.path.join(HOME_FOLDER, "log")
log_file = os.path.join(LOG_FOLDER, "github_packages.log") log_file = os.path.join(LOG_FOLDER, "github_packages.log")
if not os.path.exists(os.path.dirname(log_file)): if not os.path.exists(os.path.dirname(log_file)):