Handle rate limit better
This commit is contained in:
parent
d6a5dc4166
commit
73fd484eb2
1 changed files with 58 additions and 37 deletions
|
@ -51,6 +51,7 @@ class GithubPackages:
|
||||||
"orgs_without_assets": [],
|
"orgs_without_assets": [],
|
||||||
# "assets": [],
|
# "assets": [],
|
||||||
"packages_urls": [],
|
"packages_urls": [],
|
||||||
|
'organizations': [],
|
||||||
}
|
}
|
||||||
self._default_payload = {
|
self._default_payload = {
|
||||||
'per_page': 100,
|
'per_page': 100,
|
||||||
|
@ -89,6 +90,10 @@ class GithubPackages:
|
||||||
self.config['max_organizations'],
|
self.config['max_organizations'],
|
||||||
)
|
)
|
||||||
organizations = []
|
organizations = []
|
||||||
|
self._log.debug(
|
||||||
|
"Obtained %s organizations",
|
||||||
|
len(full_organizations)
|
||||||
|
)
|
||||||
for organization in full_organizations:
|
for organization in full_organizations:
|
||||||
if 'login' in organization:
|
if 'login' in organization:
|
||||||
organizations.append(organization['login'])
|
organizations.append(organization['login'])
|
||||||
|
@ -102,7 +107,7 @@ class GithubPackages:
|
||||||
"Organization is not a dictionay? %s",
|
"Organization is not a dictionay? %s",
|
||||||
organization,
|
organization,
|
||||||
)
|
)
|
||||||
sys.exit(2)
|
# sys.exit(2)
|
||||||
self._log.debug(
|
self._log.debug(
|
||||||
"A total of %s organizations fetched",
|
"A total of %s organizations fetched",
|
||||||
len(organizations)
|
len(organizations)
|
||||||
|
@ -120,9 +125,11 @@ class GithubPackages:
|
||||||
error
|
error
|
||||||
)
|
)
|
||||||
|
|
||||||
def close(self):
|
def close(self, error=0):
|
||||||
'''Close class and save data'''
|
'''Close class and save data'''
|
||||||
self._save_cached_data(self.data)
|
self._save_cached_data(self.data)
|
||||||
|
if error > 0:
|
||||||
|
sys.exit(error)
|
||||||
|
|
||||||
def _process_organization(self, organization):
|
def _process_organization(self, organization):
|
||||||
self._log.debug(
|
self._log.debug(
|
||||||
|
@ -234,34 +241,38 @@ class GithubPackages:
|
||||||
json.dump(data, cache_file, indent=2)
|
json.dump(data, cache_file, indent=2)
|
||||||
|
|
||||||
def _new_request(self, headers):
|
def _new_request(self, headers):
|
||||||
if (
|
if 'X-RateLimit-Remaining' in headers:
|
||||||
'X-RateLimit-Remaining' in headers and
|
if int(headers['X-RateLimit-Remaining']) % 1000 == 0:
|
||||||
int(headers['X-RateLimit-Remaining']) < 50
|
self._log.debug(
|
||||||
):
|
"Rate limit remaining requests %s",
|
||||||
if 'X-RateLimit-Reset' in headers:
|
headers['X-RateLimit-Remaining']
|
||||||
wait = int(headers['X-RateLimit-Reset']) - time.time() + 5
|
|
||||||
str_reset_time = time.strftime(
|
|
||||||
"%Y/%m/%d %H:%M:%S",
|
|
||||||
time.gmtime(int(headers['X-RateLimit-Reset']))
|
|
||||||
)
|
)
|
||||||
if wait > 60:
|
if int(headers['X-RateLimit-Remaining']) < 50:
|
||||||
pretty_wait = f"{round(wait / 60, 2)} minutes (until {str_reset_time})"
|
if 'X-RateLimit-Reset' in headers:
|
||||||
else:
|
wait = int(headers['X-RateLimit-Reset']) - time.time() + 5
|
||||||
pretty_wait = f"{wait} seconds (until {str_reset_time})"
|
str_reset_time = time.strftime(
|
||||||
self._log.warning(
|
"%Y/%m/%d %H:%M:%S %z",
|
||||||
"Rate limit too close to be consumed (%s requests remaining). Waiting %s until it's reset",
|
time.gmtime(int(headers['X-RateLimit-Reset']))
|
||||||
headers['X-RateLimit-Remaining'],
|
)
|
||||||
pretty_wait
|
if wait > 60:
|
||||||
)
|
pretty_wait = f"{round(wait / 60, 2)} minutes (until {str_reset_time})"
|
||||||
time.sleep(wait)
|
else:
|
||||||
|
pretty_wait = f"{wait} seconds (until {str_reset_time})"
|
||||||
|
self._log.warning(
|
||||||
|
"Rate limit too close to be consumed (%s requests remaining). Waiting %s until it's reset",
|
||||||
|
headers['X-RateLimit-Remaining'],
|
||||||
|
pretty_wait
|
||||||
|
)
|
||||||
|
time.sleep(wait)
|
||||||
if self.request_count['time_start'] + 60*60 > time.time():
|
if self.request_count['time_start'] + 60*60 > time.time():
|
||||||
self.request_count['max_per_minute'] -= 1
|
self.request_count['max_per_minute'] -= 1
|
||||||
if self.request_count['max_per_minute'] < 1:
|
if self.request_count['max_per_minute'] < 1:
|
||||||
|
self._save_cached_data(self.data)
|
||||||
self._log.warning(
|
self._log.warning(
|
||||||
"Maximun number of request per minute (%s) reached, waiting one minute",
|
"Maximun number of request per minute (%s) reached, waiting 30 seconds",
|
||||||
self.config['rate_limit']
|
round(self.config['rate_limit'], 2)
|
||||||
)
|
)
|
||||||
time.sleep(60)
|
time.sleep(30)
|
||||||
self.request_count['max_per_minute'] = self.config['rate_limit']
|
self.request_count['max_per_minute'] = self.config['rate_limit']
|
||||||
self.request_count['time_start'] = time.time()
|
self.request_count['time_start'] = time.time()
|
||||||
|
|
||||||
|
@ -282,7 +293,13 @@ class GithubPackages:
|
||||||
payload = self._default_payload
|
payload = self._default_payload
|
||||||
result = self.session.get(url, params=payload)
|
result = self.session.get(url, params=payload)
|
||||||
self._new_request(result.headers)
|
self._new_request(result.headers)
|
||||||
items = result.json()
|
data = result.json()
|
||||||
|
if isinstance(data, dict):
|
||||||
|
items = [
|
||||||
|
data
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
items = data
|
||||||
if 'status' in items:
|
if 'status' in items:
|
||||||
if int(items['status']) > 299 and items['status'] != '404':
|
if int(items['status']) > 299 and items['status'] != '404':
|
||||||
self._log.error(
|
self._log.error(
|
||||||
|
@ -292,7 +309,7 @@ class GithubPackages:
|
||||||
items,
|
items,
|
||||||
result.headers
|
result.headers
|
||||||
)
|
)
|
||||||
sys.exit(1)
|
self.close(1)
|
||||||
elif items['status'] == '404':
|
elif items['status'] == '404':
|
||||||
self._log.debug(
|
self._log.debug(
|
||||||
"Not found any resource (404) in the endpoint '%s'.",
|
"Not found any resource (404) in the endpoint '%s'.",
|
||||||
|
@ -307,13 +324,21 @@ class GithubPackages:
|
||||||
next_link = self._get_next_pagination_link(result.headers)
|
next_link = self._get_next_pagination_link(result.headers)
|
||||||
while next_link and len(items) < max_items:
|
while next_link and len(items) < max_items:
|
||||||
page += 1
|
page += 1
|
||||||
# self._log.debug(
|
self._log.debug(
|
||||||
# "Getting page %s (%s items)",
|
"Obtained %s items so far",
|
||||||
# page,
|
len(items)
|
||||||
# len(items)
|
)
|
||||||
# )
|
result = self.session.get(next_link, params=payload)
|
||||||
result = self.session.get(next_link)
|
self._new_request(result.headers)
|
||||||
items += result.json()
|
result_data = result.json()
|
||||||
|
if 'status' not in result_data:
|
||||||
|
items += list(result_data)
|
||||||
|
else:
|
||||||
|
self._log.warning(
|
||||||
|
"Didn't obtain a successful response. Stopping fetching pages. %s",
|
||||||
|
result_data
|
||||||
|
)
|
||||||
|
return items
|
||||||
next_link = self._get_next_pagination_link(result.headers)
|
next_link = self._get_next_pagination_link(result.headers)
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
@ -345,10 +370,6 @@ class GithubPackages:
|
||||||
if 'log_file' in self.config:
|
if 'log_file' in self.config:
|
||||||
log_file = self.config['log_file']
|
log_file = self.config['log_file']
|
||||||
else:
|
else:
|
||||||
HOME_FOLDER = os.environ.get(
|
|
||||||
'HOME', os.environ.get('USERPROFILE', '')
|
|
||||||
)
|
|
||||||
LOG_FOLDER = os.path.join(HOME_FOLDER, "log")
|
|
||||||
log_file = os.path.join(LOG_FOLDER, "github_packages.log")
|
log_file = os.path.join(LOG_FOLDER, "github_packages.log")
|
||||||
|
|
||||||
if not os.path.exists(os.path.dirname(log_file)):
|
if not os.path.exists(os.path.dirname(log_file)):
|
||||||
|
|
Loading…
Reference in a new issue