diff --git a/github_packages/github_packages.py b/github_packages/github_packages.py index 9449e01..2232957 100644 --- a/github_packages/github_packages.py +++ b/github_packages/github_packages.py @@ -51,6 +51,7 @@ class GithubPackages: "orgs_without_assets": [], # "assets": [], "packages_urls": [], + 'organizations': [], } self._default_payload = { 'per_page': 100, @@ -89,6 +90,10 @@ class GithubPackages: self.config['max_organizations'], ) organizations = [] + self._log.debug( + "Obtained %s organizations", + len(full_organizations) + ) for organization in full_organizations: if 'login' in organization: organizations.append(organization['login']) @@ -102,7 +107,7 @@ class GithubPackages: "Organization is not a dictionay? %s", organization, ) - sys.exit(2) + # sys.exit(2) self._log.debug( "A total of %s organizations fetched", len(organizations) @@ -120,9 +125,11 @@ class GithubPackages: error ) - def close(self): + def close(self, error=0): '''Close class and save data''' self._save_cached_data(self.data) + if error > 0: + sys.exit(error) def _process_organization(self, organization): self._log.debug( @@ -234,34 +241,38 @@ class GithubPackages: json.dump(data, cache_file, indent=2) def _new_request(self, headers): - if ( - 'X-RateLimit-Remaining' in headers and - int(headers['X-RateLimit-Remaining']) < 50 - ): - if 'X-RateLimit-Reset' in headers: - wait = int(headers['X-RateLimit-Reset']) - time.time() + 5 - str_reset_time = time.strftime( - "%Y/%m/%d %H:%M:%S", - time.gmtime(int(headers['X-RateLimit-Reset'])) + if 'X-RateLimit-Remaining' in headers: + if int(headers['X-RateLimit-Remaining']) % 1000 == 0: + self._log.debug( + "Rate limit remaining requests %s", + headers['X-RateLimit-Remaining'] ) - if wait > 60: - pretty_wait = f"{round(wait / 60, 2)} minutes (until {str_reset_time})" - else: - pretty_wait = f"{wait} seconds (until {str_reset_time})" - self._log.warning( - "Rate limit too close to be consumed (%s requests remaining). Waiting %s until it's reset", - headers['X-RateLimit-Remaining'], - pretty_wait - ) - time.sleep(wait) + if int(headers['X-RateLimit-Remaining']) < 50: + if 'X-RateLimit-Reset' in headers: + wait = int(headers['X-RateLimit-Reset']) - time.time() + 5 + str_reset_time = time.strftime( + "%Y/%m/%d %H:%M:%S %z", + time.gmtime(int(headers['X-RateLimit-Reset'])) + ) + if wait > 60: + pretty_wait = f"{round(wait / 60, 2)} minutes (until {str_reset_time})" + else: + pretty_wait = f"{wait} seconds (until {str_reset_time})" + self._log.warning( + "Rate limit too close to be consumed (%s requests remaining). Waiting %s until it's reset", + headers['X-RateLimit-Remaining'], + pretty_wait + ) + time.sleep(wait) if self.request_count['time_start'] + 60*60 > time.time(): self.request_count['max_per_minute'] -= 1 if self.request_count['max_per_minute'] < 1: + self._save_cached_data(self.data) self._log.warning( - "Maximun number of request per minute (%s) reached, waiting one minute", - self.config['rate_limit'] + "Maximun number of request per minute (%s) reached, waiting 30 seconds", + round(self.config['rate_limit'], 2) ) - time.sleep(60) + time.sleep(30) self.request_count['max_per_minute'] = self.config['rate_limit'] self.request_count['time_start'] = time.time() @@ -282,7 +293,13 @@ class GithubPackages: payload = self._default_payload result = self.session.get(url, params=payload) self._new_request(result.headers) - items = result.json() + data = result.json() + if isinstance(data, dict): + items = [ + data + ] + else: + items = data if 'status' in items: if int(items['status']) > 299 and items['status'] != '404': self._log.error( @@ -292,7 +309,7 @@ class GithubPackages: items, result.headers ) - sys.exit(1) + self.close(1) elif items['status'] == '404': self._log.debug( "Not found any resource (404) in the endpoint '%s'.", @@ -307,13 +324,21 @@ class GithubPackages: next_link = self._get_next_pagination_link(result.headers) while next_link and len(items) < max_items: page += 1 - # self._log.debug( - # "Getting page %s (%s items)", - # page, - # len(items) - # ) - result = self.session.get(next_link) - items += result.json() + self._log.debug( + "Obtained %s items so far", + len(items) + ) + result = self.session.get(next_link, params=payload) + self._new_request(result.headers) + result_data = result.json() + if 'status' not in result_data: + items += list(result_data) + else: + self._log.warning( + "Didn't obtain a successful response. Stopping fetching pages. %s", + result_data + ) + return items next_link = self._get_next_pagination_link(result.headers) return items @@ -345,10 +370,6 @@ class GithubPackages: if 'log_file' in self.config: log_file = self.config['log_file'] else: - HOME_FOLDER = os.environ.get( - 'HOME', os.environ.get('USERPROFILE', '') - ) - LOG_FOLDER = os.path.join(HOME_FOLDER, "log") log_file = os.path.join(LOG_FOLDER, "github_packages.log") if not os.path.exists(os.path.dirname(log_file)):