diff --git a/discover-mastodon-servers/discover_mastodon_servers.py b/discover-mastodon-servers/discover_mastodon_servers.py
index f5c742b..8763b7c 100755
--- a/discover-mastodon-servers/discover_mastodon_servers.py
+++ b/discover-mastodon-servers/discover_mastodon_servers.py
@@ -11,7 +11,6 @@ import logging
 from logging.handlers import SysLogHandler
 import sqlite3
 import time
-import json
 import re
 import click
 import click_config_file
@@ -51,6 +50,76 @@ class DiscoverMastodonServers:
         while new_servers_count > 0:
             new_servers_count = self.discover()
 
+    def get_timeline(self, server):
+        '''Get the data of a public timeline for a given server'''
+        return self.get_path(server, 'api/v1/timelines/public')
+
+    def get_path(self, server, endpoint):
+        '''Get the data of an endpoint of a server
+
+        Returns the decoded JSON reply, or None when the request fails,
+        the reply is not valid JSON or it is an API error object.
+        '''
+        try:
+            result = self.session.get(
+                f"https://{server}/{endpoint}",
+                timeout=10
+            )
+        except requests.exceptions.ReadTimeout as error:
+            self._log.warning(
+                "Server '%s' didn't respond on time. %s",
+                server,
+                error
+            )
+            return None
+        except requests.exceptions.SSLError as error:
+            self._log.warning(
+                "Server '%s' don't have a valid SSL certificate. %s",
+                server,
+                error
+            )
+            return None
+        except requests.exceptions.ConnectionError as error:
+            self._log.warning(
+                "Server '%s' connection failed. %s",
+                server,
+                error
+            )
+            return None
+        except requests.exceptions.RequestException as error:
+            # Invalid peer names, redirect loops, broken transfers...
+            self._log.warning(
+                "Request to server '%s' failed. %s",
+                server,
+                error
+            )
+            return None
+        if result.status_code >= 400:
+            self._log.debug(
+                "Server '%s' returned error code %s.", server, result.status_code
+            )
+            return None
+        # The header can be missing, which also means no JSON
+        if 'application/json' not in result.headers.get('Content-Type', ''):
+            self._log.debug(
+                "Server '%s' didn't reply with JSON data.", server
+            )
+            return None
+        try:
+            data = result.json()
+        except ValueError as error:
+            self._log.debug(
+                "Server '%s' replied with invalid JSON. %s", server, error
+            )
+            return None
+        if isinstance(data, dict) and 'error' in data:
+            # An API error (e.g. authentication required) is not data
+            self._log.debug(
+                "Server '%s' replied with an error. %s", server, data['error']
+            )
+            return None
+        return data
+
     def discover(self):
         '''Discover new servers'''
         all_servers = []
@@ -58,89 +127,66 @@ class DiscoverMastodonServers:
         for server in self.servers.items():
             all_servers.append(server[0])
             if not server[1]['private']:
-                self._log.debug("Fetching public timeline in server '%s'", server[0])
-                try:
-                    result = self.session.get(
-                        f"https://{server[0]}/api/v1/timelines/public",
-                        timeout=10
-                    )
-                    if result.status_code < 400:
-                        if 'application/json' in result.headers['Content-Type']:
-                            data = result.json()
-                            if 'error' in data:
-                                if data['error'] == 'This method requires an authenticated user':
-                                    server[1]['private'] = True
-                                    self.write_record(server)
-                            else:
-                                for item in data:
-                                    if 'uri' in item:
-                                        match_server = re.match(r'https?://([^/]*)/', item['uri'])
-                                        if match_server:
-                                            new_server = match_server.group(1)
-                                            if new_server not in all_servers:
-                                                try:
-                                                    new_result = self.session.get(
-                                                        f"https://{new_server}/api/v1/timelines/public",
-                                                        timeout=10
-                                                    )
-                                                    if new_result.status_code < 400:
-                                                        new_servers_count += 1
-                                                        self._log.debug(
-                                                            "Adding new server '%s'",
-                                                            new_server
-                                                        )
-                                                        all_servers.append(new_server)
-                                                        # self._log.debug(new_result.headers['Content-Type'])
-                                                        # self._log.debug(new_result.content)
-                                                        if 'application/json' in new_result.headers['Content-Type']:
-                                                            data = new_result.json()
-                                                            # except requests.exceptions.JSONDecodeError:
-                                                            private = False
-                                                            if 'error' in data:
-                                                                if data['error'] == 'This method requires an authenticated user':
-                                                                    private = True
-                                                            self.write_record(
-                                                                (new_server,
-                                                                    {
-                                                                        "name": new_server,
-                                                                        "last_update": time.time(),
-                                                                        "private": private
-                                                                    }
-                                                                )
-                                                            )
-                                                        else:
-                                                            self._log.debug(
-                                                                "Server '%s' didn't reply with JSON encoded data",
-                                                                new_server
-                                                            )
-                                                            private = True
-                                                            self.write_record(
-                                                                (new_server,
-                                                                    {
-                                                                        "name": new_server,
-                                                                        "last_update": time.time(),
-                                                                        "private": private
-                                                                    }
-                                                                )
-                                                            )
-                                                except requests.exceptions.ReadTimeout:
-                                                    self._log.warning(
-                                                        "Server '%s' didn't respond on time.",
-                                                        new_server
-                                                    )
-                                    else:
-                                        # Item in public timeline don't have an URI
-                                        self._log.debug(
-                                            "Item don't have URI. %s",
-                                            item
-                                        )
-                        else:
-                            self._log.debug(
-                                "Server '%s' didn't reply with JSON data.",
-                                server[0]
-                            )
-                except requests.exceptions.ReadTimeout:
-                    self._log.warning("Server '%s' didn't respond on time.", server[0])
+                self._log.debug("Fetching peers of the server '%s'", server[0])
+                peers = self.get_path(server[0], 'api/v1/instance/peers')
+                for new_server in peers or []:
+                    # Count a peer only once, otherwise crawl() never ends
+                    if new_server in all_servers or new_server in self.servers:
+                        continue
+                    new_servers_count += 1
+                    self._log.debug(
+                        "Adding new server '%s'",
+                        new_server
+                    )
+                    all_servers.append(new_server)
+                    self.write_record(
+                        (new_server,
+                            {
+                                "name": new_server,
+                                "last_update": time.time(),
+                                "private": False
+                            }
+                        )
+                    )
+                self._log.debug("Fetching public timeline in server '%s'", server[0])
+                timeline = self.get_timeline(server[0])
+                if timeline is None:
+                    # No usable public timeline: stop asking this server
+                    server[1]['private'] = True
+                    self.write_record(server)
+                    continue
+                for item in timeline:
+                    if not isinstance(item, dict) or 'uri' not in item:
+                        # Item in public timeline don't have an URI
+                        self._log.debug(
+                            "Item don't have URI. %s",
+                            item
+                        )
+                        continue
+                    match_server = re.match(r'https?://([^/]*)/', item['uri'])
+                    if not match_server:
+                        continue
+                    new_server = match_server.group(1)
+                    if new_server in all_servers or new_server in self.servers:
+                        continue
+                    # Remember it even if private, so it is probed only once
+                    all_servers.append(new_server)
+                    private = self.get_timeline(new_server) is None
+                    if not private:
+                        new_servers_count += 1
+                        self._log.debug(
+                            "Adding new server '%s'",
+                            new_server
+                        )
+                    self.write_record(
+                        (new_server,
+                            {
+                                "name": new_server,
+                                "last_update": time.time(),
+                                "private": private
+                            }
+                        )
+                    )
         return new_servers_count
 
     def write_record(self, record, table='servers'):