Add banned hostnames
This commit is contained in:
parent
ce4609068d
commit
f614cbdceb
1 changed files with 107 additions and 61 deletions
|
@ -12,6 +12,7 @@ from logging.handlers import SysLogHandler
|
|||
import sqlite3
|
||||
import time
|
||||
import re
|
||||
import json
|
||||
import click
|
||||
import click_config_file
|
||||
import requests
|
||||
|
@ -63,13 +64,21 @@ class DiscoverMastodonServers:
|
|||
timeout=10
|
||||
)
|
||||
if result.status_code < 400:
|
||||
if 'application/json' in result.headers['Content-Type']:
|
||||
data = result.json()
|
||||
if 'error' not in data:
|
||||
return data
|
||||
if 'Content-Type' in result.headers:
|
||||
if 'application/json' in result.headers['Content-Type']:
|
||||
data = result.json()
|
||||
if 'error' not in data:
|
||||
return data
|
||||
else:
|
||||
self._log.debug(
|
||||
"Server '%s' didn't reply with JSON data.", server
|
||||
)
|
||||
else:
|
||||
self._log.debug(
|
||||
"Server '%s' didn't reply with JSON data.", server
|
||||
"Server '%s' didn't return Content-Type header. Headers: '%s'. Content returned: '%s'",
|
||||
server,
|
||||
json.dumps(result.headers, indent=2),
|
||||
result.content
|
||||
)
|
||||
else:
|
||||
self._log.debug(
|
||||
|
@ -95,70 +104,103 @@ class DiscoverMastodonServers:
|
|||
)
|
||||
return data
|
||||
|
||||
def get_instance_info(self, server):
|
||||
'''Get all server information'''
|
||||
result = {}
|
||||
instance = self.get_path(server, '/api/v1/instance')
|
||||
if instance:
|
||||
result['instance'] = instance
|
||||
directory = []
|
||||
result['directory'] = directory
|
||||
offset=0
|
||||
while len(directory) == 0:
|
||||
directory = self.get_path(
|
||||
server,
|
||||
f"/api/v1/directory?limit=80&offset={offset}"
|
||||
)
|
||||
if directory:
|
||||
result['directory'] = result['directory'] + directory
|
||||
offset += 80
|
||||
return result
|
||||
|
||||
def test_banned_server(self, server):
|
||||
'''Check if a server name match agains any banned regular expressions'''
|
||||
for banned in self.config['regexp_banned_host']:
|
||||
match = re.search(banned, server)
|
||||
if match:
|
||||
self._log.debug(
|
||||
"Regexp '%s' match server '%s'",
|
||||
banned,
|
||||
server
|
||||
)
|
||||
return True
|
||||
return False
|
||||
|
||||
def discover(self):
|
||||
'''Discover new servers'''
|
||||
all_servers = []
|
||||
new_servers_count = 0
|
||||
for server in self.servers.items():
|
||||
all_servers.append(server[0])
|
||||
if not server[1]['private']:
|
||||
self._log.debug("Fetching peers of the server '%s'", server[0])
|
||||
data = self.get_path(server[0], 'api/v1/instance/peers')
|
||||
if data:
|
||||
for new_server in data:
|
||||
new_servers_count += 1
|
||||
self._log.debug(
|
||||
"Adding new server '%s'",
|
||||
new_server
|
||||
)
|
||||
all_servers.append(new_server)
|
||||
self.write_record(
|
||||
(new_server,
|
||||
{
|
||||
"name": new_server,
|
||||
"last_update": time.time(),
|
||||
"private": False
|
||||
}
|
||||
)
|
||||
)
|
||||
self._log.debug("Fetching public timeline in server '%s'", server[0])
|
||||
data = self.get_timeline(server[0])
|
||||
if data:
|
||||
for item in data:
|
||||
if 'uri' in item:
|
||||
match_server = re.match(r'https?://([^/]*)/', item['uri'])
|
||||
if match_server:
|
||||
new_server = match_server.group(1)
|
||||
if new_server not in all_servers:
|
||||
data = self.get_timeline(new_server)
|
||||
if data:
|
||||
new_servers_count += 1
|
||||
self._log.debug(
|
||||
"Adding new server '%s'",
|
||||
new_server
|
||||
)
|
||||
all_servers.append(new_server)
|
||||
private = False
|
||||
else:
|
||||
private = True
|
||||
self.write_record(
|
||||
(new_server,
|
||||
{
|
||||
"name": new_server,
|
||||
"last_update": time.time(),
|
||||
"private": private
|
||||
}
|
||||
)
|
||||
)
|
||||
else:
|
||||
# Item in public timeline don't have an URI
|
||||
if not self.test_banned_server(server[0]):
|
||||
if not server[1]['private']:
|
||||
self._log.debug("Fetching peers of the server '%s'", server[0])
|
||||
data = self.get_path(server[0], 'api/v1/instance/peers')
|
||||
if data:
|
||||
for new_server in data:
|
||||
new_servers_count += 1
|
||||
self._log.debug(
|
||||
"Item don't have URI. %s",
|
||||
item
|
||||
"Adding new server '%s'",
|
||||
new_server
|
||||
)
|
||||
else:
|
||||
server[1]['private'] = True
|
||||
self.write_record(server)
|
||||
all_servers.append(new_server)
|
||||
self.write_record(
|
||||
(new_server,
|
||||
{
|
||||
"name": new_server,
|
||||
"last_update": time.time(),
|
||||
"private": False
|
||||
}
|
||||
)
|
||||
)
|
||||
self._log.debug("Fetching public timeline in server '%s'", server[0])
|
||||
data = self.get_timeline(server[0])
|
||||
if data:
|
||||
for item in data:
|
||||
if 'uri' in item:
|
||||
match_server = re.match(r'https?://([^/]*)/', item['uri'])
|
||||
if match_server:
|
||||
new_server = match_server.group(1)
|
||||
if new_server not in all_servers:
|
||||
data = self.get_timeline(new_server)
|
||||
if data:
|
||||
new_servers_count += 1
|
||||
self._log.debug(
|
||||
"Adding new server '%s'",
|
||||
new_server
|
||||
)
|
||||
all_servers.append(new_server)
|
||||
private = False
|
||||
else:
|
||||
private = True
|
||||
self.write_record(
|
||||
(new_server,
|
||||
{
|
||||
"name": new_server,
|
||||
"last_update": time.time(),
|
||||
"private": private
|
||||
}
|
||||
)
|
||||
)
|
||||
else:
|
||||
# Item in public timeline don't have an URI
|
||||
self._log.debug(
|
||||
"Item don't have URI. %s",
|
||||
item
|
||||
)
|
||||
else:
|
||||
server[1]['private'] = True
|
||||
self.write_record(server)
|
||||
return new_servers_count
|
||||
|
||||
def write_record(self, record, table='servers'):
|
||||
|
@ -279,6 +321,10 @@ class DiscoverMastodonServers:
|
|||
'--database-file', '-d', default='mastodon-servers.db',
|
||||
help='File with the database of results.'
|
||||
)
|
||||
@click.option(
|
||||
'--regexp-banned-host', '-r', multiple=True,
|
||||
help='Regular expression for banned host names.'
|
||||
)
|
||||
@click_config_file.configuration_option()
|
||||
def __main__(**kwargs):
|
||||
return DiscoverMastodonServers(**kwargs)
|
||||
|
|
Loading…
Reference in a new issue