Improve performance by fetching each message only once
This commit is contained in:
parent
6178df97b6
commit
a2890e0264
2 changed files with 435 additions and 54 deletions
|
@ -14,6 +14,8 @@ import email
|
|||
from signal import signal, SIGINT
|
||||
import json
|
||||
import re
|
||||
import codecs
|
||||
import time
|
||||
import click
|
||||
import click_config_file
|
||||
|
||||
|
@ -21,6 +23,7 @@ class ImapFilter:
|
|||
'''IMAP filter tool'''
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
start_time = time.time()
|
||||
self.config = kwargs
|
||||
if 'log_file' not in kwargs or kwargs['log_file'] is None:
|
||||
self.config['log_file'] = os.path.join(
|
||||
|
@ -36,29 +39,28 @@ class ImapFilter:
|
|||
)
|
||||
self._init_log()
|
||||
signal(SIGINT, self._signal_handler)
|
||||
self._convert_filters()
|
||||
if self.config['filters_file']:
|
||||
self._read_filters_file()
|
||||
if len(self.config['filter']) == 0:
|
||||
with open(self.config['filters_file'], 'r', encoding='UTF-8') as filters_file:
|
||||
self.config['mailboxes'] = json.load(filters_file)
|
||||
if len(self.config['mailboxes']) == 0:
|
||||
self._log.error(
|
||||
"You must indicate either a filter or a filters-file. Use --help to see more details."
|
||||
"Filters file is empty. Use --help to see more details."
|
||||
)
|
||||
sys.exit(1)
|
||||
self.connect_imap()
|
||||
self._process_filters()
|
||||
|
||||
def _read_filters_file(self):
    '''Load the JSON filters file and add its entries to the filter list.

    The path is read from self.config['filters_file']; every item of the
    decoded JSON document is appended, in order, to self.config['filter'].
    '''
    path = self.config['filters_file']
    with open(path, 'r', encoding='UTF-8') as handle:
        loaded = json.load(handle)
    # Append one by one so the existing list object is extended in place.
    for entry in loaded:
        self.config['filter'].append(entry)
|
||||
self._log.debug(
|
||||
"Took %s seconds to process",
|
||||
time.time() - start_time
|
||||
)
|
||||
|
||||
def _process_filters(self):
|
||||
for mfilter in self.config['filter']:
|
||||
self.matches = 0
|
||||
self._log.debug("Processing filter '%s'...", mfilter)
|
||||
self.imap.select(mailbox=mfilter['mailbox'], readonly=False)
|
||||
self._log.debug("Searching for all messages in mailbox '%s'...", mfilter['mailbox'])
|
||||
for mailbox in self.config['mailboxes']:
|
||||
self._log.debug(
|
||||
"Processing mailbox '%s'...",
|
||||
mailbox['mailbox']
|
||||
)
|
||||
self.imap.select(mailbox=mailbox['mailbox'], readonly=False)
|
||||
self._log.debug("Searching for all messages in mailbox '%s'...", mailbox['mailbox'])
|
||||
typ, data = self.imap.search('UTF-8', 'ALL')
|
||||
if typ != 'OK':
|
||||
self._log.error('Error, server replied: %s', data)
|
||||
|
@ -68,9 +70,10 @@ class ImapFilter:
|
|||
self._log.debug(
|
||||
"Processing %s messages in mailbox '%s'...",
|
||||
total_msgs,
|
||||
mfilter['mailbox']
|
||||
mailbox['mailbox']
|
||||
)
|
||||
msg_count = 0
|
||||
self.matches = 0
|
||||
for message_id in all_msgs_uids:
|
||||
msg_count += 1
|
||||
self._log.debug(
|
||||
|
@ -87,25 +90,38 @@ class ImapFilter:
|
|||
if typ != 'OK':
|
||||
self._log.error('Error, server replied: %s', unseen_data)
|
||||
return False
|
||||
self._process_message(message_id, data[0], mfilter)
|
||||
for mfilter in mailbox['filters']:
|
||||
self._log.debug("Processing filter '%s'...", mfilter['name'])
|
||||
self._process_message(message_id, data[0], mfilter)
|
||||
except imaplib.IMAP4.error as error:
|
||||
self._log.error(
|
||||
"Error fetching message. %s",
|
||||
error
|
||||
)
|
||||
self._log.debug(
|
||||
"A total of %s matches for this filter",
|
||||
self.matches
|
||||
if self.matches > 0:
|
||||
self._log.info(
|
||||
"A total of %s matches for this mailbox",
|
||||
self.matches
|
||||
)
|
||||
else:
|
||||
self._log.info(
|
||||
"No matches for this mailbox"
|
||||
)
|
||||
try:
|
||||
self.imap.expunge()
|
||||
except imaplib.IMAP4.abort as error:
|
||||
self._log.error(
|
||||
"Error expunging connection. %s",
|
||||
error
|
||||
)
|
||||
self.imap.expunge()
|
||||
return True
|
||||
|
||||
def _process_message(self, message_id, data, mfilter):
|
||||
'''Process a mail message'''
|
||||
if isinstance(data[1], int):
|
||||
self._log.warning(
|
||||
"Response part is integer %s in data '%s'. Try again.",
|
||||
data[1],
|
||||
data[0]
|
||||
"Unexpected response fetching message: '%s'. Try again.",
|
||||
data
|
||||
)
|
||||
return False
|
||||
part = data[1].decode('utf-8')
|
||||
|
@ -115,31 +131,52 @@ class ImapFilter:
|
|||
field_data = decoded_field[0][0]
|
||||
else:
|
||||
field_data = decoded_field[0][0].decode()
|
||||
match = re.match(mfilter['regexp'], field_data)
|
||||
if 'words' in mfilter:
|
||||
regexp = '^(?=.*' + '.*|.*'.join(mfilter['words']) + '.*)'
|
||||
else:
|
||||
if 'regexp' in mfilter:
|
||||
regexp = mfilter['regexp']
|
||||
else:
|
||||
self._log.error(
|
||||
"The filter '%s' doesn't have a 'words' or 'regexp' value. %s",
|
||||
mfilter['name'],
|
||||
mfilter
|
||||
)
|
||||
match = re.match(regexp, field_data)
|
||||
if match:
|
||||
self._log.info(
|
||||
"Field '%s' => '%s', matches '%s'",
|
||||
"Field '%s' => '%s', matches filter '%s'",
|
||||
mfilter['field'],
|
||||
field_data,
|
||||
mfilter['regexp']
|
||||
mfilter['name']
|
||||
)
|
||||
self.matches += 1
|
||||
if self.config['dummy']:
|
||||
self._log.info('Doing nothing (dummy run)')
|
||||
else:
|
||||
self._do_filter(message_id, mfilter)
|
||||
return True
|
||||
|
||||
def _do_filter(self, message_id, mfilter):
    '''Run the action requested by a filter on a single message.

    The handler is the method named ``_action_<mfilter['action']>`` on this
    object. It is called with ``message_id`` and ``mfilter`` and its return
    value is logged at debug level. When no such method exists an error is
    logged and the message is left untouched.
    '''
    action = mfilter['action']
    handler = getattr(self, f"_action_{action}", None)
    if handler is None:
        # A misspelled action in the filters file should show up in the
        # log instead of being skipped without a trace.
        self._log.error(
            "Unknown action '%s'. Filter: %s",
            action,
            mfilter
        )
        return
    result = handler(message_id, mfilter)
    self._log.debug(
        "Result: %s",
        result
    )
|
||||
|
||||
def _action_move(self, message_id, mfilter):
|
||||
self._log.debug(
|
||||
self._log.info(
|
||||
"Moving message '%s' to '%s'...",
|
||||
message_id,
|
||||
mfilter['destination']
|
||||
|
@ -152,7 +189,7 @@ class ImapFilter:
|
|||
"Creating mailbox '%s'...",
|
||||
mailbox
|
||||
)
|
||||
typ, data = self.imap.create(mailbox)
|
||||
typ, data = self.imap.create(mailbox.encode('utf-8', 'replace'))
|
||||
if typ != 'OK':
|
||||
self._log.error(
|
||||
'Error creating mailbox %s, server replied: %s',
|
||||
|
@ -163,16 +200,20 @@ class ImapFilter:
|
|||
return True
|
||||
|
||||
def _action_copy(self, message_id, mfilter):
|
||||
self._log.debug(
|
||||
self._log.info(
|
||||
"Copying message '%s' to '%s'...",
|
||||
message_id,
|
||||
mfilter['destination']
|
||||
)
|
||||
typ, data = self.imap.copy(message_id, mfilter['destination'])
|
||||
mailbox_encoded = codecs.encode(
|
||||
mfilter['destination'],
|
||||
encoding="utf-7"
|
||||
).replace(b"+", b"&")
|
||||
typ, data = self.imap.copy(message_id, mailbox_encoded)
|
||||
if typ != 'OK':
|
||||
if b'[TRYCREATE]' in data[0]:
|
||||
if self._create_mailbox(mfilter['destination']):
|
||||
typ, data = self.imap.copy(message_id, mfilter['destination'])
|
||||
if self._create_mailbox(mailbox_encoded):
|
||||
typ, data = self.imap.copy(message_id,mailbox_encoded)
|
||||
if typ != 'OK':
|
||||
self._log.error(
|
||||
'Error copying message to %s, server replied: %s',
|
||||
|
@ -190,7 +231,7 @@ class ImapFilter:
|
|||
return True
|
||||
|
||||
def _action_delete(self, message_id, mfilter):
|
||||
self._log.debug(
|
||||
self._log.info(
|
||||
"Deleting message '%s'...",
|
||||
message_id
|
||||
)
|
||||
|
@ -205,7 +246,7 @@ class ImapFilter:
|
|||
return True
|
||||
|
||||
def _action_mark_seen(self, message_id, mfilter):
|
||||
self._log.debug(
|
||||
self._log.info(
|
||||
"Marking as seen message '%s'...",
|
||||
message_id
|
||||
)
|
||||
|
@ -219,12 +260,6 @@ class ImapFilter:
|
|||
return False
|
||||
return True
|
||||
|
||||
def _convert_filters(self):
    '''Decode every JSON string in self.config['filter'] into an object.

    The decoded items replace the original sequence as a new list, keeping
    the original order.
    '''
    self.config['filter'] = [
        json.loads(raw_filter) for raw_filter in self.config['filter']
    ]
|
||||
|
||||
def connect_imap(self):
|
||||
'''Create connection object to the IMAP server'''
|
||||
self._log.debug(
|
||||
|
@ -345,12 +380,7 @@ class ImapFilter:
|
|||
help='Whether to use a secure connection or not.'
|
||||
)
|
||||
@click.option(
|
||||
'--filter', '-f', required=False,
|
||||
multiple=True,
|
||||
help='Filter rule.'
|
||||
)
|
||||
@click.option(
|
||||
'--filters-file', '-F', required=False,
|
||||
'--filters-file', '-F', required=True,
|
||||
help='JSON file containing a list of dictionaries with the filter rules.'
|
||||
)
|
||||
@click_config_file.configuration_option()
|
||||
|
|
363
test.json
363
test.json
|
@ -1,10 +1,361 @@
|
|||
[
|
||||
{
|
||||
"name": "python",
|
||||
"mailbox": "Feeds/Mastodon/Test",
|
||||
"field": "Subject",
|
||||
"regexp": "^(?=.*nvidia.*|.*Nvidia.*|.*ansible.*|.*Ansible.*|.*ubuntu.*|.*Ubuntu.*|.*blender.*|.*Blender.*|.*technology.*|.*Technology.*|.*msdos.*|.*dosbox.*|.*python.*|.*Python.*|.*devops.*|.*DevOps.*|.*forgejo.*|.*Forgejo.*|.*smartphone.*|.*Smartphone.*|.*SmartPhone.*|.*Android.*|.*android.*|.*github.*|.*Github.*|.*gitlab.*|.*Gitlab.*|.*#programming.*|.*TechCrunch.*|.*researchbuzz.*|.*ripencc.*|.*FCAI.*|.*TechDesk.*|.*#selfhosting.*|.*#selfhosted.*|.*#ai.*|.*#deepfake.*|.*#chatgpt.*|.*#tietotekniikka.*|.*#videogames.*|.*#software.*|.*#retrogaming.*|.*#web.*|.*#gaming.*|.*#pcgaming.*|.*#gamedev.*|.*#fairphone.*|.*#ebike.*|.*#windows.*|.*#speedrun.*|.*#cloud.*|.*#euhosted.*|.*#python.*|.*#steamdeck.*|.*#indiegame.*|.*#webdev.*|.*#rustlang.*|.*#valve.*|.*#intel.*|.*#dns.*|.*#digitaljustice.*|.*#tv.*)",
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Test/tech"
|
||||
"mailbox": "Feeds/Mastodon",
|
||||
"filters": [
|
||||
{
|
||||
"name": "tech",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"nvidia",
|
||||
"ansible",
|
||||
"ubuntu",
|
||||
"#blender",
|
||||
"#technology",
|
||||
"msdos",
|
||||
"dosbox",
|
||||
"#python",
|
||||
"devops",
|
||||
"forgejo",
|
||||
"#smartphone",
|
||||
"#android",
|
||||
"github",
|
||||
"gitlab",
|
||||
"#programming",
|
||||
"TechCrunch",
|
||||
"researchbuzz",
|
||||
"ripencc",
|
||||
"FCAI",
|
||||
"TechDesk",
|
||||
"selfhost",
|
||||
"#ai",
|
||||
"#deepfake",
|
||||
"#chatgpt",
|
||||
"#tietotekniikka",
|
||||
"#videogames",
|
||||
"#software",
|
||||
"#retrogaming",
|
||||
"#web",
|
||||
"#gaming",
|
||||
"#pcgaming",
|
||||
"#gamedev",
|
||||
"#fairphone",
|
||||
"#ebike",
|
||||
"#windows",
|
||||
"#speedrun",
|
||||
"#cloud",
|
||||
"#euhosted",
|
||||
"#python",
|
||||
"#steamdeck",
|
||||
"#indiegame",
|
||||
"#webdev",
|
||||
"#rustlang",
|
||||
"#valve",
|
||||
"#intel",
|
||||
"#dns",
|
||||
"#digitaljustice",
|
||||
"#tv",
|
||||
"#internetarchive"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Tech"
|
||||
},
|
||||
{
|
||||
"name": "tampere",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"tampere",
|
||||
"Tampere",
|
||||
"pirkanmaa",
|
||||
"Pirkanmaa",
|
||||
"kaukajärv",
|
||||
"Kaukajärv",
|
||||
"#pirkkala"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Tampere"
|
||||
},
|
||||
{
|
||||
"name": "tampere_body",
|
||||
"field": "Body",
|
||||
"regexp": "^.*\"content\": \"(?=.*tampere.*|.*Tampere.*|.*pirkanmaa.*|.*Pirkanmaa.*|.*kaukajärv.*|.*Kaukajärv.*|.*#pirkkala)",
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Tampere"
|
||||
},
|
||||
{
|
||||
"name": "infosec",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#infosec",
|
||||
"#hacking",
|
||||
"#defcon",
|
||||
"thehackernews",
|
||||
"#cybersecurity",
|
||||
"#opsec",
|
||||
"#surveillance",
|
||||
"#encryption",
|
||||
"#security",
|
||||
"#spyware",
|
||||
"#ninjalab",
|
||||
"#yubikey",
|
||||
"#yubico"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/InfoSec"
|
||||
},
|
||||
{
|
||||
"name": "infosec_body",
|
||||
"field": "Body",
|
||||
"regexp": "^.*\"content\": \"(?=.*infosec.*|.*InfoSec .*|.*hacking.*|.*Hacking .*|.*defcon.*|.*DefCon .*|.*thehackernews.*|.*cybersecurity .*|.*Cybersecurity .*|.*opsec .*|.*OpSec .*|.*#surveillance.*|.*#encryption.*|.*#security.*|.*#spyware .*|.*#ninjalab.*|.*#yubikey.*|.*#yubico.*)",
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/InfoSec"
|
||||
},
|
||||
{
|
||||
"name": "movies",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#movie",
|
||||
"#film",
|
||||
"#trailer",
|
||||
"#moviesuggestion",
|
||||
"rottentomatoes",
|
||||
"RottenTomatoes",
|
||||
"#pelicula",
|
||||
"#cine"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Movies"
|
||||
},
|
||||
{
|
||||
"name": "almeria",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#almeria",
|
||||
"#almería",
|
||||
"#cabodegata",
|
||||
"#costadelaluz"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Almería"
|
||||
},
|
||||
{
|
||||
"name": "almeria",
|
||||
"field": "Body",
|
||||
"regexp": "^.*\"content\": \"(?=.*almeria.*|.*Almeria .*|.*Almería.*|.*almería .*|.*cabo de gata.*|.*Cabo De Gata .*|.*Cabo de Gata.*|.*Costa de la Luz .*|.*Costa De La Luz .*|.*costa de la luz .*)",
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Almería"
|
||||
},
|
||||
{
|
||||
"name": "Jobs",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#gethired",
|
||||
"#joboffer",
|
||||
"#hiring",
|
||||
"#flossjobs",
|
||||
"#layoffs",
|
||||
"#lookingforwork",
|
||||
"#osjobjub",
|
||||
"#trabajo",
|
||||
"#laboral",
|
||||
"#jobhunt",
|
||||
"#fedihire",
|
||||
"#fedijobs",
|
||||
"#getfedihired",
|
||||
"#trabajoremoto",
|
||||
"#remotework",
|
||||
"#wfh",
|
||||
"#opensourcejobs"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Jobs"
|
||||
},
|
||||
{
|
||||
"name": "OpenSource",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#pinephone",
|
||||
"#pinetime",
|
||||
"#pinetab",
|
||||
"#jellyfin",
|
||||
"linux",
|
||||
"foss",
|
||||
"opensource",
|
||||
"freesoftware",
|
||||
"LibreOffice",
|
||||
"libreoffice",
|
||||
"#sailfishos",
|
||||
"#nextcloud",
|
||||
"#righttorepair",
|
||||
"#phosh",
|
||||
"#debian",
|
||||
"#openstreetmap",
|
||||
"#mobian",
|
||||
"#gnome",
|
||||
"#kde",
|
||||
"#xfce",
|
||||
"#wayland",
|
||||
"#waydroid",
|
||||
"#steam",
|
||||
"#godot",
|
||||
"#inkscape",
|
||||
"#homeassistant",
|
||||
"#openhome"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/linux"
|
||||
},
|
||||
{
|
||||
"name": "Cats",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#caturday",
|
||||
"#catsofmastodon",
|
||||
"#martesdegatos"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Caturday"
|
||||
},
|
||||
{
|
||||
"name": "Dogs",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#dogsofmastodon"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/DogsOfMastodon"
|
||||
},
|
||||
{
|
||||
"name": "Fediverse",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#fediverse",
|
||||
"#mastodon",
|
||||
"peertube",
|
||||
"pixelfeed",
|
||||
"activitypub",
|
||||
"ActivityPub",
|
||||
"#mastodonadmin"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Fediverse"
|
||||
},
|
||||
{
|
||||
"name": "Humor",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#humor",
|
||||
"#funny",
|
||||
"#meme"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Humor"
|
||||
},
|
||||
{
|
||||
"name": "New",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#news",
|
||||
"#noticias",
|
||||
"ElSaltoDiario",
|
||||
"earthquake",
|
||||
"zoom_earth"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/News"
|
||||
},
|
||||
{
|
||||
"name": "Jazz",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"jazz",
|
||||
"Jazz"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/News"
|
||||
},
|
||||
{
|
||||
"name": "USPolitics",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#uspol",
|
||||
"#kamalaharris",
|
||||
"#trump",
|
||||
"#biden",
|
||||
"#gop",
|
||||
"#texas"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/USPolitics"
|
||||
},
|
||||
{
|
||||
"name": "España",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#spanish",
|
||||
"#spain",
|
||||
"#españa",
|
||||
"#espanja",
|
||||
"#malaga",
|
||||
"#málaga",
|
||||
"#madrid",
|
||||
"#andalucia",
|
||||
"#sevilla",
|
||||
"#barcelona",
|
||||
"#mallorca",
|
||||
"#ibiza",
|
||||
"#canaryislands",
|
||||
"#balear",
|
||||
"España"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/España"
|
||||
},
|
||||
{
|
||||
"name": "Birds",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#birds"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Birds"
|
||||
},
|
||||
{
|
||||
"name": "Finland",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#finland",
|
||||
"#suomi"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Finland"
|
||||
},
|
||||
{
|
||||
"name": "Autism",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#actuallyautistic",
|
||||
"#autism",
|
||||
"#autistic"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Autism"
|
||||
},
|
||||
{
|
||||
"name": "Art",
|
||||
"field": "Subject",
|
||||
"words": [
|
||||
"#art",
|
||||
"#artist",
|
||||
"#streetart",
|
||||
"#mural",
|
||||
"#photography",
|
||||
"#fotografie",
|
||||
"#photoart",
|
||||
"#urbanart"
|
||||
],
|
||||
"action": "move",
|
||||
"destination": "Feeds/Mastodon/Art"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
Loading…
Reference in a new issue