From a2890e026457a471c9d96f6c2be3686793b50340 Mon Sep 17 00:00:00 2001 From: "Antonio J. Delgado" Date: Thu, 5 Sep 2024 18:50:11 +0300 Subject: [PATCH] Improve performance fetching only once each message --- imap_filter/imap_filter.py | 126 ++++++++----- test.json | 363 ++++++++++++++++++++++++++++++++++++- 2 files changed, 435 insertions(+), 54 deletions(-) diff --git a/imap_filter/imap_filter.py b/imap_filter/imap_filter.py index 8459a04..eeecc32 100644 --- a/imap_filter/imap_filter.py +++ b/imap_filter/imap_filter.py @@ -14,6 +14,8 @@ import email from signal import signal, SIGINT import json import re +import codecs +import time import click import click_config_file @@ -21,6 +23,7 @@ class ImapFilter: '''IMAP filter tool''' def __init__(self, **kwargs): + start_time = time.time() self.config = kwargs if 'log_file' not in kwargs or kwargs['log_file'] is None: self.config['log_file'] = os.path.join( @@ -36,29 +39,28 @@ class ImapFilter: ) self._init_log() signal(SIGINT, self._signal_handler) - self._convert_filters() - if self.config['filters_file']: - self._read_filters_file() - if len(self.config['filter']) == 0: + with open(self.config['filters_file'], 'r', encoding='UTF-8') as filters_file: + self.config['mailboxes'] = json.load(filters_file) + if len(self.config['mailboxes']) == 0: self._log.error( - "You must indicate either a filter or a filters-file. Use --help to see more details." + "Filters file is empty. Use --help to see more details." ) sys.exit(1) self.connect_imap() self._process_filters() - - def _read_filters_file(self): - with open(self.config['filters_file'], 'r', encoding='UTF-8') as filters_file: - new_filters = json.load(filters_file) - for new_filter in new_filters: - self.config['filter'].append(new_filter) + self._log.debug( + "Took %s seconds to process", + time.time() - start_time + ) def _process_filters(self): - for mfilter in self.config['filter']: - self.matches = 0 - self._log.debug("Processing filter '%s'...", mfilter) - self.imap.select(mailbox=mfilter['mailbox'], readonly=False) - self._log.debug("Searching for all messages in mailbox '%s'...", mfilter['mailbox']) + for mailbox in self.config['mailboxes']: + self._log.debug( + "Processing mailbox '%s'...", + mailbox['mailbox'] + ) + self.imap.select(mailbox=mailbox['mailbox'], readonly=False) + self._log.debug("Searching for all messages in mailbox '%s'...", mailbox['mailbox']) typ, data = self.imap.search('UTF-8', 'ALL') if typ != 'OK': self._log.error('Error, server replied: %s', data) @@ -68,9 +70,10 @@ class ImapFilter: self._log.debug( "Processing %s messages in mailbox '%s'...", total_msgs, - mfilter['mailbox'] + mailbox['mailbox'] ) msg_count = 0 + self.matches = 0 for message_id in all_msgs_uids: msg_count += 1 self._log.debug( @@ -87,25 +90,38 @@ class ImapFilter: if typ != 'OK': self._log.error('Error, server replied: %s', unseen_data) return False - self._process_message(message_id, data[0], mfilter) + for mfilter in mailbox['filters']: + self._log.debug("Processing filter '%s'...", mfilter['name']) + self._process_message(message_id, data[0], mfilter) except imaplib.IMAP4.error as error: self._log.error( "Error fetching message. %s", error ) - self._log.debug( - "A total of %s matches for this filter", - self.matches + if self.matches > 0: + self._log.info( + "A total of %s matches for this mailbox", + self.matches + ) + else: + self._log.info( + "No matches for this mailbox" + ) + try: + self.imap.expunge() + except imaplib.IMAP4.abort as error: + self._log.error( + "Error expunging connection. %s", + error ) - self.imap.expunge() + return True def _process_message(self, message_id, data, mfilter): '''Process a mail message''' if isinstance(data[1], int): self._log.warning( - "Response part is integer %s in data '%s'. Try again.", - data[1], - data[0] + "Unexpected response fetching message: '%s'. Try again.", + data ) return False part = data[1].decode('utf-8') @@ -115,31 +131,52 @@ class ImapFilter: field_data = decoded_field[0][0] else: field_data = decoded_field[0][0].decode() - match = re.match(mfilter['regexp'], field_data) + if 'words' in mfilter: + regexp = '^(?=.*' + '.*|.*'.join(mfilter['words']) + '.*)' + else: + if 'regexp' in mfilter: + regexp = mfilter['regexp'] + else: + self._log.error( + "The filter '%s' doesn't have a 'words' or 'regexp' value. %s", + mfilter['name'], + mfilter + ) + match = re.match(regexp, field_data) if match: self._log.info( - "Field '%s' => '%s', matches '%s'", + "Field '%s' => '%s', matches filter '%s'", mfilter['field'], field_data, - mfilter['regexp'] + mfilter['name'] ) self.matches += 1 if self.config['dummy']: self._log.info('Doing nothing (dummy run)') else: self._do_filter(message_id, mfilter) + return True def _do_filter(self, message_id, mfilter): if f"_action_{mfilter['action']}" in dir(self): function = getattr(self, f"_action_{mfilter['action']}") + # try: result = function(message_id, mfilter) self._log.debug( "Result: %s", result ) + # except Exception as error: + # self._log.error( + # b"Error performing '%s' action with message %s. Filter: %s. Error: %s", + # mfilter['action'], + # message_id, + # mfilter, + # error + # ) def _action_move(self, message_id, mfilter): - self._log.debug( + self._log.info( "Moving message '%s' to '%s'...", message_id, mfilter['destination'] @@ -152,7 +189,7 @@ class ImapFilter: "Creating mailbox '%s'...", mailbox ) - typ, data = self.imap.create(mailbox) + typ, data = self.imap.create(mailbox.encode('utf-8', 'replace')) if typ != 'OK': self._log.error( 'Error creating mailbox %s, server replied: %s', @@ -163,16 +200,20 @@ class ImapFilter: return True def _action_copy(self, message_id, mfilter): - self._log.debug( + self._log.info( "Copying message '%s' to '%s'...", message_id, mfilter['destination'] ) - typ, data = self.imap.copy(message_id, mfilter['destination']) + mailbox_encoded = codecs.encode( + mfilter['destination'], + encoding="utf-7" + ).replace(b"+", b"&") + typ, data = self.imap.copy(message_id, mailbox_encoded) if typ != 'OK': if b'[TRYCREATE]' in data[0]: - if self._create_mailbox(mfilter['destination']): - typ, data = self.imap.copy(message_id, mfilter['destination']) + if self._create_mailbox(mailbox_encoded): + typ, data = self.imap.copy(message_id,mailbox_encoded) if typ != 'OK': self._log.error( 'Error copying message to %s, server replied: %s', @@ -190,7 +231,7 @@ class ImapFilter: return True def _action_delete(self, message_id, mfilter): - self._log.debug( + self._log.info( "Deleting message '%s'...", message_id ) @@ -205,7 +246,7 @@ class ImapFilter: return True def _action_mark_seen(self, message_id, mfilter): - self._log.debug( + self._log.info( "Marking as seen message '%s'...", message_id ) @@ -219,12 +260,6 @@ class ImapFilter: return False return True - def _convert_filters(self): - new_filter = [] - for old_filter in self.config['filter']: - new_filter.append(json.loads(old_filter)) - self.config['filter'] = new_filter - def connect_imap(self): '''Create connection object to the IMAP server''' self._log.debug( @@ -345,12 +380,7 @@ class ImapFilter: help='Whether to use a secure connection or not.' ) @click.option( - '--filter', '-f', required=False, - multiple=True, - help='Filter rule.' -) -@click.option( - '--filters-file', '-F', required=False, + '--filters-file', '-F', required=True, help='JSON file containing a list of dictionaries with the filter rules.' ) @click_config_file.configuration_option() diff --git a/test.json b/test.json index 1762f29..e0c5674 100644 --- a/test.json +++ b/test.json @@ -1,10 +1,361 @@ [ { - "name": "python", - "mailbox": "Feeds/Mastodon/Test", - "field": "Subject", - "regexp": "^(?=.*nvidia.*|.*Nvidia.*|.*ansible.*|.*Ansible.*|.*ubuntu.*|.*Ubuntu.*|.*blender.*|.*Blender.*|.*technology.*|.*Technology.*|.*msdos.*|.*dosbox.*|.*python.*|.*Python.*|.*devops.*|.*DevOps.*|.*forgejo.*|.*Forgejo.*|.*smartphone.*|.*Smartphone.*|.*SmartPhone.*|.*Android.*|.*android.*|.*github.*|.*Github.*|.*gitlab.*|.*Gitlab.*|.*#programming.*|.*TechCrunch.*|.*researchbuzz.*|.*ripencc.*|.*FCAI.*|.*TechDesk.*|.*#selfhosting.*|.*#selfhosted.*|.*#ai.*|.*#deepfake.*|.*#chatgpt.*|.*#tietotekniikka.*|.*#videogames.*|.*#software.*|.*#retrogaming.*|.*#web.*|.*#gaming.*|.*#pcgaming.*|.*#gamedev.*|.*#fairphone.*|.*#ebike.*|.*#windows.*|.*#speedrun.*|.*#cloud.*|.*#euhosted.*|.*#python.*|.*#steamdeck.*|.*#indiegame.*|.*#webdev.*|.*#rustlang.*|.*#valve.*|.*#intel.*|.*#dns.*|.*#digitaljustice.*|.*#tv.*)", - "action": "move", - "destination": "Feeds/Mastodon/Test/tech" + "mailbox": "Feeds/Mastodon", + "filters": [ + { + "name": "tech", + "field": "Subject", + "words": [ + "nvidia", + "ansible", + "ubuntu", + "#blender", + "#technology", + "msdos", + "dosbox", + "#python", + "devops", + "forgejo", + "#smartphone", + "#android", + "github", + "gitlab", + "#programming", + "TechCrunch", + "researchbuzz", + "ripencc", + "FCAI", + "TechDesk", + "selfhost", + "#ai", + "#deepfake", + "#chatgpt", + "#tietotekniikka", + "#videogames", + "#software", + "#retrogaming", + "#web", + "#gaming", + "#pcgaming", + "#gamedev", + "#fairphone", + "#ebike", + "#windows", + "#speedrun", + "#cloud", + "#euhosted", + "#python", + "#steamdeck", + "#indiegame", + "#webdev", + "#rustlang", + "#valve", + "#intel", + "#dns", + "#digitaljustice", + "#tv", + "#internetarchive" + ], + "action": "move", + "destination": "Feeds/Mastodon/Tech" + }, + { + "name": "tampere", + "field": "Subject", + "words": [ + "tampere", + "Tampere", + "pirkanmaa", + "Pirkanmaa", + "kaukajärv", + "Kaukajärv", + "#pirkkala" + ], + "action": "move", + "destination": "Feeds/Mastodon/Tampere" + }, + { + "name": "tampere_body", + "field": "Body", + "regexp": "^.*\"content\": \"(?=.*tampere.*|.*Tampere.*|.*pirkanmaa.*|.*Pirkanmaa.*|.*kaukajärv.*|.*Kaukajärv.*|.*#pirkkala)", + "action": "move", + "destination": "Feeds/Mastodon/Tampere" + }, + { + "name": "infosec", + "field": "Subject", + "words": [ + "#infosec", + "#hacking", + "#defcon", + "thehackernews", + "#cybersecurity", + "#opsec", + "#surveillance", + "#encryption", + "#security", + "#spyware", + "#ninjalab", + "#yubikey", + "#yubico" + ], + "action": "move", + "destination": "Feeds/Mastodon/InfoSec" + }, + { + "name": "infosec_body", + "field": "Body", + "regexp": "^.*\"content\": \"(?=.*infosec.*|.*InfoSec .*|.*hacking.*|.*Hacking .*|.*defcon.*|.*DefCon .*|.*thehackernews.*|.*cybersecurity .*|.*Cybersecurity .*|.*opsec .*|.*OpSec .*|.*#surveillance.*|.*#encryption.*|.*#security.*|.*#spyware .*|.*#ninjalab.*|.*#yubikey.*|.*#yubico.*)", + "action": "move", + "destination": "Feeds/Mastodon/InfoSec" + }, + { + "name": "movies", + "field": "Subject", + "words": [ + "#movie", + "#film", + "#trailer", + "#moviesuggestion", + "rottentomatoes", + "RottenTomatoes", + "#pelicula", + "#cine" + ], + "action": "move", + "destination": "Feeds/Mastodon/Movies" + }, + { + "name": "almeria", + "field": "Subject", + "words": [ + "#almeria", + "#almería", + "#cabodegata", + "#costadelaluz" + ], + "action": "move", + "destination": "Feeds/Mastodon/Almería" + }, + { + "name": "almeria", + "field": "Body", + "regexp": "^.*\"content\": \"(?=.*almeria.*|.*Almeria .*|.*Almería.*|.*almería .*|.*cabo de gata.*|.*Cabo De Gata .*|.*Cabo de Gata.*|.*Costa de la Luz .*|.*Costa De La Luz .*|.*costa de la luz .*)", + "action": "move", + "destination": "Feeds/Mastodon/Almería" + }, + { + "name": "Jobs", + "field": "Subject", + "words": [ + "#gethired", + "#joboffer", + "#hiring", + "#flossjobs", + "#layoffs", + "#lookingforwork", + "#osjobjub", + "#trabajo", + "#laboral", + "#jobhunt", + "#fedihire", + "#fedijobs", + "#getfedihired", + "#trabajoremoto", + "#remotework", + "#wfh", + "#opensourcejobs" + ], + "action": "move", + "destination": "Feeds/Mastodon/Jobs" + }, + { + "name": "OpenSource", + "field": "Subject", + "words": [ + "#pinephone", + "#pinetime", + "#pinetab", + "#jellyfin", + "linux", + "foss", + "opensource", + "freesoftware", + "LibreOffice", + "libreoffice", + "#sailfishos", + "#nextcloud", + "#righttorepair", + "#phosh", + "#debian", + "#openstreetmap", + "#mobian", + "#gnome", + "#kde", + "#xfce", + "#wayland", + "#waydroid", + "#steam", + "#godot", + "#inkscape", + "#homeassistant", + "#openhome" + ], + "action": "move", + "destination": "Feeds/Mastodon/linux" + }, + { + "name": "Cats", + "field": "Subject", + "words": [ + "#caturday", + "#catsofmastodon", + "#martesdegatos" + ], + "action": "move", + "destination": "Feeds/Mastodon/Caturday" + }, + { + "name": "Dogs", + "field": "Subject", + "words": [ + "#dogsofmastodon" + ], + "action": "move", + "destination": "Feeds/Mastodon/DogsOfMastodon" + }, + { + "name": "Fediverse", + "field": "Subject", + "words": [ + "#fediverse", + "#mastodon", + "peertube", + "pixelfeed", + "activitypub", + "ActivityPub", + "#mastodonadmin" + ], + "action": "move", + "destination": "Feeds/Mastodon/Fediverse" + }, + { + "name": "Humor", + "field": "Subject", + "words": [ + "#humor", + "#funny", + "#meme" + ], + "action": "move", + "destination": "Feeds/Mastodon/Humor" + }, + { + "name": "New", + "field": "Subject", + "words": [ + "#news", + "#noticias", + "ElSaltoDiario", + "earthquake", + "zoom_earth" + ], + "action": "move", + "destination": "Feeds/Mastodon/News" + }, + { + "name": "Jazz", + "field": "Subject", + "words": [ + "jazz", + "Jazz" + ], + "action": "move", + "destination": "Feeds/Mastodon/News" + }, + { + "name": "USPolitics", + "field": "Subject", + "words": [ + "#uspol", + "#kamalaharris", + "#trump", + "#biden", + "#gop", + "#texas" + ], + "action": "move", + "destination": "Feeds/Mastodon/USPolitics" + }, + { + "name": "España", + "field": "Subject", + "words": [ + "#spanish", + "#spain", + "#españa", + "#espanja", + "#malaga", + "#málaga", + "#madrid", + "#andalucia", + "#sevilla", + "#barcelona", + "#mallorca", + "#ibiza", + "#canaryislands", + "#balear", + "España" + ], + "action": "move", + "destination": "Feeds/Mastodon/España" + }, + { + "name": "Birds", + "field": "Subject", + "words": [ + "#birds" + ], + "action": "move", + "destination": "Feeds/Mastodon/Birds" + }, + { + "name": "Finland", + "field": "Subject", + "words": [ + "#finland", + "#suomi" + ], + "action": "move", + "destination": "Feeds/Mastodon/Finland" + }, + { + "name": "Autism", + "field": "Subject", + "words": [ + "#actuallyautistic", + "#autism", + "#autistic" + ], + "action": "move", + "destination": "Feeds/Mastodon/Autism" + }, + { + "name": "Art", + "field": "Subject", + "words": [ + "#art", + "#artist", + "#streetart", + "#mural", + "#photography", + "#fotografie", + "#photoart", + "#urbanart" + ], + "action": "move", + "destination": "Feeds/Mastodon/Art" + } + ] } ]