From f64229786832706904a63dc0f0829e00c9d87cb8 Mon Sep 17 00:00:00 2001 From: "Antonio J. Delgado" Date: Tue, 3 Sep 2024 23:18:45 +0300 Subject: [PATCH] Add initial version --- pyproject.toml | 4 +- .../remove_duplicate_imap_messages.py | 165 +++++++++++++++++- setup.py | 2 +- 3 files changed, 162 insertions(+), 9 deletions(-) mode change 100644 => 100755 remove_duplicate_imap_messages/remove_duplicate_imap_messages.py diff --git a/pyproject.toml b/pyproject.toml index 33436a1..a032185 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" [project.urls] -Homepage = "" +Homepage = "https://susurrando.com" [project] name = "remove_duplicate_imap_messages" @@ -22,4 +22,4 @@ dependencies = [ "click", "click_config_file", ] -requires-python = ">=3" \ No newline at end of file +requires-python = ">=3" diff --git a/remove_duplicate_imap_messages/remove_duplicate_imap_messages.py b/remove_duplicate_imap_messages/remove_duplicate_imap_messages.py old mode 100644 new mode 100755 index ced5952..d44d9e3 --- a/remove_duplicate_imap_messages/remove_duplicate_imap_messages.py +++ b/remove_duplicate_imap_messages/remove_duplicate_imap_messages.py @@ -3,17 +3,22 @@ # # This script is licensed under GNU GPL version 2.0 or above # (c) 2024 Antonio J. 
# Methods of RemoveDuplicateImapMessages, written out with their real line
# structure (indent one level when placed inside the class body).

def _process_mailbox(self, mailbox):
    '''Scan one mailbox and flag every message whose content was already seen.

    Messages are fetched by sequence number. Duplicates are only *flagged*
    while iterating; the single EXPUNGE happens after the loop, because an
    EXPUNGE renumbers every later message and would make the remaining
    sequence numbers point at the wrong messages.

    Returns True when every message could be fetched, False when selecting,
    searching or fetching failed.
    '''
    typ, data = self.imap.select(mailbox=mailbox, readonly=False)
    if typ != 'OK':
        # Without a selected mailbox SEARCH/FETCH are illegal commands.
        self._log.error(
            "Error selecting mailbox '%s', server replied: %s",
            mailbox,
            data
        )
        return False
    self._log.debug("Searching for all messages in mailbox '%s'...", mailbox)
    # 'ALL' carries no text to match, so no CHARSET is sent to the server.
    typ, data = self.imap.search(None, 'ALL')
    self._log.debug('Response: %s', typ)
    if typ != 'OK':
        self._log.error('Error, server replied: %s', data)
        return False
    all_msgs = data[0].split() if data and data[0] else []
    total_msgs = len(all_msgs)
    self._log.debug("Processing %s messages in mailbox '%s'...", total_msgs, mailbox)

    duplicates_before = self.duplicates_count
    success = True
    for msg_count, message_id in enumerate(all_msgs, start=1):
        self._log.debug(
            "Fetching message %s of %s (%s duplicates found)...",
            msg_count,
            total_msgs,
            self.duplicates_count
        )
        typ, data = self.imap.fetch(message_id, '(RFC822)')
        self._log.debug('Response: %s', typ)
        if typ != 'OK':
            self._log.error('Error, server replied: %s', data)
            success = False
            break
        self._process_message(message_id, data[0] if data else None)

    # config['dummy'] is only consulted when a duplicate was found, i.e. after
    # _process_message has already read the same key.
    if self.duplicates_count > duplicates_before and not self.config['dummy']:
        result = self.imap.expunge()
        self._log.debug("Expunge result: %s", result)
    return success


def _process_message(self, message_id, data):
    '''Hash one fetched message and flag it as deleted if the hash is known.

    message_id is the identifier handed to FETCH/STORE; data is the first
    item of the FETCH response, expected to be a (header, raw bytes) pair.

    Returns False when the response does not contain a message body,
    True otherwise. The caller is responsible for the EXPUNGE.
    '''
    from email.header import decode_header, make_header

    if (
        not isinstance(data, tuple)
        or len(data) < 2
        or not isinstance(data[1], (bytes, bytearray))
    ):
        self._log.warning(
            "Unexpected FETCH response for message '%s': %r. Skipping it.",
            message_id,
            data
        )
        return False

    # Hash the bytes exactly as the server sent them: no decoding step, so
    # mails in any 8-bit encoding are handled.
    raw_message = bytes(data[1])
    msg_hash = hashlib.sha256(raw_message).hexdigest()
    self._log.debug("Hash '%s'", msg_hash)
    if msg_hash not in self.messages_hashes:
        self.messages_hashes.append(msg_hash)
        return True

    # Only duplicates need parsing, and only to show a readable subject.
    message = email.message_from_bytes(raw_message)
    raw_subject = message.get("Subject", "")
    try:
        msg_subject = str(make_header(decode_header(raw_subject)))
    except (LookupError, UnicodeError):
        # Unknown or wrongly declared charset: show the header undecoded.
        msg_subject = str(raw_subject)
    self._log.info(
        "Message with subject '%s' is duplicate (hash check) of another",
        msg_subject
    )
    if self.config['dummy']:
        self._log.info("Duplicate message won't be delete (dummy run).")
    else:
        self._log.info("Deleting message '%s'", message_id)
        result = self.imap.store(message_id, '+FLAGS', '\\Deleted')
        self._log.info("Result: %s", result)
    self.duplicates_count += 1
    return True


def connect_imap(self):
    '''Open the connection to the IMAP server and log in.

    Reads imap_server, imap_port, ssl, imap_user and imap_password from
    self.config and stores the connection in self.imap.

    Exits the process with status 1 (SSL connection failed), 2 (plain
    connection failed) or 3 (login failed).
    '''
    server = self.config['imap_server']
    port = self.config['imap_port']
    self._log.debug('Connecting to server %s:%s...', server, port)
    if self.config['ssl']:
        factory = imaplib.IMAP4_SSL
        failure = "Error connecting securely to IMAP server '%s'. %s"
        exit_code = 1
    else:
        factory = imaplib.IMAP4
        failure = "Error connecting to IMAP server '%s'. %s"
        exit_code = 2
    try:
        self.imap = factory(server, port)
    except Exception as error:  # top-level boundary: report and exit
        self._log.error(failure, server, error)
        sys.exit(exit_code)

    user = self.config['imap_user']
    try:
        self._log.debug('Authenticating as user %s...', user)
        self.imap.login(user, self.config['imap_password'])
    except Exception as error:  # top-level boundary: report and exit
        self._log.error("Error while login as '%s'. %s", user, error)
        # CLOSE is only legal with a selected mailbox; LOGOUT is the command
        # that is valid right after a failed login.
        try:
            self.imap.logout()
        except (imaplib.IMAP4.error, OSError) as logout_error:
            self._log.debug('Error while logging out: %s', logout_error)
        sys.exit(3)


def _signal_handler(self, signal_received, frame):
    '''Handle SIGINT: release the IMAP connection, then exit with status 0.'''
    self._log.info(
        'SIGINT or CTRL-C (%s %s) detected. Exiting gracefully',
        signal_received,
        frame
    )
    # The signal may arrive before connect_imap() has created self.imap.
    connection = getattr(self, 'imap', None)
    if connection is not None:
        try:
            connection.close()
        except (imaplib.IMAP4.error, OSError) as error:
            # Expected when no mailbox is currently selected.
            self._log.debug('Could not close the mailbox: %s', error)
        try:
            connection.logout()
        except (imaplib.IMAP4.error, OSError) as error:
            self._log.debug('Error while logging out: %s', error)
    sys.exit(0)
+) +@click.option('--imap-server', '-s', default='localhost', help='IMAP server') +@click.option( + '--imap-port', '-p', default=993, + type=click.IntRange(1, 65535), help='IMAP server port' +) +@click.option( + '--imap-user', '-u', required=True, + help='User name to use for the connection to the IMAP server' +) +@click.option( + '--imap-password', '-P', required=True, + help='Password to connect to the IMAP server. Warning! Use a configuration file to avoid revelaing your passwords.' +) +@click.option('--ssl', '-S', default=True, help='Whether to use a secure connection or not.') +@click.option( + '--mailbox', '-m', + multiple=True, + help='IMAP mailboxes to check. Will compare messages between all mailboxes. Default: INBOX' +) @click_config_file.configuration_option() def __main__(**kwargs): return RemoveDuplicateImapMessages(**kwargs) diff --git a/setup.py b/setup.py index 54e7493..a6c9aed 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ setuptools.setup( version=config['metadata']['version'], name=config['metadata']['name'], author_email="", - url="", + url="https://susurrando.com", description="Given an IMAP folder look for duplicate messages and optionally delete them", long_description="README.md", long_description_content_type="text/markdown",