diff --git a/pyproject.toml b/pyproject.toml
index a032185..30ff2de 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,10 +7,10 @@ Homepage = "https://susurrando.com"
 
 [project]
 name = "remove_duplicate_imap_messages"
-version = "0.0.1"
+version = "0.0.2"
 description = "Given an IMAP folder look for duplicate messages and optionally delete them"
 readme = "README.md"
-authors = [{ name = "Antonio J. Delgado", email = "" }]
+authors = [{ name = "Antonio J. Delgado", email = "ad@susurrando.com" }]
 license = { file = "LICENSE" }
 classifiers = [
     "License :: OSI Approved :: GPLv3 License",
diff --git a/remove_duplicate_imap_messages/remove_duplicate_imap_messages.py b/remove_duplicate_imap_messages/remove_duplicate_imap_messages.py
index 26acfc7..8c535e9 100755
--- a/remove_duplicate_imap_messages/remove_duplicate_imap_messages.py
+++ b/remove_duplicate_imap_messages/remove_duplicate_imap_messages.py
@@ -37,6 +37,12 @@ class RemoveDuplicateImapMessages:
         self._init_log()
         signal(SIGINT, self._signal_handler)
         self.messages_hashes = []
+        # The 'headers' method needs at least one header to build the hash from
+        if self.config.get('method') == 'headers' and not self.config.get('header'):
+            self._log.error(
+                "Error in parameters. If you specify the 'headers' method, you must indicate at least one --header to check"
+            )
+            sys.exit(1)
         self.duplicates_count = 0
         if 'mailbox' not in self.config:
             self.config['mailbox'] = []
@@ -82,6 +88,26 @@ class RemoveDuplicateImapMessages:
         self.imap.expunge()
         return True
 
+    def _get_header(self, message, header):
+        '''Return the fully decoded value of a header ('' when it is absent)'''
+        fragments = []
+        for value, charset in email.header.decode_header(message.get(header, "")):
+            if isinstance(value, bytes):
+                # Honour the charset declared by the encoded-word; fall back to
+                # UTF-8 when it is missing or unknown instead of raising
+                try:
+                    value = value.decode(charset or 'utf-8', errors='replace')
+                except LookupError:
+                    value = value.decode('utf-8', errors='replace')
+            fragments.append(value)
+        header_value = ''.join(fragments)
+        self._log.debug(
+            "Field '%s' is '%s'",
+            header,
+            header_value
+        )
+        return header_value
+
     def _process_message(self, message_id, data):
         '''Process a mail message'''
         if isinstance(data[1], int):
@@ -94,16 +120,18 @@ class RemoveDuplicateImapMessages:
             part = data[1].decode('utf-8')
         message = email.message_from_string(part)
         hash_obj = hashlib.sha256()
-        hash_obj.update(message.as_bytes())
+        if self.config['method'] == 'headers':
+            for header in self.config['header']:
+                hash_obj.update(self._get_header(message, header).encode('UTF-8'))
+                # Delimit values so ('ab', 'c') and ('a', 'bc') hash differently
+                hash_obj.update(b'\0')
+        else:
+            hash_obj.update(message.as_bytes())
         hash_obj.digest()
         msg_hash = hash_obj.hexdigest()
         self._log.debug("Hash '%s'", msg_hash)
         if msg_hash in self.messages_hashes:
-            decoded_subject = email.header.decode_header(message.get("Subject", ""))
-            if isinstance(decoded_subject[0][0], str):
-                msg_subject = decoded_subject[0][0]
-            else:
-                msg_subject = decoded_subject[0][0].decode()
+            msg_subject = self._get_header(message, 'Subject')
             self._log.info(
                 "Message with subject '%s' is duplicate (hash check) of another",
                 msg_subject
@@ -241,6 +269,21 @@ class RemoveDuplicateImapMessages:
     multiple=True,
     help='IMAP mailboxes to check. Will compare messages between all mailboxes. Default: INBOX'
 )
+@click.option(
+    '--method',
+    '-m',
+    default='full',
+    type=click.Choice(
+        ["full", "headers"],
+        case_sensitive=False,
+    ),
+    help="Method to decide messages are duplicated. The 'full' method will check that the whole message and headers are the same. The 'headers' method will check that selected headers (with --header) are the same."
+)
+@click.option(
+    '--header', '-f',
+    multiple=True,
+    help="Fields to compare when method is 'headers'"
+)
 @click_config_file.configuration_option()
 def __main__(**kwargs):
     return RemoveDuplicateImapMessages(**kwargs)
diff --git a/setup.cfg b/setup.cfg
index 4c80b69..533dbf2 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = remove_duplicate_imap_messages
-version = 0.0.1
+version = 0.0.2
 
 [options]
 packages = remove_duplicate_imap_messages
diff --git a/setup.py b/setup.py
index a6c9aed..c900d22 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@ setuptools.setup(
     author="Antonio J. Delgado",
     version=config['metadata']['version'],
     name=config['metadata']['name'],
-    author_email="",
+    author_email="ad@susurrando.com",
     url="https://susurrando.com",
     description="Given an IMAP folder look for duplicate messages and optionally delete them",
     long_description="README.md",