Add alternative method by selecting headers to match

This commit is contained in:
Antonio J. Delgado 2024-09-22 12:47:31 +03:00
parent 545dab0b59
commit 41611197e8
4 changed files with 44 additions and 10 deletions

View file

@ -7,10 +7,10 @@ Homepage = "https://susurrando.com"
[project]
name = "remove_duplicate_imap_messages"
version = "0.0.1"
version = "0.0.2"
description = "Given an IMAP folder look for duplicate messages and optionally delete them"
readme = "README.md"
authors = [{ name = "Antonio J. Delgado", email = "" }]
authors = [{ name = "Antonio J. Delgado", email = "ad@susurrando.com" }]
license = { file = "LICENSE" }
classifiers = [
"License :: OSI Approved :: GPLv3 License",

View file

@ -37,6 +37,11 @@ class RemoveDuplicateImapMessages:
self._init_log()
signal(SIGINT, self._signal_handler)
self.messages_hashes = []
if self.config['method'] == 'headers' and 'header' in self.config and len(self.config['header']) == 0:
self._log.error(
"Error in parameters. If you specify the 'headers' method, you must indicate at least one --header to check"
)
sys.exit(1)
self.duplicates_count = 0
if 'mailbox' not in self.config:
self.config['mailbox'] = []
@ -82,6 +87,19 @@ class RemoveDuplicateImapMessages:
self.imap.expunge()
return True
def _get_header(self, message, header):
    '''Return the fully decoded value of a header of a message.

    email.header.decode_header() splits a header into (value, charset)
    fragments. Every fragment is decoded with its own declared charset
    and the fragments are concatenated, so headers made of several
    encoded words (or mixing plain and encoded text) are not truncated
    to their first fragment.

    Parameters:
        message: parsed message object providing get(name, default).
        header: name of the header to read.

    Returns:
        The decoded header as a str; '' when the header is missing.
    '''
    fragments = []
    for value, charset in email.header.decode_header(message.get(header, "")):
        if isinstance(value, bytes):
            try:
                # Undecodable bytes are replaced instead of raising, so a
                # single malformed header does not abort the whole run.
                value = value.decode(charset or 'utf-8', errors='replace')
            except LookupError:
                # Unknown charset label (e.g. 'unknown-8bit'): fall back
                # to UTF-8, the codec used when no charset is declared.
                value = value.decode('utf-8', errors='replace')
        fragments.append(value)
    header_value = ''.join(fragments)
    self._log.debug(
        "Field '%s' is '%s'",
        header,
        header_value
    )
    return header_value
def _process_message(self, message_id, data):
'''Process a mail message'''
if isinstance(data[1], int):
@ -94,16 +112,16 @@ class RemoveDuplicateImapMessages:
part = data[1].decode('utf-8')
message = email.message_from_string(part)
hash_obj = hashlib.sha256()
hash_obj.update(message.as_bytes())
if self.config['method'] == 'headers':
for header in self.config['header']:
hash_obj.update(self._get_header(message, header).encode('UTF-8'))
else:
hash_obj.update(message.as_bytes())
hash_obj.digest()
msg_hash = hash_obj.hexdigest()
self._log.debug("Hash '%s'", msg_hash)
if msg_hash in self.messages_hashes:
decoded_subject = email.header.decode_header(message.get("Subject", ""))
if isinstance(decoded_subject[0][0], str):
msg_subject = decoded_subject[0][0]
else:
msg_subject = decoded_subject[0][0].decode()
msg_subject = self._get_header(message, 'Subject')
self._log.info(
"Message with subject '%s' is duplicate (hash check) of another",
msg_subject
@ -241,6 +259,22 @@ class RemoveDuplicateImapMessages:
multiple=True,
help='IMAP mailboxes to check. Will compare messages between all mailboxes. Default: INBOX'
)
@click.option(
'--method',
'-m',
default='full',
type=click.Choice(
["full", "headers"],
case_sensitive=False,
),
help="Method to decide messages are duplicated. The 'full' method will check that the whole message and headers are the same. The 'headers' method will check that selected headers (with --header) are the same."
)
@click.option(
'--header', '-f',
multiple=True,
help="Fields to compare when method is 'headers'"
)
@click_config_file.configuration_option()
def __main__(**kwargs):
return RemoveDuplicateImapMessages(**kwargs)

View file

@ -1,6 +1,6 @@
[metadata]
name = remove_duplicate_imap_messages
version = 0.0.1
version = 0.0.2
[options]
packages = remove_duplicate_imap_messages

View file

@ -13,7 +13,7 @@ setuptools.setup(
author="Antonio J. Delgado",
version=config['metadata']['version'],
name=config['metadata']['name'],
author_email="",
author_email="ad@susurrando.com",
url="https://susurrando.com",
description="Given an IMAP folder look for duplicate messages and optionally delete them",
long_description="README.md",