Add alternative method by selecting headers to match
This commit is contained in:
parent
545dab0b59
commit
41611197e8
4 changed files with 44 additions and 10 deletions
|
@ -7,10 +7,10 @@ Homepage = "https://susurrando.com"
|
|||
|
||||
[project]
|
||||
name = "remove_duplicate_imap_messages"
|
||||
version = "0.0.1"
|
||||
version = "0.0.2"
|
||||
description = "Given an IMAP folder look for duplicate messages and optionally delete them"
|
||||
readme = "README.md"
|
||||
authors = [{ name = "Antonio J. Delgado", email = "" }]
|
||||
authors = [{ name = "Antonio J. Delgado", email = "ad@susurrando.com" }]
|
||||
license = { file = "LICENSE" }
|
||||
classifiers = [
|
||||
"License :: OSI Approved :: GPLv3 License",
|
||||
|
|
|
@ -37,6 +37,11 @@ class RemoveDuplicateImapMessages:
|
|||
self._init_log()
|
||||
signal(SIGINT, self._signal_handler)
|
||||
self.messages_hashes = []
|
||||
if self.config['method'] == 'headers' and 'header' in self.config and len(self.config['header']) == 0:
|
||||
self._log.error(
|
||||
"Error in parameters. If you specify the 'headers' method, you must indicate at least one --header to check"
|
||||
)
|
||||
sys.exit(1)
|
||||
self.duplicates_count = 0
|
||||
if 'mailbox' not in self.config:
|
||||
self.config['mailbox'] = []
|
||||
|
@ -82,6 +87,19 @@ class RemoveDuplicateImapMessages:
|
|||
self.imap.expunge()
|
||||
return True
|
||||
|
||||
def _get_header(self, message, header):
    '''Return the fully decoded text of a header of a message.

    The raw header is split with email.header.decode_header() into
    (value, charset) fragments. Every fragment is decoded with its own
    declared charset and the results are concatenated, so headers made of
    several encoded words, or encoded in something other than UTF-8, are
    returned complete instead of truncated or raising UnicodeDecodeError.

    message: object with a mapping-like get(name, default) method, such as
        the result of email.message_from_string().
    header: name of the header to read, for example 'Subject'.

    Returns the decoded header as a str, or an empty string when the
    message does not have that header.
    '''
    fragments = []
    for value, charset in email.header.decode_header(message.get(header, "")):
        if isinstance(value, str):
            # Headers without encoded words come back as plain str
            fragments.append(value)
            continue
        try:
            # Fragments without a declared charset keep the previous
            # behaviour of being read as UTF-8
            fragments.append(value.decode(charset or 'utf-8', errors='replace'))
        except LookupError:
            # Charset name unknown to Python (e.g. 'unknown-8bit')
            fragments.append(value.decode('utf-8', errors='replace'))
    header_value = ''.join(fragments)
    self._log.debug(
        "Field '%s' is '%s'",
        header,
        header_value
    )
    return header_value
|
||||
|
||||
def _process_message(self, message_id, data):
|
||||
'''Process a mail message'''
|
||||
if isinstance(data[1], int):
|
||||
|
@ -94,16 +112,16 @@ class RemoveDuplicateImapMessages:
|
|||
part = data[1].decode('utf-8')
|
||||
message = email.message_from_string(part)
|
||||
hash_obj = hashlib.sha256()
|
||||
hash_obj.update(message.as_bytes())
|
||||
if self.config['method'] == 'headers':
|
||||
for header in self.config['header']:
|
||||
hash_obj.update(self._get_header(message, header).encode('UTF-8'))
|
||||
else:
|
||||
hash_obj.update(message.as_bytes())
|
||||
hash_obj.digest()
|
||||
msg_hash = hash_obj.hexdigest()
|
||||
self._log.debug("Hash '%s'", msg_hash)
|
||||
if msg_hash in self.messages_hashes:
|
||||
decoded_subject = email.header.decode_header(message.get("Subject", ""))
|
||||
if isinstance(decoded_subject[0][0], str):
|
||||
msg_subject = decoded_subject[0][0]
|
||||
else:
|
||||
msg_subject = decoded_subject[0][0].decode()
|
||||
msg_subject = self._get_header(message, 'Subject')
|
||||
self._log.info(
|
||||
"Message with subject '%s' is duplicate (hash check) of another",
|
||||
msg_subject
|
||||
|
@ -241,6 +259,22 @@ class RemoveDuplicateImapMessages:
|
|||
multiple=True,
|
||||
help='IMAP mailboxes to check. Will compare messages between all mailboxes. Default: INBOX'
|
||||
)
|
||||
@click.option(
|
||||
'--method',
|
||||
'-m',
|
||||
default='full',
|
||||
type=click.Choice(
|
||||
["full", "headers"],
|
||||
case_sensitive=False,
|
||||
),
|
||||
help="Method to decide messages are duplicated. The 'full' method will check that the whole message and headers are the same. The 'headers' method will check that selected headers (with --header) are the same."
|
||||
)
|
||||
@click.option(
|
||||
'--header', '-f',
|
||||
multiple=True,
|
||||
help="Fields to compare when method is 'headers'"
|
||||
)
|
||||
|
||||
@click_config_file.configuration_option()
|
||||
def __main__(**kwargs):
|
||||
return RemoveDuplicateImapMessages(**kwargs)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[metadata]
|
||||
name = remove_duplicate_imap_messages
|
||||
version = 0.0.1
|
||||
version = 0.0.2
|
||||
|
||||
[options]
|
||||
packages = remove_duplicate_imap_messages
|
||||
|
|
2
setup.py
2
setup.py
|
@ -13,7 +13,7 @@ setuptools.setup(
|
|||
author="Antonio J. Delgado",
|
||||
version=config['metadata']['version'],
|
||||
name=config['metadata']['name'],
|
||||
author_email="",
|
||||
author_email="ad@susurrando.com",
|
||||
url="https://susurrando.com",
|
||||
description="Given an IMAP folder look for duplicate messages and optionally delete them",
|
||||
long_description="README.md",
|
||||
|
|
Loading…
Reference in a new issue