Add alternative method by selecting headers to match
This commit is contained in:
parent
545dab0b59
commit
41611197e8
4 changed files with 44 additions and 10 deletions
|
@ -7,10 +7,10 @@ Homepage = "https://susurrando.com"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "remove_duplicate_imap_messages"
|
name = "remove_duplicate_imap_messages"
|
||||||
version = "0.0.1"
|
version = "0.0.2"
|
||||||
description = "Given an IMAP folder look for duplicate messages and optionally delete them"
|
description = "Given an IMAP folder look for duplicate messages and optionally delete them"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
authors = [{ name = "Antonio J. Delgado", email = "" }]
|
authors = [{ name = "Antonio J. Delgado", email = "ad@susurrando.com" }]
|
||||||
license = { file = "LICENSE" }
|
license = { file = "LICENSE" }
|
||||||
classifiers = [
|
classifiers = [
|
||||||
"License :: OSI Approved :: GPLv3 License",
|
"License :: OSI Approved :: GPLv3 License",
|
||||||
|
|
|
@ -37,6 +37,11 @@ class RemoveDuplicateImapMessages:
|
||||||
self._init_log()
|
self._init_log()
|
||||||
signal(SIGINT, self._signal_handler)
|
signal(SIGINT, self._signal_handler)
|
||||||
self.messages_hashes = []
|
self.messages_hashes = []
|
||||||
|
if self.config['method'] == 'headers' and 'header' in self.config and len(self.config['header']) == 0:
|
||||||
|
self._log.error(
|
||||||
|
"Error in parameters. If you specify the 'headers' method, you must indicate at least one --header to check"
|
||||||
|
)
|
||||||
|
sys.exit(1)
|
||||||
self.duplicates_count = 0
|
self.duplicates_count = 0
|
||||||
if 'mailbox' not in self.config:
|
if 'mailbox' not in self.config:
|
||||||
self.config['mailbox'] = []
|
self.config['mailbox'] = []
|
||||||
|
@ -82,6 +87,19 @@ class RemoveDuplicateImapMessages:
|
||||||
self.imap.expunge()
|
self.imap.expunge()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def _get_header(self, message, header):
    '''Return the decoded text of a single header of a message.

    Parameters:
        message: object offering a mapping-style ``get(name, default)``
            that yields the raw header value (for example the result of
            ``email.message_from_string``).
        header: name of the header to read.

    Returns:
        The header value as ``str``. A header that is absent yields an
        empty string.

    Every chunk reported by ``email.header.decode_header`` is decoded
    and concatenated. Using only the first chunk would truncate values
    that mix plain text with encoded words, which makes different
    messages look identical when headers are compared.
    '''
    raw_value = message.get(header, "")
    fragments = []
    for chunk, charset in email.header.decode_header(raw_value):
        if isinstance(chunk, bytes):
            # Honour the charset declared in the encoded word; chunks
            # without one are treated as UTF-8. Undecodable bytes are
            # replaced rather than aborting the whole run.
            try:
                chunk = chunk.decode(charset or "utf-8", errors="replace")
            except LookupError:
                # Charset name unknown to Python (e.g. 'unknown-8bit').
                chunk = chunk.decode("utf-8", errors="replace")
        fragments.append(chunk)
    header_value = "".join(fragments)
    self._log.debug(
        "Field '%s' is '%s'",
        header,
        header_value
    )
    return header_value
|
||||||
|
|
||||||
def _process_message(self, message_id, data):
|
def _process_message(self, message_id, data):
|
||||||
'''Process a mail message'''
|
'''Process a mail message'''
|
||||||
if isinstance(data[1], int):
|
if isinstance(data[1], int):
|
||||||
|
@ -94,16 +112,16 @@ class RemoveDuplicateImapMessages:
|
||||||
part = data[1].decode('utf-8')
|
part = data[1].decode('utf-8')
|
||||||
message = email.message_from_string(part)
|
message = email.message_from_string(part)
|
||||||
hash_obj = hashlib.sha256()
|
hash_obj = hashlib.sha256()
|
||||||
hash_obj.update(message.as_bytes())
|
if self.config['method'] == 'headers':
|
||||||
|
for header in self.config['header']:
|
||||||
|
hash_obj.update(self._get_header(message, header).encode('UTF-8'))
|
||||||
|
else:
|
||||||
|
hash_obj.update(message.as_bytes())
|
||||||
hash_obj.digest()
|
hash_obj.digest()
|
||||||
msg_hash = hash_obj.hexdigest()
|
msg_hash = hash_obj.hexdigest()
|
||||||
self._log.debug("Hash '%s'", msg_hash)
|
self._log.debug("Hash '%s'", msg_hash)
|
||||||
if msg_hash in self.messages_hashes:
|
if msg_hash in self.messages_hashes:
|
||||||
decoded_subject = email.header.decode_header(message.get("Subject", ""))
|
msg_subject = self._get_header(message, 'Subject')
|
||||||
if isinstance(decoded_subject[0][0], str):
|
|
||||||
msg_subject = decoded_subject[0][0]
|
|
||||||
else:
|
|
||||||
msg_subject = decoded_subject[0][0].decode()
|
|
||||||
self._log.info(
|
self._log.info(
|
||||||
"Message with subject '%s' is duplicate (hash check) of another",
|
"Message with subject '%s' is duplicate (hash check) of another",
|
||||||
msg_subject
|
msg_subject
|
||||||
|
@ -241,6 +259,22 @@ class RemoveDuplicateImapMessages:
|
||||||
multiple=True,
|
multiple=True,
|
||||||
help='IMAP mailboxes to check. Will compare messages between all mailboxes. Default: INBOX'
|
help='IMAP mailboxes to check. Will compare messages between all mailboxes. Default: INBOX'
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
'--method',
|
||||||
|
'-m',
|
||||||
|
default='full',
|
||||||
|
type=click.Choice(
|
||||||
|
["full", "headers"],
|
||||||
|
case_sensitive=False,
|
||||||
|
),
|
||||||
|
help="Method to decide messages are duplicated. The 'full' method will check that the whole message and headers are the same. The 'headers' method will check that selected headers (with --header) are the same."
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
'--header', '-f',
|
||||||
|
multiple=True,
|
||||||
|
help="Fields to compare when method is 'headers'"
|
||||||
|
)
|
||||||
|
|
||||||
@click_config_file.configuration_option()
|
@click_config_file.configuration_option()
|
||||||
def __main__(**kwargs):
|
def __main__(**kwargs):
|
||||||
return RemoveDuplicateImapMessages(**kwargs)
|
return RemoveDuplicateImapMessages(**kwargs)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[metadata]
|
[metadata]
|
||||||
name = remove_duplicate_imap_messages
|
name = remove_duplicate_imap_messages
|
||||||
version = 0.0.1
|
version = 0.0.2
|
||||||
|
|
||||||
[options]
|
[options]
|
||||||
packages = remove_duplicate_imap_messages
|
packages = remove_duplicate_imap_messages
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -13,7 +13,7 @@ setuptools.setup(
|
||||||
author="Antonio J. Delgado",
|
author="Antonio J. Delgado",
|
||||||
version=config['metadata']['version'],
|
version=config['metadata']['version'],
|
||||||
name=config['metadata']['name'],
|
name=config['metadata']['name'],
|
||||||
author_email="",
|
author_email="ad@susurrando.com",
|
||||||
url="https://susurrando.com",
|
url="https://susurrando.com",
|
||||||
description="Given an IMAP folder look for duplicate messages and optionally delete them",
|
description="Given an IMAP folder look for duplicate messages and optionally delete them",
|
||||||
long_description="README.md",
|
long_description="README.md",
|
||||||
|
|
Loading…
Reference in a new issue