Add initial version
This commit is contained in:
parent
1393409ff7
commit
f642297868
3 changed files with 162 additions and 9 deletions
|
@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
Homepage = ""
|
Homepage = "https://susurrando.com"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "remove_duplicate_imap_messages"
|
name = "remove_duplicate_imap_messages"
|
||||||
|
@ -22,4 +22,4 @@ dependencies = [
|
||||||
"click",
|
"click",
|
||||||
"click_config_file",
|
"click_config_file",
|
||||||
]
|
]
|
||||||
requires-python = ">=3"
|
requires-python = ">=3"
|
||||||
|
|
165
remove_duplicate_imap_messages/remove_duplicate_imap_messages.py
Normal file → Executable file
165
remove_duplicate_imap_messages/remove_duplicate_imap_messages.py
Normal file → Executable file
|
@ -3,17 +3,22 @@
|
||||||
#
|
#
|
||||||
# This script is licensed under GNU GPL version 2.0 or above
|
# This script is licensed under GNU GPL version 2.0 or above
|
||||||
# (c) 2024 Antonio J. Delgado
|
# (c) 2024 Antonio J. Delgado
|
||||||
"""Given an IMAP folder look for duplicate messages and optionally delete them"""
|
"""Given an IMAP mailbox look for duplicate messages and optionally delete them"""
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
from logging.handlers import SysLogHandler
|
from logging.handlers import SysLogHandler
|
||||||
|
import hashlib
|
||||||
|
import imaplib
|
||||||
|
import email
|
||||||
|
from signal import signal, SIGINT
|
||||||
import click
|
import click
|
||||||
import click_config_file
|
import click_config_file
|
||||||
|
|
||||||
|
|
||||||
class RemoveDuplicateImapMessages:
|
class RemoveDuplicateImapMessages:
|
||||||
|
"""Given an IMAP mailbox look for duplicate messages and optionally delete them"""
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
self.config = kwargs
|
self.config = kwargs
|
||||||
|
@ -30,6 +35,133 @@ class RemoveDuplicateImapMessages:
|
||||||
'remove_duplicate_imap_messages.log'
|
'remove_duplicate_imap_messages.log'
|
||||||
)
|
)
|
||||||
self._init_log()
|
self._init_log()
|
||||||
|
signal(SIGINT, self._signal_handler)
|
||||||
|
self.messages_hashes = []
|
||||||
|
self.duplicates_count = 0
|
||||||
|
if 'mailbox' not in self.config:
|
||||||
|
self.config['mailbox'] = []
|
||||||
|
if len(self.config['mailbox'])==0:
|
||||||
|
self.config['mailbox'].append('INBOX')
|
||||||
|
self.connect_imap()
|
||||||
|
for mailbox in self.config['mailbox']:
|
||||||
|
self._process_mailbox(mailbox)
|
||||||
|
|
||||||
|
def _process_mailbox(self, mailbox):
    '''Look for duplicate messages in one mailbox and remove them.

    Each message is fetched and passed to _process_message(), which only
    flags duplicates as deleted. The flagged messages are expunged once,
    after the whole mailbox has been read: expunging while iterating
    renumbers the remaining messages, so the sequence numbers returned by
    the initial search would point at the wrong messages (or past the end
    of the mailbox).

    Arguments:
        mailbox: name of the IMAP mailbox to select.

    Returns:
        True if every message was fetched, False if the server rejected
        the select, the search or one of the fetch commands.
    '''
    typ, data = self.imap.select(mailbox=mailbox, readonly=False)
    if typ != 'OK':
        self._log.error(
            "Error selecting mailbox '%s', server replied: %s",
            mailbox,
            data
        )
        return False

    self._log.debug("Searching for all messages in mailbox '%s'...", mailbox)
    # A charset only matters for text search keys, so none is sent for ALL.
    typ, data = self.imap.search(None, 'ALL')
    self._log.debug('Response: %s', typ)
    if typ != 'OK':
        self._log.error('Error, server replied: %s', data)
        return False
    all_msgs = data[0].split() if data and data[0] else []
    total_msgs = len(all_msgs)
    self._log.debug("Processing %s messages in mailbox '%s'...", total_msgs, mailbox)

    duplicates_before = self.duplicates_count
    completed = True
    for msg_count, message_id in enumerate(all_msgs, start=1):
        self._log.debug(
            "Fetching message %s of %s (%s duplicates found)...",
            msg_count,
            total_msgs,
            self.duplicates_count
        )
        # BODY.PEEK[] returns the whole message like RFC822 does, but
        # without marking the message as read on the server.
        typ, data = self.imap.fetch(message_id, '(BODY.PEEK[])')
        self._log.debug('Response: %s', typ)
        if typ != 'OK':
            self._log.error('Error, server replied: %s', data)
            completed = False
            break
        self._process_message(message_id, data[0])

    # Remove what was flagged during this scan, even if the scan stopped
    # early, so the mailbox is left in a consistent state.
    if not self.config['dummy'] and self.duplicates_count > duplicates_before:
        self._log.debug("Expunging deleted messages from mailbox '%s'...", mailbox)
        self.imap.expunge()
    return completed


def _process_message(self, message_id, data):
    '''Hash one fetched message and flag it as deleted if already seen.

    The SHA-256 of the parsed message is compared with the hashes collected
    so far in self.messages_hashes. A new hash is recorded; a known hash
    means the message is a duplicate, which is flagged as deleted unless
    the 'dummy' option is set. The caller is responsible for expunging.

    Arguments:
        message_id: sequence number of the message in the selected
            mailbox, as returned by the search command.
        data: first item of the fetch response, expected to be an
            (envelope, raw message bytes) tuple.

    Returns:
        True if the message was hashed, False if the response part did
        not contain a message body.
    '''
    # Local import: decode_header lives in a submodule that a bare
    # "import email" is not guaranteed to have loaded.
    from email.header import decode_header

    has_body = (
        isinstance(data, tuple)
        and len(data) > 1
        and isinstance(data[1], bytes)
    )
    if not has_body:
        self._log.warning(
            "Unexpected response part '%s' for message %s. Skipping it.",
            data,
            message_id
        )
        return False

    # Parse the raw bytes: decoding them as UTF-8 first fails on messages
    # that use any other 8-bit encoding.
    message = email.message_from_bytes(data[1])
    msg_hash = hashlib.sha256(message.as_bytes()).hexdigest()
    self._log.debug("Hash '%s'", msg_hash)

    if msg_hash not in self.messages_hashes:
        self.messages_hashes.append(msg_hash)
        return True

    # The subject is only used for logging, so undecodable bytes are
    # replaced instead of aborting the run.
    subject_parts = []
    for chunk, charset in decode_header(message.get("Subject", "")):
        if isinstance(chunk, bytes):
            try:
                chunk = chunk.decode(charset or 'utf-8', errors='replace')
            except LookupError:
                # The header announces a charset Python does not know.
                chunk = chunk.decode('utf-8', errors='replace')
        subject_parts.append(chunk)
    msg_subject = ''.join(subject_parts)
    self._log.info(
        "Message with subject '%s' is duplicate (hash check) of another",
        msg_subject
    )

    if self.config['dummy']:
        self._log.info("Duplicate message won't be delete (dummy run).")
    else:
        self._log.info("Deleting message '%s'", message_id)
        result = self.imap.store(message_id, '+FLAGS', '\\Deleted')
        self._log.info("Result: %s", result)
    self.duplicates_count += 1
    return True
|
||||||
|
|
||||||
|
def connect_imap(self):
    '''Open the connection to the IMAP server and authenticate.

    Reads 'imap_server', 'imap_port', 'ssl', 'imap_user' and
    'imap_password' from self.config and stores the connection object in
    self.imap.

    Exits the process with status 1 if the secure connection fails, 2 if
    the plain connection fails and 3 if the login is rejected.
    '''
    server = self.config['imap_server']
    port = self.config['imap_port']
    self._log.debug('Connecting to server %s:%s...', server, port)

    if self.config['ssl']:
        imap_class = imaplib.IMAP4_SSL
        error_message = "Error connecting securely to IMAP server '%s'. %s"
        exit_code = 1
    else:
        imap_class = imaplib.IMAP4
        error_message = "Error connecting to IMAP server '%s'. %s"
        exit_code = 2
    try:
        self.imap = imap_class(server, port)
    except Exception as error:
        self._log.error(error_message, server, error)
        sys.exit(exit_code)

    try:
        self._log.debug('Authenticating as user %s...', self.config['imap_user'])
        self.imap.login(self.config['imap_user'], self.config['imap_password'])
    except Exception as error:
        self._log.error("Error while login as '%s'. %s'", self.config['imap_user'], error)
        # No mailbox is selected at this point, so close() would raise
        # and hide the exit code; logout() is valid in every state.
        try:
            self.imap.logout()
        except (OSError, imaplib.IMAP4.error) as logout_error:
            self._log.debug('Error while logging out: %s', logout_error)
        sys.exit(3)
|
||||||
|
|
||||||
|
|
||||||
|
def _signal_handler(self, signal_received, frame):
    '''Close the IMAP session and exit when the process is interrupted.

    The handler is installed before the connection is opened, so it can
    run when self.imap does not exist yet or when no mailbox is selected.
    Cleanup is therefore best effort and never prevents the exit.

    Arguments:
        signal_received: number of the signal that triggered the handler.
        frame: stack frame that was executing when the signal arrived.
    '''
    self._log.info(
        'SIGINT or CTRL-C (%s %s) detected. Exiting gracefully',
        signal_received,
        frame
    )
    imap = getattr(self, 'imap', None)
    if imap is not None:
        # close() is rejected unless a mailbox is selected; logout() must
        # still be attempted in that case.
        for cleanup in (imap.close, imap.logout):
            try:
                cleanup()
            except (OSError, imaplib.IMAP4.error) as error:
                self._log.debug('Error while closing the connection: %s', error)
    sys.exit(0)
|
||||||
|
|
||||||
def _init_log(self):
|
def _init_log(self):
|
||||||
''' Initialize log object '''
|
''' Initialize log object '''
|
||||||
|
@ -49,11 +181,11 @@ class RemoveDuplicateImapMessages:
|
||||||
if 'log_file' in self.config:
|
if 'log_file' in self.config:
|
||||||
log_file = self.config['log_file']
|
log_file = self.config['log_file']
|
||||||
else:
|
else:
|
||||||
home_folder = os.environ.get(
|
home_mailbox = os.environ.get(
|
||||||
'HOME', os.environ.get('USERPROFILE', '')
|
'HOME', os.environ.get('USERPROFILE', '')
|
||||||
)
|
)
|
||||||
log_folder = os.path.join(home_folder, "log")
|
log_mailbox = os.path.join(home_mailbox, "log")
|
||||||
log_file = os.path.join(log_folder, "remove_duplicate_imap_messages.log")
|
log_file = os.path.join(log_mailbox, "remove_duplicate_imap_messages.log")
|
||||||
|
|
||||||
if not os.path.exists(os.path.dirname(log_file)):
|
if not os.path.exists(os.path.dirname(log_file)):
|
||||||
os.mkdir(os.path.dirname(log_file))
|
os.mkdir(os.path.dirname(log_file))
|
||||||
|
@ -83,8 +215,29 @@ class RemoveDuplicateImapMessages:
|
||||||
help='Set the debug level for the standard output.'
|
help='Set the debug level for the standard output.'
|
||||||
)
|
)
|
||||||
@click.option('--log-file', '-l', help="File to store all debug messages.")
|
@click.option('--log-file', '-l', help="File to store all debug messages.")
|
||||||
# @click.option("--dummy","-n", is_flag=True,
|
@click.option(
|
||||||
# help="Don't do anything, just show what would be done.")
|
"--dummy","-n", is_flag=True,
|
||||||
|
help="Don't do anything, just show what would be done."
|
||||||
|
)
|
||||||
|
@click.option('--imap-server', '-s', default='localhost', help='IMAP server')
|
||||||
|
@click.option(
|
||||||
|
'--imap-port', '-p', default=993,
|
||||||
|
type=click.IntRange(1, 65535), help='IMAP server port'
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
'--imap-user', '-u', required=True,
|
||||||
|
help='User name to use for the connection to the IMAP server'
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
'--imap-password', '-P', required=True,
|
||||||
|
help='Password to connect to the IMAP server. Warning! Use a configuration file to avoid revelaing your passwords.'
|
||||||
|
)
|
||||||
|
@click.option('--ssl', '-S', default=True, help='Whether to use a secure connection or not.')
|
||||||
|
@click.option(
|
||||||
|
'--mailbox', '-m',
|
||||||
|
multiple=True,
|
||||||
|
help='IMAP mailboxes to check. Will compare messages between all mailboxes. Default: INBOX'
|
||||||
|
)
|
||||||
@click_config_file.configuration_option()
|
@click_config_file.configuration_option()
|
||||||
def __main__(**kwargs):
|
def __main__(**kwargs):
|
||||||
return RemoveDuplicateImapMessages(**kwargs)
|
return RemoveDuplicateImapMessages(**kwargs)
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -14,7 +14,7 @@ setuptools.setup(
|
||||||
version=config['metadata']['version'],
|
version=config['metadata']['version'],
|
||||||
name=config['metadata']['name'],
|
name=config['metadata']['name'],
|
||||||
author_email="",
|
author_email="",
|
||||||
url="",
|
url="https://susurrando.com",
|
||||||
description="Given an IMAP folder look for duplicate messages and optionally delete them",
|
description="Given an IMAP folder look for duplicate messages and optionally delete them",
|
||||||
long_description="README.md",
|
long_description="README.md",
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
|
|
Loading…
Reference in a new issue