Add initial version

This commit is contained in:
Antonio J. Delgado 2024-09-03 23:18:45 +03:00
parent 1393409ff7
commit f642297868
3 changed files with 162 additions and 9 deletions

View file

@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
[project.urls]
Homepage = ""
Homepage = "https://susurrando.com"
[project]
name = "remove_duplicate_imap_messages"
@ -22,4 +22,4 @@ dependencies = [
"click",
"click_config_file",
]
requires-python = ">=3"
requires-python = ">=3"

View file

@ -3,17 +3,22 @@
#
# This script is licensed under GNU GPL version 2.0 or above
# (c) 2024 Antonio J. Delgado
"""Given an IMAP folder look for duplicate messages and optionally delete them"""
"""Given an IMAP mailbox look for duplicate messages and optionally delete them"""
import sys
import os
import logging
from logging.handlers import SysLogHandler
import hashlib
import imaplib
import email
from signal import signal, SIGINT
import click
import click_config_file
class RemoveDuplicateImapMessages:
"""Given an IMAP mailbox look for duplicate messages and optionally delete them"""
def __init__(self, **kwargs):
self.config = kwargs
@ -30,6 +35,133 @@ class RemoveDuplicateImapMessages:
'remove_duplicate_imap_messages.log'
)
self._init_log()
signal(SIGINT, self._signal_handler)
self.messages_hashes = []
self.duplicates_count = 0
if 'mailbox' not in self.config:
self.config['mailbox'] = []
if len(self.config['mailbox'])==0:
self.config['mailbox'].append('INBOX')
self.connect_imap()
for mailbox in self.config['mailbox']:
self._process_mailbox(mailbox)
def _process_mailbox(self, mailbox):
    '''Scan one mailbox and remove the messages already seen in this run.

    Each message is fetched and handed to ``_process_message``, which flags
    duplicates as deleted. The flagged messages are expunged once, after the
    whole mailbox has been walked: expunging renumbers the remaining
    messages, so doing it inside the loop would make the sequence numbers
    returned by the search point at the wrong messages.

    Args:
        mailbox: Name of the IMAP mailbox to select.

    Returns:
        True when the mailbox could be selected, searched and every message
        fetched; False as soon as the server answers anything but OK.
    '''
    typ, data = self.imap.select(mailbox=mailbox, readonly=False)
    if typ != 'OK':
        self._log.error(
            "Error selecting mailbox '%s', server replied: %s",
            mailbox,
            data
        )
        return False
    self._log.debug("Searching for all messages in mailbox '%s'...", mailbox)
    # No CHARSET is needed for a plain ALL search and some servers reject it.
    typ, data = self.imap.search(None, 'ALL')
    self._log.debug('Response: %s', typ)
    if typ != 'OK':
        self._log.error('Error, server replied: %s', data)
        return False
    all_msgs = data[0].split() if data and data[0] else []
    total_msgs = len(all_msgs)
    self._log.debug("Processing %s messages in mailbox '%s'...", total_msgs, mailbox)
    duplicates_before = self.duplicates_count
    success = True
    for msg_count, message_id in enumerate(all_msgs, start=1):
        self._log.debug(
            "Fetching message %s of %s (%s duplicates found)...",
            msg_count,
            total_msgs,
            self.duplicates_count
        )
        # BODY.PEEK[] returns the full message like RFC822 does, but without
        # marking it as read on the server.
        typ, data = self.imap.fetch(message_id, '(BODY.PEEK[])')
        self._log.debug('Response: %s', typ)
        if typ != 'OK':
            self._log.error('Error, server replied: %s', data)
            success = False
            break
        self._process_message(message_id, data[0] if data else None)
    # Only expunge when this run actually flagged something, so messages
    # flagged as deleted by other clients are left alone otherwise.
    if not self.config['dummy'] and self.duplicates_count > duplicates_before:
        self._log.debug("Expunging deleted messages from mailbox '%s'...", mailbox)
        result = self.imap.expunge()
        self._log.debug('Result: %s', result)
    return success


def _process_message(self, message_id, data):
    '''Hash one fetched message and flag it as deleted if it is a duplicate.

    The SHA-256 of the raw message bytes is compared with the hashes of all
    messages seen so far (across mailboxes). A new hash is remembered; a
    known one counts as a duplicate and, unless this is a dummy run, the
    message gets the Deleted flag. The message is not expunged here.

    Args:
        message_id: Message sequence number as returned by the search.
        data: First element of the fetch response, expected to be a
            ``(envelope, raw_message_bytes)`` tuple.

    Returns:
        False when the fetch response does not carry the message bytes,
        True otherwise.
    '''
    from email.errors import HeaderParseError
    from email.header import decode_header

    if (
        not isinstance(data, tuple)
        or len(data) < 2
        or not isinstance(data[1], (bytes, bytearray))
    ):
        self._log.warning(
            "Unexpected response part '%s' for message '%s'. Try again.",
            data,
            message_id
        )
        return False
    raw_message = bytes(data[1])
    # Hash the bytes exactly as received: decoding them as UTF-8 first would
    # fail on messages using any other encoding.
    msg_hash = hashlib.sha256(raw_message).hexdigest()
    self._log.debug("Hash '%s'", msg_hash)
    if msg_hash not in self.messages_hashes:
        self.messages_hashes.append(msg_hash)
        return True

    # Duplicate: parse the message only now, the subject is needed for the log.
    message = email.message_from_bytes(raw_message)
    subject = message.get("Subject", "")
    try:
        chunks = decode_header(subject)
    except HeaderParseError:
        chunks = [(str(subject), None)]
    pieces = []
    for text, charset in chunks:
        if isinstance(text, bytes):
            try:
                text = text.decode(charset or 'ascii', errors='replace')
            except LookupError:
                # Charset name unknown to Python.
                text = text.decode('ascii', errors='replace')
        pieces.append(text)
    msg_subject = ''.join(pieces)
    self._log.info(
        "Message with subject '%s' is duplicate (hash check) of another",
        msg_subject
    )
    if self.config['dummy']:
        self._log.info("Duplicate message won't be delete (dummy run).")
    else:
        self._log.info("Deleting message '%s'", message_id)
        result = self.imap.store(message_id, '+FLAGS', '\\Deleted')
        self._log.info("Result: %s", result)
    self.duplicates_count += 1
    return True
def connect_imap(self):
    '''Open the connection to the IMAP server and log in.

    The connection object is stored in ``self.imap``. Server, port, SSL
    usage and credentials are read from ``self.config``.

    The process exits with status 1 when the secure connection fails, 2
    when the plain connection fails and 3 when the login is rejected.
    '''
    server = self.config['imap_server']
    port = self.config['imap_port']
    self._log.debug('Connecting to server %s:%s...', server, port)
    if self.config['ssl']:
        factory = imaplib.IMAP4_SSL
        failure_message = "Error connecting securely to IMAP server '%s'. %s"
        exit_code = 1
    else:
        factory = imaplib.IMAP4
        failure_message = "Error connecting to IMAP server '%s'. %s"
        exit_code = 2
    try:
        self.imap = factory(server, port)
    except (OSError, imaplib.IMAP4.error) as error:
        self._log.error(failure_message, server, error)
        sys.exit(exit_code)
    try:
        self._log.debug('Authenticating as user %s...', self.config['imap_user'])
        self.imap.login(self.config['imap_user'], self.config['imap_password'])
    except (OSError, imaplib.IMAP4.error) as error:
        self._log.error("Error while login as '%s'. %s'", self.config['imap_user'], error)
        # No mailbox is selected at this point, so CLOSE would be rejected
        # by imaplib; LOGOUT is the command that ends the session.
        try:
            self.imap.logout()
        except (OSError, imaplib.IMAP4.error) as logout_error:
            self._log.debug('Error closing the connection. %s', logout_error)
        sys.exit(3)
def _signal_handler(self, signal_received, frame):
    '''Handle SIGINT: end the IMAP session if there is one and exit with 0.

    The handler can fire before the connection exists or while no mailbox
    is selected, so the cleanup is best effort: a failing step is logged
    and the remaining ones still run.

    Args:
        signal_received: Number of the received signal.
        frame: Stack frame that was interrupted.
    '''
    self._log.info(
        'SIGINT or CTRL-C (%s %s) detected. Exiting gracefully',
        signal_received,
        frame
    )
    imap = getattr(self, 'imap', None)
    if imap is not None:
        # close() is only accepted while a mailbox is selected; logout()
        # must be attempted regardless of whether close() worked.
        for cleanup in (imap.close, imap.logout):
            try:
                cleanup()
            except (OSError, imaplib.IMAP4.error) as error:
                self._log.debug('Error while closing the IMAP session. %s', error)
    sys.exit(0)
def _init_log(self):
''' Initialize log object '''
@ -49,11 +181,11 @@ class RemoveDuplicateImapMessages:
if 'log_file' in self.config:
log_file = self.config['log_file']
else:
home_folder = os.environ.get(
home_mailbox = os.environ.get(
'HOME', os.environ.get('USERPROFILE', '')
)
log_folder = os.path.join(home_folder, "log")
log_file = os.path.join(log_folder, "remove_duplicate_imap_messages.log")
log_mailbox = os.path.join(home_mailbox, "log")
log_file = os.path.join(log_mailbox, "remove_duplicate_imap_messages.log")
if not os.path.exists(os.path.dirname(log_file)):
os.mkdir(os.path.dirname(log_file))
@ -83,8 +215,29 @@ class RemoveDuplicateImapMessages:
help='Set the debug level for the standard output.'
)
@click.option('--log-file', '-l', help="File to store all debug messages.")
# @click.option("--dummy","-n", is_flag=True,
# help="Don't do anything, just show what would be done.")
@click.option(
    "--dummy", "-n", is_flag=True,
    help="Don't do anything, just show what would be done."
)
@click.option('--imap-server', '-s', default='localhost', help='IMAP server')
@click.option(
    '--imap-port', '-p', default=993,
    type=click.IntRange(1, 65535), help='IMAP server port'
)
@click.option(
    '--imap-user', '-u', required=True,
    help='User name to use for the connection to the IMAP server'
)
@click.option(
    '--imap-password', '-P', required=True,
    help='Password to connect to the IMAP server. Warning! Use a configuration file to avoid revealing your passwords.'
)
@click.option('--ssl', '-S', default=True, help='Whether to use a secure connection or not.')
@click.option(
    '--mailbox', '-m',
    multiple=True,
    help='IMAP mailboxes to check. Will compare messages between all mailboxes. Default: INBOX'
)
@click_config_file.configuration_option()
def __main__(**kwargs):
    """Command line entry point: run the duplicate removal with the parsed options.

    Args:
        **kwargs: Option values collected by click (and the configuration
            file), passed on unchanged except for ``mailbox``.

    Returns:
        The RemoveDuplicateImapMessages instance that did the work.
    """
    # click hands a ``multiple=True`` option over as a tuple, but the class
    # appends the default 'INBOX' to this value when no mailbox was given,
    # which needs a list.
    kwargs['mailbox'] = list(kwargs.get('mailbox') or ())
    return RemoveDuplicateImapMessages(**kwargs)

View file

@ -14,7 +14,7 @@ setuptools.setup(
version=config['metadata']['version'],
name=config['metadata']['name'],
author_email="",
url="",
url="https://susurrando.com",
description="Given an IMAP folder look for duplicate messages and optionally delete them",
long_description="README.md",
long_description_content_type="text/markdown",