Add initial version
This commit is contained in:
parent
1393409ff7
commit
f642297868
3 changed files with 162 additions and 9 deletions
|
@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
|
|||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project.urls]
|
||||
Homepage = ""
|
||||
Homepage = "https://susurrando.com"
|
||||
|
||||
[project]
|
||||
name = "remove_duplicate_imap_messages"
|
||||
|
@ -22,4 +22,4 @@ dependencies = [
|
|||
"click",
|
||||
"click_config_file",
|
||||
]
|
||||
requires-python = ">=3"
|
||||
requires-python = ">=3"
|
||||
|
|
165
remove_duplicate_imap_messages/remove_duplicate_imap_messages.py
Normal file → Executable file
165
remove_duplicate_imap_messages/remove_duplicate_imap_messages.py
Normal file → Executable file
|
@ -3,17 +3,22 @@
|
|||
#
|
||||
# This script is licensed under GNU GPL version 2.0 or above
|
||||
# (c) 2024 Antonio J. Delgado
|
||||
"""Given an IMAP folder look for duplicate messages and optionally delete them"""
|
||||
"""Given an IMAP mailbox look for duplicate messages and optionally delete them"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import logging
|
||||
from logging.handlers import SysLogHandler
|
||||
import hashlib
|
||||
import imaplib
|
||||
import email
|
||||
from signal import signal, SIGINT
|
||||
import click
|
||||
import click_config_file
|
||||
|
||||
|
||||
class RemoveDuplicateImapMessages:
|
||||
"""Given an IMAP mailbox look for duplicate messages and optionally delete them"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self.config = kwargs
|
||||
|
@ -30,6 +35,133 @@ class RemoveDuplicateImapMessages:
|
|||
'remove_duplicate_imap_messages.log'
|
||||
)
|
||||
self._init_log()
|
||||
signal(SIGINT, self._signal_handler)
|
||||
self.messages_hashes = []
|
||||
self.duplicates_count = 0
|
||||
if 'mailbox' not in self.config:
|
||||
self.config['mailbox'] = []
|
||||
if len(self.config['mailbox'])==0:
|
||||
self.config['mailbox'].append('INBOX')
|
||||
self.connect_imap()
|
||||
for mailbox in self.config['mailbox']:
|
||||
self._process_mailbox(mailbox)
|
||||
|
||||
def _process_mailbox(self, mailbox):
    '''Scan one mailbox and remove every message whose content was seen before.

    Each message is fetched and handed to ``_process_message``, which records
    its hash or flags it as a duplicate. Flagged messages are expunged once,
    after the whole mailbox has been walked: expunging inside the loop makes
    the server renumber the remaining messages, so the sequence numbers
    returned by the initial search would point at the wrong messages.

    Args:
        mailbox: Name of the IMAP mailbox to select and scan.

    Returns:
        True when every message could be fetched, False when the mailbox
        could not be selected/searched or a fetch failed part-way.
    '''
    typ, data = self.imap.select(mailbox=mailbox, readonly=False)
    if typ != 'OK':
        self._log.error(
            "Error selecting mailbox '%s', server replied: %s",
            mailbox,
            data
        )
        return False

    self._log.debug("Searching for all messages in mailbox '%s'...", mailbox)
    # 'ALL' carries no text to match, so no search charset is needed.
    typ, data = self.imap.search(None, 'ALL')
    self._log.debug('Response: %s', typ)
    if typ != 'OK':
        self._log.error('Error, server replied: %s', data)
        return False

    # An empty mailbox answers with [b''] (or [None] on some servers).
    all_msgs = data[0].split() if data and data[0] else []
    total_msgs = len(all_msgs)
    self._log.debug("Processing %s messages in mailbox '%s'...", total_msgs, mailbox)

    duplicates_before = self.duplicates_count
    completed = True
    for msg_count, message_id in enumerate(all_msgs, start=1):
        self._log.debug(
            "Fetching message %s of %s (%s duplicates found)...",
            msg_count,
            total_msgs,
            self.duplicates_count
        )
        # BODY.PEEK[] returns the full raw message like RFC822 does, but
        # without setting the \Seen flag on every message we look at.
        typ, data = self.imap.fetch(message_id, '(BODY.PEEK[])')
        self._log.debug('Response: %s', typ)
        if typ != 'OK':
            self._log.error('Error, server replied: %s', data)
            completed = False
            break
        self._process_message(message_id, data[0] if data else None)

    # Remove what was flagged in this mailbox, even after a partial scan.
    flagged = self.duplicates_count - duplicates_before
    if flagged and not self.config['dummy']:
        self._log.debug(
            "Expunging %s duplicate messages from mailbox '%s'...",
            flagged,
            mailbox
        )
        self.imap.expunge()
    return completed

def _process_message(self, message_id, data):
    '''Hash one fetched message and flag it for deletion if it is a duplicate.

    The SHA-256 of the raw message bytes is compared against
    ``self.messages_hashes``. Unknown hashes are recorded; known ones are
    counted in ``self.duplicates_count`` and, unless ``config['dummy']`` is
    set, the message gets the ``\\Deleted`` flag. The caller is responsible
    for expunging.

    Args:
        message_id: Message sequence number as returned by the search.
        data: First element of the fetch response, expected to be a
            ``(envelope, raw_message_bytes)`` tuple.

    Returns:
        True when the message was processed, False when the response part
        did not contain the message bytes.
    '''
    # Local import: ``import email`` alone does not guarantee the submodule.
    from email.header import decode_header

    if (
        not isinstance(data, tuple)
        or len(data) < 2
        or not isinstance(data[1], (bytes, bytearray))
    ):
        self._log.warning(
            "Unexpected response part '%s' for message '%s'. Skipping it.",
            data,
            message_id
        )
        return False

    # Hash the bytes exactly as the server delivered them; decoding to text
    # first fails on messages that are not valid UTF-8.
    raw_message = bytes(data[1])
    msg_hash = hashlib.sha256(raw_message).hexdigest()
    self._log.debug("Hash '%s'", msg_hash)

    if msg_hash not in self.messages_hashes:
        self.messages_hashes.append(msg_hash)
        return True

    # Only duplicates need parsing, and only to report their subject.
    message = email.message_from_bytes(raw_message)
    fragments = []
    for value, charset in decode_header(message.get("Subject", "")):
        if isinstance(value, bytes):
            try:
                value = value.decode(charset or 'utf-8', errors='replace')
            except LookupError:
                # Unknown or bogus charset label in the header.
                value = value.decode('utf-8', errors='replace')
        fragments.append(value)
    msg_subject = ''.join(fragments)
    self._log.info(
        "Message with subject '%s' is duplicate (hash check) of another",
        msg_subject
    )

    if self.config['dummy']:
        self._log.info("Duplicate message won't be delete (dummy run).")
    else:
        self._log.info("Deleting message '%s'", message_id)
        result = self.imap.store(message_id, '+FLAGS', '\\Deleted')
        self._log.info("Result: %s", result)
    self.duplicates_count += 1
    return True
|
||||
|
||||
def connect_imap(self):
    '''Open the connection to the IMAP server and log in.

    Reads ``imap_server``, ``imap_port``, ``ssl``, ``imap_user`` and
    ``imap_password`` from ``self.config`` and stores the connection in
    ``self.imap``.

    Exits the process with status 1 when the secure connection fails, 2 when
    the plain connection fails and 3 when the login is rejected.
    '''
    server = self.config['imap_server']
    port = self.config['imap_port']
    user = self.config['imap_user']
    self._log.debug('Connecting to server %s:%s...', server, port)

    if self.config['ssl']:
        try:
            self.imap = imaplib.IMAP4_SSL(server, port)
        # Socket, DNS and TLS failures are OSError subclasses; protocol
        # level failures are imaplib.IMAP4.error.
        except (OSError, imaplib.IMAP4.error) as error:
            self._log.error(
                "Error connecting securely to IMAP server '%s'. %s",
                server,
                error
            )
            sys.exit(1)
    else:
        try:
            self.imap = imaplib.IMAP4(server, port)
        except (OSError, imaplib.IMAP4.error) as error:
            self._log.error(
                "Error connecting to IMAP server '%s'. %s",
                server,
                error
            )
            sys.exit(2)

    try:
        self._log.debug('Authenticating as user %s...', user)
        self.imap.login(user, self.config['imap_password'])
    except (OSError, imaplib.IMAP4.error) as error:
        self._log.error("Error while login as '%s'. %s'", user, error)
        # No mailbox is selected yet, so CLOSE would be rejected; LOGOUT is
        # the command that is valid in every connection state.
        try:
            self.imap.logout()
        except (OSError, imaplib.IMAP4.error) as logout_error:
            self._log.debug('Error while logging out: %s', logout_error)
        sys.exit(3)
|
||||
|
||||
|
||||
def _signal_handler(self, signal_received, frame):
    '''Handle SIGINT: release the IMAP connection if there is one and exit.

    The handler may fire before the connection exists or before a mailbox is
    selected, so a missing ``self.imap`` and failing CLOSE/LOGOUT commands
    are tolerated; the process always exits with status 0.

    Args:
        signal_received: Signal number passed in by the ``signal`` module.
        frame: Stack frame that was interrupted.
    '''
    self._log.info(
        'SIGINT or CTRL-C (%s %s) detected. Exiting gracefully',
        signal_received,
        frame
    )
    connection = getattr(self, 'imap', None)
    if connection is not None:
        # Run both steps independently: CLOSE is rejected when no mailbox is
        # selected, and that must not prevent the LOGOUT.
        for step in (connection.close, connection.logout):
            try:
                step()
            except (OSError, imaplib.IMAP4.error) as error:
                self._log.debug('Error while closing the connection: %s', error)
    sys.exit(0)
|
||||
|
||||
def _init_log(self):
|
||||
''' Initialize log object '''
|
||||
|
@ -49,11 +181,11 @@ class RemoveDuplicateImapMessages:
|
|||
if 'log_file' in self.config:
|
||||
log_file = self.config['log_file']
|
||||
else:
|
||||
home_folder = os.environ.get(
|
||||
home_mailbox = os.environ.get(
|
||||
'HOME', os.environ.get('USERPROFILE', '')
|
||||
)
|
||||
log_folder = os.path.join(home_folder, "log")
|
||||
log_file = os.path.join(log_folder, "remove_duplicate_imap_messages.log")
|
||||
log_mailbox = os.path.join(home_mailbox, "log")
|
||||
log_file = os.path.join(log_mailbox, "remove_duplicate_imap_messages.log")
|
||||
|
||||
if not os.path.exists(os.path.dirname(log_file)):
|
||||
os.mkdir(os.path.dirname(log_file))
|
||||
|
@ -83,8 +215,29 @@ class RemoveDuplicateImapMessages:
|
|||
help='Set the debug level for the standard output.'
|
||||
)
|
||||
@click.option('--log-file', '-l', help="File to store all debug messages.")
|
||||
# @click.option("--dummy","-n", is_flag=True,
|
||||
# help="Don't do anything, just show what would be done.")
|
||||
@click.option(
|
||||
"--dummy","-n", is_flag=True,
|
||||
help="Don't do anything, just show what would be done."
|
||||
)
|
||||
@click.option('--imap-server', '-s', default='localhost', help='IMAP server')
|
||||
@click.option(
|
||||
'--imap-port', '-p', default=993,
|
||||
type=click.IntRange(1, 65535), help='IMAP server port'
|
||||
)
|
||||
@click.option(
|
||||
'--imap-user', '-u', required=True,
|
||||
help='User name to use for the connection to the IMAP server'
|
||||
)
|
||||
@click.option(
|
||||
'--imap-password', '-P', required=True,
|
||||
help='Password to connect to the IMAP server. Warning! Use a configuration file to avoid revelaing your passwords.'
|
||||
)
|
||||
@click.option('--ssl', '-S', default=True, help='Whether to use a secure connection or not.')
|
||||
@click.option(
|
||||
'--mailbox', '-m',
|
||||
multiple=True,
|
||||
help='IMAP mailboxes to check. Will compare messages between all mailboxes. Default: INBOX'
|
||||
)
|
||||
@click_config_file.configuration_option()
|
||||
def __main__(**kwargs):
|
||||
return RemoveDuplicateImapMessages(**kwargs)
|
||||
|
|
2
setup.py
2
setup.py
|
@ -14,7 +14,7 @@ setuptools.setup(
|
|||
version=config['metadata']['version'],
|
||||
name=config['metadata']['name'],
|
||||
author_email="",
|
||||
url="",
|
||||
url="https://susurrando.com",
|
||||
description="Given an IMAP folder look for duplicate messages and optionally delete them",
|
||||
long_description="README.md",
|
||||
long_description_content_type="text/markdown",
|
||||
|
|
Loading…
Reference in a new issue