From 402c43e54f4e663751cf7a3af194f48adc6996b2 Mon Sep 17 00:00:00 2001
From: "Antonio J. Delgado"
Date: Thu, 27 Mar 2025 07:24:54 +0200
Subject: [PATCH] Add cache data and increase maxline

---
Review notes (text between "---" and the diffstat is ignored by git am):
 * Line breaks were restored from a whitespace-collapsed copy of this patch.
   Indentation of context lines is reconstructed; verify it against the tree
   or apply with `git apply --ignore-whitespace`.
 * _read_cached_data: the cache is now discarded when it is older than
   max_cache_age (the comparison was inverted and discarded fresh caches).
 * _get_mail_header: parse with email.message_from_bytes, because the IMAP
   payload is bytes (it is written with 'wb').
 * Subject: join all decoded chunks, keep the os.path.sep replacement, and
   keep the __no_subject__ fallback when the header is missing or empty.
 * Messages without a Message-ID are always written instead of being treated
   as duplicates of each other.
 * All fixes are line-count neutral, so hunk headers and diffstat still match.

 backup_imap/backup_imap.py | 127 ++++++++++++++++++++++++++++++-------
 1 file changed, 103 insertions(+), 24 deletions(-)

diff --git a/backup_imap/backup_imap.py b/backup_imap/backup_imap.py
index 328dab8..95f6d12 100644
--- a/backup_imap/backup_imap.py
+++ b/backup_imap/backup_imap.py
@@ -47,10 +47,16 @@ class BackupImap:
                 'backup_imap.log'
             )
         self._init_log()
+        self._default_data = {
+            "last_update": 0,
+            "backedup_messages": [],
+        }
+        self.data = self._read_cached_data()
         if not os.path.exists(self.config['destination_folder']):
             os.mkdir(self.config['destination_folder'])
 
         self.imap = None
+        imaplib._MAXLINE = 100000
         self._connect_imap(
             imap_server=self.config['imap_server'],
             imap_port=self.config['imap_port'],
@@ -123,39 +129,74 @@ class BackupImap:
                 sys.exit(6)
             subject = f'__no_subject__{message_uid}'
             data = fetch_data[0][1]
-            subject_match = re.search(rb'Subject: (.*)\r\n', data)
-            if subject_match:
-                subject = subject_match.group(1).decode().replace(os.path.sep, '_')
+            subjects = self._get_mail_header('Subject', data)
+            if any(subjects):  # a missing/empty Subject keeps the __no_subject__ fallback
+                subject = ''.join(subjects).replace(os.path.sep, '_')  # all chunks, path-safe
             message_path = os.path.join(
                 mailbox_path,
                 subject
             )
             original_subject = subject
-            counter = 1
-            while os.path.exists(message_path):
-                subject = f"{original_subject}_{counter}"
-                message_path = os.path.join(
-                    mailbox_path,
-                    subject
-                )
-                counter += 1
-            try:
-                with open(message_path, 'wb') as file_pointer:
-                    file_pointer.write(data)
-            except OSError as error:
-                if error.errno == 36:  # File name too long
+            message_id = self._get_mail_header('Message-ID', data)[-1]
+            if not message_id or not self._backedup_message(message_id, mailbox):  # no id: always save
+                counter = 1
+                while os.path.exists(message_path):
+                    subject = f"{original_subject}_{counter}"
                     message_path = os.path.join(
                         mailbox_path,
-                        f"message_uid_{message_uid.decode()}"
+                        subject
                     )
+                    counter += 1
+                try:
                     with open(message_path, 'wb') as file_pointer:
                         file_pointer.write(data)
-                else:
-                    self._log.error(
-                        "Error writing email '%s'. %s",
-                        message_path,
-                        error
-                    )
+                    self.data['backedup_messages'].append({ "message_id": message_id, "mailbox": mailbox})
+                except OSError as error:
+                    if error.errno == 36:  # File name too long
+                        message_path = os.path.join(
+                            mailbox_path,
+                            f"message_uid_{message_uid.decode()}"
+                        )
+                        with open(message_path, 'wb') as file_pointer:
+                            file_pointer.write(data)
+                        self.data['backedup_messages'].append({ "message_id": message_id, "mailbox": mailbox})
+                    else:
+                        self._log.error(
+                            "Error writing email '%s'. %s",
+                            message_path,
+                            error
+                        )
+
+    def _backedup_message(self, message_id, mailbox):
+        for message in self.data['backedup_messages']:
+            if message['message_id'] == message_id and message['mailbox'] == mailbox:
+                return True
+        return False
+
+    def _get_mail_header(self, header, data):
+        message = email.message_from_bytes(data)  # IMAP FETCH payload is bytes, not str
+        decoded_header = email.header.decode_header(message.get(header, ""))
+        result = []
+        for raw_header_data in decoded_header:
+            if isinstance(raw_header_data[0], str):
+                header_data = raw_header_data[0]
+                result.append(header_data)
+            else:
+                try:
+                    header_data = raw_header_data[0].decode()
+                    result.append(header_data)
+                except UnicodeDecodeError:
+                    try:
+                        header_data = raw_header_data[0].decode('Windows-1252')
+                        result.append(header_data)
+                    except UnicodeDecodeError as error:
+                        self._log.error(
+                            "Error decoding header data as UTF-8. Data: %s. Error: %s",
+                            raw_header_data[0],
+                            error
+                        )
+                        break
+        return result
 
     def _parse_mailbox(self, data):
         result = data
@@ -210,7 +251,33 @@ class BackupImap:
             sys.exit(4)
 
     def close(self):
-        '''Close class'''
+        '''Close class and save data'''
+        self._save_cached_data(self.data)
+
+    def _read_cached_data(self):
+        if os.path.exists(self.config['cache_file']):
+            with open(self.config['cache_file'], 'r', encoding='utf-8') as cache_file:
+                try:
+                    cached_data = json.load(cache_file)
+                    if (
+                        'last_update' in cached_data and
+                        cached_data['last_update'] + self.config['max_cache_age'] < time.time()  # expired
+                    ):
+                        cached_data = self._default_data
+                except json.decoder.JSONDecodeError:
+                    cached_data = self._default_data
+            return cached_data
+        else:
+            return self._default_data
+
+    def _save_cached_data(self, data):
+        data['last_update'] = time.time()
+        with open(self.config['cache_file'], 'w', encoding='utf-8') as cache_file:
+            json.dump(data, cache_file, indent=2)
+        self._log.debug(
+            "Saved cached data in '%s'",
+            self.config['cache_file']
+        )
 
     def _init_log(self):
         ''' Initialize log object '''
@@ -312,6 +379,18 @@ class BackupImap:
     required=True,
     help='Folder to save the messages and folders'
 )
+@click.option(
+    '--cache-file',
+    '-f',
+    default=f"{CACHE_FOLDER}/__project_code_name__.json",
+    help='Cache file to store data from each run',
+)
+@click.option(
+    '--max-cache-age',
+    '-a',
+    default=60*60*24*7,
+    help='Max age in seconds for the cache'
+)
 # @click.option("--dummy","-n", is_flag=True,
 #     help="Don't do anything, just show what would be done.")
 @click_config_file.configuration_option()