diff --git a/README.md b/README.md index 4b6f93d..69d3476 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Save the contacts as individual (one contact per file) vCard files in an isolate ## Usage - Save the contacts as individual (one contact per file) vCard files in an isolated directory, pass this directory with the *--directory* option. -- Those completely equal, except for some list of keys (see the *ignore_fileds* variable), will be directly moved to the *--duplicates-destination* folder inside the *--directory*. +- Those completely equal, except for some list of keys (see the *ignore_fields* variable), will be directly moved to the *--duplicates-destination* folder inside the *--directory*. - Those with equal full name, will be show and a prompt will ask you to keep one of the contact cards or just do nothing with them. ```find_duplicate_contacts.py [OPTIONS]``` @@ -36,4 +36,4 @@ Options: ## License -[GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html) \ No newline at end of file +[GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html) diff --git a/find_duplicate_contacts/find_duplicate_contacts.py b/find_duplicate_contacts/find_duplicate_contacts.py index 65beff5..695faa3 100755 --- a/find_duplicate_contacts/find_duplicate_contacts.py +++ b/find_duplicate_contacts/find_duplicate_contacts.py @@ -3,32 +3,41 @@ # # This script is licensed under GNU GPL version 2.0 or above # (c) 2022 Antonio J. Delgado -# __description__ +''' Find duplicate contacts ''' import sys import os import logging -import click -import click_config_file from logging.handlers import SysLogHandler -import vobject -import deepdiff import shutil from pprint import pprint -import json +import click +import click_config_file +import vobject +import deepdiff -class find_duplicate_contacts: - def __init__(self, debug_level, log_file, directory, duplicates_destination): +class FindDuplicateContacts: + '''Main class to find duplicate contacts''' + + def __init__(self, **kwargs): ''' Initial function called when object is created ''' - self.config = dict() - self.config['debug_level'] = debug_level - if log_file is None: - log_file = os.path.join(os.environ.get('HOME', os.environ.get('USERPROFILE', os.getcwd())), 'log', 'find_duplicate_contacts.log') - self.config['log_file'] = log_file + self.config = kwargs + if 'log_file' not in kwargs or kwargs['log_file'] is None: + self.config['log_file'] = os.path.join( + os.environ.get( + 'HOME', + os.environ.get( + 'USERPROFILE', + os.getcwd() + ) + ), + 'log', + '__project_codename__.log' + ) self._init_log() - self.ignore_fileds = [ + self.ignore_fields = [ "prodid", "uid", "version", @@ -38,15 +47,15 @@ class find_duplicate_contacts: "photo" ] - self.directory = directory - self.duplicates_destination = duplicates_destination - self.duplicates_folder = os.path.join(self.directory, self.duplicates_destination) + self.duplicates_folder = os.path.join( + self.config['directory'], + self.config['duplicates_destination'] + ) if not os.path.exists(self.duplicates_folder): os.mkdir(self.duplicates_folder) - - self.entries = list() - for entry in os.scandir(directory): + self.entries = [] + for entry in os.scandir(self.config['directory']): self.entries.append(entry) self.read_cards() @@ -54,36 +63,45 @@ class find_duplicate_contacts: self.compare_cards() def read_cards(self): + '''Read all vCards''' self.cards = [] for entry in self.entries: - self._log.debug(f"Reading vcard '{entry.path}'...") + self._log.debug( + "Reading vcard '%s'...", + entry.path + ) card = {} card['filename'] = entry.path card['content'] = {} if not entry.is_dir(): - with open(entry.path, 'r') as filep: + with open(entry.path, 'r', encoding='UTF-8') as filep: content=filep.read() if len(content) > 0: vcard = vobject.readOne(content) - + for key in vcard.contents.keys(): - if key not in self.ignore_fileds: + if key not in self.ignore_fields: card['content'][key] = list() for item in vcard.contents[key]: card['content'][key].append(item.value) self.cards.append(card) def compare_cards(self): + '''Compare all vCards''' checked_cards = [] count = 0 for card in self.cards: count +=1 - print(f"Contact {count} of {len(self.cards)}:") + print(f"Contact {count} of {len(self.cards)}:\b") duplicated = False for checked_card in checked_cards: if self.are_same_dict(card['content'], checked_card['content']): duplicated = True - self._log.info(f"Totally duplicates:\n '{card['filename']}'\n '{checked_card['filename']}") + self._log.info( + "Totally duplicates:\n '%s'\n '%s", + card['filename'], + checked_card['filename'] + ) shutil.move( card['filename'], os.path.join(self.duplicates_folder, os.path.basename(card['filename'])) @@ -93,10 +111,14 @@ class find_duplicate_contacts: duplicated = True if not duplicated: checked_cards.append(card) - self._log.info(f"Found {len(checked_cards)} unique cards") + self._log.info( + "Found %s unique cards", + len(checked_cards) + ) def manual_check_cards(self, card1, card2): - cols, rows = os.get_terminal_size() + '''Manual check of vCards''' + cols = os.get_terminal_size()[0] print("#" * cols) print("Card#1:") print(f" filename: {card1['filename']}") @@ -140,9 +162,9 @@ class find_duplicate_contacts: else: print('Doing nothing.') return False - def are_partially_same_dict(self, d1, d2, key='id'): + '''Test if two dictionaries are similar''' if not isinstance(d1[key], list): d1[key] = [ d1[key] ] d2[key] = [ d2[key] ] @@ -150,6 +172,7 @@ class find_duplicate_contacts: return True def are_same_dict(self, d1, d2): + '''Test if two dictionaries are equal''' ddiff = deepdiff.DeepDiff(d1, d2, ignore_order=True) if ddiff == dict(): return True @@ -160,13 +183,18 @@ class find_duplicate_contacts: if 'values_changed' in ddiff: real_change = False for key in ddiff['values_changed'].keys(): - if isinstance(ddiff['values_changed'][key]['new_value'], str): - if ddiff['values_changed'][key]['new_value'].lower() != ddiff['values_changed'][key]['old_value'].lower(): + if isinstance( + ddiff['values_changed'][key]['new_value'], + str + ): + if (ddiff['values_changed'][key]['new_value'].lower() + != + ddiff['values_changed'][key]['old_value'].lower()): real_change = True if real_change: return False else: - #print(ddiff) + # print(ddiff) return False def _init_log(self): @@ -208,11 +236,15 @@ class find_duplicate_contacts: ), help='Set the debug level for the standard output.') @click.option('--log-file', '-l', help="File to store all debug messages.") @click.option("--directory", "-f", required=True, help="Directory containing vCard files to check.") -@click.option('--duplicates-destination', '-D', default='duplicates', help='Directory to move duplicates files, relative to the directory containing the vCards.') +@click.option( + '--duplicates-destination', + '-D', + default='duplicates', + help='Directory to move duplicates files, relative to the directory containing the vCards.' +) @click_config_file.configuration_option() -def __main__(debug_level, log_file, directory, duplicates_destination): - return find_duplicate_contacts(debug_level, log_file, directory, duplicates_destination) +def __main__(**kwargs): + return FindDuplicateContacts(**kwargs) if __name__ == "__main__": __main__() - diff --git a/setup.py b/setup.py index 1b86202..1e5c4ae 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +"""Setup script""" + +import configparser import setuptools + +config = configparser.ConfigParser() +config.read('setup.cfg') + setuptools.setup( scripts=['find_duplicate_contacts/find_duplicate_contacts.py'], author="Antonio J. Delgado", version='0.0.3', name='find_duplicate_contacts', - author_email="", - url="", + author_email="TXj6QGdwejbTD1iWAj2ws9pnV@susurrando.com", + url="https://susurrando.com", description="Find duplicate contacts in vCard files", long_description="README.md", long_description_content_type="text/markdown",