fix var name and update style

This commit is contained in:
Antonio J. Delgado 2024-09-25 14:19:34 +03:00
parent a038b0e2ca
commit e6f837a530
3 changed files with 81 additions and 40 deletions

View file

@ -17,7 +17,7 @@ Save the contacts as individual (one contact per file) vCard files in an isolate
## Usage ## Usage
- Save the contacts as individual (one contact per file) vCard files in an isolated directory, pass this directory with the *--directory* option. - Save the contacts as individual (one contact per file) vCard files in an isolated directory, pass this directory with the *--directory* option.
- Those completely equal, except for some list of keys (see the *ignore_fileds* variable), will be directly moved to the *--duplicates-destination* folder inside the *--directory*. - Those completely equal, except for some list of keys (see the *ignore_fields* variable), will be directly moved to the *--duplicates-destination* folder inside the *--directory*.
- Those with an equal full name will be shown, and a prompt will ask you to keep one of the contact cards or just do nothing with them. - Those with an equal full name will be shown, and a prompt will ask you to keep one of the contact cards or just do nothing with them.
```find_duplicate_contacts.py [OPTIONS]``` ```find_duplicate_contacts.py [OPTIONS]```
@ -36,4 +36,4 @@ Options:
## License ## License
[GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html) [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html)

View file

@ -3,32 +3,41 @@
# #
# This script is licensed under GNU GPL version 2.0 or above # This script is licensed under GNU GPL version 2.0 or above
# (c) 2022 Antonio J. Delgado # (c) 2022 Antonio J. Delgado
# __description__ ''' Find duplicate contacts '''
import sys import sys
import os import os
import logging import logging
import click
import click_config_file
from logging.handlers import SysLogHandler from logging.handlers import SysLogHandler
import vobject
import deepdiff
import shutil import shutil
from pprint import pprint from pprint import pprint
import json import click
import click_config_file
import vobject
import deepdiff
class find_duplicate_contacts:
def __init__(self, debug_level, log_file, directory, duplicates_destination): class FindDuplicateContacts:
'''Main class to find duplicate contacts'''
def __init__(self, **kwargs):
''' Initial function called when object is created ''' ''' Initial function called when object is created '''
self.config = dict() self.config = kwargs
self.config['debug_level'] = debug_level if 'log_file' not in kwargs or kwargs['log_file'] is None:
if log_file is None: self.config['log_file'] = os.path.join(
log_file = os.path.join(os.environ.get('HOME', os.environ.get('USERPROFILE', os.getcwd())), 'log', 'find_duplicate_contacts.log') os.environ.get(
self.config['log_file'] = log_file 'HOME',
os.environ.get(
'USERPROFILE',
os.getcwd()
)
),
'log',
'__project_codename__.log'
)
self._init_log() self._init_log()
self.ignore_fileds = [ self.ignore_fields = [
"prodid", "prodid",
"uid", "uid",
"version", "version",
@ -38,15 +47,15 @@ class find_duplicate_contacts:
"photo" "photo"
] ]
self.directory = directory self.duplicates_folder = os.path.join(
self.duplicates_destination = duplicates_destination self.config['directory'],
self.duplicates_folder = os.path.join(self.directory, self.duplicates_destination) self.config['duplicates_destination']
)
if not os.path.exists(self.duplicates_folder): if not os.path.exists(self.duplicates_folder):
os.mkdir(self.duplicates_folder) os.mkdir(self.duplicates_folder)
self.entries = []
self.entries = list() for entry in os.scandir(self.config['directory']):
for entry in os.scandir(directory):
self.entries.append(entry) self.entries.append(entry)
self.read_cards() self.read_cards()
@ -54,36 +63,45 @@ class find_duplicate_contacts:
self.compare_cards() self.compare_cards()
def read_cards(self): def read_cards(self):
'''Read all vCards'''
self.cards = [] self.cards = []
for entry in self.entries: for entry in self.entries:
self._log.debug(f"Reading vcard '{entry.path}'...") self._log.debug(
"Reading vcard '%s'...",
entry.path
)
card = {} card = {}
card['filename'] = entry.path card['filename'] = entry.path
card['content'] = {} card['content'] = {}
if not entry.is_dir(): if not entry.is_dir():
with open(entry.path, 'r') as filep: with open(entry.path, 'r', encoding='UTF-8') as filep:
content=filep.read() content=filep.read()
if len(content) > 0: if len(content) > 0:
vcard = vobject.readOne(content) vcard = vobject.readOne(content)
for key in vcard.contents.keys(): for key in vcard.contents.keys():
if key not in self.ignore_fileds: if key not in self.ignore_fields:
card['content'][key] = list() card['content'][key] = list()
for item in vcard.contents[key]: for item in vcard.contents[key]:
card['content'][key].append(item.value) card['content'][key].append(item.value)
self.cards.append(card) self.cards.append(card)
def compare_cards(self): def compare_cards(self):
'''Compare all vCards'''
checked_cards = [] checked_cards = []
count = 0 count = 0
for card in self.cards: for card in self.cards:
count +=1 count +=1
print(f"Contact {count} of {len(self.cards)}:") print(f"Contact {count} of {len(self.cards)}:\b")
duplicated = False duplicated = False
for checked_card in checked_cards: for checked_card in checked_cards:
if self.are_same_dict(card['content'], checked_card['content']): if self.are_same_dict(card['content'], checked_card['content']):
duplicated = True duplicated = True
self._log.info(f"Totally duplicates:\n '{card['filename']}'\n '{checked_card['filename']}") self._log.info(
"Totally duplicates:\n '%s'\n '%s",
card['filename'],
checked_card['filename']
)
shutil.move( shutil.move(
card['filename'], card['filename'],
os.path.join(self.duplicates_folder, os.path.basename(card['filename'])) os.path.join(self.duplicates_folder, os.path.basename(card['filename']))
@ -93,10 +111,14 @@ class find_duplicate_contacts:
duplicated = True duplicated = True
if not duplicated: if not duplicated:
checked_cards.append(card) checked_cards.append(card)
self._log.info(f"Found {len(checked_cards)} unique cards") self._log.info(
"Found %s unique cards",
len(checked_cards)
)
def manual_check_cards(self, card1, card2): def manual_check_cards(self, card1, card2):
cols, rows = os.get_terminal_size() '''Manual check of vCards'''
cols = os.get_terminal_size()[0]
print("#" * cols) print("#" * cols)
print("Card#1:") print("Card#1:")
print(f" filename: {card1['filename']}") print(f" filename: {card1['filename']}")
@ -140,9 +162,9 @@ class find_duplicate_contacts:
else: else:
print('Doing nothing.') print('Doing nothing.')
return False return False
def are_partially_same_dict(self, d1, d2, key='id'): def are_partially_same_dict(self, d1, d2, key='id'):
'''Test if two dictionaries are similar'''
if not isinstance(d1[key], list): if not isinstance(d1[key], list):
d1[key] = [ d1[key] ] d1[key] = [ d1[key] ]
d2[key] = [ d2[key] ] d2[key] = [ d2[key] ]
@ -150,6 +172,7 @@ class find_duplicate_contacts:
return True return True
def are_same_dict(self, d1, d2): def are_same_dict(self, d1, d2):
'''Test if two dictionaries are equal'''
ddiff = deepdiff.DeepDiff(d1, d2, ignore_order=True) ddiff = deepdiff.DeepDiff(d1, d2, ignore_order=True)
if ddiff == dict(): if ddiff == dict():
return True return True
@ -160,13 +183,18 @@ class find_duplicate_contacts:
if 'values_changed' in ddiff: if 'values_changed' in ddiff:
real_change = False real_change = False
for key in ddiff['values_changed'].keys(): for key in ddiff['values_changed'].keys():
if isinstance(ddiff['values_changed'][key]['new_value'], str): if isinstance(
if ddiff['values_changed'][key]['new_value'].lower() != ddiff['values_changed'][key]['old_value'].lower(): ddiff['values_changed'][key]['new_value'],
str
):
if (ddiff['values_changed'][key]['new_value'].lower()
!=
ddiff['values_changed'][key]['old_value'].lower()):
real_change = True real_change = True
if real_change: if real_change:
return False return False
else: else:
#print(ddiff) # print(ddiff)
return False return False
def _init_log(self): def _init_log(self):
@ -208,11 +236,15 @@ class find_duplicate_contacts:
), help='Set the debug level for the standard output.') ), help='Set the debug level for the standard output.')
@click.option('--log-file', '-l', help="File to store all debug messages.") @click.option('--log-file', '-l', help="File to store all debug messages.")
@click.option("--directory", "-f", required=True, help="Directory containing vCard files to check.") @click.option("--directory", "-f", required=True, help="Directory containing vCard files to check.")
@click.option('--duplicates-destination', '-D', default='duplicates', help='Directory to move duplicates files, relative to the directory containing the vCards.') @click.option(
'--duplicates-destination',
'-D',
default='duplicates',
help='Directory to move duplicates files, relative to the directory containing the vCards.'
)
@click_config_file.configuration_option() @click_config_file.configuration_option()
def __main__(debug_level, log_file, directory, duplicates_destination): def __main__(**kwargs):
return find_duplicate_contacts(debug_level, log_file, directory, duplicates_destination) return FindDuplicateContacts(**kwargs)
if __name__ == "__main__": if __name__ == "__main__":
__main__() __main__()

View file

@ -1,11 +1,20 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Setup script"""
import configparser
import setuptools import setuptools
config = configparser.ConfigParser()
config.read('setup.cfg')
setuptools.setup( setuptools.setup(
scripts=['find_duplicate_contacts/find_duplicate_contacts.py'], scripts=['find_duplicate_contacts/find_duplicate_contacts.py'],
author="Antonio J. Delgado", author="Antonio J. Delgado",
version='0.0.3', version='0.0.3',
name='find_duplicate_contacts', name='find_duplicate_contacts',
author_email="", author_email="TXj6QGdwejbTD1iWAj2ws9pnV@susurrando.com",
url="", url="https://susurrando.com",
description="Find duplicate contacts in vCard files", description="Find duplicate contacts in vCard files",
long_description="README.md", long_description="README.md",
long_description_content_type="text/markdown", long_description_content_type="text/markdown",