fix var name and update style
This commit is contained in:
parent
a038b0e2ca
commit
e6f837a530
3 changed files with 81 additions and 40 deletions
|
@ -17,7 +17,7 @@ Save the contacts as individual (one contact per file) vCard files in an isolate
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
- Save the contacts as individual (one contact per file) vCard files in an isolated directory, pass this directory with the *--directory* option.
|
- Save the contacts as individual (one contact per file) vCard files in an isolated directory, pass this directory with the *--directory* option.
|
||||||
- Those completely equal, except for some list of keys (see the *ignore_fileds* variable), will be directly moved to the *--duplicates-destination* folder inside the *--directory*.
|
- Those completely equal, except for some list of keys (see the *ignore_fields* variable), will be directly moved to the *--duplicates-destination* folder inside the *--directory*.
|
||||||
- Those with equal full name will be shown, and a prompt will ask you to keep one of the contact cards or just do nothing with them.
|
- Those with equal full name will be shown, and a prompt will ask you to keep one of the contact cards or just do nothing with them.
|
||||||
|
|
||||||
```find_duplicate_contacts.py [OPTIONS]```
|
```find_duplicate_contacts.py [OPTIONS]```
|
||||||
|
@ -36,4 +36,4 @@ Options:
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
[GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html)
|
[GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html)
|
||||||
|
|
|
@ -3,32 +3,41 @@
|
||||||
#
|
#
|
||||||
# This script is licensed under GNU GPL version 2.0 or above
|
# This script is licensed under GNU GPL version 2.0 or above
|
||||||
# (c) 2022 Antonio J. Delgado
|
# (c) 2022 Antonio J. Delgado
|
||||||
# __description__
|
''' Find duplicate contacts '''
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import click
|
|
||||||
import click_config_file
|
|
||||||
from logging.handlers import SysLogHandler
|
from logging.handlers import SysLogHandler
|
||||||
import vobject
|
|
||||||
import deepdiff
|
|
||||||
import shutil
|
import shutil
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
import json
|
import click
|
||||||
|
import click_config_file
|
||||||
|
import vobject
|
||||||
|
import deepdiff
|
||||||
|
|
||||||
class find_duplicate_contacts:
|
|
||||||
|
|
||||||
def __init__(self, debug_level, log_file, directory, duplicates_destination):
|
class FindDuplicateContacts:
|
||||||
|
'''Main class to find duplicate contacts'''
|
||||||
|
|
||||||
|
def __init__(self, **kwargs):
|
||||||
''' Initial function called when object is created '''
|
''' Initial function called when object is created '''
|
||||||
self.config = dict()
|
self.config = kwargs
|
||||||
self.config['debug_level'] = debug_level
|
if 'log_file' not in kwargs or kwargs['log_file'] is None:
|
||||||
if log_file is None:
|
self.config['log_file'] = os.path.join(
|
||||||
log_file = os.path.join(os.environ.get('HOME', os.environ.get('USERPROFILE', os.getcwd())), 'log', 'find_duplicate_contacts.log')
|
os.environ.get(
|
||||||
self.config['log_file'] = log_file
|
'HOME',
|
||||||
|
os.environ.get(
|
||||||
|
'USERPROFILE',
|
||||||
|
os.getcwd()
|
||||||
|
)
|
||||||
|
),
|
||||||
|
'log',
|
||||||
|
'__project_codename__.log'
|
||||||
|
)
|
||||||
self._init_log()
|
self._init_log()
|
||||||
|
|
||||||
self.ignore_fileds = [
|
self.ignore_fields = [
|
||||||
"prodid",
|
"prodid",
|
||||||
"uid",
|
"uid",
|
||||||
"version",
|
"version",
|
||||||
|
@ -38,15 +47,15 @@ class find_duplicate_contacts:
|
||||||
"photo"
|
"photo"
|
||||||
]
|
]
|
||||||
|
|
||||||
self.directory = directory
|
self.duplicates_folder = os.path.join(
|
||||||
self.duplicates_destination = duplicates_destination
|
self.config['directory'],
|
||||||
self.duplicates_folder = os.path.join(self.directory, self.duplicates_destination)
|
self.config['duplicates_destination']
|
||||||
|
)
|
||||||
if not os.path.exists(self.duplicates_folder):
|
if not os.path.exists(self.duplicates_folder):
|
||||||
os.mkdir(self.duplicates_folder)
|
os.mkdir(self.duplicates_folder)
|
||||||
|
|
||||||
|
self.entries = []
|
||||||
self.entries = list()
|
for entry in os.scandir(self.config['directory']):
|
||||||
for entry in os.scandir(directory):
|
|
||||||
self.entries.append(entry)
|
self.entries.append(entry)
|
||||||
|
|
||||||
self.read_cards()
|
self.read_cards()
|
||||||
|
@ -54,36 +63,45 @@ class find_duplicate_contacts:
|
||||||
self.compare_cards()
|
self.compare_cards()
|
||||||
|
|
||||||
def read_cards(self):
|
def read_cards(self):
|
||||||
|
'''Read all vCards'''
|
||||||
self.cards = []
|
self.cards = []
|
||||||
for entry in self.entries:
|
for entry in self.entries:
|
||||||
self._log.debug(f"Reading vcard '{entry.path}'...")
|
self._log.debug(
|
||||||
|
"Reading vcard '%s'...",
|
||||||
|
entry.path
|
||||||
|
)
|
||||||
card = {}
|
card = {}
|
||||||
card['filename'] = entry.path
|
card['filename'] = entry.path
|
||||||
card['content'] = {}
|
card['content'] = {}
|
||||||
if not entry.is_dir():
|
if not entry.is_dir():
|
||||||
with open(entry.path, 'r') as filep:
|
with open(entry.path, 'r', encoding='UTF-8') as filep:
|
||||||
content=filep.read()
|
content=filep.read()
|
||||||
if len(content) > 0:
|
if len(content) > 0:
|
||||||
vcard = vobject.readOne(content)
|
vcard = vobject.readOne(content)
|
||||||
|
|
||||||
for key in vcard.contents.keys():
|
for key in vcard.contents.keys():
|
||||||
if key not in self.ignore_fileds:
|
if key not in self.ignore_fields:
|
||||||
card['content'][key] = list()
|
card['content'][key] = list()
|
||||||
for item in vcard.contents[key]:
|
for item in vcard.contents[key]:
|
||||||
card['content'][key].append(item.value)
|
card['content'][key].append(item.value)
|
||||||
self.cards.append(card)
|
self.cards.append(card)
|
||||||
|
|
||||||
def compare_cards(self):
|
def compare_cards(self):
|
||||||
|
'''Compare all vCards'''
|
||||||
checked_cards = []
|
checked_cards = []
|
||||||
count = 0
|
count = 0
|
||||||
for card in self.cards:
|
for card in self.cards:
|
||||||
count +=1
|
count +=1
|
||||||
print(f"Contact {count} of {len(self.cards)}:")
|
print(f"Contact {count} of {len(self.cards)}:\b")
|
||||||
duplicated = False
|
duplicated = False
|
||||||
for checked_card in checked_cards:
|
for checked_card in checked_cards:
|
||||||
if self.are_same_dict(card['content'], checked_card['content']):
|
if self.are_same_dict(card['content'], checked_card['content']):
|
||||||
duplicated = True
|
duplicated = True
|
||||||
self._log.info(f"Totally duplicates:\n '{card['filename']}'\n '{checked_card['filename']}")
|
self._log.info(
|
||||||
|
"Totally duplicates:\n '%s'\n '%s",
|
||||||
|
card['filename'],
|
||||||
|
checked_card['filename']
|
||||||
|
)
|
||||||
shutil.move(
|
shutil.move(
|
||||||
card['filename'],
|
card['filename'],
|
||||||
os.path.join(self.duplicates_folder, os.path.basename(card['filename']))
|
os.path.join(self.duplicates_folder, os.path.basename(card['filename']))
|
||||||
|
@ -93,10 +111,14 @@ class find_duplicate_contacts:
|
||||||
duplicated = True
|
duplicated = True
|
||||||
if not duplicated:
|
if not duplicated:
|
||||||
checked_cards.append(card)
|
checked_cards.append(card)
|
||||||
self._log.info(f"Found {len(checked_cards)} unique cards")
|
self._log.info(
|
||||||
|
"Found %s unique cards",
|
||||||
|
len(checked_cards)
|
||||||
|
)
|
||||||
|
|
||||||
def manual_check_cards(self, card1, card2):
|
def manual_check_cards(self, card1, card2):
|
||||||
cols, rows = os.get_terminal_size()
|
'''Manual check of vCards'''
|
||||||
|
cols = os.get_terminal_size()[0]
|
||||||
print("#" * cols)
|
print("#" * cols)
|
||||||
print("Card#1:")
|
print("Card#1:")
|
||||||
print(f" filename: {card1['filename']}")
|
print(f" filename: {card1['filename']}")
|
||||||
|
@ -140,9 +162,9 @@ class find_duplicate_contacts:
|
||||||
else:
|
else:
|
||||||
print('Doing nothing.')
|
print('Doing nothing.')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def are_partially_same_dict(self, d1, d2, key='id'):
|
def are_partially_same_dict(self, d1, d2, key='id'):
|
||||||
|
'''Test if two dictionaries are similar'''
|
||||||
if not isinstance(d1[key], list):
|
if not isinstance(d1[key], list):
|
||||||
d1[key] = [ d1[key] ]
|
d1[key] = [ d1[key] ]
|
||||||
d2[key] = [ d2[key] ]
|
d2[key] = [ d2[key] ]
|
||||||
|
@ -150,6 +172,7 @@ class find_duplicate_contacts:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def are_same_dict(self, d1, d2):
|
def are_same_dict(self, d1, d2):
|
||||||
|
'''Test if two dictionaries are equal'''
|
||||||
ddiff = deepdiff.DeepDiff(d1, d2, ignore_order=True)
|
ddiff = deepdiff.DeepDiff(d1, d2, ignore_order=True)
|
||||||
if ddiff == dict():
|
if ddiff == dict():
|
||||||
return True
|
return True
|
||||||
|
@ -160,13 +183,18 @@ class find_duplicate_contacts:
|
||||||
if 'values_changed' in ddiff:
|
if 'values_changed' in ddiff:
|
||||||
real_change = False
|
real_change = False
|
||||||
for key in ddiff['values_changed'].keys():
|
for key in ddiff['values_changed'].keys():
|
||||||
if isinstance(ddiff['values_changed'][key]['new_value'], str):
|
if isinstance(
|
||||||
if ddiff['values_changed'][key]['new_value'].lower() != ddiff['values_changed'][key]['old_value'].lower():
|
ddiff['values_changed'][key]['new_value'],
|
||||||
|
str
|
||||||
|
):
|
||||||
|
if (ddiff['values_changed'][key]['new_value'].lower()
|
||||||
|
!=
|
||||||
|
ddiff['values_changed'][key]['old_value'].lower()):
|
||||||
real_change = True
|
real_change = True
|
||||||
if real_change:
|
if real_change:
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
#print(ddiff)
|
# print(ddiff)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _init_log(self):
|
def _init_log(self):
|
||||||
|
@ -208,11 +236,15 @@ class find_duplicate_contacts:
|
||||||
), help='Set the debug level for the standard output.')
|
), help='Set the debug level for the standard output.')
|
||||||
@click.option('--log-file', '-l', help="File to store all debug messages.")
|
@click.option('--log-file', '-l', help="File to store all debug messages.")
|
||||||
@click.option("--directory", "-f", required=True, help="Directory containing vCard files to check.")
|
@click.option("--directory", "-f", required=True, help="Directory containing vCard files to check.")
|
||||||
@click.option('--duplicates-destination', '-D', default='duplicates', help='Directory to move duplicates files, relative to the directory containing the vCards.')
|
@click.option(
|
||||||
|
'--duplicates-destination',
|
||||||
|
'-D',
|
||||||
|
default='duplicates',
|
||||||
|
help='Directory to move duplicates files, relative to the directory containing the vCards.'
|
||||||
|
)
|
||||||
@click_config_file.configuration_option()
|
@click_config_file.configuration_option()
|
||||||
def __main__(debug_level, log_file, directory, duplicates_destination):
|
def __main__(**kwargs):
|
||||||
return find_duplicate_contacts(debug_level, log_file, directory, duplicates_destination)
|
return FindDuplicateContacts(**kwargs)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
__main__()
|
__main__()
|
||||||
|
|
||||||
|
|
13
setup.py
13
setup.py
|
@ -1,11 +1,20 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Setup script"""
|
||||||
|
|
||||||
|
import configparser
|
||||||
import setuptools
|
import setuptools
|
||||||
|
|
||||||
|
config = configparser.ConfigParser()
|
||||||
|
config.read('setup.cfg')
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
scripts=['find_duplicate_contacts/find_duplicate_contacts.py'],
|
scripts=['find_duplicate_contacts/find_duplicate_contacts.py'],
|
||||||
author="Antonio J. Delgado",
|
author="Antonio J. Delgado",
|
||||||
version='0.0.3',
|
version='0.0.3',
|
||||||
name='find_duplicate_contacts',
|
name='find_duplicate_contacts',
|
||||||
author_email="",
|
author_email="TXj6QGdwejbTD1iWAj2ws9pnV@susurrando.com",
|
||||||
url="",
|
url="https://susurrando.com",
|
||||||
description="Find duplicate contacts in vCard files",
|
description="Find duplicate contacts in vCard files",
|
||||||
long_description="README.md",
|
long_description="README.md",
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
|
|
Loading…
Reference in a new issue