Find duplicates in vcards
This commit is contained in:
parent
1d62e480de
commit
55fb663768
2 changed files with 82 additions and 13 deletions
|
@ -12,10 +12,12 @@ import click
|
||||||
import click_config_file
|
import click_config_file
|
||||||
from logging.handlers import SysLogHandler
|
from logging.handlers import SysLogHandler
|
||||||
import vobject
|
import vobject
|
||||||
|
import deepdiff
|
||||||
|
import shutil
|
||||||
|
|
||||||
class find_duplicate_contacts:
|
class find_duplicate_contacts:
|
||||||
|
|
||||||
def __init__(self, debug_level, log_file, directory):
|
def __init__(self, debug_level, log_file, directory, duplicates_destination):
|
||||||
''' Initial function called when object is created '''
|
''' Initial function called when object is created '''
|
||||||
self.config = dict()
|
self.config = dict()
|
||||||
self.config['debug_level'] = debug_level
|
self.config['debug_level'] = debug_level
|
||||||
|
@ -24,23 +26,88 @@ class find_duplicate_contacts:
|
||||||
self.config['log_file'] = log_file
|
self.config['log_file'] = log_file
|
||||||
self._init_log()
|
self._init_log()
|
||||||
|
|
||||||
|
self.ignore_fileds = [
|
||||||
|
"prodid",
|
||||||
|
"uid",
|
||||||
|
"version",
|
||||||
|
"rev",
|
||||||
|
"x-thunderbird-etag",
|
||||||
|
"x-mozilla-html",
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
|
|
||||||
self.directory = directory
|
self.directory = directory
|
||||||
|
self.duplicates_destination = duplicates_destination
|
||||||
|
self.duplicates_folder = os.path.join(self.directory, self.duplicates_destination)
|
||||||
|
if not os.path.exists(self.duplicates_folder):
|
||||||
|
os.mkdir(self.duplicates_folder)
|
||||||
|
|
||||||
|
|
||||||
self.entries = list()
|
self.entries = list()
|
||||||
for entry in os.scandir(directory):
|
for entry in os.scandir(directory):
|
||||||
self.entries.append(entry)
|
self.entries.append(entry)
|
||||||
|
|
||||||
self.process_entries()
|
self.read_cards()
|
||||||
|
|
||||||
def process_entries(self):
|
self.compare_cards()
|
||||||
|
|
||||||
|
def read_cards(self):
|
||||||
|
self.cards = []
|
||||||
for entry in self.entries:
|
for entry in self.entries:
|
||||||
with open(entry.path, 'r') as filep:
|
self._log.debug(f"Reading vcard '{entry.path}'...")
|
||||||
content=filep.read()
|
card = {}
|
||||||
card = vobject.readOne(content)
|
card['filename'] = entry.path
|
||||||
print(entry.path)
|
card['content'] = {}
|
||||||
print(card.contents.keys())
|
if not entry.is_dir():
|
||||||
sys.exit(0)
|
with open(entry.path, 'r') as filep:
|
||||||
|
content=filep.read()
|
||||||
|
if len(content) > 0:
|
||||||
|
vcard = vobject.readOne(content)
|
||||||
|
|
||||||
|
for key in vcard.contents.keys():
|
||||||
|
if key not in self.ignore_fileds:
|
||||||
|
card['content'][key] = list()
|
||||||
|
for item in vcard.contents[key]:
|
||||||
|
card['content'][key].append(item.value)
|
||||||
|
self.cards.append(card)
|
||||||
|
|
||||||
|
def compare_cards(self):
    """Move every card that duplicates an earlier one to the duplicates folder.

    Walks ``self.cards`` in order while keeping the cards that matched none
    of their predecessors. When the ``'content'`` of a card matches a kept
    card (as decided by ``self.are_same_dict``), both file names are logged
    and the later file is moved into ``self.duplicates_folder`` under its
    own base name. The number of kept cards is logged at the end.
    """
    unique_cards = []
    for card in self.cards:
        if not card['content']:
            # Two empty mappings always compare equal, so entries without
            # any comparable field would all be flagged as duplicates of
            # each other; leave them where they are.
            self._log.debug(f"Skipping '{card['filename']}': nothing to compare")
            continue

        # Stop at the first match: the file can only be moved once, and a
        # second shutil.move() on the same source would fail.
        original = next(
            (
                kept for kept in unique_cards
                if self.are_same_dict(card['content'], kept['content'])
            ),
            None,
        )
        if original is None:
            unique_cards.append(card)
            continue

        self._log.info(f"Duplicates:\n '{card['filename']}'\n '{original['filename']}'")
        shutil.move(
            card['filename'],
            os.path.join(self.duplicates_folder, os.path.basename(card['filename']))
        )
    self._log.info(f"Found {len(unique_cards)} unique cards")
|
||||||
|
|
||||||
|
def are_same_dict(self, d1, d2):
    """Tell whether two dictionaries are equal, ignoring order and letter case.

    The dictionaries are compared with ``deepdiff.DeepDiff`` using
    ``ignore_order=True``. They count as the same when the diff is empty,
    or when the diff consists solely of ``values_changed`` entries whose
    old and new values are strings differing only in letter case.

    Args:
        d1: First dictionary to compare.
        d2: Second dictionary to compare.

    Returns:
        bool: True when the dictionaries are considered the same, False
        otherwise.
    """
    ddiff = deepdiff.DeepDiff(d1, d2, ignore_order=True)
    if ddiff == {}:
        return True

    # Any category other than 'values_changed' (added/removed keys or
    # items, type changes, ...) is a real difference.
    if any(category != 'values_changed' for category in ddiff.keys()):
        return False

    for change in ddiff['values_changed'].values():
        new_value = change['new_value']
        old_value = change['old_value']
        # Only strings can be compared case-insensitively; a changed value
        # of any other type is a real difference.
        if not (isinstance(new_value, str) and isinstance(old_value, str)):
            return False
        if new_value.lower() != old_value.lower():
            return False

    # Every reported change was a case-only difference between strings.
    return True
|
||||||
|
|
||||||
def _init_log(self):
|
def _init_log(self):
|
||||||
''' Initialize log object '''
|
''' Initialize log object '''
|
||||||
|
@ -81,9 +148,10 @@ class find_duplicate_contacts:
|
||||||
), help='Set the debug level for the standard output.')
|
), help='Set the debug level for the standard output.')
|
||||||
@click.option('--log-file', '-l', help="File to store all debug messages.")
|
@click.option('--log-file', '-l', help="File to store all debug messages.")
|
||||||
@click.option("--directory", "-d", help="Directory containing vCard files to check.")
|
@click.option("--directory", "-d", help="Directory containing vCard files to check.")
|
||||||
|
@click.option('--duplicates-destination', '-D', default='duplicates', help='Directory to move duplicates files, relative to the directory containing the vCards.')
|
||||||
@click_config_file.configuration_option()
|
@click_config_file.configuration_option()
|
||||||
def __main__(debug_level, log_file, directory):
|
def __main__(debug_level, log_file, directory, duplicates_destination):
|
||||||
return find_duplicate_contacts(debug_level, log_file, directory)
|
return find_duplicate_contacts(debug_level, log_file, directory, duplicates_destination)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
__main__()
|
__main__()
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
click
|
click
|
||||||
click_config_file
|
click_config_file
|
||||||
vobject
|
vobject
|
||||||
|
deepdiff
|
Loading…
Reference in a new issue