add scan of hashes

Antonio J. Delgado 2023-01-25 14:41:31 +02:00
parent 228ae6c0a3
commit b817fd2c7e


@@ -11,6 +11,7 @@ import logging
 import click
 import click_config_file
 from logging.handlers import SysLogHandler
+import hashlib
 class find_duplicate_files:
@@ -32,14 +33,23 @@ class find_duplicate_files:
         second_files = self.recursive_scandir(self.second_directory)
         self._log.debug(f"Found {len(second_files)} files in second directory '{self.second_directory}'")
+        for hash in first_files:
+            if hash in second_files:
+                print(f"#File '{first_files[hash]}' is dupe with '{second_files[hash]}'.")
+                print(f"rm '{first_files[hash]}'")
     def recursive_scandir(self, path, ignore_hidden_files=True):
         ''' Recursively scan a directory for files'''
-        files = []
+        files = dict()
         try:
             for file in os.scandir(path):
                 if not file.name.startswith('.'):
                     if file.is_file():
+                        hash = hashlib.sha256()
+                        with open(file.path, 'rb') as file_pointer:
+                            file_content = file_pointer.read()
+                        hash.update(file_content)
+                        files[hash.hexdigest()] = file.name
-                        files.append(file)
                     elif file.is_dir(follow_symlinks=False):
                         more_files = self.recursive_scandir(
                             file.path,
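
The committed recursive_scandir reads each file into memory in a single read() before hashing, keys the result dictionary by digest (so identical files within one tree overwrite each other and only the last path per digest survives), and names its loop variable hash, shadowing the built-in. As a rough sketch of the same idea, not part of this commit, the digest can instead be built from fixed-size chunks so large files never have to fit in memory; hash_files and chunk_size below are illustrative names, not from the repository:

import hashlib
import os

def hash_files(path, chunk_size=1024 * 1024):
    ''' Recursively map SHA-256 hex digest -> file path.
        Hashes in fixed-size chunks so large files are never
        fully loaded into memory. A sketch, not the repo's code. '''
    files = dict()
    for entry in os.scandir(path):
        if entry.name.startswith('.'):
            continue  # skip hidden files and directories
        if entry.is_file():
            digest = hashlib.sha256()
            with open(entry.path, 'rb') as file_pointer:
                # read() returns b'' at EOF, which ends the loop
                for chunk in iter(lambda: file_pointer.read(chunk_size), b''):
                    digest.update(chunk)
            files[digest.hexdigest()] = entry.path
        elif entry.is_dir(follow_symlinks=False):
            files.update(hash_files(entry.path, chunk_size))
    return files

With two trees scanned this way, the duplicate check is the same as in the commit: any digest present in both dictionaries marks a duplicate pair. On Python 3.11+ the read loop can also be replaced by hashlib.file_digest(file_pointer, 'sha256').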