Fix style

commit d6cf8036ae
parent f9c24bb88d
Author: Antonio J. Delgado
Date:   2025-01-24 17:07:19 +02:00


@@ -8,21 +8,29 @@
 import sys
 import os
 import logging
-import click
-import click_config_file
 from logging.handlers import SysLogHandler
 import zlib
 import sqlite3
 import re
+import click
+import click_config_file


-class find_duplicate_files:
+class FindDuplicateFiles:
     '''Find duplicate files'''

-    def __init__(self, debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates):
+    def __init__(
+        self, debug_level, log_file, dummy, first_directory, second_directory,
+        exclude, limit, output_file, delete_duplicates
+    ):
         ''' Initial function called when object is created '''
-        self.config = dict()
+        self.config = {}
         self.config['debug_level'] = debug_level
         if log_file is None:
-            log_file = os.path.join(os.environ.get('HOME', os.environ.get('USERPROFILE', os.getcwd())), 'log', 'find_duplicate_files.log')
+            log_file = os.path.join(
+                os.environ.get('HOME', os.environ.get('USERPROFILE', os.getcwd())),
+                'log',
+                'FindDuplicateFiles.log'
+            )
         self.config['log_file'] = log_file
         self._init_log()
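A side note on the hunk above, beyond what the commit changes: the nested os.environ.get() fallback works, but pathlib.Path.home() resolves the home directory portably on both POSIX and Windows. A minimal sketch of that alternative, not part of this commit:

```python
# Sketch only (not in the commit): Path.home() covers both HOME and
# the Windows user profile without reading environment variables by hand.
from pathlib import Path

log_file = str(Path.home() / 'log' / 'FindDuplicateFiles.log')
```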
@@ -42,20 +50,24 @@ class find_duplicate_files:
         total = len(first_files)
         count = 0
-        with open(self.output_file, 'w') as output_pointer:
-            for hash in first_files:
+        with open(self.output_file, 'w', encoding='utf-8') as output_pointer:
+            for file_hash in first_files:
                 count += 1
                 self._log.info(f"# Checking file {count} of {total}")
-                if hash in second_files:
-                    self._log.info(f"#File '{first_files[hash]}' is a duplicate of '{second_files[hash]}'.")
+                if file_hash in second_files:
+                    self._log.info(f"#File '{first_files[file_hash]}' is a duplicate of '{second_files[file_hash]}'.")
                     if delete_duplicates:
-                        self._log.warning(f"Removed file '{first_files[hash]}', duplicate of '{second_files[hash]}'.")
-                        os.remove(first_files[hash])
+                        self._log.warning(f"Removed file '{first_files[file_hash]}', duplicate of '{second_files[file_hash]}'.")
+                        os.remove(first_files[file_hash])
                     else:
-                        self._log.info(f"rm '{first_files[hash]}'")
-                        output_pointer.write(f"rm '{first_files[hash]}'\n")
+                        self._log.info(f"rm '{first_files[file_hash]}'")
+                        output_pointer.write(f"rm '{first_files[file_hash]}'\n")

-    def _init_db_cache(self, cache_file='/var/cache/find_duplicate_files.cache.sql'):
+    def _init_db_cache(self, cache_file='/var/cache/FindDuplicateFiles.cache.sql'):
+        self._log.debug(
+            "Initializing database file '%s'...",
+            cache_file,
+        )
         self.cache_file = cache_file
         self.cache_db = sqlite3.connect(self.cache_file)
         self.cur = self.cache_db.cursor()
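One caveat in the hunk above: the generated cleanup commands wrap paths in single quotes by hand (f"rm '{...}'"), which breaks, and becomes unsafe to execute, for any file name that itself contains a single quote. The standard library's shlex.quote handles that case. A sketch, reusing the output_pointer and first_files names from the diff:

```python
# Sketch only: shlex.quote escapes shell metacharacters, including
# embedded single quotes, so the written rm command stays valid for
# arbitrary file names.
import shlex

output_pointer.write(f"rm {shlex.quote(first_files[file_hash])}\n")
```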
@@ -63,6 +75,10 @@ class find_duplicate_files:
         self.cache_db.commit()

     def _check_file_cache(self, file):
+        self._log.debug(
+            "Checking file '%s' in cache...",
+            file
+        )
         file_sql = file.replace("'", "''")
         query = f"SELECT hash FROM files WHERE file = '{file_sql}'"
         row = False
@@ -79,9 +95,14 @@ class find_duplicate_files:
         else:
             return False

-    def _cache_file(self, file, hash):
+    def _cache_file(self, file, file_hash):
+        self._log.debug(
+            "Adding file '%s' hash '%s' to cache...",
+            file,
+            file_hash,
+        )
         file_sql = file.replace("'", "''")
-        query = f"INSERT INTO files (file, hash) VALUES ('{file_sql}', '{hash}')"
+        query = f"INSERT INTO files (file, hash) VALUES ('{file_sql}', '{file_hash}')"
         result = False
         if isinstance(query, bytes):
             query = query.decode('utf-8')
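Both _check_file_cache and _cache_file build SQL by interpolating the file path into an f-string, with only manual quote-doubling (file.replace("'", "''")) as protection. sqlite3's ? placeholders make the escaping unnecessary and rule out injection via hostile file names. A sketch using the cursor and connection names from the diff:

```python
# Sketch only: parameter binding replaces the manual quote-doubling
# and keeps the path out of the SQL text entirely.
self.cur.execute(
    "INSERT INTO files (file, hash) VALUES (?, ?)",
    (file, file_hash),
)
self.cache_db.commit()
```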
@@ -112,7 +133,7 @@ class find_duplicate_files:
     def recursive_scandir(self, path, ignore_hidden_files=True):
         ''' Recursively scan a directory for files'''
-        files = dict()
+        files = {}
         if os.path.exists(path):
             try:
                 for file in os.scandir(path):
@@ -128,9 +149,9 @@ class find_duplicate_files:
                         else:
                             with open(file.path, 'rb') as file_pointer:
                                 file_content = file_pointer.read()
-                                hash = zlib.adler32(file_content)
-                                files[hash] = file.path
-                                self._cache_file(file.path, hash)
+                                file_hash = zlib.adler32(file_content)
+                                files[file_hash] = file.path
+                                self._cache_file(file.path, file_hash)
                     elif file.is_dir(follow_symlinks=False):
                         more_files = self.recursive_scandir(
                             file.path,
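Worth noting about the hunk above: zlib.adler32 is a fast 32-bit checksum, not a cryptographic hash, so across a large tree two distinct files can collide, and since files is keyed by the checksum, a collision would silently report a false duplicate. The read also loads each whole file into memory at once. A hedged alternative (the helper name file_digest is hypothetical, not from the commit):

```python
# Sketch only: SHA-256 makes accidental collisions practically
# impossible, and chunked reads bound memory use on large files.
import hashlib

def file_digest(path, chunk_size=1 << 20):
    digest = hashlib.sha256()
    with open(path, 'rb') as file_pointer:
        while chunk := file_pointer.read(chunk_size):  # Python 3.8+
            digest.update(chunk)
    return digest.hexdigest()
```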
@@ -147,7 +168,7 @@ class find_duplicate_files:
     def _init_log(self):
         ''' Initialize log object '''
-        self._log = logging.getLogger("find_duplicate_files")
+        self._log = logging.getLogger("FindDuplicateFiles")
         self._log.setLevel(logging.DEBUG)

         sysloghandler = SysLogHandler()
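A small operational note on _init_log: SysLogHandler() with no arguments sends UDP datagrams to ('localhost', 514), while many Linux syslog daemons listen only on the /dev/log Unix socket, so records can be dropped silently. If that applies here (an assumption about the deployment, not something the commit states):

```python
# Sketch only: point the handler at the local syslog socket instead
# of the default UDP ('localhost', 514) endpoint.
sysloghandler = SysLogHandler(address='/dev/log')
```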
@@ -163,7 +184,7 @@ class find_duplicate_files:
             else:
                 home_folder = os.environ.get('HOME', os.environ.get('USERPROFILE', ''))
                 log_folder = os.path.join(home_folder, "log")
-            log_file = os.path.join(log_folder, "find_duplicate_files.log")
+            log_file = os.path.join(log_folder, "FindDuplicateFiles.log")
         if not os.path.exists(os.path.dirname(log_file)):
             os.mkdir(os.path.dirname(log_file))
@@ -192,8 +213,7 @@ class find_duplicate_files:
 @click.option('--delete-duplicates', default=False, is_flag=True, help='Delete duplicate files instead of creating commands file')
 @click_config_file.configuration_option()
 def __main__(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates):
-    return find_duplicate_files(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates)
+    return FindDuplicateFiles(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates)


 if __name__ == "__main__":
     __main__()