Fix style
This commit is contained in:
parent
f9c24bb88d
commit
d6cf8036ae
1 changed file with 45 additions and 25 deletions
|
@ -8,21 +8,29 @@
|
|||
import sys
|
||||
import os
|
||||
import logging
|
||||
import click
|
||||
import click_config_file
|
||||
from logging.handlers import SysLogHandler
|
||||
import zlib
|
||||
import sqlite3
|
||||
import re
|
||||
import click
|
||||
import click_config_file
|
||||
|
||||
class find_duplicate_files:
|
||||
class FindDuplicateFiles:
|
||||
'''Find duplicate files'''
|
||||
|
||||
def __init__(self, debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates):
|
||||
def __init__(
|
||||
self, debug_level, log_file, dummy, first_directory, second_directory,
|
||||
exclude, limit, output_file, delete_duplicates
|
||||
):
|
||||
''' Initial function called when object is created '''
|
||||
self.config = dict()
|
||||
self.config = {}
|
||||
self.config['debug_level'] = debug_level
|
||||
if log_file is None:
|
||||
log_file = os.path.join(os.environ.get('HOME', os.environ.get('USERPROFILE', os.getcwd())), 'log', 'find_duplicate_files.log')
|
||||
log_file = os.path.join(
|
||||
os.environ.get('HOME', os.environ.get('USERPROFILE', os.getcwd())),
|
||||
'log',
|
||||
'FindDuplicateFiles.log'
|
||||
)
|
||||
self.config['log_file'] = log_file
|
||||
self._init_log()
|
||||
|
||||
|
@ -42,20 +50,24 @@ class find_duplicate_files:
|
|||
|
||||
total = len(first_files)
|
||||
count = 0
|
||||
with open(self.output_file, 'w') as output_pointer:
|
||||
for hash in first_files:
|
||||
with open(self.output_file, 'w', encoding='utf-8') as output_pointer:
|
||||
for file_hash in first_files:
|
||||
count += 1
|
||||
self._log.info(f"# Checking file {count} of {total}")
|
||||
if hash in second_files:
|
||||
self._log.info(f"#File '{first_files[hash]}' is a duplicate of '{second_files[hash]}'.")
|
||||
if file_hash in second_files:
|
||||
self._log.info(f"#File '{first_files[file_hash]}' is a duplicate of '{second_files[file_hash]}'.")
|
||||
if delete_duplicates:
|
||||
self._log.warning(f"Removed file '{first_files[hash]}', duplicate of '{second_files[hash]}'.")
|
||||
os.remove(first_files[hash])
|
||||
self._log.warning(f"Removed file '{first_files[file_hash]}', duplicate of '{second_files[file_hash]}'.")
|
||||
os.remove(first_files[file_hash])
|
||||
else:
|
||||
self._log.info(f"rm '{first_files[hash]}'")
|
||||
output_pointer.write(f"rm '{first_files[hash]}'\n")
|
||||
self._log.info(f"rm '{first_files[file_hash]}'")
|
||||
output_pointer.write(f"rm '{first_files[file_hash]}'\n")
|
||||
|
||||
def _init_db_cache(self, cache_file='/var/cache/find_duplicate_files.cache.sql'):
|
||||
def _init_db_cache(self, cache_file='/var/cache/FindDuplicateFiles.cache.sql'):
|
||||
self._log.debug(
|
||||
"Initializing database file '%s'...",
|
||||
cache_file,
|
||||
)
|
||||
self.cache_file = cache_file
|
||||
self.cache_db = sqlite3.connect(self.cache_file)
|
||||
self.cur = self.cache_db.cursor()
|
||||
|
@ -63,6 +75,10 @@ class find_duplicate_files:
|
|||
self.cache_db.commit()
|
||||
|
||||
def _check_file_cache(self, file):
|
||||
self._log.debug(
|
||||
"Checking file '%s' in cache...",
|
||||
file
|
||||
)
|
||||
file_sql = file.replace("'", "''")
|
||||
query = f"SELECT hash FROM files WHERE file = '{file_sql}'"
|
||||
row = False
|
||||
|
@ -79,9 +95,14 @@ class find_duplicate_files:
|
|||
else:
|
||||
return False
|
||||
|
||||
def _cache_file(self, file, hash):
|
||||
def _cache_file(self, file, file_hash):
|
||||
self._log.debug(
|
||||
"Adding file '%s' has '%s' to cache...",
|
||||
file,
|
||||
file_hash,
|
||||
)
|
||||
file_sql = file.replace("'", "''")
|
||||
query = f"INSERT INTO files (file, hash) VALUES ('{file_sql}', '{hash}')"
|
||||
query = f"INSERT INTO files (file, hash) VALUES ('{file_sql}', '{file_hash}')"
|
||||
result = False
|
||||
if isinstance(query, bytes):
|
||||
query = query.decode('utf-8')
|
||||
|
@ -112,7 +133,7 @@ class find_duplicate_files:
|
|||
|
||||
def recursive_scandir(self, path, ignore_hidden_files=True):
|
||||
''' Recursively scan a directory for files'''
|
||||
files = dict()
|
||||
files = {}
|
||||
if os.path.exists(path):
|
||||
try:
|
||||
for file in os.scandir(path):
|
||||
|
@ -128,9 +149,9 @@ class find_duplicate_files:
|
|||
else:
|
||||
with open(file.path, 'rb') as file_pointer:
|
||||
file_content = file_pointer.read()
|
||||
hash = zlib.adler32(file_content)
|
||||
files[hash] = file.path
|
||||
self._cache_file(file.path, hash)
|
||||
file_hash = zlib.adler32(file_content)
|
||||
files[file_hash] = file.path
|
||||
self._cache_file(file.path, file_hash)
|
||||
elif file.is_dir(follow_symlinks=False):
|
||||
more_files = self.recursive_scandir(
|
||||
file.path,
|
||||
|
@ -147,7 +168,7 @@ class find_duplicate_files:
|
|||
|
||||
def _init_log(self):
|
||||
''' Initialize log object '''
|
||||
self._log = logging.getLogger("find_duplicate_files")
|
||||
self._log = logging.getLogger("FindDuplicateFiles")
|
||||
self._log.setLevel(logging.DEBUG)
|
||||
|
||||
sysloghandler = SysLogHandler()
|
||||
|
@ -163,7 +184,7 @@ class find_duplicate_files:
|
|||
else:
|
||||
home_folder = os.environ.get('HOME', os.environ.get('USERPROFILE', ''))
|
||||
log_folder = os.path.join(home_folder, "log")
|
||||
log_file = os.path.join(log_folder, "find_duplicate_files.log")
|
||||
log_file = os.path.join(log_folder, "FindDuplicateFiles.log")
|
||||
|
||||
if not os.path.exists(os.path.dirname(log_file)):
|
||||
os.mkdir(os.path.dirname(log_file))
|
||||
|
@ -192,8 +213,7 @@ class find_duplicate_files:
|
|||
@click.option('--delete-duplicates', default=False, is_flag=True, help='Delete duplicate files instead of creating commands file')
|
||||
@click_config_file.configuration_option()
|
||||
def __main__(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates):
|
||||
return find_duplicate_files(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates)
|
||||
return FindDuplicateFiles(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates)
|
||||
|
||||
if __name__ == "__main__":
|
||||
__main__()
|
||||
|
||||
|
|
Loading…
Reference in a new issue