Fix style
parent f9c24bb88d
commit d6cf8036ae
1 changed file with 45 additions and 25 deletions
@@ -8,21 +8,29 @@
 import sys
 import os
 import logging
-import click
-import click_config_file
 from logging.handlers import SysLogHandler
 import zlib
 import sqlite3
 import re
 
+import click
+import click_config_file
+
-class find_duplicate_files:
+class FindDuplicateFiles:
     '''Find duplicate files'''
 
-    def __init__(self, debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates):
+    def __init__(
+        self, debug_level, log_file, dummy, first_directory, second_directory,
+        exclude, limit, output_file, delete_duplicates
+    ):
         ''' Initial function called when object is created '''
-        self.config = dict()
+        self.config = {}
         self.config['debug_level'] = debug_level
         if log_file is None:
-            log_file = os.path.join(os.environ.get('HOME', os.environ.get('USERPROFILE', os.getcwd())), 'log', 'find_duplicate_files.log')
+            log_file = os.path.join(
+                os.environ.get('HOME', os.environ.get('USERPROFILE', os.getcwd())),
+                'log',
+                'FindDuplicateFiles.log'
+            )
         self.config['log_file'] = log_file
         self._init_log()
@@ -42,20 +50,24 @@ class find_duplicate_files:
 
         total = len(first_files)
         count = 0
-        with open(self.output_file, 'w') as output_pointer:
-            for hash in first_files:
+        with open(self.output_file, 'w', encoding='utf-8') as output_pointer:
+            for file_hash in first_files:
                 count += 1
                 self._log.info(f"# Checking file {count} of {total}")
-                if hash in second_files:
-                    self._log.info(f"#File '{first_files[hash]}' is a duplicate of '{second_files[hash]}'.")
+                if file_hash in second_files:
+                    self._log.info(f"#File '{first_files[file_hash]}' is a duplicate of '{second_files[file_hash]}'.")
                     if delete_duplicates:
-                        self._log.warning(f"Removed file '{first_files[hash]}', duplicate of '{second_files[hash]}'.")
-                        os.remove(first_files[hash])
+                        self._log.warning(f"Removed file '{first_files[file_hash]}', duplicate of '{second_files[file_hash]}'.")
+                        os.remove(first_files[file_hash])
                     else:
-                        self._log.info(f"rm '{first_files[hash]}'")
-                        output_pointer.write(f"rm '{first_files[hash]}'\n")
+                        self._log.info(f"rm '{first_files[file_hash]}'")
+                        output_pointer.write(f"rm '{first_files[file_hash]}'\n")
 
-    def _init_db_cache(self, cache_file='/var/cache/find_duplicate_files.cache.sql'):
+    def _init_db_cache(self, cache_file='/var/cache/FindDuplicateFiles.cache.sql'):
+        self._log.debug(
+            "Initializing database file '%s'...",
+            cache_file,
+        )
         self.cache_file = cache_file
         self.cache_db = sqlite3.connect(self.cache_file)
         self.cur = self.cache_db.cursor()
@@ -63,6 +75,10 @@ class find_duplicate_files:
         self.cache_db.commit()
 
     def _check_file_cache(self, file):
+        self._log.debug(
+            "Checking file '%s' in cache...",
+            file
+        )
         file_sql = file.replace("'", "''")
         query = f"SELECT hash FROM files WHERE file = '{file_sql}'"
         row = False
@@ -79,9 +95,14 @@ class find_duplicate_files:
         else:
             return False
 
-    def _cache_file(self, file, hash):
+    def _cache_file(self, file, file_hash):
+        self._log.debug(
+            "Adding file '%s' hash '%s' to cache...",
+            file,
+            file_hash,
+        )
         file_sql = file.replace("'", "''")
-        query = f"INSERT INTO files (file, hash) VALUES ('{file_sql}', '{hash}')"
+        query = f"INSERT INTO files (file, hash) VALUES ('{file_sql}', '{file_hash}')"
         result = False
         if isinstance(query, bytes):
             query = query.decode('utf-8')
@@ -112,7 +133,7 @@ class find_duplicate_files:
 
     def recursive_scandir(self, path, ignore_hidden_files=True):
         ''' Recursively scan a directory for files'''
-        files = dict()
+        files = {}
         if os.path.exists(path):
             try:
                 for file in os.scandir(path):
@@ -128,9 +149,9 @@ class find_duplicate_files:
                     else:
                         with open(file.path, 'rb') as file_pointer:
                             file_content = file_pointer.read()
-                            hash = zlib.adler32(file_content)
-                            files[hash] = file.path
-                            self._cache_file(file.path, hash)
+                            file_hash = zlib.adler32(file_content)
+                            files[file_hash] = file.path
+                            self._cache_file(file.path, file_hash)
                 elif file.is_dir(follow_symlinks=False):
                     more_files = self.recursive_scandir(
                         file.path,
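Note on the hunk above: the values now named file_hash are Adler-32 checksums of each file's full contents, and the scan builds a dict of checksum -> path so the two directory trees can then be compared by key. A minimal, self-contained sketch of that idea (the temporary files and names below are illustrative, not part of this commit):

import os
import tempfile
import zlib

def adler32_of(path):
    # Checksum of the file's full contents, mirroring recursive_scandir above.
    with open(path, 'rb') as file_pointer:
        return zlib.adler32(file_pointer.read())

# Two throwaway files with identical contents stand in for the two trees.
tmp = tempfile.mkdtemp()
first_path = os.path.join(tmp, 'a.txt')
second_path = os.path.join(tmp, 'a_copy.txt')
for path in (first_path, second_path):
    with open(path, 'w', encoding='utf-8') as handle:
        handle.write('same contents')

# Index one side by checksum, then probe the other side by key: identical
# contents give identical checksums, so a shared key marks a likely duplicate.
# Adler-32 is not a cryptographic hash, so occasional collisions are possible.
first_files = {adler32_of(first_path): first_path}
second_files = {adler32_of(second_path): second_path}
for file_hash, path in first_files.items():
    if file_hash in second_files:
        print(f"'{path}' is a duplicate of '{second_files[file_hash]}'")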
@@ -147,7 +168,7 @@ class find_duplicate_files:
 
     def _init_log(self):
         ''' Initialize log object '''
-        self._log = logging.getLogger("find_duplicate_files")
+        self._log = logging.getLogger("FindDuplicateFiles")
        self._log.setLevel(logging.DEBUG)
 
        sysloghandler = SysLogHandler()
@@ -163,7 +184,7 @@ class find_duplicate_files:
         else:
             home_folder = os.environ.get('HOME', os.environ.get('USERPROFILE', ''))
             log_folder = os.path.join(home_folder, "log")
-            log_file = os.path.join(log_folder, "find_duplicate_files.log")
+            log_file = os.path.join(log_folder, "FindDuplicateFiles.log")
 
         if not os.path.exists(os.path.dirname(log_file)):
             os.mkdir(os.path.dirname(log_file))
@@ -192,8 +213,7 @@ class find_duplicate_files:
 @click.option('--delete-duplicates', default=False, is_flag=True, help='Delete duplicate files instead of creating commands file')
 @click_config_file.configuration_option()
 def __main__(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates):
-    return find_duplicate_files(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates)
+    return FindDuplicateFiles(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates)
 
 if __name__ == "__main__":
     __main__()
-
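For reference, a hedged sketch of how the renamed class might be instantiated directly; the module name in the import and every argument value are assumptions for illustration, only the parameter names come from the signature in this diff:

# Hypothetical module name; parameter names match __init__/__main__ above,
# all values are placeholders chosen for illustration.
from find_duplicate_files import FindDuplicateFiles

FindDuplicateFiles(
    debug_level='INFO',
    log_file=None,                    # per the diff, falls back to a FindDuplicateFiles.log under $HOME/log
    dummy=True,
    first_directory='/data/photos',
    second_directory='/backup/photos',
    exclude=None,
    limit=0,
    output_file='remove_duplicates.sh',
    delete_duplicates=False,          # keep files; the compare step writes `rm` commands instead
)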