diff --git a/find_duplicate_files/find_duplicate_files.py b/find_duplicate_files/find_duplicate_files.py index 344946d..192d1e7 100644 --- a/find_duplicate_files/find_duplicate_files.py +++ b/find_duplicate_files/find_duplicate_files.py @@ -17,7 +17,7 @@ import re class find_duplicate_files: - def __init__(self, debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file): + def __init__(self, debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates): ''' Initial function called when object is created ''' self.config = dict() self.config['debug_level'] = debug_level @@ -47,8 +47,12 @@ class find_duplicate_files: count += 1 self._log.info(f"# Checking file {count} of {total}") if hash in second_files: - self._log.info(f"#File '{first_files[hash]}' is dupe with '{second_files[hash]}'.") - self._log.info(f"rm '{first_files[hash]}'") + self._log.info(f"#File '{first_files[hash]}' is a duplicate of '{second_files[hash]}'.") + if delete_duplicates: + self._log.warning(f"Removed file '{first_files[hash]}', duplicate of '{second_files[hash]}'.") + os.remove(first_files[hash]) + else: + self._log.info(f"rm '{first_files[hash]}'") output_pointer.write(f"rm '{first_files[hash]}'\n") def _init_db_cache(self, cache_file='/var/cache/find_duplicate_files.cache.sql'): @@ -185,9 +189,10 @@ class find_duplicate_files: @click.option('--exclude', '-e', multiple=True, help='Regular expression pattern to exclude from files and directories.') @click.option('--limit', '-l', default=0, type=int, help='Limit to a certain number of files to check.') @click.option('--output-file', '-o', default='/tmp/delete_duplicates_commands.sh', help='File to write the commands to delete duplicate files. USE WITH CAUTION!') +@click.option('--delete-duplicates', default=False, is_flag=True, help='Delete duplicate files instead of creating commands file') @click_config_file.configuration_option() -def __main__(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file): - return find_duplicate_files(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file) +def __main__(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates): + return find_duplicate_files(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file, delete_duplicates) if __name__ == "__main__": __main__()