diff --git a/find_duplicate_files/find_duplicate_files.py b/find_duplicate_files/find_duplicate_files.py
index 8c816e3..301f787 100644
--- a/find_duplicate_files/find_duplicate_files.py
+++ b/find_duplicate_files/find_duplicate_files.py
@@ -17,7 +17,7 @@ import re
 
 class find_duplicate_files:
 
-    def __init__(self, debug_level, log_file, dummy, first_directory, second_directory, exclude, limit):
+    def __init__(self, debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file):
         ''' Initial function called when object is created '''
         self.config = dict()
         self.config['debug_level'] = debug_level
@@ -31,6 +31,7 @@ class find_duplicate_files:
         self.second_directory = second_directory
         self.exclude = exclude
         self.limit = limit
+        self.output_file = output_file
 
         self._init_db_cache()
 
@@ -41,12 +42,14 @@ class find_duplicate_files:
         total = len(first_files)
         count = 0
 
-        for hash in first_files:
-            count += 1
-            self._log.info(f"# Checking file {count} of {total}")
-            if hash in second_files:
-                self._log.info(f"#File '{first_files[hash]}' is dupe with '{second_files[hash]}'.")
-                self._log.info(f"rm '{first_files[hash]}'")
+        with open(self.output_file, 'w') as output_pointer:
+            for hash in first_files:
+                count += 1
+                self._log.info(f"# Checking file {count} of {total}")
+                if hash in second_files:
+                    self._log.info(f"#File '{first_files[hash]}' is dupe with '{second_files[hash]}'.")
+                    self._log.info(f"rm '{first_files[hash]}'")
+                    output_pointer.write(f"rm '{first_files[hash]}'\n")
 
     def _init_db_cache(self, cache_file='/var/cache/find_duplicate_files.cache.sql'):
         self.cache_file = cache_file
@@ -161,9 +164,10 @@ class find_duplicate_files:
 @click.option('--second-directory', '-s', required=True, help='Second directory to find files')
 @click.option('--exclude', '-e', multiple=True, help='Regular expression pattern to exclude from files and directories.')
 @click.option('--limit', '-l', default=0, type=int, help='Limit to a certain number of files to check.')
+@click.option('--output-file', '-o', default='/tmp/delete_duplicates_commands.sh', help='File to write the commands to delete duplicate files. USE WITH CAUTION!')
 @click_config_file.configuration_option()
-def __main__(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit):
-    return find_duplicate_files(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit)
+def __main__(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file):
+    return find_duplicate_files(debug_level, log_file, dummy, first_directory, second_directory, exclude, limit, output_file)
 
 if __name__ == "__main__":
     __main__()