From 4f294524cae4968b24fb9eba5836213164964977 Mon Sep 17 00:00:00 2001
From: "Antonio J. Delgado"
Date: Wed, 25 Jan 2023 14:30:40 +0200
Subject: [PATCH] add scanning

---
Review notes (text in this section is not part of the commit message):
 * Line structure of the patch restored; blank context lines are assumed
   to sit after the attribute assignments and before each 'def'.
 * recursive_scandir() now honours 'ignore_hidden_files' (it was accepted
   and forwarded but never read), closes the os.scandir() iterator via a
   context manager, drops the unused 'error' binding and extends the
   result list in place.
 * The 'index' line below is unchanged, so its post-image blob id no
   longer matches the edited content.

 find_duplicate_files/find_duplicate_files.py | 46 ++++++++++++-------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/find_duplicate_files/find_duplicate_files.py b/find_duplicate_files/find_duplicate_files.py
index 6fb1e6c..edf58a4 100644
--- a/find_duplicate_files/find_duplicate_files.py
+++ b/find_duplicate_files/find_duplicate_files.py
@@ -27,24 +27,38 @@ class find_duplicate_files:
         self.first_directory = first_directory
         self.second_directory = second_directory
 
-        first_files = self._find_files(self.first_directory)
-        second_files = self._find_files(self.second_directory)
+        first_files = self.recursive_scandir(self.first_directory)
+        second_files = self.recursive_scandir(self.second_directory)
 
-    def _find_files(self, directory, hidden=False):
-        if os.path.exists(directory):
-            files = list()
-            with os.scandir(directory) as it:
-                for entry in it:
-                    if not entry.name.startswith('.') and entry.is_file():
-                        file = {
-                            'file': entry.name
-                        }
+    def recursive_scandir(self, path, ignore_hidden_files=True):
+        '''
+        Recursively scan a directory for files.
+
+        Returns a flat list with the os.DirEntry of every file found under
+        'path'. While 'ignore_hidden_files' is true, entries whose name
+        starts with a dot are skipped and hidden folders are not descended
+        into. Symbolic links to folders are never followed. A folder that
+        can't be read is logged as a warning and contributes no entries.
+        '''
+        files = []
+        try:
+            # The context manager closes the scandir iterator promptly
+            with os.scandir(path) as entries:
+                for file in entries:
+                    if ignore_hidden_files and file.name.startswith('.'):
+                        continue
+                    if file.is_file():
                         files.append(file)
-            self._log.debug(f"Found {len(files)} in '{directory}'")
-            return files
-        else:
-            self._log.error(f"Given path '{directory}' doesn't exist.")
-            sys.exit(1)
+                    elif file.is_dir(follow_symlinks=False):
+                        files.extend(
+                            self.recursive_scandir(
+                                file.path,
+                                ignore_hidden_files=ignore_hidden_files
+                            )
+                        )
+        except PermissionError:
+            self._log.warning(f"Permission denied accessing folder '{path}'")
+        return files
 
     def _init_log(self):
         ''' Initialize log object '''