commit aa503a42348131aa24967c0bb947145585fc9dd0 Author: Antonio J. Delgado Date: Wed Jan 25 14:26:45 2023 +0200 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..297e1f5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,142 @@ + Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# Configuration files +*.conf +*.ini diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e69de29 diff --git a/README.md b/README.md new file mode 100644 index 0000000..0f5b3f4 --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +# find_duplicate_files + +## Requirements + +## Installation + +### Linux + + ```bash +sudo python3 setup.py install +``` + +### Windows (from PowerShell) + + ```powershell +& $(where.exe python).split()[0] setup.py install +``` + +## Usage + + ```bash +find_duplicate_files.py [--debug-level|-d CRITICAL|ERROR|WARNING|INFO|DEBUG|NOTSET] # Other parameters +``` diff --git a/find_duplicate_files/__init__.py b/find_duplicate_files/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/find_duplicate_files/find_duplicate_files.py b/find_duplicate_files/find_duplicate_files.py new file mode 100644 index 0000000..6fb1e6c --- /dev/null +++ b/find_duplicate_files/find_duplicate_files.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- +# +# This script is licensed under GNU GPL version 2.0 or above +# (c) 2023 Antonio J. 
# (c) 2023 Antonio J. Delgado
# Given two directories, find files in first directory that are present in the second by checking hashes

import sys
import os
import logging
import click
import click_config_file
from logging.handlers import RotatingFileHandler, SysLogHandler


class find_duplicate_files:
    '''
    List the regular files of two directories.

    Nothing is hashed, compared or deleted in this class yet: the
    constructor only sets up logging and lists both directories.
    '''

    def __init__(self, debug_level, log_file, dummy, first_directory, second_directory):
        '''
        Set up logging and list the files of both directories.

        debug_level: level name used for the standard output handler.
        log_file: path of the log file. None selects
            <home>/log/find_duplicate_files.log, where <home> is $HOME,
            else %USERPROFILE%, else the current working directory.
        dummy: stored on the instance; not read anywhere in this class.
        first_directory, second_directory: directories to list.

        Exits the process with status 1 (through _find_files) when one of
        the directories cannot be listed.
        '''
        self.config = dict()
        self.config['debug_level'] = debug_level
        if log_file is None:
            home_folder = os.environ.get('HOME', os.environ.get('USERPROFILE', os.getcwd()))
            log_file = os.path.join(home_folder, 'log', 'find_duplicate_files.log')
        self.config['log_file'] = log_file
        self._init_log()

        self.dummy = dummy
        self.first_directory = first_directory
        self.second_directory = second_directory

        # Keep the listings on the instance so later steps can use them.
        self.first_files = self._find_files(self.first_directory)
        self.second_files = self._find_files(self.second_directory)

    def _find_files(self, directory, hidden=False):
        '''
        Return the regular files found directly inside directory.

        directory: path of the directory to list (not recursive).
        hidden: when True, entries whose name starts with '.' are listed
            too; by default they are skipped.

        Returns a list of dictionaries of the form {'file': <entry name>}.
        Logs an error and exits the process with status 1 when directory
        doesn't exist or is not a directory.
        '''
        if not os.path.isdir(directory):
            # os.scandir() would raise NotADirectoryError on a regular file,
            # so report that case instead of letting it crash.
            if os.path.exists(directory):
                self._log.error(f"Given path '{directory}' is not a directory.")
            else:
                self._log.error(f"Given path '{directory}' doesn't exist.")
            sys.exit(1)

        with os.scandir(directory) as it:
            files = [
                {'file': entry.name}
                for entry in it
                if (hidden or not entry.name.startswith('.')) and entry.is_file()
            ]
        self._log.debug(f"Found {len(files)} in '{directory}'")
        return files

    def _init_log(self):
        '''
        Initialize the log object.

        Attaches three handlers to the "find_duplicate_files" logger: syslog
        (DEBUG), standard output (level taken from config['debug_level'])
        and a rotating log file (DEBUG). Always returns True.
        '''
        self._log = logging.getLogger("find_duplicate_files")
        self._log.setLevel(logging.DEBUG)

        sysloghandler = SysLogHandler()
        sysloghandler.setLevel(logging.DEBUG)
        self._log.addHandler(sysloghandler)

        streamhandler = logging.StreamHandler(sys.stdout)
        stdout_level = self.config.get("debug_level", 'INFO')
        if isinstance(stdout_level, str):
            # Level names are upper case; a lower-case name would be rejected.
            stdout_level = stdout_level.upper()
        streamhandler.setLevel(stdout_level)
        self._log.addHandler(streamhandler)

        log_file = self.config.get('log_file')
        if log_file is None:
            home_folder = os.environ.get('HOME', os.environ.get('USERPROFILE', ''))
            log_file = os.path.join(home_folder, "log", "find_duplicate_files.log")

        # dirname is '' for a bare file name: there is nothing to create then.
        log_folder = os.path.dirname(log_file)
        if log_folder:
            os.makedirs(log_folder, exist_ok=True)

        # Rollover never happens while backupCount is 0, so keep one backup
        # to make the maxBytes limit effective.
        filehandler = RotatingFileHandler(log_file, maxBytes=102400000, backupCount=1)
        # create formatter
        formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
        filehandler.setFormatter(formatter)
        filehandler.setLevel(logging.DEBUG)
        self._log.addHandler(filehandler)
        return True


# Command line entry point. It is documented with comments on purpose:
# a docstring here would be printed by click as part of the --help output.
@click.command()
@click.option(
    "--debug-level", "-d",
    default="INFO",
    type=click.Choice(
        ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
        case_sensitive=False,
    ),
    help='Set the debug level for the standard output.',
)
@click.option('--log-file', '-l', help="File to store all debug messages.")
# Don't forget to add dummy to parameters of main function
@click.option("--dummy", "-n", is_flag=True, help="Don't do anything, just show what would be done.")
@click.option('--first-directory', '-f', required=True, help='First directory to find files AND TO DELETE FILES FROM!!!')
@click.option('--second-directory', '-s', required=True, help='Second directory to find files')
@click_config_file.configuration_option()
def __main__(debug_level, log_file, dummy, first_directory, second_directory):
    return find_duplicate_files(debug_level, log_file, dummy, first_directory, second_directory)


if __name__ == "__main__":
    __main__()
Delgado", email = "" }] +license = { file = "LICENSE" } +classifiers = [ + "License :: OSI Approved :: GPLv3 License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", +] +#keywords = ["vCard", "contacts", "duplicates"] +dependencies = [ + "click", + "click_config_file", +] +requires-python = ">=3" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..66bf966 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +click +click_config_file \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..3cb3e84 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,9 @@ +[metadata] +name = find_duplicate_files +version = 0.0.1 + +[options] +packages = find_duplicate_files +install_requires = + requests + importlib; python_version == "3.10" diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..5b7c88c --- /dev/null +++ b/setup.py @@ -0,0 +1,14 @@ +import setuptools +setuptools.setup( + scripts=['find_duplicate_files/find_duplicate_files.py'], + author="Antonio J. Delgado", + version='0.0.1', + name='find_duplicate_files', + author_email="", + url="", + description="Given two directories, find files in first directory that are present in the second by checking hashes", + long_description="README.md", + long_description_content_type="text/markdown", + license="GPLv3", + #keywords=["my", "script", "does", "things"] +)