Initial commit

This commit is contained in:
Antonio J. Delgado 2023-01-25 14:26:45 +02:00
commit aa503a4234
9 changed files with 311 additions and 0 deletions

142
.gitignore vendored Normal file
View file

@ -0,0 +1,142 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# Configuration files
*.conf
*.ini

0
LICENSE Normal file
View file

23
README.md Normal file
View file

@ -0,0 +1,23 @@
# find_duplicate_files
## Requirements
Python 3 and the `click` and `click_config_file` packages (listed in `requirements.txt`).
## Installation
### Linux
```bash
sudo python3 setup.py install
```
### Windows (from PowerShell)
```powershell
& $(where.exe python).split()[0] setup.py install
```
## Usage
```bash
find_duplicate_files.py [--debug-level|-d CRITICAL|ERROR|WARNING|INFO|DEBUG|NOTSET] [--log-file|-l FILE] [--dummy|-n] --first-directory|-f DIRECTORY --second-directory|-s DIRECTORY
```

View file

View file

@ -0,0 +1,96 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
#
# This script is licensed under GNU GPL version 2.0 or above
# (c) 2023 Antonio J. Delgado
# Given two directories, find files in first directory that are present in the second by checking hashes
import sys
import os
import logging
import click
import click_config_file
from logging.handlers import SysLogHandler
class find_duplicate_files:
    """Scan two directories and record the regular files found in each.

    Creating an instance configures logging and immediately lists both
    directories (non-recursively). The listings are kept in ``first_files``
    and ``second_files``. No hashing or comparison is performed here yet.
    """

    def __init__(self, debug_level, log_file, dummy, first_directory, second_directory):
        """Store the configuration, set up logging and scan both directories.

        Args:
            debug_level: Name of the logging level used for standard output.
            log_file: Path of the log file, or ``None`` to use the default
                ``<home>/log/find_duplicate_files.log``.
            dummy: Dry-run flag; only stored on the instance.
            first_directory: First directory to list.
            second_directory: Second directory to list.
        """
        self.config = dict()
        self.config['debug_level'] = debug_level
        if log_file is None:
            log_file = self._default_log_file()
        self.config['log_file'] = log_file
        self._init_log()
        self.dummy = dummy
        self.first_directory = first_directory
        self.second_directory = second_directory
        # Keep the listings on the instance instead of discarding them.
        self.first_files = self._find_files(self.first_directory)
        self.second_files = self._find_files(self.second_directory)

    @staticmethod
    def _default_log_file():
        """Return the default log path under the user's home directory.

        Falls back to the current working directory when neither ``HOME``
        nor ``USERPROFILE`` is set.
        """
        home_folder = os.environ.get('HOME', os.environ.get('USERPROFILE', os.getcwd()))
        return os.path.join(home_folder, 'log', 'find_duplicate_files.log')

    def _find_files(self, directory, hidden=False):
        """List the regular files directly inside ``directory``.

        Args:
            directory: Path of the directory to list (not recursive).
            hidden: When true, also include entries whose name starts
                with a dot; by default they are skipped.

        Returns:
            A list of ``{'file': <entry name>}`` dictionaries.

        Raises:
            SystemExit: With status 1 when ``directory`` does not exist or
                is not a directory.
        """
        if not os.path.exists(directory):
            self._log.error(f"Given path '{directory}' doesn't exist.")
            sys.exit(1)
        if not os.path.isdir(directory):
            # os.scandir() would otherwise raise NotADirectoryError.
            self._log.error(f"Given path '{directory}' is not a directory.")
            sys.exit(1)
        with os.scandir(directory) as entries:
            files = [
                {'file': entry.name}
                for entry in entries
                if (hidden or not entry.name.startswith('.')) and entry.is_file()
            ]
        self._log.debug(f"Found {len(files)} in '{directory}'")
        return files

    def _init_log(self):
        """Attach syslog, stdout and rotating-file handlers to the logger.

        The stdout handler uses the configured ``debug_level``; the syslog
        and file handlers always log at DEBUG.

        Returns:
            ``True`` once the handlers are attached.
        """
        self._log = logging.getLogger("find_duplicate_files")
        self._log.setLevel(logging.DEBUG)

        sysloghandler = SysLogHandler()
        sysloghandler.setLevel(logging.DEBUG)
        self._log.addHandler(sysloghandler)

        level = self.config.get("debug_level", 'INFO')
        if isinstance(level, str):
            # Level names are registered in upper case; accept any casing.
            level = level.upper()
        streamhandler = logging.StreamHandler(sys.stdout)
        streamhandler.setLevel(logging.getLevelName(level))
        self._log.addHandler(streamhandler)

        log_file = self.config.get('log_file')
        if log_file is None:
            log_file = self._default_log_file()
        log_folder = os.path.dirname(log_file)
        if log_folder:
            # makedirs copes with nested missing folders and with the folder
            # already existing; an empty dirname means the current directory.
            os.makedirs(log_folder, exist_ok=True)

        filehandler = logging.handlers.RotatingFileHandler(log_file, maxBytes=102400000)
        formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
        filehandler.setFormatter(formatter)
        filehandler.setLevel(logging.DEBUG)
        self._log.addHandler(filehandler)
        return True
@click.command()
@click.option(
    "--debug-level",
    "-d",
    default="INFO",
    type=click.Choice(
        ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
        case_sensitive=False,
    ),
    help='Set the debug level for the standard output.',
)
@click.option(
    '--log-file',
    '-l',
    help="File to store all debug messages.",
)
@click.option(
    "--dummy",
    "-n",
    is_flag=True,
    help="Don't do anything, just show what would be done.",
)
@click.option(
    '--first-directory',
    '-f',
    required=True,
    help='First directory to find files AND TO DELETE FILES FROM!!!',
)
@click.option(
    '--second-directory',
    '-s',
    required=True,
    help='Second directory to find files',
)
@click_config_file.configuration_option()
def __main__(debug_level, log_file, dummy, first_directory, second_directory):
    # Command-line entry point: forwards the parsed options to the
    # find_duplicate_files class and returns the created object.
    # No docstring on purpose: click would show it as the --help text.
    return find_duplicate_files(
        debug_level,
        log_file,
        dummy,
        first_directory,
        second_directory,
    )


# Run the command only when the file is executed as a script.
if __name__ == "__main__":
    __main__()

25
pyproject.toml Normal file
View file

@ -0,0 +1,25 @@
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
[project.urls]
Homepage = ""
[project]
name = "find_duplicate_files"
version = "0.0.1"
description = "Given two directories, find files in first directory that are present in the second by checking hashes"
readme = "README.md"
authors = [{ name = "Antonio J. Delgado", email = "" }]
license = { file = "LICENSE" }
classifiers = [
    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
]
#keywords = ["vCard", "contacts", "duplicates"]
dependencies = [
"click",
"click_config_file",
]
requires-python = ">=3"

2
requirements.txt Normal file
View file

@ -0,0 +1,2 @@
click
click_config_file

9
setup.cfg Normal file
View file

@ -0,0 +1,9 @@
[metadata]
name = find_duplicate_files
version = 0.0.1
[options]
packages = find_duplicate_files
install_requires =
requests
importlib; python_version == "3.10"

14
setup.py Normal file
View file

@ -0,0 +1,14 @@
"""Packaging script for find_duplicate_files."""
from pathlib import Path

import setuptools

# long_description must hold the README's text, not its file name, because
# long_description_content_type declares it to be markdown content.
_README = Path(__file__).resolve().parent / "README.md"

setuptools.setup(
    scripts=['find_duplicate_files/find_duplicate_files.py'],
    author="Antonio J. Delgado",
    version='0.0.1',
    name='find_duplicate_files',
    author_email="",
    url="",
    description="Given two directories, find files in first directory that are present in the second by checking hashes",
    long_description=_README.read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
    license="GPLv3",
    #keywords=["my", "script", "does", "things"]
)