Add initial script
This commit is contained in:
parent
54d285054d
commit
ee8f3f6081
1 changed files with 47 additions and 0 deletions
|
@ -9,6 +9,7 @@ import sys
|
|||
import os
|
||||
import json
|
||||
import time
|
||||
import re
|
||||
import logging
|
||||
from logging.handlers import SysLogHandler
|
||||
import click
|
||||
|
@ -48,6 +49,38 @@ class ExtractWords:
|
|||
}
|
||||
self.data = self._read_cached_data()
|
||||
|
||||
self._debug(
|
||||
f"Reading file '{self.config['input_file'].name}'..."
|
||||
)
|
||||
content = self.config['input_file'].read()
|
||||
self._debug(
|
||||
"Unsplitting separated words..."
|
||||
)
|
||||
content = re.sub('- ?\n', '', content)
|
||||
content = content.lower()
|
||||
self._debug(
|
||||
f"Splitting file of {len(content)} bytes..."
|
||||
)
|
||||
words = content.split()
|
||||
self._debug(
|
||||
f"Found {len(words)} a total (non-unique) words"
|
||||
)
|
||||
all_words = []
|
||||
for word in words:
|
||||
match = re.match(r'\w*', word)
|
||||
if match:
|
||||
if match.group(0) != '':
|
||||
match_numbers = re.match(r'[0-9]', match.group(0))
|
||||
if not match_numbers:
|
||||
if match.group(0) not in all_words:
|
||||
all_words.append(match.group(0))
|
||||
self._debug(
|
||||
f"A total of {len(all_words)} unique words"
|
||||
)
|
||||
all_words.sort()
|
||||
self.config['output_file'].write('\n'.join(all_words))
|
||||
|
||||
|
||||
def close(self):
|
||||
'''Close class and save data'''
|
||||
self._save_cached_data(self.data)
|
||||
|
@ -181,6 +214,20 @@ class ExtractWords:
|
|||
default=60*60*24*7,
|
||||
help='Max age in seconds for the cache'
|
||||
)
|
||||
@click.option(
|
||||
'--input-file',
|
||||
'-i',
|
||||
required=True,
|
||||
type=click.File('r'),
|
||||
help='File containing words mixed with other things (like a book) in plain text',
|
||||
)
|
||||
@click.option(
|
||||
'--output-file',
|
||||
'-O',
|
||||
required=True,
|
||||
type=click.File('w'),
|
||||
help='File to write the words',
|
||||
)
|
||||
# @click.option("--dummy","-n", is_flag=True,
|
||||
# help="Don't do anything, just show what would be done.")
|
||||
@click_config_file.configuration_option()
|
||||
|
|
Loading…
Reference in a new issue