Improve URL matching

This commit is contained in:
Antonio J. Delgado 2024-11-28 20:24:39 +02:00
parent 28102ef5be
commit b154a88ad6

View file

@ -294,10 +294,7 @@ class NextcloudHandler:
"message": "Fernet token for passwords local cache is invalid, discarding the local cache.",
}
)
self.cache = {
"last_update": -1,
"cached_passwords": []
}
self.reset_cache()
else:
self.debug(
@ -306,10 +303,7 @@ class NextcloudHandler:
"message": "The cache file was empty, so initializing cache"
}
)
self.cache = {
"last_update": -1,
"cached_passwords": []
}
self.reset_cache()
else:
self.debug(
{
@ -317,11 +311,13 @@ class NextcloudHandler:
"message": "There wasn't a cache file, so initializing cache"
}
)
self.cache = {
"last_update": -1,
"cached_passwords": []
}
self.reset_cache()
def reset_cache(self):
    '''Replace the in-memory cache with a fresh, empty one.

    After the call ``self.cache`` is a new dict whose ``last_update`` is -1
    and whose ``cached_passwords`` is a new empty list. Returns None.
    '''
    # Build a brand-new dict (and list) on every call so that no previously
    # handed-out reference to the old cache is mutated.
    empty_cache = dict(last_update=-1, cached_passwords=[])
    self.cache = empty_cache
def _write_cache(self):
self.debug(
{
@ -975,7 +971,7 @@ class NextcloudHandler:
"action": "delete_password",
"object": min_obj,
"message": f"Nextcloud instance returned status code {r.status_code}",
"returned_content": r.content,
"returned_content": f"{r.content}",
}
)
except requests.exceptions.ReadTimeout as error:
@ -983,7 +979,7 @@ class NextcloudHandler:
{
"action": "delete_password",
"message": f"Timeout ({self.timeout} sec) error doing GET request",
"error": error,
"error": f"{error}",
}
)
return False
@ -1115,7 +1111,41 @@ class NextcloudHandler:
result['path'] = match.group(3)
split_hostname = result['hostname'].split('.')
result['tld'] = split_hostname[len(split_hostname)-1]
result['domain'] = f"{split_hostname[len(split_hostname)-1]}.{split_hostname[len(split_hostname)-2]}"
result['domain'] = f"{split_hostname[len(split_hostname)-2]}.{result['tld']}"
return result
match = re.search('([a-z]*)://([^/]*)/$', url)
if match:
result['protocol'] = match.group(1)
result['hostname'] = match.group(2)
result['path'] = ''
split_hostname = result['hostname'].split('.')
result['tld'] = split_hostname[len(split_hostname)-1]
result['domain'] = f"{split_hostname[len(split_hostname)-2]}.{result['tld']}"
return result
match = re.search('([a-z]*)://([^/]*)$', url)
if match:
result['protocol'] = match.group(1)
result['hostname'] = match.group(2)
result['path'] = ''
split_hostname = result['hostname'].split('.')
result['tld'] = split_hostname[len(split_hostname)-1]
result['domain'] = f"{split_hostname[len(split_hostname)-2]}.{result['tld']}"
return result
match = re.search('([^/]*)$', url)
if match:
result['protocol'] = ''
result['hostname'] = match.group(1)
result['path'] = ''
split_hostname = result['hostname'].split('.')
result['tld'] = split_hostname[len(split_hostname)-1]
result['domain'] = f"{split_hostname[len(split_hostname)-2]}.{result['tld']}"
return result
self.debug(
{
"action": "_split_url",
"message": f"URL '{url}' can't be split, no match for our regular expressions"
}
)
return result
def get_folder_id(self, folder_name):
@ -1406,6 +1436,7 @@ class NcPasswordClient:
def remove_duplicates(self, limit, comparation_fields):
'''Remove duplicate passwords'''
self.nc.comparation_fields = comparation_fields
self.nc.reset_cache()
checked_passwords = []
count = 0
if limit == 0: