Improve URL matching

2024-11-28 20:24:39 +02:00 · 2024-11-28 20:24:39 +02:00 · b154a88ad6
commit b154a88ad6
parent 28102ef5be
1 changed file with 46 additions and 15 deletions
--- a/nc_password_client/nc_password_client.py
+++ b/nc_password_client/nc_password_client.py
@@ -294,10 +294,7 @@ class NextcloudHandler:
                                "message": "Fernet token for passwords local cache is invalid, discarding the local cache.",
                            }
                        )
-                        self.cache = {
-                            "last_update": -1,
-                            "cached_passwords": []
-                        }
+                        self.reset_cache()

                else:
                    self.debug(
@@ -306,10 +303,7 @@ class NextcloudHandler:
                            "message": "The cache file was empty, so initializing cache"
                        }
                    )
-                    self.cache = {
-                        "last_update": -1,
-                        "cached_passwords": []
-                    }
+                    self.reset_cache()
        else:
            self.debug(
                {
@@ -317,11 +311,13 @@ class NextcloudHandler:
                    "message": "There wasn't a cache file, so initializing cache"
                }
            )
-            self.cache = {
-                "last_update": -1,
-                "cached_passwords": []
-            }
+            self.reset_cache()

+    def reset_cache(self):
+        self.cache = {
+            "last_update": -1,
+            "cached_passwords": []
+        }
    def _write_cache(self):
        self.debug(
                {
@@ -975,7 +971,7 @@ class NextcloudHandler:
                    "action": "delete_password",
                    "object": min_obj,
                    "message": f"Nextcloud instance returned status code {r.status_code}",
-                    "returned_content": r.content,
+                    "returned_content": f"{r.content}",
                }
            )
        except requests.exceptions.ReadTimeout as error:
@@ -983,7 +979,7 @@ class NextcloudHandler:
                {
                    "action": "delete_password",
                    "message": f"Timeout ({self.timeout} sec) error doing GET request",
-                    "error": error,
+                    "error": f"{error}",
                }
            )
        return False
@@ -1115,7 +1111,41 @@ class NextcloudHandler:
            result['path'] = match.group(3)
            split_hostname = result['hostname'].split('.')
            result['tld'] = split_hostname[len(split_hostname)-1]
-            result['domain'] = f"{split_hostname[len(split_hostname)-1]}.{split_hostname[len(split_hostname)-2]}"
+            result['domain'] = f"{split_hostname[len(split_hostname)-2]}.{result['tld']}"
+            return result
+        match = re.search('([a-z]*)://([^/]*)/$', url)
+        if match:
+            result['protocol'] = match.group(1)
+            result['hostname'] = match.group(2)
+            result['path'] = ''
+            split_hostname = result['hostname'].split('.')
+            result['tld'] = split_hostname[len(split_hostname)-1]
+            result['domain'] = f"{split_hostname[len(split_hostname)-2]}.{result['tld']}"
+            return result
+        match = re.search('([a-z]*)://([^/]*)$', url)
+        if match:
+            result['protocol'] = match.group(1)
+            result['hostname'] = match.group(2)
+            result['path'] = ''
+            split_hostname = result['hostname'].split('.')
+            result['tld'] = split_hostname[len(split_hostname)-1]
+            result['domain'] = f"{split_hostname[len(split_hostname)-2]}.{result['tld']}"
+            return result
+        match = re.search('([^/]*)$', url)
+        if match:
+            result['protocol'] = ''
+            result['hostname'] = match.group(1)
+            result['path'] = ''
+            split_hostname = result['hostname'].split('.')
+            result['tld'] = split_hostname[len(split_hostname)-1]
+            result['domain'] = f"{split_hostname[len(split_hostname)-2]}.{result['tld']}"
+            return result
+        self.debug(
+            {
+                "action": "_split_url",
+                "message": f"URL '{url}' can't be split, no match for our regular expressions"
+            }
+        )
        return result

    def get_folder_id(self, folder_name):
@@ -1406,6 +1436,7 @@ class NcPasswordClient:
    def remove_duplicates(self, limit, comparation_fields):
        '''Remove duplicate passwords'''
        self.nc.comparation_fields = comparation_fields
+        self.nc.reset_cache()
        checked_passwords = []
        count = 0
        if limit == 0: