Keep track of proxy failures

This commit is contained in:
Antonio J. Delgado 2025-07-21 08:38:59 +03:00
parent a68b241731
commit f431b391a7

View file

@ -8,10 +8,12 @@
import sys
import os
import re
import time
import stat
import json
import logging
from logging.handlers import SysLogHandler
import yaml
from yaml import dump
try:
from yaml import CDumper as Dumper
@ -23,6 +25,14 @@ import requests
import feedparser
import yt_dlp
# Home directory of the current user: $HOME, then %USERPROFILE%, then '/'.
HOME_FOLDER = os.environ.get('HOME', os.environ.get('USERPROFILE', '/'))
# Without a real home directory fall back to system-wide folders,
# otherwise keep cache and logs below the user's home.
CACHE_FOLDER, LOG_FOLDER = (
    ('/var/cache', '/var/log/')
    if HOME_FOLDER == '/'
    else (f"{HOME_FOLDER}/.local/", f"{HOME_FOLDER}/log/")
)
class GetYoutubeVideos:
'''Get YouTube videos from a series of channels'''
@ -49,6 +59,13 @@ class GetYoutubeVideos:
'get_youtube_videos.log'
)
self._init_log()
self._default_data = {
"last_update": 0,
"proxies": {},
}
self.data = self._read_cached_data()
self.summary = {
'config': self.config,
'entries_count': 0,
@ -62,9 +79,16 @@ class GetYoutubeVideos:
'downloaded_videos_titles': [],
}
if len(self.config['proxy']) > 0:
self.selected_proxy = self.config['proxy'][0]
self.proxies = self.config['proxy']
for proxy in self.config['proxy']:
if proxy not in self._default_data['proxies']:
self._default_data['proxies'][proxy]['failures'] = 0
less_failures = -1
for proxy in self._default_data['proxies']:
if less_failures == -1 or self._default_data['proxies'][proxy]['failures'] < less_failures:
less_failures = self._default_data['proxies'][proxy]['failures']
self.selected_proxy = proxy
else:
self._default_data['proxies'] = {}
self.selected_proxy = ''
self.proxy_index = 0
if os.path.exists(self.config['downloaded_database']):
@ -84,6 +108,60 @@ class GetYoutubeVideos:
)
self._save_metrics(self.summary)
def close(self):
'''Close class and save data'''
self._save_cached_data(self.data)
def _read_cached_data(self):
if os.path.exists(self.config['cache_file']):
with open(self.config['cache_file'], 'r', encoding='utf-8') as cache_file:
try:
cached_data = json.load(cache_file)
if (
'last_update' in cached_data and
cached_data['last_update'] + self.config['max_cache_age'] > time.time()
):
cached_data = self._default_data
except json.decoder.JSONDecodeError:
cached_data = self._default_data
return cached_data
else:
return self._default_data
def _save_cached_data(self, data):
data['last_update'] = time.time()
with open(self.config['cache_file'], 'w', encoding='utf-8') as cache_file:
json.dump(data, cache_file, indent=2)
self._debug(
f"Saved cached data in '{self.config['cache_file']}'",
)
def _output(self, message):
if self.config['output_format'] == 'JSON':
return json.dumps(message, indent=2)
elif self.config['output_format'] == 'YAML':
return yaml.dump(message, Dumper=yaml.Dumper)
elif self.config['output_format'] == 'PLAIN':
return f"{message}"
else:
self._log.warning(
"Output format '%s' not supported",
self.config['output_format']
)
return message
def _info(self, message):
return self._log.info(self._output(message))
def _warning(self, message):
return self._log.warning(self._output(message))
def _error(self, message):
return self._log.error(self._output(message))
def _debug(self, message):
return self._log.debug(self._output(message))
def _is_numeric(self, variable):
if isinstance(variable, (int, float, complex)):
return True
@ -108,28 +186,21 @@ class GetYoutubeVideos:
os.chmod('/var/lib/prometheus/node-exporter/get_youtube_videos.prom', stat.S_IROTH)
def _change_proxy(self, video_id):
if len(self.proxies) > 0:
if isinstance(self.proxies, list):
self._log.debug(
"Removing failing proxy '%s'",
self.selected_proxy
)
self.proxies.pop(self.proxy_index)
if self.proxy_index<len(self.proxies)-1:
self.selected_proxy = self.proxies[self.proxy_index]
self.proxy_index += 1
self._log.warning(
"Got an error fetching video information. Setting proxy to '%s'...",
self.selected_proxy
)
return self._get_video_info(video_id)
less_failures = -1
for proxy in self._default_data['proxies']:
if less_failures == -1 or self._default_data['proxies'][proxy]['failures'] < less_failures:
less_failures = self._default_data['proxies'][proxy['failures']]
self.selected_proxy = proxy
if less_failures == -1:
self.selected_proxy = list(self._default_data['proxies'].keys())[0]
self._log.debug(
"No more proxies to try. Resetting list."
"All proxies have the same number of errors, using first proxy of the list."
)
self.proxy_index = 0
self.proxies = self.config['proxy']
self.selected_proxy = self.proxies[0]
return None
self._log.warning(
"Got an error fetching video information. Setting proxy to '%s'...",
self.selected_proxy
)
return self._get_video_info(video_id)
def _get_video_info(self, video_id):
uri=f"https://www.youtube.com/watch?v={video_id}"
@ -524,9 +595,22 @@ class GetYoutubeVideos:
default=False,
help='Skip live videos'
)
@click.option(
'--cache-file',
'-f',
default=f"{CACHE_FOLDER}/get_youtube_videos.json",
help='Cache file to store data from each run',
)
@click.option(
'--max-cache-age',
'-a',
default=60*60*24*7,
help='Max age in seconds for the cache'
)
@click_config_file.configuration_option()
def __main__(**kwargs):
    '''Command-line entry point.

    Builds a GetYoutubeVideos instance from the parsed options and then
    calls its close() method, which saves the cached data.

    Args:
        **kwargs: Options collected by the command-line decorators.

    Returns:
        The GetYoutubeVideos instance, after close() has been called.
    '''
    obj = GetYoutubeVideos(**kwargs)
    # close() must run before returning, otherwise the cache is never saved
    obj.close()
    return obj
# Start the command-line interface only when executed as a script
if __name__ == "__main__":
    __main__()