From: Tollef Fog Heen
Date: Wed, 13 Dec 2017 19:46:36 +0000 (+0100)
Subject: Ignore unhealthy hosts for deciding which mirrors are the newest
X-Git-Url: https://git.adam-barratt.org.uk/?a=commitdiff_plain;h=fe910feb549fda02fe25bc7c90d185d14c589ffd;p=mirror%2Fdsa-puppet.git

Ignore unhealthy hosts for deciding which mirrors are the newest

This prevents the case we saw in #877966 where bad timing of a mirror
push led to an outage. The disadvantage is that time might be moving
backwards instead, but giving out older packages (or dists/) is better
than giving out no files at all.
---

diff --git a/modules/roles/files/mirror_health/mirror-health b/modules/roles/files/mirror_health/mirror-health
index 57703668e..c3606de00 100755
--- a/modules/roles/files/mirror_health/mirror-health
+++ b/modules/roles/files/mirror_health/mirror-health
@@ -13,10 +13,9 @@ HOSTS = os.environ['MIRROR_CHECK_HOSTS'].split()
 OUTPUT_DIR = "/run/dsa-mirror-health-{}".format(os.environ['MIRROR_CHECK_SERVICE'])
 HEALTH_FILE = os.path.join(OUTPUT_DIR, "health")
 URL = os.environ['MIRROR_CHECK_URL']
+HEALTH_CHECK_URL = os.environ['MIRROR_CHECK_HEALTH_URL']
 INTERVAL = int(os.environ.get('MIRROR_CHECK_INTERVAL', '60'))
 
-latest_ts = 0
-
 def retrieve_from_host(host, url):
     proxies = {
         'http': 'http://{}:80'.format(host),
@@ -30,6 +29,11 @@ def last_modified(response):
     lm = calendar.timegm(parsedate(response.headers['last-modified']))
     return lm
 
+def healthy(response):
+    if response.status_code == 200:
+        return True
+    return False
+
 def check_shutdown():
     if subprocess.call(['dsa-is-shutdown-scheduled']) == 0:
         logging.info("considering myself unhealthy, shutdown scheduled")
@@ -37,12 +41,13 @@ def check_shutdown():
     return True
 
 def check_uptodate():
-    global latest_ts
+    latest_ts = 0
     for host in HOSTS:
         try:
             lm = last_modified(retrieve_from_host(host, URL))
             logging.debug("lm for host %s: %s", host, lm)
-            latest_ts = max(latest_ts, lm)
+            if healthy(retrieve_from_host(host, HEALTH_CHECK_URL)):
+                latest_ts = max(latest_ts, lm)
         except (requests.exceptions.ProxyError, requests.exceptions.ReadTimeout, requests.exceptions.ConnectTimeout):
             pass
     try:
diff --git a/modules/roles/manifests/debian_mirror.pp b/modules/roles/manifests/debian_mirror.pp
index 2989b3425..e790c8826 100644
--- a/modules/roles/manifests/debian_mirror.pp
+++ b/modules/roles/manifests/debian_mirror.pp
@@ -44,5 +44,6 @@ class roles::debian_mirror {
     check_hosts => ['accumu.debian.backend.mirrors.debian.org', 'bytemark.debian.backend.mirrors.debian.org', 'conova.debian.backend.mirrors.debian.org', 'skroutz.debian.backend.mirrors.debian.org'],
     check_service => 'ftp',
     url => 'http://debian.backend.mirrors.debian.org/debian/dists/sid/Release',
+    health_url => 'http://debian.backend.mirrors.debian.org/_health',
   }
 }
diff --git a/modules/roles/manifests/mirror_health.pp b/modules/roles/manifests/mirror_health.pp
index e390833fc..ec1102d5d 100644
--- a/modules/roles/manifests/mirror_health.pp
+++ b/modules/roles/manifests/mirror_health.pp
@@ -2,6 +2,7 @@ define roles::mirror_health (
   $check_hosts = [],
   $check_service = '',
   $url = '',
+  $health_url = '',
   $check_interval = 60,
 ) {
   package { 'python3-requests':
diff --git a/modules/roles/templates/mirror-health.service.erb b/modules/roles/templates/mirror-health.service.erb
index bf774406d..60d9dcd72 100644
--- a/modules/roles/templates/mirror-health.service.erb
+++ b/modules/roles/templates/mirror-health.service.erb
@@ -18,6 +18,7 @@ Restart=always
 
 Environment="MIRROR_CHECK_SERVICE=<%= @check_service %>"
 Environment="MIRROR_CHECK_URL=<%= @url %>"
+Environment="MIRROR_CHECK_HEALTH_URL=<%= @health_url %>"
 Environment="MIRROR_CHECK_HOSTS=<%= @check_hosts.join ' ' %>"
 Environment="MIRROR_CHECK_INTERVAL=<%= @check_interval %>"
 