Ignore unhealthy hosts when deciding which mirrors are the newest
author Tollef Fog Heen <tfheen@err.no>
Wed, 13 Dec 2017 19:46:36 +0000 (20:46 +0100)
committer Tollef Fog Heen <tfheen@err.no>
Wed, 13 Dec 2017 19:46:36 +0000 (20:46 +0100)
This prevents the case we saw in #877966, where bad timing of a mirror
push led to an outage.  The disadvantage is that time might move
backwards instead, but giving out older packages (or dists/) is better
than giving out no files at all.

modules/roles/files/mirror_health/mirror-health
modules/roles/manifests/debian_mirror.pp
modules/roles/manifests/mirror_health.pp
modules/roles/templates/mirror-health.service.erb

index 5770366..c3606de 100755 (executable)
@@ -13,10 +13,9 @@ HOSTS = os.environ['MIRROR_CHECK_HOSTS'].split()
 OUTPUT_DIR = "/run/dsa-mirror-health-{}".format(os.environ['MIRROR_CHECK_SERVICE'])
 HEALTH_FILE = os.path.join(OUTPUT_DIR, "health")
 URL = os.environ['MIRROR_CHECK_URL']
+HEALTH_CHECK_URL = os.environ['MIRROR_CHECK_HEALTH_URL']
 INTERVAL = int(os.environ.get('MIRROR_CHECK_INTERVAL', '60'))
 
-latest_ts = 0
-
 def retrieve_from_host(host, url):
     proxies = {
         'http': 'http://{}:80'.format(host),
@@ -30,6 +29,11 @@ def last_modified(response):
         lm = calendar.timegm(parsedate(response.headers['last-modified']))
     return lm
 
+def healthy(response):
+    if response.status_code == 200:
+        return True
+    return False
+
 def check_shutdown():
     if subprocess.call(['dsa-is-shutdown-scheduled']) == 0:
         logging.info("considering myself unhealthy, shutdown scheduled")
@@ -37,12 +41,13 @@ def check_shutdown():
     return True
 
 def check_uptodate():
-    global latest_ts
+    latest_ts = 0
     for host in HOSTS:
         try:
             lm = last_modified(retrieve_from_host(host, URL))
             logging.debug("lm for host %s: %s", host, lm)
-            latest_ts = max(latest_ts, lm)
+            if healthy(retrieve_from_host(host, HEALTH_CHECK_URL)):
+                latest_ts = max(latest_ts, lm)
         except (requests.exceptions.ProxyError, requests.exceptions.ReadTimeout, requests.exceptions.ConnectTimeout):
             pass
     try:
index 2989b34..e790c88 100644 (file)
@@ -44,5 +44,6 @@ class roles::debian_mirror {
                check_hosts   => ['accumu.debian.backend.mirrors.debian.org', 'bytemark.debian.backend.mirrors.debian.org', 'conova.debian.backend.mirrors.debian.org', 'skroutz.debian.backend.mirrors.debian.org'],
                check_service => 'ftp',
                url           => 'http://debian.backend.mirrors.debian.org/debian/dists/sid/Release',
+               health_url    => 'http://debian.backend.mirrors.debian.org/_health',
         }
 }
index e390833..ec1102d 100644 (file)
@@ -2,6 +2,7 @@ define roles::mirror_health (
        $check_hosts    = [],
        $check_service  = '',
        $url            = '',
+       $health_url     = '',
        $check_interval = 60,
 ) {
        package { 'python3-requests':
index bf77440..60d9dcd 100644 (file)
@@ -18,6 +18,7 @@ Restart=always
 
 Environment="MIRROR_CHECK_SERVICE=<%= @check_service %>"
 Environment="MIRROR_CHECK_URL=<%= @url %>"
+Environment="MIRROR_CHECK_HEALTH_URL=<%= @health_url %>"
 Environment="MIRROR_CHECK_HOSTS=<%= @check_hosts.join ' ' %>"
 Environment="MIRROR_CHECK_INTERVAL=<%= @check_interval %>"