Commit Diff
Diff:
647dd12839d017bd5b1d45621b99ca4b24271723
8050d2d1fc34a2d249e22caf9f2580faa4e3d99d
Commit:
8050d2d1fc34a2d249e22caf9f2580faa4e3d99d
Tree:
1831f7a6bf18c612b302613540a0cbe0c0721215
Author:
pjp <pjp@delphinusdns.org>
Committer:
pjp <pjp@delphinusdns.org>
Date:
Wed Nov 6 08:45:57 2019 UTC
Message:
When we schedule a restart, every delphinusdnsd replicant server must restart at a different time in order to guarantee service at all times. I have replaced the fixed 100-second restart delay with 80 seconds plus a random wait of 0-39 seconds. A restart is usually pretty quick, depending on the zone file size. In the worst-case scenario several servers will restart at the same time, and in that case some service delay will happen. Luckily, in DNS, if one server does not answer, the other servers are tried; I don't know whether recursive resolvers retry reaching a server that did not answer.
blob - 792efada654eb658e6b4056338158753151d6805
blob + 268130c0a6cc4e1ef5ae3943f4b864794f274843
--- raxfr.c
+++ raxfr.c
@@ -26,7 +26,7 @@
*
*/
/*
- * $Id: raxfr.c,v 1.30 2019/11/05 08:28:23 pjp Exp $
+ * $Id: raxfr.c,v 1.31 2019/11/06 08:45:57 pjp Exp $
*/
#include <sys/types.h>
@@ -121,6 +121,7 @@ static void schedule_refresh(char *, time_t);
static void schedule_retry(char *, time_t);
static void schedule_restart(char *, time_t);
static void schedule_delete(struct myschedule *);
+static int rand_restarttime(void);
int64_t get_remote_soa(struct rzone *rzone);
int do_raxfr(FILE *, struct rzone *);
int pull_rzone(struct rzone *, time_t, int);
@@ -1415,13 +1416,7 @@ replicantloop(ddDB *db, struct imsgbuf *ibuf, struct i
if (pull_rzone(lrz, now, 0) < 0) {
dolog(LOG_INFO, "AXFR failed\n");
} else {
- schedule_restart(lrz->zonename, now + 100);
- /*
- * we've scheduled a restart and there may be more
- * AXFR's to do we only have a window of 100 seconds
- * so we select for 5000 microseconds only, so that
- * other tasks can still complete.
- */
+ schedule_restart(lrz->zonename, now + rand_restarttime());
endspurt = 1;
}
} /* else serial ... */
@@ -1472,14 +1467,8 @@ replicantloop(ddDB *db, struct imsgbuf *ibuf, struct i
}
/* schedule restart */
- schedule_restart(lrz->zonename, now + 100);
- /*
- * we've scheduled a restart and there may be more
- * AXFR's to do we only have a window of 100 seconds
- * so we select for 5000 microseconds only, so that
- * other tasks can still complete.
- */
- endspurt = 1;
+ schedule_restart(lrz->zonename, now + rand_restarttime());
+ endspurt = 1;
} else {
schedule_refresh(lrz->zonename, now + lrz->soa.refresh);
}
@@ -1513,14 +1502,7 @@ replicantloop(ddDB *db, struct imsgbuf *ibuf, struct i
}
/* schedule restart */
- schedule_restart(lrz->zonename, now + 100);
- /*
- * we've scheduled a restart and there may be more
- * AXFR's to do we only have a window of 100 seconds
- * so we select for 5000 microseconds only, so that
- * other tasks can still complete.
- */
-
+ schedule_restart(lrz->zonename, now + rand_restarttime());
endspurt = 1;
} else {
schedule_refresh(lrz->zonename, now + lrz->soa.refresh);
@@ -2187,4 +2169,14 @@ pull_rzone(struct rzone *rzone, time_t now, int dosche
unlink(p);
return 0;
+}
+
+/*
+ * the restart delay is 80 seconds plus a random interval of 0 to 39 seconds
+ */
+
+static int
+rand_restarttime(void)
+{
+ return (80 + (arc4random() % 40));
}
repomaster@centroid.eu