
[6.0] b3344f26c Govern thread creation by queue length instead of dry signals
commit b3344f26ce49a700220be9aeca1afc30df174964
Author: Martin Blix Grydeland <martin@varnish-software.com>
Date: Wed Mar 13 14:14:44 2019 +0100

Govern thread creation by queue length instead of dry signals

When getidleworker signals the pool herder to spawn a thread, it increments
the dry counter, and the herder resets dry to zero after spawning a single
thread. In many cases this creates only a single thread even though the
herder was signaled dry multiple times, and it can leave the cache acceptor
queued with no new thread being created. Combined with long-lasting tasks
(i.e. endless pipelines) and all other tasks having higher priority, this
prevents the cache acceptor from ever being rescheduled.

c00096.vtc demonstrates how this can lock up.

To fix this, spawn threads whenever there are queued tasks and we are below
the thread maximum.
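
In short, the herder used to breed at most one thread per dry reset, while it
now keeps breeding as long as tasks remain queued and the pool is below
wthread_max. The following standalone C sketch (illustrative only -- the
struct fields, numbers and reduced herder passes are simplifications, not the
actual cache_wrk.c code) contrasts the two policies for four queued tasks:

    /*
     * Standalone illustration only: pool_breed() and the herder loop are
     * reduced to single counter updates, and all values are made up.
     */
    #include <stdio.h>

    #define WTHREAD_MAX 10      /* stand-in for cache_param->wthread_max */

    struct pool {
        unsigned nthr;          /* worker threads currently in the pool */
        unsigned lqueue;        /* tasks queued, waiting for a worker */
        unsigned dry;           /* old-style "spawn a thread" signal count */
    };

    /* Old herder pass: one breeding round clears dry entirely. */
    static void
    herder_pass_dry(struct pool *pp)
    {
        if (pp->dry && pp->nthr < WTHREAD_MAX) {
            pp->nthr++;         /* "pool_breed()" */
            pp->dry = 0;        /* ...even if more tasks are still queued */
        }
    }

    /* New herder pass: breed while tasks are queued and we are below max. */
    static void
    herder_pass_queue(struct pool *pp)
    {
        if (pp->lqueue > 0 && pp->nthr < WTHREAD_MAX)
            pp->nthr++;         /* "pool_breed()" */
    }

    int
    main(void)
    {
        /* Four tasks queued while five threads are busy with long tasks. */
        struct pool old_pool = { .nthr = 5, .lqueue = 4, .dry = 4 };
        struct pool new_pool = { .nthr = 5, .lqueue = 4, .dry = 0 };
        int i;

        for (i = 0; i < 4; i++) {
            unsigned before = new_pool.nthr;

            herder_pass_dry(&old_pool);
            herder_pass_queue(&new_pool);
            if (new_pool.nthr > before)
                new_pool.lqueue--;      /* fresh thread picks up a task */
        }

        printf("dry signals:  %u thread(s) bred for 4 queued tasks\n",
            old_pool.nthr - 5);
        printf("queue length: %u thread(s) bred for 4 queued tasks\n",
            new_pool.nthr - 5);
        return (0);
    }

Built and run, the dry-signal variant breeds a single thread for the four
queued tasks, whereas the queue-length variant breeds four.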

Conflicts:
bin/varnishd/cache/cache_wrk.c

diff --git a/bin/varnishd/cache/cache_pool.h b/bin/varnishd/cache/cache_pool.h
index 9902e32ad..4e668fa0b 100644
--- a/bin/varnishd/cache/cache_pool.h
+++ b/bin/varnishd/cache/cache_pool.h
@@ -48,7 +48,6 @@ struct pool {
struct taskhead idle_queue;
struct taskhead queues[TASK_QUEUE_END];
unsigned nthr;
- unsigned dry;
unsigned lqueue;
uintmax_t sdropped;
uintmax_t rdropped;
diff --git a/bin/varnishd/cache/cache_wrk.c b/bin/varnishd/cache/cache_wrk.c
index fa3fa366a..044e140c4 100644
--- a/bin/varnishd/cache/cache_wrk.c
+++ b/bin/varnishd/cache/cache_wrk.c
@@ -194,13 +194,8 @@ pool_getidleworker(struct pool *pp, enum task_prio prio)
AZ(pp->nidle);
}

- if (pt == NULL) {
- if (pp->nthr < cache_param->wthread_max) {
- pp->dry++;
- AZ(pthread_cond_signal(&pp->herder_cond));
- }
+ if (pt == NULL)
return (NULL);
- }
AZ(pt->func);
CAST_OBJ_NOTNULL(wrk, pt->priv, WORKER_MAGIC);
return (wrk);
@@ -303,6 +298,7 @@ Pool_Task(struct pool *pp, struct pool_task *task, enum task_prio prio)
pp->nqueued++;
pp->lqueue++;
VTAILQ_INSERT_TAIL(&pp->queues[prio], task, list);
+ AZ(pthread_cond_signal(&pp->herder_cond));
} else {
if (prio == TASK_QUEUE_REQ)
pp->sdropped++;
@@ -473,7 +469,6 @@ pool_breed(struct pool *qp)
Lck_Unlock(&pool_mtx);
VTIM_sleep(cache_param->wthread_fail_delay);
} else {
- qp->dry = 0;
qp->nthr++;
Lck_Lock(&pool_mtx);
VSC_C_main->threads++;
@@ -549,7 +544,7 @@ pool_herder(void *priv)

/* Make more threads if needed and allowed */
if (pp->nthr < wthread_min ||
- (pp->dry && pp->nthr < cache_param->wthread_max)) {
+ (pp->lqueue > 0 && pp->nthr < cache_param->wthread_max)) {
pool_breed(pp);
continue;
}
@@ -610,16 +605,14 @@ pool_herder(void *priv)
continue;
}
Lck_Lock(&pp->mtx);
- if (!pp->dry) {
+ if (pp->lqueue == 0) {
if (DO_DEBUG(DBG_VTC_MODE))
delay = 0.5;
(void)Lck_CondWait(&pp->herder_cond, &pp->mtx,
VTIM_real() + delay);
- } else {
+ } else
/* XXX: unsafe counters */
VSC_C_main->threads_limited++;
- pp->dry = 0;
- }
Lck_Unlock(&pp->mtx);
}
return (NULL);
diff --git a/bin/varnishtest/tests/c00096.vtc b/bin/varnishtest/tests/c00096.vtc
new file mode 100644
index 000000000..cf80a0f53
--- /dev/null
+++ b/bin/varnishtest/tests/c00096.vtc
@@ -0,0 +1,106 @@
+varnishtest "Test thread creation on acceptor thread queuing"
+
+# This tests that we are able to spawn new threads in the event that the
+# cache acceptor has been queued. It does this by starting 6 long lasting
+# fetches, which will consume 12 threads. That exceeds the initial
+# allotment of 10 threads, giving some probability that the acceptor
+# thread is queued. Then a single quick fetch is done, which should be
+# served since we are well below the maximum number of threads allowed.
+
+# Barrier b1 blocks the slow servers from finishing until the quick fetch
+# is done.
+barrier b1 cond 7
+
+# Barrier b2 blocks the start of the quick fetch until all slow fetches
+# are known to hold captive two threads each.
+barrier b2 cond 7
+
+server s0 {
+ rxreq
+ txresp -nolen -hdr "Content-Length: 10" -hdr "Connection: close"
+ send "123"
+ barrier b1 sync
+ send "4567890"
+ expect_close
+} -dispatch
+
+server stest {
+ rxreq
+ txresp -body "All good"
+} -start
+
+varnish v1 -arg "-p debug=+syncvsl -p debug=+flush_head"
+varnish v1 -arg "-p thread_pools=1 -p thread_pool_min=10"
+varnish v1 -vcl+backend {
+ sub vcl_backend_fetch {
+ if (bereq.url == "/test") {
+ set bereq.backend = stest;
+ } else {
+ set bereq.backend = s0;
+ }
+ }
+} -start
+
+# NB: we might go above 10 threads when early tasks are submitted to
+# the pool since at least one idle thread must be kept in the pool
+# reserve.
+varnish v1 -expect MAIN.threads >= 10
+
+client c1 {
+ txreq -url /1
+ rxresphdrs
+ barrier b2 sync
+ rxrespbody
+} -start
+
+client c2 {
+ txreq -url /2
+ rxresphdrs
+ barrier b2 sync
+ rxrespbody
+} -start
+
+client c3 {
+ txreq -url /3
+ rxresphdrs
+ barrier b2 sync
+ rxrespbody
+} -start
+
+client c4 {
+ txreq -url /4
+ rxresphdrs
+ barrier b2 sync
+ rxrespbody
+} -start
+
+client c5 {
+ txreq -url /5
+ rxresphdrs
+ barrier b2 sync
+ rxrespbody
+} -start
+
+client c6 {
+ txreq -url /6
+ rxresphdrs
+ barrier b2 sync
+ rxrespbody
+} -start
+
+client ctest {
+ barrier b2 sync
+ txreq -url "/test"
+ rxresp
+ expect resp.status == 200
+ expect resp.body == "All good"
+} -run
+
+barrier b1 sync
+
+client c1 -wait
+client c2 -wait
+client c3 -wait
+client c4 -wait
+client c5 -wait
+client c6 -wait