Mailing List Archive

Bottleneck on connection accept rates
I'm running the EPEL build of Varnish on CentOS 7 inside a Kubernetes pod (Docker container).

# rpm -qa | grep varnish
varnish-4.1.5-1.el7.x86_64
# uname -a
Linux media-cdn-2400684925-p137g 3.10.0-514.10.2.el7.x86_64 #1 SMP Fri Mar 3 00:04:05 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux

Currently I seem to be hitting a bottleneck with connection accept rates in Varnish, or maybe I have another problem that is just manifesting this way. Commands like "netstat -nt" show many connections to Varnish stuck in the SYN_SENT state, which is why I think Varnish can't keep up with the listen backlog.
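
To confirm that theory I can watch the kernel's listen-queue drop counters and the accept queue on the listening socket (a rough sketch, assuming Varnish listens on :80; for a listening socket, ss reports Recv-Q as the current accept queue and Send-Q as the backlog, which should match listen_depth=1024):

# netstat -s | grep -i listen
# ss -lnt 'sport = :80'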

I believe I've ruled out the acceptor_sleep scenario (none of the debug messages that would accompany it are being logged), but I'm going to try disabling it explicitly and see if that helps. I'm also going to try the accept_filter feature, although I'm not sure how well supported it is, and maybe try reducing timeout_linger.
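
For reference, a sketch of how I plan to change those at runtime with varnishadm param.set (setting acceptor_sleep_max to 0 is one way to effectively disable the acceptor sleep logic; accept_filter depends on kernel support and may need a child restart to take effect):

# varnishadm param.set acceptor_sleep_max 0.000
# varnishadm param.set accept_filter on
# varnishadm param.set timeout_linger 0.010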

My goal is to have 1-2K simultaneous connections with an establishment rate of 1-2K/second. The cache miss rate will be 100%, so there will be a lot of backend connection management going on. Is this a realistic goal?

Below is the output of param.show, "varnishtop -i Debug", and "varnishstat -1".

I've been assuming that I need roughly one thread per simultaneous client connection. Is that reasonable, or do I need to factor in backend connections too?

james


accept_filter              off [bool] (default)
acceptor_sleep_decay       0.9 (default)
acceptor_sleep_incr        0.000 [seconds] (default)
acceptor_sleep_max         0.050 [seconds] (default)
auto_restart               on [bool] (default)
backend_idle_timeout       60.000 [seconds] (default)
ban_dups                   on [bool] (default)
ban_lurker_age             60.000 [seconds] (default)
ban_lurker_batch           1000 (default)
ban_lurker_sleep           0.010 [seconds] (default)
between_bytes_timeout      60.000 [seconds] (default)
cc_command                 "exec gcc -std=gnu99 -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector-strong --param=ssp-buffer-size=4 -grecord-gcc-switches   -m64 -mtune=generic -Wall -Werror -Wno-error=unused-result -pthread -fpic -shared -Wl,-x -o %o %s" (default)
cli_buffer                 8k [bytes] (default)
cli_limit                  48k [bytes] (default)
cli_timeout                60.000 [seconds] (default)
clock_skew                 10 [seconds] (default)
clock_step                 1.000 [seconds] (default)
connect_timeout            3.500 [seconds] (default)
critbit_cooloff            180.000 [seconds] (default)
debug                      none (default)
default_grace              10.000 [seconds] (default)
default_keep               0.000 [seconds] (default)
default_ttl                120.000 [seconds] (default)
feature                    none (default)
fetch_chunksize            16k [bytes] (default)
fetch_maxchunksize         0.25G [bytes] (default)
first_byte_timeout         60.000 [seconds] (default)
gzip_buffer                32k [bytes] (default)
gzip_level                 6 (default)
gzip_memlevel              8 (default)
http_gzip_support          on [bool] (default)
http_max_hdr               64 [header lines] (default)
http_range_support         on [bool] (default)
http_req_hdr_len           8k [bytes] (default)
http_req_size              32k [bytes] (default)
http_resp_hdr_len          8k [bytes] (default)
http_resp_size             32k [bytes] (default)
idle_send_timeout          60.000 [seconds] (default)
listen_depth               1024 [connections] (default)
lru_interval               2.000 [seconds] (default)
max_esi_depth              5 [levels] (default)
max_restarts               4 [restarts] (default)
max_retries                4 [retries] (default)
nuke_limit                 50 [allocations] (default)
pcre_match_limit           10000 (default)
pcre_match_limit_recursion 20 (default)
ping_interval              3 [seconds] (default)
pipe_timeout               60.000 [seconds] (default)
pool_req                   10,100,10 (default)
pool_sess                  10,100,10 (default)
pool_vbo                   10,100,10 (default)
prefer_ipv6                off [bool] (default)
rush_exponent              3 [requests per request] (default)
send_timeout               5.000 [seconds]
session_max                100000 [sessions] (default)
shm_reclen                 255b [bytes] (default)
shortlived                 10.000 [seconds] (default)
sigsegv_handler            on [bool] (default)
syslog_cli_traffic         on [bool] (default)
tcp_fastopen               off [bool] (default)
tcp_keepalive_intvl        75.000 [seconds] (default)
tcp_keepalive_probes       9 [probes] (default)
tcp_keepalive_time         7200.000 [seconds] (default)
thread_pool_add_delay      0.000 [seconds] (default)
thread_pool_destroy_delay  1.000 [seconds] (default)
thread_pool_fail_delay     0.200 [seconds] (default)
thread_pool_max            1000 [threads]
thread_pool_min            1000 [threads]
thread_pool_reserve        0 [threads] (default)
thread_pool_stack          48k [bytes] (default)
thread_pool_timeout        300.000 [seconds] (default)
thread_pools               2 [pools] (default)
thread_queue_limit         10
thread_stats_rate          10 [requests] (default)
timeout_idle               5.000 [seconds] (default)
timeout_linger             0.050 [seconds] (default)
vcc_allow_inline_c         off [bool] (default)
vcc_err_unref              on [bool] (default)
vcc_unsafe_path            on [bool] (default)
vcl_cooldown               600.000 [seconds] (default)
vcl_dir                    /etc/varnish (default)
vmod_dir                   /usr/lib64/varnish/vmods (default)
vsl_buffer                 4k [bytes] (default)
vsl_mask                   -VCL_trace,-WorkThread,-Hash,-VfpAcct (default)
vsl_reclen                 255b [bytes] (default)
vsl_space                  1G [bytes]
vsm_free_cooldown          60.000 [seconds] (default)
vsm_space                  10M [bytes]
workspace_backend          64k [bytes] (default)
workspace_client           64k [bytes] (default)
workspace_session          0.50k [bytes] (default)
workspace_thread           2k [bytes] (default)


varnishtop -i Debug

list length 82

20982.70 Debug RES_MODE 2
58.56 Debug RES_MODE 0
4.05 Debug Write error, retval = -1, len = 17408, errno = Broken pipe
2.32 Debug Write error, retval = -1, len = 34816, errno = Broken pipe
2.31 Debug Write error, retval = -1, len = 52224, errno = Broken pipe
1.00 Debug Write error, retval = -1, len = 922, errno = Broken pipe
1.00 Debug Write error, retval = -1, len = 1580, errno = Broken pipe
0.99 Debug Write error, retval = -1, len = 1413, errno = Broken pipe
0.75 Debug Write error, retval = -1, len = 5192, errno = Broken pipe
0.71 Debug Write error, retval = -1, len = 3067, errno = Broken pipe
0.71 Debug Write error, retval = -1, len = 4604, errno = Broken pipe
0.62 Debug Write error, retval = -1, len = 6228, errno = Broken pipe
0.61 Debug Write error, retval = -1, len = 4900, errno = Broken pipe
0.61 Debug Write error, retval = -1, len = 7327, errno = Broken pipe
0.60 Debug Write error, retval = -1, len = 8311, errno = Broken pipe
0.60 Debug Write error, retval = -1, len = 10087, errno = Broken pipe
0.60 Debug Write error, retval = -1, len = 11, errno = Broken pipe
0.57 Debug Write error, retval = -1, len = 5129, errno = Broken pipe
0.55 Debug Write error, retval = -1, len = 4821, errno = Broken pipe
0.54 Debug Write error, retval = -1, len = 11224, errno = Broken pipe
0.54 Debug Write error, retval = -1, len = 5044, errno = Broken pipe
0.54 Debug Write error, retval = -1, len = 2474, errno = Broken pipe
0.54 Debug Write error, retval = -1, len = 3257, errno = Broken pipe
0.54 Debug Write error, retval = -1, len = 11788, errno = Broken pipe
0.54 Debug Write error, retval = -1, len = 29455, errno = Broken pipe
0.53 Debug Write error, retval = -1, len = 3346, errno = Broken pipe
0.53 Debug Write error, retval = -1, len = 6534, errno = Broken pipe
0.53 Debug Write error, retval = -1, len = 6552, errno = Broken pipe
0.53 Debug Write error, retval = -1, len = 9850, errno = Broken pipe


varnishstat -1

MAIN.uptime 7546 1.00 Child process uptime
MAIN.sess_conn 277145 36.73 Sessions accepted
MAIN.sess_drop 0 0.00 Sessions dropped
MAIN.sess_fail 0 0.00 Session accept failures
MAIN.client_req_400 0 0.00 Client requests received, subject to 400 errors
MAIN.client_req_417 0 0.00 Client requests received, subject to 417 errors
MAIN.client_req 4307812 570.87 Good client requests received
MAIN.cache_hit 0 0.00 Cache hits
MAIN.cache_hitpass 0 0.00 Cache hits for pass
MAIN.cache_miss 0 0.00 Cache misses
MAIN.backend_conn 61597 8.16 Backend conn. success
MAIN.backend_unhealthy 0 0.00 Backend conn. not attempted
MAIN.backend_busy 0 0.00 Backend conn. too many
MAIN.backend_fail 0 0.00 Backend conn. failures
MAIN.backend_reuse 4250487 563.28 Backend conn. reuses
MAIN.backend_recycle 4279797 567.16 Backend conn. recycles
MAIN.backend_retry 23 0.00 Backend conn. retry
MAIN.fetch_head 219 0.03 Fetch no body (HEAD)
MAIN.fetch_length 4302294 570.14 Fetch with Length
MAIN.fetch_chunked 0 0.00 Fetch chunked
MAIN.fetch_eof 0 0.00 Fetch EOF
MAIN.fetch_bad 0 0.00 Fetch bad T-E
MAIN.fetch_none 146 0.02 Fetch no body
MAIN.fetch_1xx 0 0.00 Fetch no body (1xx)
MAIN.fetch_204 0 0.00 Fetch no body (204)
MAIN.fetch_304 7624 1.01 Fetch no body (304)
MAIN.fetch_failed 67 0.01 Fetch failed (all causes)
MAIN.fetch_no_thread 0 0.00 Fetch failed (no thread)
MAIN.pools 2 . Number of thread pools
MAIN.threads 2000 . Total number of threads
MAIN.threads_limited 0 0.00 Threads hit max
MAIN.threads_created 2000 0.27 Threads created
MAIN.threads_destroyed 0 0.00 Threads destroyed
MAIN.threads_failed 0 0.00 Thread creation failed
MAIN.thread_queue_len 234 . Length of session queue
MAIN.busy_sleep 0 0.00 Number of requests sent to sleep on busy objhdr
MAIN.busy_wakeup 0 0.00 Number of requests woken after sleep on busy objhdr
MAIN.busy_killed 0 0.00 Number of requests killed after sleep on busy objhdr
MAIN.sess_queued 0 0.00 Sessions queued for thread
MAIN.sess_dropped 0 0.00 Sessions dropped for thread
MAIN.n_object 2490 . object structs made
MAIN.n_vampireobject 0 . unresurrected objects
MAIN.n_objectcore 18446744073709551613 . objectcore structs made
MAIN.n_objecthead 0 . objecthead structs made
MAIN.n_waitinglist 0 . waitinglist structs made
MAIN.n_backend 1 . Number of backends
MAIN.n_expired 0 . Number of expired objects
MAIN.n_lru_nuked 0 . Number of LRU nuked objects
MAIN.n_lru_moved 0 . Number of LRU moved objects
MAIN.losthdr 0 0.00 HTTP header overflows
MAIN.s_sess 277145 36.73 Total sessions seen
MAIN.s_req 4307812 570.87 Total requests seen
MAIN.s_pipe 0 0.00 Total pipe sessions seen
MAIN.s_pass 4307812 570.87 Total pass-ed requests seen
MAIN.s_fetch 4307812 570.87 Total backend fetches initiated
MAIN.s_synth 0 0.00 Total synthethic responses made
MAIN.s_req_hdrbytes 8366066701 1108675.68 Request header bytes
MAIN.s_req_bodybytes 0 0.00 Request body bytes
MAIN.s_resp_hdrbytes 1874113868 248358.58 Response header bytes
MAIN.s_resp_bodybytes 197969033005 26234963.29 Response body bytes
MAIN.s_pipe_hdrbytes 0 0.00 Pipe request header bytes
MAIN.s_pipe_in 0 0.00 Piped bytes from client
MAIN.s_pipe_out 0 0.00 Piped bytes to client
MAIN.sess_closed 1625 0.22 Session Closed
MAIN.sess_closed_err 178659 23.68 Session Closed with error
MAIN.sess_readahead 0 0.00 Session Read Ahead
MAIN.sess_herd 1372346 181.86 Session herd
MAIN.sc_rem_close 97498 12.92 Session OK REM_CLOSE
MAIN.sc_req_close 0 0.00 Session OK REQ_CLOSE
MAIN.sc_req_http10 0 0.00 Session Err REQ_HTTP10
MAIN.sc_rx_bad 0 0.00 Session Err RX_BAD
MAIN.sc_rx_body 0 0.00 Session Err RX_BODY
MAIN.sc_rx_junk 0 0.00 Session Err RX_JUNK
MAIN.sc_rx_overflow 0 0.00 Session Err RX_OVERFLOW
MAIN.sc_rx_timeout 178660 23.68 Session Err RX_TIMEOUT
MAIN.sc_tx_pipe 0 0.00 Session OK TX_PIPE
MAIN.sc_tx_error 0 0.00 Session Err TX_ERROR
MAIN.sc_tx_eof 0 0.00 Session OK TX_EOF
MAIN.sc_resp_close 0 0.00 Session OK RESP_CLOSE
MAIN.sc_overload 0 0.00 Session Err OVERLOAD
MAIN.sc_pipe_overflow 0 0.00 Session Err PIPE_OVERFLOW
MAIN.sc_range_short 0 0.00 Session Err RANGE_SHORT
MAIN.shm_records 574687872 76157.95 SHM records
MAIN.shm_writes 14046734 1861.48 SHM writes
MAIN.shm_flushes 164 0.02 SHM flushes due to overflow
MAIN.shm_cont 127243 16.86 SHM MTX contention
MAIN.shm_cycles 22 0.00 SHM cycles through buffer
MAIN.backend_req 4311083 571.31 Backend requests made
MAIN.n_vcl 1 0.00 Number of loaded VCLs in total
MAIN.n_vcl_avail 1 0.00 Number of VCLs available
MAIN.n_vcl_discard 0 0.00 Number of discarded VCLs
MAIN.bans 1 . Count of bans
MAIN.bans_completed 1 . Number of bans marked 'completed'
MAIN.bans_obj 0 . Number of bans using obj.*
MAIN.bans_req 0 . Number of bans using req.*
MAIN.bans_added 1 0.00 Bans added
MAIN.bans_deleted 0 0.00 Bans deleted
MAIN.bans_tested 0 0.00 Bans tested against objects (lookup)
MAIN.bans_obj_killed 0 0.00 Objects killed by bans (lookup)
MAIN.bans_lurker_tested 0 0.00 Bans tested against objects (lurker)
MAIN.bans_tests_tested 0 0.00 Ban tests tested against objects (lookup)
MAIN.bans_lurker_tests_tested 0 0.00 Ban tests tested against objects (lurker)
MAIN.bans_lurker_obj_killed 0 0.00 Objects killed by bans (lurker)
MAIN.bans_dups 0 0.00 Bans superseded by other bans
MAIN.bans_lurker_contention 0 0.00 Lurker gave way for lookup
MAIN.bans_persisted_bytes 16 . Bytes used by the persisted ban lists
MAIN.bans_persisted_fragmentation 0 . Extra bytes in persisted ban lists due to fragmentation
MAIN.n_purges 0 . Number of purge operations executed
MAIN.n_obj_purged 0 . Number of purged objects
MAIN.exp_mailed 0 0.00 Number of objects mailed to expiry thread
MAIN.exp_received 0 0.00 Number of objects received by expiry thread
MAIN.hcb_nolock 0 0.00 HCB Lookups without lock
MAIN.hcb_lock 0 0.00 HCB Lookups with lock
MAIN.hcb_insert 0 0.00 HCB Inserts
MAIN.esi_errors 0 0.00 ESI parse errors (unlock)
MAIN.esi_warnings 0 0.00 ESI parse warnings (unlock)
MAIN.vmods 0 . Loaded VMODs
MAIN.n_gzip 0 0.00 Gzip operations
MAIN.n_gunzip 723 0.10 Gunzip operations
MAIN.vsm_free 10410896 . Free VSM space
MAIN.vsm_used 1073816640 . Used VSM space
MAIN.vsm_cooling 0 . Cooling VSM space
MAIN.vsm_overflow 0 . Overflow VSM space
MAIN.vsm_overflowed 0 0.00 Overflowed VSM space
MGT.uptime 7547 1.00 Management process uptime
MGT.child_start 1 0.00 Child process started
MGT.child_exit 0 0.00 Child process normal exit
MGT.child_stop 0 0.00 Child process unexpected exit
MGT.child_died 0 0.00 Child process died (signal)
MGT.child_dump 0 0.00 Child process core dumped
MGT.child_panic 0 0.00 Child process panic
MEMPOOL.busyobj.live 949 . In use
MEMPOOL.busyobj.pool 9 . In Pool
MEMPOOL.busyobj.sz_wanted 65536 . Size requested
MEMPOOL.busyobj.sz_actual 65504 . Size allocated
MEMPOOL.busyobj.allocs 4312087 571.44 Allocations
MEMPOOL.busyobj.frees 4311138 571.31 Frees
MEMPOOL.busyobj.recycle 4275161 566.55 Recycled from pool
MEMPOOL.busyobj.timeout 11975 1.59 Timed out from pool
MEMPOOL.busyobj.toosmall 0 0.00 Too small to recycle
MEMPOOL.busyobj.surplus 30375 4.03 Too many for pool
MEMPOOL.busyobj.randry 36926 4.89 Pool ran dry
MEMPOOL.req0.live 478 . In use
MEMPOOL.req0.pool 12 . In Pool
MEMPOOL.req0.sz_wanted 65536 . Size requested
MEMPOOL.req0.sz_actual 65504 . Size allocated
MEMPOOL.req0.allocs 736958 97.66 Allocations
MEMPOOL.req0.frees 736480 97.60 Frees
MEMPOOL.req0.recycle 723240 95.84 Recycled from pool
MEMPOOL.req0.timeout 9382 1.24 Timed out from pool
MEMPOOL.req0.toosmall 0 0.00 Too small to recycle
MEMPOOL.req0.surplus 14481 1.92 Too many for pool
MEMPOOL.req0.randry 13718 1.82 Pool ran dry
MEMPOOL.sess0.live 641 . In use
MEMPOOL.sess0.pool 100 . In Pool
MEMPOOL.sess0.sz_wanted 512 . Size requested
MEMPOOL.sess0.sz_actual 480 . Size allocated
MEMPOOL.sess0.allocs 138665 18.38 Allocations
MEMPOOL.sess0.frees 138024 18.29 Frees
MEMPOOL.sess0.recycle 116552 15.45 Recycled from pool
MEMPOOL.sess0.timeout 12728 1.69 Timed out from pool
MEMPOOL.sess0.toosmall 0 0.00 Too small to recycle
MEMPOOL.sess0.surplus 26627 3.53 Too many for pool
MEMPOOL.sess0.randry 22113 2.93 Pool ran dry
MEMPOOL.req1.live 476 . In use
MEMPOOL.req1.pool 15 . In Pool
MEMPOOL.req1.sz_wanted 65536 . Size requested
MEMPOOL.req1.sz_actual 65504 . Size allocated
MEMPOOL.req1.allocs 734145 97.29 Allocations
MEMPOOL.req1.frees 733669 97.23 Frees
MEMPOOL.req1.recycle 720439 95.47 Recycled from pool
MEMPOOL.req1.timeout 9303 1.23 Timed out from pool
MEMPOOL.req1.toosmall 0 0.00 Too small to recycle
MEMPOOL.req1.surplus 14442 1.91 Too many for pool
MEMPOOL.req1.randry 13706 1.82 Pool ran dry
MEMPOOL.sess1.live 617 . In use
MEMPOOL.sess1.pool 108 . In Pool
MEMPOOL.sess1.sz_wanted 512 . Size requested
MEMPOOL.sess1.sz_actual 480 . Size allocated
MEMPOOL.sess1.allocs 138757 18.39 Allocations
MEMPOOL.sess1.frees 138140 18.31 Frees
MEMPOOL.sess1.recycle 117157 15.53 Recycled from pool
MEMPOOL.sess1.timeout 13106 1.74 Timed out from pool
MEMPOOL.sess1.toosmall 0 0.00 Too small to recycle
MEMPOOL.sess1.surplus 26405 3.50 Too many for pool
MEMPOOL.sess1.randry 21600 2.86 Pool ran dry
SMA.s0.c_req 0 0.00 Allocator requests
SMA.s0.c_fail 0 0.00 Allocator failures
SMA.s0.c_bytes 0 0.00 Bytes allocated
SMA.s0.c_freed 0 0.00 Bytes freed
SMA.s0.g_alloc 0 . Allocations outstanding
SMA.s0.g_bytes 0 . Bytes outstanding
SMA.s0.g_space 268435456 . Bytes available
SMA.Transient.c_req 8614289 1141.57 Allocator requests
SMA.Transient.c_fail 0 0.00 Allocator failures
SMA.Transient.c_bytes 202527906070 26839107.62 Bytes allocated
SMA.Transient.c_freed 202527906070 26839107.62 Bytes freed
SMA.Transient.g_alloc 0 . Allocations outstanding
SMA.Transient.g_bytes 0 . Bytes outstanding
SMA.Transient.g_space 0 . Bytes available
VBE.boot.default.happy 0 . Happy health probes
VBE.boot.default.bereq_hdrbytes 3087002285 409091.21 Request header bytes
VBE.boot.default.bereq_bodybytes 0 0.00 Request body bytes
VBE.boot.default.beresp_hdrbytes 1702354674 225596.96 Response header bytes
VBE.boot.default.beresp_bodybytes 198106537669 26253185.48 Response body bytes
VBE.boot.default.pipe_hdrbytes 0 0.00 Pipe request header bytes
VBE.boot.default.pipe_out 0 0.00 Piped bytes to backend
VBE.boot.default.pipe_in 0 0.00 Piped bytes from backend
VBE.boot.default.conn 949 . Concurrent connections to backend
VBE.boot.default.req 4312088 571.44 Backend requests sent
LCK.backend.creat 2 0.00 Created locks
LCK.backend.destroy 0 0.00 Destroyed locks
LCK.backend.locks 8623228 1142.75 Lock Operations
LCK.backend_tcp.creat 1 0.00 Created locks
LCK.backend_tcp.destroy 0 0.00 Destroyed locks
LCK.backend_tcp.locks 17152562 2273.07 Lock Operations
LCK.ban.creat 1 0.00 Created locks
LCK.ban.destroy 0 0.00 Destroyed locks
LCK.ban.locks 4311758 571.40 Lock Operations
LCK.busyobj.creat 4311923 571.42 Created locks
LCK.busyobj.destroy 4311117 571.31 Destroyed locks
LCK.busyobj.locks 44955817 5957.57 Lock Operations
LCK.cli.creat 1 0.00 Created locks
LCK.cli.destroy 0 0.00 Destroyed locks
LCK.cli.locks 2529 0.34 Lock Operations
LCK.exp.creat 1 0.00 Created locks
LCK.exp.destroy 0 0.00 Destroyed locks
LCK.exp.locks 2404 0.32 Lock Operations
LCK.hcb.creat 1 0.00 Created locks
LCK.hcb.destroy 0 0.00 Destroyed locks
LCK.hcb.locks 42 0.01 Lock Operations
LCK.lru.creat 2 0.00 Created locks
LCK.lru.destroy 0 0.00 Destroyed locks
LCK.lru.locks 0 0.00 Lock Operations
LCK.mempool.creat 5 0.00 Created locks
LCK.mempool.destroy 0 0.00 Destroyed locks
LCK.mempool.locks 12374550 1639.88 Lock Operations
LCK.objhdr.creat 1 0.00 Created locks
LCK.objhdr.destroy 0 0.00 Destroyed locks
LCK.objhdr.locks 50106608 6640.15 Lock Operations
LCK.pipestat.creat 1 0.00 Created locks
LCK.pipestat.destroy 0 0.00 Destroyed locks
LCK.pipestat.locks 0 0.00 Lock Operations
LCK.sess.creat 277251 36.74 Created locks
LCK.sess.destroy 276164 36.60 Destroyed locks
LCK.sess.locks 0 0.00 Lock Operations
LCK.smp.creat 0 0.00 Created locks
LCK.smp.destroy 0 0.00 Destroyed locks
LCK.smp.locks 0 0.00 Lock Operations
LCK.vbe.creat 1 0.00 Created locks
LCK.vbe.destroy 0 0.00 Destroyed locks
LCK.vbe.locks 2519 0.33 Lock Operations
LCK.vcapace.creat 1 0.00 Created locks
LCK.vcapace.destroy 0 0.00 Destroyed locks
LCK.vcapace.locks 0 0.00 Lock Operations
LCK.vcl.creat 1 0.00 Created locks
LCK.vcl.destroy 0 0.00 Destroyed locks
LCK.vcl.locks 8648114 1146.05 Lock Operations
LCK.vxid.creat 1 0.00 Created locks
LCK.vxid.destroy 0 0.00 Destroyed locks
LCK.vxid.locks 1930 0.26 Lock Operations
LCK.waiter.creat 2 0.00 Created locks
LCK.waiter.destroy 0 0.00 Destroyed locks
LCK.waiter.locks 16897202 2239.23 Lock Operations
LCK.wq.creat 3 0.00 Created locks
LCK.wq.destroy 0 0.00 Destroyed locks
LCK.wq.locks 17093091 2265.19 Lock Operations
LCK.wstat.creat 1 0.00 Created locks
LCK.wstat.destroy 0 0.00 Destroyed locks
LCK.wstat.locks 5415146 717.62 Lock Operations
LCK.sma.creat 2 0.00 Created locks
LCK.sma.destroy 0 0.00 Destroyed locks
LCK.sma.locks 17228582 2283.14 Lock Operations






Re: Bottleneck on connection accept rates
> I believe I've ruled out the acceptor_sleep scenario (no debug messages that would accompany it are logged), but I'm going to try and disable it explicitly and see if that helps. I'm also going to try using the accept-filter feature, although I'm not sure how supported it is. And maybe try reducing timeout_linger.
>
> My goal is to have 1-2K simultaneous connections with an establish rate of 1-2K/second. Cache miss rate will be 100% so there will be lots of backend connection management going on. Is this a realistic goal?

Not sure about the bottleneck, but the 2000 workers (2 pools x 1000 threads) will become
one if you reach 2K concurrent connections.

> thread_pool_add_delay 0.000 [seconds] (default)
> thread_pool_destroy_delay 1.000 [seconds] (default)
> thread_pool_fail_delay 0.200 [seconds] (default)
> thread_pool_max 1000 [threads]
> thread_pool_min 1000 [threads]
> thread_pool_reserve 0 [threads] (default)
> thread_pool_stack 48k [bytes] (default)
> thread_pool_timeout 300.000 [seconds] (default)
> thread_pools 2 [pools] (default)

Bump thread_pool_max back to 5000 (the default value) to get enough
room to handle the traffic you are expecting.
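
For example, at runtime (the same can be set at startup with -p thread_pool_max=5000 on the varnishd command line):

# varnishadm param.set thread_pool_max 5000

Keep in mind that thread_pool_min/thread_pool_max are per pool, so with thread_pools=2 that allows up to 10000 threads in total.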

> MAIN.uptime 7546 1.00 Child process uptime
> MAIN.sess_conn 277145 36.73 Sessions accepted
> MAIN.sess_drop 0 0.00 Sessions dropped
> MAIN.sess_fail 0 0.00 Session accept failures

No failures and no sessions dropped; looking good.

> MAIN.thread_queue_len 234 . Length of session queue

And here you are apparently running out of workers.

Dridi

Re: Bottleneck on connection accept rates
Thank you, Dridi. I will bump that tomorrow morning and test again.

Premature optimization on my part, apparently.

I had thought I would be fine with 2 thread pools of 2,000 threads each. Do I need a thread for each backend connection as well as for each client connection?

james


Re: Bottleneck on connection accept rates
> I had thought I would be fine with 2 thread pools of 2,000 threads each. Do

According to your parameters, you have 2 pools of 1000 threads each.

> I need a thread for each backend connection as well as for each client
> connection?

I'm not in the mood to go down that rabbit hole, so let's say that for
a miss or a pass it is. Keep the default max, monitor/graph the
number of threads, and see for yourself what you need. It may depend
on how the clients and backends typically behave, so there's no magic
recipe.
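
A quick way to keep an eye on it (a sketch; varnishstat's -f option filters on field names and can be repeated):

# varnishstat -1 -f MAIN.threads -f MAIN.threads_limited -f MAIN.sess_queued -f MAIN.thread_queue_len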

Dridi

Re: Bottleneck on connection accept rates
Yes, that was it. Everything is scaling up much better now. Thank you for your help!

james


