Mailing List Archive

clamd: Keep scanning while reloading database
Hi,

I use clamd for filtering incoming email. When you issue the "reload"
command, then the accept() loop is paused while the new engine is
re-read and re-compiled. This might take some seconds, during which
the mails queue up. I'd like to get rid of this delay (if necessary by
patching clamd), and hope you can give me some hints.

So far I see three ways to eliminate this delay:

1) Start reload_db() in a background thread, resume scanning, and call
back once the new engine is in place; then exchange the pointers
from old to new engine and free the old one.
2) reload_db() forks and re-loads the database, then calls back to the
parent PID (e.g. via a signal), which in turn passes the listening
socket FD and eventually exits after the last thread finished its work.
3) Run two instances in parallel and create some shell script logic to
make a symlink point to either of the sockets. If you want to
re-load, pick the inactive instance, reload, switch symlink,
reload the second instance.

While option 3 works in practice, it is not really nice in my eyes. On
IRC it was suggested that option 1 (that I favor) is not possible:

16:07 < lattera> there's a lot that happens behind-the-scenes when
dealing with engine reloads due to threading

Is that really the case? If so, is it possible to to maybe only thread
out the time-intensive part, and let the other parts run
synchronously?

I would appreciate any ideas or pointers to the source regarding this
issue. Or how else do people deal with this delay?

Thanks in advance,

Julius
_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net
Re: clamd: Keep scanning while reloading database [ In reply to ]
Hi,

> 1) Start reload_db() in a background thread, resume scanning, and call
> back once the new engine is in place; then exchange the pointers
> from old to new engine and free the old one.

FWIW, I have implemented this option, and it seems to work just fine.
Patch is pasted below.

Do you see any problems with this approach?

Julius

--
From 56e376e55db86571e50737cf20a33b8716cad291 Mon Sep 17 00:00:00 2001
From: Julius Plenz <plenz@cis.fu-berlin.de>
Date: Mon, 7 Apr 2014 19:21:44 +0200
Subject: [PATCH] Reload virus definition in separate thread, eliminating delay

This implements option #1 of
http://lurker.clamav.net/message/20140407.145009.12f41a68.en.html

We introduce a daemon-global "reload_engine" pointer, also protected
under the reload_mutex. Instead of calling reload_db() syncronously, we
dispatch a thread that will eventually write to the reload_engine
pointer, signaling that the new engine is fully set up. Then, under
protection of the mutex, the current engine is exchanged and the old
engine freed.
---
clamd/server-th.c | 120 +++++++++++++++++++++++++++++-------------------------
clamd/server.h | 7 ++++
2 files changed, 71 insertions(+), 56 deletions(-)

diff --git a/clamd/server-th.c b/clamd/server-th.c
index 86a3a48..9afa2b7 100644
--- a/clamd/server-th.c
+++ b/clamd/server-th.c
@@ -64,6 +64,7 @@
int progexit = 0;
pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
int reload = 0;
+struct cl_engine *reload_engine = NULL;
time_t reloaded_time = 0;
pthread_mutex_t reload_mutex = PTHREAD_MUTEX_INITIALIZER;
int sighup = 0;
@@ -158,40 +159,27 @@ void sighandler_th(int sig)
logg("$Failed to write to syncpipe\n");
}

-static struct cl_engine *reload_db(struct cl_engine *engine, unsigned int dboptions, const struct optstruct *opts, int do_check, int *ret)
+static void *reload_db(void *argp)
{
const char *dbdir;
int retval;
unsigned int sigs = 0;
struct cl_settings *settings = NULL;
+ struct cl_engine *engine = NULL;

- *ret = 0;
- if(do_check) {
- if(!dbstat.entries) {
- logg("No stats for Database check - forcing reload\n");
- return engine;
- }
-
- if(cl_statchkdir(&dbstat) == 1) {
- logg("SelfCheck: Database modification detected. Forcing reload.\n");
- return engine;
- } else {
- logg("SelfCheck: Database status OK.\n");
- return NULL;
- }
- }
+ struct reload_db_args *args = (struct reload_db_args *)argp;
+ struct cl_engine *oldengine = args->engine;
+ unsigned int dboptions = args->dboptions;
+ const struct optstruct *opts = args->opts;

- /* release old structure */
- if(engine) {
+ if(oldengine) {
/* copy current settings */
- settings = cl_engine_settings_copy(engine);
+ settings = cl_engine_settings_copy(oldengine);
if(!settings)
logg("^Can't make a copy of the current engine settings\n");
-
- thrmgr_setactiveengine(NULL);
- cl_engine_free(engine);
}

+
dbdir = optget(opts, "DatabaseDirectory")->strarg;
logg("Reading databases from %s\n", dbdir);

@@ -201,18 +189,16 @@ static struct cl_engine *reload_db(struct cl_engine *engine, unsigned int dbopti
memset(&dbstat, 0, sizeof(struct cl_stat));
if((retval = cl_statinidir(dbdir, &dbstat))) {
logg("!cl_statinidir() failed: %s\n", cl_strerror(retval));
- *ret = 1;
if(settings)
cl_engine_settings_free(settings);
- return NULL;
+ return;
}

if(!(engine = cl_engine_new())) {
logg("!Can't initialize antivirus engine\n");
- *ret = 1;
if(settings)
cl_engine_settings_free(settings);
- return NULL;
+ return;
}

if(settings) {
@@ -227,20 +213,20 @@ static struct cl_engine *reload_db(struct cl_engine *engine, unsigned int dbopti
if((retval = cl_load(dbdir, engine, &sigs, dboptions))) {
logg("!reload db failed: %s\n", cl_strerror(retval));
cl_engine_free(engine);
- *ret = 1;
- return NULL;
+ return;
}

if((retval = cl_engine_compile(engine)) != 0) {
logg("!Database initialization error: can't compile engine: %s\n", cl_strerror(retval));
cl_engine_free(engine);
- *ret = 1;
- return NULL;
+ return;
}
logg("Database correctly reloaded (%u signatures)\n", sigs);

- thrmgr_setactiveengine(engine);
- return engine;
+ pthread_mutex_lock(&reload_mutex);
+ time(&reloaded_time);
+ reload_engine = engine;
+ pthread_mutex_unlock(&reload_mutex);
}

/*
@@ -713,6 +699,7 @@ int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsi
unsigned long long val;
size_t i, j, rr_last = 0;
pthread_t accept_th;
+ pthread_t reload_th;
pthread_mutex_t fds_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t recvfds_mutex = PTHREAD_MUTEX_INITIALIZER;
struct acceptdata acceptdata = ACCEPTDATA_INIT(&fds_mutex, &recvfds_mutex);
@@ -1347,10 +1334,20 @@ int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsi
if(selfchk) {
time(&current_time);
if((current_time - start_time) >= (time_t)selfchk) {
- if(reload_db(engine, dboptions, opts, TRUE, &ret)) {
+ if(!dbstat.entries) {
+ logg("No stats for Database check - forcing reload\n");
+ pthread_mutex_lock(&reload_mutex);
+ reload = 1;
+ pthread_mutex_unlock(&reload_mutex);
+ }
+
+ if(cl_statchkdir(&dbstat) == 1) {
+ logg("SelfCheck: Database modification detected. Forcing reload.\n");
pthread_mutex_lock(&reload_mutex);
reload = 1;
pthread_mutex_unlock(&reload_mutex);
+ } else {
+ logg("SelfCheck: Database status OK.\n");
}
time(&start_time);
}
@@ -1360,34 +1357,45 @@ int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsi
pthread_mutex_lock(&reload_mutex);
if(reload) {
pthread_mutex_unlock(&reload_mutex);
-#if defined(FANOTIFY) || defined(CLAMAUTH)
- if(optget(opts, "ScanOnAccess")->enabled && tharg) {
- logg("Restarting on-access scan\n");
- pthread_kill(fan_pid, SIGUSR1);
- pthread_join(fan_pid, NULL);
- }
-#endif
- engine = reload_db(engine, dboptions, opts, FALSE, &ret);
- if(ret) {
- logg("Terminating because of a fatal error.\n");
- if(new_sd >= 0)
- closesocket(new_sd);
- break;
- }
-
+ struct reload_db_args reload_db_args = {
+ .engine = engine,
+ .dboptions = dboptions,
+ .opts = opts,
+ };
pthread_mutex_lock(&reload_mutex);
reload = 0;
- time(&reloaded_time);
+ pthread_create(&reload_th, NULL, reload_db, &reload_db_args);
pthread_mutex_unlock(&reload_mutex);
+ } else {
+ if(reload_engine != NULL) {
#if defined(FANOTIFY) || defined(CLAMAUTH)
- if(optget(opts, "ScanOnAccess")->enabled && tharg) {
- tharg->engine = engine;
- pthread_create(&fan_pid, &fan_attr, fan_th, tharg);
- }
+ pthread_mutex_unlock(&reload_mutex);
+ if(optget(opts, "ScanOnAccess")->enabled && tharg) {
+ logg("Restarting on-access scan\n");
+ pthread_kill(fan_pid, SIGUSR1);
+ pthread_join(fan_pid, NULL);
+ }
+ pthread_mutex_lock(&reload_mutex);
#endif
- time(&start_time);
- } else {
- pthread_mutex_unlock(&reload_mutex);
+ static struct cl_engine *tmp;
+ tmp = engine;
+ engine = reload_engine;
+ thrmgr_setactiveengine(engine);
+ time(&start_time);
+ reload_engine = NULL;
+ pthread_mutex_unlock(&reload_mutex);
+
+ cl_engine_free(tmp);
+
+#if defined(FANOTIFY) || defined(CLAMAUTH)
+ if(optget(opts, "ScanOnAccess")->enabled && tharg) {
+ tharg->engine = engine;
+ pthread_create(&fan_pid, &fan_attr, fan_th, tharg);
+ }
+#endif
+ } else {
+ pthread_mutex_unlock(&reload_mutex);
+ }
}
}

diff --git a/clamd/server.h b/clamd/server.h
index bc2a296..e171ea9 100644
--- a/clamd/server.h
+++ b/clamd/server.h
@@ -46,6 +46,13 @@ struct thrwarg {
unsigned int options;
};

+/* reload_db arguments */
+struct reload_db_args {
+ struct cl_engine *engine;
+ unsigned int dboptions;
+ const struct optstruct *opts;
+};
+
int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsigned int dboptions, const struct optstruct *opts);
void sighandler(int sig);
void sighandler_th(int sig);
--
1.9.0-zedat

_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net
Re: clamd: Keep scanning while reloading database [ In reply to ]
> > 1) Start reload_db() in a background thread, resume scanning, and call
> > back once the new engine is in place; then exchange the pointers
> > from old to new engine and free the old one.
>
> FWIW, I have implemented this option, and it seems to work just fine.
> Patch is pasted below.
>
> Do you see any problems with this approach?

Hi,

I haven't looked closely, but how is the fact that each thread (which may currently be scanning a different file and may finish at some arbitrary time in the future) has a reference to the current engine object handled? It would seem that some sort of a reference count could be used to manage which thread actually frees the engine object which is being replaced... Maybe the settings object also needs a local reference in each of the scanning threads...

- Mark Pizzolato


_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net
Re: clamd: Keep scanning while reloading database [ In reply to ]
Hi, Mark!

* Mark Pizzolato - ClamAV-devel <clamav-devel@subscriptions.pizzolato.net> [2014-04-07 21:17]:
> I haven't looked closely, but how is the fact that each thread
> (which may currently be scanning a different file and may finish at
> some arbitrary time in the future) has a reference to the current
> engine object handled?

It appears that for every connection that is acceptey by clamd, the
current "engine" value is passed in the "conn" struct. The engine
struct has a ref count, and a process "grabs" the engine by calling
cl_engine_addref(), thus increasing the ref count. Only when
cl_engine_free() is called and the ref count is zero is the object
actually freed.

This is what I got from a cursory reading of the code... (such a
request/command is passed around quite a lot.) I verified that the old
engine was indeed freed after the scan finished by plucking in debug
statements in the relevant functions.

Julius
_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net
Re: clamd: Keep scanning while reloading database [ In reply to ]
Hi Julius,

On Monday, April 07, 2014 at 1:50 PM, Julius Plenz wrote:
> * Mark Pizzolato - ClamAV-devel <clamav-
> devel@subscriptions.pizzolato.net> [2014-04-07 21:17]:
> > I haven't looked closely, but how is the fact that each thread (which
> > may currently be scanning a different file and may finish at some
> > arbitrary time in the future) has a reference to the current engine
> > object handled?
>
> It appears that for every connection that is acceptey by clamd, the current
> "engine" value is passed in the "conn" struct. The engine struct has a ref
> count, and a process "grabs" the engine by calling cl_engine_addref(), thus
> increasing the ref count. Only when
> cl_engine_free() is called and the ref count is zero is the object actually
> freed.

It would seem that there is a race condition in this paradigm. The reference to the engine object should be added when the engine value is set in the conn structure (this determining of the engine value AND the addition of the reference count should be done with the related mutex held). The current paradigm seems to be creating an un-accounted reference and later on incrementing the reference value. By the time that increment happens the engine value which was passed may have already been freed and thus the pointer being deference is no longer pointing at a valid object.

> This is what I got from a cursory reading of the code... (such a
> request/command is passed around quite a lot.) I verified that the old engine
> was indeed freed after the scan finished by plucking in debug statements in
> the relevant functions.

It is good that it usually is freed. That doesn't prove that there isn't a race condition which can cause corruption at arbitrary times (Not your fault).

- Mark


_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net
Re: clamd: Keep scanning while reloading database [ In reply to ]
Hi, Mark!

* Mark Pizzolato - ClamAV-devel <clamav-devel@subscriptions.pizzolato.net> [2014-04-08 00:02]:
> > It appears that for every connection that is acceptey by clamd,
> > the current "engine" value is passed in the "conn" struct. The
> > engine struct has a ref count, and a process "grabs" the engine by
> > calling cl_engine_addref(), thus increasing the ref count. Only
> > when cl_engine_free() is called and the ref count is zero is the
> > object actually freed.

> It would seem that there is a race condition in this paradigm. The
> reference to the engine object should be added when the engine value
> is set in the conn structure (this determining of the engine value
> AND the addition of the reference count should be done with the
> related mutex held). The current paradigm seems to be creating an
> un-accounted reference and later on incrementing the reference
> value. By the time that increment happens the engine value which
> was passed may have already been freed and thus the pointer being
> deference is no longer pointing at a valid object.

Yes, you are certainly right. Thanks for pointing this out! I will try
to work around this issue some time this week.

Julius
_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net
Re: clamd: Keep scanning while reloading database [ In reply to ]
Hi, Mark!

* Mark Pizzolato - ClamAV-devel <clamav-devel@subscriptions.pizzolato.net> [2014-04-08 00:02]:
> > It appears that for every connection that is acceptey by clamd,
> > the current "engine" value is passed in the "conn" struct. The
> > engine struct has a ref count, and a process "grabs" the engine by
> > calling cl_engine_addref(), thus increasing the ref count. Only
> > when cl_engine_free() is called and the ref count is zero is the
> > object actually freed.

> It would seem that there is a race condition in this paradigm. The
> reference to the engine object should be added when the engine value
> is set in the conn structure (this determining of the engine value
> AND the addition of the reference count should be done with the
> related mutex held).

True, this would be the better approach. The downside is that one has
to free (i.e. decrease the ref count of) this not-yet-used object
every time an error occurs. So in many error cases, you'd have
"useless" calls to cl_engine_free().

> The current paradigm seems to be creating an un-accounted reference
> and later on incrementing the reference value. By the time that
> increment happens the engine value which was passed may have already
> been freed and thus the pointer being deference is no longer
> pointing at a valid object.

This is a valid concern, but with the patch I posted I think this can
not happen. The patch mainly changes the behaviour of recvloop_th(),
the "receiving thread". Further we have an "accept thread" and "scanner
threads".

The receiving thread loops over outstanding requests and dispatches
them. It is after the dispatching is done that we check for errors, if
the program should exit, or if the DB should be reloaded. Then the
loop is re-started.

The dispatching happens via the following callpath:

recvloop_th -> parse_dispatch_cmd ->
-> execute_or_dispatch_command -> dispatch_command

dispatch_command() duplicates the conn structure, and now calls
cl_engine_addref(dup_conn->engine). But only now the connection is
handed to a scanner thread via thrmgr_group_dispatch(). In case this
works, dispatch_command() will not free the engine, because this is
now the scanner thread's job. The function returns to the receive loop
eventually -- only now we can come to the point where we re-set the
pointer to the newly-loaded engine and free the old one.

So I think there is no race condition here.

Julius
_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net
Re: clamd: Keep scanning while reloading database [ In reply to ]
Hi Julius,

On Thursday, April 10, 2014 at 4:19 AM, Julius Plenz wrote:
> Hi, Mark!
>
> * Mark Pizzolato - ClamAV-devel <clamav-devel@subscriptions.pizzolato.net> [2014-04-08 00:02]:
> > > It appears that for every connection that is acceptey by clamd, the
> > > current "engine" value is passed in the "conn" struct. The engine
> > > struct has a ref count, and a process "grabs" the engine by calling
> > > cl_engine_addref(), thus increasing the ref count. Only when
> > > cl_engine_free() is called and the ref count is zero is the object
> > > actually freed.
>
> > It would seem that there is a race condition in this paradigm. The
> > reference to the engine object should be added when the engine value
> > is set in the conn structure (this determining of the engine value AND
> > the addition of the reference count should be done with the related
> > mutex held).
>
> True, this would be the better approach. The downside is that one has to
> free (i.e. decrease the ref count of) this not-yet-used object every time an
> error occurs. So in many error cases, you'd have "useless" calls to
> cl_engine_free().
>
> > The current paradigm seems to be creating an un-accounted reference
> > and later on incrementing the reference value. By the time that
> > increment happens the engine value which was passed may have already
> > been freed and thus the pointer being deference is no longer pointing
> > at a valid object.
>
> This is a valid concern, but with the patch I posted I think this can not happen.
> The patch mainly changes the behaviour of recvloop_th(), the "receiving
> thread". Further we have an "accept thread" and "scanner threads".
>
> The receiving thread loops over outstanding requests and dispatches them.
> It is after the dispatching is done that we check for errors, if the program
> should exit, or if the DB should be reloaded. Then the loop is re-started.
>
> The dispatching happens via the following callpath:
>
> recvloop_th -> parse_dispatch_cmd ->
> -> execute_or_dispatch_command -> dispatch_command
>
> dispatch_command() duplicates the conn structure, and now calls
> cl_engine_addref(dup_conn->engine). But only now the connection is
> handed to a scanner thread via thrmgr_group_dispatch(). In case this works,
> dispatch_command() will not free the engine, because this is now the
> scanner thread's job. The function returns to the receive loop eventually --
> only now we can come to the point where we re-set the pointer to the
> newly-loaded engine and free the old one.
>
> So I think there is no race condition here.

Well, I initially said "I haven't looked closely'. You now have looked closely and your analysis looks quite reasonable.

One more question (as I originally said, I haven't looked closely at either the current code or your suggested patch)... What happens if multiple reload requests come in right after each other while scanning thread(s) are still scanning some file(s)?

- Mark

_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net
Re: clamd: Keep scanning while reloading database [ In reply to ]
Hi, Mark!

* Mark Pizzolato - ClamAV-devel <clamav-devel@subscriptions.pizzolato.net> [2014-04-10 18:45]:
> One more question (as I originally said, I haven't looked closely at
> either the current code or your suggested patch)... What happens if
> multiple reload requests come in right after each other while
> scanning thread(s) are still scanning some file(s)?

Then the threads would override each other's work and you'd have
unreferenced engines flying around. Good catch. Pasted below is a
patch to address that problem.

Julius
--
From a178a359970d65a0c439c8e3a84e861cbc9fd944 Mon Sep 17 00:00:00 2001
From: Julius Plenz <plenz@cis.fu-berlin.de>
Date: Mon, 14 Apr 2014 11:05:06 +0200
Subject: [PATCH] Introduce a reload_in_progress flag

---
clamd/server-th.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/clamd/server-th.c b/clamd/server-th.c
index 9afa2b7..2167475 100644
--- a/clamd/server-th.c
+++ b/clamd/server-th.c
@@ -64,6 +64,7 @@
int progexit = 0;
pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
int reload = 0;
+volatile int reload_in_progress = 0;
struct cl_engine *reload_engine = NULL;
time_t reloaded_time = 0;
pthread_mutex_t reload_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -226,6 +227,7 @@ static void *reload_db(void *argp)
pthread_mutex_lock(&reload_mutex);
time(&reloaded_time);
reload_engine = engine;
+ reload_in_progress = 0;
pthread_mutex_unlock(&reload_mutex);
}

@@ -1355,7 +1357,11 @@ int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsi

/* DB reload */
pthread_mutex_lock(&reload_mutex);
- if(reload) {
+ if(reload && reload_in_progress) {
+ logg("Database reload already in progress; ignoring reload request.\n");
+ reload = 0;
+ pthread_mutex_unlock(&reload_mutex);
+ } else if(reload) {
pthread_mutex_unlock(&reload_mutex);
struct reload_db_args reload_db_args = {
.engine = engine,
@@ -1364,6 +1370,7 @@ int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsi
};
pthread_mutex_lock(&reload_mutex);
reload = 0;
+ reload_in_progress = 1;
pthread_create(&reload_th, NULL, reload_db, &reload_db_args);
pthread_mutex_unlock(&reload_mutex);
} else {
--
1.9.0-zedat

_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net
Re: clamd: Keep scanning while reloading database [ In reply to ]
Hi,

* Julius Plenz <plenz@cis.fu-berlin.de> [2014-04-07 20:43]:
> > 1) Start reload_db() in a background thread, resume scanning, and call
> > back once the new engine is in place; then exchange the pointers
> > from old to new engine and free the old one.
>
> FWIW, I have implemented this option, and it seems to work just
> fine.

We have been running ClamAV on all incoming mail for the past week
while periodically reloading a fresh definition database, and it works
very well. Any chance of getting this included into upstream?

Current patch against Git tag clamav-0.98.1 pasted below.

Cheers,

Julius
--
clamd/server-th.c | 145 ++++++++++++++++++++++++++++++++----------------------
clamd/server.h | 7 +++
2 files changed, 94 insertions(+), 58 deletions(-)

diff --git a/clamd/server-th.c b/clamd/server-th.c
index 86a3a48..3df5c9c 100644
--- a/clamd/server-th.c
+++ b/clamd/server-th.c
@@ -64,6 +64,8 @@
int progexit = 0;
pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
int reload = 0;
+volatile int reload_in_progress = 0;
+struct cl_engine *reload_engine = NULL;
time_t reloaded_time = 0;
pthread_mutex_t reload_mutex = PTHREAD_MUTEX_INITIALIZER;
int sighup = 0;
@@ -158,40 +160,27 @@ void sighandler_th(int sig)
logg("$Failed to write to syncpipe\n");
}

-static struct cl_engine *reload_db(struct cl_engine *engine, unsigned int dboptions, const struct optstruct *opts, int do_check, int *ret)
+static void *reload_db(void *argp)
{
- const char *dbdir;
- int retval;
- unsigned int sigs = 0;
- struct cl_settings *settings = NULL;
-
- *ret = 0;
- if(do_check) {
- if(!dbstat.entries) {
- logg("No stats for Database check - forcing reload\n");
- return engine;
- }
-
- if(cl_statchkdir(&dbstat) == 1) {
- logg("SelfCheck: Database modification detected. Forcing reload.\n");
- return engine;
- } else {
- logg("SelfCheck: Database status OK.\n");
- return NULL;
- }
- }
-
- /* release old structure */
- if(engine) {
+ const char *dbdir;
+ int retval;
+ unsigned int sigs = 0;
+ struct cl_settings *settings = NULL;
+ struct cl_engine *engine = NULL;
+
+ struct reload_db_args *args = (struct reload_db_args *)argp;
+ struct cl_engine *oldengine = args->engine;
+ unsigned int dboptions = args->dboptions;
+ const struct optstruct *opts = args->opts;
+
+ if(oldengine) {
/* copy current settings */
- settings = cl_engine_settings_copy(engine);
+ settings = cl_engine_settings_copy(oldengine);
if(!settings)
logg("^Can't make a copy of the current engine settings\n");
-
- thrmgr_setactiveengine(NULL);
- cl_engine_free(engine);
}

+
dbdir = optget(opts, "DatabaseDirectory")->strarg;
logg("Reading databases from %s\n", dbdir);

@@ -201,18 +190,12 @@ static struct cl_engine *reload_db(struct cl_engine *engine, unsigned int dbopti
memset(&dbstat, 0, sizeof(struct cl_stat));
if((retval = cl_statinidir(dbdir, &dbstat))) {
logg("!cl_statinidir() failed: %s\n", cl_strerror(retval));
- *ret = 1;
- if(settings)
- cl_engine_settings_free(settings);
- return NULL;
+ goto err;
}

if(!(engine = cl_engine_new())) {
logg("!Can't initialize antivirus engine\n");
- *ret = 1;
- if(settings)
- cl_engine_settings_free(settings);
- return NULL;
+ goto err;
}

if(settings) {
@@ -222,25 +205,38 @@ static struct cl_engine *reload_db(struct cl_engine *engine, unsigned int dbopti
logg("^Using default engine settings\n");
}
cl_engine_settings_free(settings);
+ settings = NULL;
}

if((retval = cl_load(dbdir, engine, &sigs, dboptions))) {
logg("!reload db failed: %s\n", cl_strerror(retval));
- cl_engine_free(engine);
- *ret = 1;
- return NULL;
+ goto err;
}

if((retval = cl_engine_compile(engine)) != 0) {
logg("!Database initialization error: can't compile engine: %s\n", cl_strerror(retval));
- cl_engine_free(engine);
- *ret = 1;
- return NULL;
+ goto err;
}
logg("Database correctly reloaded (%u signatures)\n", sigs);

- thrmgr_setactiveengine(engine);
- return engine;
+ pthread_mutex_lock(&reload_mutex);
+ time(&reloaded_time);
+ reload_engine = engine;
+ goto end;
+
+err:
+ if(settings)
+ cl_engine_settings_free(settings);
+ if(engine)
+ cl_engine_free(engine);
+
+ pthread_mutex_lock(&reload_mutex);
+
+end:
+ reload_in_progress = 0;
+ pthread_mutex_unlock(&reload_mutex);
+
+ free(args);
}

/*
@@ -713,6 +709,7 @@ int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsi
unsigned long long val;
size_t i, j, rr_last = 0;
pthread_t accept_th;
+ pthread_t reload_th;
pthread_mutex_t fds_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t recvfds_mutex = PTHREAD_MUTEX_INITIALIZER;
struct acceptdata acceptdata = ACCEPTDATA_INIT(&fds_mutex, &recvfds_mutex);
@@ -1347,10 +1344,19 @@ int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsi
if(selfchk) {
time(&current_time);
if((current_time - start_time) >= (time_t)selfchk) {
- if(reload_db(engine, dboptions, opts, TRUE, &ret)) {
+ if(!dbstat.entries) {
+ logg("No stats for Database check - forcing reload\n");
+ pthread_mutex_lock(&reload_mutex);
+ reload = 1;
+ pthread_mutex_unlock(&reload_mutex);
+ }
+ else if(cl_statchkdir(&dbstat) == 1) {
+ logg("SelfCheck: Database modification detected. Forcing reload.\n");
pthread_mutex_lock(&reload_mutex);
reload = 1;
pthread_mutex_unlock(&reload_mutex);
+ } else {
+ logg("SelfCheck: Database status OK.\n");
}
time(&start_time);
}
@@ -1358,34 +1364,57 @@ int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsi

/* DB reload */
pthread_mutex_lock(&reload_mutex);
- if(reload) {
+ if(reload && reload_in_progress) {
+ logg("Database reload already in progress; ignoring reload request.\n");
+ reload = 0;
+ pthread_mutex_unlock(&reload_mutex);
+ } else if(reload) {
pthread_mutex_unlock(&reload_mutex);
+ struct reload_db_args *reload_db_args;
+ reload_db_args = (struct reload_db_args *) malloc(sizeof(struct reload_db_args));
+ if (reload_db_args)
+ {
+ reload_db_args->engine = engine;
+ reload_db_args->dboptions = dboptions;
+ reload_db_args->opts = opts;
+
+ pthread_mutex_lock(&reload_mutex);
+ reload = 0;
+ reload_in_progress = 1;
+
+ if (pthread_create(&reload_th, NULL, reload_db, reload_db_args) != 0) {
+ logg("*Error dispatching DB reload thread!\n");
+ reload_in_progress = 0;
+ free(reload_db_args);
+ }
+ pthread_mutex_unlock(&reload_mutex);
+ }
+ } else if(reload_engine != NULL) {
#if defined(FANOTIFY) || defined(CLAMAUTH)
+ pthread_mutex_unlock(&reload_mutex);
if(optget(opts, "ScanOnAccess")->enabled && tharg) {
logg("Restarting on-access scan\n");
pthread_kill(fan_pid, SIGUSR1);
pthread_join(fan_pid, NULL);
}
-#endif
- engine = reload_db(engine, dboptions, opts, FALSE, &ret);
- if(ret) {
- logg("Terminating because of a fatal error.\n");
- if(new_sd >= 0)
- closesocket(new_sd);
- break;
- }
-
pthread_mutex_lock(&reload_mutex);
- reload = 0;
- time(&reloaded_time);
+#endif
+ static struct cl_engine *tmp;
+ tmp = engine;
+ engine = reload_engine;
+ thrmgr_setactiveengine(engine);
+ time(&start_time);
+ reload_engine = NULL;
pthread_mutex_unlock(&reload_mutex);
+
+ cl_engine_free(tmp);
+
#if defined(FANOTIFY) || defined(CLAMAUTH)
if(optget(opts, "ScanOnAccess")->enabled && tharg) {
tharg->engine = engine;
pthread_create(&fan_pid, &fan_attr, fan_th, tharg);
}
#endif
- time(&start_time);
} else {
pthread_mutex_unlock(&reload_mutex);
}
diff --git a/clamd/server.h b/clamd/server.h
index bc2a296..e171ea9 100644
--- a/clamd/server.h
+++ b/clamd/server.h
@@ -46,6 +46,13 @@ struct thrwarg {
unsigned int options;
};

+/* reload_db arguments */
+struct reload_db_args {
+ struct cl_engine *engine;
+ unsigned int dboptions;
+ const struct optstruct *opts;
+};
+
int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine *engine, unsigned int dboptions, const struct optstruct *opts);
void sighandler(int sig);
void sighandler_th(int sig);
--
1.9.0-zedat

_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net
Re: clamd: Keep scanning while reloading database [ In reply to ]
Thanks for sending a patch, we will look at integrating it with ClamAV.

I have opened bug # 10979 at bugzilla.clamav.net for tracking.


On Mon, Apr 28, 2014 at 10:29 AM, Julius Plenz <plenz@cis.fu-berlin.de>wrote:

> Hi,
>
> * Julius Plenz <plenz@cis.fu-berlin.de> [2014-04-07 20:43]:
> > > 1) Start reload_db() in a background thread, resume scanning, and call
> > > back once the new engine is in place; then exchange the pointers
> > > from old to new engine and free the old one.
> >
> > FWIW, I have implemented this option, and it seems to work just
> > fine.
>
> We have been running ClamAV on all incoming mail for the past week
> while periodically reloading a fresh definition database, and it works
> very well. Any chance of getting this included into upstream?
>
> Current patch against Git tag clamav-0.98.1 pasted below.
>
> Cheers,
>
> Julius
> --
> clamd/server-th.c | 145
> ++++++++++++++++++++++++++++++++----------------------
> clamd/server.h | 7 +++
> 2 files changed, 94 insertions(+), 58 deletions(-)
>
> diff --git a/clamd/server-th.c b/clamd/server-th.c
> index 86a3a48..3df5c9c 100644
> --- a/clamd/server-th.c
> +++ b/clamd/server-th.c
> @@ -64,6 +64,8 @@
> int progexit = 0;
> pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
> int reload = 0;
> +volatile int reload_in_progress = 0;
> +struct cl_engine *reload_engine = NULL;
> time_t reloaded_time = 0;
> pthread_mutex_t reload_mutex = PTHREAD_MUTEX_INITIALIZER;
> int sighup = 0;
> @@ -158,40 +160,27 @@ void sighandler_th(int sig)
> logg("$Failed to write to syncpipe\n");
> }
>
> -static struct cl_engine *reload_db(struct cl_engine *engine, unsigned int
> dboptions, const struct optstruct *opts, int do_check, int *ret)
> +static void *reload_db(void *argp)
> {
> - const char *dbdir;
> - int retval;
> - unsigned int sigs = 0;
> - struct cl_settings *settings = NULL;
> -
> - *ret = 0;
> - if(do_check) {
> - if(!dbstat.entries) {
> - logg("No stats for Database check - forcing reload\n");
> - return engine;
> - }
> -
> - if(cl_statchkdir(&dbstat) == 1) {
> - logg("SelfCheck: Database modification detected. Forcing
> reload.\n");
> - return engine;
> - } else {
> - logg("SelfCheck: Database status OK.\n");
> - return NULL;
> - }
> - }
> -
> - /* release old structure */
> - if(engine) {
> + const char *dbdir;
> + int retval;
> + unsigned int sigs = 0;
> + struct cl_settings *settings = NULL;
> + struct cl_engine *engine = NULL;
> +
> + struct reload_db_args *args = (struct reload_db_args *)argp;
> + struct cl_engine *oldengine = args->engine;
> + unsigned int dboptions = args->dboptions;
> + const struct optstruct *opts = args->opts;
> +
> + if(oldengine) {
> /* copy current settings */
> - settings = cl_engine_settings_copy(engine);
> + settings = cl_engine_settings_copy(oldengine);
> if(!settings)
> logg("^Can't make a copy of the current engine settings\n");
> -
> - thrmgr_setactiveengine(NULL);
> - cl_engine_free(engine);
> }
>
> +
> dbdir = optget(opts, "DatabaseDirectory")->strarg;
> logg("Reading databases from %s\n", dbdir);
>
> @@ -201,18 +190,12 @@ static struct cl_engine *reload_db(struct cl_engine
> *engine, unsigned int dbopti
> memset(&dbstat, 0, sizeof(struct cl_stat));
> if((retval = cl_statinidir(dbdir, &dbstat))) {
> logg("!cl_statinidir() failed: %s\n", cl_strerror(retval));
> - *ret = 1;
> - if(settings)
> - cl_engine_settings_free(settings);
> - return NULL;
> + goto err;
> }
>
> if(!(engine = cl_engine_new())) {
> logg("!Can't initialize antivirus engine\n");
> - *ret = 1;
> - if(settings)
> - cl_engine_settings_free(settings);
> - return NULL;
> + goto err;
> }
>
> if(settings) {
> @@ -222,25 +205,38 @@ static struct cl_engine *reload_db(struct cl_engine
> *engine, unsigned int dbopti
> logg("^Using default engine settings\n");
> }
> cl_engine_settings_free(settings);
> + settings = NULL;
> }
>
> if((retval = cl_load(dbdir, engine, &sigs, dboptions))) {
> logg("!reload db failed: %s\n", cl_strerror(retval));
> - cl_engine_free(engine);
> - *ret = 1;
> - return NULL;
> + goto err;
> }
>
> if((retval = cl_engine_compile(engine)) != 0) {
> logg("!Database initialization error: can't compile engine: %s\n",
> cl_strerror(retval));
> - cl_engine_free(engine);
> - *ret = 1;
> - return NULL;
> + goto err;
> }
> logg("Database correctly reloaded (%u signatures)\n", sigs);
>
> - thrmgr_setactiveengine(engine);
> - return engine;
> + pthread_mutex_lock(&reload_mutex);
> + time(&reloaded_time);
> + reload_engine = engine;
> + goto end;
> +
> +err:
> + if(settings)
> + cl_engine_settings_free(settings);
> + if(engine)
> + cl_engine_free(engine);
> +
> + pthread_mutex_lock(&reload_mutex);
> +
> +end:
> + reload_in_progress = 0;
> + pthread_mutex_unlock(&reload_mutex);
> +
> + free(args);
> }
>
> /*
> @@ -713,6 +709,7 @@ int recvloop_th(int *socketds, unsigned nsockets,
> struct cl_engine *engine, unsi
> unsigned long long val;
> size_t i, j, rr_last = 0;
> pthread_t accept_th;
> + pthread_t reload_th;
> pthread_mutex_t fds_mutex = PTHREAD_MUTEX_INITIALIZER;
> pthread_mutex_t recvfds_mutex = PTHREAD_MUTEX_INITIALIZER;
> struct acceptdata acceptdata = ACCEPTDATA_INIT(&fds_mutex,
> &recvfds_mutex);
> @@ -1347,10 +1344,19 @@ int recvloop_th(int *socketds, unsigned nsockets,
> struct cl_engine *engine, unsi
> if(selfchk) {
> time(&current_time);
> if((current_time - start_time) >= (time_t)selfchk) {
> - if(reload_db(engine, dboptions, opts, TRUE, &ret)) {
> + if(!dbstat.entries) {
> + logg("No stats for Database check - forcing reload\n");
> + pthread_mutex_lock(&reload_mutex);
> + reload = 1;
> + pthread_mutex_unlock(&reload_mutex);
> + }
> + else if(cl_statchkdir(&dbstat) == 1) {
> + logg("SelfCheck: Database modification detected.
> Forcing reload.\n");
> pthread_mutex_lock(&reload_mutex);
> reload = 1;
> pthread_mutex_unlock(&reload_mutex);
> + } else {
> + logg("SelfCheck: Database status OK.\n");
> }
> time(&start_time);
> }
> @@ -1358,34 +1364,57 @@ int recvloop_th(int *socketds, unsigned nsockets,
> struct cl_engine *engine, unsi
>
> /* DB reload */
> pthread_mutex_lock(&reload_mutex);
> - if(reload) {
> + if(reload && reload_in_progress) {
> + logg("Database reload already in progress; ignoring reload
> request.\n");
> + reload = 0;
> + pthread_mutex_unlock(&reload_mutex);
> + } else if(reload) {
> pthread_mutex_unlock(&reload_mutex);
> + struct reload_db_args *reload_db_args;
> + reload_db_args = (struct reload_db_args *)
> malloc(sizeof(struct reload_db_args));
> + if (reload_db_args)
> + {
> + reload_db_args->engine = engine;
> + reload_db_args->dboptions = dboptions;
> + reload_db_args->opts = opts;
> +
> + pthread_mutex_lock(&reload_mutex);
> + reload = 0;
> + reload_in_progress = 1;
> +
> + if (pthread_create(&reload_th, NULL, reload_db,
> reload_db_args) != 0) {
> + logg("*Error dispatching DB reload thread!\n");
> + reload_in_progress = 0;
> + free(reload_db_args);
> + }
> + pthread_mutex_unlock(&reload_mutex);
> + }
> + } else if(reload_engine != NULL) {
> #if defined(FANOTIFY) || defined(CLAMAUTH)
> + pthread_mutex_unlock(&reload_mutex);
> if(optget(opts, "ScanOnAccess")->enabled && tharg) {
> logg("Restarting on-access scan\n");
> pthread_kill(fan_pid, SIGUSR1);
> pthread_join(fan_pid, NULL);
> }
> -#endif
> - engine = reload_db(engine, dboptions, opts, FALSE, &ret);
> - if(ret) {
> - logg("Terminating because of a fatal error.\n");
> - if(new_sd >= 0)
> - closesocket(new_sd);
> - break;
> - }
> -
> pthread_mutex_lock(&reload_mutex);
> - reload = 0;
> - time(&reloaded_time);
> +#endif
> + static struct cl_engine *tmp;
> + tmp = engine;
> + engine = reload_engine;
> + thrmgr_setactiveengine(engine);
> + time(&start_time);
> + reload_engine = NULL;
> pthread_mutex_unlock(&reload_mutex);
> +
> + cl_engine_free(tmp);
> +
> #if defined(FANOTIFY) || defined(CLAMAUTH)
> if(optget(opts, "ScanOnAccess")->enabled && tharg) {
> tharg->engine = engine;
> pthread_create(&fan_pid, &fan_attr, fan_th, tharg);
> }
> #endif
> - time(&start_time);
> } else {
> pthread_mutex_unlock(&reload_mutex);
> }
> diff --git a/clamd/server.h b/clamd/server.h
> index bc2a296..e171ea9 100644
> --- a/clamd/server.h
> +++ b/clamd/server.h
> @@ -46,6 +46,13 @@ struct thrwarg {
> unsigned int options;
> };
>
> +/* reload_db arguments */
> +struct reload_db_args {
> + struct cl_engine *engine;
> + unsigned int dboptions;
> + const struct optstruct *opts;
> +};
> +
> int recvloop_th(int *socketds, unsigned nsockets, struct cl_engine
> *engine, unsigned int dboptions, const struct optstruct *opts);
> void sighandler(int sig);
> void sighandler_th(int sig);
> --
> 1.9.0-zedat
>
> _______________________________________________
> http://lurker.clamav.net/list/clamav-devel.html
> Please submit your patches to our Bugzilla: http://bugs.clamav.net
>
_______________________________________________
http://lurker.clamav.net/list/clamav-devel.html
Please submit your patches to our Bugzilla: http://bugs.clamav.net