Mailing List Archive

[RFC PATCH v1 1/2] libs/light: add device model start timeout env var
When debugging QEMU under Valgrind and ASan, the compile-time define
LIBXL_DEVICE_MODEL_START_TIMEOUT had to be changed so that `xl` would
wait longer while the instrumented QEMU initializes.

This commit adds support for reading the environment variable
LIBXL_DEVICE_MODEL_START_TIMEOUT to configure the timeout value,
falling back to the default of 60 seconds when it is not set.

Signed-off-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
---
docs/man/xl.1.pod.in | 11 +++++++++++
tools/libs/light/libxl_9pfs.c | 2 +-
tools/libs/light/libxl_device.c | 2 +-
tools/libs/light/libxl_dm.c | 6 +++---
tools/libs/light/libxl_dom_suspend.c | 2 +-
tools/libs/light/libxl_internal.h | 6 ++++++
tools/libs/light/libxl_pci.c | 6 +++---
7 files changed, 26 insertions(+), 9 deletions(-)

diff --git docs/man/xl.1.pod.in docs/man/xl.1.pod.in
index bed8393473..c159877094 100644
--- docs/man/xl.1.pod.in
+++ docs/man/xl.1.pod.in
@@ -1993,6 +1993,17 @@ Otherwise the build time default in LIBXL_BOOTLOADER_TIMEOUT will be used.
If defined the value must be an unsigned integer between 0 and INT_MAX,
otherwise behavior is undefined. Setting to 0 disables the timeout.

+=item LIBXL_DEVICE_MODEL_START_TIMEOUT
+
+Timeout in seconds for starting the device model process. Useful in case the
+device model takes an unusual amount of time to start, for example in case of
+very slow I/O, in case of slow performance due to memory sanitizer usage, etc.
+
+If undefined, the default hard-coded value of 60 seconds is used.
+
+If defined, the value must be an unsigned integer between 0 and INT_MAX,
+otherwise behaviour is undefined. Setting the value to 0 disables the timeout.
+
=back

=head1 SEE ALSO
diff --git tools/libs/light/libxl_9pfs.c tools/libs/light/libxl_9pfs.c
index 48f894f070..950a464b45 100644
--- tools/libs/light/libxl_9pfs.c
+++ tools/libs/light/libxl_9pfs.c
@@ -132,7 +132,7 @@ static int xen9pfsd_spawn(libxl__egc *egc, uint32_t domid, libxl_device_p9 *p9,
aop9->spawn.ao = aodev->ao;
aop9->spawn.what = "xen-9pfs daemon";
aop9->spawn.xspath = GCSPRINTF("%s/state", path);
- aop9->spawn.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
+ aop9->spawn.timeout_ms = __libxl_device_model_start_timeout() * 1000;
aop9->spawn.pidpath = GCSPRINTF("%s/pid", path);
aop9->spawn.midproc_cb = libxl__spawn_record_pid;
aop9->spawn.confirm_cb = xen9pfsd_confirm;
diff --git tools/libs/light/libxl_device.c tools/libs/light/libxl_device.c
index 6f0100d05e..452e55ba23 100644
--- tools/libs/light/libxl_device.c
+++ tools/libs/light/libxl_device.c
@@ -1436,7 +1436,7 @@ int libxl__wait_for_device_model_deprecated(libxl__gc *gc,

path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
return libxl__xenstore_child_wait_deprecated(gc, domid,
- LIBXL_DEVICE_MODEL_START_TIMEOUT,
+ __libxl_device_model_start_timeout(),
"Device Model", path, state, spawning,
check_callback, check_callback_userdata);
}
diff --git tools/libs/light/libxl_dm.c tools/libs/light/libxl_dm.c
index 0b03a7c747..4369fef161 100644
--- tools/libs/light/libxl_dm.c
+++ tools/libs/light/libxl_dm.c
@@ -2629,7 +2629,7 @@ static void spawn_qmp_proxy(libxl__egc *egc,
sdss->qmp_proxy_spawn.pidpath = GCSPRINTF("%s/image/qmp-proxy-pid", dom_path);
sdss->qmp_proxy_spawn.xspath = DEVICE_MODEL_XS_PATH(gc, LIBXL_TOOLSTACK_DOMID,
dm_domid, "/qmp-proxy-state");
- sdss->qmp_proxy_spawn.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
+ sdss->qmp_proxy_spawn.timeout_ms = __libxl_device_model_start_timeout() * 1000;
sdss->qmp_proxy_spawn.midproc_cb = libxl__spawn_record_pid;
sdss->qmp_proxy_spawn.confirm_cb = qmp_proxy_confirm;
sdss->qmp_proxy_spawn.failure_cb = qmp_proxy_startup_failed;
@@ -3011,7 +3011,7 @@ retry_transaction:
spawn->what = GCSPRINTF("domain %d device model", domid);
spawn->xspath = DEVICE_MODEL_XS_PATH(gc, LIBXL_TOOLSTACK_DOMID, domid,
"/state");
- spawn->timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
+ spawn->timeout_ms = __libxl_device_model_start_timeout() * 1000;
spawn->pidpath = GCSPRINTF("%s/image/device-model-pid", dom_path);
spawn->midproc_cb = libxl__spawn_record_pid;
spawn->confirm_cb = device_model_confirm;
@@ -3435,7 +3435,7 @@ void libxl__spawn_qdisk_backend(libxl__egc *egc, libxl__dm_spawn_state *dmss)

dmss->spawn.what = GCSPRINTF("domain %u Qdisk backend", domid);
dmss->spawn.xspath = GCSPRINTF("device-model/%u/state", domid);
- dmss->spawn.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
+ dmss->spawn.timeout_ms = __libxl_device_model_start_timeout() * 1000;
/*
* We cannot save Qemu pid anywhere in the xenstore guest dir,
* because we will call this from unprivileged driver domains,
diff --git tools/libs/light/libxl_dom_suspend.c tools/libs/light/libxl_dom_suspend.c
index 6091a5f3f6..29e51f6c40 100644
--- tools/libs/light/libxl_dom_suspend.c
+++ tools/libs/light/libxl_dom_suspend.c
@@ -517,7 +517,7 @@ void libxl__dm_resume(libxl__egc *egc,
rc = libxl__ev_time_register_rel(dmrs->ao,
&dmrs->time,
dm_resume_timeout,
- LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000);
+ __libxl_device_model_start_timeout() * 1000);
if (rc) goto out;

switch (libxl__device_model_version_running(gc, domid)) {
diff --git tools/libs/light/libxl_internal.h tools/libs/light/libxl_internal.h
index 803dbc1a03..684390f822 100644
--- tools/libs/light/libxl_internal.h
+++ tools/libs/light/libxl_internal.h
@@ -97,6 +97,12 @@
/* QEMU may be slow to load and start due to a bug in Linux where the I/O
* subsystem sometime produce high latency under load. */
#define LIBXL_DEVICE_MODEL_START_TIMEOUT 60
+static inline int __libxl_device_model_start_timeout(void) /* returns seconds */
+{ /* Env var LIBXL_DEVICE_MODEL_START_TIMEOUT, when set, overrides the default. */
+ const char *timeout_env = getenv("LIBXL_DEVICE_MODEL_START_TIMEOUT");
+ return timeout_env ? atoi(timeout_env) /* NOTE(review): atoi() does not validate */
+ : LIBXL_DEVICE_MODEL_START_TIMEOUT; /* unset: compile-time default (60) */
+} /* NOTE(review): most callers multiply this by 1000 for ms - confirm no overflow */
#define LIBXL_DEVICE_MODEL_SAVE_FILE XEN_LIB_DIR "/qemu-save" /* .$domid */
#define LIBXL_DEVICE_MODEL_RESTORE_FILE XEN_LIB_DIR "/qemu-resume" /* .$domid */
#define LIBXL_QMP_CMD_TIMEOUT 10
diff --git tools/libs/light/libxl_pci.c tools/libs/light/libxl_pci.c
index 96cb4da079..7bdd9f6c3b 100644
--- tools/libs/light/libxl_pci.c
+++ tools/libs/light/libxl_pci.c
@@ -1157,7 +1157,7 @@ static void do_pci_add(libxl__egc *egc,
pas->xswait.what = "Device Model";
pas->xswait.path = DEVICE_MODEL_XS_PATH(gc,
libxl_get_stubdom_id(CTX, domid), domid, "/state");
- pas->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
+ pas->xswait.timeout_ms = __libxl_device_model_start_timeout() * 1000;
pas->xswait.callback = pci_add_qemu_trad_watch_state_cb;
rc = libxl__xswait_start(gc, &pas->xswait);
if (rc) goto out;
@@ -1719,7 +1719,7 @@ static void device_pci_add_stubdom_wait(libxl__egc *egc,
rc = libxl__ev_devstate_wait(ao, &pas->pciback_ds,
device_pci_add_stubdom_ready,
state_path, XenbusStateConnected,
- LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000);
+ __libxl_device_model_start_timeout() * 1000);
if (rc) goto out;
return;
out:
@@ -1976,7 +1976,7 @@ static void do_pci_remove(libxl__egc *egc, pci_remove_state *prs)
prs->xswait.what = "Device Model";
prs->xswait.path = DEVICE_MODEL_XS_PATH(gc,
libxl_get_stubdom_id(CTX, domid), domid, "/state");
- prs->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
+ prs->xswait.timeout_ms = __libxl_device_model_start_timeout() * 1000;
prs->xswait.callback = pci_remove_qemu_trad_watch_state_cb;
rc = libxl__xswait_start(gc, &prs->xswait);
if (rc) goto out_fail;
--
γαῖα πυρί μιχθήτω