Mailing List Archive

[patch 1/9] Conditional Calls - Architecture Independent Code
Conditional calls are used to compile in code that is meant to be dynamically
enabled at runtime. When code is disabled, it has a very small footprint.

It provides a generic cond_call version and optimized per-architecture
cond_calls. The optimized cond_call uses a load immediate to avoid a data
cache hit.

It adds a new rodata section, "__cond_call", to hold the pointers to the
enable values.

The cond_call activation functions sit in module.c.
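
For illustration, a call site could look as follows (a sketch only; the
probe function and the event name are hypothetical, built on the
cond_call_generic() macro defined below):

#include <linux/kernel.h>
#include <linux/condcall.h>

/* Hypothetical probe: compiled in, but not called until the site named
 * "my_event" is armed at runtime with cond_call_arm("my_event"). */
static void my_probe(int value)
{
	printk(KERN_DEBUG "my_event hit: %d\n", value);
}

static void instrumented_function(int value)
{
	/* costs one test of the per-site enable variable when disabled */
	cond_call_generic(0, my_event, my_probe(value));
}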

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
include/asm-generic/vmlinux.lds.h | 16 +-
include/linux/condcall.h | 91 +++++++++++++
include/linux/module.h | 4
kernel/module.c | 248 ++++++++++++++++++++++++++++++++++++++
4 files changed, 356 insertions(+), 3 deletions(-)

Index: linux-2.6-lttng/include/linux/condcall.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/linux/condcall.h 2007-05-17 02:13:53.000000000 -0400
@@ -0,0 +1,91 @@
+#ifndef _LINUX_CONDCALL_H
+#define _LINUX_CONDCALL_H
+
+/*
+ * Conditional function calls. Cheap when disabled, enabled at runtime.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifdef __KERNEL__
+
+struct __cond_call_struct {
+ const char *name;
+ void *enable;
+ int flags;
+} __attribute__((packed));
+
+
+/* Cond call flags : selects the mechanism used to enable the conditional calls
+ * and prescribe what can be executed within their function. This is primarily
+ * used at reentrancy-unfriendly sites. */
+#define CF_OPTIMIZED (1 << 0) /* Use optimized cond_call */
+#define CF_LOCKDEP (1 << 1) /* Can call lockdep */
+#define CF_PRINTK (1 << 2) /* Probe can call vprintk */
+#define CF_STATIC_ENABLE (1 << 3) /* Enable cond_call statically */
+#define _CF_NR 4
+
+#ifdef CONFIG_COND_CALL
+
+/* Generic cond_call flavor always available.
+ * Note : the empty asm volatile with read constraint is used here instead of a
+ * "used" attribute to fix a gcc 4.1.x bug. */
+#define cond_call_generic(flags, name, func) \
+ ({ \
+ static const char __cstrtab_name_##name[] \
+ __attribute__((section("__cond_call_strings"))) = #name; \
+ static char __cond_call_enable_##name = \
+ (flags) & CF_STATIC_ENABLE; \
+ static const struct __cond_call_struct __cond_call_##name \
+ __attribute__((section("__cond_call"))) = \
+ { __cstrtab_name_##name, \
+ &__cond_call_enable_##name, \
+ (flags) & ~CF_OPTIMIZED } ; \
+ asm volatile ( "" : : "i" (&__cond_call_##name)); \
+ (unlikely(__cond_call_enable_##name)) ? \
+ (func) : \
+ (__typeof__(func))0; \
+ })
+
+#define COND_CALL_GENERIC_ENABLE_IMMEDIATE_OFFSET 0
+#define COND_CALL_GENERIC_ENABLE_TYPE unsigned char
+/* Dereference enable as lvalue from a pointer to its instruction */
+#define COND_CALL_GENERIC_ENABLE(a) \
+ *(COND_CALL_GENERIC_ENABLE_TYPE*) \
+ ((char*)a+COND_CALL_GENERIC_ENABLE_IMMEDIATE_OFFSET)
+
+static inline int cond_call_generic_set_enable(void *address, char enable)
+{
+ COND_CALL_GENERIC_ENABLE(address) = enable;
+ return 0;
+}
+
+#else /* !CONFIG_COND_CALL */
+#define cond_call_generic(flags, name, func) \
+ ({ \
+ ((flags) & CF_STATIC_ENABLE) ? \
+ (func) : \
+ (__typeof__(func))0; \
+ })
+#endif /* CONFIG_COND_CALL */
+
+#ifdef CONFIG_COND_CALL_ENABLE_OPTIMIZATION
+#include <asm/condcall.h> /* optimized cond_call flavor */
+#else
+#include <asm-generic/condcall.h> /* fallback on generic cond_call */
+#endif
+
+#define COND_CALL_MAX_FORMAT_LEN 1024
+
+extern int cond_call_arm(const char *name);
+extern int cond_call_disarm(const char *name);
+
+/* cond_call_query : Returns 1 if enabled, 0 if disabled or not present */
+extern int cond_call_query(const char *name);
+extern int cond_call_list(const char *name);
+
+#endif /* __KERNEL__ */
+#endif
Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h 2007-05-17 02:12:25.000000000 -0400
+++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h 2007-05-17 02:13:42.000000000 -0400
@@ -116,11 +116,22 @@
*(__kcrctab_gpl_future) \
VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \
} \
- \
+ /* Conditional calls: pointers */ \
+ __cond_call : AT(ADDR(__cond_call) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start___cond_call) = .; \
+ *(__cond_call) \
+ VMLINUX_SYMBOL(__stop___cond_call) = .; \
+ } \
/* Kernel symbol table: strings */ \
__ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
*(__ksymtab_strings) \
- } \
+ } \
+ /* Conditional calls: strings */ \
+ __cond_call_strings : AT(ADDR(__cond_call_strings) - LOAD_OFFSET) { \
+ *(__cond_call_strings) \
+ } \
+ __end_rodata = .; \
+ . = ALIGN(4096); \
\
/* Built-in module parameters. */ \
__param : AT(ADDR(__param) - LOAD_OFFSET) { \
@@ -228,4 +239,3 @@
*(.initcall6s.init) \
*(.initcall7.init) \
*(.initcall7s.init)
-
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h 2007-05-17 02:12:34.000000000 -0400
+++ linux-2.6-lttng/include/linux/module.h 2007-05-17 02:13:41.000000000 -0400
@@ -16,6 +16,7 @@
#include <linux/stringify.h>
#include <linux/kobject.h>
#include <linux/moduleparam.h>
+#include <linux/condcall.h>
#include <asm/local.h>

#include <asm/module.h>
@@ -356,6 +357,9 @@
/* The command line arguments (may be mangled). People like
keeping pointers to this stuff */
char *args;
+
+ const struct __cond_call_struct *cond_calls;
+ unsigned int num_cond_calls;
};
#ifndef MODULE_ARCH_INIT
#define MODULE_ARCH_INIT {}
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c 2007-05-17 02:12:25.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c 2007-05-17 02:13:53.000000000 -0400
@@ -32,6 +32,7 @@
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <linux/errno.h>
+#include <linux/condcall.h>
#include <linux/err.h>
#include <linux/vermagic.h>
#include <linux/notifier.h>
@@ -141,6 +142,8 @@
extern const unsigned long __start___kcrctab_gpl_future[];
extern const unsigned long __start___kcrctab_unused[];
extern const unsigned long __start___kcrctab_unused_gpl[];
+extern const struct __cond_call_struct __start___cond_call[];
+extern const struct __cond_call_struct __stop___cond_call[];

#ifndef CONFIG_MODVERSIONS
#define symversion(base, idx) NULL
@@ -301,6 +304,230 @@
return NULL;
}

+#ifdef CONFIG_COND_CALL
+
+/* Set the enable bit of the cond_call, choosing the generic or architecture
+ * specific functions depending on the cond_call's flags.
+ */
+static int cond_call_set_enable(void *address, char enable, int flags)
+{
+ if (flags & CF_OPTIMIZED)
+ return cond_call_optimized_set_enable(address, enable);
+ else
+ return cond_call_generic_set_enable(address, enable);
+}
+
+/* Query the state of a cond_calls range. */
+static int _cond_call_query_range(const char *name,
+ const struct __cond_call_struct *begin,
+ const struct __cond_call_struct *end)
+{
+ const struct __cond_call_struct *iter;
+ int ret = 0;
+
+ for (iter = begin; iter < end; iter++) {
+ if (strcmp(name, iter->name) != 0)
+ continue;
+ if (iter->flags & CF_OPTIMIZED)
+ ret = COND_CALL_OPTIMIZED_ENABLE(iter->enable);
+ else
+ ret = COND_CALL_GENERIC_ENABLE(iter->enable);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+/* Sets a range of cond_calls to a enabled state : set the enable bit. */
+static int _cond_call_arm_range(const char *name,
+ const struct __cond_call_struct *begin,
+ const struct __cond_call_struct *end)
+{
+ const struct __cond_call_struct *iter;
+ int found = 0;
+
+ for (iter = begin; iter < end; iter++) {
+ if (strcmp(name, iter->name) != 0)
+ continue;
+ cond_call_set_enable(iter->enable, 1, iter->flags);
+ found++;
+ }
+ return found;
+}
+
+/* Sets a range of cond_calls to a disabled state : unset the enable bit. */
+static int _cond_call_disarm_range(const char *name,
+ const struct __cond_call_struct *begin,
+ const struct __cond_call_struct *end)
+{
+ const struct __cond_call_struct *iter;
+ int found = 0;
+
+ for (iter = begin; iter < end; iter++) {
+ if (strcmp(name, iter->name) != 0)
+ continue;
+ cond_call_set_enable(iter->enable, 0, iter->flags);
+ found++;
+ }
+ return found;
+}
+
+/* Provides a listing of the cond_calls present in the kernel with their type
+ * (optimized or generic) and state (enabled or disabled). */
+static int _cond_call_list_range(const char *name,
+ const struct __cond_call_struct *begin,
+ const struct __cond_call_struct *end)
+{
+ const struct __cond_call_struct *iter;
+ int found = 0;
+
+ for (iter = begin; iter < end; iter++) {
+ if (name)
+ if (strcmp(name, iter->name) != 0)
+ continue;
+ printk("name %s \n", iter->name);
+ if (iter->flags & CF_OPTIMIZED)
+ printk(" enable %u optimized\n",
+ COND_CALL_OPTIMIZED_ENABLE(iter->enable));
+ else
+ printk(" enable %u generic\n",
+ COND_CALL_GENERIC_ENABLE(iter->enable));
+ found++;
+ }
+ return found;
+}
+
+/* Calls _cond_call_query_range for the core cond_calls and modules cond_calls.
+ */
+static int _cond_call_query(const char *name)
+{
+ struct module *mod;
+ int ret = 0;
+
+ /* Core kernel cond_calls */
+ ret = _cond_call_query_range(name,
+ __start___cond_call, __stop___cond_call);
+ if (ret)
+ return ret;
+ /* Cond_calls in modules. */
+ list_for_each_entry(mod, &modules, list) {
+ if (mod->taints)
+ continue;
+ ret = _cond_call_query_range(name,
+ mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+/* Cond_call enabling/disabling use the module_mutex to synchronize.
+ * Returns 1 if enabled, 0 if disabled or not present. */
+int cond_call_query(const char *name)
+{
+ int ret = 0;
+
+ mutex_lock(&module_mutex);
+ ret = _cond_call_query(name);
+ mutex_unlock(&module_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cond_call_query);
+
+/* Calls _cond_call_arm_range for the core cond_calls and modules cond_calls.
+ * FIXME : cond_call will not be active on modules loaded later than the
+ * cond_call_arm. */
+static int _cond_call_arm(const char *name)
+{
+ struct module *mod;
+ int found = 0;
+
+ /* Core kernel cond_calls */
+ found += _cond_call_arm_range(name,
+ __start___cond_call, __stop___cond_call);
+ /* Cond_calls in modules. */
+ list_for_each_entry(mod, &modules, list) {
+ if (mod->taints)
+ continue;
+ found += _cond_call_arm_range(name,
+ mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
+ }
+ return found;
+}
+
+/* Cond_call enabling/disabling use the module_mutex to synchronize. */
+int cond_call_arm(const char *name)
+{
+ int found = 0;
+
+ mutex_lock(&module_mutex);
+ found = _cond_call_arm(name);
+ mutex_unlock(&module_mutex);
+ return found;
+}
+EXPORT_SYMBOL_GPL(cond_call_arm);
+
+/* Calls _cond_call_disarm_range for the core cond_calls and modules cond_calls.
+ */
+static int _cond_call_disarm(const char *name)
+{
+ struct module *mod;
+ int found = 0;
+
+ /* Core kernel cond_calls */
+ found += _cond_call_disarm_range(name,
+ __start___cond_call, __stop___cond_call);
+ /* Cond_calls in modules. */
+ list_for_each_entry(mod, &modules, list) {
+ if (mod->taints)
+ continue;
+ found += _cond_call_disarm_range(name,
+ mod->cond_calls, mod->cond_calls+mod->num_cond_calls);
+ }
+ return found;
+}
+
+/* Cond_call enabling/disabling use the module_mutex to synchronize. */
+int cond_call_disarm(const char *name)
+{
+ int found = 0;
+
+ mutex_lock(&module_mutex);
+ found = _cond_call_disarm(name);
+ mutex_unlock(&module_mutex);
+ return found;
+}
+EXPORT_SYMBOL_GPL(cond_call_disarm);
+
+/* Calls _cond_call_list_range for the core and module cond_calls.
+ * Cond call listing uses the module_mutex to synchronize.
+ * TODO : should output this listing to a procfs file. */
+int cond_call_list(const char *name)
+{
+ struct module *mod;
+ int found = 0;
+
+ mutex_lock(&module_mutex);
+ /* Core kernel cond_calls */
+ printk("Listing kernel cond_calls\n");
+ found += _cond_call_list_range(name,
+ __start___cond_call, __stop___cond_call);
+ /* Cond_calls in modules. */
+ printk("Listing module cond_calls\n");
+ list_for_each_entry(mod, &modules, list) {
+ if (!mod->taints) {
+ printk("Listing cond_calls for module %s\n", mod->name);
+ found += _cond_call_list_range(name, mod->cond_calls,
+ mod->cond_calls+mod->num_cond_calls);
+ }
+ }
+ mutex_unlock(&module_mutex);
+ return found;
+}
+EXPORT_SYMBOL_GPL(cond_call_list);
+
+#endif
+
#ifdef CONFIG_SMP
/* Number of blocks used and allocated. */
static unsigned int pcpu_num_used, pcpu_num_allocated;
@@ -1658,6 +1885,8 @@
unsigned int unusedcrcindex;
unsigned int unusedgplindex;
unsigned int unusedgplcrcindex;
+ unsigned int condcallindex;
+ unsigned int condcallstringsindex;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1754,6 +1983,8 @@
#ifdef ARCH_UNWIND_SECTION_NAME
unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
#endif
+ condcallindex = find_sec(hdr, sechdrs, secstrings, "__cond_call");
+ condcallstringsindex = find_sec(hdr, sechdrs, secstrings, "__cond_call_strings");

/* Don't keep modinfo section */
sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1764,6 +1995,18 @@
#endif
if (unwindex)
sechdrs[unwindex].sh_flags |= SHF_ALLOC;
+#ifdef CONFIG_COND_CALL
+ if (condcallindex)
+ sechdrs[condcallindex].sh_flags |= SHF_ALLOC;
+ if (condcallstringsindex)
+ sechdrs[condcallstringsindex].sh_flags |= SHF_ALLOC;
+#else
+ if (condcallindex)
+ sechdrs[condcallindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
+ if (condcallstringsindex)
+ sechdrs[condcallstringsindex].sh_flags
+ &= ~(unsigned long)SHF_ALLOC;
+#endif

/* Check module struct version now, before we try to use module. */
if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -1904,6 +2147,11 @@
mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
if (gplfuturecrcindex)
mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
+ if (condcallindex) {
+ mod->cond_calls = (void *)sechdrs[condcallindex].sh_addr;
+ mod->num_cond_calls =
+ sechdrs[condcallindex].sh_size / sizeof(*mod->cond_calls);
+ }

mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
if (unusedcrcindex)

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

Re: [patch 1/9] Conditional Calls - Architecture Independent Code
On Wed, 30 May 2007 10:00:26 -0400
Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote:

> Conditional calls are used to compile in code that is meant to be dynamically
> enabled at runtime. When code is disabled, it has a very small footprint.
>
> It provides a generic cond_call version and optimized per-architecture
> cond_calls. The optimized cond_call uses a load immediate to avoid a data
> cache hit.
>
> It adds a new rodata section, "__cond_call", to hold the pointers to the
> enable values.
>
> The cond_call activation functions sit in module.c.
>

The above doesn't really describe what these things are, nor what they are
used for, nor how they actually work. It's all a bit of a mystery.

The i386 implementation appears to be using self-modifying code, which is
intriguing.

<finds the documentation patch>

OK, that helps somewhat.

> ---
> include/asm-generic/vmlinux.lds.h | 16 +-
> include/linux/condcall.h | 91 +++++++++++++
> include/linux/module.h | 4
> kernel/module.c | 248 ++++++++++++++++++++++++++++++++++++++
> 4 files changed, 356 insertions(+), 3 deletions(-)
>
> Index: linux-2.6-lttng/include/linux/condcall.h
> ===================================================================
> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/include/linux/condcall.h 2007-05-17 02:13:53.000000000 -0400
> @@ -0,0 +1,91 @@
> +#ifndef _LINUX_CONDCALL_H
> +#define _LINUX_CONDCALL_H
> +
> +/*
> + * Conditional function calls. Cheap when disabled, enabled at runtime.
> + *
> + * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> + *
> + * This file is released under the GPLv2.
> + * See the file COPYING for more details.
> + */
> +
> +#ifdef __KERNEL__
> +
> +struct __cond_call_struct {
> + const char *name;
> + void *enable;
> + int flags;
> +} __attribute__((packed));

Please document data structures lavishly.

> +
> +/* Cond call flags : selects the mechanism used to enable the conditional calls
> + * and prescribe what can be executed within their function. This is primarily
> + * used at reentrancy-unfriendly sites. */

You consistently use the wrong commenting style. We prefer

/* Cond call flags : selects the mechanism used to enable the conditional calls
* and prescribe what can be executed within their function. This is primarily
* used at reentrancy-unfriendly sites.
*/

or

/*
* Cond call flags : selects the mechanism used to enable the conditional calls
* and prescribe what can be executed within their function. This is primarily
* used at reentrancy-unfriendly sites.
*/

> +#ifdef CONFIG_COND_CALL_ENABLE_OPTIMIZATION
> +#include <asm/condcall.h> /* optimized cond_call flavor */
> +#else
> +#include <asm-generic/condcall.h> /* fallback on generic cond_call */
> +#endif

The preferred way to do this is to give every architecture an
asm/condcall.h and from within that, include asm-generic/condcall.h. Your
[patch 3/9] does most of that, but it didn't remove the above ifdef, and I
don't think it removed the should-be-unneeded
CONFIG_COND_CALL_ENABLE_OPTIMIZATION either?
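
For example (a sketch of the suggested layout; the guard names are
hypothetical):

/* include/asm-i386/condcall.h */
#ifndef _ASM_I386_CONDCALL_H
#define _ASM_I386_CONDCALL_H

/* always provide the generic flavor... */
#include <asm-generic/condcall.h>

/* ...then add the optimized, arch-specific definitions here */

#endif

include/linux/condcall.h could then unconditionally #include
<asm/condcall.h>, with no ifdef and no config option.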

> +#define COND_CALL_MAX_FORMAT_LEN 1024
> +
> +extern int cond_call_arm(const char *name);
> +extern int cond_call_disarm(const char *name);
> +
> +/* cond_call_query : Returns 1 if enabled, 0 if disabled or not present */
> +extern int cond_call_query(const char *name);
> +extern int cond_call_list(const char *name);
> +
> +#endif /* __KERNEL__ */
> +#endif
> Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
> ===================================================================
> --- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h 2007-05-17 02:12:25.000000000 -0400
> +++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h 2007-05-17 02:13:42.000000000 -0400
> @@ -116,11 +116,22 @@
> *(__kcrctab_gpl_future) \
> VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \
> } \
> - \
> + /* Conditional calls: pointers */ \
> + __cond_call : AT(ADDR(__cond_call) - LOAD_OFFSET) { \
> + VMLINUX_SYMBOL(__start___cond_call) = .; \
> + *(__cond_call) \
> + VMLINUX_SYMBOL(__stop___cond_call) = .; \
> + } \
> /* Kernel symbol table: strings */ \
> __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
> *(__ksymtab_strings) \
> - } \
> + } \
> + /* Conditional calls: strings */ \
> + __cond_call_strings : AT(ADDR(__cond_call_strings) - LOAD_OFFSET) { \

whitespace went bad here.


Re: [patch 1/9] Conditional Calls - Architecture Independent Code
Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:

> +struct __cond_call_struct {

Calling structs *_struct is severely deprecated and will cause some people
to make fun of your code.


> + const char *name;
> + void *enable;
> + int flags;
> +} __attribute__((packed));

The packed doesn't seem to be needed. There will be padding at
the end anyways because the next one needs to be aligned.

> +
> +
> +/* Cond call flags : selects the mechanism used to enable the conditional calls
> + * and prescribe what can be executed within their function. This is primarily
> + * used at reentrancy-unfriendly sites. */
> +#define CF_OPTIMIZED (1 << 0) /* Use optimized cond_call */
> +#define CF_LOCKDEP (1 << 1) /* Can call lockdep */
> +#define CF_PRINTK (1 << 2) /* Probe can call vprintk */
> +#define CF_STATIC_ENABLE (1 << 3) /* Enable cond_call statically */

Why is all that needed? Cond_calls shouldn't really need to know anything
about all this. They're just a fancy conditional anyways -- and you don't
tell if() that it may need to printk.

Please consider eliminating.



> +#define _CF_NR 4


> +
> +#ifdef CONFIG_COND_CALL
> +
> +/* Generic cond_call flavor always available.
> + * Note : the empty asm volatile with read constraint is used here instead of a
> + * "used" attribute to fix a gcc 4.1.x bug. */

What gcc 4.1 bug?

> +#define cond_call_generic(flags, name, func) \
> + ({ \
> + static const char __cstrtab_name_##name[] \
> + __attribute__((section("__cond_call_strings"))) = #name; \
> + static char __cond_call_enable_##name = \
> + (flags) & CF_STATIC_ENABLE; \
> + static const struct __cond_call_struct __cond_call_##name \
> + __attribute__((section("__cond_call"))) = \
> + { __cstrtab_name_##name, \
> + &__cond_call_enable_##name, \
> + (flags) & ~CF_OPTIMIZED } ; \
> + asm volatile ( "" : : "i" (&__cond_call_##name)); \
> + (unlikely(__cond_call_enable_##name)) ? \
> + (func) : \
> + (__typeof__(func))0; \
> + })

-Andi

Re: [patch 1/9] Conditional Calls - Architecture Independent Code
* Andrew Morton (akpm@linux-foundation.org) wrote:
> > +#ifdef CONFIG_COND_CALL_ENABLE_OPTIMIZATION
> > +#include <asm/condcall.h> /* optimized cond_call flavor */
> > +#else
> > +#include <asm-generic/condcall.h> /* fallback on generic cond_call */
> > +#endif
>
> The preferred way to do this is to give every architecture an
> asm/condcall.h and from within that, include asm-generic/condcall.h. Your
> [patch 3/9] does most of that, but it didn't remove the above ifdef, and I
> don't think it removed the should-be-unneeded
> CONFIG_COND_CALL_ENABLE_OPTIMIZATION either?
>

Conditional calls work just like the previous markers in this respect:
in order to support embedded systems with read-only memory for the text
segment, I leave the choice to disable the "optimized" cond_call as a
config option even if the architecture has the optimized marker flavor.

I use this include scheme because I want to support both the generic and
optimized versions at the same time: if a _cond_call is declared with
the CF_OPTIMIZED flag unset, it will use the generic version. This is
useful when we must place cond_calls in locations that present specific
reentrancy issues, such as cond_call, printk, and some trap handlers. The
optimized version, when it uses the i386 mechanism to ensure correct
code modification, can trigger a trap, which will call into lockdep and
might have other side-effects.

(I am adding this text to the condcall.h header.)

Mathieu


--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

Re: [patch 1/9] Conditional Calls - Architecture Independent Code
On Wed, May 30, 2007 at 10:00:26AM -0400, Mathieu Desnoyers wrote:
> Conditional calls are used to compile in code that is meant to be dynamically
> enabled at runtime. When code is disabled, it has a very small footprint.
>
> It provides a generic cond_call version and optimized per-architecture
> cond_calls. The optimized cond_call uses a load immediate to avoid a data
> cache hit.
>
> It adds a new rodata section, "__cond_call", to hold the pointers to the
> enable values.
>...

I have two questions to get the bigger picture:

1. How much code will be changed?
Looking at the F00F bug fixup example, it seems we'll have to make
several functions in every single driver conditional in the kernel to
get the best performance.
How many functions do you plan to make conditional this way?

2. What is the real-life performance improvement?
That micro benchmarks comparing cache hits with cache misses give great
looking numbers is obvious.
But what will be the performance improvement in real workloads after the
functions you plan to make conditional according to question 1 have been
made conditional?

TIA
Adrian

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

Re: [patch 1/9] Conditional Calls - Architecture Independent Code
* Andi Kleen (andi@firstfloor.org) wrote:
> Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> writes:
>
> > +struct __cond_call_struct {
>
> Calling structs *_struct is severely deprecated and will cause some people
> to make fun of your code.
>

ok

>
> > + const char *name;
> > + void *enable;
> > + int flags;
> > +} __attribute__((packed));
>
> The packed doesn't seem to be needed. There will be padding at
> the end anyways because the next one needs to be aligned.
>
ok

> > +
> > +
> > +/* Cond call flags : selects the mechanism used to enable the conditional calls
> > + * and prescribe what can be executed within their function. This is primarily
> > + * used at reentrancy-unfriendly sites. */
> > +#define CF_OPTIMIZED (1 << 0) /* Use optimized cond_call */
> > +#define CF_LOCKDEP (1 << 1) /* Can call lockdep */
> > +#define CF_PRINTK (1 << 2) /* Probe can call vprintk */
> > +#define CF_STATIC_ENABLE (1 << 3) /* Enable cond_call statically */
>
> Why is all that needed? Cond_calls shouldn't really need to know anything
> about all this. They're just a fancy conditional anyways -- and you don't
> tell if() that it may need to printk.
>
> Please consider eliminating.
>

I will remove the STATIC_ENABLE and the PRINTK, but I will leave the
CF_LOCKDEP and CF_OPTIMIZED there: they are required to let the generic
version be selected in contexts where a breakpoint cannot be used on
x86 (especially when placing a cond_call within lockdep.c code or any
code that cannot afford to fall into a breakpoint handler).

>
>
> > +#define _CF_NR 4
>
>
> > +
> > +#ifdef CONFIG_COND_CALL
> > +
> > +/* Generic cond_call flavor always available.
> > + * Note : the empty asm volatile with read constraint is used here instead of a
> > + * "used" attribute to fix a gcc 4.1.x bug. */
>
> What gcc 4.1 bug?
>

Please see

http://www.ecos.sourceware.org/ml/systemtap/2006-q4/msg00146.html

for Jeremy Fitzhardinge's comment on the issue. I will add some comments
in the code.
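
(In short, the workaround replaces the "used" attribute with a dummy asm
reference; a sketch with hypothetical names, not the patch code itself:)

struct obj {
	const char *name;
};

/* gcc 4.1.x can drop "foo" from the object file -- and thus from its
 * section -- even though it carries the "used" attribute: */
static struct obj foo __attribute__((used, section("__test"))) = { "foo" };

static inline void keep_foo(void)
{
	/* The workaround: an empty asm taking the address as an "i"
	 * (immediate) input makes gcc treat "foo" as referenced, so
	 * the object is kept. */
	asm volatile ("" : : "i" (&foo));
}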

Mathieu

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

Re: [patch 1/9] Conditional Calls - Architecture Independent Code
Hi Adrian,

* Adrian Bunk (bunk@stusta.de) wrote:

> I have two questions to get the bigger picture:
>
> 1. How much code will be changed?
> Looking at the F00F bug fixup example, it seems we'll have to make
> several functions in every single driver conditional in the kernel to
> get the best performance.
> How many functions do you plan to make conditional this way?
>

I just changed the infrastructure to match Andi's advice: the
cond_calls are now "fancy" variables: they refer to a static variable
address, and every update (which must be done through the cond_call API)
changes every load immediate referring to this variable. Therefore, they
can simply be embedded in an if (cond_call(var)) statement, so there is no
big code change to make.
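
(A sketch of such a call site; the variable and function names are
hypothetical:)

extern void do_tracing(void);

/* may only be updated through the cond_call API */
static int trace_active;

void some_kernel_path(void)
{
	/* with the optimized flavor, this reads trace_active through a
	 * patched load immediate instead of a memory access */
	if (cond_call(trace_active))
		do_tracing();
}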


> 2. What is the real-life performance improvement?
> That micro benchmarks comparing cache hits with cache misses give great
> looking numbers is obvious.
> But what will be the performance improvement in real workloads after the
> functions you plan to make conditional according to question 1 have been
> made conditional?
>

Hrm, I am trying to get interesting numbers out of lmbench: I just ran a
test on a kernel sprinkled with about 50 markers at important sites
(LTTng markers: system call entry/exit, traps, interrupt handlers, ...).
The markers are compiled-in, but in "disabled state". Since the markers
re-use the cond_call infrastructure, each marker has its own cond_call.

I ran the test in two situations on my Pentium 4 box:

1 - Cond call optimizations are disabled. This is the equivalent of
using a global variable (in the kernel data) as a condition for the
branching.

2 - Cond call optimizations are enabled. It uses the load immediate
(which now loads an integer on x86 instead of a char, to make sure
there is no pipeline stall due to a false register dependency).
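
(Roughly, the optimized flavor behaves like the following sketch -- not
the actual implementation; the real code also records the instruction
address in the __cond_call section so the immediate operand can be
located and patched when the site is armed:)

/*
 * The enable value lives in the instruction stream as an immediate
 * operand, so testing it costs no data cache access; arming the site
 * live-patches the $0 below to $1.
 */
#define cond_call_immediate()					\
	({							\
		int __ret;					\
		asm volatile ("movl $0,%0" : "=r" (__ret));	\
		__ret;						\
	})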

The results are that we really cannot tell that one is faster/slower
than the other; the standard deviation is much higher than the
difference between the two situations.

Note that lmbench is a workload that will not trigger much L1 cache
stress, since it repeats the same tests many times. Do you have any
suggestion for a test that would be more representative of a real,
diversified (in terms of in-kernel locality of reference) workload?

Thanks,

Mathieu


> TIA
> Adrian
>
> --
>
> "Is there not promise of rain?" Ling Tan asked suddenly out
> of the darkness. There had been need of rain for many days.
> "Only a promise," Lao Er said.
> Pearl S. Buck - Dragon Seed
>

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

Re: [patch 1/9] Conditional Calls - Architecture Independent Code
On Wed, Jun 13, 2007 at 11:57:24AM -0400, Mathieu Desnoyers wrote:
> Hi Adrian,

Hi Mathieu,

>...
> > 2. What is the real-life performance improvement?
> > That micro benchmarks comparing cache hits with cache misses give great
> > looking numbers is obvious.
> > But what will be the performance improvement in real workloads after the
> > functions you plan to make conditional according to question 1 have been
> > made conditional?
>
> Hrm, I am trying to get interesting numbers out of lmbench: I just ran a
> test on a kernel sprinkled with about 50 markers at important sites
> (LTTng markers: system call entry/exit, traps, interrupt handlers, ...).
> The markers are compiled-in, but in "disabled state". Since the markers
> re-use the cond_call infrastructure, each marker has its own cond_call.
>...
> The results are that we really cannot tell that one is faster/slower
> than the other; the standard deviation is much higher than the
> difference between the two situations.
>
> Note that lmbench is a workload that will not trigger much L1 cache
> stress, since it repeats the same tests many times. Do you have any
> suggestion for a test that would be more representative of a real,
> diversified (in terms of in-kernel locality of reference) workload?

Please correct me if I'm wrong, but I think 50 markers couldn't ever
result in a visible change:

You need a change that is big enough that it has a measurable influence
on the cache hit ratio.

I don't think you could get any measurable influence unless you get into
areas where > 10% of all code is conditional. And that's a percentage
I wouldn't consider realistic.

And one big disadvantage of your implementation is the dependency on
MODULES. If you build all drivers statically into the kernel, switching
from CONFIG_MODULES=y to CONFIG_MODULES=n already gives you for free a
functionally equivalent kernel that is smaller by about 8% (depending
on the .config).

My impression is that your patches would add an infrastructure for a
nice-sounding idea that will never have any real-life effect.

> Thanks,
>
> Mathieu

cu
Adrian

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

Re: [patch 1/9] Conditional Calls - Architecture Independent Code
* Adrian Bunk (bunk@stusta.de) wrote:
> On Wed, Jun 13, 2007 at 11:57:24AM -0400, Mathieu Desnoyers wrote:
> > Hi Adrian,
>
> Hi Mathieu,
>
> >...
> > > 2. What is the real-life performance improvement?
> > > That micro benchmarks comparing cache hits with cache misses give great
> > > looking numbers is obvious.
> > > But what will be the performance improvement in real workloads after the
> > > functions you plan to make conditional according to question 1 have been
> > > made conditional?
> >
> > Hrm, I am trying to get interesting numbers out of lmbench: I just ran a
> > test on a kernel sprinkled with about 50 markers at important sites
> > (LTTng markers: system call entry/exit, traps, interrupt handlers, ...).
> > The markers are compiled-in, but in "disabled state". Since the markers
> > re-use the cond_call infrastructure, each marker has its own cond_call.
> >...
> > The results are that we really cannot tell that one is faster/slower
> > than the other; the standard deviation is much higher than the
> > difference between the two situations.
> >
> > Note that lmbench is a workload that will not trigger much L1 cache
> > stress, since it repeats the same tests many times. Do you have any
> > suggestion of a test that would be more representative of a real
> > diversified (in term of in-kernel locality of reference) workload ?
>
> Please correct me if I'm wrong, but I think 50 markers couldn't ever
> result in a visible change:
>

Well, we must take into account where these markers are added and how
often the marked code is run. Since I mark very highly used code paths
(interrupt handlers, page faults, lockdep code) and also plan to mark
other code paths like the VM subsystem, adding cycles to these code
paths seems like a no-go solution for standard distribution kernels.

> You need a change that is big enough that it has a measurable influence
> on the cache hit ratio.
>
> I don't think you could get any measurable influence unless you get into
> areas where > 10% of all code is conditional. And that's a percentage
> I wouldn't consider realistic.
>

I just constructed a simple workload that accentuates the improvement
brought by the optimized conditional calls:

- I instrument kernel/irq/handle.c:handle_IRQ_event() by disabling
interrupts, taking 2 cycle counter reads, incrementing the number
of events logged by 1, and then re-enabling interrupts.
- I create a small userspace program that writes to 1MB memory buffers
in a loop, simulating a memory-bound user-space workload.
- I get the avg. number of cycles spent per IRQ between the cycle
counter reads.
- I put 4 markers in kernel/irq/handle.c:handle_IRQ_event() between the
cycle counter reads.
- I get the avg. number of cycles with immediate value-based markers and
with static variable-based markers, under an idle system and while
running my user-space program to cause memory pressure. Markers are in
their disabled state.

These tests are conducted on a 3 GHz Pentium 4.
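
In code, the measurement per interrupt looks roughly like this (a
sketch; the probe names are made up, and the marker macro is written as
trace_mark(), which may differ from the macro name used in this
patchset):

#include <linux/irqflags.h>	/* local_irq_save() */
#include <asm/timex.h>		/* get_cycles(), cycles_t */

static cycles_t total_delta;
static unsigned long nr_events;

static void measure_marker_overhead(void)
{
	unsigned long flags;
	cycles_t t1, t2;

	local_irq_save(flags);
	t1 = get_cycles();
	/* the 4 compiled-in, disabled markers under test */
	trace_mark(test_mark_1, "%d", 1);
	trace_mark(test_mark_2, "%d", 2);
	trace_mark(test_mark_3, "%d", 3);
	trace_mark(test_mark_4, "%d", 4);
	t2 = get_cycles();
	total_delta += t2 - t1;
	nr_events++;
	local_irq_restore(flags);
}

The averages below are total_delta / nr_events.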

Results (units are in cycles per interrupt):

Test                          | Idle system | With memory pressure
------------------------------|-------------|---------------------
Markers compiled out          |      100.47 |               100.27
Immediate value-based markers |      100.22 |               100.16
Static variable-based markers |      100.71 |               105.84

It shows that adding 4 markers does not add a visible impact to this
code path, but that using static variable-based markers adds 5 cycles.
Typical interrupt handler durations, taken from an LTTng trace, are in
the 13k-cycle range, so 5 cycles should not add much externally
visible impact to this code path. However, if we plan to use markers to
instrument the VM subsystem, lockdep or, like DTrace, every function
entry/exit, it becomes worthwhile to keep the effect on performance
unmeasurable.

> And one big disadvantage of your implementation is the dependency on
> MODULES. If you build all drivers statically into the kernel, switching
> from CONFIG_MODULES=y to CONFIG_MODULES=n already gives you for free a
> functionally equivalent kernel that is smaller by about 8% (depending
> on the .config).
>

Yes, you are right. I have put my code in module.c only because I need
to take a lock on module_mutex, which is statically declared in module.c.
If declaring this lock globally could be considered an elegant
solution, then I'll be more than happy to put my code in my own new
kernel/condcall.c file. I would remove the dependency on CONFIG_MODULES.
Does it make sense?

> My impression is that your patches would add an infrastructure for a
> nice-sounding idea that will never have any real-life effect.
>

People can get really picky when they have to decide whether or not they
compile in a profiling or tracing infrastructure in a distribution
kernel. If the impact is detectable when they are not doing any tracing
nor profiling, their reflex will be to compile it out so they can have
the "maximum performance". This is why I am going through the trouble of
making the markers' impact as small as possible.

Mathieu


> > Thanks,
> >
> > Mathieu
>
> cu
> Adrian
>
> --
>
> "Is there not promise of rain?" Ling Tan asked suddenly out
> of the darkness. There had been need of rain for many days.
> "Only a promise," Lao Er said.
> Pearl S. Buck - Dragon Seed
>

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

Re: [patch 1/9] Conditional Calls - Architecture Independent Code
On Thu, Jun 14, 2007 at 12:02:42PM -0400, Mathieu Desnoyers wrote:
>...
> Well, we must take into account where these markers are added and how
> often the marked code is run. Since I mark very highly used code paths
> (interrupt handlers, page faults, lockdep code) and also plan to mark
> other code paths like the VM subsystem, adding cycles to these code
> paths seems like a no-go solution for standard distribution kernels.
>...
> People can get really picky when they have to decide whether or not they
> compile in a profiling or tracing infrastructure in a distribution
> kernel. If the impact is detectable when they are not doing any tracing
> nor profiling, their reflex will be to compile it out so they can have
> the "maximum performance". This is why I am going through the trouble of
> making the markers' impact as small as possible.

Now that we finally hear what this code is required for, can we discuss
on this basis whether this is wanted and required?

Including the question of which abuse possibilities such an infrastructure
offers, and whether this is wanted in distribution kernels.

> Mathieu

cu
Adrian

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

Re: [patch 1/9] Conditional Calls - Architecture Independent Code
* Adrian Bunk (bunk@stusta.de) wrote:
> On Thu, Jun 14, 2007 at 12:02:42PM -0400, Mathieu Desnoyers wrote:
> >...
> > Well, we must take into account where these markers are added and how
> > often the marked code is run. Since I mark very highly used code paths
> > (interrupt handlers, page faults, lockdep code) and also plan to mark
> > other code paths like the VM subsystem, adding cycles to these code
> > paths seems like a no-go solution for standard distribution kernels.
> >...
> > > People can get really picky when they have to decide whether or not they
> > > compile in a profiling or tracing infrastructure in a distribution
> > kernel. If the impact is detectable when they are not doing any tracing
> > nor profiling, their reflex will be to compile it out so they can have
> > the "maximum performance". This is why I am going through the trouble of
> > > making the markers' impact as small as possible.
>
> Now that we finally hear what this code is required for, can we discuss
> on this basis whether this is wanted and required?
>
> Including the question of which abuse possibilities such an infrastructure
> offers, and whether this is wanted in distribution kernels.
>

Hi Adrian,

The purpose of this infrastructure has never been a secret; it is a
piece taken from the Linux Kernel Markers. I proposed the first
implementation of markers in December 2006.

Please use the following links as starting points into the thorough
discussion that has already been held on this matter.

First, a huge discussion thread back in November 2006, where the need
for a marker infrastructure was recognized:
http://lwn.net/Articles/200059/

A good summary of my more recent post on kerneltrap:
http://kerneltrap.org/node/8186

If you have new specific concerns to bring forward, I will be glad to
discuss them with you.

Regards,

Mathieu

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
Re: [patch 1/9] Conditional Calls - Architecture Independent Code
On Wed, Jun 20, 2007 at 05:59:27PM -0400, Mathieu Desnoyers wrote:
> * Adrian Bunk (bunk@stusta.de) wrote:
> > On Thu, Jun 14, 2007 at 12:02:42PM -0400, Mathieu Desnoyers wrote:
> > >...
> > > Well, we must take into account where these markers are added and how
> > > often the marked code is run. Since I mark very highly used code paths
> > > (interrupt handlers, page faults, lockdep code) and also plan to mark
> > > other code paths like the VM subsystem, adding cycles to these code
> > > paths seems like a no-go solution for standard distribution kernels.
> > >...
> > > People can get really picky when they have to decide whether or not they
> > > compile in a profiling or tracing infrastructure in a distribution
> > > kernel. If the impact is detectable when they are not doing any tracing
> > > nor profiling, their reflex will be to compile it out so they can have
> > > the "maximum performance". This is why I am going through the trouble of
> > > making the markers' impact as small as possible.
> >
> > Now that we finally hear what this code is required for, can we discuss
> > on this basis whether this is wanted and required?
> >
> > Including the question of which abuse possibilities such an infrastructure
> > offers, and whether this is wanted in distribution kernels.
>
> Hi Adrian,

Hi Mathieu,

> The purpose of this infrastructure has never been a secret; it is a
> piece taken from the Linux Kernel Markers. I proposed the first
> implementation of markers in December 2006.
>
> Please use the following links as starting points into the thorough
> discussion that has already been held on this matter.
>
> First, a huge discussion thread back in November 2006, where the need
> for a marker infrastructure was recognized:
> http://lwn.net/Articles/200059/
>
> A good summary of my more recent post on kerneltrap:
> http://kerneltrap.org/node/8186
>
> If you have new specific concerns to bring forward, I will be glad to
> discuss them with you.

Sorry if I was a bit harsh, but at least to me it wasn't clear that the
main (and perhaps only) reasonable use case for your conditional calls
was to get markers enabled in distribution kernels.

Please correct me, but my understanding is:
- conditional calls aim at getting markers enabled in distribution
kernels
- markers are a valuable debugging tool
- you don't need markers during normal operation
- markers allow normal modules to do things they shouldn't be doing,
implying that markers should _not_ be enabled in normal distribution
kernels

> Regards,
>
> Mathieu

cu
Adrian

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

Re: [patch 1/9] Conditional Calls - Architecture Independent Code
* Adrian Bunk (bunk@stusta.de) wrote:
> On Wed, Jun 20, 2007 at 05:59:27PM -0400, Mathieu Desnoyers wrote:
> > * Adrian Bunk (bunk@stusta.de) wrote:
> > > On Thu, Jun 14, 2007 at 12:02:42PM -0400, Mathieu Desnoyers wrote:
> > > >...
> > > > Well, we must take into account where these markers are added and how
> > > > often the marked code is run. Since I mark very highly used code paths
> > > > (interrupt handlers, page faults, lockdep code) and also plan to mark
> > > > other code paths like the VM subsystem, adding cycles to these code
> > > > paths seems like a no-go solution for standard distribution kernels.
> > > >...
> > > > People can get really picky when they have to decide whether or not they
> > > > compile in a profiling or tracing infrastructure in a distribution
> > > > kernel. If the impact is detectable when they are not doing any tracing
> > > > nor profiling, their reflex will be to compile it out so they can have
> > > > the "maximum performance". This is why I am going through the trouble of
> > > > making the markers' impact as small as possible.
> > >
> > > Now that we finally hear what this code is required for, can we discuss
> > > on this basis whether this is wanted and required?
> > >
> > > Including the question of which abuse possibilities such an infrastructure
> > > offers, and whether this is wanted in distribution kernels.
> >
> > Hi Adrian,
>
> Hi Mathieu,
>
> > The purpose of this infrastructure has never been a secret; it is a
> > piece taken from the Linux Kernel Markers. I proposed the first
> > implementation of markers in December 2006.
> >
> > Please use the following links as starting points into the thorough
> > discussion that has already been held on this matter.
> >
> > First, a huge discussion thread back in November 2006, where the need
> > for a marker infrastructure was recognized:
> > http://lwn.net/Articles/200059/
> >
> > A good summary of my more recent post on kerneltrap:
> > http://kerneltrap.org/node/8186
> >
> > If you have new specific concerns to bring forward, I will be glad to
> > discuss them with you.
>
> Sorry if I was a bit harsh, but at least to me it wasn't clear that the
> main (and perhaps only) reasonable use case for your conditional calls
> was to get markers enabled in distribution kernels.
>
> Please correct me, but my understanding is:
> - conditional calls aim at getting markers enabled in distribution
> kernels

Hi Adrian,

Putting markers in a distribution kernel would indeed be of great
benefit to many users. The issue is not whether distros use it or not,
but whether we can allow users, from power users to senior kernel
developers, even when they compile their own kernel, to turn on a
tracing infrastructure on the fly to study a problem in their system (a
problem coming either from user-space, the kernel, a hypervisor...).

The key aspect here is to provide instrumentation of every privilege
level.

> - markers are a valuable debugging tool

If ptrace() is also called a valuable debugging tool, then yes. Does
that make it less suitable for distributions? The main goal here is not
to serve as a debugging tool for kernel developers; that is just a nice
side-effect. The main purpose of this tracer is to give an overall view
of system activity to help _userspace_ developers determine what is
going wrong in the complex interactions between their multithreaded
application, the X server and network sockets, running on 8 CPUs and
using a distributed FS... the more interaction we have between (many)
processes and the OS, the harder it becomes to study that with the
traditional ptrace() approach. When looking for the cause of a slowdown,
people often just do not know even _which_ process is responsible for
the problem, or whether they should blame the kernel.


> - you don't need markers during normal operation

False. Considering that application development is part of what I call
"normal operation", these tools are needed in everyday use, not just as
part of a particular kernel debug option.

Moreover, we have some stories ready (see Martin Bligh's upcoming
presentation/paper at OLS 2007 next week, "Linux Kernel Debugging on
Google-Sized Clusters") showing that some nasty problems are
reproducible so rarely, and only when a great number of machines is
monitored, that they become nearly impossible to track down without a
preexisting tracing infrastructure deployed on production machines.


> - markers allow normal modules to do things they shouldn't be doing,
> implying that markers should _not_ be enabled in normal distribution
> kernels
>

Not exactly.

1 - Markers are flexible enough to permit defining a custom probe that
can be loaded dynamically as a module, but the "standard" probe will be
coming in some of my pending patches: it parses the format string to
serialize the information into trace buffers (see the sketch after
point 3). Therefore, "standard" marker usage won't require people to
write their own module; they will just enable the markers they want.

2 - kprobes is an example of an infrastructure enabled in distribution
kernels (Red Hat at least) that permits calling modules from arbitrary
breakpoints. Markers somewhat limit this by specifying where the probe
sites are, thereby making the mechanism faster, offering better
reentrancy characteristics, and being better suited to a kernel tree in
constant evolution (markers follow the kernel code and are not
maintained as a separate mapping to fixed kernel symbols/addresses).

3 - Providing a probe implies using a function that is exported as
*_GPL(). What exactly is such a module not supposed to be doing? Are
you expressing some kind of twisted pseudo-security concern? Once you
are executing code in kernel space with write access to memory, you can
do pretty much anything. I think the right way to do it is to provide
the flexibility required to help users do what they need to do (along
with guidelines to help them not shoot themselves in the foot), but not
to _restrict_ what is exported to GPL modules just because one _can_
shoot himself in the foot. Since when are we supposed to provide
protection against modules? Should they run in user-space?
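
As a rough illustration of what the "standard" probe mentioned in
point 1 does with the format string, here is a minimal user-space
sketch. The function name, the trace buffer, and the reduced %d/%s
handling are mine, for illustration only; the real probe in the
pending patches is more complete and does proper bounds checking.

#include <stdarg.h>
#include <string.h>
#include <stdio.h>

static char trace_buf[256];	/* stand-in for a real trace buffer */
static size_t trace_pos;	/* no overflow checking; demo only */

/* Walk the marker format string and serialize each argument. */
static void standard_probe(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	for (; *fmt; fmt++) {
		if (*fmt != '%' || fmt[1] == '\0')
			continue;
		switch (*++fmt) {
		case 'd': {	/* binary dump of an int */
			int v = va_arg(ap, int);
			memcpy(trace_buf + trace_pos, &v, sizeof(v));
			trace_pos += sizeof(v);
			break;
		}
		case 's': {	/* copy the string, including '\0' */
			const char *s = va_arg(ap, const char *);
			size_t len = strlen(s) + 1;
			memcpy(trace_buf + trace_pos, s, len);
			trace_pos += len;
			break;
		}
		}
	}
	va_end(ap);
}

int main(void)
{
	standard_probe("irq %d comm %s", 7, "bash");
	printf("serialized %zu bytes\n", trace_pos);
	return 0;
}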

Regards,

Mathieu


> > Regards,
> >
> > Mathieu
>
> cu
> Adrian
>
> --
>
> "Is there not promise of rain?" Ling Tan asked suddenly out
> of the darkness. There had been need of rain for many days.
> "Only a promise," Lao Er said.
> Pearl S. Buck - Dragon Seed
>

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68