Mailing List Archive

svn commit: r1902731 - /httpd/httpd/trunk/server/util_pcre.c
Author: ylavic
Date: Fri Jul 15 10:36:24 2022
New Revision: 1902731

URL: http://svn.apache.org/viewvc?rev=1902731&view=rev
Log:
util_pcre: Add a thread local subpool cache for when stack does not suffice.

When AP_HAS_THREAD_LOCAL is available, use a thread-local match_thread_state to
save per-thread data in a subpool of the thread's pool.

If private_malloc() gets out of the stack buffer and the current thread has a
pool (i.e. ap_thread_current() != NULL), it will apr_palloc()ate and return
memory from the subpool.

When the match is complete and the match_data are freed, the thread subpool is
cleared thus giving back the memory to the allocator, which itself will give
back the memory or recycle it depending on its max_free setting.

* util_pcre.c:
Restore POSIX_MALLOC_THRESHOLD since this is part of the user API.

* util_pcre.c(match_data_pt):
Type not used (explicitly) anymore, axe.

* util_pcre.c(struct match_data_state):
Put the stack buffer there to simplify code (the state is allocated on
stack anyway).
If APREG_USE_THREAD_LOCAL, add the apr_thread_t* and match_thread_state*
fields that track the thread local data for the match.

* util_pcre.c(alloc_match_data, free_match):
Renamed to setup_state() and cleanup_state(), simplified (no stack buffer
parameters anymore).
cleanup_state() now clears the thread local subpool if used during the match.
setup_state() sets state->thd to ap_thread_current(), thus NULL if it's not a
suitable thread for using thread local data.

* util_pcre.c(private_malloc):
Fix a possible buf_used overflow (size <= avail < APR_ALIGN_DEFAULT(size)).
Create the thread local subpool (once per thread) and allocate from there
when stack space is missing and state->thd != NULL, otherwise fall back to
malloc() still.

* util_pcre.c(private_free):
Do nothing for thread local subpool memory, will be freed in cleanup_state
eventually.


Modified:
httpd/httpd/trunk/server/util_pcre.c

Modified: httpd/httpd/trunk/server/util_pcre.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/util_pcre.c?rev=1902731&r1=1902730&r2=1902731&view=diff
==============================================================================
--- httpd/httpd/trunk/server/util_pcre.c (original)
+++ httpd/httpd/trunk/server/util_pcre.c Fri Jul 15 10:36:24 2022
@@ -73,8 +73,19 @@ POSSIBILITY OF SUCH DAMAGE.
#define APR_WANT_STRFUNC
#include "apr_want.h"

-#ifndef AP_PCRE_STACKBUF_SIZE
-#define AP_PCRE_STACKBUF_SIZE (256)
+#ifndef POSIX_MALLOC_THRESHOLD
+#define POSIX_MALLOC_THRESHOLD (10)
+#endif
+
+#ifdef HAVE_PCRE2
+/* Reserve 128 bytes for the PCRE2 structs, that is (a bit above):
+ * sizeof(pcre2_general_context) + offsetof(pcre2_match_data, ovector)
+ */
+#define AP_PCRE_STACKBUF_SIZE \
+ (128 + POSIX_MALLOC_THRESHOLD * sizeof(PCRE2_SIZE) * 2)
+#else
+#define AP_PCRE_STACKBUF_SIZE \
+ (POSIX_MALLOC_THRESHOLD * sizeof(int) * 3)
#endif

/* Table of error strings corresponding to POSIX error codes; must be
@@ -202,7 +213,7 @@ AP_DECLARE(int) ap_regcomp(ap_regex_t *
{
#ifdef HAVE_PCRE2
uint32_t capcount;
- size_t erroffset;
+ PCRE2_SIZE erroffset;
#else
const char *errorptr;
int erroffset;
@@ -257,105 +268,201 @@ AP_DECLARE(int) ap_regcomp(ap_regex_t *
* Match a regular expression *
*************************************************/

-/* Unfortunately, PCRE requires 3 ints of working space for each captured
+/* Unfortunately, PCRE1 requires 3 ints of working space for each captured
* substring, so we have to get and release working store instead of just using
* the POSIX structures as was done in earlier releases when PCRE needed only 2
* ints. However, if the number of possible capturing brackets is small, use a
* block of store on the stack, to reduce the use of malloc/free. The threshold
- * is in a macro that can be changed at configure time.
- * Yet more unfortunately, PCRE2 wants an opaque context by providing the API
- * to allocate and free it, so to minimize these calls we maintain one opaque
- * context per thread (in Thread Local Storage, TLS) grown as needed, and while
- * at it we do the same for PCRE1 ints vectors. Note that this requires a fast
- * TLS mechanism to be worth it, which is the case of apr_thread_data_get/set()
- * from/to ap_thread_current() when AP_HAS_THREAD_LOCAL; otherwise we'll do
- * the allocation and freeing for each ap_regexec().
+ * is in POSIX_MALLOC_THRESHOLD macro that can be changed at configure time.
+ * PCRE2 takes an opaque match context and lets us provide the callbacks to
+ * manage the memory needed during the match, so we can still use a small stack
+ * space that'll suffice for regexes up to POSIX_MALLOC_THRESHOLD captures (and
+ * not too complex), above that either use some thread local subpool cache (if
+ * AP_HAS_THREAD_LOCAL) or fall back to malloc()/free().
*/

+#if AP_HAS_THREAD_LOCAL && !defined(APREG_NO_THREAD_LOCAL)
+#define APREG_USE_THREAD_LOCAL 1
+#else
+#define APREG_USE_THREAD_LOCAL 0
+#endif
+
#ifdef HAVE_PCRE2
-typedef pcre2_match_data* match_data_pt;
-typedef size_t* match_vector_pt;
+typedef PCRE2_SIZE* match_vector_pt;
#else
-typedef int* match_data_pt;
-typedef int* match_vector_pt;
+typedef int* match_vector_pt;
#endif

-struct match_data_state
-{
- match_data_pt data;
- char *buf;
- apr_size_t buf_len;
+#if APREG_USE_THREAD_LOCAL
+struct match_thread_state {
+ char *heap;
+ apr_size_t heap_size;
+ apr_size_t heap_used;
+ apr_pool_t *pool;
+};
+
+AP_THREAD_LOCAL static struct match_thread_state *thread_state;
+#endif
+
+struct match_data_state {
+ /* keep first, struct aligned */
+ char buf[AP_PCRE_STACKBUF_SIZE];
apr_size_t buf_used;
+
+#if APREG_USE_THREAD_LOCAL
+ apr_thread_t *thd;
+ struct match_thread_state *ts;
+#endif
+
#ifdef HAVE_PCRE2
pcre2_general_context *pcre2_ctx;
+ pcre2_match_data* match_data;
+#else
+ int *match_data;
#endif
};

static void * private_malloc(size_t size, void *ctx)
{
struct match_data_state *state = ctx;
+ apr_size_t avail;

- if(size <= (state->buf_len - state->buf_used)) {
+ avail = sizeof(state->buf) - state->buf_used;
+ if (avail >= size) {
void *p = state->buf + state->buf_used;
+ size = APR_ALIGN_DEFAULT(size);
+ if (size > avail) {
+ size = avail;
+ }
+ state->buf_used += size;
+ return p;
+ }

- state->buf_used += APR_ALIGN_DEFAULT(size);
+#if APREG_USE_THREAD_LOCAL
+ if (state->thd) {
+ struct match_thread_state *ts = thread_state;
+ void *p;
+
+ if (!ts) {
+ apr_pool_t *tp = apr_thread_pool_get(state->thd);
+ ts = apr_pcalloc(tp, sizeof(*ts));
+ apr_pool_create(&ts->pool, tp);
+ thread_state = state->ts = ts;
+ }
+ else if (!state->ts) {
+ ts->heap_used = 0;
+ state->ts = ts;
+ }

+ avail = ts->heap_size - ts->heap_used;
+ if (avail >= size) {
+ size = APR_ALIGN_DEFAULT(size);
+ if (size > avail) {
+ size = avail;
+ }
+ p = ts->heap + ts->heap_used;
+ }
+ else {
+ ts->heap_size *= 2;
+ size = APR_ALIGN_DEFAULT(size);
+ if (ts->heap_size < size) {
+ ts->heap_size = size;
+ }
+ if (ts->heap_size < AP_PCRE_STACKBUF_SIZE * 2) {
+ ts->heap_size = AP_PCRE_STACKBUF_SIZE * 2;
+ }
+ ts->heap = apr_palloc(ts->pool, ts->heap_size);
+ ts->heap_used = 0;
+ p = ts->heap;
+ }
+
+ ts->heap_used += size;
return p;
}
- else {
- return malloc(size);
- }
+#endif
+
+ return malloc(size);
}

static void private_free(void *block, void *ctx)
{
struct match_data_state *state = ctx;
- void *buf_start = state->buf;
- void *buf_end = state->buf + state->buf_len;
+ char *p = block;

- if (block >= buf_start && block <= buf_end) {
+ if (p >= state->buf && p < state->buf + sizeof(state->buf)) {
/* This block allocated from stack buffer. Do nothing. */
+ return;
}
- else {
- free(block);
+
+#if APREG_USE_THREAD_LOCAL
+ if (state->thd) {
+ /* Freed in cleanup_state() eventually. */
+ return;
}
+#endif
+
+ free(block);
}

static APR_INLINE
-match_data_pt alloc_match_data(apr_size_t size,
- struct match_data_state *state,
- void *stack_buf,
- apr_size_t stack_buf_len)
+int setup_state(struct match_data_state *state, apr_uint32_t ncaps)
{
- state->buf = stack_buf;
- state->buf_len = stack_buf_len;
state->buf_used = 0;
+#if APREG_USE_THREAD_LOCAL
+ state->thd = ap_thread_current();
+ state->ts = NULL;
+#endif

#ifdef HAVE_PCRE2
- state->pcre2_ctx = pcre2_general_context_create(private_malloc, private_free, state);
+ state->pcre2_ctx = pcre2_general_context_create(private_malloc,
+ private_free, state);
if (!state->pcre2_ctx) {
- return NULL;
+ return 0;
}
- state->data = pcre2_match_data_create((int)size, state->pcre2_ctx);
- if (!state->data) {
+
+ state->match_data = pcre2_match_data_create(ncaps, state->pcre2_ctx);
+ if (!state->match_data) {
pcre2_general_context_free(state->pcre2_ctx);
- return NULL;
+ return 0;
}
#else
- state->data = private_malloc(size * sizeof(int) * 3, state);
+ if (ncaps) {
+ state->match_data = private_malloc(ncaps * sizeof(int) * 3, state);
+ if (!state->match_data) {
+ return 0;
+ }
+ }
+ else {
+ /* Fine with PCRE1 */
+ state->match_data = NULL;
+ }
#endif

- return state->data;
+ return 1;
}

static APR_INLINE
-void free_match_data(struct match_data_state *state)
+void cleanup_state(struct match_data_state *state)
{
#ifdef HAVE_PCRE2
- pcre2_match_data_free(state->data);
+ pcre2_match_data_free(state->match_data);
pcre2_general_context_free(state->pcre2_ctx);
#else
- private_free(state->data, state);
+ if (state->match_data) {
+ private_free(state->match_data, state);
+ }
+#endif
+
+#if APREG_USE_THREAD_LOCAL
+ if (state->ts) {
+ /* Let the thread's pool allocator recycle or free according
+ * to its max_free setting.
+ */
+ struct match_thread_state *ts = state->ts;
+ apr_pool_clear(ts->pool);
+ ts->heap_size = 0;
+ ts->heap = NULL;
+ }
#endif
}

@@ -372,16 +479,12 @@ AP_DECLARE(int) ap_regexec_len(const ap_
ap_regmatch_t *pmatch, int eflags)
{
int rc;
- int options = 0, to_free = 0;
- match_vector_pt ovector = NULL;
- apr_size_t ncaps = (apr_size_t)preg->re_nsub + 1;
+ int options = 0;
struct match_data_state state;
- match_data_pt data;
- /* Use apr_uint64_t to get proper alignment. */
- apr_uint64_t stack_buf[(AP_PCRE_STACKBUF_SIZE + sizeof(apr_uint64_t) - 1) / sizeof(apr_uint64_t)];
+ match_vector_pt ovector = NULL;
+ apr_uint32_t ncaps = (apr_uint32_t)preg->re_nsub + 1;

- data = alloc_match_data(ncaps, &state, stack_buf, sizeof(stack_buf));
- if (!data) {
+ if (!setup_state(&state, ncaps)) {
return AP_REG_ESPACE;
}

@@ -396,11 +499,11 @@ AP_DECLARE(int) ap_regexec_len(const ap_

#ifdef HAVE_PCRE2
rc = pcre2_match((const pcre2_code *)preg->re_pcre,
- (const unsigned char *)buff, len,
- 0, options, data, NULL);
- ovector = pcre2_get_ovector_pointer(data);
+ (const unsigned char *)buff, len, 0, options,
+ state.match_data, NULL);
+ ovector = pcre2_get_ovector_pointer(state.match_data);
#else
- ovector = data;
+ ovector = state.match_data;
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len,
0, options, ovector, ncaps * 3);
#endif
@@ -415,11 +518,11 @@ AP_DECLARE(int) ap_regexec_len(const ap_
}
for (; i < nmatch; i++)
pmatch[i].rm_so = pmatch[i].rm_eo = -1;
- free_match_data(&state);
+ cleanup_state(&state);
return 0;
}
else {
- free_match_data(&state);
+ cleanup_state(&state);
#ifdef HAVE_PCRE2
if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
return AP_REG_INVARG;