Mailing List Archive

r1816 - in trunk/varnish-cache: bin/varnishd include lib/libvcl
Author: phk
Date: 2007-08-08 21:43:51 +0200 (Wed, 08 Aug 2007)
New Revision: 1816

Modified:
trunk/varnish-cache/bin/varnishd/cache.h
trunk/varnish-cache/bin/varnishd/cache_ban.c
trunk/varnish-cache/bin/varnishd/cache_cli.c
trunk/varnish-cache/bin/varnishd/cache_hash.c
trunk/varnish-cache/bin/varnishd/cache_vrt.c
trunk/varnish-cache/bin/varnishd/hash_classic.c
trunk/varnish-cache/bin/varnishd/hash_simple_list.c
trunk/varnish-cache/include/cli.h
trunk/varnish-cache/include/vrt.h
trunk/varnish-cache/lib/libvcl/vcc_action.c
trunk/varnish-cache/lib/libvcl/vcc_fixed_token.c
Log:
Implement purging on either of hash or url.

In VCL:
purge_url(<regexp>)
purge_hash(<regexp>)
(for an interim period purge(<regexp>) will be the same as purge_url).
In CLI
url.purge <regexp>
hash.purge <regexp>

purge_hash operates on the hash-string which results from vcl_hash(),
by default it is composed of:

req.url "#" req.http.host "#"

To purge everything on the virtual host foo.bar.com:

In CLI:
hash.purge "#foo.bar.com#$"

In VCL:
purge_hash("#foo.bar.com#$");


The general format, if you have defined vcl_hash(), is:

Each "req.hash +=" operator appends the right hand side of the
+= and a "#" separator.

You'll have to figure out your own regexps.


Under the hood:

Move the hash string from object to objecthead and save space while
we're at it.

Fix indentation in generated source.



Modified: trunk/varnish-cache/bin/varnishd/cache.h
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache.h 2007-08-08 12:10:17 UTC (rev 1815)
+++ trunk/varnish-cache/bin/varnishd/cache.h 2007-08-08 19:43:51 UTC (rev 1816)
@@ -262,6 +262,8 @@

pthread_mutex_t mtx;
TAILQ_HEAD(,object) objects;
+ char *hash;
+ unsigned hashlen;
};

/* -------------------------------------------------------------------*/
@@ -368,11 +370,12 @@
void vbe_free_bereq(struct bereq *bereq);

/* cache_ban.c */
-void AddBan(const char *);
+void AddBan(const char *, int hash);
void BAN_Init(void);
void cli_func_url_purge(struct cli *cli, char **av, void *priv);
+void cli_func_hash_purge(struct cli *cli, char **av, void *priv);
void BAN_NewObj(struct object *o);
-int BAN_CheckObject(struct object *o, const char *url);
+int BAN_CheckObject(struct object *o, const char *url, const char *hash);

/* cache_center.c [CNT] */
void CNT_Session(struct sess *sp);
@@ -392,8 +395,8 @@

/* cache_hash.c */
void HSH_Prealloc(struct sess *sp);
-int HSH_Compare(struct sess *sp, const char *b, const char *e);
-void HSH_Copy(struct sess *sp, char *b, const char *e);
+int HSH_Compare(struct sess *sp, struct objhead *o);
+void HSH_Copy(struct sess *sp, struct objhead *o);
struct object *HSH_Lookup(struct sess *sp);
void HSH_Unbusy(struct object *o);
void HSH_Ref(struct object *o);

Modified: trunk/varnish-cache/bin/varnishd/cache_ban.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_ban.c 2007-08-08 12:10:17 UTC (rev 1815)
+++ trunk/varnish-cache/bin/varnishd/cache_ban.c 2007-08-08 19:43:51 UTC (rev 1816)
@@ -46,6 +46,7 @@
unsigned gen;
regex_t regexp;
char *ban;
+ int hash;
};

static TAILQ_HEAD(,ban) ban_head = TAILQ_HEAD_INITIALIZER(ban_head);
@@ -53,7 +54,7 @@
static struct ban *ban_start;

void
-AddBan(const char *regexp)
+AddBan(const char *regexp, int hash)
{
struct ban *b;
int i;
@@ -68,6 +69,7 @@
(void)regerror(i, &b->regexp, buf, sizeof buf);
VSL(SLT_Debug, 0, "REGEX: <%s>", buf);
}
+ b->hash = hash;
b->gen = ++ban_next;
b->ban = strdup(regexp);
TAILQ_INSERT_HEAD(&ban_head, b, list);
@@ -82,7 +84,7 @@
}

int
-BAN_CheckObject(struct object *o, const char *url)
+BAN_CheckObject(struct object *o, const char *url, const char *hash)
{
struct ban *b, *b0;
int i;
@@ -91,7 +93,7 @@
for (b = b0;
b != NULL && b->gen > o->ban_seq;
b = TAILQ_NEXT(b, list)) {
- i = regexec(&b->regexp, url, 0, NULL, 0);
+ i = regexec(&b->regexp, b->hash ? hash : url, 0, NULL, 0);
if (!i)
return (1);
}
@@ -104,13 +106,22 @@
{

(void)priv;
- AddBan(av[2]);
+ AddBan(av[2], 0);
cli_out(cli, "PURGE %s\n", av[2]);
}

void
+cli_func_hash_purge(struct cli *cli, char **av, void *priv)
+{
+
+ (void)priv;
+ AddBan(av[2], 1);
+ cli_out(cli, "PURGE %s\n", av[2]);
+}
+
+void
BAN_Init(void)
{

- AddBan("a");
+ AddBan("\001", 0);
}

Modified: trunk/varnish-cache/bin/varnishd/cache_cli.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_cli.c 2007-08-08 12:10:17 UTC (rev 1815)
+++ trunk/varnish-cache/bin/varnishd/cache_cli.c 2007-08-08 19:43:51 UTC (rev 1816)
@@ -67,6 +67,7 @@
{ CLI_URL_QUERY, cli_func_url_query },
#endif
{ CLI_URL_PURGE, cli_func_url_purge },
+ { CLI_HASH_PURGE, cli_func_hash_purge },
{ CLI_VCL_LOAD, cli_func_config_load },
{ CLI_VCL_LIST, cli_func_config_list },
{ CLI_VCL_DISCARD, cli_func_config_discard },

Modified: trunk/varnish-cache/bin/varnishd/cache_hash.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_hash.c 2007-08-08 12:10:17 UTC (rev 1815)
+++ trunk/varnish-cache/bin/varnishd/cache_hash.c 2007-08-08 19:43:51 UTC (rev 1816)
@@ -110,14 +110,16 @@
}

int
-HSH_Compare(struct sess *sp, const char *b, const char *e)
+HSH_Compare(struct sess *sp, struct objhead *obj)
{
int i;
unsigned u, v;
+ const char *b;

- i = sp->lhashptr - (e - b);
+ i = sp->lhashptr - obj->hashlen;
if (i)
return (i);
+ b = obj->hash;
for (u = 0; u < sp->ihashptr; u += 2) {
v = sp->hashptr[u + 1] - sp->hashptr[u];
i = memcmp(sp->hashptr[u], b, v);
@@ -130,16 +132,19 @@
}
assert(*b == '\0');
b++;
- assert(b == e);
+ assert(b == obj->hash + obj->hashlen);
+ VSL(SLT_Debug, sp->fd, "Hash Match: %s", obj->hash);
return (0);
}

void
-HSH_Copy(struct sess *sp, char *b, const char *e)
+HSH_Copy(struct sess *sp, struct objhead *obj)
{
unsigned u, v;
+ char *b;

- assert((e - b) >= sp->lhashptr);
+ assert(obj->hashlen >= sp->lhashptr);
+ b = obj->hash;
for (u = 0; u < sp->ihashptr; u += 2) {
v = sp->hashptr[u + 1] - sp->hashptr[u];
memcpy(b, sp->hashptr[u], v);
@@ -147,7 +152,8 @@
*b++ = '#';
}
*b++ = '\0';
- assert(b <= e);
+ VSL(SLT_Debug, sp->fd, "Hash: %s", obj->hash);
+ assert(b <= obj->hash + obj->hashlen);
}

struct object *
@@ -195,7 +201,8 @@
/* Object banned but not reaped yet */
} else if (o->ttl <= sp->t_req) {
/* Object expired */
- } else if (BAN_CheckObject(o, h->hd[HTTP_HDR_URL].b)) {
+ } else if (BAN_CheckObject(o,
+ h->hd[HTTP_HDR_URL].b, oh->hash)) {
o->ttl = 0;
VSL(SLT_ExpBan, 0, "%u was banned", o->xid);
if (o->heap_idx != 0)

Modified: trunk/varnish-cache/bin/varnishd/cache_vrt.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_vrt.c 2007-08-08 12:10:17 UTC (rev 1815)
+++ trunk/varnish-cache/bin/varnishd/cache_vrt.c 2007-08-08 19:43:51 UTC (rev 1816)
@@ -552,8 +552,8 @@
/*--------------------------------------------------------------------*/

void
-VRT_purge(const char *regexp)
+VRT_purge(const char *regexp, int hash)
{

- AddBan(regexp);
+ AddBan(regexp, hash);
}

Modified: trunk/varnish-cache/bin/varnishd/hash_classic.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/hash_classic.c 2007-08-08 12:10:17 UTC (rev 1815)
+++ trunk/varnish-cache/bin/varnishd/hash_classic.c 2007-08-08 19:43:51 UTC (rev 1816)
@@ -46,8 +46,6 @@
#define HCL_ENTRY_MAGIC 0x0ba707bf
TAILQ_ENTRY(hcl_entry) list;
struct hcl_hd *head;
- char *key;
- unsigned klen;
struct objhead *oh;
unsigned refcnt;
unsigned digest;
@@ -146,15 +144,15 @@
LOCK(&hp->mtx);
TAILQ_FOREACH(he, &hp->head, list) {
CHECK_OBJ_NOTNULL(he, HCL_ENTRY_MAGIC);
- if (sp->lhashptr < he->klen)
+ if (sp->lhashptr < he->oh->hashlen)
continue;
- if (sp->lhashptr > he->klen)
+ if (sp->lhashptr > he->oh->hashlen)
break;
if (he->digest < digest)
continue;
if (he->digest > digest)
break;
- i = HSH_Compare(sp, he->key, he->key + he->klen);
+ i = HSH_Compare(sp, he->oh);
if (i < 0)
continue;
if (i > 0)
@@ -182,19 +180,19 @@
}
UNLOCK(&hp->mtx);

- i = sizeof *he2 + sp->lhashptr;
- he2 = calloc(i, 1);
+ he2 = calloc(sizeof *he2, 1);
XXXAN(he2);
he2->magic = HCL_ENTRY_MAGIC;
he2->oh = noh;
he2->digest = digest;
he2->hash = u1;
he2->head = hp;
- he2->klen = sp->lhashptr;
- noh->hashpriv = he2;

- he2->key = (void*)(he2 + 1);
- HSH_Copy(sp, he2->key, he2->key + sp->lhashptr);
+ noh->hashpriv = he2;
+ noh->hash = malloc(sp->lhashptr);
+ XXXAN(noh->hash);
+ noh->hashlen = sp->lhashptr;
+ HSH_Copy(sp, noh);
}
assert(he2 == NULL); /* FlexeLint */
INCOMPL();

Modified: trunk/varnish-cache/bin/varnishd/hash_simple_list.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/hash_simple_list.c 2007-08-08 12:10:17 UTC (rev 1815)
+++ trunk/varnish-cache/bin/varnishd/hash_simple_list.c 2007-08-08 19:43:51 UTC (rev 1816)
@@ -44,8 +44,6 @@

struct hsl_entry {
TAILQ_ENTRY(hsl_entry) list;
- char *key;
- int keylen;
struct objhead *obj;
unsigned refcnt;
};
@@ -80,7 +78,7 @@

LOCK(&hsl_mutex);
TAILQ_FOREACH(he, &hsl_head, list) {
- i = HSH_Compare(sp, he->key, he->key + he->keylen);
+ i = HSH_Compare(sp, he->obj);
if (i < 0)
continue;
if (i > 0)
@@ -94,14 +92,17 @@
UNLOCK(&hsl_mutex);
return (NULL);
}
- he2 = calloc(sizeof *he2 + sp->lhashptr, 1);
+ he2 = calloc(sizeof *he2, 1);
XXXAN(he2);
he2->obj = nobj;
he2->refcnt = 1;
- he2->key = (void*)(he2 + 1);
- he2->keylen = sp->lhashptr;
- HSH_Copy(sp, he2->key, he2->key + he2->keylen);
+
nobj->hashpriv = he2;
+ nobj->hash = malloc(sp->lhashptr);
+ XXXAN(nobj->hash);
+ nobj->hashlen = sp->lhashptr;
+ HSH_Copy(sp, nobj);
+
if (he != NULL)
TAILQ_INSERT_BEFORE(he, he2, list);
else

Modified: trunk/varnish-cache/include/cli.h
===================================================================
--- trunk/varnish-cache/include/cli.h 2007-08-08 12:10:17 UTC (rev 1815)
+++ trunk/varnish-cache/include/cli.h 2007-08-08 19:43:51 UTC (rev 1816)
@@ -63,10 +63,17 @@
#define CLI_URL_PURGE \
"url.purge", \
"url.purge <regexp>", \
- "\tAll urls matching regexp will consider currently cached\n" \
- "\tobjects obsolete", \
+ "\tAll objects where the urls matches regexp will be " \
+ "marked obsolete.", \
1, 1

+#define CLI_HASH_PURGE \
+ "hash.purge", \
+ "hash.purge <regexp>", \
+ "\tAll objects where the hash string matches regexp will be " \
+ "marked obsolete.", \
+ 1, 1
+
#define CLI_URL_STATUS \
"url.status", \
"url.status <url>", \

Modified: trunk/varnish-cache/include/vrt.h
===================================================================
--- trunk/varnish-cache/include/vrt.h 2007-08-08 12:10:17 UTC (rev 1815)
+++ trunk/varnish-cache/include/vrt.h 2007-08-08 19:43:51 UTC (rev 1816)
@@ -70,7 +70,7 @@
int VRT_re_test(struct vsb *, const char *, int sub);
const char *VRT_regsub(struct sess *sp, const char *, void *, const char *);

-void VRT_purge(const char *);
+void VRT_purge(const char *, int hash);

void VRT_count(struct sess *, unsigned);
int VRT_rewrite(const char *, const char *);

Modified: trunk/varnish-cache/lib/libvcl/vcc_action.c
===================================================================
--- trunk/varnish-cache/lib/libvcl/vcc_action.c 2007-08-08 12:10:17 UTC (rev 1815)
+++ trunk/varnish-cache/lib/libvcl/vcc_action.c 2007-08-08 19:43:51 UTC (rev 1816)
@@ -273,12 +273,12 @@
/*--------------------------------------------------------------------*/

static void
-parse_purge(struct tokenlist *tl)
+parse_purge_url(struct tokenlist *tl)
{

vcc_NextToken(tl);

- Fb(tl, 0, "VRT_purge(");
+ Fb(tl, 1, "VRT_purge(");

Expect(tl, '(');
vcc_NextToken(tl);
@@ -290,12 +290,36 @@

Expect(tl, ')');
vcc_NextToken(tl);
- Fb(tl, 0, ");");
+ Fb(tl, 0, ", 0);\n");
}


/*--------------------------------------------------------------------*/

+static void
+parse_purge_hash(struct tokenlist *tl)
+{
+
+ vcc_NextToken(tl);
+
+ Fb(tl, 1, "VRT_purge(");
+
+ Expect(tl, '(');
+ vcc_NextToken(tl);
+
+ if (!vcc_StringVal(tl)) {
+ vcc_ExpectedStringval(tl);
+ return;
+ }
+
+ Expect(tl, ')');
+ vcc_NextToken(tl);
+ Fb(tl, 0, ", 1);\n");
+}
+
+
+/*--------------------------------------------------------------------*/
+
typedef void action_f(struct tokenlist *tl);

static struct action_table {
@@ -310,7 +334,12 @@
{ "call", parse_call },
{ "set", parse_set },
{ "remove", parse_remove },
- { "purge", parse_purge },
+ { "purge_url", parse_purge_url },
+ { "purge_hash", parse_purge_hash },
+
+ /* XXX: Compat remove in 1.2/2.0 */
+ { "purge", parse_purge_url },
+
{ NULL, NULL }
};


Modified: trunk/varnish-cache/lib/libvcl/vcc_fixed_token.c
===================================================================
--- trunk/varnish-cache/lib/libvcl/vcc_fixed_token.c 2007-08-08 12:10:17 UTC (rev 1815)
+++ trunk/varnish-cache/lib/libvcl/vcc_fixed_token.c 2007-08-08 19:43:51 UTC (rev 1816)
@@ -428,7 +428,7 @@
vsb_cat(sb, "int VRT_re_test(struct vsb *, const char *, int sub);\n");
vsb_cat(sb, "const char *VRT_regsub(struct sess *sp, const char *, void *, const char *);\n");
vsb_cat(sb, "\n");
- vsb_cat(sb, "void VRT_purge(const char *);\n");
+ vsb_cat(sb, "void VRT_purge(const char *, int hash);\n");
vsb_cat(sb, "\n");
vsb_cat(sb, "void VRT_count(struct sess *, unsigned);\n");
vsb_cat(sb, "int VRT_rewrite(const char *, const char *);\n");
r1816 - in trunk/varnish-cache: bin/varnishd include lib/libvcl [ In reply to ]
phk at projects.linpro.no writes:
> Log:
> Implement purging on either of hash or url.
>
> In VCL:
> purge_url(<regexp>)
> purge_hash(<regexp>)
> (for an interim period purge(<regexp>) will be the same as purge_url).
> In CLI
> url.purge <regexp>
> hash.purge <regexp>

I'm not entirely comfortable with this. If the goal is to allow
purging to take the virtual host into account, I believe it would be
better to implement URL normalization so the host name becomes part of
the URL.

That being said, there is no reason to provide backward compat with
purge() since we never released it - it was added after 1.1.

DES
--
Dag-Erling Smørgrav
Senior Software Developer
Linpro AS - www.linpro.no
r1816 - in trunk/varnish-cache: bin/varnishd include lib/libvcl [ In reply to ]
In message <ujr7io4sq1n.fsf at false.linpro.no>, Dag-Erling Smørgrav writes:

>I'm not entirely comfortable with this. If the goal is to allow
>purging to take the virtual host into account, I believe it would be
>better to implement URL normalization so the host name becomes part of
>the URL.

That was my original plan, however, upon thinking about it, I realized
that it would accomplish less, require more code and run slower.

We have no other need of a normalized URL, so the text-processing would
be solely for the ability to purge, and would in all relevant cases mirror
the necessary text-processing we do on the hash string.

And once you boil it down to the essence, what you want to do with
purge is nuke cached objects.

From our point of view, objects have only one relevant unique
identity: their hash string.

And once people start to put cookies or languages into their hash
strings, those properties become valid targets for purges as well
("Purge all french documents").

Unless your VCL is really interesting, the obvious command will
DTRT:
hash.purge mypage.html
or
hash.purge foohost.com

The one thing about this scheme I'm not sure about is the use of '#'
as separator. It's my impression that it occurs infrequently in the
relevant data (url, host, cookies &c) but it bothers me that it is
a common comment character, but I worked hard to find an alternative.

I kept the purge_url around mostly for backwards compatibility,
because you can do the exact same thing with purge_hash.

The "real" solution, would be to have a
vcl.purge <vcl_conditional>
and compile the conditional with VCC, so you could say things like:

vcl.purge "obj.ttl > 1h && obj.http.cookie[FOO]"

But I doubt we'll ever do that...

>That being said, there is no reason to provide backward compat with
>purge() since we never released it - it was added after 1.1.

Ok, I'll yank that then.

--
Poul-Henning Kamp | UNIX since Zilog Zeus 3.20
phk at FreeBSD.ORG | TCP/IP since RFC 956
FreeBSD committer | BSD since 4.3-tahoe
Never attribute to malice what can adequately be explained by incompetence.
r1816 - in trunk/varnish-cache: bin/varnishd include lib/libvcl [ In reply to ]
"Poul-Henning Kamp" <phk at phk.freebsd.dk> writes:
> The one thing about this scheme I'm not sure about is the use of '#'
> as separator. It's my impression that it occurs infrequently in the
> relevant data (url, host, cookies &c) but it bothers me that it is
> a common comment character, but I worked hard to find an alternative.

\001?

DES
--
Dag-Erling Smørgrav
Senior Software Developer
Linpro AS - www.linpro.no
r1816 - in trunk/varnish-cache: bin/varnishd include lib/libvcl [ In reply to ]
In message <ujrr6mcihoa.fsf at false.linpro.no>, Dag-Erling Smørgrav writes:
>"Poul-Henning Kamp" <phk at phk.freebsd.dk> writes:
>> The one thing about this scheme I'm not sure about is the use of '#'
>> as separator. It's my impression that it occurs infrequently in the
>> relevant data (url, host, cookies &c) but it bothers me that it is
>> a common comment character, but I worked hard to find an alternative.
>
>\001?

Yeah, only this might travel through the CLI a lot and a backslash
isn't much fun quoting through scripts.

The other thing I thought about was a space character, as it is
neither legal in hostnames nor urls, but if people stick entire
HTTP lines into the hash, then it's a bad idea, and it requires
the regex to be quoted in the CLI.

--
Poul-Henning Kamp | UNIX since Zilog Zeus 3.20
phk at FreeBSD.ORG | TCP/IP since RFC 956
FreeBSD committer | BSD since 4.3-tahoe
Never attribute to malice what can adequately be explained by incompetence.