Mailing List Archive

r981 - trunk/varnish-cache/bin/varnishncsa
Author: phk
Date: 2006-09-15 09:14:37 +0200 (Fri, 15 Sep 2006)
New Revision: 981

Added:
trunk/varnish-cache/bin/varnishncsa/flint.lnt
trunk/varnish-cache/bin/varnishncsa/flint.sh
Modified:
trunk/varnish-cache/bin/varnishncsa/varnishncsa.c
Log:
Overhaul NCSA logging.

Reduce memory footprint by only allocating space for a fd first time
we see it. Few if any servers will ever see 64k fd's in use.

Move the sbuf into the logline structure for simplicity.

Access the logline structure through a pointer instead of indexing
the array all the time.

Reduce amount of malloc/free traffic for performance.

Use the "cooked" SHMlog api where the length, fd, etc are broken out
as arguments for us. Pass the FILE* where we want the stuff to
go as private data (stdout for now).

Add FlexeLint files.

Now over to Anders again...



Added: trunk/varnish-cache/bin/varnishncsa/flint.lnt
===================================================================
--- trunk/varnish-cache/bin/varnishncsa/flint.lnt 2006-09-13 21:26:01 UTC (rev 980)
+++ trunk/varnish-cache/bin/varnishncsa/flint.lnt 2006-09-15 07:14:37 UTC (rev 981)
@@ -0,0 +1,60 @@
+-passes=3
+
+-header(../../config.h)
+
++d__attribute__()=
+
+// Fix strchr() semtics, it can only return NULL if arg2 != 0
+-sem(strchr, 1p, type(1), 2n == 0 ? (@p < 1p) : (@p < 1p || @p == 0 ))
+
+-ffc // No automatic custody
+
+-e763 // Redundant declaration for symbol '...' previously declared
+-e726 // Extraneous comma ignored
+-e728 // Symbol ... not explicitly initialized
+-e716 // while(1) ...
+-e785 // Too few initializers for aggregate
+-e786 // String concatenation within initializer
+
+-emacro(740, TAILQ_PREV) // Unusual pointer cast (incompatible indirect types)
+-emacro((826), TAILQ_PREV) // Suspicious pointer-to-pointer conversion (area too small)
+
+
+-esym(534, printf) // Ignoring return value of function
+-esym(534, fprintf) // Ignoring return value of function
+-esym(534, memset) // Ignoring return value of function
+-esym(534, memcpy) // Ignoring return value of function
+-esym(534, memmove) // Ignoring return value of function
+-esym(534, strcpy) // Ignoring return value of function
+-esym(534, vsb_printf) // Ignoring return value of function
+-esym(534, vsb_cat) // Ignoring return value of function
+
+// cache.h
+-emacro(506, INCOMPL) // Constant value Boolean
+
+// cache_center.c
+-efunc(525, CNT_Session) // Negative indentation from line
+-efunc(525, http_FilterHeader) // Negative indentation from line
+-efunc(539, http_FilterHeader) // Positive indentation from line
+
+// cache_vcl.c
+-efunc(525, vcl_handlingname) // Negative indentation from line
+-esym(528, vcl_handlingname) // Not referenced
+-e641 // Converting enum 'cli_status_e' to int
+
+// Review all below this line ///////////////////////////////////////////////
+
+-e767 // Macro redef (system queue.h vs ours )
+
+-e574 // Signed-unsigned mix with relational
+-e712 // Loss of precision (assignment) (long long to
+-e747 // Significant prototype coercion (arg. no. 2) long
+-e713 // Loss of precision (assignment) (unsigned long long to long long)
+-e506 // Constant value boolean
+-e818 // Pointer parameter '...' could be declared as pointing to const
+-e774 // Boolean within 'if' always evaluates to False
+-e534 // Ignoring return value of function
+-e557 // unrecog format
+-e732 // Loss of sign (arg. no. 2) (int to unsigned
+-e737 // [45] Loss of sign in promotion from int to unsigned
+

Added: trunk/varnish-cache/bin/varnishncsa/flint.sh
===================================================================
--- trunk/varnish-cache/bin/varnishncsa/flint.sh 2006-09-13 21:26:01 UTC (rev 980)
+++ trunk/varnish-cache/bin/varnishncsa/flint.sh 2006-09-15 07:14:37 UTC (rev 981)
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+T=/tmp/_$$
+flexelint \
+ -I/usr/include \
+ -I. \
+ -I../../include \
+ flint.lnt \
+ *.c > $T 2>&1
+
+for t in Error Warning Info
+do
+ sed -n "/$t [0-9][0-9][0-9]:/s/.*\($t [0-9][0-9][0-9]\).*/\1/p" $T
+done | awk '
+$2 == 830 { next }
+$2 == 831 { next }
+ {
+ i=$2"_"$1
+ h[i]++
+ n++
+ }
+END {
+ printf "%5d %s\n", n, "Total"
+ for (i in h)
+ printf "%5d %s\n", h[i], i
+ }
+' | sort -rn
+
+cat $T

Modified: trunk/varnish-cache/bin/varnishncsa/varnishncsa.c
===================================================================
--- trunk/varnish-cache/bin/varnishncsa/varnishncsa.c 2006-09-13 21:26:01 UTC (rev 980)
+++ trunk/varnish-cache/bin/varnishncsa/varnishncsa.c 2006-09-15 07:14:37 UTC (rev 981)
@@ -27,385 +27,133 @@
#include "shmlog.h"
#include "varnishapi.h"

+static struct logline {
+ char df_h[4 * (3 + 1)]; /* Datafield for %h (IP adress) */
+ char df_s[4]; /* Datafield for %s, Status */
+ char df_b[12]; /* Datafield for %b, Bytes */
+ char *df_R; /* Datafield for %{Referer} */
+ char *df_U; /* Datafield for %{User-agent} */
+ int bogus_req; /* bogus request */
+ struct vsb *sb;
+} *ll[65536];

-/* Ordering-----------------------------------------------------------*/
-
-
-/* Adding a struct to hold the data for the logline
- *
- */
-
-struct logline {
- char df_h[4 * (3 + 1)]; // Datafield for %h (IP adress)
- int df_hfini; // Set to 1 when a SessionClose is seen.
- unsigned char *df_r; // Datafield for %r (Request)
- int df_rfini; // Set to 1 when a ReqServTime has come.
- unsigned char *df_s; // Datafield for %s, Status
- int df_sfini;
- unsigned char *df_b; // Datafield for %b, Bytes
- int df_bfini;
- struct tm *logline_time; // Datafield for %t
- unsigned char *df_R; // Datafield for %{Referer}i
- int df_Rfini;
- unsigned char *df_U; // Datafield for %{User-agent}i
- int df_Ufini;
- int bogus_req; // Set to 1 if we discover a bogus request
-};
-
-/* We make a array of pointers to vsb's. Sbuf is a string buffer.
- * * The buffer can be made/extended/cleared etc. through a API.
- * * The array is 65536 long because we will use sessionid as key.
- * *
- * */
-
-static struct vsb *ob[65536];
-static struct logline ll[65536];
-
-
-/*
-* Clean order is called once in a while. It clears all the sessions that
-* where never finished (SLT_SessionClose). Because the data is not complete
-* we disregard the data.
-*
-*/
-
-static void
-clean_order(void)
+/* Check if string starts with pfx */
+static int
+ispfx(const char *ptr, unsigned len, const char *pfx)
{
- unsigned u;
+ unsigned l;

- for (u = 0; u < 65536; u++) {
- if (ob[u] == NULL)
- continue;
- vsb_finish(ob[u]);
- vsb_clear(ob[u]);
- }
+ l = strlen(pfx);
+ if (l > len)
+ return (0);
+ if (strncasecmp(ptr, pfx, l))
+ return (0);
+ return (1);
}

-static struct tm *make_timestruct(char *tmpPtra){
- char *tmpPtrb;
- int timesec = 0;
- time_t req_time; // Timeobject used for making the requesttime.
- int i = 0;
- char temp_time[27];
- struct tm *timestring;
-
- temp_time[0] = '\0';
- for ( tmpPtrb = strtok(tmpPtra," "); tmpPtrb != NULL; tmpPtrb = strtok(NULL, " ")){
- if (i == 2){
- // We have the right time
- //printf("Time: %s\n", tmpPtrb);
- tmpPtra = tmpPtrb;
- strncpy(temp_time, tmpPtra, 10);
- temp_time[11] = '\0';
- }
- //printf("ReqServTime number %d: %s\n", i, tmpPtrb);
-
- i++;
- }
-
- //strncpy(temp_time, tmpPtra, 20);
- //printf("inten: %s\n",temp_time);
- timesec = atoi(temp_time);
- req_time = timesec;
- timestring = localtime(&req_time);
- return timestring;
-
-}
-
-static void
-extended_log_format(unsigned char *p)
+static int
+extended_log_format(void *priv, unsigned tag, unsigned fd, unsigned len, unsigned spec, const char *ptr)
{
- unsigned u,v,w;
+ const char *p;
+ char *q;
+ FILE *fo;
+ time_t t;
+ long l;
+ struct tm tm;
+ char tbuf[40];
+ struct logline *lp;

- // Used for getting IP.
- unsigned char *tmpPtr;
- int j;
+ if (!(spec &VSL_S_CLIENT))
+ return (0);

- // Used for requesttime.
- char *tmpPtra;
-
- u = (p[2] << 8) | p[3];
- if (ob[u] == NULL) {
- ob[u] = vsb_new(NULL, NULL, 0, VSB_AUTOEXTEND);
- assert(ob[u] != NULL);
+ if (ll[fd] == NULL) {
+ ll[fd] = calloc(sizeof *ll[fd], 1);
+ assert(ll[fd] != NULL);
+ ll[fd]->sb = vsb_new(NULL, NULL, 0, VSB_AUTOEXTEND);
+ assert(ll[fd]->sb != NULL);
}
-
- v = 0;
- w = 0;
- j = 0;
- //ll[u].df_rfini = 0;
- //ll[u].df_hfini = 0;
+ lp = ll[fd];

- switch (p[0]) {
+ switch (tag) {

case SLT_SessionOpen:
-
- // We catch the IP adress of the session.
- // We also catch IP in SessionReuse because we can not always
- // be sure we see a SessionOpen when we start logging.
-
- //printf("IP %s\n", p+4);
- ll[u].df_h[0] = '\0';
- tmpPtr = strchr(p + 4, ' ');
- if (tmpPtr != NULL){
-
- j = strlen(p + 4) - strlen(tmpPtr); // length of IP
- strncpy(ll[u].df_h, p + 4, j);
- ll[u].df_h[j] = '\0'; // put on a NULL at end of buffer.
- }
- //printf("New session [%d]: %s \n",u, ll[u].df_h);
-
+ for (p = ptr, q = lp->df_h; *p && *p != ' ';)
+ *q++ = *p++;
+ *q = '\0';
break;

case SLT_ReqStart:
-
- // We use XID to catch that a new request is comming inn.
-
+ vsb_clear(lp->sb);
break;

case SLT_RxRequest:
-
- vsb_clear(ob[u]);
-
- if (p[1] >= 4 && !strncasecmp((void *)&p[4], "HEAD",4)){
- vsb_bcat(ob[u], p + 4, strlen(p + 4));
- //printf("Got a HEAD\n");
+ if (ispfx(ptr, len, "HEAD")) {
+ vsb_bcat(lp->sb, ptr, len);
+ } else if (ispfx(ptr, len, "POST")) {
+ vsb_bcat(lp->sb, ptr, len);
+ } else if (ispfx(ptr, len, "GET")) {
+ vsb_bcat(lp->sb, ptr, len);
+ } else {
+ lp->bogus_req = 1;
}
+ break;

- else if (p[1] >= 4 && !strncasecmp((void *)&p[4], "POST",4)){
- vsb_bcat(ob[u], p + 4, strlen(p + 4));
- //printf("Got a POST\n");
- }
-
- else if (p[1] >= 3 && !strncasecmp((void *)&p[4], "GET",3)){
- vsb_bcat(ob[u], p + 4, strlen(p + 4));
- //printf("Got a GET\n");
- }
-
- else {
- //vsb_bcat(ob[u], p + 4, strlen(p + 4));
- //printf("Got something other than HEAD, POST, GET\n");
- ll[u].bogus_req = 1;
- }
- break;
-
case SLT_RxURL:
-
- vsb_cat(ob[u], " ");
- vsb_bcat(ob[u], p + 4, strlen(p + 4));
-
+ vsb_cat(lp->sb, " ");
+ vsb_bcat(lp->sb, ptr, len);
break;

case SLT_RxProtocol:
-
- vsb_cat(ob[u], " ");
- vsb_bcat(ob[u], p + 4, strlen(p + 4));
-
+ vsb_cat(lp->sb, " ");
+ vsb_bcat(lp->sb, ptr, len);
break;

case SLT_TxStatus:
-
- ll[u].df_s = strdup(p + 4);
- ll[u].df_sfini = 1;
-
+ strcpy(lp->df_s, ptr);
break;

case SLT_RxHeader:
- if (p[1] >= 11 && !strncasecmp((void *)&p[4], "user-agent:",11)){
- ll[u].df_U = strdup(p + 4);
- // Jump ahead past "User-Agent: "
- ll[u].df_U = ll[u].df_U + 12;
- ll[u].df_Ufini = 1;
- }
- if (p[1] >= 8 && !strncasecmp((void *)&p[4], "referer:",8)){
- ll[u].df_R = strdup(p + 4);
- ll[u].df_R = ll[u].df_R + 9;
- ll[u].df_Rfini = 1;
- }
-
+ if (ispfx(ptr, len, "user-agent:"))
+ lp->df_U = strdup(ptr + 12);
+ else if (ispfx(ptr, len, "referer:"))
+ lp->df_R = strdup(ptr + 9);
break;

- case SLT_ReqEnd:
-
- // We use ReqServTime to find how the time the request was delivered
- // also to define that a request is finished.
- tmpPtra = strdup(p + 4);
- ll[u].logline_time = make_timestruct(tmpPtra);
- free(tmpPtra);
- ll[u].df_rfini = 1;
- //printf("ReqServTime %s\n", temp_time);
-
- break;
-
case SLT_Length:
-
- // XXX ask DES or PHK about this one. Am I overflowing?
-
- ll[u].df_b = strdup(p + 4);
- ll[u].df_bfini = 1;
- if (!atoi(ll[u].df_b)){
- ll[u].df_b = malloc(2);
- ll[u].df_b[0] = '-';
- ll[u].df_b[1] = '\0';
- }
-
+ if (strcmp(ptr, "0"))
+ strcpy(lp->df_b, ptr);
+ else
+ strcpy(lp->df_b, "-");
break;

- case SLT_SessionClose:
-
- // Session is closed, we clean up things. But do not write.
-
- //printf("Session close [%d]\n", u);
-
- ll[u].df_hfini = 1;
-
- break;
-
- case SLT_SessionReuse:
-
- // We use SessionReuse to catch the IP adress of a session that has already
- // started with a SessionOpen that we did not catch.
- // Other than that it is not used.
-
- // Catch IP if not already done.
-
- if (ll[u].df_h[0] == '\0'){
- // We don't have IP, fetch it.
- //
-
- tmpPtr = strchr(p + 4, ' ');
- if (tmpPtr != NULL){
- j = strlen(p + 4) - strlen(tmpPtr); // length of IP
- strncpy(ll[u].df_h, p + 4, j);
- ll[u].df_h[j] = '\0'; // put on a NULL at end of buffer.
- }
- //printf("Got IP from Reuse [%d] : %s\n", u, ll[u].df_h);
- }
-
- //printf("Session reuse [%d]\n", u);
-
- break;
-
default:
-
- // printf("DEBUG: %s\n", p+4);
-
break;
}
+ if (tag != SLT_ReqEnd)
+ return (0);

-
- if (ll[u].df_rfini) {
- // We have a ReqServTime. Lets print the logline
- // and clear variables that are different for each request.
- //
-
- char temp_time[27]; // Where we store the string we take from the log
-
- // make temp_time
- strftime (temp_time, 29, "[%d/%b/%Y:%X %z] ", ll[u].logline_time);
-
- if (ll[u].df_h[0] == '\0' || ll[u].bogus_req){
- ll[u].bogus_req = 0;
- //printf("Tom IP \n");
- }
- else{
- printf("%s - - %s ", ll[u].df_h, temp_time );
- vsb_finish(ob[u]);
- printf("\"%s\"", vsb_data(ob[u]));
- printf(" %s %s ", ll[u].df_s, ll[u].df_b);
- if (ll[u].df_Rfini){
- printf(" \"%s\" ", ll[u].df_R);
- }
- else {
- printf(" \"-\" ");
- }
-
- if (ll[u].df_Ufini){
- printf(" \"%s\" ", ll[u].df_U);
- }
- else {
- printf(" \"-\" ");
- }
- printf("\n");
- }
- fflush(stdout);
-
- //vsb_finish(ob[u]);
- vsb_clear(ob[u]);
- temp_time[0] = '\0';
-
-
- ll[u].df_rfini = 0;
-
-
- // Clear the TxStaus
-
- if (ll[u].df_sfini){
- free(ll[u].df_s);
- ll[u].df_sfini = 0;
- //printf("Freed df_s [%d]\n", u);
- }
-
- if (ll[u].df_bfini){
- free(ll[u].df_b);
- ll[u].df_bfini = 0;
- //printf("Freed df_b [%d]\n", u);
- }
-
- // Clean User-Agent and Referer
- if (ll[u].df_Ufini){
- ll[u].df_U = ll[u].df_U - 12;
- free(ll[u].df_U);
- ll[u].df_Ufini = 0;
- ll[u].df_U[0] = '\0';
- //printf("Freed df_U [%d]\n", u);
- }
-
- if (ll[u].df_Rfini){
- ll[u].df_R = ll[u].df_R - 9;
- free(ll[u].df_R);
- ll[u].df_R[0] = '\0';
- ll[u].df_Rfini = 0;
- //printf("Freed df_R [%d]\n", u);
- }
-
- // Clean up ReqEnd/Time variables
-
- //if (tmpPtra != NULL){
- // free(tmpPtra);
- //printf("Freed tmpPtra [%d]\n", u);
- //}
-
- /*
- if (tmpPtrb != NULL){
- free(tmpPtrb);
- //printf("Freed tmpPtrb [%d]\n", u);
- }
- if (tmpPtrc != NULL){
- free(tmpPtrc);
- //printf("Freed tmpPtrc [%d]\n", u);
- }
- */
- temp_time[0] = '\0';
-
-
- if (ll[u].df_hfini) {
- // We have a SessionClose. Lets clean data.
- //
- // Clean IP adress
- ll[u].df_h[0] = '\0';
- //printf("Clearer [%d]\n", u);
- ll[u].df_hfini = 0;
-
- }
-
-
-
-
+ fo = priv;
+ assert(1 == sscanf(ptr, "%*u %*u.%*u %ld.", &l));
+ t = l;
+ localtime_r(&t, &tm);
+ strftime(tbuf, sizeof tbuf, "%d/%b/%Y:%T %z", &tm);
+ fprintf(fo, "%s - - %s", lp->df_h, tbuf);
+ vsb_finish(lp->sb);
+ fprintf(fo, " \"%s\"", vsb_data(lp->sb));
+ fprintf(fo, " %s", lp->df_b);
+ if (lp->df_R != NULL) {
+ fprintf(fo, " \"%s\"", lp->df_R);
+ free(lp->df_R);
+ lp->df_R = NULL;
}
-
-
+ if (lp->df_U != NULL) {
+ fprintf(fo, " \"%s\"", lp->df_U);
+ free(lp->df_U);
+ lp->df_U = NULL;
+ }
+ fprintf(fo, "\n");
+
+ return (0);
}

/*--------------------------------------------------------------------*/
@@ -421,15 +169,11 @@
main(int argc, char **argv)
{
int i, c;
- unsigned u, v;
- unsigned char *p;
- char *w_opt = NULL;
- FILE *wfile = NULL;
struct VSL_data *vd;

vd = VSL_New();

- while ((c = getopt(argc, argv, VSL_ARGS "w:")) != -1) {
+ while ((c = getopt(argc, argv, VSL_ARGS "")) != -1) {
i = VSL_Arg(vd, c, optarg);
if (i < 0)
exit (1);
@@ -439,9 +183,6 @@
case 'V':
varnish_version("varnishncsa");
exit(0);
- case 'w':
- w_opt = optarg;
- break;
default:
usage();
}
@@ -450,36 +191,11 @@
if (VSL_OpenLog(vd))
exit (1);

- if (w_opt != NULL) {
- wfile = fopen(w_opt, "w");
- if (wfile == NULL) {
- perror(w_opt);
- exit (1);
- }
- }
- u = 0;
- v = 0;
-
while (1) {
- i = VSL_NextLog(vd, &p);
+ i = VSL_Dispatch(vd, extended_log_format, stdout);
if (i < 0)
break;
- if (i == 0) {
- if (w_opt == NULL) {
- if (++v == 100){
- clean_order();
- fflush(stdout);
- }
- }
- usleep(50000);
- continue;
- }
- v = 0;
-
- extended_log_format(p);
}
-
- clean_order();
return (0);
}