Mailing List Archive

cvs commit: embperl/Embperl Tokens.pm
richter 00/05/03 00:17:40

Modified: . Tag: Embperl2 epparse.c test.pl
Embperl Tag: Embperl2 Tokens.pm
Log:
Parser

Revision Changes Path
No revision


No revision


1.1.2.3 +140 -58 embperl/Attic/epparse.c

Index: epparse.c
===================================================================
RCS file: /home/cvs/embperl/Attic/epparse.c,v
retrieving revision 1.1.2.2
retrieving revision 1.1.2.3
diff -u -r1.1.2.2 -r1.1.2.3
--- epparse.c 2000/05/02 20:03:11 1.1.2.2
+++ epparse.c 2000/05/03 07:17:39 1.1.2.3
@@ -21,7 +21,8 @@
const char * sText ; /* string of token */
int nTextLen ; /* len of string */
const char * sEndText ; /* string which ends the block */
- struct tToken * pFollowedBy ; /* table of tokens that can follow this one */
+ const char * sNodeName; /* name of the node to create */
+ struct tTokenTable * pFollowedBy ; /* table of tokens that can follow this one */
struct tTokenTable * pInside ; /* table of tokens that can apear inside this one */
} ;

@@ -52,8 +53,57 @@
return strcmp (*((const char * *)p1), *((const char * *)p2)) ;
}

+/* ------------------------------------------------------------------------ */
+/* */
+/* BuildSubTokenTable */
+/* */
+/* Build the C token table out of a Perl Hash */
+/* */
+/* ------------------------------------------------------------------------ */
+
+int BuildSubTokenTable (/*i/o*/ register req * r,
+ /*in*/ HV * pHash,
+ /*in*/ const char * pKey,
+ /*in*/ const char * pAttr,
+ /*in*/ const char * pDefEnd,
+ /*out*/ struct tTokenTable * * pTokenTable)
+
+
+ {
+ SV * * ppSV ;
+ int rc ;

+ ppSV = hv_fetch(pHash, (char *)pAttr, strlen (pAttr), 0) ;
+ if (ppSV != NULL)
+ {
+ struct tTokenTable * pNewTokenTable ;

+ if (*ppSV == NULL || !SvROK (*ppSV) || SvTYPE (SvRV (*ppSV)) != SVt_PVHV)
+ {
+ strncpy (r -> errdat1, "BuildTokenHash", sizeof (r -> errdat1)) ;
+ sprintf (r -> errdat2, "%s => %s", pKey, pAttr) ;
+ return rcNotHashRef ;
+ }
+
+ if ((pNewTokenTable = _malloc (r, sizeof (struct tTokenTable))) == NULL)
+ return rcOutOfMemory ;
+
+ if (r -> bDebug & dbgBuildToken)
+ lprintf (r, "[%d]TOKEN: -> %s\n", r -> nPid, pAttr) ;
+ if ((rc = BuildTokenTable (r, (HV *)SvRV (*ppSV), pDefEnd, pNewTokenTable)))
+ return rc ;
+ if (r -> bDebug & dbgBuildToken)
+ lprintf (r, "[%d]TOKEN: <- %s\n", r -> nPid, pAttr) ;
+
+ *pTokenTable = pNewTokenTable ;
+ return ok ;
+ }
+
+ *pTokenTable = NULL ;
+ return ok ;
+ }
+
+
/* ------------------------------------------------------------------------ */
/* */
/* BuildTokenTable */
@@ -62,17 +112,18 @@
/* */
/* ------------------------------------------------------------------------ */

-int BuildTokenTable (/*i/o*/ register req * r,
- /*in*/ const char * sTokenHash,
+int BuildTokenTable (/*i/o*/ register req * r,
+ /*in*/ HV * pTokenHash,
+ /*in*/ char * pDefEnd,
/*out*/ struct tTokenTable * pTokenTable)


{
- HV * pTokenHash ;
+ int rc ;
SV * pToken ;
HE * pEntry ;
char * pKey ;
- char * c ;
+ const char * c ;
int numTokens ;
struct tToken * pTable ;
struct tToken * p ;
@@ -82,12 +133,8 @@
unsigned char * pAllChars = pTokenTable -> cAllChars ;

memset (pStartChars, 0, sizeof (pTokenTable -> cStartChars)) ;
+ memset (pAllChars, 0, sizeof (pTokenTable -> cAllChars)) ;

- if ((pTokenHash = perl_get_hv ((char *)sTokenHash, TRUE)) == NULL)
- {
- LogError (r, rcHashError) ;
- return 1 ;
- }

numTokens = 1 ;
hv_iterinit (pTokenHash) ;
@@ -106,25 +153,24 @@
while ((pEntry = hv_iternext (pTokenHash)))
{
HV * pHash ;
+ struct tTokenTable * pNewTokenTable ;

pKey = hv_iterkey (pEntry, &l) ;
pToken = hv_iterval (pTokenHash, pEntry) ;

- if (SvTYPE (pToken) != SVt_RV)
+ if (!SvROK (pToken) || SvTYPE (SvRV (pToken)) != SVt_PVHV)
{
strncpy (r -> errdat1, "BuildTokenHash", sizeof (r -> errdat1)) ;
- sprintf (r -> errdat2, "%s, Entry no %d => %s", n + 1, pKey) ;
- LogError (r, rcNotHashRef) ;
- return 1 ;
+ sprintf (r -> errdat2, "%s", pKey) ;
+ return rcNotHashRef ;
}
pHash = (HV *)SvRV (pToken) ;

p = &pTable[n] ;
p -> sText = GetHashValueStr (pHash, "text", "") ;
p -> nTextLen = strlen (p -> sText) ;
- p -> sEndText = GetHashValueStr (pHash, "end", "") ;
- p -> pFollowedBy = NULL ;
- p -> pInside = NULL ;
+ p -> sEndText = GetHashValueStr (pHash, "end", pDefEnd) ;
+ p -> sNodeName = GetHashValueStr (pHash, "nodename", NULL) ;

c = p -> sText ;
pStartChars [*c >> 3] |= 1 << (*c & 7) ;
@@ -134,10 +180,19 @@
pAllChars [*c >> 3] |= 1 << (*c & 7) ;
c++ ;
}
-
+
if (r -> bDebug & dbgBuildToken)
lprintf (r, "[%d]TOKEN: %s ... %s\n", r -> nPid, p -> sText, p -> sEndText) ;

+ if ((rc = BuildSubTokenTable (r, pHash, pKey, "follow", p -> sEndText, &pNewTokenTable)))
+ return rc ;
+ p -> pFollowedBy = pNewTokenTable ;
+
+ if ((rc = BuildSubTokenTable (r, pHash, pKey, "inside", p -> sEndText, &pNewTokenTable)))
+ return rc ;
+ p -> pInside = pNewTokenTable ;
+
+
n++ ;
}

@@ -167,9 +222,18 @@

{
struct tTokenCmp * c = (struct tTokenCmp *)p1 ;
- printf ("strncmp (%s, %s, %d)\n", c -> pStart, *((const char * *)p2), c -> nLen) ;
- if (c -> nLen ==
- return strncmp (c -> pStart, *((const char * *)p2), c -> nLen) ;
+ int i ;
+
+ //printf ("strncmp (%s, %s, %d)\n", c -> pStart, *((const char * *)p2), c -> nLen) ;
+ if ((i = strncmp (c -> pStart, *((const char * *)p2), c -> nLen)) == 0)
+ {
+ if (c -> nLen == ((struct tToken *)p2) -> nTextLen)
+ return 0 ;
+ else if (c -> nLen > ((struct tToken *)p2) -> nTextLen)
+ return 1 ;
+ return -1 ;
+ }
+ return i ;
}


@@ -181,80 +245,86 @@
/* */
/* ------------------------------------------------------------------------ */

-int ParseTokens (/*in */ char * pCurr,
+int ParseTokens (/*in */ char * * ppCurr,
char * pEnd,
struct tTokenTable * pTokenTable,
- const char * sEndText)
+ const char * sEndText,
+ const char * pParentNodeName,
+ int level)

{
- struct tToken * pTokenTab = pTokenTable -> pTokens ;
- int numTokens = pTokenTable -> numTokens ;
unsigned char * pStartChars = pTokenTable -> cStartChars ;
- unsigned char * pAllChars = pTokenTable -> cAllChars ;
struct tTokenCmp c ;
-
-
+ int nEndText = strlen (sEndText) ;
+ char * pCurr = *ppCurr ;
+ char * pCurrStart = pCurr ;
+
while (pCurr < pEnd)
{
if (pStartChars [*pCurr >> 3] & 1 << (*pCurr & 7))
{
struct tToken * pToken = NULL ;
- struct tToken * pNext ;
-
- c.pCurr = pCurr ;
- c.pStart = pCurr ;
+ struct tTokenTable * pNextTokenTab = pTokenTable ;
+ char * pNodeName = NULL ;

do
{
- while (pAllChars [*c.pCurr >> 3] & 1 << (*c.pCurr & 7))
- c.pCurr++ ;
+ struct tToken * pTokenTab = pNextTokenTab -> pTokens ;
+ int numTokens = pNextTokenTab -> numTokens ;
+ unsigned char * pAllChars = pNextTokenTab -> cAllChars ;
+
+ c.pStart = pCurr ;
+
+ while (pAllChars [*pCurr >> 3] & (1 << (*pCurr & 7)))
+ pCurr++ ;

- c.nLen = c.pCurr - c.pStart ;
+ c.nLen = pCurr - c.pStart ;
if (pToken = (struct tToken *)bsearch (&c, pTokenTab, numTokens, sizeof (struct tToken), CmpTokenN))
{
- pNext = pToken + 1;
-
- while (pNext -> sText)
- {
- if (strncmp (pCurr, pNext -> sText, pNext -> nTextLen) != 0)
- break ;
- pToken = pNext ;
- pNext++ ;
- }
- printf ("%s ", pToken -> sText) ;
- pCurr += pToken -> nTextLen ;
while (isspace (*pCurr))
pCurr++ ;
- }
+
+ if (pToken -> sNodeName)
+ pNodeName = pToken -> sNodeName ;
+ }
else
break ;
}
- while (pTokenTab = pToken -> pFollowedBy) ;
+ while (pNextTokenTab = pToken -> pFollowedBy) ;

if (pToken)
{
struct tTokenTable * pInside ;

- printf ("<\n") ;
+ if (pNodeName == NULL)
+ pNodeName = pToken -> sText ;
+ printf ("\n%*s--> %s:", level * 4, " ", pNodeName) ;
if (pInside = pToken -> pInside)
{
- ParseTokens (pCurr, pEnd, pInside, pToken -> sEndText) ;
+ ParseTokens (&pCurr, pEnd, pInside, pToken -> sEndText, pNodeName, level+1) ;
}
else
{
char * pEndCurr = strstr (pCurr, pToken -> sEndText) ;
if (pEndCurr)
+ {
+ printf (" %*.*s <-- %s", pEndCurr - pCurr, pEndCurr - pCurr, pCurr, pNodeName) ;
pCurr = pEndCurr + strlen (pToken -> sEndText) ;
+ }
}
- }
+ pCurrStart = pCurr ;
+ }
}
- if (*pCurr == *sEndText && strcmp (pCurr, sEndText) == 0)
+ if (*pCurr == *sEndText && strncmp (pCurr, sEndText, nEndText) == 0)
{
- break ;
+ printf (" %*.*s <-- %s", pCurr - pCurrStart, pCurr - pCurrStart, pCurrStart, pParentNodeName) ;
+ pCurr += nEndText ;
+ break ;
}
pCurr++ ;
}

+ *ppCurr = pCurr ;
return 0 ;
}

@@ -272,14 +342,26 @@
int ParseFile (/*i/o*/ register req * r)

{
- char * pStart = r -> Buf.pBuf ;
- char * pEnd = r -> Buf.pEndPos ;
+ char * pStart = r -> Buf.pBuf ;
+ char * pEnd = r -> Buf.pEndPos ;
struct tTokenTable pTable ;
int numTokens ;
+ char * sTokenHash = "HTML::Embperl::Tokens::Main" ;
+ HV * pTokenHash ;
+ int rc ;

- BuildTokenTable (r, "HTML::Embperl::Tokens::Main", &pTable) ;
-
- return ParseTokens (pStart, pEnd, &pTable, "") ;
+
+ if ((pTokenHash = perl_get_hv ((char *)sTokenHash, TRUE)) == NULL)
+ {
+ return rcHashError ;
+ }
+ if (rc = BuildTokenTable (r, pTokenHash , "", &pTable))
+ {
+ LogError (r, rc) ;
+ return rc ;
+ }
+
+ return ParseTokens (&pStart, pEnd, &pTable, "", "root", 0) ;


}



1.57.2.2 +24 -23 embperl/test.pl

Index: test.pl
===================================================================
RCS file: /home/cvs/embperl/test.pl,v
retrieving revision 1.57.2.1
retrieving revision 1.57.2.2
diff -u -r1.57.2.1 -r1.57.2.2
--- test.pl 2000/05/02 14:28:19 1.57.2.1
+++ test.pl 2000/05/03 07:17:39 1.57.2.2
@@ -4,30 +4,31 @@


@tests = (
-# 'ascii',
- 'pure.htm',
- 'plain.htm',
- 'plain.htm',
- 'plain.htm',
- 'plainblock.htm',
- 'plainblock.htm',
- 'error.htm???8',
- 'error.htm???8',
- 'error.htm???8',
- 'unclosed.htm???1',
+ 'input.htm?feld5=Wert5&feld6=Wert6&feld7=Wert7&feld8=Wert8&cb5=cbv5&cb6=cbv6&cb7=cbv7&cb8=cbv8&cb9=ncbv9&cb10=ncbv10&cb11=ncbv11&mult=Wert3&mult=Wert6&esc=a<b&escmult=a>b&escmult=Wert3',
+## 'ascii',
+## 'pure.htm',
+## 'plain.htm',
+## 'plain.htm',
+## 'plain.htm',
+## 'plainblock.htm',
+## 'plainblock.htm',
+## 'error.htm???8',
+## 'error.htm???8',
+## 'error.htm???8',
+## 'unclosed.htm???1',
# 'errorright.htm???1',
- 'notfound.htm???1',
- 'notallow.xhtm???1',
- 'noerr/noerrpage.htm???8?2',
- 'errdoc/errdoc.htm???8?262144',
- 'rawinput/rawinput.htm????16',
- 'var.htm',
- 'varerr.htm???-1',
- 'varerr.htm???2',
- 'escape.htm',
- 'spaces.htm',
- 'tagscan.htm',
- 'tagscan.htm??1',
+## 'notfound.htm???1',
+## 'notallow.xhtm???1',
+## 'noerr/noerrpage.htm???8?2',
+## 'errdoc/errdoc.htm???8?262144',
+## 'rawinput/rawinput.htm????16',
+## 'var.htm',
+## 'varerr.htm???-1',
+## 'varerr.htm???2',
+## 'escape.htm',
+## 'spaces.htm',
+## 'tagscan.htm',
+## 'tagscan.htm??1',
'if.htm',
'ifperl.htm',
'loop.htm?erstes=Hallo&zweites=Leer+zeichen&drittes=%21%22%23%2a%2B&erstes=Wert2',



No revision


No revision


1.1.2.2 +119 -9 embperl/Embperl/Attic/Tokens.pm

Index: Tokens.pm
===================================================================
RCS file: /home/cvs/embperl/Embperl/Attic/Tokens.pm,v
retrieving revision 1.1.2.1
retrieving revision 1.1.2.2
diff -u -r1.1.2.1 -r1.1.2.2
--- Tokens.pm 2000/05/02 14:28:51 1.1.2.1
+++ Tokens.pm 2000/05/03 07:17:40 1.1.2.2
@@ -1,18 +1,115 @@

package HTML::Embperl::Tokens ;

-%Main = (
- 'HTML Tag' => {
- 'text' => '<',
- 'end' => '>',
- },
- 'HTML Comment' => {
- 'text' => '<!--',
- 'end' => '-->',
- },
+
+%Attr = (
+ 'Attribut ""' =>
+ {
+ 'text' => '"',
+ 'end' => '"',
+ 'inside' => \%Cmds,
+ },
+ 'Attribut \'\'' =>
+ {
+ 'text' => '\'',
+ 'end' => '\'',
+ 'inside' => \%Cmds,
+ },
+ #'Attribut' =>
+ # {
+ # 'contains' => 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789',
+ # 'inside' => \%Cmds,
+ # }
+ ) ;
+
+
+%AssignAttr = (
+ 'Assign' =>
+ {
+ 'text' => '=',
+ 'follow' => \%Attr,
+ }
+ ) ;
+
+
+%HtmlTags = (
+ 'input' => {
+ 'text' => 'input',
+ 'inside' =>
+ {
+ 'type' => { 'text' => 'type', 'nodename' => 'type', follow => \%AssignAttr },
+ 'name' => { 'text' => 'name', 'nodename' => 'name', follow => \%AssignAttr },
+ 'value' => { 'text' => 'value', 'nodename' => 'value', follow => \%AssignAttr },
+ }
+ },
+ 'tr' => {
+ 'text' => 'tr',
+ },
+ 'table' => {
+ 'text' => 'table',
+ },
+ 'th' => {
+ 'text' => 'th',
+ },
+ 'select' => {
+ 'text' => 'select',
+ 'inside' =>
+ {
+ 'name' => { 'text' => 'name', follow => \%AssignAttr },
+ }
+ },
+ 'body' => {
+ 'text' => 'body',
+ },
+ 'a' => {
+ 'text' => 'a',
+ },
+
+ ) ;
+
+
+%MetaCmds = (
+ 'if' => {
+ 'text' => 'if'
+ },
+ 'else' => {
+ 'text' => 'else'
+ },
+ 'endif' => {
+ 'text' => 'endif'
+ },
+ 'elsif' => {
+ 'text' => 'elseif'
+ },
+ 'while' => {
+ 'text' => 'while'
+ },
+ 'endwhile' => {
+ 'text' => 'endwhile'
+ },
+ 'foreach' => {
+ 'text' => 'foreach'
+ },
+ 'endforeach' => {
+ 'text' => 'endforeach'
+ },
+ 'do' => {
+ 'text' => 'do'
+ },
+ 'until' => {
+ 'text' => 'until'
+ },
+
+ ) ;
+
+
+
+
+%Cmds = (
'Embperl meta command' => {
'text' => '[$',
'end' => '$]',
+ 'follow' => \%MetaCmds,
},
'Embperl output code' => {
'text' => '[+',
@@ -32,3 +129,16 @@
},
) ;

+
+%Main = (
+ 'HTML Tag' => {
+ 'text' => '<',
+ 'end' => '>',
+ 'follow' => \%HtmlTags,
+ },
+ 'HTML Comment' => {
+ 'text' => '<!--',
+ 'end' => '-->',
+ },
+ %Cmds,
+ ) ;