/* ** This file contains all sources (including headers) to the LEMON ** LALR(1) parser generator. The sources have been combined into a ** single file to make it easy to include LEMON in the source tree ** and Makefile of another program. ** ** The author of this program disclaims copyright. */ #include #include #include #include #include #include #define ISSPACE(X) isspace((unsigned char)(X)) #define ISDIGIT(X) isdigit((unsigned char)(X)) #define ISALNUM(X) isalnum((unsigned char)(X)) #define ISALPHA(X) isalpha((unsigned char)(X)) #define ISUPPER(X) isupper((unsigned char)(X)) #define ISLOWER(X) islower((unsigned char)(X)) #ifndef __WIN32__ # if defined(_WIN32) || defined(WIN32) # define __WIN32__ # endif #endif #ifdef __WIN32__ #ifdef __cplusplus extern "C" { #endif extern int access(const char *path, int mode); #ifdef __cplusplus } #endif #else #include #endif /* #define PRIVATE static */ #define PRIVATE #ifdef TEST #define MAXRHS 5 /* Set low to exercise exception code */ #else #define MAXRHS 1000 #endif static int showPrecedenceConflict = 0; static char *msort(char *,char **,int(*)(const char *,const char *)); /* ** Compilers are getting increasingly pedantic about type conversions ** as C evolves ever closer to Ada.... To work around the latest problems ** we have to define the following variant of strlen(). */ #define lemonStrlen(X) ((int)strlen(X)) /* ** Compilers are starting to complain about the use of sprintf() and strcpy(), ** saying they are unsafe. So we define our own versions of those routines too. ** ** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and ** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). ** The third is a helper routine for vsnprintf() that adds texts to the end of a ** buffer, making sure the buffer is always zero-terminated. ** ** The string formatter is a minimal subset of stdlib sprintf() supporting only ** a few simply conversions: ** ** %d ** %s ** %.*s ** */ static void lemon_addtext( char *zBuf, /* The buffer to which text is added */ int *pnUsed, /* Slots of the buffer used so far */ const char *zIn, /* Text to add */ int nIn, /* Bytes of text to add. -1 to use strlen() */ int iWidth /* Field width. Negative to left justify */ ) { if (nIn<0) for(nIn=0; zIn[nIn]; nIn++) {} while(iWidth>nIn) { zBuf[(*pnUsed)++] = ' '; iWidth--; } if (nIn == 0) return; memcpy(&zBuf[*pnUsed], zIn, nIn); *pnUsed += nIn; while((-iWidth)>nIn) { zBuf[(*pnUsed)++] = ' '; iWidth++; } zBuf[*pnUsed] = 0; } static int lemon_vsprintf(char *str, const char *zFormat, va_list ap) { int i, j, k, c; int nUsed = 0; const char *z; char zTemp[50]; str[0] = 0; for(i=j=0; (c = zFormat[i])!=0; i++) { if (c == '%' ) { int iWidth = 0; lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); c = zFormat[++i]; if (ISDIGIT(c) || (c == '-' && ISDIGIT(zFormat[i+1])) ) { if (c == '-' ) i++; while(ISDIGIT(zFormat[i]) ) iWidth = iWidth *10 + zFormat[i++] - '0'; if (c == '-' ) iWidth = -iWidth; c = zFormat[i]; } if (c == 'd' ) { int v = va_arg(ap, int); if (v<0) { lemon_addtext(str, &nUsed, "-", 1, iWidth); v = -v; } else if (v == 0) { lemon_addtext(str, &nUsed, "0", 1, iWidth); } k = 0; while(v>0) { k++; zTemp[sizeof(zTemp)-k] = (v%10) + '0'; v /= 10; } lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth); } else if (c == 's' ) { z = va_arg(ap, const char *); lemon_addtext(str, &nUsed, z, -1, iWidth); } else if (c == '.' && memcmp(&zFormat[i], ".*s", 3) == 0) { i += 2; k = va_arg(ap, int); z = va_arg(ap, const char *); lemon_addtext(str, &nUsed, z, k, iWidth); } else if (c == '%' ) { lemon_addtext(str, &nUsed, "%", 1, 0); } else{ fprintf(stderr, "illegal format\n"); exit(1); } j = i+1; } } lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); return nUsed; } static int lemon_sprintf(char *str, const char *format, ...) { va_list ap; int rc; va_start(ap, format); rc = lemon_vsprintf(str, format, ap); va_end(ap); return rc; } static void lemon_strcpy(char *dest, const char *src) { while((*(dest++) = *(src++))!=0) {} } static void lemon_strcat(char *dest, const char *src) { while(*dest) dest++; lemon_strcpy(dest, src); } /* a few forward declarations... */ struct rule; struct lemon; struct action; static struct action *Action_new(void); static struct action *Action_sort(struct action *); /********** From the file "build.h" ************************************/ void FindRulePrecedences(); void FindFirstSets(); void FindStates(); void FindLinks(); void FindFollowSets(); void FindActions(); /********* From the file "configlist.h" *********************************/ void Configlist_init(void); struct config *Configlist_add(struct rule *, int); struct config *Configlist_addbasis(struct rule *, int); void Configlist_closure(struct lemon *); void Configlist_sort(void); void Configlist_sortbasis(void); struct config *Configlist_return(void); struct config *Configlist_basis(void); void Configlist_eat(struct config *); void Configlist_reset(void); /********* From the file "error.h" ***************************************/ void ErrorMsg(const char *, int,const char *, ...); /****** From the file "option.h" ******************************************/ enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; struct s_options { enum option_type type; const char *label; char *arg; const char *message; }; int OptInit(char **,struct s_options *,FILE *); int OptNArgs(void); char *OptArg(int); void OptErr(int); void OptPrint(void); /******** From the file "parse.h" *****************************************/ void Parse(struct lemon *lemp); /********* From the file "plink.h" ***************************************/ struct plink *Plink_new(void); void Plink_add(struct plink **, struct config *); void Plink_copy(struct plink **, struct plink *); void Plink_delete(struct plink *); /********** From the file "report.h" *************************************/ void Reprint(struct lemon *); void ReportOutput(struct lemon *); void ReportTable(struct lemon *, int); void ReportHeader(struct lemon *); void CompressTables(struct lemon *); void ResortStates(struct lemon *); /********** From the file "set.h" ****************************************/ void SetSize(int); /* All sets will be of size N */ char *SetNew(void); /* A new set for element 0..N */ void SetFree(char *); /* Deallocate a set */ int SetAdd(char *,int); /* Add element to a set */ int SetUnion(char *,char *); /* A <- A U B, thru element N */ #define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ /********** From the file "struct.h" *************************************/ /* ** Principal data structures for the LEMON parser generator. */ typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; /* Symbols (terminals and nonterminals) of the grammar are stored ** in the following: */ enum symbol_type { TERMINAL, NONTERMINAL, MULTITERMINAL }; enum e_assoc { LEFT, RIGHT, NONE, UNK }; struct symbol { const char *name; /* Name of the symbol */ int index; /* Index number for this symbol */ enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ struct rule *rule; /* Linked list of rules of this (if an NT) */ struct symbol *fallback; /* fallback token in case this token doesn't parse */ int prec; /* Precedence if defined (-1 otherwise) */ enum e_assoc assoc; /* Associativity if precedence is defined */ char *firstset; /* First-set for all rules of this symbol */ Boolean lambda; /* True if NT and can generate an empty string */ int useCnt; /* Number of times used */ char *destructor; /* Code which executes whenever this symbol is ** popped from the stack during error processing */ int destructor_emitted; /* Is the destructor code emitted */ int destLineno; /* Line number for start of destructor */ char *datatype; /* The data type of information held by this ** object. Only used if type == NONTERMINAL */ int dtnum; /* The data type number. In the parser, the value ** stack is a union. The .yy%d element of this ** union is the correct data type for this object */ /* The following fields are used by MULTITERMINALs only */ int nsubsym; /* Number of constituent symbols in the MULTI */ struct symbol **subsym; /* Array of constituent symbols */ }; /* Each production rule in the grammar is stored in the following ** structure. */ struct rule { struct symbol *lhs; /* Left-hand side of the rule */ const char *lhsalias; /* Alias for the LHS (NULL if none) */ int lhsStart; /* True if left-hand side is the start symbol */ int ruleline; /* Line number for the rule */ int nrhs; /* Number of RHS symbols */ struct symbol **rhs; /* The RHS symbols */ const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ int line; /* Line number at which code begins */ const char *code; /* The code executed when this rule is reduced */ const char *codePrefix; /* Setup code before code[] above */ const char *codeSuffix; /* Breakdown code after code[] above */ struct symbol *precsym; /* Precedence symbol for this rule */ int index; /* An index number for this rule */ int iRule; /* Rule number as used in the generated tables */ Boolean canReduce; /* True if this rule is ever reduced */ struct rule *nextlhs; /* Next rule with the same LHS */ struct rule *next; /* Next rule in the global list */ }; /* A configuration is a production rule of the grammar together with ** a mark (dot) showing how much of that rule has been processed so far. ** Configurations also contain a follow-set which is a list of terminal ** symbols which are allowed to immediately follow the end of the rule. ** Every configuration is recorded as an instance of the following: */ enum cfgstatus { COMPLETE, INCOMPLETE }; struct config { struct rule *rp; /* The rule upon which the configuration is based */ int dot; /* The parse point */ char *fws; /* Follow-set for this configuration only */ struct plink *fplp; /* Follow-set forward propagation links */ struct plink *bplp; /* Follow-set backwards propagation links */ struct state *stp; /* Pointer to state which contains this */ enum cfgstatus status; /* used during followset and shift computations */ struct config *next; /* Next configuration in the state */ struct config *bp; /* The next basis configuration */ }; enum e_action { SHIFT, ACCEPT, REDUCE, ERROR, SSCONFLICT, /* A shift/shift conflict */ SRCONFLICT, /* Was a reduce, but part of a conflict */ RRCONFLICT, /* Was a reduce, but part of a conflict */ SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ NOT_USED, /* Deleted by compression */ SHIFTREDUCE /* Shift first, then reduce */ }; /* Every shift or reduce operation is stored as one of the following */ struct action { struct symbol *sp; /* The look-ahead symbol */ enum e_action type; union { struct state *stp; /* The new state, if a shift */ struct rule *rp; /* The rule, if a reduce */ } x; struct action *next; /* Next action for this state */ struct action *collide; /* Next action with the same hash */ }; /* Each state of the generated parser's finite state machine ** is encoded as an instance of the following structure. */ struct state { struct config *bp; /* The basis configurations for this state */ struct config *cfp; /* All configurations in this set */ int statenum; /* Sequential number for this state */ struct action *ap; /* Array of actions for this state */ int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ int iDfltReduce; /* Default action is to REDUCE by this rule */ struct rule *pDfltReduce;/* The default REDUCE rule. */ int autoReduce; /* True if this is an auto-reduce state */ }; #define NO_OFFSET (-2147483647) /* A followset propagation link indicates that the contents of one ** configuration followset should be propagated to another whenever ** the first changes. */ struct plink { struct config *cfp; /* The configuration to which linked */ struct plink *next; /* The next propagate link */ }; /* The state vector for the entire parser generator is recorded as ** follows. (LEMON uses no global variables and makes little use of ** static variables. Fields in the following structure can be thought ** of as begin global variables in the program.) */ struct lemon { struct state **sorted; /* Table of states sorted by state number */ struct rule *rule; /* List of all rules */ struct rule *startRule; /* First rule */ int nstate; /* Number of states */ int nxstate; /* nstate with tail degenerate states removed */ int nrule; /* Number of rules */ int nsymbol; /* Number of terminal and nonterminal symbols */ int nterminal; /* Number of terminal symbols */ struct symbol **symbols; /* Sorted array of pointers to symbols */ int errorcnt; /* Number of errors */ struct symbol *errsym; /* The error symbol */ struct symbol *wildcard; /* Token that matches anything */ char *name; /* Name of the generated parser */ char *arg; /* Declaration of the 3th argument to parser */ char *tokentype; /* Type of terminal symbols in the parser stack */ char *vartype; /* The default type of non-terminal symbols */ char *start; /* Name of the start symbol for the grammar */ char *stacksize; /* Size of the parser stack */ char *include; /* Code to put at the start of the C file */ char *error; /* Code to execute when an error is seen */ char *overflow; /* Code to execute on a stack overflow */ char *failure; /* Code to execute on parser failure */ char *accept; /* Code to execute when the parser excepts */ char *extracode; /* Code appended to the generated file */ char *tokendest; /* Code to execute to destroy token data */ char *vardest; /* Code for the default non-terminal destructor */ char *filename; /* Name of the input file */ char *outname; /* Name of the current output file, may be .c .h or .out */ char *tokenprefix; /* A prefix added to token names in the .h file */ int nconflict; /* Number of parsing conflicts */ int nactiontab; /* Number of entries in the yy_action[] table */ int tablesize; /* Total table size of all tables in bytes */ int basisflag; /* Print only basis configurations */ int has_fallback; /* True if any %fallback is seen in the grammar */ int nolinenosflag; /* True if #line statements should not be printed */ char *argv0; /* Name of the program */ }; #define MemoryCheck(X) if ((X) == 0) { \ extern void memory_error(); \ memory_error(); \ } /**************** From the file "table.h" *********************************/ /* ** All code in this file has been automatically generated ** from a specification in the file ** "table.q" ** by the associative array code building program "aagen". ** Do not edit this file! Instead, edit the specification ** file, then rerun aagen. */ /* ** Code for processing tables in the LEMON parser generator. */ /* Routines for handling a strings */ const char *Strsafe(const char *); void Strsafe_init(void); int Strsafe_insert(const char *); const char *Strsafe_find(const char *); /* Routines for handling symbols of the grammar */ struct symbol *Symbol_new(const char *); int Symbolcmpp(const void *, const void *); void Symbol_init(void); int Symbol_insert(struct symbol *, const char *); struct symbol *Symbol_find(const char *); struct symbol *Symbol_Nth(int); int Symbol_count(void); struct symbol **Symbol_arrayof(void); /* Routines to manage the state table */ int Configcmp(const char *, const char *); struct state *State_new(void); void State_init(void); int State_insert(struct state *, struct config *); struct state *State_find(struct config *); struct state **State_arrayof(/* */); /* Routines used for efficiency in Configlist_add */ void Configtable_init(void); int Configtable_insert(struct config *); struct config *Configtable_find(struct config *); void Configtable_clear(int(*)(struct config *)); /****************** From the file "action.c" *******************************/ /* ** Routines processing parser actions in the LEMON parser generator. */ /* Allocate a new parser action */ static struct action *Action_new(void) { static struct action *freelist = 0; struct action *newaction; if (freelist == 0) { int i; int amt = 100; freelist = (struct action *)calloc(amt, sizeof(struct action)); if (freelist == 0) { fprintf(stderr,"Unable to allocate memory for a new parser action."); exit(1); } for(i = 0 ; i < amt-1; i++) freelist[i].next = &freelist[i+1]; freelist[amt-1].next = 0; } newaction = freelist; freelist = freelist->next; return newaction; } /* Compare two actions for sorting purposes. Return negative, zero, or ** positive if the first action is less than, equal to, or greater than ** the first */ static int actioncmp( struct action *ap1, struct action *ap2 ) { int rc; rc = ap1->sp->index - ap2->sp->index; if (rc == 0) { rc = (int)ap1->type - (int)ap2->type; } if (rc == 0 && (ap1->type == REDUCE || ap1->type == SHIFTREDUCE) ) { rc = ap1->x.rp->index - ap2->x.rp->index; } if (rc == 0) { rc = (int) (ap2 - ap1); } return rc; } /* Sort parser actions */ static struct action *Action_sort( struct action *ap ) { ap = (struct action *)msort((char *)ap,(char **)&ap->next, (int(*)(const char *,const char *))actioncmp); return ap; } void Action_add( struct action **app, enum e_action type, struct symbol *sp, char *arg ) { struct action *newaction; newaction = Action_new(); newaction->next = *app; *app = newaction; newaction->type = type; newaction->sp = sp; if (type == SHIFT) { newaction->x.stp = (struct state *)arg; } else{ newaction->x.rp = (struct rule *)arg; } } /********************** New code to implement the "acttab" module ***********/ /* ** This module implements routines use to construct the yy_action[] table. */ /* ** The state of the yy_action table under construction is an instance of ** the following structure. ** ** The yy_action table maps the pair (state_number, lookahead) into an ** action_number. The table is an array of integers pairs. The state_number ** determines an initial offset into the yy_action array. The lookahead ** value is then added to this initial offset to get an index X into the ** yy_action array. If the aAction[X].lookahead equals the value of the ** of the lookahead input, then the value of the action_number output is ** aAction[X].action. If the lookaheads do not match then the ** default action for the state_number is returned. ** ** All actions associated with a single state_number are first entered ** into aLookahead[] using multiple calls to acttab_action(). Then the ** actions for that single state_number are placed into the aAction[] ** array with a single call to acttab_insert(). The acttab_insert() call ** also resets the aLookahead[] array in preparation for the next ** state number. */ struct lookahead_action { int lookahead; /* Value of the lookahead token */ int action; /* Action to take on the given lookahead */ }; typedef struct acttab acttab; struct acttab { int nAction; /* Number of used slots in aAction[] */ int nActionAlloc; /* Slots allocated for aAction[] */ struct lookahead_action *aAction, /* The yy_action[] table under construction */ *aLookahead; /* A single new transaction set */ int mnLookahead; /* Minimum aLookahead[].lookahead */ int mnAction; /* Action associated with mnLookahead */ int mxLookahead; /* Maximum aLookahead[].lookahead */ int nLookahead; /* Used slots in aLookahead[] */ int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ }; /* Return the number of entries in the yy_action table */ #define acttab_size(X) ((X)->nAction) /* The value for the N-th entry in yy_action */ #define acttab_yyaction(X,N) ((X)->aAction[N].action) /* The value for the N-th entry in yy_lookahead */ #define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) /* Free all memory associated with the given acttab */ void acttab_free(acttab *p) { free( p->aAction); free( p->aLookahead); free( p); } /* Allocate a new acttab structure */ acttab *acttab_alloc(void) { acttab *p = (acttab *) calloc( 1, sizeof(*p) ); if (p == 0) { fprintf(stderr,"Unable to allocate memory for a new acttab."); exit(1); } memset(p, 0, sizeof(*p)); return p; } /* Add a new action to the current transaction set. ** ** This routine is called once for each lookahead for a particular ** state. */ void acttab_action(acttab *p, int lookahead, int action) { if (p->nLookahead>=p->nLookaheadAlloc) { p->nLookaheadAlloc += 25; p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead, sizeof(p->aLookahead[0])*p->nLookaheadAlloc); if (p->aLookahead == 0) { fprintf(stderr,"malloc failed\n"); exit(1); } } if (p->nLookahead == 0) { p->mxLookahead = lookahead; p->mnLookahead = lookahead; p->mnAction = action; } else{ if (p->mxLookaheadmxLookahead = lookahead; if (p->mnLookahead>lookahead) { p->mnLookahead = lookahead; p->mnAction = action; } } p->aLookahead[p->nLookahead].lookahead = lookahead; p->aLookahead[p->nLookahead].action = action; p->nLookahead++; } /* ** Add the transaction set built up with prior calls to acttab_action() ** into the current action table. Then reset the transaction set back ** to an empty set in preparation for a new round of acttab_action() calls. ** ** Return the offset into the action table of the new transaction. */ int acttab_insert(acttab *p) { int i, j, k, n; assert( p->nLookahead>0); /* Make sure we have enough space to hold the expanded action table ** in the worst case. The worst case occurs if the transaction set ** must be appended to the current action table */ n = p->mxLookahead + 1; if (p->nAction + n >= p->nActionAlloc) { int oldAlloc = p->nActionAlloc; p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; p->aAction = (struct lookahead_action *) realloc( p->aAction, sizeof(p->aAction[0])*p->nActionAlloc); if (p->aAction == 0) { fprintf(stderr,"malloc failed\n"); exit(1); } for(i=oldAlloc; i < p->nActionAlloc; i++) { p->aAction[i].lookahead = -1; p->aAction[i].action = -1; } } /* Scan the existing action table looking for an offset that is a ** duplicate of the current transaction set. Fall out of the loop ** if and when the duplicate is found. ** ** i is the index in p->aAction[] where p->mnLookahead is inserted. */ for(i=p->nAction-1; i >= 0; i--) { if (p->aAction[i].lookahead == p->mnLookahead) { /* All lookaheads and actions in the aLookahead[] transaction ** must match against the candidate aAction[i] entry. */ if (p->aAction[i].action!=p->mnAction) continue; for(j=0; jnLookahead; j++) { k = p->aLookahead[j].lookahead - p->mnLookahead + i; if (k<0 || k>=p->nAction) break; if (p->aLookahead[j].lookahead!=p->aAction[k].lookahead) break; if (p->aLookahead[j].action!=p->aAction[k].action) break; } if (jnLookahead) continue; /* No possible lookahead value that is not in the aLookahead[] ** transaction is allowed to match aAction[i] */ n = 0; for(j=0; jnAction; j++) { if (p->aAction[j].lookahead<0) continue; if (p->aAction[j].lookahead == j+p->mnLookahead-i) n++; } if (n == p->nLookahead) { break; /* An exact match is found at offset i */ } } } /* If no existing offsets exactly match the current transaction, find an ** an empty offset in the aAction[] table in which we can add the ** aLookahead[] transaction. */ if (i < 0) { /* Look for holes in the aAction[] table that fit the current ** aLookahead[] transaction. Leave i set to the offset of the hole. ** If no holes are found, i is left at p->nAction, which means the ** transaction will be appended. */ for(i = 0 ; i < p->nActionAlloc - p->mxLookahead; i++) { if (p->aAction[i].lookahead<0) { for(j=0; jnLookahead; j++) { k = p->aLookahead[j].lookahead - p->mnLookahead + i; if (k<0) break; if (p->aAction[k].lookahead>=0) break; } if (jnLookahead) continue; for(j=0; jnAction; j++) { if (p->aAction[j].lookahead == j+p->mnLookahead-i) break; } if (j == p->nAction) { break; /* Fits in empty slots */ } } } } /* Insert transaction set at index i. */ for(j=0; jnLookahead; j++) { k = p->aLookahead[j].lookahead - p->mnLookahead + i; p->aAction[k] = p->aLookahead[j]; if (k>=p->nAction) p->nAction = k+1; } p->nLookahead = 0; /* Return the offset that is added to the lookahead in order to get the ** index into yy_action of the action */ return i - p->mnLookahead; } /********************** From the file "build.c" *****************************/ /* ** Routines to construction the finite state machine for the LEMON ** parser generator. */ /* Find a precedence symbol of every rule in the grammar. ** ** Those rules which have a precedence symbol coded in the input ** grammar using the "[symbol]" construct will already have the ** rp->precsym field filled. Other rules take as their precedence ** symbol the first RHS symbol with a defined precedence. If there ** are not RHS symbols with a defined precedence, the precedence ** symbol field is left blank. */ void FindRulePrecedences(struct lemon *xp) { struct rule *rp; for(rp=xp->rule; rp; rp=rp->next) { if (rp->precsym == 0) { int i, j; for(i = 0 ; i < rp->nrhs && rp->precsym == 0; i++) { struct symbol *sp = rp->rhs[i]; if (sp->type == MULTITERMINAL) { for(j=0; jnsubsym; j++) { if (sp->subsym[j]->prec>=0) { rp->precsym = sp->subsym[j]; break; } } } else if (sp->prec>=0) { rp->precsym = rp->rhs[i]; } } } } return; } /* Find all nonterminals which will generate the empty string. ** Then go back and compute the first sets of every nonterminal. ** The first set is the set of all terminal symbols which can begin ** a string generated by that nonterminal. */ void FindFirstSets(struct lemon *lemp) { int i, j; struct rule *rp; int progress; for(i = 0 ; i < lemp->nsymbol; i++) { lemp->symbols[i]->lambda = LEMON_FALSE; } for(i=lemp->nterminal; i < lemp->nsymbol; i++) { lemp->symbols[i]->firstset = SetNew(); } /* First compute all lambdas */ do{ progress = 0; for(rp=lemp->rule; rp; rp=rp->next) { if (rp->lhs->lambda) continue; for(i = 0 ; i < rp->nrhs; i++) { struct symbol *sp = rp->rhs[i]; assert( sp->type == NONTERMINAL || sp->lambda == LEMON_FALSE); if (sp->lambda == LEMON_FALSE) break; } if (i == rp->nrhs) { rp->lhs->lambda = LEMON_TRUE; progress = 1; } } }while(progress); /* Now compute all first sets */ do{ struct symbol *s1, *s2; progress = 0; for(rp=lemp->rule; rp; rp=rp->next) { s1 = rp->lhs; for(i = 0 ; i < rp->nrhs; i++) { s2 = rp->rhs[i]; if (s2->type == TERMINAL) { progress += SetAdd(s1->firstset,s2->index); break; } else if (s2->type == MULTITERMINAL) { for(j=0; jnsubsym; j++) { progress += SetAdd(s1->firstset,s2->subsym[j]->index); } break; } else if (s1==s2) { if (s1->lambda == LEMON_FALSE) break; } else{ progress += SetUnion(s1->firstset,s2->firstset); if (s2->lambda == LEMON_FALSE) break; } } } }while(progress); return; } /* Compute all LR(0) states for the grammar. Links ** are added to between some states so that the LR(1) follow sets ** can be computed later. */ PRIVATE struct state *getstate(struct lemon *); /* forward reference */ void FindStates(struct lemon *lemp) { struct symbol *sp; struct rule *rp; Configlist_init(); /* Find the start symbol */ if (lemp->start) { sp = Symbol_find(lemp->start); if (sp == 0) { ErrorMsg(lemp->filename,0, "The specified start symbol \"%s\" is not \ in a nonterminal of the grammar. \"%s\" will be used as the start \ symbol instead.",lemp->start,lemp->startRule->lhs->name); lemp->errorcnt++; sp = lemp->startRule->lhs; } } else{ sp = lemp->startRule->lhs; } /* Make sure the start symbol doesn't occur on the right-hand side of ** any rule. Report an error if it does. (YACC would generate a new ** start symbol in this case.) */ for(rp=lemp->rule; rp; rp=rp->next) { int i; for(i = 0 ; i < rp->nrhs; i++) { if (rp->rhs[i] == sp) { /* FIX ME: Deal with multiterminals */ ErrorMsg(lemp->filename,0, "The start symbol \"%s\" occurs on the \ right-hand side of a rule. This will result in a parser which \ does not work properly.",sp->name); lemp->errorcnt++; } } } /* The basis configuration set for the first state ** is all rules which have the start symbol as their ** left-hand side */ for(rp=sp->rule; rp; rp=rp->nextlhs) { struct config *newcfp; rp->lhsStart = 1; newcfp = Configlist_addbasis(rp,0); SetAdd(newcfp->fws,0); } /* Compute the first state. All other states will be ** computed automatically during the computation of the first one. ** The returned pointer to the first state is not used. */ (void)getstate(lemp); return; } /* Return a pointer to a state which is described by the configuration ** list which has been built from calls to Configlist_add. */ PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ PRIVATE struct state *getstate(struct lemon *lemp) { struct config *cfp, *bp; struct state *stp; /* Extract the sorted basis of the new state. The basis was constructed ** by prior calls to "Configlist_addbasis()". */ Configlist_sortbasis(); bp = Configlist_basis(); /* Get a state with the same basis */ stp = State_find(bp); if (stp) { /* A state with the same basis already exists! Copy all the follow-set ** propagation links from the state under construction into the ** preexisting state, then return a pointer to the preexisting state */ struct config *x, *y; for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp) { Plink_copy(&y->bplp,x->bplp); Plink_delete(x->fplp); x->fplp = x->bplp = 0; } cfp = Configlist_return(); Configlist_eat(cfp); } else{ /* This really is a new state. Construct all the details */ Configlist_closure(lemp); /* Compute the configuration closure */ Configlist_sort(); /* Sort the configuration closure */ cfp = Configlist_return(); /* Get a pointer to the config list */ stp = State_new(); /* A new state structure */ MemoryCheck(stp); stp->bp = bp; /* Remember the configuration basis */ stp->cfp = cfp; /* Remember the configuration closure */ stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ stp->ap = 0; /* No actions, yet. */ State_insert(stp,stp->bp); /* Add to the state table */ buildshifts(lemp,stp); /* Recursively compute successor states */ } return stp; } /* ** Return true if two symbols are the same. */ int same_symbol(struct symbol *a, struct symbol *b) { int i; if (a == b) return 1; if (a->type!=MULTITERMINAL) return 0; if (b->type!=MULTITERMINAL) return 0; if (a->nsubsym!=b->nsubsym) return 0; for(i = 0 ; i < a->nsubsym; i++) { if (a->subsym[i]!=b->subsym[i] ) return 0; } return 1; } /* Construct all successor states to the given state. A "successor" ** state is any state which can be reached by a shift action. */ PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) { struct config *cfp; /* For looping thru the config closure of "stp" */ struct config *bcfp; /* For the inner loop on config closure of "stp" */ struct config *newcfg; /* */ struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ struct state *newstp; /* A pointer to a successor state */ /* Each configuration becomes complete after it contibutes to a successor ** state. Initially, all configurations are incomplete */ for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; /* Loop through all configurations of the state "stp" */ for(cfp=stp->cfp; cfp; cfp=cfp->next) { if (cfp->status == COMPLETE) continue; /* Already used by inner loop */ if (cfp->dot>=cfp->rp->nrhs) continue; /* Can't shift this config */ Configlist_reset(); /* Reset the new config set */ sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ /* For every configuration in the state "stp" which has the symbol "sp" ** following its dot, add the same configuration to the basis set under ** construction but with the dot shifted one symbol to the right. */ for(bcfp=cfp; bcfp; bcfp=bcfp->next) { if (bcfp->status == COMPLETE) continue; /* Already used */ if (bcfp->dot>=bcfp->rp->nrhs) continue; /* Can't shift this one */ bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ if (!same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ bcfp->status = COMPLETE; /* Mark this config as used */ newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); Plink_add(&newcfg->bplp,bcfp); } /* Get a pointer to the state described by the basis configuration set ** constructed in the preceding loop */ newstp = getstate(lemp); /* The state "newstp" is reached from the state "stp" by a shift action ** on the symbol "sp" */ if (sp->type == MULTITERMINAL) { int i; for(i = 0 ; i < sp->nsubsym; i++) { Action_add(&stp->ap,SHIFT,sp->subsym[i],(char *)newstp); } } else{ Action_add(&stp->ap,SHIFT,sp,(char *)newstp); } } } /* ** Construct the propagation links */ void FindLinks(struct lemon *lemp) { int i; struct config *cfp, *other; struct state *stp; struct plink *plp; /* Housekeeping detail: ** Add to every propagate link a pointer back to the state to ** which the link is attached. */ for(i = 0 ; i < lemp->nstate; i++) { stp = lemp->sorted[i]; for(cfp=stp->cfp; cfp; cfp=cfp->next) { cfp->stp = stp; } } /* Convert all backlinks into forward links. Only the forward ** links are used in the follow-set computation. */ for(i = 0 ; i < lemp->nstate; i++) { stp = lemp->sorted[i]; for(cfp=stp->cfp; cfp; cfp=cfp->next) { for(plp=cfp->bplp; plp; plp=plp->next) { other = plp->cfp; Plink_add(&other->fplp,cfp); } } } } /* Compute all followsets. ** ** A followset is the set of all symbols which can come immediately ** after a configuration. */ void FindFollowSets(struct lemon *lemp) { int i; struct config *cfp; struct plink *plp; int progress; int change; for(i = 0 ; i < lemp->nstate; i++) { for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next) { cfp->status = INCOMPLETE; } } do{ progress = 0; for(i = 0 ; i < lemp->nstate; i++) { for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next) { if (cfp->status == COMPLETE) continue; for(plp=cfp->fplp; plp; plp=plp->next) { change = SetUnion(plp->cfp->fws,cfp->fws); if (change) { plp->cfp->status = INCOMPLETE; progress = 1; } } cfp->status = COMPLETE; } } }while(progress); } static int resolve_conflict(struct action *,struct action *); /* Compute the reduce actions, and resolve conflicts. */ void FindActions(struct lemon *lemp) { int i,j; struct config *cfp; struct state *stp; struct symbol *sp; struct rule *rp; /* Add all of the reduce actions ** A reduce action is added for each element of the followset of ** a configuration which has its dot at the extreme right. */ for(i = 0 ; i < lemp->nstate; i++) { /* Loop over all states */ stp = lemp->sorted[i]; for(cfp=stp->cfp; cfp; cfp=cfp->next) { /* Loop over all configurations */ if (cfp->rp->nrhs == cfp->dot) { /* Is dot at extreme right? */ for(j=0; jnterminal; j++) { if (SetFind(cfp->fws,j) ) { /* Add a reduce action to the state "stp" which will reduce by the ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); } } } } } /* Add the accepting token */ if (lemp->start) { sp = Symbol_find(lemp->start); if (sp == 0) sp = lemp->startRule->lhs; } else{ sp = lemp->startRule->lhs; } /* Add to the first state (which is always the starting state of the ** finite state machine) an action to ACCEPT if the lookahead is the ** start nonterminal. */ Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); /* Resolve conflicts */ for(i = 0 ; i < lemp->nstate; i++) { struct action *ap, *nap; stp = lemp->sorted[i]; /* assert( stp->ap); */ stp->ap = Action_sort(stp->ap); for(ap=stp->ap; ap && ap->next; ap=ap->next) { for(nap=ap->next; nap && nap->sp == ap->sp; nap=nap->next) { /* The two actions "ap" and "nap" have the same lookahead. ** Figure out which one should be used */ lemp->nconflict += resolve_conflict(ap,nap); } } } /* Report an error for each rule that can never be reduced. */ for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; for(i = 0 ; i < lemp->nstate; i++) { struct action *ap; for(ap=lemp->sorted[i]->ap; ap; ap=ap->next) { if (ap->type == REDUCE) ap->x.rp->canReduce = LEMON_TRUE; } } for(rp=lemp->rule; rp; rp=rp->next) { if (rp->canReduce) continue; ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); lemp->errorcnt++; } } /* Resolve a conflict between the two given actions. If the ** conflict can't be resolved, return non-zero. ** ** NO LONGER TRUE: ** To resolve a conflict, first look to see if either action ** is on an error rule. In that case, take the action which ** is not associated with the error rule. If neither or both ** actions are associated with an error rule, then try to ** use precedence to resolve the conflict. ** ** If either action is a SHIFT, then it must be apx. This ** function won't work if apx->type == REDUCE and apy->type == SHIFT. */ static int resolve_conflict( struct action *apx, struct action *apy ) { struct symbol *spx, *spy; int errcnt = 0; assert( apx->sp == apy->sp); /* Otherwise there would be no conflict */ if (apx->type == SHIFT && apy->type == SHIFT) { apy->type = SSCONFLICT; errcnt++; } if (apx->type == SHIFT && apy->type == REDUCE) { spx = apx->sp; spy = apy->x.rp->precsym; if (spy == 0 || spx->prec<0 || spy->prec<0) { /* Not enough precedence information. */ apy->type = SRCONFLICT; errcnt++; } else if (spx->prec>spy->prec) { /* higher precedence wins */ apy->type = RD_RESOLVED; } else if (spx->precprec) { apx->type = SH_RESOLVED; } else if (spx->prec == spy->prec && spx->assoc == RIGHT) { /* Use operator */ apy->type = RD_RESOLVED; /* associativity */ } else if (spx->prec == spy->prec && spx->assoc == LEFT) { /* to break tie */ apx->type = SH_RESOLVED; } else{ assert( spx->prec == spy->prec && spx->assoc == NONE); apx->type = ERROR; } } else if (apx->type == REDUCE && apy->type == REDUCE) { spx = apx->x.rp->precsym; spy = apy->x.rp->precsym; if (spx == 0 || spy == 0 || spx->prec<0 || spy->prec<0 || spx->prec == spy->prec) { apy->type = RRCONFLICT; errcnt++; } else if (spx->prec>spy->prec) { apy->type = RD_RESOLVED; } else if (spx->precprec) { apx->type = RD_RESOLVED; } } else{ assert( apx->type == SH_RESOLVED || apx->type == RD_RESOLVED || apx->type == SSCONFLICT || apx->type == SRCONFLICT || apx->type == RRCONFLICT || apy->type == SH_RESOLVED || apy->type == RD_RESOLVED || apy->type == SSCONFLICT || apy->type == SRCONFLICT || apy->type == RRCONFLICT ); /* The REDUCE/SHIFT case cannot happen because SHIFTs come before ** REDUCEs on the list. If we reach this point it must be because ** the parser conflict had already been resolved. */ } return errcnt; } /********************* From the file "configlist.c" *************************/ /* ** Routines to processing a configuration list and building a state ** in the LEMON parser generator. */ static struct config *freelist = 0; /* List of free configurations */ static struct config *current = 0; /* Top of list of configurations */ static struct config **currentend = 0; /* Last on list of configs */ static struct config *basis = 0; /* Top of list of basis configs */ static struct config **basisend = 0; /* End of list of basis configs */ /* Return a pointer to a new configuration */ PRIVATE struct config *newconfig() { struct config *newcfg; if (freelist == 0) { int i; int amt = 3; freelist = (struct config *)calloc( amt, sizeof(struct config) ); if (freelist == 0) { fprintf(stderr,"Unable to allocate memory for a new configuration."); exit(1); } for(i = 0 ; i < amt-1; i++) freelist[i].next = &freelist[i+1]; freelist[amt-1].next = 0; } newcfg = freelist; freelist = freelist->next; return newcfg; } /* The configuration "old" is no longer used */ PRIVATE void deleteconfig(struct config *old) { old->next = freelist; freelist = old; } /* Initialized the configuration list builder */ void Configlist_init() { current = 0; currentend = ¤t; basis = 0; basisend = &basis; Configtable_init(); return; } /* Initialized the configuration list builder */ void Configlist_reset() { current = 0; currentend = ¤t; basis = 0; basisend = &basis; Configtable_clear(0); return; } /* Add another configuration to the configuration list */ struct config *Configlist_add( struct rule *rp, /* The rule */ int dot /* Index into the RHS of the rule where the dot goes */ ) { struct config *cfp, model; assert( currentend!=0); model.rp = rp; model.dot = dot; cfp = Configtable_find(&model); if (cfp == 0) { cfp = newconfig(); cfp->rp = rp; cfp->dot = dot; cfp->fws = SetNew(); cfp->stp = 0; cfp->fplp = cfp->bplp = 0; cfp->next = 0; cfp->bp = 0; *currentend = cfp; currentend = &cfp->next; Configtable_insert(cfp); } return cfp; } /* Add a basis configuration to the configuration list */ struct config *Configlist_addbasis(struct rule *rp, int dot) { struct config *cfp, model; assert( basisend!=0); assert( currentend!=0); model.rp = rp; model.dot = dot; cfp = Configtable_find(&model); if (cfp == 0) { cfp = newconfig(); cfp->rp = rp; cfp->dot = dot; cfp->fws = SetNew(); cfp->stp = 0; cfp->fplp = cfp->bplp = 0; cfp->next = 0; cfp->bp = 0; *currentend = cfp; currentend = &cfp->next; *basisend = cfp; basisend = &cfp->bp; Configtable_insert(cfp); } return cfp; } /* Compute the closure of the configuration list */ void Configlist_closure(struct lemon *lemp) { struct config *cfp, *newcfp; struct rule *rp, *newrp; struct symbol *sp, *xsp; int i, dot; assert( currentend!=0); for(cfp=current; cfp; cfp=cfp->next) { rp = cfp->rp; dot = cfp->dot; if (dot>=rp->nrhs) continue; sp = rp->rhs[dot]; if (sp->type == NONTERMINAL) { if (sp->rule == 0 && sp!=lemp->errsym) { ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", sp->name); lemp->errorcnt++; } for(newrp=sp->rule; newrp; newrp=newrp->nextlhs) { newcfp = Configlist_add(newrp,0); for(i=dot+1; i < rp->nrhs; i++) { xsp = rp->rhs[i]; if (xsp->type == TERMINAL) { SetAdd(newcfp->fws,xsp->index); break; } else if (xsp->type == MULTITERMINAL) { int k; for(k=0; knsubsym; k++) { SetAdd(newcfp->fws, xsp->subsym[k]->index); } break; } else{ SetUnion(newcfp->fws,xsp->firstset); if (xsp->lambda == LEMON_FALSE) break; } } if (i == rp->nrhs) Plink_add(&cfp->fplp,newcfp); } } } return; } /* Sort the configuration list */ void Configlist_sort() { current = (struct config *)msort((char *)current,(char **)&(current->next), Configcmp); currentend = 0; return; } /* Sort the basis configuration list */ void Configlist_sortbasis() { basis = (struct config *)msort((char *)current,(char **)&(current->bp), Configcmp); basisend = 0; return; } /* Return a pointer to the head of the configuration list and ** reset the list */ struct config *Configlist_return() { struct config *old; old = current; current = 0; currentend = 0; return old; } /* Return a pointer to the head of the configuration list and ** reset the list */ struct config *Configlist_basis() { struct config *old; old = basis; basis = 0; basisend = 0; return old; } /* Free all elements of the given configuration list */ void Configlist_eat(struct config *cfp) { struct config *nextcfp; for(; cfp; cfp=nextcfp) { nextcfp = cfp->next; assert( cfp->fplp == 0); assert( cfp->bplp == 0); if (cfp->fws) SetFree(cfp->fws); deleteconfig(cfp); } return; } /***************** From the file "error.c" *********************************/ /* ** Code for printing error message. */ void ErrorMsg(const char *filename, int lineno, const char *format, ...) { va_list ap; fprintf(stderr, "%s:%d: ", filename, lineno); va_start(ap, format); vfprintf(stderr,format,ap); va_end(ap); fprintf(stderr, "\n"); } /**************** From the file "main.c" ************************************/ /* ** Main program file for the LEMON parser generator. */ /* Report an out-of-memory condition and abort. This function ** is used mostly by the "MemoryCheck" macro in struct.h */ void memory_error() { fprintf(stderr,"Out of memory. Aborting...\n"); exit(1); } static int nDefine = 0; /* Number of -D options on the command line */ static char **azDefine = 0; /* Name of the -D macros */ /* This routine is called with the argument to each -D command-line option. ** Add the macro defined to the azDefine array. */ static void handle_D_option(char *z) { char **paz; nDefine++; azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); if (azDefine == 0) { fprintf(stderr,"out of memory\n"); exit(1); } paz = &azDefine[nDefine-1]; *paz = (char *) malloc( lemonStrlen(z)+1); if (*paz == 0) { fprintf(stderr,"out of memory\n"); exit(1); } lemon_strcpy(*paz, z); for(z=*paz; *z && *z!='='; z++) {} *z = 0; } static char *user_templatename = NULL; static void handle_T_option(char *z) { user_templatename = (char *) malloc( lemonStrlen(z)+1); if (user_templatename == 0) { memory_error(); } lemon_strcpy(user_templatename, z); } static char *user_output_file = NULL; static void handle_o_option(char *z) { user_output_file = (char *) malloc( lemonStrlen(z)+1); if (user_output_file == 0) { memory_error(); } lemon_strcpy(user_output_file, z); } static char *user_output_header = NULL; static void handle_h_option(char *z) { user_output_header = (char *) malloc( lemonStrlen(z)+1); if (user_output_header == 0) { memory_error(); } lemon_strcpy(user_output_header, z); } /* Merge together to lists of rules order by rule.iRule */ static struct rule *Rule_merge(struct rule *pA, struct rule *pB) { struct rule *pFirst = 0; struct rule **ppPrev = &pFirst; while(pA && pB) { if (pA->iRuleiRule) { *ppPrev = pA; ppPrev = &pA->next; pA = pA->next; } else{ *ppPrev = pB; ppPrev = &pB->next; pB = pB->next; } } if (pA) { *ppPrev = pA; } else{ *ppPrev = pB; } return pFirst; } /* ** Sort a list of rules in order of increasing iRule value */ static struct rule *Rule_sort(struct rule *rp) { int i; struct rule *pNext; struct rule *x[32]; memset(x, 0, sizeof(x)); while(rp) { pNext = rp->next; rp->next = 0; for(i = 0 ; i < sizeof(x)/sizeof(x[0]) && x[i]; i++) { rp = Rule_merge(x[i], rp); x[i] = 0; } x[i] = rp; rp = pNext; } rp = 0; for(i = 0 ; i < sizeof(x)/sizeof(x[0]); i++) { rp = Rule_merge(x[i], rp); } return rp; } /* forward reference */ static const char *minimum_size_type(int lwr, int upr, int *pnByte); /* Print a single line of the "Parser Stats" output */ static void stats_line(const char *zLabel, int iValue) { int nLabel = lemonStrlen(zLabel); printf(" %s%.*s %5d\n", zLabel, 35-nLabel, "................................", iValue); } /* The main program. Parse the command line and do it... */ int main(int argc, char **argv) { static int version = 0; static int rpflag = 0; static int basisflag = 0; static int compress = 0; static int quiet = 0; static int statistics = 0; static int mhflag = 0; static int nolinenosflag = 0; static int noResort = 0; static struct s_options options[] = { {OPT_FLAG, "b", (char *)&basisflag, "Print only the basis in report."}, {OPT_FLAG, "c", (char *)&compress, "Don't compress the action table."}, {OPT_FSTR, "D", (char *)handle_D_option, "Define an %ifdef macro."}, {OPT_FSTR, "f", 0, "Ignored. (Placeholder for -f compiler options.)"}, {OPT_FLAG, "g", (char *)&rpflag, "Print grammar without actions."}, {OPT_FSTR, "I", 0, "Ignored. (Placeholder for '-I' compiler options.)"}, {OPT_FLAG, "m", (char *)&mhflag, "Output a makeheaders compatible file."}, {OPT_FLAG, "l", (char *)&nolinenosflag, "Do not print #line statements."}, {OPT_FSTR, "O", 0, "Ignored. (Placeholder for '-O' compiler options.)"}, {OPT_FLAG, "p", (char *)&showPrecedenceConflict, "Show conflicts resolved by precedence rules"}, {OPT_FLAG, "q", (char *)&quiet, "(Quiet) Don't print the report file."}, {OPT_FLAG, "r", (char *)&noResort, "Do not sort or renumber states"}, {OPT_FLAG, "s", (char *)&statistics, "Print parser stats to standard output."}, {OPT_FLAG, "x", (char *)&version, "Print the version number."}, {OPT_FSTR, "T", (char *)handle_T_option, "Specify a template file."}, {OPT_FSTR, "W", 0, "Ignored. (Placeholder for '-W' compiler options.)"}, {OPT_FSTR, "o", (char *)handle_o_option, "Specify output c file."}, {OPT_FSTR, "h", (char *)handle_h_option, "Specify output header file."}, {OPT_FLAG,0,0,0} }; int i; int exitcode; struct lemon lem; struct rule *rp; OptInit(argv,options,stderr); if (version) { printf("Lemon version 1.0\n"); exit(0); } if (OptNArgs()!=1) { fprintf(stderr,"Exactly one filename argument is required.\n"); exit(1); } memset(&lem, 0, sizeof(lem)); lem.errorcnt = 0; /* Initialize the machine */ Strsafe_init(); Symbol_init(); State_init(); lem.argv0 = argv[0]; lem.filename = OptArg(0); lem.basisflag = basisflag; lem.nolinenosflag = nolinenosflag; Symbol_new("$"); lem.errsym = Symbol_new("error"); lem.errsym->useCnt = 0; /* Parse the input file */ Parse(&lem); if (lem.errorcnt) exit(lem.errorcnt); if (lem.nrule == 0) { fprintf(stderr,"Empty grammar.\n"); exit(1); } /* Count and index the symbols of the grammar */ Symbol_new("{default}"); lem.nsymbol = Symbol_count(); lem.symbols = Symbol_arrayof(); for(i = 0 ; i < lem.nsymbol; i++) lem.symbols[i]->index = i; qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol *), Symbolcmpp); for(i = 0 ; i < lem.nsymbol; i++) lem.symbols[i]->index = i; while(lem.symbols[i-1]->type == MULTITERMINAL) { i--; } assert( strcmp(lem.symbols[i-1]->name,"{default}") == 0); lem.nsymbol = i - 1; for(i = 1; ISUPPER(lem.symbols[i]->name[0]); i++); lem.nterminal = i; /* Assign sequential rule numbers */ for(i = 0 , rp=lem.rule; rp; rp=rp->next) { rp->iRule = rp->code ? i++ : -1; } for(rp=lem.rule; rp; rp=rp->next) { if (rp->iRule<0) rp->iRule = i++; } lem.startRule = lem.rule; lem.rule = Rule_sort(lem.rule); /* Generate a reprint of the grammar, if requested on the command line */ if (rpflag) { Reprint(&lem); } else{ /* Initialize the size for all follow and first sets */ SetSize(lem.nterminal+1); /* Find the precedence for every production rule (that has one) */ FindRulePrecedences(&lem); /* Compute the lambda-nonterminals and the first-sets for every ** nonterminal */ FindFirstSets(&lem); /* Compute all LR(0) states. Also record follow-set propagation ** links so that the follow-set can be computed later */ lem.nstate = 0; FindStates(&lem); lem.sorted = State_arrayof(); /* Tie up loose ends on the propagation links */ FindLinks(&lem); /* Compute the follow set of every reducible configuration */ FindFollowSets(&lem); /* Compute the action tables */ FindActions(&lem); /* Compress the action tables */ if (compress == 0) CompressTables(&lem); /* Reorder and renumber the states so that states with fewer choices ** occur at the end. This is an optimization that helps make the ** generated parser tables smaller. */ if (noResort == 0) ResortStates(&lem); /* Generate a report of the parser generated. (the "y.output" file) */ if (!quiet) ReportOutput(&lem); /* Generate the source code for the parser */ ReportTable(&lem, mhflag); /* Produce a header file for use by the scanner. (This step is ** omitted if the "-m" option is used because makeheaders will ** generate the file for us.) */ if (!mhflag) ReportHeader(&lem); } if (statistics) { printf("Parser statistics:\n"); stats_line("terminal symbols", lem.nterminal); stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); stats_line("total symbols", lem.nsymbol); stats_line("rules", lem.nrule); stats_line("states", lem.nxstate); stats_line("conflicts", lem.nconflict); stats_line("action table entries", lem.nactiontab); stats_line("total table size (bytes)", lem.tablesize); } if (lem.nconflict > 0) { fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); } /* return 0 on success, 1 on failure. */ exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; exit(exitcode); return (exitcode); } /******************** From the file "msort.c" *******************************/ /* ** A generic merge-sort program. ** ** USAGE: ** Let "ptr" be a pointer to some structure which is at the head of ** a null-terminated list. Then to sort the list call: ** ** ptr = msort(ptr,&(ptr->next),cmpfnc); ** ** In the above, "cmpfnc" is a pointer to a function which compares ** two instances of the structure and returns an integer, as in ** strcmp. The second argument is a pointer to the pointer to the ** second element of the linked list. This address is used to compute ** the offset to the "next" field within the structure. The offset to ** the "next" field must be constant for all structures in the list. ** ** The function returns a new pointer which is the head of the list ** after sorting. ** ** ALGORITHM: ** Merge-sort. */ /* ** Return a pointer to the next structure in the linked list. */ #define NEXT(A) (*(char **)(((char *)A)+offset)) /* ** Inputs: ** a: A sorted, null-terminated linked list. (May be null). ** b: A sorted, null-terminated linked list. (May be null). ** cmp: A pointer to the comparison function. ** offset: Offset in the structure to the "next" field. ** ** Return Value: ** A pointer to the head of a sorted list containing the elements ** of both a and b. ** ** Side effects: ** The "next" pointers for elements in the lists a and b are ** changed. */ static char *merge( char *a, char *b, int (*cmp)(const char *,const char *), int offset ) { char *ptr, *head; if (a == 0) { head = b; } else if (b == 0) { head = a; } else{ if ((*cmp)(a,b)<=0) { ptr = a; a = NEXT(a); } else{ ptr = b; b = NEXT(b); } head = ptr; while(a && b) { if ((*cmp)(a,b)<=0) { NEXT(ptr) = a; ptr = a; a = NEXT(a); } else{ NEXT(ptr) = b; ptr = b; b = NEXT(b); } } if (a) NEXT(ptr) = a; else NEXT(ptr) = b; } return head; } /* ** Inputs: ** list: Pointer to a singly-linked list of structures. ** next: Pointer to pointer to the second element of the list. ** cmp: A comparison function. ** ** Return Value: ** A pointer to the head of a sorted list containing the elements ** orginally in list. ** ** Side effects: ** The "next" pointers for elements in list are changed. */ #define LISTSIZE 30 static char *msort( char *list, char **next, int (*cmp)(const char *,const char *) ) { unsigned long offset; char *ep; char *set[LISTSIZE]; int i; offset = (unsigned long)((char *)next - (char *)list); for(i = 0 ; i < LISTSIZE; i++) set[i] = 0; while(list) { ep = list; list = NEXT(list); NEXT(ep) = 0; for(i = 0 ; i < LISTSIZE-1 && set[i]!=0; i++) { ep = merge(ep,set[i],cmp,offset); set[i] = 0; } set[i] = ep; } ep = 0; for(i = 0 ; i < LISTSIZE; i++) if (set[i] ) ep = merge(set[i],ep,cmp,offset); return ep; } /************************ From the file "option.c" **************************/ static char **argv; static struct s_options *op; static FILE *errstream; #define ISOPT(X) ((X)[0] == '-'||(X)[0] == '+'||strchr((X),'=')!=0) /* ** Print the command line with a carrot pointing to the k-th character ** of the n-th field. */ static void errline(int n, int k, FILE *err) { int spcnt, i; if (argv[0] ) fprintf(err,"%s",argv[0]); spcnt = lemonStrlen(argv[0]) + 1; for(i = 1; i < n && argv[i]; i++) { fprintf(err," %s",argv[i]); spcnt += lemonStrlen(argv[i])+1; } spcnt += k; for(; argv[i]; i++) fprintf(err," %s",argv[i]); if (spcnt<20) { fprintf(err,"\n%*s^-- here\n",spcnt,""); } else{ fprintf(err,"\n%*shere --^\n",spcnt-7,""); } } /* ** Return the index of the N-th non-switch argument. Return -1 ** if N is out of range. */ static int argindex(int n) { int i; int dashdash = 0; if (argv!=0 && *argv!=0) { for(i = 1; argv[i]; i++) { if (dashdash || !ISOPT(argv[i]) ) { if (n == 0) return i; n--; } if (strcmp(argv[i],"--") == 0) dashdash = 1; } } return -1; } static char emsg[] = "Command line syntax error: "; /* ** Process a flag command line argument. */ static int handleflags(int i, FILE *err) { int v; int errcnt = 0; int j; for(j=0; op[j].label; j++) { if (strncmp(&argv[i][1],op[j].label,lemonStrlen(op[j].label)) == 0) break; } v = argv[i][0] == '-' ? 1 : 0; if (op[j].label == 0) { if (err) { fprintf(err,"%sundefined option.\n",emsg); errline(i,1,err); } errcnt++; } else if (op[j].arg == 0) { /* Ignore this option */ } else if (op[j].type == OPT_FLAG) { *((int *)op[j].arg) = v; } else if (op[j].type == OPT_FFLAG) { (*(void(*)(int))(op[j].arg))(v); } else if (op[j].type == OPT_FSTR) { (*(void(*)(char *))(op[j].arg))(&argv[i][2]); } else{ if (err) { fprintf(err,"%smissing argument on switch.\n",emsg); errline(i,1,err); } errcnt++; } return errcnt; } /* ** Process a command line switch which has an argument. */ static int handleswitch(int i, FILE *err) { int lv = 0; double dv = 0.0; char *sv = 0, *end; char *cp; int j; int errcnt = 0; cp = strchr(argv[i],'='); assert( cp!=0); *cp = 0; for(j=0; op[j].label; j++) { if (strcmp(argv[i],op[j].label) == 0) break; } *cp = '='; if (op[j].label == 0) { if (err) { fprintf(err,"%sundefined option.\n",emsg); errline(i,0,err); } errcnt++; } else{ cp++; switch(op[j].type) { case OPT_FLAG: case OPT_FFLAG: if (err) { fprintf(err,"%soption requires an argument.\n",emsg); errline(i,0,err); } errcnt++; break; case OPT_DBL: case OPT_FDBL: dv = strtod(cp,&end); if (*end) { if (err) { fprintf(err, "%sillegal character in floating-point argument.\n",emsg); errline(i,(int)((char *)end-(char *)argv[i]),err); } errcnt++; } break; case OPT_INT: case OPT_FINT: lv = strtol(cp,&end,0); if (*end) { if (err) { fprintf(err,"%sillegal character in integer argument.\n",emsg); errline(i,(int)((char *)end-(char *)argv[i]),err); } errcnt++; } break; case OPT_STR: case OPT_FSTR: sv = cp; break; } switch(op[j].type) { case OPT_FLAG: case OPT_FFLAG: break; case OPT_DBL: *(double *)(op[j].arg) = dv; break; case OPT_FDBL: (*(void(*)(double))(op[j].arg))(dv); break; case OPT_INT: *(int *)(op[j].arg) = lv; break; case OPT_FINT: (*(void(*)(int))(op[j].arg))((int)lv); break; case OPT_STR: *(char **)(op[j].arg) = sv; break; case OPT_FSTR: (*(void(*)(char *))(op[j].arg))(sv); break; } } return errcnt; } int OptInit(char **a, struct s_options *o, FILE *err) { int errcnt = 0; argv = a; op = o; errstream = err; if (argv && *argv && op) { int i; for(i = 1; argv[i]; i++) { if (argv[i][0] == '+' || argv[i][0] == '-' ) { errcnt += handleflags(i,err); } else if (strchr(argv[i],'=') ) { errcnt += handleswitch(i,err); } } } if (errcnt>0) { fprintf(err,"Valid command line options for \"%s\" are:\n",*a); OptPrint(); exit(1); } return 0; } int OptNArgs() { int cnt = 0; int dashdash = 0; int i; if (argv!=0 && argv[0]!=0) { for(i = 1; argv[i]; i++) { if (dashdash || !ISOPT(argv[i]) ) cnt++; if (strcmp(argv[i],"--") == 0) dashdash = 1; } } return cnt; } char *OptArg(int n) { int i; i = argindex(n); return i >= 0 ? argv[i] : 0; } void OptErr(int n) { int i; i = argindex(n); if (i >= 0) errline(i,0,errstream); } void OptPrint() { int i; int max, len; max = 0; for(i = 0 ; op[i].label; i++) { len = lemonStrlen(op[i].label) + 1; switch(op[i].type) { case OPT_FLAG: case OPT_FFLAG: break; case OPT_INT: case OPT_FINT: len += 9; /* length of "" */ break; case OPT_DBL: case OPT_FDBL: len += 6; /* length of "" */ break; case OPT_STR: case OPT_FSTR: len += 8; /* length of "" */ break; } if (len>max) max = len; } for(i = 0 ; op[i].label; i++) { switch(op[i].type) { case OPT_FLAG: case OPT_FFLAG: fprintf(errstream," -%-*s %s\n",max,op[i].label,op[i].message); break; case OPT_INT: case OPT_FINT: fprintf(errstream," -%s%*s %s\n",op[i].label, (int)(max-lemonStrlen(op[i].label)-9),"",op[i].message); break; case OPT_DBL: case OPT_FDBL: fprintf(errstream," -%s%*s %s\n",op[i].label, (int)(max-lemonStrlen(op[i].label)-6),"",op[i].message); break; case OPT_STR: case OPT_FSTR: fprintf(errstream," -%s%*s %s\n",op[i].label, (int)(max-lemonStrlen(op[i].label)-8),"",op[i].message); break; } } } /*********************** From the file "parse.c" ****************************/ /* ** Input file parser for the LEMON parser generator. */ /* The state of the parser */ enum e_state { INITIALIZE, WAITING_FOR_DECL_OR_RULE, WAITING_FOR_DECL_KEYWORD, WAITING_FOR_DECL_ARG, WAITING_FOR_PRECEDENCE_SYMBOL, WAITING_FOR_ARROW, IN_RHS, LHS_ALIAS_1, LHS_ALIAS_2, LHS_ALIAS_3, RHS_ALIAS_1, RHS_ALIAS_2, PRECEDENCE_MARK_1, PRECEDENCE_MARK_2, RESYNC_AFTER_RULE_ERROR, RESYNC_AFTER_DECL_ERROR, WAITING_FOR_DESTRUCTOR_SYMBOL, WAITING_FOR_DATATYPE_SYMBOL, WAITING_FOR_FALLBACK_ID, WAITING_FOR_WILDCARD_ID, WAITING_FOR_CLASS_ID, WAITING_FOR_CLASS_TOKEN }; struct pstate { char *filename; /* Name of the input file */ int tokenlineno; /* Linenumber at which current token starts */ int errorcnt; /* Number of errors so far */ char *tokenstart; /* Text of current token */ struct lemon *gp; /* Global state vector */ enum e_state state; /* The state of the parser */ struct symbol *fallback; /* The fallback token */ struct symbol *tkclass; /* Token class symbol */ struct symbol *lhs; /* Left-hand side of current rule */ const char *lhsalias; /* Alias for the LHS */ int nrhs; /* Number of right-hand side symbols seen */ struct symbol *rhs[MAXRHS]; /* RHS symbols */ const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */ struct rule *prevrule; /* Previous rule parsed */ const char *declkeyword; /* Keyword of a declaration */ char **declargslot; /* Where the declaration argument should be put */ int insertLineMacro; /* Add #line before declaration insert */ int *decllinenoslot; /* Where to write declaration line number */ enum e_assoc declassoc; /* Assign this association to decl arguments */ int preccounter; /* Assign this precedence to decl arguments */ struct rule *firstrule; /* Pointer to first rule in the grammar */ struct rule *lastrule; /* Pointer to the most recently parsed rule */ }; /* Parse a single token */ static void parseonetoken(struct pstate *psp) { const char *x; x = Strsafe(psp->tokenstart); /* Save the token permanently */ #if 0 printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno, x,psp->state); #endif switch(psp->state) { case INITIALIZE: psp->prevrule = 0; psp->preccounter = 0; psp->firstrule = psp->lastrule = 0; psp->gp->nrule = 0; /* Fall thru to next case */ case WAITING_FOR_DECL_OR_RULE: if (x[0] == '%' ) { psp->state = WAITING_FOR_DECL_KEYWORD; } else if (ISLOWER(x[0]) ) { psp->lhs = Symbol_new(x); psp->nrhs = 0; psp->lhsalias = 0; psp->state = WAITING_FOR_ARROW; } else if (x[0] == '{' ) { if (psp->prevrule == 0) { ErrorMsg(psp->filename,psp->tokenlineno, "There is no prior rule upon which to attach the code \ fragment which begins on this line."); psp->errorcnt++; } else if (psp->prevrule->code!=0) { ErrorMsg(psp->filename,psp->tokenlineno, "Code fragment beginning on this line is not the first \ to follow the previous rule."); psp->errorcnt++; } else{ psp->prevrule->line = psp->tokenlineno; psp->prevrule->code = &x[1]; } } else if (x[0] == '[' ) { psp->state = PRECEDENCE_MARK_1; } else{ ErrorMsg(psp->filename,psp->tokenlineno, "Token \"%s\" should be either \"%%\" or a nonterminal name.", x); psp->errorcnt++; } break; case PRECEDENCE_MARK_1: if (!ISUPPER(x[0]) ) { ErrorMsg(psp->filename,psp->tokenlineno, "The precedence symbol must be a terminal."); psp->errorcnt++; } else if (psp->prevrule == 0) { ErrorMsg(psp->filename,psp->tokenlineno, "There is no prior rule to assign precedence \"[%s]\".",x); psp->errorcnt++; } else if (psp->prevrule->precsym!=0) { ErrorMsg(psp->filename,psp->tokenlineno, "Precedence mark on this line is not the first \ to follow the previous rule."); psp->errorcnt++; } else{ psp->prevrule->precsym = Symbol_new(x); } psp->state = PRECEDENCE_MARK_2; break; case PRECEDENCE_MARK_2: if (x[0]!=']' ) { ErrorMsg(psp->filename,psp->tokenlineno, "Missing \"]\" on precedence mark."); psp->errorcnt++; } psp->state = WAITING_FOR_DECL_OR_RULE; break; case WAITING_FOR_ARROW: if (x[0] == ':' && x[1] == ':' && x[2] == '=' ) { psp->state = IN_RHS; } else if (x[0] == '(' ) { psp->state = LHS_ALIAS_1; } else{ ErrorMsg(psp->filename,psp->tokenlineno, "Expected to see a \":\" following the LHS symbol \"%s\".", psp->lhs->name); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case LHS_ALIAS_1: if (ISALPHA(x[0]) ) { psp->lhsalias = x; psp->state = LHS_ALIAS_2; } else{ ErrorMsg(psp->filename,psp->tokenlineno, "\"%s\" is not a valid alias for the LHS \"%s\"\n", x,psp->lhs->name); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case LHS_ALIAS_2: if (x[0] == ')' ) { psp->state = LHS_ALIAS_3; } else{ ErrorMsg(psp->filename,psp->tokenlineno, "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case LHS_ALIAS_3: if (x[0] == ':' && x[1] == ':' && x[2] == '=' ) { psp->state = IN_RHS; } else{ ErrorMsg(psp->filename,psp->tokenlineno, "Missing \"->\" following: \"%s(%s)\".", psp->lhs->name,psp->lhsalias); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case IN_RHS: if (x[0] == '.' ) { struct rule *rp; rp = (struct rule *)calloc( sizeof(struct rule) + sizeof(struct symbol *)*psp->nrhs + sizeof(char *)*psp->nrhs, 1); if (rp == 0) { ErrorMsg(psp->filename,psp->tokenlineno, "Can't allocate enough memory for this rule."); psp->errorcnt++; psp->prevrule = 0; } else{ int i; rp->ruleline = psp->tokenlineno; rp->rhs = (struct symbol **)&rp[1]; rp->rhsalias = (const char **)&(rp->rhs[psp->nrhs]); for(i = 0 ; i < psp->nrhs; i++) { rp->rhs[i] = psp->rhs[i]; rp->rhsalias[i] = psp->alias[i]; } rp->lhs = psp->lhs; rp->lhsalias = psp->lhsalias; rp->nrhs = psp->nrhs; rp->code = 0; rp->precsym = 0; rp->index = psp->gp->nrule++; rp->nextlhs = rp->lhs->rule; rp->lhs->rule = rp; rp->next = 0; if (psp->firstrule == 0) { psp->firstrule = psp->lastrule = rp; } else{ psp->lastrule->next = rp; psp->lastrule = rp; } psp->prevrule = rp; } psp->state = WAITING_FOR_DECL_OR_RULE; } else if (ISALPHA(x[0]) ) { if (psp->nrhs>=MAXRHS) { ErrorMsg(psp->filename,psp->tokenlineno, "Too many symbols on RHS of rule beginning at \"%s\".", x); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } else{ psp->rhs[psp->nrhs] = Symbol_new(x); psp->alias[psp->nrhs] = 0; psp->nrhs++; } } else if ((x[0] == '|' || x[0] == '/') && psp->nrhs>0) { struct symbol *msp = psp->rhs[psp->nrhs-1]; if (msp->type!=MULTITERMINAL) { struct symbol *origsp = msp; msp = (struct symbol *) calloc(1,sizeof(*msp)); memset(msp, 0, sizeof(*msp)); msp->type = MULTITERMINAL; msp->nsubsym = 1; msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol *)); msp->subsym[0] = origsp; msp->name = origsp->name; psp->rhs[psp->nrhs-1] = msp; } msp->nsubsym++; msp->subsym = (struct symbol **) realloc(msp->subsym, sizeof(struct symbol *)*msp->nsubsym); msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); if (!ISUPPER(x[1])) { ErrorMsg(psp->filename,psp->tokenlineno, "Compound should be TERM|TERM without spaces"); psp->errorcnt++; } if (ISLOWER(x[1]) || ISLOWER(msp->subsym[0]->name[0]) ) { ErrorMsg(psp->filename,psp->tokenlineno, "Cannot form a compound containing a non-terminal"); psp->errorcnt++; } } else if (x[0] == '(' && psp->nrhs>0) { psp->state = RHS_ALIAS_1; } else{ ErrorMsg(psp->filename,psp->tokenlineno, "Illegal character on RHS of rule: \"%s\".",x); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case RHS_ALIAS_1: if (ISALPHA(x[0]) ) { psp->alias[psp->nrhs-1] = x; psp->state = RHS_ALIAS_2; } else{ ErrorMsg(psp->filename,psp->tokenlineno, "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", x,psp->rhs[psp->nrhs-1]->name); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case RHS_ALIAS_2: if (x[0] == ')' ) { psp->state = IN_RHS; } else{ ErrorMsg(psp->filename,psp->tokenlineno, "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case WAITING_FOR_DECL_KEYWORD: if (ISALPHA(x[0]) ) { psp->declkeyword = x; psp->declargslot = 0; psp->decllinenoslot = 0; psp->insertLineMacro = 1; psp->state = WAITING_FOR_DECL_ARG; if (strcmp(x,"name") == 0) { psp->declargslot = &(psp->gp->name); psp->insertLineMacro = 0; } else if (strcmp(x,"include") == 0) { psp->declargslot = &(psp->gp->include); } else if (strcmp(x,"code") == 0) { psp->declargslot = &(psp->gp->extracode); } else if (strcmp(x,"token_destructor") == 0) { psp->declargslot = &psp->gp->tokendest; } else if (strcmp(x,"default_destructor") == 0) { psp->declargslot = &psp->gp->vardest; } else if (strcmp(x,"token_prefix") == 0) { psp->declargslot = &psp->gp->tokenprefix; psp->insertLineMacro = 0; } else if (strcmp(x,"syntax_error") == 0) { psp->declargslot = &(psp->gp->error); } else if (strcmp(x,"parse_accept") == 0) { psp->declargslot = &(psp->gp->accept); } else if (strcmp(x,"parse_failure") == 0) { psp->declargslot = &(psp->gp->failure); } else if (strcmp(x,"stack_overflow") == 0) { psp->declargslot = &(psp->gp->overflow); } else if (strcmp(x,"extra_argument") == 0) { psp->declargslot = &(psp->gp->arg); psp->insertLineMacro = 0; } else if (strcmp(x,"token_type") == 0) { psp->declargslot = &(psp->gp->tokentype); psp->insertLineMacro = 0; } else if (strcmp(x,"default_type") == 0) { psp->declargslot = &(psp->gp->vartype); psp->insertLineMacro = 0; } else if (strcmp(x,"stack_size") == 0) { psp->declargslot = &(psp->gp->stacksize); psp->insertLineMacro = 0; } else if (strcmp(x,"start_symbol") == 0) { psp->declargslot = &(psp->gp->start); psp->insertLineMacro = 0; } else if (strcmp(x,"left") == 0) { psp->preccounter++; psp->declassoc = LEFT; psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; } else if (strcmp(x,"right") == 0) { psp->preccounter++; psp->declassoc = RIGHT; psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; } else if (strcmp(x,"nonassoc") == 0) { psp->preccounter++; psp->declassoc = NONE; psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; } else if (strcmp(x,"destructor") == 0) { psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; } else if (strcmp(x,"type") == 0) { psp->state = WAITING_FOR_DATATYPE_SYMBOL; } else if (strcmp(x,"fallback") == 0) { psp->fallback = 0; psp->state = WAITING_FOR_FALLBACK_ID; } else if (strcmp(x,"wildcard") == 0) { psp->state = WAITING_FOR_WILDCARD_ID; } else if (strcmp(x,"token_class") == 0) { psp->state = WAITING_FOR_CLASS_ID; } else{ ErrorMsg(psp->filename,psp->tokenlineno, "Unknown declaration keyword: \"%%%s\".",x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } } else{ ErrorMsg(psp->filename,psp->tokenlineno, "Illegal declaration keyword: \"%s\".",x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } break; case WAITING_FOR_DESTRUCTOR_SYMBOL: if (!ISALPHA(x[0]) ) { ErrorMsg(psp->filename,psp->tokenlineno, "Symbol name missing after %%destructor keyword"); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } else{ struct symbol *sp = Symbol_new(x); psp->declargslot = &sp->destructor; psp->decllinenoslot = &sp->destLineno; psp->insertLineMacro = 1; psp->state = WAITING_FOR_DECL_ARG; } break; case WAITING_FOR_DATATYPE_SYMBOL: if (!ISALPHA(x[0]) ) { ErrorMsg(psp->filename,psp->tokenlineno, "Symbol name missing after %%type keyword"); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } else{ struct symbol *sp = Symbol_find(x); if ((sp) && (sp->datatype)) { ErrorMsg(psp->filename,psp->tokenlineno, "Symbol %%type \"%s\" already defined", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } else{ if (!sp) { sp = Symbol_new(x); } psp->declargslot = &sp->datatype; psp->insertLineMacro = 0; psp->state = WAITING_FOR_DECL_ARG; } } break; case WAITING_FOR_PRECEDENCE_SYMBOL: if (x[0] == '.' ) { psp->state = WAITING_FOR_DECL_OR_RULE; } else if (ISUPPER(x[0]) ) { struct symbol *sp; sp = Symbol_new(x); if (sp->prec>=0) { ErrorMsg(psp->filename,psp->tokenlineno, "Symbol \"%s\" has already be given a precedence.",x); psp->errorcnt++; } else{ sp->prec = psp->preccounter; sp->assoc = psp->declassoc; } } else{ ErrorMsg(psp->filename,psp->tokenlineno, "Can't assign a precedence to \"%s\".",x); psp->errorcnt++; } break; case WAITING_FOR_DECL_ARG: if (x[0] == '{' || x[0] == '\"' || ISALNUM(x[0]) ) { const char *zOld, *zNew; char *zBuf, *z; int nOld, n, nLine = 0, nNew, nBack; int addLineMacro; char zLine[50]; zNew = x; if (zNew[0] == '"' || zNew[0] == '{' ) zNew++; nNew = lemonStrlen(zNew); if (*psp->declargslot) { zOld = *psp->declargslot; } else{ zOld = ""; } nOld = lemonStrlen(zOld); n = nOld + nNew + 20; addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro && (psp->decllinenoslot == 0 || psp->decllinenoslot[0]!=0); if (addLineMacro) { for(z=psp->filename, nBack=0; *z; z++) { if (*z == '\\' ) nBack++; } lemon_sprintf(zLine, "#line %d ", psp->tokenlineno); nLine = lemonStrlen(zLine); n += nLine + lemonStrlen(psp->filename) + nBack; } *psp->declargslot = (char *) realloc(*psp->declargslot, n); zBuf = *psp->declargslot + nOld; if (addLineMacro) { if (nOld && zBuf[-1]!='\n' ) { *(zBuf++) = '\n'; } memcpy(zBuf, zLine, nLine); zBuf += nLine; *(zBuf++) = '"'; for(z=psp->filename; *z; z++) { if (*z == '\\' ) { *(zBuf++) = '\\'; } *(zBuf++) = *z; } *(zBuf++) = '"'; *(zBuf++) = '\n'; } if (psp->decllinenoslot && psp->decllinenoslot[0] == 0) { psp->decllinenoslot[0] = psp->tokenlineno; } memcpy(zBuf, zNew, nNew); zBuf += nNew; *zBuf = 0; psp->state = WAITING_FOR_DECL_OR_RULE; } else{ ErrorMsg(psp->filename,psp->tokenlineno, "Illegal argument to %%%s: %s",psp->declkeyword,x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } break; case WAITING_FOR_FALLBACK_ID: if (x[0] == '.' ) { psp->state = WAITING_FOR_DECL_OR_RULE; } else if (!ISUPPER(x[0]) ) { ErrorMsg(psp->filename, psp->tokenlineno, "%%fallback argument \"%s\" should be a token", x); psp->errorcnt++; } else{ struct symbol *sp = Symbol_new(x); if (psp->fallback == 0) { psp->fallback = sp; } else if (sp->fallback) { ErrorMsg(psp->filename, psp->tokenlineno, "More than one fallback assigned to token %s", x); psp->errorcnt++; } else{ sp->fallback = psp->fallback; psp->gp->has_fallback = 1; } } break; case WAITING_FOR_WILDCARD_ID: if (x[0] == '.' ) { psp->state = WAITING_FOR_DECL_OR_RULE; } else if (!ISUPPER(x[0]) ) { ErrorMsg(psp->filename, psp->tokenlineno, "%%wildcard argument \"%s\" should be a token", x); psp->errorcnt++; } else{ struct symbol *sp = Symbol_new(x); if (psp->gp->wildcard == 0) { psp->gp->wildcard = sp; } else{ ErrorMsg(psp->filename, psp->tokenlineno, "Extra wildcard to token: %s", x); psp->errorcnt++; } } break; case WAITING_FOR_CLASS_ID: if (!ISLOWER(x[0]) ) { ErrorMsg(psp->filename, psp->tokenlineno, "%%token_class must be followed by an identifier: ", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } else if (Symbol_find(x) ) { ErrorMsg(psp->filename, psp->tokenlineno, "Symbol \"%s\" already used", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } else{ psp->tkclass = Symbol_new(x); psp->tkclass->type = MULTITERMINAL; psp->state = WAITING_FOR_CLASS_TOKEN; } break; case WAITING_FOR_CLASS_TOKEN: if (x[0] == '.' ) { psp->state = WAITING_FOR_DECL_OR_RULE; } else if (ISUPPER(x[0]) || ((x[0] == '|' || x[0] == '/') && ISUPPER(x[1])) ) { struct symbol *msp = psp->tkclass; msp->nsubsym++; msp->subsym = (struct symbol **) realloc(msp->subsym, sizeof(struct symbol *)*msp->nsubsym); if (!ISUPPER(x[0]) ) x++; msp->subsym[msp->nsubsym-1] = Symbol_new(x); } else{ ErrorMsg(psp->filename, psp->tokenlineno, "%%token_class argument \"%s\" should be a token", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } break; case RESYNC_AFTER_RULE_ERROR: /* if (x[0] == '.' ) psp->state = WAITING_FOR_DECL_OR_RULE; ** break; */ case RESYNC_AFTER_DECL_ERROR: if (x[0] == '.' ) psp->state = WAITING_FOR_DECL_OR_RULE; if (x[0] == '%' ) psp->state = WAITING_FOR_DECL_KEYWORD; break; } } /* Run the preprocessor over the input file text. The global variables ** azDefine[0] through azDefine[nDefine-1] contains the names of all defined ** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and ** comments them out. Text in between is also commented out as appropriate. */ static void preprocess_input(char *z) { int i, j, k, n; int exclude = 0; int start = 0; int lineno = 1; int start_lineno = 1; for(i = 0 ; z[i]; i++) { if (z[i] == '\n' ) lineno++; if (z[i]!='%' || (i > 0 && z[i-1]!='\n') ) continue; if (strncmp(&z[i],"%endif",6) == 0 && ISSPACE(z[i+6]) ) { if (exclude) { exclude--; if (exclude == 0) { for(j=start; jfilename; ps.errorcnt = 0; ps.state = INITIALIZE; /* Begin by reading the input file */ fp = fopen(ps.filename,"rb"); if (fp == 0) { ErrorMsg(ps.filename,0,"Can't open this file for reading."); gp->errorcnt++; return; } fseek(fp,0,2); filesize = ftell(fp); rewind(fp); filebuf = (char *)malloc( filesize+1); if (filesize>100000000 || filebuf == 0) { ErrorMsg(ps.filename,0,"Input file too large."); free(filebuf); gp->errorcnt++; fclose(fp); return; } if (fread(filebuf,1,filesize,fp)!=filesize) { ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", filesize); free(filebuf); gp->errorcnt++; fclose(fp); return; } fclose(fp); filebuf[filesize] = 0; /* Make an initial pass through the file to handle %ifdef and %ifndef */ preprocess_input(filebuf); /* Now scan the text of the input file */ lineno = 1; for(cp=filebuf; (c= *cp)!=0; ) { if (c == '\n' ) lineno++; /* Keep track of the line number */ if (ISSPACE(c) ) { cp++; continue; } /* Skip all white space */ if (c == '/' && cp[1] == '/' ) { /* Skip C++ style comments */ cp+=2; while((c= *cp)!=0 && c!='\n' ) cp++; continue; } if (c == '/' && cp[1] == '*' ) { /* Skip C style comments */ cp+=2; while((c= *cp)!=0 && (c!='/' || cp[-1]!='*') ) { if (c == '\n' ) lineno++; cp++; } if (c) cp++; continue; } ps.tokenstart = cp; /* Mark the beginning of the token */ ps.tokenlineno = lineno; /* Linenumber on which token begins */ if (c == '\"' ) { /* String literals */ cp++; while((c= *cp)!=0 && c!='\"' ) { if (c == '\n' ) lineno++; cp++; } if (c == 0) { ErrorMsg(ps.filename,startline, "String starting on this line is not terminated before the end of the file."); ps.errorcnt++; nextcp = cp; } else{ nextcp = cp+1; } } else if (c == '{' ) { /* A block of C code */ int level; cp++; for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++) { if (c == '\n' ) lineno++; else if (c == '{' ) level++; else if (c == '}' ) level--; else if (c == '/' && cp[1] == '*' ) { /* Skip comments */ int prevc; cp = &cp[2]; prevc = 0; while((c= *cp)!=0 && (c!='/' || prevc!='*') ) { if (c == '\n' ) lineno++; prevc = c; cp++; } } else if (c == '/' && cp[1] == '/' ) { /* Skip C++ style comments too */ cp = &cp[2]; while((c= *cp)!=0 && c!='\n' ) cp++; if (c) lineno++; } else if (c == '\'' || c == '\"' ) { /* String a character literals */ int startchar, prevc; startchar = c; prevc = 0; for(cp++; (c= *cp)!=0 && (c!=startchar || prevc == '\\'); cp++) { if (c == '\n' ) lineno++; if (prevc == '\\' ) prevc = 0; else prevc = c; } } } if (c == 0) { ErrorMsg(ps.filename,ps.tokenlineno, "C code starting on this line is not terminated before the end of the file."); ps.errorcnt++; nextcp = cp; } else{ nextcp = cp+1; } } else if (ISALNUM(c) ) { /* Identifiers */ while((c= *cp)!=0 && (ISALNUM(c) || c == '_') ) cp++; nextcp = cp; } else if (c == ':' && cp[1] == ':' && cp[2] == '=' ) { /* The operator "::=" */ cp += 3; nextcp = cp; } else if ((c == '/' || c == '|') && ISALPHA(cp[1]) ) { cp += 2; while((c = *cp)!=0 && (ISALNUM(c) || c == '_') ) cp++; nextcp = cp; } else{ /* All other (one character) operators */ cp++; nextcp = cp; } c = *cp; *cp = 0; /* Null terminate the token */ parseonetoken(&ps); /* Parse the token */ *cp = (char)c; /* Restore the buffer */ cp = nextcp; } free(filebuf); /* Release the buffer after parsing */ gp->rule = ps.firstrule; gp->errorcnt = ps.errorcnt; } /*************************** From the file "plink.c" *********************/ /* ** Routines processing configuration follow-set propagation links ** in the LEMON parser generator. */ static struct plink *plink_freelist = 0; /* Allocate a new plink */ struct plink *Plink_new() { struct plink *newlink; if (plink_freelist == 0) { int i; int amt = 100; plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) ); if (plink_freelist == 0) { fprintf(stderr, "Unable to allocate memory for a new follow-set propagation link.\n"); exit(1); } for(i = 0 ; i < amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1]; plink_freelist[amt-1].next = 0; } newlink = plink_freelist; plink_freelist = plink_freelist->next; return newlink; } /* Add a plink to a plink list */ void Plink_add(struct plink **plpp, struct config *cfp) { struct plink *newlink; newlink = Plink_new(); newlink->next = *plpp; *plpp = newlink; newlink->cfp = cfp; } /* Transfer every plink on the list "from" to the list "to" */ void Plink_copy(struct plink **to, struct plink *from) { struct plink *nextpl; while(from) { nextpl = from->next; from->next = *to; *to = from; from = nextpl; } } /* Delete every plink on the list */ void Plink_delete(struct plink *plp) { struct plink *nextpl; while(plp) { nextpl = plp->next; plp->next = plink_freelist; plink_freelist = plp; plp = nextpl; } } /*********************** From the file "report.c" **************************/ /* ** Procedures for generating reports and tables in the LEMON parser generator. */ /* Generate a filename with the given suffix. Space to hold the ** name comes from malloc() and must be freed by the calling ** function. */ PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) { char *name; char *cp; name = (char *)malloc( lemonStrlen(lemp->filename) + lemonStrlen(suffix) + 5); if (name == 0) { fprintf(stderr,"Can't allocate space for a filename.\n"); exit(1); } lemon_strcpy(name,lemp->filename); cp = strrchr(name,'.'); if (cp) *cp = 0; lemon_strcat(name,suffix); return name; } /* Open a file with a name based on the name of the input file, ** but with a different (specified) suffix, and return a pointer ** to the stream */ PRIVATE FILE *file_open( struct lemon *lemp, const char *suffix, const char *mode ) { FILE *fp; if (lemp->outname) free(lemp->outname); lemp->outname = file_makename(lemp, suffix); fp = fopen(lemp->outname,mode); if (fp == 0 && *mode == 'w' ) { fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); lemp->errorcnt++; return 0; } return fp; } /* Duplicate the input file without comments and without actions ** on rules */ void Reprint(struct lemon *lemp) { struct rule *rp; struct symbol *sp; int i, j, maxlen, len, ncolumns, skip; printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); maxlen = 10; for(i = 0 ; i < lemp->nsymbol; i++) { sp = lemp->symbols[i]; len = lemonStrlen(sp->name); if (len>maxlen) maxlen = len; } ncolumns = 76/(maxlen+5); if (ncolumns<1) ncolumns = 1; skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; for(i = 0 ; i < skip; i++) { printf("//"); for(j=i; jnsymbol; j+=skip) { sp = lemp->symbols[j]; assert( sp->index == j); printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); } printf("\n"); } for(rp=lemp->rule; rp; rp=rp->next) { printf("%s",rp->lhs->name); /* if (rp->lhsalias) printf("(%s)",rp->lhsalias); */ printf(" ::="); for(i = 0 ; i < rp->nrhs; i++) { sp = rp->rhs[i]; if (sp->type == MULTITERMINAL) { printf(" %s", sp->subsym[0]->name); for(j=1; jnsubsym; j++) { printf("|%s", sp->subsym[j]->name); } } else{ printf(" %s", sp->name); } /* if (rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ } printf("."); if (rp->precsym) printf(" [%s]",rp->precsym->name); /* if (rp->code) printf("\n %s",rp->code); */ printf("\n"); } } /* Print a single rule. */ void RulePrint(FILE *fp, struct rule *rp, int iCursor) { struct symbol *sp; int i, j; fprintf(fp,"%s ::=",rp->lhs->name); for(i = 0 ; i <= rp->nrhs; i++) { if (i == iCursor) fprintf(fp," *"); if (i == rp->nrhs) break; sp = rp->rhs[i]; if (sp->type == MULTITERMINAL) { fprintf(fp," %s", sp->subsym[0]->name); for(j=1; jnsubsym; j++) { fprintf(fp,"|%s",sp->subsym[j]->name); } } else{ fprintf(fp," %s", sp->name); } } } /* Print the rule for a configuration. */ void ConfigPrint(FILE *fp, struct config *cfp) { RulePrint(fp, cfp->rp, cfp->dot); } /* #define TEST */ #if 0 /* Print a set */ PRIVATE void SetPrint(out,set,lemp) FILE *out; char *set; struct lemon *lemp; { int i; char *spacer; spacer = ""; fprintf(out,"%12s[",""); for(i = 0 ; i < lemp->nterminal; i++) { if (SetFind(set,i) ) { fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); spacer = " "; } } fprintf(out,"]\n"); } /* Print a plink chain */ PRIVATE void PlinkPrint(out,plp,tag) FILE *out; struct plink *plp; char *tag; { while(plp) { fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); ConfigPrint(out,plp->cfp); fprintf(out,"\n"); plp = plp->next; } } #endif /* Print an action to the given file descriptor. Return FALSE if ** nothing was actually printed. */ int PrintAction( struct action *ap, /* The action to print */ FILE *fp, /* Print the action here */ int indent /* Indent by this amount */ ) { int result = 1; switch(ap->type) { case SHIFT: { struct state *stp = ap->x.stp; fprintf(fp,"%*s shift %-7d",indent,ap->sp->name,stp->statenum); break; } case REDUCE: { struct rule *rp = ap->x.rp; fprintf(fp,"%*s reduce %-7d",indent,ap->sp->name,rp->iRule); RulePrint(fp, rp, -1); break; } case SHIFTREDUCE: { struct rule *rp = ap->x.rp; fprintf(fp,"%*s shift-reduce %-7d",indent,ap->sp->name,rp->iRule); RulePrint(fp, rp, -1); break; } case ACCEPT: fprintf(fp,"%*s accept",indent,ap->sp->name); break; case ERROR: fprintf(fp,"%*s error",indent,ap->sp->name); break; case SRCONFLICT: case RRCONFLICT: fprintf(fp,"%*s reduce %-7d **Parsing conflict **", indent,ap->sp->name,ap->x.rp->iRule); break; case SSCONFLICT: fprintf(fp,"%*s shift %-7d **Parsing conflict **", indent,ap->sp->name,ap->x.stp->statenum); break; case SH_RESOLVED: if (showPrecedenceConflict) { fprintf(fp,"%*s shift %-7d -- dropped by precedence", indent,ap->sp->name,ap->x.stp->statenum); } else{ result = 0; } break; case RD_RESOLVED: if (showPrecedenceConflict) { fprintf(fp,"%*s reduce %-7d -- dropped by precedence", indent,ap->sp->name,ap->x.rp->iRule); } else{ result = 0; } break; case NOT_USED: result = 0; break; } return result; } /* Generate the "*.out" log file */ void ReportOutput(struct lemon *lemp) { int i; struct state *stp; struct config *cfp; struct action *ap; FILE *fp; fp = file_open(lemp,".out","wb"); if (fp == 0) return; for(i = 0 ; i < lemp->nxstate; i++) { stp = lemp->sorted[i]; fprintf(fp,"State %d:\n",stp->statenum); if (lemp->basisflag) cfp=stp->bp; else cfp=stp->cfp; while(cfp) { char buf[20]; if (cfp->dot == cfp->rp->nrhs) { lemon_sprintf(buf,"(%d)",cfp->rp->iRule); fprintf(fp," %5s ",buf); } else{ fprintf(fp," "); } ConfigPrint(fp,cfp); fprintf(fp,"\n"); #if 0 SetPrint(fp,cfp->fws,lemp); PlinkPrint(fp,cfp->fplp,"To "); PlinkPrint(fp,cfp->bplp,"From"); #endif if (lemp->basisflag) cfp=cfp->bp; else cfp=cfp->next; } fprintf(fp,"\n"); for(ap=stp->ap; ap; ap=ap->next) { if (PrintAction(ap,fp,30) ) fprintf(fp,"\n"); } fprintf(fp,"\n"); } fprintf(fp, "----------------------------------------------------\n"); fprintf(fp, "Symbols:\n"); for(i = 0 ; i < lemp->nsymbol; i++) { int j; struct symbol *sp; sp = lemp->symbols[i]; fprintf(fp, " %3d: %s", i, sp->name); if (sp->type == NONTERMINAL) { fprintf(fp, ":"); if (sp->lambda) { fprintf(fp, " "); } for(j=0; jnterminal; j++) { if (sp->firstset && SetFind(sp->firstset, j) ) { fprintf(fp, " %s", lemp->symbols[j]->name); } } } fprintf(fp, "\n"); } fclose(fp); return; } /* Search for the file "name" which is in the same directory as ** the exacutable */ PRIVATE char *pathsearch(char *argv0, char *name, int modemask) { const char *pathlist; char *pathbufptr; char *pathbuf; char *path,*cp; char c; #ifdef __WIN32__ cp = strrchr(argv0,'\\'); #else cp = strrchr(argv0,'/'); #endif if (cp) { c = *cp; *cp = 0; path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2); if (path) lemon_sprintf(path,"%s/%s",argv0,name); *cp = c; } else{ pathlist = getenv("PATH"); if (pathlist == 0) pathlist = ".:/bin:/usr/bin"; pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1); path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2); pathbufptr = pathbuf; if ((pathbuf != 0) && (path!=0) ) { lemon_strcpy(pathbuf, pathlist); while(*pathbuf) { cp = strchr(pathbuf,':'); if (cp == 0) cp = &pathbuf[lemonStrlen(pathbuf)]; c = *cp; *cp = 0; lemon_sprintf(path,"%s/%s",pathbuf,name); *cp = c; if (c == 0) pathbuf[0] = 0; else pathbuf = &cp[1]; if (access(path,modemask) == 0) break; } } if (pathbufptr) { free(pathbufptr); } } return path; } /* Given an action, compute the integer value for that action ** which is to be put in the action table of the generated machine. ** Return negative if no action should be generated. */ PRIVATE int compute_action(struct lemon *lemp, struct action *ap) { int act; switch(ap->type) { case SHIFT: act = ap->x.stp->statenum; break; case SHIFTREDUCE: act = ap->x.rp->iRule + lemp->nstate; break; case REDUCE: act = ap->x.rp->iRule + lemp->nstate+lemp->nrule; break; case ERROR: act = lemp->nstate + lemp->nrule *2; break; case ACCEPT: act = lemp->nstate + lemp->nrule *2 + 1; break; default: act = -1; break; } return act; } #define LINESIZE 1000 /* The next cluster of routines are for reading the template file ** and writing the results to the generated parser */ /* The first function transfers data from "in" to "out" until ** a line is seen which begins with "%%". The line number is ** tracked. ** ** if name!=0, then any word that begin with "Parse" is changed to ** begin with *name instead. */ PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) { int i, iStart; char line[LINESIZE]; while(fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ) { (*lineno)++; iStart = 0; if (name) { for(i = 0 ; line[i]; i++) { if (line[i] == 'P' && strncmp(&line[i],"Parse",5) == 0 && (i == 0 || !ISALPHA(line[i-1])) ) { if (i > iStart) fprintf(out,"%.*s",i-iStart,&line[iStart]); fprintf(out,"%s",name); i += 4; iStart = i+1; } } } fprintf(out,"%s",&line[iStart]); } } /* The next function finds the template file and opens it, returning ** a pointer to the opened file. */ PRIVATE FILE *tplt_open(struct lemon *lemp) { static char templatename[] = "lempar.c"; char buf[1000]; FILE *in; char *tpltname, *tmp = NULL; char *cp; /* first, see if user specified a template filename on the command line. */ if (user_templatename != 0) { if (access(user_templatename,004) == -1) { fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", user_templatename); lemp->errorcnt++; return 0; } in = fopen(user_templatename,"rb"); if (in == 0) { fprintf(stderr,"Can't open the template file \"%s\".\n", user_templatename); lemp->errorcnt++; return 0; } return in; } cp = strrchr(lemp->filename,'.'); if (cp) { lemon_sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename); } else{ lemon_sprintf(buf,"%s.lt",lemp->filename); } if (access(buf,004) == 0) { tpltname = buf; } else if (access(templatename,004) == 0) { tpltname = templatename; } else{ tmp = pathsearch(lemp->argv0,templatename,0); tpltname = tmp; } if (tpltname == 0) { fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", templatename); lemp->errorcnt++; return 0; } in = fopen(tpltname,"rb"); if (tmp) { free(tmp); } if (in == 0) { fprintf(stderr,"Can't open the template file \"%s\".\n",templatename); lemp->errorcnt++; return 0; } return in; } /* Print a #line directive line to the output file. */ PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) { fprintf(out,"#line %d \"",lineno); while(*filename) { if (*filename == '\\' ) putc('\\',out); putc(*filename,out); filename++; } fprintf(out,"\"\n"); } /* Print a string to the file and keep the linenumber up to date */ PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) { if (str == 0) return; while(*str) { putc(*str,out); if (*str == '\n' ) (*lineno)++; str++; } if (str[-1]!='\n' ) { putc('\n',out); (*lineno)++; } if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); } return; } /* ** The following routine emits code for the destructor for the ** symbol sp */ void emit_destructor_code( FILE *out, struct symbol *sp, struct lemon *lemp, int *lineno ) { char *cp = 0; if (sp->type == TERMINAL) { cp = lemp->tokendest; if (cp == 0) return; fprintf(out,"{\n"); (*lineno)++; } else if (sp->destructor) { cp = sp->destructor; fprintf(out,"{\n"); (*lineno)++; if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,sp->destLineno,lemp->filename); } } else if (lemp->vardest) { cp = lemp->vardest; if (cp == 0) return; fprintf(out,"{\n"); (*lineno)++; } else{ assert( 0); /* Cannot happen */ } for(; *cp; cp++) { if (*cp == '$' && cp[1] == '$' ) { fprintf(out,"(yypminor->yy%d)",sp->dtnum); cp++; continue; } if (*cp == '\n' ) (*lineno)++; fputc(*cp,out); } fprintf(out,"\n"); (*lineno)++; if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); } fprintf(out,"}\n"); (*lineno)++; return; } /* ** Return TRUE (non-zero) if the given symbol has a destructor. */ int has_destructor(struct symbol *sp, struct lemon *lemp) { int ret; if (sp->type == TERMINAL) { ret = lemp->tokendest!=0; } else{ ret = lemp->vardest!=0 || sp->destructor!=0; } return ret; } /* ** Append text to a dynamically allocated string. If zText is 0 then ** reset the string to be empty again. Always return the complete text ** of the string (which is overwritten with each call). ** ** n bytes of zText are stored. If n == 0 then all of zText up to the first ** \000 terminator is stored. zText can contain up to two instances of ** %d. The values of p1 and p2 are written into the first and second ** %d. ** ** If n == -1, then the previous character is overwritten. */ PRIVATE char *append_str(const char *zText, int n, int p1, int p2) { static char empty[1] = { 0 }; static char *z = 0; static int alloced = 0; static int used = 0; int c; char zInt[40]; if (zText == 0) { if (used == 0 && z!=0) z[0] = 0; used = 0; return z; } if (n<=0) { if (n<0) { used += n; assert( used>=0); } n = lemonStrlen(zText); } if ((int) (n+sizeof(zInt)*2+used) >= alloced) { alloced = n + sizeof(zInt)*2 + used + 200; z = (char *) realloc(z, alloced); } if (z == 0) return empty; while(n-- > 0) { c = *(zText++); if (c == '%' && n>0 && zText[0] == 'd' ) { lemon_sprintf(zInt, "%d", p1); p1 = p2; lemon_strcpy(&z[used], zInt); used += lemonStrlen(&z[used]); zText++; n--; } else{ z[used++] = (char)c; } } z[used] = 0; return z; } /* ** zCode is a string that is the action associated with a rule. Expand ** the symbols in this string so that the refer to elements of the parser ** stack. ** ** Return 1 if the expanded code requires that "yylhsminor" local variable ** to be defined. */ PRIVATE int translate_code(struct lemon *lemp, struct rule *rp) { char *cp, *xp; int i; int rc = 0; /* True if yylhsminor is used */ int dontUseRhs0 = 0; /* If true, use of left-most RHS label is illegal */ const char *zSkip = 0; /* The zOvwrt comment within rp->code, or NULL */ char lhsused = 0; /* True if the LHS element has been used */ char lhsdirect; /* True if LHS writes directly into stack */ char used[MAXRHS]; /* True for each RHS element which is used */ char zLhs[50]; /* Convert the LHS symbol into this string */ char zOvwrt[900]; /* Comment that to allow LHS to overwrite RHS */ for(i = 0 ; i < rp->nrhs; i++) used[i] = 0; lhsused = 0; if (rp->code == 0) { static char newlinestr[2] = { '\n', '\0' }; rp->code = newlinestr; rp->line = rp->ruleline; } if (rp->nrhs == 0) { /* If there are no RHS symbols, then writing directly to the LHS is ok */ lhsdirect = 1; } else if (rp->rhsalias[0] == 0) { /* The left-most RHS symbol has no value. LHS direct is ok. But ** we have to call the distructor on the RHS symbol first. */ lhsdirect = 1; if (has_destructor(rp->rhs[0],lemp) ) { append_str(0,0,0,0); append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, rp->rhs[0]->index,1-rp->nrhs); rp->codePrefix = Strsafe(append_str(0,0,0,0)); } } else if (rp->lhsalias == 0) { /* There is no LHS value symbol. */ lhsdirect = 1; } else if (strcmp(rp->lhsalias,rp->rhsalias[0]) == 0) { /* The LHS symbol and the left-most RHS symbol are the same, so ** direct writing is allowed */ lhsdirect = 1; lhsused = 1; used[0] = 1; if (rp->lhs->dtnum!=rp->rhs[0]->dtnum) { ErrorMsg(lemp->filename,rp->ruleline, "%s(%s) and %s(%s) share the same label but have " "different datatypes.", rp->lhs->name, rp->lhsalias, rp->rhs[0]->name, rp->rhsalias[0]); lemp->errorcnt++; } } else{ lemon_sprintf(zOvwrt, "/*%s-overwrites-%s */", rp->lhsalias, rp->rhsalias[0]); zSkip = strstr(rp->code, zOvwrt); if (zSkip!=0) { /* The code contains a special comment that indicates that it is safe ** for the LHS label to overwrite left-most RHS label. */ lhsdirect = 1; } else{ lhsdirect = 0; } } if (lhsdirect) { sprintf(zLhs, "yymsp[%d].minor.yy%d",1-rp->nrhs,rp->lhs->dtnum); } else{ rc = 1; sprintf(zLhs, "yylhsminor.yy%d",rp->lhs->dtnum); } append_str(0,0,0,0); /* This const cast is wrong but harmless, if we're careful. */ for(cp=(char *)rp->code; *cp; cp++) { if (cp == zSkip) { append_str(zOvwrt,0,0,0); cp += lemonStrlen(zOvwrt)-1; dontUseRhs0 = 1; continue; } if (ISALPHA(*cp) && (cp == rp->code || (!ISALNUM(cp[-1]) && cp[-1]!='_')) ) { char saved; for(xp= &cp[1]; ISALNUM(*xp) || *xp == '_'; xp++); saved = *xp; *xp = 0; if (rp->lhsalias && strcmp(cp,rp->lhsalias) == 0) { append_str(zLhs,0,0,0); cp = xp; lhsused = 1; } else{ for(i = 0 ; i < rp->nrhs; i++) { if (rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i]) == 0) { if (i == 0 && dontUseRhs0) { ErrorMsg(lemp->filename,rp->ruleline, "Label %s used after '%s'.", rp->rhsalias[0], zOvwrt); lemp->errorcnt++; } else if (cp!=rp->code && cp[-1] == '@' ) { /* If the argument is of the form @X then substituted ** the token number of X, not the value of X */ append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0); } else{ struct symbol *sp = rp->rhs[i]; int dtnum; if (sp->type == MULTITERMINAL) { dtnum = sp->subsym[0]->dtnum; } else{ dtnum = sp->dtnum; } append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum); } cp = xp; used[i] = 1; break; } } } *xp = saved; } append_str(cp, 1, 0, 0); } /* End loop */ /* Main code generation completed */ cp = append_str(0,0,0,0); if (cp && cp[0] ) rp->code = Strsafe(cp); append_str(0,0,0,0); /* Check to make sure the LHS has been used */ if (rp->lhsalias && !lhsused) { ErrorMsg(lemp->filename,rp->ruleline, "Label \"%s\" for \"%s(%s)\" is never used.", rp->lhsalias,rp->lhs->name,rp->lhsalias); lemp->errorcnt++; } /* Generate destructor code for RHS minor values which are not referenced. ** Generate error messages for unused labels and duplicate labels. */ for(i = 0 ; i < rp->nrhs; i++) { if (rp->rhsalias[i] ) { if (i > 0) { int j; if (rp->lhsalias && strcmp(rp->lhsalias,rp->rhsalias[i]) == 0) { ErrorMsg(lemp->filename,rp->ruleline, "%s(%s) has the same label as the LHS but is not the left-most " "symbol on the RHS.", rp->rhs[i]->name, rp->rhsalias); lemp->errorcnt++; } for(j=0; jrhsalias[j] && strcmp(rp->rhsalias[j],rp->rhsalias[i]) == 0) { ErrorMsg(lemp->filename,rp->ruleline, "Label %s used for multiple symbols on the RHS of a rule.", rp->rhsalias[i]); lemp->errorcnt++; break; } } } if (!used[i] ) { ErrorMsg(lemp->filename,rp->ruleline, "Label %s for \"%s(%s)\" is never used.", rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); lemp->errorcnt++; } } else if (i > 0 && has_destructor(rp->rhs[i],lemp) ) { append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, rp->rhs[i]->index,i-rp->nrhs+1); } } /* If unable to write LHS values directly into the stack, write the ** saved LHS value now. */ if (lhsdirect == 0) { append_str(" yymsp[%d].minor.yy%d = ", 0, 1-rp->nrhs, rp->lhs->dtnum); append_str(zLhs, 0, 0, 0); append_str(";\n", 0, 0, 0); } /* Suffix code generation complete */ cp = append_str(0,0,0,0); if (cp && cp[0] ) rp->codeSuffix = Strsafe(cp); return rc; } /* ** Generate code which executes when the rule "rp" is reduced. Write ** the code to "out". Make sure lineno stays up-to-date. */ PRIVATE void emit_code( FILE *out, struct rule *rp, struct lemon *lemp, int *lineno ) { const char *cp; /* Setup code prior to the #line directive */ if (rp->codePrefix && rp->codePrefix[0] ) { fprintf(out, "{%s", rp->codePrefix); for(cp=rp->codePrefix; *cp; cp++) { if (*cp == '\n' ) (*lineno)++; } } /* Generate code to do the reduce action */ if (rp->code) { if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,rp->line,lemp->filename); } fprintf(out,"{%s",rp->code); for(cp=rp->code; *cp; cp++) { if (*cp == '\n' ) (*lineno)++; } fprintf(out,"}\n"); (*lineno)++; if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); } } /* Generate breakdown code that occurs after the #line directive */ if (rp->codeSuffix && rp->codeSuffix[0] ) { fprintf(out, "%s", rp->codeSuffix); for(cp=rp->codeSuffix; *cp; cp++) { if (*cp == '\n' ) (*lineno)++; } } if (rp->codePrefix) { fprintf(out, "}\n"); (*lineno)++; } return; } /* ** Print the definition of the union used for the parser's data stack. ** This union contains fields for every possible data type for tokens ** and nonterminals. In the process of computing and printing this ** union, also set the ".dtnum" field of every terminal and nonterminal ** symbol. */ void print_stack_union( FILE *out, /* The output stream */ struct lemon *lemp, /* The main info structure for this parser */ int *plineno, /* Pointer to the line number */ int mhflag /* True if generating makeheaders output */ ) { int lineno = *plineno; /* The line number of the output */ char **types; /* A hash table of datatypes */ int arraysize; /* Size of the "types" array */ int maxdtlength; /* Maximum length of any ".datatype" field. */ char *stddt; /* Standardized name for a datatype */ int i,j; /* Loop counters */ unsigned hash; /* For hashing the name of a type */ const char *name; /* Name of the parser */ /* Allocate and initialize types[] and allocate stddt[] */ arraysize = lemp->nsymbol *2; types = (char **)calloc( arraysize, sizeof(char *) ); if (types == 0) { fprintf(stderr,"Out of memory.\n"); exit(1); } for(i = 0 ; i < arraysize; i++) types[i] = 0; maxdtlength = 0; if (lemp->vartype) { maxdtlength = lemonStrlen(lemp->vartype); } for(i = 0 ; i < lemp->nsymbol; i++) { int len; struct symbol *sp = lemp->symbols[i]; if (sp->datatype == 0) continue; len = lemonStrlen(sp->datatype); if (len>maxdtlength) maxdtlength = len; } stddt = (char *)malloc( maxdtlength *2 + 1); if (stddt == 0) { fprintf(stderr,"Out of memory.\n"); exit(1); } /* Build a hash table of datatypes. The ".dtnum" field of each symbol ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is ** used for terminal symbols. If there is no %default_type defined then ** 0 is also used as the .dtnum value for nonterminals which do not specify ** a datatype using the %type directive. */ for(i = 0 ; i < lemp->nsymbol; i++) { struct symbol *sp = lemp->symbols[i]; char *cp; if (sp == lemp->errsym) { sp->dtnum = arraysize+1; continue; } if (sp->type!=NONTERMINAL || (sp->datatype == 0 && lemp->vartype == 0) ) { sp->dtnum = 0; continue; } cp = sp->datatype; if (cp == 0) cp = lemp->vartype; j = 0; while(ISSPACE(*cp) ) cp++; while(*cp) stddt[j++] = *cp++; while(j>0 && ISSPACE(stddt[j-1]) ) j--; stddt[j] = 0; if (lemp->tokentype && strcmp(stddt, lemp->tokentype) == 0) { sp->dtnum = 0; continue; } hash = 0; for(j=0; stddt[j]; j++) { hash = hash *53 + stddt[j]; } hash = (hash & 0x7fffffff)%arraysize; while(types[hash] ) { if (strcmp(types[hash],stddt) == 0) { sp->dtnum = hash + 1; break; } hash++; if (hash>=(unsigned)arraysize) hash = 0; } if (types[hash] == 0) { sp->dtnum = hash + 1; types[hash] = (char *)malloc( lemonStrlen(stddt)+1); if (types[hash] == 0) { fprintf(stderr,"Out of memory.\n"); exit(1); } lemon_strcpy(types[hash],stddt); } } /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ name = lemp->name ? lemp->name : "Parse"; lineno = *plineno; if (mhflag) { fprintf(out,"#if INTERFACE\n"); lineno++; } fprintf(out,"#define %sTOKENTYPE %s\n",name, lemp->tokentype?lemp->tokentype:"void *"); lineno++; if (mhflag) { fprintf(out,"#endif\n"); lineno++; } fprintf(out,"typedef union {\n"); lineno++; fprintf(out," int yyinit;\n"); lineno++; fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; for(i = 0 ; i < arraysize; i++) { if (types[i] == 0) continue; fprintf(out," %s yy%d;\n",types[i],i+1); lineno++; free(types[i]); } if (lemp->errsym->useCnt) { fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; } free(stddt); free(types); fprintf(out,"} YYMINORTYPE;\n"); lineno++; *plineno = lineno; } /* ** Return the name of a C datatype able to represent values between ** lwr and upr, inclusive. If pnByte!=NULL then also write the sizeof ** for that type (1, 2, or 4) into *pnByte. */ static const char *minimum_size_type(int lwr, int upr, int *pnByte) { const char *zType = "int"; int nByte = 4; if (lwr>=0) { if (upr<=255) { zType = "unsigned char"; nByte = 1; } else if (upr<65535) { zType = "unsigned short int"; nByte = 2; } else{ zType = "unsigned int"; nByte = 4; } } else if (lwr>=-127 && upr<=127) { zType = "signed char"; nByte = 1; } else if (lwr>=-32767 && upr<32767) { zType = "short"; nByte = 2; } if (pnByte) *pnByte = nByte; return zType; } /* ** Each state contains a set of token transaction and a set of ** nonterminal transactions. Each of these sets makes an instance ** of the following structure. An array of these structures is used ** to order the creation of entries in the yy_action[] table. */ struct axset { struct state *stp; /* A pointer to a state */ int isTkn; /* True to use tokens. False for non-terminals */ int nAction; /* Number of actions */ int iOrder; /* Original order of action sets */ }; /* ** Compare to axset structures for sorting purposes */ static int axset_compare(const void *a, const void *b) { struct axset *p1 = (struct axset *)a; struct axset *p2 = (struct axset *)b; int c; c = p2->nAction - p1->nAction; if (c == 0) { c = p1->iOrder - p2->iOrder; } assert( c!=0 || p1==p2); return c; } /* ** Write text on "out" that describes the rule "rp". */ static void writeRuleText(FILE *out, struct rule *rp) { int j; fprintf(out,"%s ::=", rp->lhs->name); for(j=0; jnrhs; j++) { struct symbol *sp = rp->rhs[j]; if (sp->type!=MULTITERMINAL) { fprintf(out," %s", sp->name); } else{ int k; fprintf(out," %s", sp->subsym[0]->name); for(k=1; knsubsym; k++) { fprintf(out,"|%s",sp->subsym[k]->name); } } } } static FILE *source_open(struct lemon *lemp) { if (user_output_file) { if (lemp->outname) free(lemp->outname); lemp->outname = strdup(user_output_file); FILE *fp = fopen(lemp->outname, "wb"); if (fp == 0) { fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); lemp->errorcnt++; return 0; } return fp; } else { return file_open(lemp,".c","wb"); } } /* Generate C source code for the parser */ void ReportTable( struct lemon *lemp, int mhflag /* Output in makeheaders format if true */ ) { FILE *out, *in; char line[LINESIZE]; int lineno; struct state *stp; struct action *ap; struct rule *rp; struct acttab *pActtab; int i, j, n, sz; int szActionType; /* sizeof(YYACTIONTYPE) */ int szCodeType; /* sizeof(YYCODETYPE) */ const char *name; int mnTknOfst, mxTknOfst; int mnNtOfst, mxNtOfst; struct axset *ax; in = tplt_open(lemp); if (in == 0) return; out = source_open(lemp); if (out == 0) { fclose(in); return; } lineno = 1; tplt_xfer(lemp->name,in,out,&lineno); /* Generate the include code, if any */ tplt_print(out,lemp,lemp->include,&lineno); if (mhflag) { char *incName = file_makename(lemp, ".h"); fprintf(out,"#include \"%s\"\n", incName); lineno++; free(incName); } tplt_xfer(lemp->name,in,out,&lineno); /* Generate #defines for all tokens */ if (mhflag) { const char *prefix; fprintf(out,"#if INTERFACE\n"); lineno++; if (lemp->tokenprefix) prefix = lemp->tokenprefix; else prefix = ""; for(i = 1; i < lemp->nterminal; i++) { fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); lineno++; } fprintf(out,"#endif\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate the defines */ fprintf(out,"#define YYCODETYPE %s\n", minimum_size_type(0, lemp->nsymbol+1, &szCodeType)); lineno++; fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++; fprintf(out,"#define YYACTIONTYPE %s\n", minimum_size_type(0,lemp->nstate+lemp->nrule *2+5,&szActionType)); lineno++; if (lemp->wildcard) { fprintf(out,"#define YYWILDCARD %d\n", lemp->wildcard->index); lineno++; } print_stack_union(out,lemp,&lineno,mhflag); fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++; if (lemp->stacksize) { fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++; } else{ fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++; } fprintf(out, "#endif\n"); lineno++; if (mhflag) { fprintf(out,"#if INTERFACE\n"); lineno++; } name = lemp->name ? lemp->name : "Parse"; if (lemp->arg && lemp->arg[0] ) { i = lemonStrlen(lemp->arg); while(i >= 1 && ISSPACE(lemp->arg[i-1]) ) i--; while(i >= 1 && (ISALNUM(lemp->arg[i-1]) || lemp->arg[i-1] == '_') ) i--; fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n", name,lemp->arg,&lemp->arg[i]); lineno++; fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n", name,&lemp->arg[i],&lemp->arg[i]); lineno++; } else{ fprintf(out,"#define %sARG_SDECL\n",name); lineno++; fprintf(out,"#define %sARG_PDECL\n",name); lineno++; fprintf(out,"#define %sARG_FETCH\n",name); lineno++; fprintf(out,"#define %sARG_STORE\n",name); lineno++; } if (mhflag) { fprintf(out,"#endif\n"); lineno++; } if (lemp->errsym->useCnt) { fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; } if (lemp->has_fallback) { fprintf(out,"#define YYFALLBACK 1\n"); lineno++; } /* Compute the action table, but do not output it yet. The action ** table must be computed before generating the YYNSTATE macro because ** we need to know how many states can be eliminated. */ ax = (struct axset *) calloc(lemp->nxstate *2, sizeof(ax[0])); if (ax == 0) { fprintf(stderr,"malloc failed\n"); exit(1); } for(i = 0 ; i < lemp->nxstate; i++) { stp = lemp->sorted[i]; ax[i *2].stp = stp; ax[i *2].isTkn = 1; ax[i *2].nAction = stp->nTknAct; ax[i *2+1].stp = stp; ax[i *2+1].isTkn = 0; ax[i *2+1].nAction = stp->nNtAct; } mxTknOfst = mnTknOfst = 0; mxNtOfst = mnNtOfst = 0; /* In an effort to minimize the action table size, use the heuristic ** of placing the largest action sets first */ for(i = 0 ; i < lemp->nxstate *2; i++) ax[i].iOrder = i; qsort(ax, lemp->nxstate *2, sizeof(ax[0]), axset_compare); pActtab = acttab_alloc(); for(i = 0 ; i < lemp->nxstate *2 && ax[i].nAction>0; i++) { stp = ax[i].stp; if (ax[i].isTkn) { for(ap=stp->ap; ap; ap=ap->next) { int action; if (ap->sp->index>=lemp->nterminal) continue; action = compute_action(lemp, ap); if (action<0) continue; acttab_action(pActtab, ap->sp->index, action); } stp->iTknOfst = acttab_insert(pActtab); if (stp->iTknOfstiTknOfst; if (stp->iTknOfst>mxTknOfst) mxTknOfst = stp->iTknOfst; } else{ for(ap=stp->ap; ap; ap=ap->next) { int action; if (ap->sp->indexnterminal) continue; if (ap->sp->index == lemp->nsymbol) continue; action = compute_action(lemp, ap); if (action<0) continue; acttab_action(pActtab, ap->sp->index, action); } stp->iNtOfst = acttab_insert(pActtab); if (stp->iNtOfstiNtOfst; if (stp->iNtOfst>mxNtOfst) mxNtOfst = stp->iNtOfst; } #if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ { int jj, nn; for(jj=nn=0; jjnAction; jj++) { if (pActtab->aAction[jj].action<0) nn++; } printf("%4d: State %3d %s n: %2d size: %5d freespace: %d\n", i, stp->statenum, ax[i].isTkn ? "Token" : "Var ", ax[i].nAction, pActtab->nAction, nn); } #endif } free(ax); /* Finish rendering the constants now that the action table has ** been computed */ fprintf(out,"#define YYNSTATE %d\n",lemp->nxstate); lineno++; fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; fprintf(out,"#define YY_MAX_SHIFT %d\n",lemp->nxstate-1); lineno++; fprintf(out,"#define YY_MIN_SHIFTREDUCE %d\n",lemp->nstate); lineno++; i = lemp->nstate + lemp->nrule; fprintf(out,"#define YY_MAX_SHIFTREDUCE %d\n", i-1); lineno++; fprintf(out,"#define YY_MIN_REDUCE %d\n", i); lineno++; i = lemp->nstate + lemp->nrule *2; fprintf(out,"#define YY_MAX_REDUCE %d\n", i-1); lineno++; fprintf(out,"#define YY_ERROR_ACTION %d\n", i); lineno++; fprintf(out,"#define YY_ACCEPT_ACTION %d\n", i+1); lineno++; fprintf(out,"#define YY_NO_ACTION %d\n", i+2); lineno++; tplt_xfer(lemp->name,in,out,&lineno); /* Now output the action table and its associates: ** ** yy_action[] A single table containing all actions. ** yy_lookahead[] A table containing the lookahead for each entry in ** yy_action. Used to detect hash collisions. ** yy_shift_ofst[] For each state, the offset into yy_action for ** shifting terminals. ** yy_reduce_ofst[] For each state, the offset into yy_action for ** shifting non-terminals after a reduce. ** yy_default[] Default action for each state. */ /* Output the yy_action table */ lemp->nactiontab = n = acttab_size(pActtab); lemp->tablesize += n *szActionType; fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; for(i=j=0; i < n; i++) { int action = acttab_yyaction(pActtab, i); if (action<0) action = lemp->nstate + lemp->nrule + 2; if (j == 0) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", action); if (j == 9 || i == n-1) { fprintf(out, "\n"); lineno++; j = 0; } else{ j++; } } fprintf(out, "};\n"); lineno++; /* Output the yy_lookahead table */ lemp->tablesize += n *szCodeType; fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; for(i=j=0; i < n; i++) { int la = acttab_yylookahead(pActtab, i); if (la<0) la = lemp->nsymbol; if (j == 0) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", la); if (j == 9 || i == n-1) { fprintf(out, "\n"); lineno++; j = 0; } else{ j++; } } fprintf(out, "};\n"); lineno++; /* Output the yy_shift_ofst[] table */ fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++; n = lemp->nxstate; while(n>0 && lemp->sorted[n-1]->iTknOfst == NO_OFFSET) n--; fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; fprintf(out, "static const %s yy_shift_ofst[] = {\n", minimum_size_type(mnTknOfst-1, mxTknOfst, &sz)); lineno++; lemp->tablesize += n *sz; for(i=j=0; i < n; i++) { int ofst; stp = lemp->sorted[i]; ofst = stp->iTknOfst; if (ofst == NO_OFFSET) ofst = mnTknOfst - 1; if (j == 0) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", ofst); if (j == 9 || i == n-1) { fprintf(out, "\n"); lineno++; j = 0; } else{ j++; } } fprintf(out, "};\n"); lineno++; /* Output the yy_reduce_ofst[] table */ fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; n = lemp->nxstate; while(n>0 && lemp->sorted[n-1]->iNtOfst == NO_OFFSET) n--; fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++; fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; fprintf(out, "static const %s yy_reduce_ofst[] = {\n", minimum_size_type(mnNtOfst-1, mxNtOfst, &sz)); lineno++; lemp->tablesize += n *sz; for(i=j=0; i < n; i++) { int ofst; stp = lemp->sorted[i]; ofst = stp->iNtOfst; if (ofst == NO_OFFSET) ofst = mnNtOfst - 1; if (j == 0) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", ofst); if (j == 9 || i == n-1) { fprintf(out, "\n"); lineno++; j = 0; } else{ j++; } } fprintf(out, "};\n"); lineno++; /* Output the default action table */ fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; n = lemp->nxstate; lemp->tablesize += n *szActionType; for(i=j=0; i < n; i++) { stp = lemp->sorted[i]; if (j == 0) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", stp->iDfltReduce+lemp->nstate+lemp->nrule); if (j == 9 || i == n-1) { fprintf(out, "\n"); lineno++; j = 0; } else{ j++; } } fprintf(out, "};\n"); lineno++; tplt_xfer(lemp->name,in,out,&lineno); /* Generate the table of fallback tokens. */ if (lemp->has_fallback) { int mx = lemp->nterminal - 1; while(mx>0 && lemp->symbols[mx]->fallback == 0) { mx--; } lemp->tablesize += (mx+1)*szCodeType; for(i = 0 ; i <= mx; i++) { struct symbol *p = lemp->symbols[i]; if (p->fallback == 0) { fprintf(out, " 0, /* %10s => nothing */\n", p->name); } else{ fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, p->name, p->fallback->name); } lineno++; } } tplt_xfer(lemp->name, in, out, &lineno); /* Generate a table containing the symbolic name of every symbol */ for(i = 0 ; i < lemp->nsymbol; i++) { lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name); fprintf(out," %-15s",line); if ((i&3) == 3) { fprintf(out,"\n"); lineno++; } } if ((i&3)!=0) { fprintf(out,"\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate a table containing a text string that describes every ** rule in the rule set of the grammar. This information is used ** when tracing REDUCE actions. */ for(i = 0 , rp=lemp->rule; rp; rp=rp->next, i++) { assert( rp->iRule == i); fprintf(out," /* %3d */ \"", i); writeRuleText(out, rp); fprintf(out,"\",\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes every time a symbol is popped from ** the stack while processing errors or while destroying the parser. ** (In other words, generate the %destructor actions) */ if (lemp->tokendest) { int once = 1; for(i = 0 ; i < lemp->nsymbol; i++) { struct symbol *sp = lemp->symbols[i]; if (sp == 0 || sp->type!=TERMINAL) continue; if (once) { fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; once = 0; } fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; } for(i = 0 ; i < lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++); if (i < lemp->nsymbol) { emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); fprintf(out," break;\n"); lineno++; } } if (lemp->vardest) { struct symbol *dflt_sp = 0; int once = 1; for(i = 0 ; i < lemp->nsymbol; i++) { struct symbol *sp = lemp->symbols[i]; if (sp == 0 || sp->type == TERMINAL || sp->index<=0 || sp->destructor!=0) continue; if (once) { fprintf(out, " /* Default NON-TERMINAL Destructor */\n"); lineno++; once = 0; } fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; dflt_sp = sp; } if (dflt_sp!=0) { emit_destructor_code(out,dflt_sp,lemp,&lineno); } fprintf(out," break;\n"); lineno++; } for(i = 0 ; i < lemp->nsymbol; i++) { struct symbol *sp = lemp->symbols[i]; if (sp == 0 || sp->type == TERMINAL || sp->destructor == 0 || sp->destructor_emitted) continue; fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; sp->destructor_emitted = 1; /* Combine duplicate destructors into a single case */ for(j=i+1; jnsymbol; j++) { struct symbol *sp2 = lemp->symbols[j]; if (sp2 && sp2->type!=TERMINAL && sp2->destructor && sp2->dtnum == sp->dtnum && strcmp(sp->destructor,sp2->destructor) == 0) { fprintf(out," case %d: /* %s */\n", sp2->index, sp2->name); lineno++; sp2->destructor_emitted = 1; } } emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); fprintf(out," break;\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes whenever the parser stack overflows */ tplt_print(out,lemp,lemp->overflow,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Generate the table of rule information ** ** Note: This code depends on the fact that rules are number ** sequentually beginning with 0. */ for(rp=lemp->rule; rp; rp=rp->next) { fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which execution during each REDUCE action */ i = 0; for(rp=lemp->rule; rp; rp=rp->next) { i += translate_code(lemp, rp); } if (i) { fprintf(out," YYMINORTYPE yylhsminor;\n"); lineno++; } /* First output rules other than the default: rule */ for(rp=lemp->rule; rp; rp=rp->next) { struct rule *rp2; /* Other rules with the same action */ if (rp->code == 0) continue; if (rp->code[0] == '\n' && rp->code[1] == 0 && rp->codePrefix == 0 && rp->codeSuffix == 0 ) { /* No actions, so this will be part of the "default:" rule */ continue; } fprintf(out," case %d: /* ", rp->iRule); writeRuleText(out, rp); fprintf(out, " */\n"); lineno++; for(rp2=rp->next; rp2; rp2=rp2->next) { if (rp2->code == rp->code && rp2->codePrefix == rp->codePrefix && rp2->codeSuffix == rp->codeSuffix) { fprintf(out," case %d: /* ", rp2->iRule); writeRuleText(out, rp2); fprintf(out," */ yytestcase(yyruleno == %d);\n", rp2->iRule); lineno++; rp2->code = 0; } } emit_code(out,rp,lemp,&lineno); fprintf(out," break;\n"); lineno++; rp->code = 0; } /* Finally, output the default: rule. We choose as the default: all ** empty actions. */ fprintf(out," default:\n"); lineno++; for(rp=lemp->rule; rp; rp=rp->next) { if (rp->code == 0) continue; assert( rp->code[0] == '\n' && rp->code[1] == 0); assert( rp->codePrefix == 0); assert( rp->codeSuffix == 0); fprintf(out," /* (%d) ", rp->iRule); writeRuleText(out, rp); fprintf(out, " */ yytestcase(yyruleno == %d);\n", rp->iRule); lineno++; } fprintf(out," break;\n"); lineno++; tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes if a parse fails */ tplt_print(out,lemp,lemp->failure,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes when a syntax error occurs */ tplt_print(out,lemp,lemp->error,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes when the parser accepts its input */ tplt_print(out,lemp,lemp->accept,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Append any addition code the user desires */ tplt_print(out,lemp,lemp->extracode,&lineno); free(pActtab); fclose(in); fclose(out); return; } static FILE *header_open(struct lemon *lemp, const char *mode) { if (user_output_header) { if (lemp->outname) free(lemp->outname); lemp->outname = strdup(user_output_header); FILE *fp = fopen(lemp->outname, mode); if (fp == 0 && *mode == 'w' ) { fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); lemp->errorcnt++; return 0; } return fp; } else { return file_open(lemp,".h",mode); } } /* Generate a header file for the parser */ void ReportHeader(struct lemon *lemp) { FILE *out, *in; const char *prefix; char line[LINESIZE]; char pattern[LINESIZE]; int i; if (lemp->tokenprefix) prefix = lemp->tokenprefix; else prefix = ""; in = header_open(lemp,"rb"); if (in) { int nextChar; for(i = 1; i < lemp->nterminal && fgets(line,LINESIZE,in); i++) { lemon_sprintf(pattern,"#define %s%-30s %3d\n", prefix,lemp->symbols[i]->name,i); if (strcmp(line,pattern) ) break; } nextChar = fgetc(in); fclose(in); if (i == lemp->nterminal && nextChar == EOF) { /* No change in the file. Don't rewrite it. */ return; } } out = header_open(lemp,"wb"); if (out) { for(i = 1; i < lemp->nterminal; i++) { fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i); } fclose(out); } return; } /* Reduce the size of the action tables, if possible, by making use ** of defaults. ** ** In this version, we take the most frequent REDUCE action and make ** it the default. Except, there is no default if the wildcard token ** is a possible look-ahead. */ void CompressTables(struct lemon *lemp) { struct state *stp; struct action *ap, *ap2; struct rule *rp, *rp2, *rbest; int nbest, n; int i; int usesWildcard; for(i = 0 ; i < lemp->nstate; i++) { stp = lemp->sorted[i]; nbest = 0; rbest = 0; usesWildcard = 0; for(ap=stp->ap; ap; ap=ap->next) { if (ap->type == SHIFT && ap->sp == lemp->wildcard) { usesWildcard = 1; } if (ap->type!=REDUCE) continue; rp = ap->x.rp; if (rp->lhsStart) continue; if (rp == rbest) continue; n = 1; for(ap2=ap->next; ap2; ap2=ap2->next) { if (ap2->type!=REDUCE) continue; rp2 = ap2->x.rp; if (rp2==rbest) continue; if (rp2==rp) n++; } if (n>nbest) { nbest = n; rbest = rp; } } /* Do not make a default if the number of rules to default ** is not at least 1 or if the wildcard token is a possible ** lookahead. */ if (nbest<1 || usesWildcard) continue; /* Combine matching REDUCE actions into a single default */ for(ap=stp->ap; ap; ap=ap->next) { if (ap->type == REDUCE && ap->x.rp == rbest) break; } assert( ap); ap->sp = Symbol_new("{default}"); for(ap=ap->next; ap; ap=ap->next) { if (ap->type == REDUCE && ap->x.rp == rbest) ap->type = NOT_USED; } stp->ap = Action_sort(stp->ap); for(ap=stp->ap; ap; ap=ap->next) { if (ap->type == SHIFT) break; if (ap->type == REDUCE && ap->x.rp!=rbest) break; } if (ap == 0) { stp->autoReduce = 1; stp->pDfltReduce = rbest; } } /* Make a second pass over all states and actions. Convert ** every action that is a SHIFT to an autoReduce state into ** a SHIFTREDUCE action. */ for(i = 0 ; i < lemp->nstate; i++) { stp = lemp->sorted[i]; for(ap=stp->ap; ap; ap=ap->next) { struct state *pNextState; if (ap->type!=SHIFT) continue; pNextState = ap->x.stp; if (pNextState->autoReduce && pNextState->pDfltReduce!=0) { ap->type = SHIFTREDUCE; ap->x.rp = pNextState->pDfltReduce; } } } } /* ** Compare two states for sorting purposes. The smaller state is the ** one with the most non-terminal actions. If they have the same number ** of non-terminal actions, then the smaller is the one with the most ** token actions. */ static int stateResortCompare(const void *a, const void *b) { const struct state *pA = *(const struct state **)a; const struct state *pB = *(const struct state **)b; int n; n = pB->nNtAct - pA->nNtAct; if (n == 0) { n = pB->nTknAct - pA->nTknAct; if (n == 0) { n = pB->statenum - pA->statenum; } } assert( n!=0); return n; } /* ** Renumber and resort states so that states with fewer choices ** occur at the end. Except, keep state 0 as the first state. */ void ResortStates(struct lemon *lemp) { int i; struct state *stp; struct action *ap; for(i = 0 ; i < lemp->nstate; i++) { stp = lemp->sorted[i]; stp->nTknAct = stp->nNtAct = 0; stp->iDfltReduce = lemp->nrule; /* Init dflt action to "syntax error" */ stp->iTknOfst = NO_OFFSET; stp->iNtOfst = NO_OFFSET; for(ap=stp->ap; ap; ap=ap->next) { int iAction = compute_action(lemp,ap); if (iAction>=0) { if (ap->sp->indexnterminal) { stp->nTknAct++; } else if (ap->sp->indexnsymbol) { stp->nNtAct++; } else{ assert( stp->autoReduce == 0 || stp->pDfltReduce == ap->x.rp); stp->iDfltReduce = iAction - lemp->nstate - lemp->nrule; } } } } qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), stateResortCompare); for(i = 0 ; i < lemp->nstate; i++) { lemp->sorted[i]->statenum = i; } lemp->nxstate = lemp->nstate; while(lemp->nxstate>1 && lemp->sorted[lemp->nxstate-1]->autoReduce) { lemp->nxstate--; } } /***************** From the file "set.c" ************************************/ /* ** Set manipulation routines for the LEMON parser generator. */ static int size = 0; /* Set the set size */ void SetSize(int n) { size = n+1; } /* Allocate a new set */ char *SetNew() { char *s; s = (char *)calloc( size, 1); if (s == 0) { extern void memory_error(); memory_error(); } return s; } /* Deallocate a set */ void SetFree(char *s) { free(s); } /* Add a new element to the set. Return TRUE if the element was added ** and FALSE if it was already there. */ int SetAdd(char *s, int e) { int rv; assert( e>=0 && esize = 1024; x1a->count = 0; x1a->tbl = (x1node *)calloc(1024, sizeof(x1node) + sizeof(x1node *)); if (x1a->tbl == 0) { free(x1a); x1a = 0; } else{ int i; x1a->ht = (x1node **)&(x1a->tbl[1024]); for(i = 0 ; i < 1024; i++) x1a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int Strsafe_insert(const char *data) { x1node *np; unsigned h; unsigned ph; if (x1a == 0) return 0; ph = strhash(data); h = ph & (x1a->size-1); np = x1a->ht[h]; while(np) { if (strcmp(np->data,data) == 0) { /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if (x1a->count>=x1a->size) { /* Need to make the hash table bigger */ int i,arrSize; struct s_x1 array; array.size = arrSize = x1a->size *2; array.count = x1a->count; array.tbl = (x1node *)calloc(arrSize, sizeof(x1node) + sizeof(x1node *)); if (array.tbl == 0) return 0; /* Fail due to malloc failure */ array.ht = (x1node **)&(array.tbl[arrSize]); for(i = 0 ; i < arrSize; i++) array.ht[i] = 0; for(i = 0 ; i < x1a->count; i++) { x1node *oldnp, *newnp; oldnp = &(x1a->tbl[i]); h = strhash(oldnp->data) & (arrSize-1); newnp = &(array.tbl[i]); if (array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } free(x1a->tbl); *x1a = array; } /* Insert the new data */ h = ph & (x1a->size-1); np = &(x1a->tbl[x1a->count++]); np->data = data; if (x1a->ht[h] ) x1a->ht[h]->from = &(np->next); np->next = x1a->ht[h]; x1a->ht[h] = np; np->from = &(x1a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ const char *Strsafe_find(const char *key) { unsigned h; x1node *np; if (x1a == 0) return 0; h = strhash(key) & (x1a->size-1); np = x1a->ht[h]; while(np) { if (strcmp(np->data,key) == 0) break; np = np->next; } return np ? np->data : 0; } /* Return a pointer to the (terminal or nonterminal) symbol "x". ** Create a new symbol if this is the first time "x" has been seen. */ struct symbol *Symbol_new(const char *x) { struct symbol *sp; sp = Symbol_find(x); if (sp == 0) { sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); MemoryCheck(sp); sp->name = Strsafe(x); sp->type = ISUPPER(*x) ? TERMINAL : NONTERMINAL; sp->rule = 0; sp->fallback = 0; sp->prec = -1; sp->assoc = UNK; sp->firstset = 0; sp->lambda = LEMON_FALSE; sp->destructor = 0; sp->destructor_emitted = 0; sp->destLineno = 0; sp->datatype = 0; sp->useCnt = 0; Symbol_insert(sp,sp->name); } sp->useCnt++; return sp; } /* Compare two symbols for sorting purposes. Return negative, ** zero, or positive if a is less then, equal to, or greater ** than b. ** ** Symbols that begin with upper case letters (terminals or tokens) ** must sort before symbols that begin with lower case letters ** (non-terminals). And MULTITERMINAL symbols (created using the ** %token_class directive) must sort at the very end. Other than ** that, the order does not matter. ** ** We find experimentally that leaving the symbols in their original ** order (the order they appeared in the grammar file) gives the ** smallest parser tables in SQLite. */ int Symbolcmpp(const void *_a, const void *_b) { const struct symbol *a = *(const struct symbol **) _a; const struct symbol *b = *(const struct symbol **) _b; int i1 = a->type == MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1; int i2 = b->type == MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1; return i1==i2 ? a->index - b->index : i1 - i2; } /* There is one instance of the following structure for each ** associative array of type "x2". */ struct s_x2 { int size; /* The number of available slots. */ /* Must be a power of 2 greater than or */ /* equal to 1 */ int count; /* Number of currently slots filled */ struct s_x2node *tbl; /* The data stored here */ struct s_x2node **ht; /* Hash table for lookups */ }; /* There is one instance of this structure for every data element ** in an associative array of type "x2". */ typedef struct s_x2node { struct symbol *data; /* The data */ const char *key; /* The key */ struct s_x2node *next; /* Next entry with the same hash */ struct s_x2node **from; /* Previous link */ } x2node; /* There is only one instance of the array, which is the following */ static struct s_x2 *x2a; /* Allocate a new associative array */ void Symbol_init() { if (x2a) return; x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); if (x2a) { x2a->size = 128; x2a->count = 0; x2a->tbl = (x2node *)calloc(128, sizeof(x2node) + sizeof(x2node *)); if (x2a->tbl == 0) { free(x2a); x2a = 0; } else{ int i; x2a->ht = (x2node **)&(x2a->tbl[128]); for(i = 0 ; i < 128; i++) x2a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int Symbol_insert(struct symbol *data, const char *key) { x2node *np; unsigned h; unsigned ph; if (x2a == 0) return 0; ph = strhash(key); h = ph & (x2a->size-1); np = x2a->ht[h]; while(np) { if (strcmp(np->key,key) == 0) { /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if (x2a->count>=x2a->size) { /* Need to make the hash table bigger */ int i,arrSize; struct s_x2 array; array.size = arrSize = x2a->size *2; array.count = x2a->count; array.tbl = (x2node *)calloc(arrSize, sizeof(x2node) + sizeof(x2node *)); if (array.tbl == 0) return 0; /* Fail due to malloc failure */ array.ht = (x2node **)&(array.tbl[arrSize]); for(i = 0 ; i < arrSize; i++) array.ht[i] = 0; for(i = 0 ; i < x2a->count; i++) { x2node *oldnp, *newnp; oldnp = &(x2a->tbl[i]); h = strhash(oldnp->key) & (arrSize-1); newnp = &(array.tbl[i]); if (array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->key = oldnp->key; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } free(x2a->tbl); *x2a = array; } /* Insert the new data */ h = ph & (x2a->size-1); np = &(x2a->tbl[x2a->count++]); np->key = key; np->data = data; if (x2a->ht[h] ) x2a->ht[h]->from = &(np->next); np->next = x2a->ht[h]; x2a->ht[h] = np; np->from = &(x2a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ struct symbol *Symbol_find(const char *key) { unsigned h; x2node *np; if (x2a == 0) return 0; h = strhash(key) & (x2a->size-1); np = x2a->ht[h]; while(np) { if (strcmp(np->key,key) == 0) break; np = np->next; } return np ? np->data : 0; } /* Return the n-th data. Return NULL if n is out of range. */ struct symbol *Symbol_Nth(int n) { struct symbol *data; if (x2a && n>0 && n<=x2a->count) { data = x2a->tbl[n-1].data; } else{ data = 0; } return data; } /* Return the size of the array */ int Symbol_count() { return x2a ? x2a->count : 0; } /* Return an array of pointers to all data in the table. ** The array is obtained from malloc. Return NULL if memory allocation ** problems, or if the array is empty. */ struct symbol **Symbol_arrayof() { struct symbol **array; int i,arrSize; if (x2a == 0) return 0; arrSize = x2a->count; array = (struct symbol **)calloc(arrSize, sizeof(struct symbol *)); if (array) { for(i = 0 ; i < arrSize; i++) array[i] = x2a->tbl[i].data; } return array; } /* Compare two configurations */ int Configcmp(const char *_a,const char *_b) { const struct config *a = (struct config *) _a; const struct config *b = (struct config *) _b; int x; x = a->rp->index - b->rp->index; if (x == 0) x = a->dot - b->dot; return x; } /* Compare two states */ PRIVATE int statecmp(struct config *a, struct config *b) { int rc; for(rc=0; rc == 0 && a && b; a=a->bp, b=b->bp) { rc = a->rp->index - b->rp->index; if (rc == 0) rc = a->dot - b->dot; } if (rc == 0) { if (a) rc = 1; if (b) rc = -1; } return rc; } /* Hash a state */ PRIVATE unsigned statehash(struct config *a) { unsigned h=0; while(a) { h = h *571 + a->rp->index *37 + a->dot; a = a->bp; } return h; } /* Allocate a new state structure */ struct state *State_new() { struct state *newstate; newstate = (struct state *)calloc(1, sizeof(struct state) ); MemoryCheck(newstate); return newstate; } /* There is one instance of the following structure for each ** associative array of type "x3". */ struct s_x3 { int size; /* The number of available slots. */ /* Must be a power of 2 greater than or */ /* equal to 1 */ int count; /* Number of currently slots filled */ struct s_x3node *tbl; /* The data stored here */ struct s_x3node **ht; /* Hash table for lookups */ }; /* There is one instance of this structure for every data element ** in an associative array of type "x3". */ typedef struct s_x3node { struct state *data; /* The data */ struct config *key; /* The key */ struct s_x3node *next; /* Next entry with the same hash */ struct s_x3node **from; /* Previous link */ } x3node; /* There is only one instance of the array, which is the following */ static struct s_x3 *x3a; /* Allocate a new associative array */ void State_init() { if (x3a) return; x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); if (x3a) { x3a->size = 128; x3a->count = 0; x3a->tbl = (x3node *)calloc(128, sizeof(x3node) + sizeof(x3node *)); if (x3a->tbl == 0) { free(x3a); x3a = 0; } else{ int i; x3a->ht = (x3node **)&(x3a->tbl[128]); for(i = 0 ; i < 128; i++) x3a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int State_insert(struct state *data, struct config *key) { x3node *np; unsigned h; unsigned ph; if (x3a == 0) return 0; ph = statehash(key); h = ph & (x3a->size-1); np = x3a->ht[h]; while(np) { if (statecmp(np->key,key) == 0) { /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if (x3a->count>=x3a->size) { /* Need to make the hash table bigger */ int i,arrSize; struct s_x3 array; array.size = arrSize = x3a->size *2; array.count = x3a->count; array.tbl = (x3node *)calloc(arrSize, sizeof(x3node) + sizeof(x3node *)); if (array.tbl == 0) return 0; /* Fail due to malloc failure */ array.ht = (x3node **)&(array.tbl[arrSize]); for(i = 0 ; i < arrSize; i++) array.ht[i] = 0; for(i = 0 ; i < x3a->count; i++) { x3node *oldnp, *newnp; oldnp = &(x3a->tbl[i]); h = statehash(oldnp->key) & (arrSize-1); newnp = &(array.tbl[i]); if (array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->key = oldnp->key; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } free(x3a->tbl); *x3a = array; } /* Insert the new data */ h = ph & (x3a->size-1); np = &(x3a->tbl[x3a->count++]); np->key = key; np->data = data; if (x3a->ht[h] ) x3a->ht[h]->from = &(np->next); np->next = x3a->ht[h]; x3a->ht[h] = np; np->from = &(x3a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ struct state *State_find(struct config *key) { unsigned h; x3node *np; if (x3a == 0) return 0; h = statehash(key) & (x3a->size-1); np = x3a->ht[h]; while(np) { if (statecmp(np->key,key) == 0) break; np = np->next; } return np ? np->data : 0; } /* Return an array of pointers to all data in the table. ** The array is obtained from malloc. Return NULL if memory allocation ** problems, or if the array is empty. */ struct state **State_arrayof() { struct state **array; int i,arrSize; if (x3a == 0) return 0; arrSize = x3a->count; array = (struct state **)calloc(arrSize, sizeof(struct state *)); if (array) { for(i = 0 ; i < arrSize; i++) array[i] = x3a->tbl[i].data; } return array; } /* Hash a configuration */ PRIVATE unsigned confighash(struct config *a) { unsigned h=0; h = h *571 + a->rp->index *37 + a->dot; return h; } /* There is one instance of the following structure for each ** associative array of type "x4". */ struct s_x4 { int size; /* The number of available slots. */ /* Must be a power of 2 greater than or */ /* equal to 1 */ int count; /* Number of currently slots filled */ struct s_x4node *tbl; /* The data stored here */ struct s_x4node **ht; /* Hash table for lookups */ }; /* There is one instance of this structure for every data element ** in an associative array of type "x4". */ typedef struct s_x4node { struct config *data; /* The data */ struct s_x4node *next; /* Next entry with the same hash */ struct s_x4node **from; /* Previous link */ } x4node; /* There is only one instance of the array, which is the following */ static struct s_x4 *x4a; /* Allocate a new associative array */ void Configtable_init() { if (x4a) return; x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); if (x4a) { x4a->size = 64; x4a->count = 0; x4a->tbl = (x4node *)calloc(64, sizeof(x4node) + sizeof(x4node *)); if (x4a->tbl == 0) { free(x4a); x4a = 0; } else{ int i; x4a->ht = (x4node **)&(x4a->tbl[64]); for(i = 0 ; i < 64; i++) x4a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int Configtable_insert(struct config *data) { x4node *np; unsigned h; unsigned ph; if (x4a == 0) return 0; ph = confighash(data); h = ph & (x4a->size-1); np = x4a->ht[h]; while(np) { if (Configcmp((const char *) np->data,(const char *) data) == 0) { /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if (x4a->count>=x4a->size) { /* Need to make the hash table bigger */ int i,arrSize; struct s_x4 array; array.size = arrSize = x4a->size *2; array.count = x4a->count; array.tbl = (x4node *)calloc(arrSize, sizeof(x4node) + sizeof(x4node *)); if (array.tbl == 0) return 0; /* Fail due to malloc failure */ array.ht = (x4node **)&(array.tbl[arrSize]); for(i = 0 ; i < arrSize; i++) array.ht[i] = 0; for(i = 0 ; i < x4a->count; i++) { x4node *oldnp, *newnp; oldnp = &(x4a->tbl[i]); h = confighash(oldnp->data) & (arrSize-1); newnp = &(array.tbl[i]); if (array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } free(x4a->tbl); *x4a = array; } /* Insert the new data */ h = ph & (x4a->size-1); np = &(x4a->tbl[x4a->count++]); np->data = data; if (x4a->ht[h] ) x4a->ht[h]->from = &(np->next); np->next = x4a->ht[h]; x4a->ht[h] = np; np->from = &(x4a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ struct config *Configtable_find(struct config *key) { int h; x4node *np; if (x4a == 0) return 0; h = confighash(key) & (x4a->size-1); np = x4a->ht[h]; while(np) { if (Configcmp((const char *) np->data,(const char *) key) == 0) break; np = np->next; } return np ? np->data : 0; } /* Remove all data from the table. Pass each data to the function "f" ** as it is removed. ("f" may be null to avoid this step.) */ void Configtable_clear(int(*f)(struct config *)) { int i; if (x4a == 0 || x4a->count == 0) return; if (f) for(i = 0 ; i < x4a->count; i++) (*f)(x4a->tbl[i].data); for(i = 0 ; i < x4a->size; i++) x4a->ht[i] = 0; x4a->count = 0; return; }