Mercurial > hg > index.cgi

--- a/src/buildkeywordtab.c	Mon Jan 01 02:53:44 2024 -0700
+++ b/src/buildkeywordtab.c	Mon Jan 01 15:15:45 2024 -0700
@@ -16,8 +16,13 @@
     struct treenode *firstchild;
 };

+/*
+lookaheaddepth starts at 255 (0xff, which is -1 as a signed byte) and counts down
+by one per extra level, so the emitted byte is the two's complement negative unget count.
+*/
+
 int treedepth = 0;
-void print_tree(FILE *fp, struct treenode *tn)
+void print_tree(FILE *fp, struct treenode *tn, char *lookahead, int lookaheaddepth)
 {
     struct treenode *tn1;
     int depth = ++treedepth;
@@ -26,9 +31,36 @@

     for (tn1 = tn -> firstchild; tn1; tn1 = tn1 -> nextsibling)
     {
-        fprintf(fp, " fcb 0x%02x,%s\n", tn1 -> ccode, tn1 -> toksym ? tn1 -> toksym : "token_eot");
+        // if there are child nodes, insert the sub tree
         if (tn1 -> firstchild)
-            print_tree(fp, tn1);
+        {
+            fprintf(fp, " fcb 0x%02x,token_eot\n", tn1 -> ccode);
+            if (tn1 -> toksym)
+            {
+                print_tree(fp, tn1, tn1 -> toksym, 255);
+            }
+            else
+            {
+                if (lookahead)
+                {
+                    print_tree(fp, tn1, lookahead, lookaheaddepth - 1);
+                }
+                else
+                {
+                    print_tree(fp, tn1, NULL, 0);
+                }
+            }
+        }
+        // if there is also a terminal symbol here
+        if (tn1 -> toksym)
+        {
+            fprintf(fp, " fcb 0x%02x,%s\n", tn1 -> ccode, tn1 -> toksym);
+        }
+    }
+    // handle lookahead failure
+    if (lookahead)
+    {
+        fprintf(fp, " fcb 0x%02x,%s\n", lookaheaddepth, lookahead);
     }

     fprintf(fp, "parse_wt%de\n", depth);
@@ -114,9 +146,10 @@
         exit(1);
     }
     fprintf(outfile, "; This file is automatically generated. Edit %s and rebuild to make changes.\n", argv[1]);
+    fprintf(outfile, " *pragmapush list\n *pragma list\n");
     fprintf(outfile, "parse_wordtab\n");
-    print_tree(outfile, treeroot);
-
+    print_tree(outfile, treeroot, NULL, 0);
+    fprintf(outfile, " *pragmapop list\n");
     fclose(outfile);
     exit(0);
 }
--- a/src/keywordlist.txt	Mon Jan 01 02:53:44 2024 -0700
+++ b/src/keywordlist.txt	Mon Jan 01 15:15:45 2024 -0700
@@ -1,4 +1,6 @@
 AND,token_and
+AS,token_as
+ASC,token_asc
 DATA,token_data
 ELSE,token_else
 END,token_end
--- a/src/parse.s	Mon Jan 01 02:53:44 2024 -0700
+++ b/src/parse.s	Mon Jan 01 15:15:45 2024 -0700
@@ -160,45 +160,48 @@
                 std val0+val.strlen             ; save the length of the identifier
                 ldb #token_ident                ; set token type to identifier (variable name, probably)
                 rts                             ; return token type, do not advance since we already did above
-; Parsing a potential keyword here. This works using a recursive lookup table. Each lookup table starts with a 18 bit
-; size entry for the table. Each entry is then 2 bytes. The first is the character to
-; match for this entry. The second is either token_eot to indicate a sub table needs to be consulted, token_ident to
-; indicate that the token should be parsed as an identifier, or a token type code which indicates the value should
-; be accepted. If a sub table is to be consulted, the table will appear inline with the same format. Should matching
-; fall off the end of a table, the character being considered will be "ungot" and processing will return back up the
-; call chain, ungetting characters, until the top level at which point token_ident will be returned.
+; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
+;
+; * two bytes which contain the length of the table less the two bytes for this length value
+; * a sequence of entries consisting of a single byte matching character and a token code followed
+;   by an optional sub table, structured exactly the same way.
+;
+; The optional subtable will be present if the token code is token_eot
+;
+; If the character match is negative, it means a lookahead failed. The negative value is the number
+; of characters to unget and the token code is the token value to return. No other entries after this
+; in a table will be considered since this negative match is a global (catch-all) match.
 ;
-; If the match character is negative, the match character represents the number of characters to "unget" and then
-; return the specified token. This is for handling look-aheads.
-parse_nexttok16 pshs a,x                        ; save input character
-                ldd ,x++                        ; get number of entries in the table
-                addd 1,s                        ; set pointer to end of table
-                std 1,s
-parse_nexttok17 cmpa ,x++                       ; does this entry match?
-                beq parse_nexttok21             ; brif so
-                ldb -2,x                        ; was this a look-ahead non-match?
-                bpl parse_nexttok19             ; brif not
-                leay b,y                        ; back up the input pointer
-                ldb -1,x                        ; get match token
-parse_nexttok18 puls a,x,pc                     ; clean up stack and return the matched token
-parse_nexttok19 ldb -1,x                        ; is there a sub table?
-                cmpb #token_eot
-                bne parse_nexttok20             ; brif not
-                ldd ,x++                        ; move past the sub table
-                leax d,x
-parse_nexttok20 cmpx 1,s                        ; did we reach the end of this table?
-                blo parse_nexttok17             ; brif not
-                ldb #token_ident                ; flag identifier required
-                puls a,x,pc                     ; restore input character, clean up stack, and return
-parse_nexttok21 ldb -1,x                        ; what token did we match?
-                cmpb #token_eot                 ; sub table?
-                bne parse_nexttok18             ; brif not - ding! ding! ding! we have a match
-                leas 3,s                        ; clean up stack
-                bsr parse_nextcharu             ; fetch next input character
-                bne parse_nexttok16             ; process sub table entries if we have input
-                ldb #token_ident                ; indicate we have an ident
-                leay -1,y                       ; unget the end of input
-                rts
+; When a token_eot match is found, if there are no further characters in the input, the match is
+; determined to be invalid and processing continues with the next entry.
+parse_wordtab0  leas 3,s                        ; clean up stack for sub table handling
+parse_wordtab   pshs a,x                        ; save input character and start of table
+                ldd ,x++                        ; get length of this table
+                addd 1,s                        ; calculate the address of the end of the table
+                std 1,s                         ; save end address for comparison later
+                lda ,s                          ; get back input character
+parse_wordtab1  ldb 1,x                         ; fetch token code for this entry (X still points at the match byte)
+                cmpa ,x++                       ; does this entry match? (X now points past the entry)
+                bne parse_wordtab4              ; brif not
+                cmpb #token_eot                 ; is it indicating a sub table?
+                bne parse_wordtab6              ; brif not
+                bsr parse_nextcharu             ; fetch next input character (for sub table match)
+                bne parse_wordtab0              ; brif we are going to check the sub table
+parse_wordtab2  ldd ,x++                        ; fetch length of sub table
+                leax d,x                        ; move past sub table
+parse_wordtab3  lda ,s                          ; get back input character
+                cmpx 1,s                        ; are we at the end of the table?
+                blo parse_wordtab1              ; brif not - check another entry
+                comb                            ; indicate no match
+                puls a,x,pc                     ; clean up stack and return
+parse_wordtab4  lda -2,x                        ; get the match character
+                bmi parse_wordtab5              ; brif negative - lookahead fail
+                cmpb #token_eot                 ; is there a sub table to skip?
+                beq parse_wordtab2              ; brif so - skip sub table
+                bra parse_wordtab3              ; otherwise just move to the next entry
+parse_wordtab5  leay a,y                        ; move back the specified number of characters
+parse_wordtab6  clra                            ; clear C to indicate a match (token code is in B)
+                puls a,x,pc                     ; clean up stack and return
 parse_number    jmp parse_tokerr
 ; Relational token table, bits are > = <
 parse_reltab    fcb token_error
@@ -285,6 +288,8 @@
                 parse_tokdef token_and,parse_noop
                 parse_tokdef token_or,parse_noop
                 parse_tokdef token_go,parse_noop
+                parse_tokdef token_as,parse_noop
+                parse_tokdef token_asc,parse_noop
 parse_rem       rts

                 *pragmapop list