changeset 126:ac183a519439

Update parsing scheme with a keyword lookup by token value and other framework. Add ability to turn a token code into a keyword string. Also correct some details related to token table generation with some additional adjustments for token symbols. Also rework token symbol definitions and creation of some parsing tables as well as the main statement parsing loop.
author William Astle <lost@l-w.ca>
date Mon, 08 Jan 2024 22:58:08 -0700
parents 0607e4e20702
children 527212870064
files src/buildkeywordtab.c src/interp.s src/keywordlist.txt src/parse.s
diffstat 4 files changed, 228 insertions(+), 80 deletions(-) [+]
line wrap: on
line diff
--- a/src/buildkeywordtab.c	Sun Jan 07 20:35:51 2024 -0700
+++ b/src/buildkeywordtab.c	Mon Jan 08 22:58:08 2024 -0700
@@ -27,7 +27,7 @@
     struct treenode *tn1;
     int depth = ++treedepth;
     
-    fprintf(fp, "parse_wt%d fdb parse_wt%de-parse_wt%d-2\n", depth, depth, depth);
+    fprintf(fp, "parse_wt%d fdb parse_wt%de-parse_wt%d\n", depth, depth, depth);
 
     for (tn1 = tn -> firstchild; tn1; tn1 = tn1 -> nextsibling)
     {
--- a/src/interp.s	Sun Jan 07 20:35:51 2024 -0700
+++ b/src/interp.s	Mon Jan 08 22:58:08 2024 -0700
@@ -69,13 +69,17 @@
 immediate0      jsr readline                    ; read input line
                 bcs immediate0                  ; brif ended with BREAK
                 ldx #linebuff                   ; point to start of line input buffer
-                stx inputptr                    ; set input pointer
-                jsr curchar                     ; skip spaces and set flags
-                bcs immediate1                  ; brif there's a line number
-                tsta                            ; is there anything there at all (end of line)?
-                beq immediate0                  ; brif not - read another line
-                ldx inputptr                    ; get the modified input pointer processing above
-                jsr tokenize                    ; tokenize the line at inputptr, return with result at tokebuff and X
+immediate0a     lda ,x                          ; do we have anything at all?
+                beq immediate0                  ; brif not - just read another line
+                cmpa #0x20                      ; space?
+                bne immediate0c                 ; brif not
+immediate0b     leax 1,x                        ; move past the space
+                bra immediate0a                 ; keep looking for the start of input
+immediate0c     bsr setcifdigit                 ; do we have a line number?
+                bcs immediate1                  ; brif so - go handle program editing
+                clrb                            ; flag to do actual parsing
+                jsr parse                       ; go parse the line
+                bra *
                 jsr interpretline               ; go interpret the tokenized line
                 bra immediate                   ; go handle another line
 immediate1      bsr parse_lineno                ; parse the line number
--- a/src/keywordlist.txt	Sun Jan 07 20:35:51 2024 -0700
+++ b/src/keywordlist.txt	Mon Jan 08 22:58:08 2024 -0700
@@ -3,14 +3,14 @@
 $	token_dollar
 %	token_percent
 &	token_amp
-'	token_apos
+'	token_remabbr
 (	token_oparen
 )	token_cparen
-*	token_star
+*	token_times
 +	token_plus
-,	token_comma
+,	token_sep
 -	token_minus
-/	token_slash
+/	token_divide
 :	token_stmtsep
 ;	token_semi
 ?	token_print
@@ -32,6 +32,8 @@
 ELSE	token_else
 END	token_end
 GO	token_go
+GOTO	token_goto
+GOSUB	token_gosub
 LET	token_let
 LIST	token_list
 NEW	token_new
--- a/src/parse.s	Sun Jan 07 20:35:51 2024 -0700
+++ b/src/parse.s	Mon Jan 08 22:58:08 2024 -0700
@@ -30,27 +30,30 @@
                 stu freestart
                 ldu parse_tokenst               ; get start location we started parsing the token at
                 rts                             ; return error condition
-parse0          ldx #parse_stmtjump             ; point to jump table for token type handler
-                abx                             ; offset to handler address
-                abx
-                jsr [,x]                        ; call handler
-                bcs parse_error                 ; brif handler flagged error
+parse0          ldx #parsetab_cmd               ; point to jump table for token type handler
+parse1          cmpb ,x                         ; did we match a valid command token?
+                beq parse3                      ; brif so
+                leax 3,x                        ; move to next entry
+                cmpx #parsetab_cmde             ; end of table?
+                blo parse1                      ; brif not
+parse2          ldb #err_sn                     ; flag syntax error
+                bra parse_error                 ; and return the error
+parse3          jsr [1,x]                       ; call the handler
+                bcs parse_error                 ; brif the handler indicated error
                 jsr parse_curtoken              ; get the token we terminated on
                 cmpb #token_eot                 ; end of input?
-                bne parse1                      ; brif not
+                bne parse4                      ; brif not
                 ldb #bc_eol                     ; stash an end of line op
                 bsr parse_write
                 bcs parse_error                 ; brif we errored out writing to the result (OM?)
                 tfr u,d                         ; calculate the length of the result
                 subd ,s
                 puls u,pc                       ; get pointer to start of encoded result and return (C is already clear)
-parse1          cmpb #token_stmtsep             ; statement separator?
+parse4          cmpb #token_stmtsep             ; statement separator?
                 beq parse_nextstmt              ; brif so - do another statement
-                cmpb #token_apos                ; ' token?
+                cmpb #token_remabbr             ; ' token?
                 beq parse0                      ; brif so - parse it as a new statement
-                comb                            ; set C for error
-                ldb #err_sn                     ; raise syntax error
-                bra parse_error
+                bra parse2                      ; raise a syntax error
 parse_write     lda parse_noout                 ; are we doing output?
                 beq parse_write0                ; brif so
                 leau 1,u                        ; just count up the output and don't do anything
@@ -62,6 +65,7 @@
                 rts
 parse_write1    stb ,u+                         ; save output byte
                 stu freestart                   ; save new to of used memory
+list_noop
 parse_noop      rts                             ; return all clear - C clear from comparison above
 parse_curtoken  ldb parse_curtok                ; fetch token code of current token
                 rts
@@ -128,6 +132,8 @@
                 bhi parse_toupper0              ; brif not
                 suba #0x20                      ; adjust to upper case alpha
 parse_toupper0  rts                             ; Z only set here if input was zero entering from parse_nextcharu
+parse_number    jmp parse_tokerr
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
 ;
 ; * two bytes which contain the length of the table less the two bytes for this length value
@@ -155,7 +161,7 @@
                 bne parse_wordtab6              ; brif not
                 bsr parse_nextcharu             ; fetch next input character (for sub table match)
                 bne parse_wordtab0              ; brif we are going to check the sub table
-parse_wordtab2  ldd ,x++                        ; fetch length of sub table
+parse_wordtab2  ldd ,x                          ; fetch length of sub table
                 leax d,x                        ; move past sub table
 parse_wordtab3  lda ,s                          ; get back input character
                 cmpx 1,s                        ; are we at the end of the table?
@@ -170,65 +176,201 @@
 parse_wordtab5  leay a,y                        ; move back the specified number of characters
 parse_wordtab6  clra                            ; clear C to indicate a match
                 puls a,x,pc                     ; clean up stack and return
-parse_number    jmp parse_tokerr
-; Parse tokens - define them in order using the macro parse_tokdef
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Convert a token number back to its keyword. This will use the same table used by parse_wordtab. Enter with the token
+; number in A and a character output routine pointer in U which takes the character in A. The routine can assume that
+; Y is preserved, but it must itself preserve U, which is used to read the keyword back out of the buffer. Will return
+; with C set if the token does not exist in the word table and clear otherwise.
+;
+; The keyword is assembled back to front below strbuff+20 (through U) as the recursive table walk at parse_wtdc2
+; unwinds from the matching entry; it is then sent to the output routine front to back. Each table entry is read as a
+; character (negative for a backtracking entry) in A followed by a token code in B; token_eot in B means a sub table
+; follows the entry.
+; NOTE(review): backtracking entries are skipped without comparing their token code - confirm no keyword is only
+; reachable through such an entry.
+parse_wtdc      pshs u                          ; save routine pointer
+                ldu #strbuff+20                 ; point just past the end of the temporary string buffer
+                clr ,-u                         ; put a NUL at the end of the string
+                ldx #parse_wt                   ; point to keyword parse table
+                bsr parse_wtdc2                 ; call the tree walker function
+                bcc parse_wtdc1                 ; brif we do have a match
+                puls u,pc                       ; clean stack and return (C still set from the walker)
+parse_wtdc0     jsr [,s]                        ; output the character via the saved routine pointer
+parse_wtdc1     lda ,u+                         ; get output byte
+                bne parse_wtdc0                 ; brif we're not at the end yet
+                clra                            ; make sure C is clear
+                puls u,pc                       ; clean stack and return
+parse_wtdc2     pshs a,x                        ; save the token match value and the table pointer
+                ldd ,x++                        ; get table length
+                addd 1,s                        ; calculate end address
+                std 1,s                         ; save it in place of the table pointer
+parse_wtdc3     ldd ,x++                        ; get this table entry (A = character, B = token code)
+                bmi parse_wtdc6                 ; brif it's a backtracking entry - skip it
+                cmpb ,s                         ; does the token match here? (token code is in B, not A)
+                bne parse_wtdc5                 ; brif not
+parse_wtdc4     sta ,-u                         ; add the character to the output buffer (U is the buffer pointer)
+                puls a,x,pc                     ; return up the call stack - C is clear from CMPB or the sub table match
+parse_wtdc5     cmpb #token_eot                 ; does this entry have a sub table?
+                bne parse_wtdc6                 ; brif not
+                pshs a                          ; save the matched character
+                lda 1,s                         ; get back the token we need
+                bsr parse_wtdc2                 ; go handle the sub table (returns X just past the sub table)
+                puls a                          ; get back the matched character (PULS leaves C alone)
+                bcc parse_wtdc4                 ; brif it did match - record it and return
+parse_wtdc6     cmpx 1,s                        ; are we at the end of this table?
+                bne parse_wtdc3                 ; brif not - handle another table entry
+                coma                            ; make sure C is set for no match
+                puls a,x,pc                     ; clean up stack and return
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows:
+;               parse_tokdefT <sym>,<parse>,<list>,<exec>
+; where:
+; T: c for command, f for function, p for particle
+; <sym>: the symbol name without the "token_" prefix
+; <parse>: parse handler for the type, ignored for particles
+; <list>: list handler for the type, ignored for particles
+; <exec>: execution handler for the type, ignored for particles
                 *pragmapush list
                 *pragma nolist
-parse_toknum    set 0
-parse_tokdef    macro noexpand
-\1              equ parse_toknum
-parse_toknum    set parse_toknum+1
-                fdb \2
+__toknump       set 0
+__toknumc       set 0x40
+__toknumf       set 0xc0
+                setstr __cmdparset=""
+                setstr __cmdlistt=""
+                setstr __cmdexect=""
+                setstr __fnparset=""
+                setstr __fnlistt=""
+                setstr __fnexect=""
+parse_tokendefp macro noexpand
+token_\1        equ __toknump
+__toknump       set __toknump+1
+                endm
+; Define a command token: assigns the next command token number to the token_<sym> symbol and appends entries to the
+; command parse, list, and exec table strings. Argument 2 is the parse handler, 3 the list handler, and 4 the exec
+; handler; parse and list entries are omitted when the argument is empty, while an empty exec handler gets SNERROR.
+; NOTE(review): the exec table now references argument 4, so every exec handler named by callers must be defined.
+parse_tokendefc macro noexpand
+token_\1        equ __toknumc
+__toknumc       set __toknumc+1
+                ifstr ne,"{2}",""
+                setstr __cmdparset="%(__cmdparset)\tfcb\ttoken_\1\n\tfdb {2}\n"
+                endc
+                ifstr ne,"{3}",""
+                setstr __cmdlistt="%(__cmdlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
+                endc
+                ifstr ne,"{4}",""
+                setstr __cmdexect="%(__cmdexect)\tfdb {4}\n"
+                else
+                setstr __cmdexect="%(__cmdexect)\tfdb SNERROR\n"
+                endc
+                endm
+; Define a function token: assigns the next function token number to the token_<sym> symbol and appends entries to
+; the function parse, list, and exec table strings. Argument 2 is the parse handler, 3 the list handler, and 4 the
+; exec handler; parse and list entries are omitted when the argument is empty, while an empty exec handler gets SNERROR.
+; NOTE(review): the exec table now references argument 4, so every exec handler named by callers must be defined.
+parse_tokendeff macro noexpand
+token_\1        equ __toknumf
+__toknumf       set __toknumf+1
+                ifstr ne,"{2}",""
+                setstr __fnparset="%(__fnparset)\tfcb\ttoken_\1\n\tfdb {2}\n"
+                endc
+                ifstr ne,"{3}",""
+                setstr __fnlistt="%(__fnlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
+                endc
+                ifstr ne,"{4}",""
+                setstr __fnexect="%(__fnexect)\tfdb {4}\n"
+                else
+                setstr __fnexect="%(__fnexect)\tfdb SNERROR\n"
+                endc
+                endm
+token_cmdparse  macro
+                *pragmapush nolist
+                *pragma nolist
+                includestr "%(__cmdparset)"
+                *pragmapop nolist
+                endm
+token_cmdlist   macro
+                *pragmapush nolist
+                *pragma nolist
+                includestr "%(__cmdlistt)"
+                *pragmapop nolist
+                endm
+token_cmdexec   macro
+                *pragmapush nolist
+                *pragma nolist
+                includestr "%(__cmdexect)"
+token__maxcmd   equ __toknumc-1
+                *pragmapop nolist
+                endm
+token_fnparse   macro
+                *pragmapush nolist
+                *pragma nolist
+                includestr "%(__fnparset)"
+                *pragmapop nolist
+                endm
+token_fnlist    macro
+                *pragmapush nolist
+                *pragma nolist
+                includestr "%(__fnlistt)"
+                *pragmapop nolist
+                endm
+token_fnexec    macro
+                *pragmapush nolist
+                *pragma nolist
+                includestr "%(__fnexect)"
+token__maxfn    equ __toknumf-1
+                *pragmapop nolist
                 endm
                 *pragmapop list
-parse_stmtjump  parse_tokdef token_error,parse_tokerr
-                parse_tokdef token_eot,parse_noop
-                parse_tokdef token_lt,parse_noop
-                parse_tokdef token_le,parse_noop
-                parse_tokdef token_gt,parse_noop
-                parse_tokdef token_ge,parse_noop
-                parse_tokdef token_eq,parse_noop
-                parse_tokdef token_ne,parse_noop
-                parse_tokdef token_reltrue,parse_noop // always true relational operator
-                parse_tokdef token_stmtsep,parse_noop
-                parse_tokdef token_apos,parse_rem
-                parse_tokdef token_special,parse_noop
-                parse_tokdef token_bang,parse_noop
-                parse_tokdef token_hash,parse_noop
-                parse_tokdef token_dollar,parse_noop
-                parse_tokdef token_percent,parse_noop
-                parse_tokdef token_amp,parse_noop
-                parse_tokdef token_oparen,parse_noop
-                parse_tokdef token_cparen,parse_noop
-                parse_tokdef token_star,parse_noop
-                parse_tokdef token_plus,parse_noop
-                parse_tokdef token_comma,parse_noop
-                parse_tokdef token_minus,parse_noop
-                parse_tokdef token_slash,parse_noop
-                parse_tokdef token_semi,parse_noop
-                parse_tokdef token_at,parse_noop
-                parse_tokdef token_exp,parse_noop
-                parse_tokdef token_ident,parse_noop
-                parse_tokdef token_rem,parse_noop
-                parse_tokdef token_return,parse_noop
-                parse_tokdef token_run,parse_noop
-                parse_tokdef token_data,parse_noop
-                parse_tokdef token_else,parse_noop
-                parse_tokdef token_end,parse_noop
-                parse_tokdef token_stop,parse_noop
-                parse_tokdef token_sub,parse_noop
-                parse_tokdef token_let,parse_noop
-                parse_tokdef token_list,parse_noop
-                parse_tokdef token_new,parse_noop
-                parse_tokdef token_not,parse_noop
-                parse_tokdef token_print,parse_noop
-                parse_tokdef token_pop,parse_noop
-                parse_tokdef token_to,parse_noop
-                parse_tokdef token_and,parse_noop
-                parse_tokdef token_or,parse_noop
-                parse_tokdef token_go,parse_noop
-                parse_tokdef token_as,parse_noop
-                parse_tokdef token_asc,parse_noop
-parse_rem       rts
+; Particle tokens: numbered sequentially from 0 by parse_tokendefp; no parse, list, or exec table entries are generated.
+                parse_tokendefp error           ; Used to mark errors; should always be first so it's token #0
+                parse_tokendefp eot             ; End of input marker or special handling in word tables
+                parse_tokendefp stmtsep         ; statement separator
+                parse_tokendefp times           ; times (multiplication) operator (*)
+                parse_tokendefp plus            ; addition operator
+                parse_tokendefp divide          ; division operator (/)
+                parse_tokendefp minus           ; subtraction operator
+                parse_tokendefp exp             ; exponentiation operator (^)
+                parse_tokendefp lt              ; less than operator
+                parse_tokendefp le              ; less than or equal operator
+                parse_tokendefp gt              ; greater than operator
+                parse_tokendefp ge              ; greater than or equal operator
+                parse_tokendefp eq              ; equality operator
+                parse_tokendefp ne              ; inequality operator
+                parse_tokendefp not             ; boolean NOT operator
+                parse_tokendefp and             ; boolean AND operator
+                parse_tokendefp or              ; boolean OR operator
+                parse_tokendefp bang            ; exclamation mark
+                parse_tokendefp hash            ; number sign
+                parse_tokendefp dollar          ; dollar sign (string sigil)
+                parse_tokendefp percent         ; percent sign (integer sigil)
+                parse_tokendefp amp             ; ampersand
+                parse_tokendefp oparen          ; opening paren
+                parse_tokendefp cparen          ; closing paren
+                parse_tokendefp sep             ; comma (separator)
+                parse_tokendefp semi            ; semicolon
+                parse_tokendefp at              ; @ symbol
+                parse_tokendefp ident           ; identifier (has special parsing)
+                parse_tokendefp else            ; ELSE
+                parse_tokendefp then            ; THEN
+                parse_tokendefp to              ; TO
+                parse_tokendefp sub             ; SUB
+                parse_tokendefp as              ; AS
 
+                parse_tokendefc remabbr,parse_noop,list_noop,exec_noop          ; abbreviated REM (')
+                parse_tokendefc rem,parse_noop,list_noop,exec_noop              ; REM
+                parse_tokendefc return,parse_noop,parse_noop,parse_noop         ; RETURN
+                parse_tokendefc run,parse_noop,parse_noop,parse_noop            ; RUN
+                parse_tokendefc data,parse_noop,parse_noop,parse_noop           ; DATA
+                parse_tokendefc end,parse_noop,parse_noop,parse_noop            ; END
+                parse_tokendefc stop,parse_noop,parse_noop,parse_noop           ; STOP
+                parse_tokendefc let,parse_noop,parse_noop,parse_noop            ; LET
+                parse_tokendefc list,parse_noop,parse_noop,parse_noop           ; LIST
+                parse_tokendefc new,parse_noop,parse_noop,parse_noop            ; NEW
+                parse_tokendefc print,parse_noop,parse_noop,parse_noop          ; PRINT
+                parse_tokendefc pop,parse_noop,parse_noop,parse_noop            ; POP
+                parse_tokendefc goto,parse_noop,parse_noop,parse_noop           ; GOTO
+                parse_tokendefc gosub,parse_noop,parse_noop,parse_noop          ; GOSUB
+                parse_tokendefc go,parse_noop,parse_noop,parse_noop             ; GO
+
+                parse_tokendeff asc,parse_noop,parse_noop,parse_noop            ; ASC()
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Parse handling tables
+parsetab_cmd    token_cmdparse
+parsetab_cmde
+parsetab_fn     token_fnparse
+parsetab_fne
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; List handling tables
+listtab_cmd     token_cmdlist
+listtab_cmde
+listtab_fn      token_fnlist
+listtab_fne
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Execution handling tables
+exectab_cmd     token_cmdexec
+exectab_fn      token_fnexec
                 *pragmapop list