Mercurial > hg > index.cgi
changeset 126:ac183a519439
Update parsing scheme with a keyword lookup by token value and other framework
Add ability to turn a token code into a keyword string. Also correct some
details related to token table generation with some additional adjustments
for token symbols.
Also rework token symbol definitions and creation of some parsing tables as
well as the main statement parsing loop.
author | William Astle <lost@l-w.ca> |
---|---|
date | Mon, 08 Jan 2024 22:58:08 -0700 |
parents | 0607e4e20702 |
children | 527212870064 |
files | src/buildkeywordtab.c src/interp.s src/keywordlist.txt src/parse.s |
diffstat | 4 files changed, 228 insertions(+), 80 deletions(-) [+] |
line wrap: on
line diff
--- a/src/buildkeywordtab.c Sun Jan 07 20:35:51 2024 -0700 +++ b/src/buildkeywordtab.c Mon Jan 08 22:58:08 2024 -0700 @@ -27,7 +27,7 @@ struct treenode *tn1; int depth = ++treedepth; - fprintf(fp, "parse_wt%d fdb parse_wt%de-parse_wt%d-2\n", depth, depth, depth); + fprintf(fp, "parse_wt%d fdb parse_wt%de-parse_wt%d\n", depth, depth, depth); for (tn1 = tn -> firstchild; tn1; tn1 = tn1 -> nextsibling) {
--- a/src/interp.s Sun Jan 07 20:35:51 2024 -0700 +++ b/src/interp.s Mon Jan 08 22:58:08 2024 -0700 @@ -69,13 +69,17 @@ immediate0 jsr readline ; read input line bcs immediate0 ; brif ended with BREAK ldx #linebuff ; point to start of line input buffer - stx inputptr ; set input pointer - jsr curchar ; skip spaces and set flags - bcs immediate1 ; brif there's a line number - tsta ; is there anything there at all (end of line)? - beq immediate0 ; brif not - read another line - ldx inputptr ; get the modified input pointer processing above - jsr tokenize ; tokenize the line at inputptr, return with result at tokebuff and X +immediate0a lda ,x ; do we have anything at all? + beq immediate0 ; brif not - just read another line + cmpa #0x20 ; space? + bne immediate0c ; brif not +immediate0b leax 1,x ; move past the space + bra immediate0a ; keep looking for the start of input +immediate0c bsr setcifdigit ; do we have a line number? + bcs immediate1 ; brif so - go handle program editing + clrb ; flag to do actual parsing + jsr parse ; go parse the line + bra * ; NOTE(review): branches to itself, so the interpretline call below is never reached, presumably a temporary stop, confirm before release jsr interpretline ; go interpret the tokenized line bra immediate ; go handle another line immediate1 bsr parse_lineno ; parse the line number
--- a/src/keywordlist.txt Sun Jan 07 20:35:51 2024 -0700 +++ b/src/keywordlist.txt Mon Jan 08 22:58:08 2024 -0700 @@ -3,14 +3,14 @@ $ token_dollar % token_percent & token_amp -' token_apos +' token_remabbr ( token_oparen ) token_cparen -* token_star +* token_times + token_plus -, token_comma +, token_sep - token_minus -/ token_slash +/ token_divide : token_stmtsep ; token_semi ? token_print @@ -32,6 +32,8 @@ ELSE token_else END token_end GO token_go +GOTO token_goto +GOSUB token_gosub LET token_let LIST token_list NEW token_new
--- a/src/parse.s Sun Jan 07 20:35:51 2024 -0700 +++ b/src/parse.s Mon Jan 08 22:58:08 2024 -0700 @@ -30,27 +30,30 @@ stu freestart ldu parse_tokenst ; get start location we started parsing the token at rts ; return error condition -parse0 ldx #parse_stmtjump ; point to jump table for token type handler - abx ; offset to handler address - abx - jsr [,x] ; call handler - bcs parse_error ; brif handler flagged error +parse0 ldx #parsetab_cmd ; point to the command parse table (3 byte entries: token code, handler address) +parse1 cmpb ,x ; did we match a valid command token? + beq parse3 ; brif so + leax 3,x ; move to next entry + cmpx #parsetab_cmde ; end of table? + blo parse1 ; brif not +parse2 ldb #err_sn ; flag syntax error (NOTE(review): the removed code did COMB to set C before loading the error code, confirm parse_error sets C itself) + bra parse_error ; and return the error +parse3 jsr [1,x] ; call the handler + bcs parse_error ; brif the handler indicated error jsr parse_curtoken ; get the token we terminated on cmpb #token_eot ; end of input? - bne parse1 ; brif not + bne parse4 ; brif not ldb #bc_eol ; stash an end of line op bsr parse_write bcs parse_error ; brif we errored out writing to the result (OM?) tfr u,d ; calculate the length of the result subd ,s puls u,pc ; get pointer to start of encoded result and return (C is already clear) -parse1 cmpb #token_stmtsep ; statement separator? +parse4 cmpb #token_stmtsep ; statement separator? beq parse_nextstmt ; brif so - do another statement - cmpb #token_apos ; ' token? + cmpb #token_remabbr ; ' token? beq parse0 ; brif so - parse it as a new statement - comb ; set C for error - ldb #err_sn ; raise syntax error - bra parse_error + bra parse2 ; raise a syntax error parse_write lda parse_noout ; are we doing output? 
beq parse_write0 ; brif so leau 1,u ; just count up the output and don't do anything @@ -62,6 +65,7 @@ rts parse_write1 stb ,u+ ; save output byte stu freestart ; save new to of used memory +list_noop parse_noop rts ; return all clear - C clear from comparison above parse_curtoken ldb parse_curtok ; fetch token code of current token rts @@ -128,6 +132,8 @@ bhi parse_toupper0 ; brif not suba #0x20 ; adjust to upper case alpha parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu +parse_number jmp parse_tokerr +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows: ; ; * two bytes which contain the length of the table less the two bytes for this length value @@ -155,7 +161,7 @@ bne parse_wordtab6 ; brif not bsr parse_nextcharu ; fetch next input character (for sub table match) bne parse_wordtab0 ; brif we are going to check the sub table -parse_wordtab2 ldd ,x++ ; fetch length of sub table +parse_wordtab2 ldd ,x ; fetch length of sub table leax d,x ; move past sub table parse_wordtab3 lda ,s ; get back input character cmpx 1,s ; are we at the end of the table? @@ -170,65 +176,201 @@ parse_wordtab5 leay a,y ; move back the specified number of characters parse_wordtab6 clra ; clear C to indicate a match puls a,x,pc ; clean up stack and return -parse_number jmp parse_tokerr -; Parse tokens - define them in order using the macro parse_tokdef +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Convert a token number back to its keyword. This will use the same table used by parse_wordtab. Enter with a character +; output routine pointer in U which takes the character in A. The routine can assume that Y is preserved. Will return +; with C set if the token does not exist in the word table and clear otherwise. 
+parse_wtdc pshs u ; save routine pointer + ldu #strbuff+20 ; point to temporary string buffer + clr ,-u ; put a NUL at the end of the string + ldx #parse_wt ; point to keyword parse table + bsr parse_wtdc2 ; call the tree walker function + bcc parse_wtdc1 ; brif we do have a match + puls u,pc ; clean stack and return +parse_wtdc0 jsr [,s] ; output the character +parse_wtdc1 lda ,u+ ; get output byte + bne parse_wtdc0 ; brif we're not at the end yet + clra ; make sure C is clear + puls u,pc ; clean stack and return +parse_wtdc2 pshs a,x ; save the token match value and the table pointer + ldd ,x++ ; get table length + addd 1,s ; calculate end address + std 1,s ; save it +parse_wtdc3 ldd ,x++ ; get this table entry (A is the character, B is the token code) + bmi parse_wtdc6 ; brif it's a backtracking entry - skip it + cmpb ,s ; does the token match here? (the token code is in B, not A) + bne parse_wtdc5 ; brif not +parse_wtdc4 sta ,-u ; add the character to the output buffer (built backwards through U; Y must be preserved) + puls a,x,pc ; return up the call stack - C is clear from CMPB above or from the sub table match +parse_wtdc5 cmpb #token_eot ; does this entry have a sub table? + bne parse_wtdc6 ; brif not + pshs a ; save the matched character + lda 1,s ; get back the token we need + bsr parse_wtdc2 ; go handle the sub table + puls a ; get back the matched character + bcc parse_wtdc4 ; brif it did match - record it and return +parse_wtdc6 cmpx 1,s ; are we at the end of this table? + bne parse_wtdc3 ; brif not - handle another table entry + coma ; make sure C is set for no match + puls a,x,pc ; clean up stack and return +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; This table defines the various handler routines for the various bytecode tokens. 
Each token is defined as follows: +; parse_tokendefT <sym>,<parse>,<list>,<exec> +; where: +; T: c for command, f for function, p for particle +; <sym>: the symbol name without the "token_" prefix +; <parse>: parse handler for the type, ignored for particles +; <list>: list handler for the type, ignored for particles +; <exec>: execution handler for the type, ignored for particles *pragmapush list *pragma nolist -parse_toknum set 0 -parse_tokdef macro noexpand -\1 equ parse_toknum -parse_toknum set parse_toknum+1 - fdb \2 +__toknump set 0 +__toknumc set 0x40 +__toknumf set 0xc0 + setstr __cmdparset="" + setstr __cmdlistt="" + setstr __cmdexect="" + setstr __fnparset="" + setstr __fnlistt="" + setstr __fnexect="" +parse_tokendefp macro noexpand +token_\1 equ __toknump +__toknump set __toknump+1 + endm +parse_tokendefc macro noexpand +token_\1 equ __toknumc +__toknumc set __toknumc+1 + ifstr ne,"{2}","" + setstr __cmdparset="%(__cmdparset)\tfcb\ttoken_\1\n\tfdb {2}\n" + endc + ifstr ne,"{3}","" + setstr __cmdlistt="%(__cmdlistt)\tfcb\ttoken_\1\n\tfdb {3}\n" + endc + ifstr ne,"{4}","" + setstr __cmdexect="%(__cmdexect)\tfdb {4}\n" + else + setstr __cmdexect="%(__cmdexect)\tfdb SNERROR\n" + endc + endm +parse_tokendeff macro noexpand +token_\1 equ __toknumf +__toknumf set __toknumf+1 + ifstr ne,"{2}","" + setstr __fnparset="%(__fnparset)\tfcb\ttoken_\1\n\tfdb {2}\n" + endc + ifstr ne,"{3}","" + setstr __fnlistt="%(__fnlistt)\tfcb\ttoken_\1\n\tfdb {3}\n" + endc + ifstr ne,"{4}","" + setstr __fnexect="%(__fnexect)\tfdb {4}\n" + else + setstr __fnexect="%(__fnexect)\tfdb SNERROR\n" + endc + endm +token_cmdparse macro + *pragmapush nolist + *pragma nolist + includestr "%(__cmdparset)" + *pragmapop nolist + endm +token_cmdlist macro + *pragmapush nolist + *pragma nolist + includestr "%(__cmdlistt)" + *pragmapop nolist + endm +token_cmdexec macro + *pragmapush nolist + *pragma nolist + includestr "%(__cmdexect)" +token__maxcmd equ __toknumc-1 + *pragmapop nolist + endm 
+token_fnparse macro + *pragmapush nolist + *pragma nolist + includestr "%(__fnparset)" + *pragmapop nolist + endm +token_fnlist macro + *pragmapush nolist + *pragma nolist + includestr "%(__fnlistt)" + *pragmapop nolist + endm +token_fnexec macro + *pragmapush nolist + *pragma nolist + includestr "%(__fnexect)" +token__maxfn equ __toknumf-1 + *pragmapop nolist endm *pragmapop list -parse_stmtjump parse_tokdef token_error,parse_tokerr - parse_tokdef token_eot,parse_noop - parse_tokdef token_lt,parse_noop - parse_tokdef token_le,parse_noop - parse_tokdef token_gt,parse_noop - parse_tokdef token_ge,parse_noop - parse_tokdef token_eq,parse_noop - parse_tokdef token_ne,parse_noop - parse_tokdef token_reltrue,parse_noop // always true relational operator - parse_tokdef token_stmtsep,parse_noop - parse_tokdef token_apos,parse_rem - parse_tokdef token_special,parse_noop - parse_tokdef token_bang,parse_noop - parse_tokdef token_hash,parse_noop - parse_tokdef token_dollar,parse_noop - parse_tokdef token_percent,parse_noop - parse_tokdef token_amp,parse_noop - parse_tokdef token_oparen,parse_noop - parse_tokdef token_cparen,parse_noop - parse_tokdef token_star,parse_noop - parse_tokdef token_plus,parse_noop - parse_tokdef token_comma,parse_noop - parse_tokdef token_minus,parse_noop - parse_tokdef token_slash,parse_noop - parse_tokdef token_semi,parse_noop - parse_tokdef token_at,parse_noop - parse_tokdef token_exp,parse_noop - parse_tokdef token_ident,parse_noop - parse_tokdef token_rem,parse_noop - parse_tokdef token_return,parse_noop - parse_tokdef token_run,parse_noop - parse_tokdef token_data,parse_noop - parse_tokdef token_else,parse_noop - parse_tokdef token_end,parse_noop - parse_tokdef token_stop,parse_noop - parse_tokdef token_sub,parse_noop - parse_tokdef token_let,parse_noop - parse_tokdef token_list,parse_noop - parse_tokdef token_new,parse_noop - parse_tokdef token_not,parse_noop - parse_tokdef token_print,parse_noop - parse_tokdef token_pop,parse_noop - 
parse_tokdef token_to,parse_noop - parse_tokdef token_and,parse_noop - parse_tokdef token_or,parse_noop - parse_tokdef token_go,parse_noop - parse_tokdef token_as,parse_noop - parse_tokdef token_asc,parse_noop -parse_rem rts + parse_tokendefp error ; Used to mark errors; should always be first so it's token #0 + parse_tokendefp eot ; End of input marker or special handling in word tables + parse_tokendefp stmtsep ; statement separator + parse_tokendefp times ; times (multiplication) operator (*) + parse_tokendefp plus ; addition operator + parse_tokendefp divide ; division operator (/) + parse_tokendefp minus ; subtraction operator + parse_tokendefp exp ; exponentiation operator (^) + parse_tokendefp lt ; less than operator + parse_tokendefp le ; less than or equal operator + parse_tokendefp gt ; greater than operator + parse_tokendefp ge ; greater than or equal operator + parse_tokendefp eq ; equality operator + parse_tokendefp ne ; inequality operator + parse_tokendefp not ; boolean NOT operator + parse_tokendefp and ; boolean AND operator + parse_tokendefp or ; boolean OR operator + parse_tokendefp bang ; exclamation mark + parse_tokendefp hash ; number sign + parse_tokendefp dollar ; dollar sign (string sigil) + parse_tokendefp percent ; percent sign (integer sigil) + parse_tokendefp amp ; ampersand + parse_tokendefp oparen ; opening paren + parse_tokendefp cparen ; closing paren + parse_tokendefp sep ; comma (separator) + parse_tokendefp semi ; semicolon + parse_tokendefp at ; @ symbol + parse_tokendefp ident ; identifier (has special parsing) + parse_tokendefp else ; ELSE + parse_tokendefp then ; THEN + parse_tokendefp to ; TO + parse_tokendefp sub ; SUB + parse_tokendefp as ; AS + parse_tokendefc remabbr,parse_noop,list_noop,exec_noop ; abbreviated REM (') + parse_tokendefc rem,parse_noop,list_noop,exec_noop ; REM + parse_tokendefc return,parse_noop,parse_noop,parse_noop ; RETURN + parse_tokendefc run,parse_noop,parse_noop,parse_noop ; RUN + parse_tokendefc 
data,parse_noop,parse_noop,parse_noop ; DATA + parse_tokendefc end,parse_noop,parse_noop,parse_noop ; END + parse_tokendefc stop,parse_noop,parse_noop,parse_noop ; STOP + parse_tokendefc let,parse_noop,parse_noop,parse_noop ; LET + parse_tokendefc list,parse_noop,parse_noop,parse_noop ; LIST + parse_tokendefc new,parse_noop,parse_noop,parse_noop ; NEW + parse_tokendefc print,parse_noop,parse_noop,parse_noop ; PRINT + parse_tokendefc pop,parse_noop,parse_noop,parse_noop ; POP + parse_tokendefc goto,parse_noop,parse_noop,parse_noop ; GOTO + parse_tokendefc gosub,parse_noop,parse_noop,parse_noop ; GOSUB + parse_tokendefc go,parse_noop,parse_noop,parse_noop ; GO + + parse_tokendeff asc,parse_noop,parse_noop,parse_noop ; ASC() +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Parse handling tables +parsetab_cmd token_cmdparse +parsetab_cmde +parsetab_fn token_fnparse +parsetab_fne +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; List handling tables +listtab_cmd token_cmdlist +listtab_cmde +listtab_fn token_fnlist +listtab_fne +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Execution handling tables +exectab_cmd token_cmdexec +exectab_fn token_fnexec *pragmapop list