LWOS: src/parse.s comparison

comparison src/parse.s @ 126:ac183a519439

Update parsing scheme with a keyword lookup by token value and other framework. Add ability to turn a token code into a keyword string. Also correct some details related to token table generation with some additional adjustments for token symbols. Also rework token symbol definitions and creation of some parsing tables as well as the main statement parsing loop.

author	William Astle <lost@l-w.ca>
date	Mon, 08 Jan 2024 22:58:08 -0700
parents	0607e4e20702
children	527212870064

comparison

equal deleted inserted replaced

-:0607e4e20702
+:ac183a519439
 bcc parse0                      ; brif we succeeded in parsing a token
 parse_error     puls u                          ; restore original free memory location - deallocate any encoding
 stu freestart
 ldu parse_tokenst               ; get start location we started parsing the token at
 rts                             ; return error condition
-parse0          ldx #parse_stmtjump             ; point to jump table for token type handler
+parse0          ldx #parsetab_cmd               ; point to jump table for token type handler
-abx                             ; offset to handler address
+parse1          cmpb ,x                         ; did we match a valid command token?
-abx
+beq parse3                      ; brif so
-jsr [,x]                        ; call handler
+leax 3,x                        ; move to next entry
-bcs parse_error                 ; brif handler flagged error
+cmpx #parsetab_cmde             ; end of table?
+blo parse1                      ; brif not
+; Raise a syntax error from the statement loop. C has to be set explicitly here: this point is
+; reached by falling through a BLO that was not taken (C clear) or by a BRA after a CMPB whose
+; carry result is arbitrary, and nothing in parse_error sets C before it returns.
+parse2          comb                            ; set C to flag the error (B is reloaded next)
+                ldb #err_sn                     ; flag syntax error - LDB leaves C untouched
+                bra parse_error                 ; clean up and return the error
+parse3          jsr [1,x]                       ; call the handler
+bcs parse_error                 ; brif the handler indicated error
 jsr parse_curtoken              ; get the token we terminated on
 cmpb #token_eot                 ; end of input?
-bne parse1                      ; brif not
+bne parse4                      ; brif not
 ldb #bc_eol                     ; stash an end of line op
 bsr parse_write
 bcs parse_error                 ; brif we errored out writing to the result (OM?)
 tfr u,d                         ; calculate the length of the result
 subd ,s
 puls u,pc                       ; get pointer to start of encoded result and return (C is already clear)
-parse1          cmpb #token_stmtsep             ; statement separator?
+parse4          cmpb #token_stmtsep             ; statement separator?
 beq parse_nextstmt              ; brif so - do another statement
-cmpb #token_apos                ; ' token?
+cmpb #token_remabbr             ; ' token?
 beq parse0                      ; brif so - parse it as a new statement
-comb                            ; set C for error
+bra parse2                      ; raise a syntax error
-ldb #err_sn                     ; raise syntax error
-bra parse_error
 parse_write     lda parse_noout                 ; are we doing output?
 beq parse_write0                ; brif so
 leau 1,u                        ; just count up the output and don't do anything
 rts
 parse_write0    leax -stackheadroom,s           ; calculate bottom of stack with headroom
 bhs parse_write1                ; brif not - we're good
 ldb #err_om                     ; raise out of memory error, C already set from comparison
 rts
 parse_write1    stb ,u+                         ; save output byte
 stu freestart                   ; save new top of used memory
+list_noop
 parse_noop      rts                             ; return all clear - C clear from comparison above
 parse_curtoken  ldb parse_curtok                ; fetch token code of current token
 rts
 parse_tokerr    comb                            ; flag error - unexpected token
 ldb #err_sn                     ; raise syntax error
 blo parse_toupper0              ; brif not
 cmpa #'z                        ; is it still lower case alpha?
 bhi parse_toupper0              ; brif not
 suba #0x20                      ; adjust to upper case alpha
 parse_toupper0  rts                             ; Z only set here if input was zero entering from parse_nextcharu
+parse_number    jmp parse_tokerr
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
 ;
 ; * two bytes which contain the length of the table less the two bytes for this length value
 ; * a sequence of entries consisting of a single byte matching character and a token code followed
 ;   by an optional sub table, structured exactly the same way.
 bne parse_wordtab4              ; brif not
 cmpb #token_eot                 ; is it indicating a sub table?
 bne parse_wordtab6              ; brif not
 bsr parse_nextcharu             ; fetch next input character (for sub table match)
 bne parse_wordtab0              ; brif we are going to check the sub table
-parse_wordtab2  ldd ,x++                        ; fetch length of sub table
+parse_wordtab2  ldd ,x                          ; fetch length of sub table
 leax d,x                        ; move past sub table
 parse_wordtab3  lda ,s                          ; get back input character
 cmpx 1,s                        ; are we at the end of the table?
 blo parse_wordtab1              ; brif not - check another entry
 comb                            ; indicate no match
 beq parse_wordtab2              ; brif so - skip sub table
 bra parse_wordtab3              ; otherwise just move to the next entry
 parse_wordtab5  leay a,y                        ; move back the specified number of characters
 parse_wordtab6  clra                            ; clear C to indicate a match
 puls a,x,pc                     ; clean up stack and return
-parse_number    jmp parse_tokerr
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Parse tokens - define them in order using the macro parse_tokdef
+; Convert a token number back to its keyword. This will use the same table used by parse_wordtab. Enter with a character
+; output routine pointer in U which takes the character in A. The routine can assume that Y is preserved. Will return
+; with C set if the token does not exist in the word table and clear otherwise.
+; parse_wtdc expects the token code to look up in A (it is handed straight to parse_wtdc2) and
+; the character output routine in U. The keyword is assembled backwards, ending at strbuff+19,
+; and is then fed to the output routine from its first character to the NUL terminator.
+; NOTE(review): the output loop reads through U after each call, so the output routine must
+; preserve U.
+parse_wtdc      pshs u                          ; save output routine pointer; called later through [,s]
+                ldu #strbuff+20                 ; point just past the temporary string buffer
+                clr ,-u                         ; put a NUL at the end of the string
+                ldx #parse_wt                   ; point to keyword parse table
+                bsr parse_wtdc2                 ; walk the table tree looking for the token in A
+                bcc parse_wtdc1                 ; brif we do have a match - U points at the first character
+                puls u,pc                       ; no match - clean stack and return with C still set
+parse_wtdc0     jsr [,s]                        ; output the character in A
+parse_wtdc1     lda ,u+                         ; get next keyword character
+                bne parse_wtdc0                 ; brif we're not at the NUL yet
+                clra                            ; make sure C is clear
+                puls u,pc                       ; clean stack and return
+; Recursive table walker. Enter with A = token code wanted, X = pointer to a table (at its length
+; word) and U = output buffer pointer. On a match, the matching character and the characters of
+; each enclosing table entry are stored below U (deepest first) and C is clear. With no match C is
+; set. Either way X comes back holding the end address of the table that was walked, which is what
+; lets the caller continue with the entry following a sub table.
+parse_wtdc2     pshs a,x                        ; ,s = token wanted; 1,s = table pointer (replaced by end address)
+                ldd ,x++                        ; get table length; X now points at the first entry
+                addd 1,s                        ; calculate end address
+                std 1,s                         ; save it
+parse_wtdc3     ldd ,x++                        ; get this table entry: A = match character, B = token code
+                bmi parse_wtdc6                 ; brif it's a backtracking entry - skip it
+                cmpb ,s                         ; does the token code (in B) match the one wanted?
+                bne parse_wtdc5                 ; brif not
+parse_wtdc4     sta ,-u                         ; add the character to the front of the output buffer
+                puls a,x,pc                     ; return up the call stack - C is clear from CMPB or BCC
+parse_wtdc5     cmpb #token_eot                 ; does this entry have a sub table?
+                bne parse_wtdc6                 ; brif not
+                pshs a                          ; save the matched character
+                lda 1,s                         ; get back the token we need
+                bsr parse_wtdc2                 ; go handle the sub table; X returns just past it
+                puls a                          ; get back the matched character (PULS A leaves C alone)
+                bcc parse_wtdc4                 ; brif it did match - record our character and return
+parse_wtdc6     cmpx 1,s                        ; are we at the end of this table?
+                bne parse_wtdc3                 ; brif not - handle another table entry
+                coma                            ; make sure C is set for no match (A is restored by PULS)
+                puls a,x,pc                     ; clean up stack and return
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows:
+;               parse_tokendefT <sym>,<parse>,<list>,<exec>
+; where:
+; T: c for command, f for function, p for particle
+; <sym>: the symbol name without the "token_" prefix
+; <parse>: parse handler for the type, ignored for particles
+; <list>: list handler for the type, ignored for particles
+; <exec>: execution handler for the type, ignored for particles
 *pragmapush list
 *pragma nolist
-parse_toknum    set 0
+__toknump       set 0
-parse_tokdef    macro noexpand
+__toknumc       set 0x40
-\1              equ parse_toknum
+__toknumf       set 0xc0
-parse_toknum    set parse_toknum+1
+setstr __cmdparset=""
-fdb \2
+setstr __cmdlistt=""
+setstr __cmdexect=""
+setstr __fnparset=""
+setstr __fnlistt=""
+setstr __fnexect=""
+; Define a particle token: token_<sym> is given the current value of __toknump, which is then
+; incremented. No handler table entries are generated for particles.
+parse_tokendefp macro noexpand
+token_\1        equ __toknump
+__toknump       set __toknump+1
+endm
+; Define a command token: token_<sym> is given the current value of __toknumc, which is then
+; incremented. A parse table entry (token byte + handler address) is appended to __cmdparset when
+; <parse> is given, and likewise to __cmdlistt when <list> is given. The exec table string
+; __cmdexect always gets exactly one address per command: <exec> (argument 4) when given,
+; otherwise SNERROR.
+parse_tokendefc macro noexpand
+token_\1        equ __toknumc
+__toknumc       set __toknumc+1
+                ifstr ne,"{2}",""
+                setstr __cmdparset="%(__cmdparset)\tfcb\ttoken_\1\n\tfdb {2}\n"
+                endc
+                ifstr ne,"{3}",""
+                setstr __cmdlistt="%(__cmdlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
+                endc
+                ifstr ne,"{4}",""
+                setstr __cmdexect="%(__cmdexect)\tfdb {4}\n"
+                else
+                setstr __cmdexect="%(__cmdexect)\tfdb SNERROR\n"
+                endc
+                endm
+; Define a function token: token_<sym> is given the current value of __toknumf, which is then
+; incremented. A parse table entry (token byte + handler address) is appended to __fnparset when
+; <parse> is given, and likewise to __fnlistt when <list> is given. The exec table string
+; __fnexect always gets exactly one address per function: <exec> (argument 4) when given,
+; otherwise SNERROR.
+parse_tokendeff macro noexpand
+token_\1        equ __toknumf
+__toknumf       set __toknumf+1
+                ifstr ne,"{2}",""
+                setstr __fnparset="%(__fnparset)\tfcb\ttoken_\1\n\tfdb {2}\n"
+                endc
+                ifstr ne,"{3}",""
+                setstr __fnlistt="%(__fnlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
+                endc
+                ifstr ne,"{4}",""
+                setstr __fnexect="%(__fnexect)\tfdb {4}\n"
+                else
+                setstr __fnexect="%(__fnexect)\tfdb SNERROR\n"
+                endc
+                endm
+; Emit the command parse table: assembles the text accumulated in __cmdparset at the point of
+; invocation, with the nolist pragma state saved before and restored after.
+token_cmdparse  macro
+*pragmapush nolist
+*pragma nolist
+includestr "%(__cmdparset)"
+*pragmapop nolist
+endm
+; Emit the command list table: assembles the text accumulated in __cmdlistt at the point of
+; invocation, with the nolist pragma state saved before and restored after.
+token_cmdlist   macro
+*pragmapush nolist
+*pragma nolist
+includestr "%(__cmdlistt)"
+*pragmapop nolist
+endm
+; Emit the command exec table: assembles the text accumulated in __cmdexect at the point of
+; invocation and defines token__maxcmd as the last command token number assigned (__toknumc-1).
+token_cmdexec   macro
+*pragmapush nolist
+*pragma nolist
+includestr "%(__cmdexect)"
+token__maxcmd   equ __toknumc-1
+*pragmapop nolist
+endm
+; Emit the function parse table: assembles the text accumulated in __fnparset at the point of
+; invocation, with the nolist pragma state saved before and restored after.
+token_fnparse   macro
+*pragmapush nolist
+*pragma nolist
+includestr "%(__fnparset)"
+*pragmapop nolist
+endm
+; Emit the function list table: assembles the text accumulated in __fnlistt at the point of
+; invocation, with the nolist pragma state saved before and restored after.
+token_fnlist    macro
+*pragmapush nolist
+*pragma nolist
+includestr "%(__fnlistt)"
+*pragmapop nolist
+endm
+; Emit the function exec table: assembles the text accumulated in __fnexect at the point of
+; invocation and defines token__maxfn as the last function token number assigned (__toknumf-1).
+token_fnexec    macro
+*pragmapush nolist
+*pragma nolist
+includestr "%(__fnexect)"
+token__maxfn    equ __toknumf-1
+*pragmapop nolist
 endm
 *pragmapop list
-parse_stmtjump  parse_tokdef token_error,parse_tokerr
+parse_tokendefp error           ; Used to mark errors; should always be first so it's token #0
-parse_tokdef token_eot,parse_noop
+parse_tokendefp eot             ; End of input marker or special handling in word tables
-parse_tokdef token_lt,parse_noop
+parse_tokendefp stmtsep         ; statement separator
-parse_tokdef token_le,parse_noop
+parse_tokendefp times           ; times (multiplication) operator (*)
-parse_tokdef token_gt,parse_noop
+parse_tokendefp plus            ; addition operator
-parse_tokdef token_ge,parse_noop
+parse_tokendefp divide          ; division operator (/)
-parse_tokdef token_eq,parse_noop
+parse_tokendefp minus           ; subtraction operator
-parse_tokdef token_ne,parse_noop
+parse_tokendefp exp             ; exponentiation operator (^)
-parse_tokdef token_reltrue,parse_noop // always true relational operator
+parse_tokendefp lt              ; less than operator
-parse_tokdef token_stmtsep,parse_noop
+parse_tokendefp le              ; less than or equal operator
-parse_tokdef token_apos,parse_rem
+parse_tokendefp gt              ; greater than operator
-parse_tokdef token_special,parse_noop
+parse_tokendefp ge              ; greater than or equal operator
-parse_tokdef token_bang,parse_noop
+parse_tokendefp eq              ; equality operator
-parse_tokdef token_hash,parse_noop
+parse_tokendefp ne              ; inequality operator
-parse_tokdef token_dollar,parse_noop
+parse_tokendefp not             ; boolean NOT operator
-parse_tokdef token_percent,parse_noop
+parse_tokendefp and             ; boolean AND operator
-parse_tokdef token_amp,parse_noop
+parse_tokendefp or              ; boolean OR operator
-parse_tokdef token_oparen,parse_noop
+parse_tokendefp bang            ; exclamation mark
-parse_tokdef token_cparen,parse_noop
+parse_tokendefp hash            ; number sign
-parse_tokdef token_star,parse_noop
+parse_tokendefp dollar          ; dollar sign (string sigil)
-parse_tokdef token_plus,parse_noop
+parse_tokendefp percent         ; percent sign (integer sigil)
-parse_tokdef token_comma,parse_noop
+parse_tokendefp amp             ; ampersand
-parse_tokdef token_minus,parse_noop
+parse_tokendefp oparen          ; opening paren
-parse_tokdef token_slash,parse_noop
+parse_tokendefp cparen          ; closing paren
-parse_tokdef token_semi,parse_noop
+parse_tokendefp sep             ; comma (separator)
-parse_tokdef token_at,parse_noop
+parse_tokendefp semi            ; semicolon
-parse_tokdef token_exp,parse_noop
+parse_tokendefp at              ; @ symbol
-parse_tokdef token_ident,parse_noop
+parse_tokendefp ident           ; identifier (has special parsing)
-parse_tokdef token_rem,parse_noop
+parse_tokendefp else            ; ELSE
-parse_tokdef token_return,parse_noop
+parse_tokendefp then            ; THEN
-parse_tokdef token_run,parse_noop
+parse_tokendefp to              ; TO
-parse_tokdef token_data,parse_noop
+parse_tokendefp sub             ; SUB
-parse_tokdef token_else,parse_noop
+parse_tokendefp as              ; AS
-parse_tokdef token_end,parse_noop
-parse_tokdef token_stop,parse_noop
-parse_tokdef token_sub,parse_noop
-parse_tokdef token_let,parse_noop
-parse_tokdef token_list,parse_noop
-parse_tokdef token_new,parse_noop
-parse_tokdef token_not,parse_noop
-parse_tokdef token_print,parse_noop
-parse_tokdef token_pop,parse_noop
-parse_tokdef token_to,parse_noop
-parse_tokdef token_and,parse_noop
-parse_tokdef token_or,parse_noop
-parse_tokdef token_go,parse_noop
-parse_tokdef token_as,parse_noop
-parse_tokdef token_asc,parse_noop
-parse_rem       rts
+parse_tokendefc remabbr,parse_noop,list_noop,exec_noop          ; abbreviated REM (')
+parse_tokendefc rem,parse_noop,list_noop,exec_noop              ; REM
+parse_tokendefc return,parse_noop,parse_noop,parse_noop         ; RETURN
+parse_tokendefc run,parse_noop,parse_noop,parse_noop            ; RUN
+parse_tokendefc data,parse_noop,parse_noop,parse_noop           ; DATA
+parse_tokendefc end,parse_noop,parse_noop,parse_noop            ; END
+parse_tokendefc stop,parse_noop,parse_noop,parse_noop           ; STOP
+parse_tokendefc let,parse_noop,parse_noop,parse_noop            ; LET
+parse_tokendefc list,parse_noop,parse_noop,parse_noop           ; LIST
+parse_tokendefc new,parse_noop,parse_noop,parse_noop            ; NEW
+parse_tokendefc print,parse_noop,parse_noop,parse_noop          ; PRINT
+parse_tokendefc pop,parse_noop,parse_noop,parse_noop            ; POP
+parse_tokendefc goto,parse_noop,parse_noop,parse_noop           ; GOTO
+parse_tokendefc gosub,parse_noop,parse_noop,parse_noop          ; GOSUB
+parse_tokendefc go,parse_noop,parse_noop,parse_noop             ; GO
+parse_tokendeff asc,parse_noop,parse_noop,parse_noop            ; ASC()
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Parse handling tables
+parsetab_cmd    token_cmdparse
+parsetab_cmde
+parsetab_fn     token_fnparse
+parsetab_fne
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; List handling tables
+listtab_cmd     token_cmdlist
+listtab_cmde
+listtab_fn      token_fnlist
+listtab_fne
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Execution handling tables
+exectab_cmd     token_cmdexec
+exectab_fn      token_fnexec
 *pragmapop list

Mercurial > hg > index.cgi

comparison src/parse.s @ 126:ac183a519439