LWOS: src/parse.s comparison

comparison src/parse.s @ 139:5d4801c0566d

Get things building again with the updated tokenization scheme

author	William Astle <lost@l-w.ca>
date	Mon, 15 Jul 2024 23:26:15 -0600
parents	917b4893bb3d
children	86f6f3a71e60

comparison

equal deleted inserted replaced

-:4983ba49f936
+:5d4801c0566d
 *pragmapush list
 *pragma list
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; This is the overall parsing package. This is responsible for converting program text into the internal byte code and
+; This is the overall parsing package. It is responsible for converting the input source code into the internal byte
-; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated
+; code.
-; code analysis. In almost all cases, the returned error will be a syntax error. The internal byte code shares the same
+;
-; token number allocations as the parser. Some allocated tokens cannot be identified by the lexer (parse_nexttok) but
+; This version only converts keywords to token codes. Additional conversions will be done in future versions.
-; are used at runtime and when "decompiling" to text.
+;
-;
+; Enter with X pointing to the text to parse. The encoded result will be placed at freestart. On return, X will point to
-; In the event of a parse error, everything up to the next end of statement is retained as is using a special token
+; the encoded result and D will contain the length in bytes of the result, and C will be clear.
-; that preserves the unparsable text and parsing resumes. Only the first error is referenced by the return error
+;
-; pointer.
+; In the event that there is insufficient memory between freestart and the bottom of the stack, C will be set. This
-;
+; routine does not immediately throw an "out of memory" error to allow the caller to clear up some memory and try
-; This is a recursive descent parser.
+; again.
 ;
-; Entry:
+; Enter at parseto with U set to the encoding destination and Y set to one byte past the end of the destination buffer
-; X             Points to the text to encode
+; to specify the destination. Defaults to encoding to the buffer between freestart and the bottom of the stack (with
-; B             Nonzero to prevent generating any output (error check/length calculation only)
+; headroom accounted for).
 ;
-; Exit:
+; The stuff below that has hard coded colon checks will eventually be replaced by more complete parsing.
-; X             Points to the encoded line
+parse           ldu freestart                   ; default to the start of free memory for encoding
-; D             Length of the encoded line
+leay -stackheadroom,s           ; set the top of free memory
-; CC.C          clear
+parseto         lda #1                          ; flag to enable memory limit detection
+pshs a,u,y                      ; save start and end addresses and OM error detection flag
-; Error Exit:
+leay ,x                         ; put the input pointer somewhere less useful
-; X             Points to the encoded line
+parsea          jsr parse_curchar               ; fetch an input character
-; D             Length of the encoded line
+bne parseb                      ; brif not end of input
-; Y             Pointer to the first error location in the input
+parsez          tfr u,d                         ; get current output pointer
-; U             Error code
+subd 3,s                        ; now D is the length
-; CC.C          set
+leas 5,s                        ; clean up the stack
-;
+rts                             ; return - C will be clear from subd above
-; This is the error handler. It is responsible for resetting the stack to bail out to the top level
+parseb          jsr parse_wordtab               ; look up a keyword and see if we have a match
-; parsing loop. It must also store the input pointer if this is the first error. Finally, it has to
+bcs parsec                      ; brif no match - handle unknown stuff
-; output all the text up to either the end of the line *or* the next valid statement separator.
+tsta                            ; do we have a two byte token?
-parse_errorsn   ldb #err_sn
+bne parseq                      ; brif so - just stash it
-parse_error     lds parse_stackptr              ; restore the original stack pointer so we can call from down stack
+cmpb #token_else                ; ELSE?
-puls u                          ; get back original free pointer
+beq parsed                      ; brif so - gets a hidden statement separator
-stu freestart                   ; deallocate any allocated result
+cmpb #token_remabbr             ; REM abbreviation?
-ldu parse_tokenst               ; get start location of the token where the error was raised
+bne parsee                      ; brif not
-coma                            ; make sure C is set for error
+parsed          lda #':                         ; add a statement separator before it
-rts
+parseq          bsr parseoutw                   ; output a word
-parse           stb parse_noout                 ; save no-output flag
+bra parsef
-leay ,x                         ; save input pointer in a less useful register
+parsee          bsr parseout                    ; output the token code
-ldu freestart                   ; point to start of free memory where we will build the output
+parsef          cmpb #token_remabbr             ; REM abbreviation?
-pshs u                          ; save original free memory location
+beq parseg                      ; brif so
-sts parse_stackptr              ; save the stack pointer for bailing out on errors
+cmpb #token_rem                 ; Actual REM?
-parse_nextstmt  jsr parse_nexttok               ; fetch the next token, return type in D
+bne parseh                      ; brif not
-bcs parse_error                 ; brif we failed at parsing a token
+parseg          ldb ,y+                         ; get current input character
-parse0          ldx #parsetab_cmd               ; point to jump table for token type handler
+beq parsez                      ; brif end of input
-cmpb #token_stmtsep             ; is it a statement separator?
+bsr parseout                    ; add unmodified characters to output
-beq parse_nextstmt              ; brif so - we can just skip it
+bra parseg                      ; keep going until end of input
-parse1          cmpb ,x                         ; did we match a valid command token?
+parseh          cmpb #token_data                ; DATA command?
-beq parse3                      ; brif so
+bne parsea                      ; brif not - continue normal handling
-leax 3,x                        ; move to next entry
+clra                            ; flag for not skipping quoted string
-cmpx #parsetab_cmde             ; end of table?
+parsei          ldb ,y+                         ; get input character
-blo parse1                      ; brif not
+beq parsez                      ; brif end of input
-bra parse_errorsn               ; fell off the end
+cmpb #'"                        ; string delimiter?
-parse3          jsr [1,x]                       ; call the handler
+bne parsej                      ; brif not
-bcs parse_error                 ; brif the handler indicated error
+coma                            ; flip the quoted statement handler
-bsr parse_curtoken              ; fetch the token we left off on
+parsej          cmpb #':                        ; end of statement?
-cmpb #token_eot                 ; end of input?
+bne parsek                      ; brif not
-bne parse4                      ; brif not
+tsta                            ; are we skipping them?
-ldb #bc_eol                     ; stash an end of line op
+bne parsek                      ; brif so
-bsr parse_write
+leay -1,y                       ; unconsume it
-bcs parse_error                 ; brif we errored out writing to the result (OM?)
+bra parsea                      ; we're done with DATA
-tfr u,d                         ; calculate the length of the result
+parsek          bsr parseout                    ; put the data value into the output
-subd ,s
+bra parsei                      ; go handle another character
-puls u,pc                       ; get pointer to start of encoded result and return (C is already clear)
+parsec          cmpb #'"                        ; did we encounter a quoted string?
-parse4          cmpb #token_stmtsep             ; statement separator?
+bne parsel                      ; brif not
-beq parse_nextstmt              ; brif so - do another statement
+bsr parseout                    ; output delimiter
-cmpb #token_remabbr             ; ' token?
+parsem          ldb ,y+                         ; get string character
-beq parse0                      ; brif so - parse it as a new statement
+beq parsez                      ; brif end of input
-bra parse_errorsn               ; raise a syntax error
+bsr parseout                    ; output it
-parse_write     lda parse_noout                 ; are we doing output?
+cmpb #'"                        ; end delimiter?
-beq parse_write0                ; brif so
+bne parsem                      ; brif not - keep looking
-leau 1,u                        ; just count up the output and don't do anything
+bra parsea                      ; go handle more stuff
-rts
+parsep          cmpb #'0                        ; is it a digit?
-parse_write0    leax -stackheadroom,s           ; calculate bottom of stack with headroom
+blo parsen                      ; brif not
-cmpx freestart                  ; did the stack run into the end of the output?
+cmpb #'9                        ; is it still a digit?
-bhs parse_write1                ; brif not - we're good
+bls parseo                      ; brif so
-ldb #err_om                     ; raise out of memory error, C already set from comparison
+parsel          cmpb #'A                        ; is it a letter?
-rts
+blo parsen                      ; brif not
-parse_write1    stb ,u+                         ; save output byte
+cmpb #'Z                        ; is it still a letter (UC)?
-stu freestart                   ; save new to of used memory
+bls parseo                      ; brif so
-list_noop
+cmpb #'a                        ; is it a lower case letter?
-parse_noop      rts                             ; return all clear - C clear from comparison above
+blo parsen                      ; brif not
-parse_curtoken  ldb parse_curtok                ; fetch token code of current token
+cmpb #'z                        ; is it still a lower case letter?
-rts
+bhi parsen                      ; brif not
-parse_tokerr    comb                            ; flag error - unexpected token
+parseo          bsr parseout                    ; stash the character
-ldb #err_sn                     ; raise syntax error
+ldb ,y+                         ; fetch next input
+beq parsez                      ; brif end of input
+bra parsep                      ; go see if we're still in an identifier
+parsen          bsr parseout                    ; output unknown character (number, unknown token)
+jmp parsea                      ; go handle more
+; Output helpers for the main parse loop. Both must be called directly (one bsr/jsr deep) from the parse loop so that,
+; with the return address on the stack, the frame pushed by "pshs a,u,y" at parseto is seen as:
+;   2,s  memory limit detection flag (nonzero = check)
+;   3,s  address one byte past the end of the output buffer
+;   5,s  start address of the output buffer
+; If the output would run past the end of the buffer, the parse frame is discarded and control returns to the
+; caller of parse/parseto with C set.
+;
+; parseoutw writes A (MSB) then B (LSB) at U; parseout writes B at U. U is advanced past the bytes written.
+;
+; parseoutw checks for room for both bytes itself rather than calling parseout for the MSB. A nested call would put
+; a second return address on the stack, so the fixed 2,s / 3,s / leas 7,s offsets in parseout would address the
+; wrong stack slots.
+parseoutw       tst 2,s                         ; need to test for OM?
+beq parseoutw0                  ; brif not
+leau 1,u                        ; point to where the LSB will land - both bytes have to fit
+cmpu 3,s                        ; would the LSB run into the end of the buffer?
+leau -1,u                       ; restore output pointer (leau leaves the flags from cmpu intact)
+bhs parseoutom                  ; brif so - out of memory
+parseoutw0      sta ,u+                         ; stash MSB in buffer
+bra parseout0                   ; go stash LSB - room for it is already verified
+parseout        tst 2,s                         ; need to test for OM?
+beq parseout0                   ; brif not
+cmpu 3,s                        ; did we run into the end of the buffer?
+blo parseout0                   ; brif not
+parseoutom      coma                            ; set C for error
+leas 7,s                        ; clean up stack (our return address plus the 5 byte parse frame)
+rts                             ; return to original caller
+parseout0       stb ,u+                         ; stash in buffer
 rts
 ; parse_nextchar: advance the input pointer in Y by one character unless it already points at a zero byte (end of
 ; input), then fall through to parse_curchar.
 ; parse_curchar: load the character at Y into A without moving Y. Z is set on return if that character is zero.
 parse_nextchar  lda ,y                          ; at end of input already?
 beq parse_curchar               ; brif so
 leay 1,y                        ; move to next input character
 parse_curchar   lda ,y                          ; fetch input character
 rts
-parse_nexttokc  bsr parse_nexttok               ; fetch next token
-parse_iseos     cmpb #token_eot                 ; end of text?
-beq parse_iseos0                ; brif so
-cmpb #token_stmtsep             ; is it a statement separator
-parse_iseos0    rts
-parse_nexttok   bsr parse_curchar               ; fetch current input
-beq parse_nexttok1              ; brif end of input
-parse_nexttok0  cmpa #0x20                      ; space?
-bne parse_nexttok2              ; brif not
-bsr parse_nextchar              ; eat the space
-bne parse_nexttok0              ; brif not end of input
-parse_nexttok1  ldb #token_eot                  ; flag end of input
-bra parse_nexttok6              ; go return it
-parse_nexttok2  sty parse_tokenst               ; save start of current token after skipping spaces
-bsr parse_toupper               ; make sure we have upper case letters for matching
-ldx #parse_wt                   ; point to keyword parsing table
-jsr parse_wordtab               ; go see if we have a match in the keyword table
-bcc parse_nexttok6              ; brif we do - return it
-ldy parse_tokenst               ; return to the start of the token - pointer probably clobbered
-bsr parse_curchar               ; get back input character (may have been clobbered)
-cmpa #'.                        ; leading decimal?
-beq parse_nexttok3              ; brif so - parse number
-cmpa #'0                        ; is it a digit
-blo parse_nexttok10             ; brif not
-cmpa #'9                        ; is it still a digit?
-bhi parse_nexttok10             ; brif not
-parse_nexttok3  jmp parse_number                ; go parse a number
-parse_nexttok6  stb parse_curtok                ; save token type
-leay 1,y                        ; eat the input character
-clra                            ; clear C to indicate no error (and clear Z also)
-rts
-parse_nexttok10 cmpa #'A                        ; is it alpha?
-blo parse_nexttok11             ; brif not
-cmpa #'Z                        ; is it still alpha?
-bls parse_nexttok12             ; brif so
-parse_nexttok11 comb                            ; flag error - unrecognized token
-ldb #token_error
-rts
-parse_nexttok12 bsr parse_nextcharu             ; fetch next input character
-cmpa #'0                        ; is it alphanumeric?
-blo parse_nexttok13             ; brif not
-cmpa #'9                        ; is it numeric?
-bls parse_nexttok12             ; brif so - keep skipping it
-cmpa #'A                        ; is it alpha?
-blo parse_nexttok13             ; brif not
-cmpa #'Z                        ; is it still alpha?
-bls parse_nexttok12             ; brif so - keep skipping it
-parse_nexttok13 tfr y,d                         ; calculate length of identifier
-subd parse_tokenst
-std val0+val.strlen             ; save it for reference
-ldb #token_ident                ; indicate an identifier (variable name, etc.)
-rts                             ; return result (C will be clear from SUBD above)
 ; parse_nextcharu: fetch the next input character through parse_nextchar and fold it to upper case. Returns with Z
 ; set only when the fetched character is zero (end of input).
 ; parse_toupper: if A is in the range 'a'..'z', subtract 0x20 to give the matching upper case letter; any other
 ; value in A is returned unchanged.
 parse_nextcharu bsr parse_nextchar              ; fetch next input character
 beq parse_toupper0              ; brif end of input
 parse_toupper   cmpa #'a                        ; is it lower case alpha?
 blo parse_toupper0              ; brif not
 cmpa #'z                        ; is it still lower case alpha?
 bhi parse_toupper0              ; brif not
 suba #0x20                      ; adjust to upper case alpha
 parse_toupper0  rts                             ; Z only set here if input was zero entering from parse_nextcharu
-parse_number    jmp parse_tokerr
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This routine parses tokens using the table at parse_wt. The table is structured as follows:
-; Parse a statement that consists of just the command token
-parse_cmdsingle equ parse_write                 ; just write the token out and bail
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Parse a REM or ' statement. We just copy the comment out after the REM or ' token.
-parse_rem       jsr parse_write                 ; write the token/character out
-ldb ,y+                         ; get next input character
-bne parse_rem                   ; brif not at the end of the input
-ldb #token_eot                  ; flag end of input for mainline parser
-stb parse_curtok
-rts                             ; return, pass back the C result from parse_write
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
 ;
 ; * two bytes which contain the length of the table less the two bytes for this length value
 ; * a sequence of entries consisting of a single byte matching character and a token code followed
-;   by an optional sub table, structured exactly the same way.
+;   by an optional sub table, structured exactly the same way. The token code is 2 bytes.
 ;
 ; The optional subtable will be present if the token code is token_eot
 ;
 ; If the character match is negative, it means a lookahead failed. The negative value is the number
 ; of characters to unget and the token code is the token value to return. No other entries after this
 ; in a table will be considered since this negative match is a global match.
 ;
 ; When a token_eot match is found, if there are no further characters in the input, the match is
 ; determined to be invalid and processing continues with the next entry.
+parse_wordtab   ldx #parse_wt                   ; point to main lookup table
+skip2                           ; move on into the main routine
 parse_wordtab0  leas 3,s                        ; clean up stack for sub table handling
-parse_wordtab   pshs a,x                        ; save input character and start of table
+pshs a,x                        ; save input character and start of table
 ldd ,x++                        ; get length of this table
 addd 1,s                        ; calculate the address of the end of the table
 std 1,s                         ; save end address for comparison later
 lda ,s                          ; get back input character
-parse_wordtab1  ldb 1,x                         ; fetch token code for this entry
+parse_wordtab1  leax 3,x                        ; move past this entry - this order to avoid Z effects from leax
-cmpa ,x++                       ; does this entry match?
+cmpa -3,x                       ; does this entry match?
 bne parse_wordtab4              ; brif not
-cmpb #token_eot                 ; is it indicating a sub table?
+ldd -2,x                        ; get the matched token code
+cmpd #tokenf_eot                ; is it indicating a sub table?
 bne parse_wordtab6              ; brif not
 jsr parse_nextcharu             ; fetch next input character (for sub table match)
 bne parse_wordtab0              ; brif we are going to check the sub table
 parse_wordtab2  ldd ,x                          ; fetch length of sub table
 leax d,x                        ; move past sub table
 parse_wordtab3  lda ,s                          ; get back input character
 cmpx 1,s                        ; are we at the end of the table?
 blo parse_wordtab1              ; brif not - check another entry
 comb                            ; indicate no match
 puls a,x,pc                     ; clean up stack and return
-parse_wordtab4  lda -2,x                        ; get the match character
+parse_wordtab4  lda -3,x                        ; get the match character
 bmi parse_wordtab5              ; brif negative - lookahead fail
-cmpb #token_eot                 ; is there a sub table to skip?
+ldd -2,x                        ; get the token match
+cmpd #tokenf_eot                ; is there a sub table to skip?
 beq parse_wordtab2              ; brif so - skip sub table
 bra parse_wordtab3              ; otherwise just move to the next entry
 parse_wordtab5  leay a,y                        ; move back the specified number of characters
-parse_wordtab6  clra                            ; clear C to indicate a match
+ldd -2,x                        ; get the matched token
-puls a,x,pc                     ; clean up stack and return
+parse_wordtab6  sta ,s                          ; save MSB of match
+clra                            ; clear carry to indicate match
+puls a,x,pc                     ; clean up stack, restore return value and return
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Convert a token number back to its keyword. This will use the same table used by parse_wordtab. Enter with a character
 ; output routine pointer in U which takes the character in A. The routine can assume that Y is preserved. Will return
 ; with C set if the token does not exist in the word table and clear otherwise.
 ; NOTE(review): the body shown here looks incomplete. Only U (2 bytes) is pushed on entry, yet the loop compares X
 ; against 1,s as a table end address and the exit pulls A, X and PC (5 bytes). The branch target parse_wtdc3 is
 ; also not visible here. Confirm against the full routine before relying on this code.
 parse_wtdc      pshs u                          ; save routine pointer
 parse_wtdc6     cmpx 1,s                        ; are we at the end of this table?
 bne parse_wtdc3                 ; brif not - handle another table entry
 coma                            ; make sure C is set for no match
 puls a,x,pc                     ; clean up stack and return
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Validate a line number. Must enter with the token type in B. Will return the line number in X. It will return a
+; Definition of tokens used in the interpreter.
-; syntax error if the line number is invalid or out of range. It will also consume a valid line number token.
+;
-parse_linenum   cmpb #token_int32               ; is it an integer?
+; Each token is defined as follows:
-beq parse_linenum1              ; brif so
+;               parse_tokendefT <sym>[,<handler>]
-parse_linenum0  ldb #err_sn                     ; flag syntax error
+; where T is one of:
-coma                            ; flag error
+; p: particle - utility tokens and definitions, starting at 0x00
-rts
+; c: command - a command keyword, starting at 0x80
-parse_linenum1  ldx val0+val.int                ; get high word of integer
+; f: function - a function keyword, start at 0x80 with a 0xFF prefix
-bne parse_linenum0              ; brif not a valid line number
+; v: token width specific number/code, but otherwise a particle; in this case, the code replaces <handler>
-ldx val0+val.int+2              ; get actual line number
+;
-pshs x                          ; save it
+; <sym> is the base symbol name (such as "then" or "eot")
-jsr parse_nexttok               ; consume line number
+; <handler> is the address of the execution handler routine of the natural token type (command or function)
-puls x,pc                       ; get back line number and return it
+;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; <handler> is optional for particles. If it is omitted for command or function tokens, it defaults to SNERROR.
-; Parse a line number range which is one of the following forms:
-; <linenum1>
-; <linenum1>-
-; <linenum1>-<linenum2>
-; -<linenum2>
-; The result will store two line numbers. If no - token appears, then both line numbers will be the same. Otherwise,
-; if <linenum1> is omitted, it will be assumed to be 0. If <linenum2> is omitted, it will be assumed to be 65535. Those
-; are the minimum and maximum line numbers.
-;
-; Parsing works by first looking for an integer token that is in range. If it finds one, it looks for an optional -
-; followed by an optional integer token that is in range. If the first token is not an integer, it must be a - which may
-; be optionally followed by another integer in range.
-;
-; It is technically valid to have a single - with no line numbers.
-;
-; Enter with the current token in B.
-;
-; The resulting line numbers will be returned in parse_buff
-parse_linerange ldx zero                        ; default start line number
-leau -1,x                       ; default end line number
-pshs x,u                        ; save the return range
-cmpb #token_minus               ; range with no start?
-beq parse_linerang1             ; brif so
-bsr parse_linenum               ; verify line number, return in X
-bcs parse_linerang4             ; bail out on error
-stx ,s                          ; save new start line number
-jsr parse_nexttokc              ; fetch next token, set Z if end of statement
-bne parse_linerang0             ; brif not end of line
-ldx ,s                          ; get end line to use as start line
-bra parse_linerang2             ; go set range end and return
-parse_linerang0 cmpb #token_minus               ; do we have a range character?
-bne parse_linerang3             ; brif not - we have an error
-parse_linerang1 jsr parse_nexttokc              ; parse what comes after the range mark
-beq parse_linerang2             ; brif end of statement - use the default range end
-bsr parse_linenum               ; make sure it's a valid line number
-bcs parse_linerang4             ; bail out on error
-parse_linerang2 stx 2,s                         ; set range end
-clra                            ; make sure C is clear
-puls x,u,pc                     ; fetch return values and return
-parse_linerang3 ldb #err_sn                     ; flag a syntax error
-coma                            ; make sure C is set
-parse_linerang4 puls x,u,pc                     ; clean up stack and return error condition
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows:
-;               parse_tokdefT <sym>,<parse>,<list>,<exec>
-; where:
-; T: c for command, f for function, p for particle
-; <sym>: the symbol name without the "token_" prefix
-; <parse>: parse handler for the type, ignored for particles
-; <list>: list handler for the type, ingored for particles
-; <exec>: execution handler for the type, ignored for particles
 *pragmapush list
 *pragma nolist
 __toknump       set 0
-__toknumc       set 0x40
+__toknumc       set 0x80
-__toknumf       set 0xc0
+__toknumf       set 0x80
-setstr __cmdparset=""
-setstr __cmdlistt=""
-setstr __cmdexect=""
-setstr __fnparset=""
-setstr __fnlistt=""
-setstr __fnexect=""
 parse_tokendefp macro noexpand
 token_\1        equ __toknump
+tokenf_\1       equ __toknump
 __toknump       set __toknump+1
 endm
+parse_tokendefv macro noexpand
+token_\1        equ \2
+tokenf_\1       equ \2
+endm
+; Start both execution table accumulators out empty. parse_tokendefc appends to __cmdexect and parse_tokendeff
+; appends to __fnexect; token_cmdexec and token_fnexec emit the accumulated text. The names here must match the
+; names used by those macros.
+setstr __cmdexect=""
+setstr __fnexect=""
 parse_tokendefc macro noexpand
 token_\1        equ __toknumc
+tokenf_\1       equ __toknumc
 __toknumc       set __toknumc+1
 ifstr ne,"{2}",""
-setstr __cmdparset="%(__cmdparset)\tfcb\ttoken_\1\n\tfdb {2}\n"
+setstr __cmdexect="%(__cmdexect)\tfdb {2}\n"
-endc
-ifstr ne,"{3}",""
-setstr __cmdlistt="%(__cmdlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
-endc
-ifstr ne,"{4}",""
-setstr __cmdexect="%(__cmdexect)\tfdb {3}\n"
 else
 setstr __cmdexect="%(__cmdexect)\tfdb SNERROR\n"
 endc
 endm
 parse_tokendeff macro noexpand
 token_\1        equ __toknumf
+tokenf_\1       equ 0xff00|__toknumf
 __toknumf       set __toknumf+1
 ifstr ne,"{2}",""
-setstr __fnparset="%(__fnparset)\tfcb\ttoken_\1\n\tfdb {2}\n"
+setstr __fnexect="%(__fnexect)\tfdb {2}\n"
-endc
-ifstr ne,"{3}",""
-setstr __fnlistt="%(__fnlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
-endc
-ifstr ne,"{4}",""
-setstr __fnexect="%(__fnexect)\tfdb {3}\n"
 else
 setstr __fnexect="%(__fnexect)\tfdb SNERROR\n"
 endc
-endm
-token_cmdparse  macro
-*pragmapush nolist
-*pragma nolist
-includestr "%(__cmdparset)"
-*pragmapop nolist
-endm
-token_cmdlist   macro
-*pragmapush nolist
-*pragma nolist
-includestr "%(__cmdlistt)"
-*pragmapop nolist
 endm
 token_cmdexec   macro
 *pragmapush nolist
 *pragma nolist
 includestr "%(__cmdexect)"
 token__maxcmd   equ __toknumc-1
 *pragmapop nolist
 endm
-token_fnparse   macro
-*pragmapush nolist
-*pragma nolist
-includestr "%(__fnparset)"
-*pragmapop nolist
-endm
-token_fnlist    macro
-*pragmapush nolist
-*pragma nolist
-includestr "%(__fnlistt)"
-*pragmapop nolist
-endm
 token_fnexec    macro
 *pragmapush nolist
 *pragma nolist
 includestr "%(__fnexect)"
 token__maxfn    equ __toknumf-1
 *pragmapop nolist
 endm
 *pragmapop list
-; the tokens defined in this section all have special parsing or meaning
+; special tokens
 parse_tokendefp error           ; Used to mark errors; should always be first so it's token #0
 parse_tokendefp eot             ; End of input marker or special handling in word tables
-parse_tokendefp int32           ; 32 bit integer (has special parsing)
+; command (and simple non-command keywords)
-parse_tokendefp float           ; floating point value (has special parsing)
+parse_tokendefc remabbr         ; abbreviated REM (')
-parse_tokendefp ident           ; identifier (has special parsing)
+parse_tokendefc rem             ; REM
-parse_tokendefp linenum         ; a 16 bit unsigned integer treated as a line number
+parse_tokendefc return          ; RETURN
-parse_tokendefp linerange       ; a pair of 16 bit unsigned integers treated as line numbers
+parse_tokendefc run             ; RUN
-; everything below here references keywords or particle characters
+parse_tokendefc data            ; DATA
-parse_tokendefp stmtsep         ; statement separator
+parse_tokendefc end             ; END
-parse_tokendefp times           ; times (multiplication) operator (*)
+parse_tokendefc stop            ; STOP
-parse_tokendefp plus            ; addition operator
+parse_tokendefc let             ; LET
-parse_tokendefp divide          ; division operator (/)
+parse_tokendefc list            ; LIST
-parse_tokendefp minus           ; subtraction operator
+parse_tokendefc new             ; NEW
-parse_tokendefp exp             ; exponentiation operator (^)
+parse_tokendefc print           ; PRINT
-parse_tokendefp lt              ; less than operator
+parse_tokendefc pop             ; POP
-parse_tokendefp le              ; less than or equal operateor
+parse_tokendefc goto            ; GOTO
-parse_tokendefp gt              ; greater than operator
+parse_tokendefc gosub           ; GOSUB
-parse_tokendefp ge              ; greater than or equal operator
+parse_tokendefc go              ; GO
-parse_tokendefp eq              ; equality operator
+parse_tokendefc times           ; times (multiplication) operator (*)
-parse_tokendefp ne              ; inequality operator
+parse_tokendefc plus            ; addition operator
-parse_tokendefp not             ; boolean NOT operator
+parse_tokendefc divide          ; division operator (/)
-parse_tokendefp and             ; boolean AND operator
+parse_tokendefc minus           ; subtraction operator
-parse_tokendefp or              ; boolean OR operator
+parse_tokendefc exp             ; exponentiation operator (^)
-parse_tokendefp bang            ; exclamation mark
+parse_tokendefc lt              ; less than operator
-parse_tokendefp hash            ; number sign
+parse_tokendefc le              ; less than or equal operator
-parse_tokendefp dollar          ; dollar sign (string sigil)
+parse_tokendefc gt              ; greater than operator
-parse_tokendefp percent         ; percent sign (integer sigil)
+parse_tokendefc ge              ; greater than or equal operator
-parse_tokendefp amp             ; ampersand
+parse_tokendefc eq              ; equality operator
-parse_tokendefp oparen          ; opening paren
+parse_tokendefc ne              ; inequality operator
-parse_tokendefp cparen          ; closing paren
+parse_tokendefc not             ; boolean NOT operator
-parse_tokendefp sep             ; comma (separator)
+parse_tokendefc and             ; boolean AND operator
-parse_tokendefp semi            ; semicolon
+parse_tokendefc or              ; boolean OR operator
-parse_tokendefp at              ; @ symbol
+parse_tokendefc else            ; ELSE
-parse_tokendefp else            ; ELSE
+parse_tokendefc then            ; THEN
-parse_tokendefp then            ; THEN
+parse_tokendefc to              ; TO
-parse_tokendefp to              ; TO
+parse_tokendefc sub             ; SUB
-parse_tokendefp sub             ; SUB
+parse_tokendefc as              ; AS
-parse_tokendefp as              ; AS
+; secondary tokens (functions)
+parse_tokendeff asc             ; ASC()
-parse_tokendefc remabbr,parse_rem,list_noop,exec_noop           ; abbreviated REM (')
-parse_tokendefc rem,parse_rem,list_noop,exec_noop               ; REM
-parse_tokendefc return,parse_cmdsingle,parse_noop,parse_noop    ; RETURN
-parse_tokendefc run,parse_noop,parse_noop,parse_noop            ; RUN
-parse_tokendefc data,parse_noop,parse_noop,parse_noop           ; DATA
-parse_tokendefc end,parse_cmdsingle,parse_noop,parse_noop       ; END
-parse_tokendefc stop,parse_cmdsingle,parse_noop,parse_noop      ; STOP
-parse_tokendefc let,parse_noop,parse_noop,parse_noop            ; LET
-parse_tokendefc list,parse_noop,parse_noop,parse_noop           ; LIST
-parse_tokendefc new,parse_cmdsingle,parse_noop,parse_noop       ; NEW
-parse_tokendefc print,parse_noop,parse_noop,parse_noop          ; PRINT
-parse_tokendefc pop,parse_cmdsingle,parse_noop,parse_noop       ; POP
-parse_tokendefc goto,parse_noop,parse_noop,parse_noop           ; GOTO
-parse_tokendefc gosub,parse_noop,parse_noop,parse_noop          ; GOSUB
-parse_tokendefc go,parse_noop,parse_noop,parse_noop             ; GO
-parse_tokendeff asc,parse_noop,parse_noop,parse_noop            ; ASC()
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Parse handling tables
-parsetab_cmd    token_cmdparse
-parsetab_cmde
-parsetab_fn     token_fnparse
-parsetab_fne
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; List handling tables
-listtab_cmd     token_cmdlist
-listtab_cmde
-listtab_fn      token_fnlist
-listtab_fne
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Execution handling tables
 exectab_cmd     token_cmdexec
 exectab_fn      token_fnexec
 *pragmapop list

Mercurial > hg > index.cgi

comparison src/parse.s @ 139:5d4801c0566d