changeset 139:5d4801c0566d

Get things building again with the updated tokenization scheme
author William Astle <lost@l-w.ca>
date Mon, 15 Jul 2024 23:26:15 -0600
parents 4983ba49f936
children 86f6f3a71e60
files src/buildkeywordtab.c src/keywordlist.txt src/parse.s
diffstat 3 files changed, 219 insertions(+), 385 deletions(-) [+]
line wrap: on
line diff
--- a/src/buildkeywordtab.c	Sat Jul 13 17:33:01 2024 -0600
+++ b/src/buildkeywordtab.c	Mon Jul 15 23:26:15 2024 -0600
@@ -34,7 +34,7 @@
         // if there are child nodes, insert the sub tree
         if (tn1 -> firstchild)
         {
-            fprintf(fp, " fcb 0x%02x,token_eot\n", tn1 -> ccode);
+            fprintf(fp, " fcb 0x%02x\n fdb token_eot\n", tn1 -> ccode);
             if (tn1 -> toksym)
             {
                 print_tree(fp, tn1, tn1 -> toksym, 255);
@@ -54,13 +54,13 @@
         // if there is also a terminal symbol here
         if (tn1 -> toksym)
         {
-            fprintf(fp, " fcb 0x%02x,%s\n", tn1 -> ccode, tn1 -> toksym);
+            fprintf(fp, " fcb 0x%02x\n fdb %s\n", tn1 -> ccode, tn1 -> toksym);
         }
     }
     // handle lookahead failure
     if (lookahead)
     {
-        fprintf(fp, " fcb 0x%02x,%s\n", lookaheaddepth, lookahead);
+        fprintf(fp, " fcb 0x%02x\n fdb %s\n", lookaheaddepth, lookahead);
     }
     
     fprintf(fp, "parse_wt%de\n", depth);
--- a/src/keywordlist.txt	Sat Jul 13 17:33:01 2024 -0600
+++ b/src/keywordlist.txt	Mon Jul 15 23:26:15 2024 -0600
@@ -1,49 +1,38 @@
-!	token_bang
-#	token_hash
-$	token_dollar
-%	token_percent
-&	token_amp
-'	token_remabbr
-(	token_oparen
-)	token_cparen
-*	token_times
-+	token_plus
-,	token_sep
--	token_minus
-/	token_divide
-:	token_stmtsep
-;	token_semi
-?	token_print
-@	token_at
-^	token_exp
-<	token_lt
-<=	token_le
-=>	token_le
->	token_gt
->=	token_ge
-=>	token_ge
-<>	token_ne
-><	token_ne
-=	token_eq
-AND	token_and
-AS	token_as
-ASC	token_asc
-DATA	token_data
-ELSE	token_else
-END	token_end
-GO	token_go
-GOTO	token_goto
-GOSUB	token_gosub
-LET	token_let
-LIST	token_list
-NEW	token_new
-NOT	token_not
-OR	token_or
-POP	token_pop
-PRINT	token_print
-REM	token_rem
-RETURN	token_return
-RUN	token_run
-STOP	token_stop
-SUB	token_sub
-TO	token_to
+'	tokenf_remabbr
+*	tokenf_times
++	tokenf_plus
+-	tokenf_minus
+/	tokenf_divide
+?	tokenf_print
+^	tokenf_exp
+<	tokenf_lt
+<=	tokenf_le
+=<	tokenf_le
+>	tokenf_gt
+>=	tokenf_ge
+=>	tokenf_ge
+<>	tokenf_ne
+><	tokenf_ne
+=	tokenf_eq
+AND	tokenf_and
+AS	tokenf_as
+ASC	tokenf_asc
+DATA	tokenf_data
+ELSE	tokenf_else
+END	tokenf_end
+GO	tokenf_go
+GOTO	tokenf_goto
+GOSUB	tokenf_gosub
+LET	tokenf_let
+LIST	tokenf_list
+NEW	tokenf_new
+NOT	tokenf_not
+OR	tokenf_or
+POP	tokenf_pop
+PRINT	tokenf_print
+REM	tokenf_rem
+RETURN	tokenf_return
+RUN	tokenf_run
+STOP	tokenf_stop
+SUB	tokenf_sub
+TO	tokenf_to
--- a/src/parse.s	Sat Jul 13 17:33:01 2024 -0600
+++ b/src/parse.s	Mon Jul 15 23:26:15 2024 -0600
@@ -1,151 +1,114 @@
                 *pragmapush list
                 *pragma list
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; This is the overall parsing package. This is responsible for converting program text into the internal byte code and
-; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated
-; code analysis. In almost all cases, the returned error will be a syntax error. The internal byte code shares the same
-; token number allocations as the parser. Some allocated tokens cannot be identified by the lexer (parse_nexttok) but
-; are used at runtime and when "decompiling" to text.
+; This is the overall parsing package. It is responsible for converting the input source code into the internal byte
+; code.
 ;
-; In the event of a parse error, everything up to the next end of statement is retained as is using a special token
-; that preserves the unparsable text and parsing resumes. Only the first error is referenced by the return error
-; pointer.
+; This version only converts keywords to token codes. Additional conversions will be done in future versions.
 ;
-; This is a recursive descent parser.
+; Enter with X pointing to the text to parse. The encoded result will be placed at freestart. On return, X will point
+; to the encoded result, D will contain the length in bytes of the result, and C will be clear.
 ;
-; Entry:
-; X             Points to the text to encode
-; B             Nonzero to prevent generating any output (error check/length calculation only)
+; In the event that there is insufficient memory between freestart and the bottom of the stack, C will be set. This
+; routine does not immediately throw an "out of memory" error to allow the caller to clear up some memory and try
+; again.
 ;
-; Exit:
-; X             Points to the encoded line
-; D             Length of the encoded line
-; CC.C          clear
-
-; Error Exit:
-; X             Points to the encoded line
-; D             Length of the encoded line
-; Y             Pointer to the first error location in the input
-; U             Error code
-; CC.C          set
+; Enter at parseto with U set to the encoding destination and Y set to one byte past the end of the destination buffer
+; to specify the destination. The parse entry point defaults to encoding to the buffer between freestart and the
+; bottom of the stack (with headroom accounted for).
 ;
-; This is the error handler. It is responsible for resetting the stack to bail out to the top level
-; parsing loop. It must also store the input pointer if this is the first error. Finally, it has to
-; output all the text up to either the end of the line *or* the next valid statement separator.
-parse_errorsn   ldb #err_sn
-parse_error     lds parse_stackptr              ; restore the original stack pointer so we can call from down stack
-                puls u                          ; get back original free pointer
-                stu freestart                   ; deallocate any allocated result
-                ldu parse_tokenst               ; get start location of the token where the error was raised
-                coma                            ; make sure C is set for error
-                rts
-parse           stb parse_noout                 ; save no-output flag
-                leay ,x                         ; save input pointer in a less useful register
-                ldu freestart                   ; point to start of free memory where we will build the output
-                pshs u                          ; save original free memory location
-                sts parse_stackptr              ; save the stack pointer for bailing out on errors
-parse_nextstmt  jsr parse_nexttok               ; fetch the next token, return type in D
-                bcs parse_error                 ; brif we failed at parsing a token
-parse0          ldx #parsetab_cmd               ; point to jump table for token type handler
-                cmpb #token_stmtsep             ; is it a statement separator?
-                beq parse_nextstmt              ; brif so - we can just skip it
-parse1          cmpb ,x                         ; did we match a valid command token?
-                beq parse3                      ; brif so
-                leax 3,x                        ; move to next entry
-                cmpx #parsetab_cmde             ; end of table?
-                blo parse1                      ; brif not
-                bra parse_errorsn               ; fell off the end
-parse3          jsr [1,x]                       ; call the handler
-                bcs parse_error                 ; brif the handler indicated error
-                bsr parse_curtoken              ; fetch the token we left off on
-                cmpb #token_eot                 ; end of input?
-                bne parse4                      ; brif not
-                ldb #bc_eol                     ; stash an end of line op
-                bsr parse_write
-                bcs parse_error                 ; brif we errored out writing to the result (OM?)
-                tfr u,d                         ; calculate the length of the result
-                subd ,s
-                puls u,pc                       ; get pointer to start of encoded result and return (C is already clear)
-parse4          cmpb #token_stmtsep             ; statement separator?
-                beq parse_nextstmt              ; brif so - do another statement
-                cmpb #token_remabbr             ; ' token?
-                beq parse0                      ; brif so - parse it as a new statement
-                bra parse_errorsn               ; raise a syntax error
-parse_write     lda parse_noout                 ; are we doing output?
-                beq parse_write0                ; brif so
-                leau 1,u                        ; just count up the output and don't do anything
-                rts
-parse_write0    leax -stackheadroom,s           ; calculate bottom of stack with headroom
-                cmpx freestart                  ; did the stack run into the end of the output?
-                bhs parse_write1                ; brif not - we're good
-                ldb #err_om                     ; raise out of memory error, C already set from comparison
-                rts
-parse_write1    stb ,u+                         ; save output byte
-                stu freestart                   ; save new to of used memory
-list_noop
-parse_noop      rts                             ; return all clear - C clear from comparison above
-parse_curtoken  ldb parse_curtok                ; fetch token code of current token
-                rts
-parse_tokerr    comb                            ; flag error - unexpected token
-                ldb #err_sn                     ; raise syntax error
+; The stuff below that has hard coded colon checks will eventually be replaced by more complete parsing.
+parse           ldu freestart                   ; default to the start of free memory for encoding
+                leay -stackheadroom,s           ; set the top of free memory
+parseto         lda #1                          ; flag to enable memory limit detection
+                pshs a,u,y                      ; save start and end addresses and OM error detection flag
+                leay ,x                         ; put the input pointer somewhere less useful                
+parsea          jsr parse_curchar               ; fetch an input character
+                bne parseb                      ; brif not end of input
+parsez          tfr u,d                         ; get current output pointer
+                subd 3,s                        ; now D is the length
+                leas 5,s                        ; clean up the stack
+                rts                             ; return - C will be clear from subd above
+parseb          jsr parse_wordtab               ; look up a keyword and see if we have a match
+                bcs parsec                      ; brif no match - handle unknown stuff
+                tsta                            ; do we have a two byte token?
+                bne parseq                      ; brif so - just stash it
+                cmpb #token_else                ; ELSE?
+                beq parsed                      ; brif so - gets a hidden statement separator
+                cmpb #token_remabbr             ; REM abbreviation?
+                bne parsee                      ; brif not
+parsed          lda #':                         ; add a statement separator before it
+parseq          bsr parseoutw                   ; output a word
+                bra parsef
+parsee          bsr parseout                    ; output the token code
+parsef          cmpb #token_remabbr             ; REM abbreviation?
+                beq parseg                      ; brif so
+                cmpb #token_rem                 ; Actual REM?
+                bne parseh                      ; brif not
+parseg          ldb ,y+                         ; get current input character
+                beq parsez                      ; brif end of input
+                bsr parseout                    ; add unmodified characters to output
+                bra parseg                      ; keep going until end of input
+parseh          cmpb #token_data                ; DATA command?
+                bne parsea                      ; brif not - continue normal handling
+                clra                            ; flag for not skipping quoted string
+parsei          ldb ,y+                         ; get input character
+                beq parsez                      ; brif end of input
+                cmpb #'"                        ; string delimiter?
+                bne parsej                      ; brif not
+                coma                            ; flip the quoted statement handler
+parsej          cmpb #':                        ; end of statement?
+                bne parsek                      ; brif not
+                tsta                            ; are we skipping them?
+                bne parsek                      ; brif so
+                leay -1,y                       ; unconsume it
+                bra parsea                      ; we're done with DATA
+parsek          bsr parseout                    ; put the data value into the output
+                bra parsei                      ; go handle another character
+parsec          cmpb #'"                        ; did we encounter a quoted string?
+                bne parsel                      ; brif not
+                bsr parseout                    ; output delimiter
+parsem          ldb ,y+                         ; get string character
+                beq parsez                      ; brif end of input
+                bsr parseout                    ; output it
+                cmpb #'"                        ; end delimiter?
+                bne parsem                      ; brif not - keep looking
+                bra parsea                      ; go handle more stuff
+parsep          cmpb #'0                        ; is it a digit?
+                blo parsen                      ; brif not
+                cmpb #'9                        ; is it still a digit?
+                bls parseo                      ; brif so
+parsel          cmpb #'A                        ; is it a letter?
+                blo parsen                      ; brif not
+                cmpb #'Z                        ; is it still a letter (UC)?
+                bls parseo                      ; brif so
+                cmpb #'a                        ; is it a lower case letter?
+                blo parsen                      ; brif not
+                cmpb #'z                        ; is it still a lower case letter?
+                bhi parsen                      ; brif not
+parseo          bsr parseout                    ; stash the character
+                ldb ,y+                         ; fetch next input
+                beq parsez                      ; brif end of input
+                bra parsep                      ; go see if we're still in an identifier
+parsen          bsr parseout                    ; output unknown character (number, unknown token)
+                jmp parsea                      ; go handle more
+parseoutw       exg a,b                         ; do MSB
+                bsr parseout
+                exg a,b                         ; and then LSB (fall through)
+parseout        tst 2,s                         ; need to test for OM?
+                beq parseout0                   ; brif not
+                cmpu 3,s                        ; did we run into the end of the buffer?
+                blo parseout0                   ; brif not
+                coma                            ; set C for error
+                leas 7,s                        ; clean up stack
+                rts                             ; return to original caller
+parseout0       stb ,u+                         ; stash in buffer
                 rts
 parse_nextchar  lda ,y                          ; at end of input already?
                 beq parse_curchar               ; brif so
                 leay 1,y                        ; move to next input character
 parse_curchar   lda ,y                          ; fetch input character
                 rts
-parse_nexttokc  bsr parse_nexttok               ; fetch next token
-parse_iseos     cmpb #token_eot                 ; end of text?
-                beq parse_iseos0                ; brif so
-                cmpb #token_stmtsep             ; is it a statement separator
-parse_iseos0    rts
-parse_nexttok   bsr parse_curchar               ; fetch current input
-                beq parse_nexttok1              ; brif end of input
-parse_nexttok0  cmpa #0x20                      ; space?
-                bne parse_nexttok2              ; brif not
-                bsr parse_nextchar              ; eat the space
-                bne parse_nexttok0              ; brif not end of input
-parse_nexttok1  ldb #token_eot                  ; flag end of input
-                bra parse_nexttok6              ; go return it
-parse_nexttok2  sty parse_tokenst               ; save start of current token after skipping spaces
-                bsr parse_toupper               ; make sure we have upper case letters for matching
-                ldx #parse_wt                   ; point to keyword parsing table
-                jsr parse_wordtab               ; go see if we have a match in the keyword table
-                bcc parse_nexttok6              ; brif we do - return it
-                ldy parse_tokenst               ; return to the start of the token - pointer probably clobbered
-                bsr parse_curchar               ; get back input character (may have been clobbered)
-                cmpa #'.                        ; leading decimal?
-                beq parse_nexttok3              ; brif so - parse number
-                cmpa #'0                        ; is it a digit
-                blo parse_nexttok10             ; brif not
-                cmpa #'9                        ; is it still a digit?
-                bhi parse_nexttok10             ; brif not
-parse_nexttok3  jmp parse_number                ; go parse a number
-parse_nexttok6  stb parse_curtok                ; save token type
-                leay 1,y                        ; eat the input character
-                clra                            ; clear C to indicate no error (and clear Z also)
-                rts
-parse_nexttok10 cmpa #'A                        ; is it alpha?
-                blo parse_nexttok11             ; brif not
-                cmpa #'Z                        ; is it still alpha?
-                bls parse_nexttok12             ; brif so
-parse_nexttok11 comb                            ; flag error - unrecognized token
-                ldb #token_error
-                rts
-parse_nexttok12 bsr parse_nextcharu             ; fetch next input character
-                cmpa #'0                        ; is it alphanumeric?
-                blo parse_nexttok13             ; brif not
-                cmpa #'9                        ; is it numeric?
-                bls parse_nexttok12             ; brif so - keep skipping it
-                cmpa #'A                        ; is it alpha?
-                blo parse_nexttok13             ; brif not
-                cmpa #'Z                        ; is it still alpha?
-                bls parse_nexttok12             ; brif so - keep skipping it
-parse_nexttok13 tfr y,d                         ; calculate length of identifier
-                subd parse_tokenst
-                std val0+val.strlen             ; save it for reference
-                ldb #token_ident                ; indicate an identifier (variable name, etc.)
-                rts                             ; return result (C will be clear from SUBD above)
 parse_nextcharu bsr parse_nextchar              ; fetch next input character
                 beq parse_toupper0              ; brif end of input
 parse_toupper   cmpa #'a                        ; is it lower case alpha?
@@ -154,24 +117,12 @@
                 bhi parse_toupper0              ; brif not
                 suba #0x20                      ; adjust to upper case alpha
 parse_toupper0  rts                             ; Z only set here if input was zero entering from parse_nextcharu
-parse_number    jmp parse_tokerr
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Parse a statement that consists of just the command token
-parse_cmdsingle equ parse_write                 ; just write the token out and bail
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Parse a REM or ' statement. We just copy the comment out after the REM or ' token.
-parse_rem       jsr parse_write                 ; write the token/character out
-                ldb ,y+                         ; get next input character
-                bne parse_rem                   ; brif not at the end of the input
-                ldb #token_eot                  ; flag end of input for mainline parser
-                stb parse_curtok
-                rts                             ; return, pass back the C result from parse_write
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
+; This routine parses tokens using the table at parse_wt. The table is structured as follows:
 ;
 ; * two bytes which contain the length of the table less the two bytes for this length value
 ; * a sequence of entries consisting of a single byte matching character and a token code followed
-;   by an optional sub table, structured exactly the same way.
+;   by an optional sub table, structured exactly the same way. The token code is 2 bytes.
 ;
 ; The optional subtable will be present if the token code is token_eot
 ;
@@ -181,16 +132,19 @@
 ;
 ; When a token_eot match is found, if there are no further characters in the input, the match is
 ; determined to be invalid and processing continues with the next entry.
+parse_wordtab   ldx #parse_wt                   ; point to main lookup table
+                skip2                           ; move on into the main routine
 parse_wordtab0  leas 3,s                        ; clean up stack for sub table handling
-parse_wordtab   pshs a,x                        ; save input character and start of table
+                pshs a,x                        ; save input character and start of table
                 ldd ,x++                        ; get length of this table
                 addd 1,s                        ; calculate the address of the end of the table
                 std 1,s                         ; save end address for comparison later
                 lda ,s                          ; get back input character
-parse_wordtab1  ldb 1,x                         ; fetch token code for this entry
-                cmpa ,x++                       ; does this entry match?
+parse_wordtab1  leax 3,x                        ; move past this entry - this order to avoid Z effects from leax
+                cmpa -3,x                       ; does this entry match?
                 bne parse_wordtab4              ; brif not
-                cmpb #token_eot                 ; is it indicating a sub table?
+                ldd -2,x                        ; get the matched token code
+                cmpd #tokenf_eot                ; is it indicating a sub table?
                 bne parse_wordtab6              ; brif not
                 jsr parse_nextcharu             ; fetch next input character (for sub table match)
                 bne parse_wordtab0              ; brif we are going to check the sub table
@@ -201,14 +155,17 @@
                 blo parse_wordtab1              ; brif not - check another entry
                 comb                            ; indicate no match
                 puls a,x,pc                     ; clean up stack and return
-parse_wordtab4  lda -2,x                        ; get the match character
+parse_wordtab4  lda -3,x                        ; get the match character
                 bmi parse_wordtab5              ; brif negative - lookahead fail
-                cmpb #token_eot                 ; is there a sub table to skip?
+                ldd -2,x                        ; get the token match
+                cmpd #tokenf_eot                ; is there a sub table to skip?
                 beq parse_wordtab2              ; brif so - skip sub table
                 bra parse_wordtab3              ; otherwise just move to the next entry
 parse_wordtab5  leay a,y                        ; move back the specified number of characters
-parse_wordtab6  clra                            ; clear C to indicate a match
-                puls a,x,pc                     ; clean up stack and return
+                ldd -2,x                        ; get the matched token
+parse_wordtab6  sta ,s                          ; save MSB of match
+                clra                            ; clear carry to indicate match
+                puls a,x,pc                     ; clean up stack, restore return value and return
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Convert a token number back to its keyword. This will use the same table used by parse_wordtab. Enter with a character
 ; output routine pointer in U which takes the character in A. The routine can assume that Y is preserved. Will return
@@ -247,128 +204,56 @@
                 coma                            ; make sure C is set for no match
                 puls a,x,pc                     ; clean up stack and return
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Validate a line number. Must enter with the token type in B. Will return the line number in X. It will return a
-; syntax error if the line number is invalid or out of range. It will also consume a valid line number token.
-parse_linenum   cmpb #token_int32               ; is it an integer?
-                beq parse_linenum1              ; brif so
-parse_linenum0  ldb #err_sn                     ; flag syntax error
-                coma                            ; flag error
-                rts
-parse_linenum1  ldx val0+val.int                ; get high word of integer
-                bne parse_linenum0              ; brif not a valid line number
-                ldx val0+val.int+2              ; get actual line number
-                pshs x                          ; save it
-                jsr parse_nexttok               ; consume line number
-                puls x,pc                       ; get back line number and return it
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Parse a line number range which is one of the following forms:
-; <linenum1>
-; <linenum1>-
-; <linenum1>-<linenum2>
-; -<linenum2>
-; The result will store two line numbers. If no - token appears, then both line numbers will be the same. Otherwise,
-; if <linenum1> is omitted, it will be assumed to be 0. If <linenum2> is omitted, it will be assumed to be 65535. Those
-; are the minimum and maximum line numbers.
-;
-; Parsing works by first looking for an integer token that is in range. If it finds one, it looks for an optional -
-; followed by an optional integer token that is in range. If the first token is not an integer, it must be a - which may
-; be optionally followed by another integer in range.
-;
-; It is technically valid to have a single - with no line numbers.
-;
-; Enter with the current token in B.
+; Definition of tokens used in the interpreter.
 ;
-; The resulting line numbers will be returned in parse_buff
-parse_linerange ldx zero                        ; default start line number
-                leau -1,x                       ; default end line number
-                pshs x,u                        ; save the return range
-                cmpb #token_minus               ; range with no start?
-                beq parse_linerang1             ; brif so
-                bsr parse_linenum               ; verify line number, return in X
-                bcs parse_linerang4             ; bail out on error
-                stx ,s                          ; save new start line number
-                jsr parse_nexttokc              ; fetch next token, set Z if end of statement
-                bne parse_linerang0             ; brif not end of line
-                ldx ,s                          ; get end line to use as start line
-                bra parse_linerang2             ; go set range end and return
-parse_linerang0 cmpb #token_minus               ; do we have a range character?
-                bne parse_linerang3             ; brif not - we have an error
-parse_linerang1 jsr parse_nexttokc              ; parse what comes after the range mark     
-                beq parse_linerang2             ; brif end of statement - use the default range end
-                bsr parse_linenum               ; make sure it's a valid line number
-                bcs parse_linerang4             ; bail out on error
-parse_linerang2 stx 2,s                         ; set range end
-                clra                            ; make sure C is clear
-                puls x,u,pc                     ; fetch return values and return
-parse_linerang3 ldb #err_sn                     ; flag a syntax error
-                coma                            ; make sure C is set
-parse_linerang4 puls x,u,pc                     ; clean up stack and return error condition
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows:
-;               parse_tokdefT <sym>,<parse>,<list>,<exec>
-; where:
-; T: c for command, f for function, p for particle
-; <sym>: the symbol name without the "token_" prefix
-; <parse>: parse handler for the type, ignored for particles
-; <list>: list handler for the type, ingored for particles
-; <exec>: execution handler for the type, ignored for particles
+; Each token is defined as follows:
+;               parse_tokendefT <sym>[,<handler>]
+; where T is one of:
+; p: particle - utility tokens and definitions, starting at 0x00
+; c: command - a command keyword, starting at 0x80
+; f: function - a function keyword, starting at 0x80 with a 0xFF prefix
+; v: token width specific number/code, but otherwise a particle; in this case, the code replaces <handler>
+;
+; <sym> is the base symbol name (such as "then" or "eot")
+; <handler> is the address of the execution handler routine of the natural token type (command or function)
+;
+; <handler> is optional for particles. If it is omitted for command or function tokens, it defaults to SNERROR.
                 *pragmapush list
                 *pragma nolist
 __toknump       set 0
-__toknumc       set 0x40
-__toknumf       set 0xc0
-                setstr __cmdparset=""
-                setstr __cmdlistt=""
-                setstr __cmdexect=""
-                setstr __fnparset=""
-                setstr __fnlistt=""
-                setstr __fnexect=""
+__toknumc       set 0x80
+__toknumf       set 0x80
 parse_tokendefp macro noexpand
 token_\1        equ __toknump
+tokenf_\1       equ __toknump
 __toknump       set __toknump+1
                 endm
+parse_tokendefv macro noexpand
+token_\1        equ \2
+tokenf_\1       equ \2
+                endm
+                setstr __cmdexect=""
+                setstr __fnexect=""
 parse_tokendefc macro noexpand
 token_\1        equ __toknumc
+tokenf_\1       equ __toknumc
 __toknumc       set __toknumc+1
                 ifstr ne,"{2}",""
-                setstr __cmdparset="%(__cmdparset)\tfcb\ttoken_\1\n\tfdb {2}\n"
-                endc
-                ifstr ne,"{3}",""
-                setstr __cmdlistt="%(__cmdlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
-                endc
-                ifstr ne,"{4}",""
-                setstr __cmdexect="%(__cmdexect)\tfdb {3}\n"
+                setstr __cmdexect="%(__cmdexect)\tfdb {2}\n"
                 else
                 setstr __cmdexect="%(__cmdexect)\tfdb SNERROR\n"
                 endc
                 endm
 parse_tokendeff macro noexpand
 token_\1        equ __toknumf
+tokenf_\1       equ 0xff00|__toknumf
 __toknumf       set __toknumf+1
                 ifstr ne,"{2}",""
-                setstr __fnparset="%(__fnparset)\tfcb\ttoken_\1\n\tfdb {2}\n"
-                endc
-                ifstr ne,"{3}",""
-                setstr __fnlistt="%(__fnlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
-                endc
-                ifstr ne,"{4}",""
-                setstr __fnexect="%(__fnexect)\tfdb {3}\n"
+                setstr __fnexect="%(__fnexect)\tfdb {2}\n"
                 else
                 setstr __fnexect="%(__fnexect)\tfdb SNERROR\n"
                 endc
                 endm
-token_cmdparse  macro
-                *pragmapush nolist
-                *pragma nolist
-                includestr "%(__cmdparset)"
-                *pragmapop nolist
-                endm
-token_cmdlist   macro
-                *pragmapush nolist
-                *pragma nolist
-                includestr "%(__cmdlistt)"
-                *pragmapop nolist
-                endm
 token_cmdexec   macro
                 *pragmapush nolist
                 *pragma nolist
@@ -376,18 +261,6 @@
 token__maxcmd   equ __toknumc-1
                 *pragmapop nolist
                 endm
-token_fnparse   macro
-                *pragmapush nolist
-                *pragma nolist
-                includestr "%(__fnparset)"
-                *pragmapop nolist
-                endm
-token_fnlist    macro
-                *pragmapush nolist
-                *pragma nolist
-                includestr "%(__fnlistt)"
-                *pragmapop nolist
-                endm
 token_fnexec    macro
                 *pragmapush nolist
                 *pragma nolist
@@ -396,77 +269,49 @@
                 *pragmapop nolist
                 endm
                 *pragmapop list
-                ; the tokens defined in this section all have special parsing or meaning
+                ; special tokens
                 parse_tokendefp error           ; Used to mark errors; should always be first so it's token #0 
                 parse_tokendefp eot             ; End of input marker or special handling in word tables
-                parse_tokendefp int32           ; 32 bit integer (has special parsing)
-                parse_tokendefp float           ; floating point value (has special parsing)
-                parse_tokendefp ident           ; identifier (has special parsing)
-                parse_tokendefp linenum         ; a 16 bit unsigned integer treated as a line number
-                parse_tokendefp linerange       ; a pair of 16 bit unsigned integers treated as line numbers
-                ; everything below here references keywords or particle characters
-                parse_tokendefp stmtsep         ; statement separator
-                parse_tokendefp times           ; times (multiplication) operator (*)
-                parse_tokendefp plus            ; addition operator
-                parse_tokendefp divide          ; division operator (/)
-                parse_tokendefp minus           ; subtraction operator
-                parse_tokendefp exp             ; exponentiation operator (^)
-                parse_tokendefp lt              ; less than operator
-                parse_tokendefp le              ; less than or equal operateor
-                parse_tokendefp gt              ; greater than operator
-                parse_tokendefp ge              ; greater than or equal operator
-                parse_tokendefp eq              ; equality operator
-                parse_tokendefp ne              ; inequality operator
-                parse_tokendefp not             ; boolean NOT operator
-                parse_tokendefp and             ; boolean AND operator
-                parse_tokendefp or              ; boolean OR operator
-                parse_tokendefp bang            ; exclamation mark
-                parse_tokendefp hash            ; number sign
-                parse_tokendefp dollar          ; dollar sign (string sigil)
-                parse_tokendefp percent         ; percent sign (integer sigil)
-                parse_tokendefp amp             ; ampersand
-                parse_tokendefp oparen          ; opening paren
-                parse_tokendefp cparen          ; closing paren
-                parse_tokendefp sep             ; comma (separator)
-                parse_tokendefp semi            ; semicolon
-                parse_tokendefp at              ; @ symbol
-                parse_tokendefp else            ; ELSE
-                parse_tokendefp then            ; THEN
-                parse_tokendefp to              ; TO
-                parse_tokendefp sub             ; SUB
-                parse_tokendefp as              ; AS
-
-                parse_tokendefc remabbr,parse_rem,list_noop,exec_noop           ; abbreviated REM (')
-                parse_tokendefc rem,parse_rem,list_noop,exec_noop               ; REM
-                parse_tokendefc return,parse_cmdsingle,parse_noop,parse_noop    ; RETURN
-                parse_tokendefc run,parse_noop,parse_noop,parse_noop            ; RUN
-                parse_tokendefc data,parse_noop,parse_noop,parse_noop           ; DATA
-                parse_tokendefc end,parse_cmdsingle,parse_noop,parse_noop       ; END
-                parse_tokendefc stop,parse_cmdsingle,parse_noop,parse_noop      ; STOP
-                parse_tokendefc let,parse_noop,parse_noop,parse_noop            ; LET
-                parse_tokendefc list,parse_noop,parse_noop,parse_noop           ; LIST
-                parse_tokendefc new,parse_cmdsingle,parse_noop,parse_noop       ; NEW
-                parse_tokendefc print,parse_noop,parse_noop,parse_noop          ; PRINT
-                parse_tokendefc pop,parse_cmdsingle,parse_noop,parse_noop       ; POP
-                parse_tokendefc goto,parse_noop,parse_noop,parse_noop           ; GOTO
-                parse_tokendefc gosub,parse_noop,parse_noop,parse_noop          ; GOSUB
-                parse_tokendefc go,parse_noop,parse_noop,parse_noop             ; GO
-
-                parse_tokendeff asc,parse_noop,parse_noop,parse_noop            ; ASC()
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Parse handling tables
-parsetab_cmd    token_cmdparse
-parsetab_cmde
-parsetab_fn     token_fnparse
-parsetab_fne
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; List handling tables
-listtab_cmd     token_cmdlist
-listtab_cmde
-listtab_fn      token_fnlist
-listtab_fne
+                ; commands (and simple non-command keywords)
+                parse_tokendefc remabbr         ; abbreviated REM (')
+                parse_tokendefc rem             ; REM
+                parse_tokendefc return          ; RETURN
+                parse_tokendefc run             ; RUN
+                parse_tokendefc data            ; DATA
+                parse_tokendefc end             ; END
+                parse_tokendefc stop            ; STOP
+                parse_tokendefc let             ; LET
+                parse_tokendefc list            ; LIST
+                parse_tokendefc new             ; NEW
+                parse_tokendefc print           ; PRINT
+                parse_tokendefc pop             ; POP
+                parse_tokendefc goto            ; GOTO
+                parse_tokendefc gosub           ; GOSUB
+                parse_tokendefc go              ; GO
+                parse_tokendefc times           ; times (multiplication) operator (*)
+                parse_tokendefc plus            ; addition operator
+                parse_tokendefc divide          ; division operator (/)
+                parse_tokendefc minus           ; subtraction operator
+                parse_tokendefc exp             ; exponentiation operator (^)
+                parse_tokendefc lt              ; less than operator
+                parse_tokendefc le              ; less than or equal operator
+                parse_tokendefc gt              ; greater than operator
+                parse_tokendefc ge              ; greater than or equal operator
+                parse_tokendefc eq              ; equality operator
+                parse_tokendefc ne              ; inequality operator
+                parse_tokendefc not             ; boolean NOT operator
+                parse_tokendefc and             ; boolean AND operator
+                parse_tokendefc or              ; boolean OR operator
+                parse_tokendefc else            ; ELSE
+                parse_tokendefc then            ; THEN
+                parse_tokendefc to              ; TO
+                parse_tokendefc sub             ; SUB
+                parse_tokendefc as              ; AS
+                ; secondary tokens (functions)
+                parse_tokendeff asc             ; ASC()
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Execution handling tables
 exectab_cmd     token_cmdexec
 exectab_fn      token_fnexec
+                
                 *pragmapop list