changeset 128:9d57279c900e

Remove old style keyword lists and jump tables. As part of the move to the new pre-parsing scheme, remove the old keyword lists and jump tables. The main loop still needs modification to work with this new system.
author William Astle <lost@l-w.ca>
date Tue, 09 Jan 2024 22:54:42 -0700
parents 527212870064
children d5886daa4f65
files Makefile src/expr.s src/interp.s src/keywords.s src/lwbasic.s src/number.s src/progctrl.s src/token.s
diffstat 8 files changed, 9 insertions(+), 363 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Tue Jan 09 22:47:11 2024 -0700
+++ b/Makefile	Tue Jan 09 22:54:42 2024 -0700
@@ -3,7 +3,7 @@
 
 all: bin/lwbasic.rom bin/lwbasic-coco2b.rom bin/lwbasic-coco3.rom bin/coco.zip bin/coco2.zip bin/coco2b.zip bin/coco3.zip
 
-lwb_srcs := bytecode.s consscr.s defs.s error.s expr.s fps.s genio.s init.s int.s interp.s irq.s keyb.s keywords.s keywordtab.s miscdata.s number.s parse.s print.s progctrl.s stack.s token.s vars.s
+lwb_srcs := bytecode.s consscr.s defs.s error.s expr.s fps.s genio.s init.s int.s interp.s irq.s keyb.s keywordtab.s miscdata.s number.s parse.s print.s progctrl.s stack.s vars.s
 lwb_srcs := $(addprefix src/,$(lwb_srcs))
 
 bin/lwbasic.rom: src/lwbasic.s $(lwb_srcs)
--- a/src/expr.s	Tue Jan 09 22:47:11 2024 -0700
+++ b/src/expr.s	Tue Jan 09 22:54:42 2024 -0700
@@ -17,9 +17,9 @@
                 puls b                          ; get back operator precedence flag
 eval_expr0      jsr curchar                     ; fetch current input
                 beq eval_expr1                  ; brif end of expression - we're done
-                cmpa #tok_or                    ; is it above operators?
+                cmpa #token_or                  ; is it above operators?
                 bhi eval_expr1                  ; brif so
-                suba #tok_plus                  ; offset to zero for first operator token
+                suba #token_plus                ; offset to zero for first operator token
                 bcc eval_expr2                  ; brif it is an operator
 eval_expr1      rts
 eval_expr2      pshs b                          ; save previous operator precedence
--- a/src/interp.s	Tue Jan 09 22:47:11 2024 -0700
+++ b/src/interp.s	Tue Jan 09 22:54:42 2024 -0700
@@ -97,7 +97,7 @@
                 beq immediate6                  ; brif so - we don't need to insert a line
                 pshs x                          ; save program insert location and line number
                 ldx inputptr                    ; point to line text
-                jsr tokenize                    ; tokenize line, get length to D
+                jsr parse                       ; tokenize line, get length to D
                 leay ,x                         ; save tokenized line pointer
                 addd #4                         ; account for next line pointer and line number
                 ldx vartab                      ; get start of copy location
@@ -223,8 +223,7 @@
                 beq interpret                   ; brif end of statement - do the next statement dance
                 tsta                            ; set flags properly for token
                 lbpl cmd_let                    ; brif no command - do assignment (LET command is optional)
-                ldx #primaryjump                ; point to jump table
-                anda #0x7f                      ; lose bit 7
+                ldx #exectab_cmd                ; point to jump table
                 leax a,x                        ; get half way to the correct offset
                 ldx a,x                         ; get the address the other half of the way from here
                 jsr nextchar                    ; skip past token and set flags
--- a/src/keywords.s	Tue Jan 09 22:47:11 2024 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-                *pragmapush list
-                *pragma list
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Keyword dictionaries and jump tables. These are defined by several macros which ensure that each command or function
-; entry has an associated jump table entry. These macros are:
-;
-;               defcmd string,symbase
-;               deffunc string,symbase,flags
-;               cmdtab
-;               functab
-;               cmdjump
-;               funcjump
-; defcmd and deffunc will add an entry into the relevant dictionary table as well as adding one to the relevant jump
-; tables. The cmdtab, functab, cmdjump, and funcjump will output the table definitions.
-                *pragmapush list
-                *pragma nolist
-__cmdnum        set 0x80
-__funcnum       set 0x80
-defcmd          macro noexpand
-                setstr __cmdtab="%(__cmdtab)\tfcs {1}\n"
-                ifstr ne,"{3}",""
-                setstr __cmdjump="%(__cmdjump)\tfdb {3}\n"
-                else
-                setstr __cmdjump="%(__cmdjump)\tfdb cmd_{2}\n"
-                endc
-tok_{2}         equ __cmdnum
-__cmdnum        set __cmdnum+1
-                endm
-deffunc         macro noexpand
-                setstr __functab="%(__functab)\tfcs {1}\n"
-                ifstr ne,"{4}",""
-                setstr __funcjump="%(__funcjump)\tfcb {3}\n\tfdb {4}\n"
-                else
-                setstr __funcjump="%(__funcjump)\tfcb {3}\n\tfdb func_{2}\n"
-                endc
-tok_{2}         equ __funcnum
-__funcnum       set __funcnum+1
-                endm
-cmdtab          macro
-                *pragmapush list
-                *pragma nolist
-                includestr "%(__cmdtab)"
-                *pragmapop list
-                fcb 0                           ; flag end of table
-                endm
-functab         macro
-                *pragmapush list
-                *pragma nolist
-                includestr "%(__functab)"
-                *pragmapop list
-                fcb 0                           ; flag end of table
-                endm
-cmdjump         macro
-                *pragmapush nolist
-                *pragma nolist
-                includestr "%(__cmdjump)"
-                *pragmapop list
-                endm
-funcjump        macro
-                *pragmapush nolist
-                *pragma nolist
-                includestr "%(__funcjump)"
-                *pragmapop list
-                endm
-                *pragmapop list
-                defcmd 'REM',rem
-                defcmd /'/,apos
-                defcmd 'DATA',data
-                defcmd 'ELSE',else
-                defcmd 'END',end
-                defcmd 'STOP',stop
-                defcmd 'LET',let
-                defcmd 'NEW',new
-                defcmd 'PRINT',print
-                defcmd 'LIST',list
-                defcmd 'RUN',run
-                defcmd 'GOTO',goto
-                defcmd 'GOSUB',gosub
-                defcmd 'RETURN',return
-                defcmd 'POP',pop
-                defcmd '+',plus,SNERROR         ; IMPORTANT: the operators from + to OR MUST stay in this exact sequence
-                defcmd '-',minus,SNERROR        ; with no gaps because a secondary lookup table is used for operator
-                defcmd '*',times,SNERROR        ; handling during binary operator handling.
-                defcmd '/',divide,SNERROR
-                defcmd '^',power,SNERROR
-                defcmd '<',less,SNERROR
-                defcmd '>',greater,SNERROR
-                defcmd '=',equal,SNERROR
-                defcmd '<=',lessequal,SNERROR
-                defcmd '>=',greaterequal,SNERROR
-                defcmd '<>',notequal,SNERROR
-                defcmd 'AND',and,SNERROR
-                defcmd 'OR',or,SNERROR
-                defcmd 'NOT',not,SNERROR
-primarydict     cmdtab
-secondarydict   functab
-primaryjump     cmdjump
-secondaryjump   funcjump
-                *pragmapop list
--- a/src/lwbasic.s	Tue Jan 09 22:47:11 2024 -0700
+++ b/src/lwbasic.s	Tue Jan 09 22:54:42 2024 -0700
@@ -51,12 +51,10 @@
                 include number.s
                 include int.s
                 include fps.s
-                include token.s
                 include bytecode.s
                 include parse.s
                 include keywordtab.s
                 include miscdata.s
-                include keywords.s
                 *pragmapop list
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Need to ensure the vectors are at 0xbff2
--- a/src/number.s	Tue Jan 09 22:47:11 2024 -0700
+++ b/src/number.s	Tue Jan 09 22:54:42 2024 -0700
@@ -140,11 +140,11 @@
                 bcs val_parsenum9               ; brif digit
                 cmpa #'+                        ; positive?
                 beq val_parsenum8               ; brif no
-                cmpa #tok_plus                  ; tokenized plus?
+                cmpa #token_plus                ; tokenized plus?
                 beq val_parsenum8               ; brif so
                 cmpa #'-                        ; negative?
                 beq val_parsenum7a              ; brif so
-                cmpa #tok_minus                 ; tokenized minus?
+                cmpa #token_minus               ; tokenized minus?
                 lbne SNERROR                    ; brif not positive, negative, or digit
 val_parsenum7a  com fpaextra+5                  ; make sign of exponent negative
 val_parsenum8   jsr nextchar                    ; eat exponent sign/get next digit
--- a/src/progctrl.s	Tue Jan 09 22:47:11 2024 -0700
+++ b/src/progctrl.s	Tue Jan 09 22:54:42 2024 -0700
@@ -105,7 +105,7 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; GOSUB command
 cmd_gosub       jsr parse_lineno                ; parse the destination line so return location is after the line number
-                ldd #tok_gosub*256+4            ; stack frame details
+                ldd #token_gosub*256+4          ; stack frame details
                 jsr cstack_alloc                ; make a stack frame
                 ldx curline                     ; save current line pointer
                 stx ,u
@@ -127,7 +127,7 @@
                 jmp ERROR
 cmd_return0     jsr cstack_next                 ; move to next entry
                 beq RG_ERROR                    ; brif end of stack - raise error
-cmd_return1     cmpb #tok_gosub                 ; do we have a GOSUB frame?
+cmd_return1     cmpb #token_gosub               ; do we have a GOSUB frame?
                 bne cmd_return0                 ; brif not - try again
                 lda ,s+                         ; is it "POP"?
                 bne cmd_return2                 ; brif so - don't change flow control but clear stack frame
--- a/src/token.s	Tue Jan 09 22:47:11 2024 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,252 +0,0 @@
-                *pragmapush list
-                *pragma list
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; The LIST command.
-;
-; Syntax:
-; LIST
-; LIST <line>
-; LIST <line>-
-; LIST -<line>
-; LIST <start>-<end>
-cmd_list        bne cmd_list1                   ; brif we have arguments
-                ldx progtext                    ; point to start of program
-cmd_list0       ldd #65535                      ; set last line to list to max line number
-                std binval
-                bra cmd_list2                   ; go do the listing
-cmd_list1       jsr parse_lineno                ; parse starting line number (will default to 0)
-                jsr prog_findline               ; find the line or the one after where it would be
-                jsr curchar                     ; are we at the end of the command?
-                beq cmd_list2                   ; brif so - we have a single line (binval will have the start line #)
-                ldb #tok_minus                  ; insist on a - for a range if more than one line number
-                jsr syncheckb
-                beq cmd_list0                   ; brif open ended ending - set to max line number
-                jsr parse_lineno                ; parse ending of range
-cmd_list2       ldd ,x                          ; are we at the end of the program?
-                bne cmd_list4                   ; brif not
-cmd_list3       rts
-cmd_list4       ldd 2,x                         ; get line number
-                cmpd binval                     ; have we reached the end of the range?
-                bhi cmd_list3                   ; brif so - we're done
-                jsr print_uint16d               ; print out line number
-                lda #0x20                       ; and a space
-                jsr writechr
-                pshs x                          ; save start of this line (in case detokenizing exits early)
-                leax 4,x                        ; move past line header
-                bsr detokenize                  ; detokenize line to current output stream
-                ldx [,s++]                      ; point to next line using saved pointer and clear it from the stack
-                ; need to add a break check here
-                bra cmd_list2                   ; go handle another line
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Detokenize a line to the current output stream
-detokenize      lda ,x+                         ; get character from tokenized line
-                bmi detokenize1                 ; brif it's a keyword token
-                lbeq writecondnl                ; do a newline if needed and return
-                cmpa #':                        ; is it a colon?
-                bne detokenize0                 ; brif not
-                ldb ,x                          ; fetch subsequent character
-                cmpb #tok_apos                  ; apostrophe version of REM?
-                beq detokenize                  ; brif so - skip the colon
-                cmpb #tok_else                  ; ELSE?
-                beq detokenize                  ; brif so - skip the colon
-detokenize0     jsr writechr                    ; output it unmolested
-                bra detokenize                  ; go handle another character
-detokenize1     ldu #primarydict                ; point to primary dictionary table
-                cmpa #0xff                      ; is it a secondary token?
-                bne detokenize3                 ; brif not
-                ldu #secondarydict              ; point to secondary dictionary table
-                lda ,x+                         ; get secondary token value
-                bne detokenize3                 ; brif not end of line
-                leax -1,x                       ; don't consume the NUL
-detokenize2     lda #'!                         ; invalid token flag
-                bra detokenize0                 ; output it and continue
-detokenize3     anda #0x7f                      ; lose the high bit
-                beq detokenize6                 ; brif already at the right place
-detokenize4     ldb ,u                          ; end of dictionary table?
-                beq detokenize2                 ; brif so - show invalid tokenf lag
-detokenize5     ldb ,u+                         ; fetch character in this keyboard
-                bpl detokenize5                 ; brif not end of keyword (high bit set)
-                deca                            ; at the right token?
-                bne detokenize4                 ; brif not - skip another
-detokenize6     lda ,u+                         ; get keyword character
-                bmi detokenize7                 ; brif end of keyword
-                jsr writechr                    ; output it
-                bra detokenize6                 ; go fetch another
-detokenize7     anda #0x7f                      ; lose the high bit
-                bra detokenize0                 ; write it and move on with the input
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Canonicalize certain sequences; ALL the rewrite sequences must make the result shorter or keep it the same size
-makecanontab    fcb tok_less,2
-                fcb tok_greater,tok_notequal
-                fcb tok_equal,tok_lessequal
-                fcb tok_greater,2
-                fcb tok_less,tok_notequal
-                fcb tok_equal,tok_greaterequal
-                fcb tok_equal,2
-                fcb tok_greater,tok_greaterequal
-                fcb tok_less,tok_lessequal
-                fcb 0
-makecanon       leay ,x                         ; point output to start of the buffer
-makecanon0      lda ,x+                         ; get current byte
-                sta ,y+                         ; save in output
-                bne makecanon1                  ; brif not end of line
-                rts
-makecanon1      bpl makecanon0                  ; brif not a token
-                cmpa #0xff                      ; is it secondary?
-                bne makecanon2                  ; brif not
-                leax 1,x                        ; move past second half
-                bra makecanon0                  ; go handle next byte
-makecanon2      ldu #makecanontab               ; point to replacement table
-makecanon3      cmpa ,u+                        ; is it this entry?
-                beq makecanon4                  ; brif so
-                ldb ,u+                         ; get number of entries
-                lslb                            ; 2 bytes per
-                leau b,u                        ; move past entry
-                ldb ,u                          ; end of table?
-                bne makecanon3                  ; brif not
-                bra makecanon0                  ; no substitutions found
-makecanon4      pshs x                          ; save original source pointer
-makecanon5      lda ,x+                         ; get next character
-                cmpa #0x20                      ; is it space?
-                beq makecanon5                  ; brif so - skip it
-                ldb ,u+                         ; get number of replacement candidates
-makecanon6      cmpa ,u++                       ; does it match?
-                beq makecanon7                  ; brif so
-                decb                            ; seen all of them?
-                bne makecanon6                  ; brif not
-                puls x                          ; restore input pointer
-                bra makecanon0                  ; go handle next input
-makecanon7      leas 2,s                        ; clear saved input pointer
-                lda -1,u                        ; get replacement token
-                sta -1,y                        ; put it in the output
-                bra makecanon0                  ; go handle more input
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Tokenize line to tokebuff
-;
-; Enter with X pointing to the text to tokenize.
-; Exit with X pointing to the start of the tokenized line and D holding the length of the tokenized line.
-tokenize        clr tok_skipkw                  ; clear "not token" flag
-                clr tok_skipdt                  ; clear the "in data" flag
-                ldy #tokebuff                   ; point to destination buffer
-                pshs y                          ; set return value
-tokenize0       lda ,x+                         ; get input character
-                bne tokenize3                   ; brif not end of input
-tokenize1       sta ,y+                         ; blank out final byte in result
-tokenize2       ldx #tokebuff                   ; point to start of tokenized line
-                bsr makecanon                   ; canonicalize certain sequences
-                tfr y,d                         ; get end address to accumulator
-                subd #tokebuff                  ; subtract out start; gives length of result
-                puls x,pc                       ; set return pointer and return
-tokenize3       tst tok_skipkw                  ; are we in the middle of a "not token"?
-                beq tokenize6                   ; brif not
-                jsr setcifalpha                 ; is it alpha
-                bcs tokenize4                   ; brif so - store it and continue
-                jsr setcifdigit                 ; is it numeric?
-                bcc tokenize5                   ; brif not
-tokenize4       sta ,y+                         ; save output character
-                bra tokenize0                   ; check for another
-tokenize5       clr tok_skipkw                  ; clear the "not token" flag
-tokenize6       cmpa #'"                        ; is it a string?
-                bne tokenize8                   ; brif not
-                sta ,y+                         ; save string delimiter
-tokenize7       lda ,x+                         ; get input character
-                beq tokenize1                   ; brif end of input
-                sta ,y+                         ; save it in output
-                cmpa #'"                        ; end of string?
-                bne tokenize7                   ; brif not
-                bra tokenize0                   ; brif 
-tokenize8       cmpa #':                        ; end of statement?
-                bne tokenize9                   ; brif not
-                clr tok_skipdt                  ; reset "in data" flag
-                bra tokenize4                   ; stash it and continue
-tokenize9       cmpa #0x20                      ; is it a space?
-                beq tokenize4                   ; brif so - stash it unmodified
-                tst tok_skipdt                  ; are we "in data"?
-                bne tokenize4                   ; brif so - don't tokenize it
-                cmpa #'?                        ; PRINT shortcut?
-                bne tokenize10                  ; brif not
-                lda #tok_print                  ; load token for PRINT
-                bra tokenize4                   ; move stash it and move on
-tokenize10      cmpa #''                        ; ' shortcut for remark?
-                bne tokenize12                  ; brif not
-                ldd #':*256+tok_apos            ; put token for ' and an implied colon
-                std ,y++                        ; stash it
-tokenize11      lda ,x+                         ; fetch byte from input
-                sta ,y+                         ; stash in output
-                bne tokenize11                  ; brif not end of input
-                bra tokenize2                   ; go finish up
-tokenize12      jsr setcifdigit                 ; is it a digit?
-                bcs tokenize4                   ; brif so - pass it through
-                tsta                            ; is the high bit set?
-                bmi tokenize0                   ; ignore it if so
-                ldu #primarydict                ; point to keyword table
-                leax -1,x                       ; back up input to start of potential token
-                clr tok_kwtype                  ; set secondary table flag to primary table
-                clr tok_kwmatch                 ; clear the matched token
-                clr tok_kwmatch+1
-                clr tok_kwmatchl                ; set length matched
-                pshs x                          ; save start of input token
-tokenize13      clr tok_kwnum                   ; clear keyword number
-tokenize14      ldb ,u                          ; are we at the end of the table?
-                bne tokenize16                  ; brif not
-                ldu #secondarydict              ; point to secondary token dictionary
-                com tok_kwtype                  ; flip to secondary token flag
-                bne tokenize13                  ; brif we haven't already done the secondaries
-                puls x                          ; get back input pointer
-                ldb tok_kwmatchl                ; get length of best match
-                beq tokenize15                  ; brif we don't have a match
-                abx                             ; move input pointer past matched token
-                ldd tok_kwmatch                 ; get matched token number
-                tsta                            ; is it a primary?
-                beq tokenize24                  ; brif so
-                bra tokenize23                  ; go stash two byte token
-tokenize15      com tok_skipkw                  ; set "not token flag"
-                lda ,x+                         ; get character
-                bra tokenize4                   ; stash it and continue
-tokenize16      ldx ,s                          ; get back start of input token
-                clra                            ; initalize match length counter
-tokenize17      inca                            ; bump length counter
-                ldb ,x+                         ; get input character
-                cmpb #'z                        ; is it above lower case Z?
-                bhi tokenize18                  ; brif so
-                cmpb #'a                        ; is it below lower case A?
-                blo tokenize18                  ; brif so
-                subb #0x20                      ; convert to upper case
-tokenize18      subb ,u+                        ; does it match?
-                beq tokenize17                  ; brif so - check another
-                cmpb #0x80                      ; did it match with high bit set?
-                beq tokenize21                  ; brif so - exact match
-                leau -1,u                       ; back up to current test character
-tokenize19      ldb ,u+                         ; end of token?
-                bpl tokenize19                  ; brif not
-tokenize20      inc tok_kwnum                   ; bump token counter
-                bra tokenize14                  ; go check another one
-tokenize21      cmpa tok_kwmatchl               ; is it a longer match?
-                bls tokenize20                  ; brif not, ignore it
-                sta tok_kwmatchl                ; save new match length
-                ldd tok_kwtype                  ; get the matched token count
-                orb #0x80                       ; set token flag
-                std tok_kwmatch                 ; save matched token
-                bra tokenize20                  ; keep looking through the tables
-tokenize22      lda #':                         ; for putting implied colons in
-tokenize23      std ,y++                        ; put output into buffer
-                jmp tokenize0                   ; go handle more input
-tokenize24      cmpb #tok_else                  ; is it ELSE?
-                beq tokenize22                  ; brif so - stash it with colon
-                cmpb #tok_data                  ; is it DATA?
-                bne tokenize26                  ; brif not
-                stb tok_skipdt                  ; set "in data" flag
-tokenize25      stb ,y+                         ; stash token
-                jmp tokenize0                   ; go handle more
-tokenize26      cmpb #tok_rem                   ; is it REM?
-                beq tokenize28                  ; brif so
-                cmpb #tok_apos                  ; apostrophe REM?
-                bne tokenize25                  ; brif not - stash token and continue
-                lda #':                         ; stash the implied colon
-                sta ,y+
-                bra tokenize28
-tokenize27      ldb ,x+                         ; fetch next input character
-tokenize28      stb ,y+                         ; stash the character
-                bne tokenize27                  ; brif not end of input - do another
-                jmp tokenize2                   ; stash end of buffer and handle cleanup
-                *pragmapop list