# HG changeset patch
# User William Astle
# Date 1704866082 25200
# Node ID 9d57279c900e04cfdfb0a3027821820689250ea6
# Parent 527212870064cc8e4d7a5ee400dddce28b9080cf
Remove old style keyword lists and jump tables

As part of the move to the new pre-parsing scheme, remove the old keyword lists and jump tables. The main loop still needs modification to work with this new system.

diff -r 527212870064 -r 9d57279c900e Makefile
--- a/Makefile Tue Jan 09 22:47:11 2024 -0700
+++ b/Makefile Tue Jan 09 22:54:42 2024 -0700
@@ -3,7 +3,7 @@
 
 all: bin/lwbasic.rom bin/lwbasic-coco2b.rom bin/lwbasic-coco3.rom bin/coco.zip bin/coco2.zip bin/coco2b.zip bin/coco3.zip
 
-lwb_srcs := bytecode.s consscr.s defs.s error.s expr.s fps.s genio.s init.s int.s interp.s irq.s keyb.s keywords.s keywordtab.s miscdata.s number.s parse.s print.s progctrl.s stack.s token.s vars.s
+lwb_srcs := bytecode.s consscr.s defs.s error.s expr.s fps.s genio.s init.s int.s interp.s irq.s keyb.s keywordtab.s miscdata.s number.s parse.s print.s progctrl.s stack.s vars.s
 lwb_srcs := $(addprefix src/,$(lwb_srcs))
 
 bin/lwbasic.rom: src/lwbasic.s $(lwb_srcs)
diff -r 527212870064 -r 9d57279c900e src/expr.s
--- a/src/expr.s Tue Jan 09 22:47:11 2024 -0700
+++ b/src/expr.s Tue Jan 09 22:54:42 2024 -0700
@@ -17,9 +17,9 @@
  puls b ; get back operator precedence flag
 eval_expr0 jsr curchar ; fetch current input
  beq eval_expr1 ; brif end of expression - we're done
- cmpa #tok_or ; is it above operators?
+ cmpa #token_or ; is it above operators?
  bhi eval_expr1 ; brif so
- suba #tok_plus ; offset to zero for first operator token
+ suba #token_plus ; offset to zero for first operator token
  bcc eval_expr2 ; brif it is an operator
 eval_expr1 rts
 eval_expr2 pshs b ; save previous operator precedence
diff -r 527212870064 -r 9d57279c900e src/interp.s
--- a/src/interp.s Tue Jan 09 22:47:11 2024 -0700
+++ b/src/interp.s Tue Jan 09 22:54:42 2024 -0700
@@ -97,7 +97,7 @@
  beq immediate6 ; brif so - we don't need to insert a line
  pshs x ; save program insert location and line number
  ldx inputptr ; point to line text
- jsr tokenize ; tokenize line, get length to D
+ jsr parse ; tokenize line, get length to D
  leay ,x ; save tokenized line pointer
  addd #4 ; account for next line pointer and line number
  ldx vartab ; get start of copy location
@@ -223,8 +223,7 @@
  beq interpret ; brif end of statement - do the next statement dance
  tsta ; set flags properly for token
  lbpl cmd_let ; brif no command - do assignment (LET command is optional)
- ldx #primaryjump ; point to jump table
- anda #0x7f ; lose bit 7
+ ldx #exectab_cmd ; point to jump table
  leax a,x ; get half way to the correct offset
  ldx a,x ; get the address the other half of the way from here
  jsr nextchar ; skip past token and set flags
diff -r 527212870064 -r 9d57279c900e src/keywords.s
--- a/src/keywords.s Tue Jan 09 22:47:11 2024 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
- *pragmapush list
- *pragma list
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Keyword dictionaries and jump tables. These are defined by several macros which ensure that each command or function
-; entry has an associated jump table entry. These macros are:
-;
-; defcmd string,symbase
-; deffunc string,symbase,flags
-; cmdtab
-; functab
-; cmdjump
-; funcjump
-; defcmd and deffunc will add an entry into the relevant dictionary table as well as adding one to the relevant jump
-; tables. The cmdtab, functab, cmdjump, and funcjump will output the table definitions.
- *pragmapush list
- *pragma nolist
-__cmdnum set 0x80
-__funcnum set 0x80
-defcmd macro noexpand
- setstr __cmdtab="%(__cmdtab)\tfcs {1}\n"
- ifstr ne,"{3}",""
- setstr __cmdjump="%(__cmdjump)\tfdb {3}\n"
- else
- setstr __cmdjump="%(__cmdjump)\tfdb cmd_{2}\n"
- endc
-tok_{2} equ __cmdnum
-__cmdnum set __cmdnum+1
- endm
-deffunc macro noexpand
- setstr __functab="%(__functab)\tfcs {1}\n"
- ifstr ne,"{4}",""
- setstr __funcjump="%(__funcjump)\tfcb {3}\n\tfdb {4}\n"
- else
- setstr __funcjump="%(__funcjump)\tfcb {3}\n\tfdb func_{2}\n"
- endc
-tok_{2} equ __funcnum
-__funcnum set __funcnum+1
- endm
-cmdtab macro
- *pragmapush list
- *pragma nolist
- includestr "%(__cmdtab)"
- *pragmapop list
- fcb 0 ; flag end of table
- endm
-functab macro
- *pragmapush list
- *pragma nolist
- includestr "%(__functab)"
- *pragmapop list
- fcb 0 ; flag end of table
- endm
-cmdjump macro
- *pragmapush nolist
- *pragma nolist
- includestr "%(__cmdjump)"
- *pragmapop list
- endm
-funcjump macro
- *pragmapush nolist
- *pragma nolist
- includestr "%(__funcjump)"
- *pragmapop list
- endm
- *pragmapop list
- defcmd 'REM',rem
- defcmd /'/,apos
- defcmd 'DATA',data
- defcmd 'ELSE',else
- defcmd 'END',end
- defcmd 'STOP',stop
- defcmd 'LET',let
- defcmd 'NEW',new
- defcmd 'PRINT',print
- defcmd 'LIST',list
- defcmd 'RUN',run
- defcmd 'GOTO',goto
- defcmd 'GOSUB',gosub
- defcmd 'RETURN',return
- defcmd 'POP',pop
- defcmd '+',plus,SNERROR ; IMPORTANT: the operators from + to OR MUST stay in this exact sequence
- defcmd '-',minus,SNERROR ; with no gaps because a secondary lookup table is used for operator
- defcmd '*',times,SNERROR ; handling during binary operator handling.
- defcmd '/',divide,SNERROR
- defcmd '^',power,SNERROR
- defcmd '<',less,SNERROR
- defcmd '>',greater,SNERROR
- defcmd '=',equal,SNERROR
- defcmd '<=',lessequal,SNERROR
- defcmd '>=',greaterequal,SNERROR
- defcmd '<>',notequal,SNERROR
- defcmd 'AND',and,SNERROR
- defcmd 'OR',or,SNERROR
- defcmd 'NOT',not,SNERROR
-primarydict cmdtab
-secondarydict functab
-primaryjump cmdjump
-secondaryjump funcjump
- *pragmapop list
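The deleted macros above paired every keyword with a jump-table slot: defcmd and deffunc appended an fcs-style string (last character with bit 7 set) to the dictionary and a handler address to the jump table, while handing out token numbers sequentially from 0x80, which is what lets the interpreter index a jump table directly by token value. A minimal Python sketch of that correspondence, using a hypothetical subset of keywords and handler names (it is not generated from the real tables):

    # Model of the retired defcmd scheme: one dictionary string plus one jump-table
    # slot per keyword, token numbers assigned sequentially starting at 0x80.
    COMMANDS = [("REM", "cmd_rem"), ("DATA", "cmd_data"), ("PRINT", "cmd_print")]  # hypothetical subset

    def build_tables(entries, first_token=0x80):
        dictionary = bytearray()
        jump_table = []                      # handler for token t lives at index t - first_token
        tokens = {}
        for number, (word, handler) in enumerate(entries, start=first_token):
            encoded = word.encode("ascii")
            dictionary += encoded[:-1] + bytes([encoded[-1] | 0x80])  # fcs-style: bit 7 ends the word
            jump_table.append(handler)
            tokens[word] = number
        dictionary.append(0)                 # fcb 0 marks the end of the table
        return bytes(dictionary), jump_table, tokens

    dictionary, jump_table, tokens = build_tables(COMMANDS)
    print(hex(tokens["PRINT"]), jump_table[tokens["PRINT"] - 0x80])   # 0x82 cmd_print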
diff -r 527212870064 -r 9d57279c900e src/lwbasic.s
--- a/src/lwbasic.s Tue Jan 09 22:47:11 2024 -0700
+++ b/src/lwbasic.s Tue Jan 09 22:54:42 2024 -0700
@@ -51,12 +51,10 @@
  include number.s
  include int.s
  include fps.s
- include token.s
  include bytecode.s
  include parse.s
  include keywordtab.s
  include miscdata.s
- include keywords.s
  *pragmapop list
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Need to ensure the vectors are at 0xbff2
diff -r 527212870064 -r 9d57279c900e src/number.s
--- a/src/number.s Tue Jan 09 22:47:11 2024 -0700
+++ b/src/number.s Tue Jan 09 22:54:42 2024 -0700
@@ -140,11 +140,11 @@
  bcs val_parsenum9 ; brif digit
  cmpa #'+ ; positive?
  beq val_parsenum8 ; brif no
- cmpa #tok_plus ; tokenized plus?
+ cmpa #token_plus ; tokenized plus?
  beq val_parsenum8 ; brif so
  cmpa #'- ; negative?
  beq val_parsenum7a ; brif so
- cmpa #tok_minus ; tokenized minus?
+ cmpa #token_minus ; tokenized minus?
  lbne SNERROR ; brif not positive, negative, or digit
 val_parsenum7a com fpaextra+5 ; make sign of exponent negative
 val_parsenum8 jsr nextchar ; eat exponent sign/get next digit
diff -r 527212870064 -r 9d57279c900e src/progctrl.s
--- a/src/progctrl.s Tue Jan 09 22:47:11 2024 -0700
+++ b/src/progctrl.s Tue Jan 09 22:54:42 2024 -0700
@@ -105,7 +105,7 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; GOSUB command
 cmd_gosub jsr parse_lineno ; parse the destination line so return location is after the line number
- ldd #tok_gosub*256+4 ; stack frame details
+ ldd #token_gosub*256+4 ; stack frame details
  jsr cstack_alloc ; make a stack frame
  ldx curline ; save current line pointer
  stx ,u
@@ -127,7 +127,7 @@
  jmp ERROR
 cmd_return0 jsr cstack_next ; move to next entry
  beq RG_ERROR ; brif end of stack - raise error
-cmd_return1 cmpb #tok_gosub ; do we have a GOSUB frame?
+cmd_return1 cmpb #token_gosub ; do we have a GOSUB frame?
  bne cmd_return0 ; brif not - try again
  lda ,s+ ; is it "POP"?
  bne cmd_return2 ; brif so - don't change flow control but clear stack frame
diff -r 527212870064 -r 9d57279c900e src/token.s
--- a/src/token.s Tue Jan 09 22:47:11 2024 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,252 +0,0 @@
- *pragmapush list
- *pragma list
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; The LIST command.
-;
-; Syntax:
-; LIST
-; LIST <line>
-; LIST <line>-
-; LIST -<line>
-; LIST <line>-<line>
-cmd_list bne cmd_list1 ; brif we have arguments
- ldx progtext ; point to start of program
-cmd_list0 ldd #65535 ; set last line to list to max line number
- std binval
- bra cmd_list2 ; go do the listing
-cmd_list1 jsr parse_lineno ; parse starting line number (will default to 0)
- jsr prog_findline ; find the line or the one after where it would be
- jsr curchar ; are we at the end of the command?
- beq cmd_list2 ; brif so - we have a single line (binval will have the start line #)
- ldb #tok_minus ; insist on a - for a range if more than one line number
- jsr syncheckb
- beq cmd_list0 ; brif open ended ending - set to max line number
- jsr parse_lineno ; parse ending of range
-cmd_list2 ldd ,x ; are we at the end of the program?
- bne cmd_list4 ; brif not
-cmd_list3 rts
-cmd_list4 ldd 2,x ; get line number
- cmpd binval ; have we reached the end of the range?
- bhi cmd_list3 ; brif so - we're done
- jsr print_uint16d ; print out line number
- lda #0x20 ; and a space
- jsr writechr
- pshs x ; save start of this line (in case detokenizing exits early)
- leax 4,x ; move past line header
- bsr detokenize ; detokenize line to current output stream
- ldx [,s++] ; point to next line using saved pointer and clear it from the stack
- ; need to add a break check here
- bra cmd_list2 ; go handle another line
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Detokenize a line to the current output stream
-detokenize lda ,x+ ; get character from tokenized line
- bmi detokenize1 ; brif it's a keyword token
- lbeq writecondnl ; do a newline if needed and return
- cmpa #': ; is it a colon?
- bne detokenize0 ; brif not
- ldb ,x ; fetch subsequent character
- cmpb #tok_apos ; apostrophe version of REM?
- beq detokenize ; brif so - skip the colon
- cmpb #tok_else ; ELSE?
- beq detokenize ; brif so - skip the colon
-detokenize0 jsr writechr ; output it unmolested
- bra detokenize ; go handle another character
-detokenize1 ldu #primarydict ; point to primary dictionary table
- cmpa #0xff ; is it a secondary token?
- bne detokenize3 ; brif not
- ldu #secondarydict ; point to secondary dictionary table
- lda ,x+ ; get secondary token value
- bne detokenize3 ; brif not end of line
- leax -1,x ; don't consume the NUL
-detokenize2 lda #'! ; invalid token flag
- bra detokenize0 ; output it and continue
-detokenize3 anda #0x7f ; lose the high bit
- beq detokenize6 ; brif already at the right place
-detokenize4 ldb ,u ; end of dictionary table?
- beq detokenize2 ; brif so - show invalid tokenf lag
-detokenize5 ldb ,u+ ; fetch character in this keyboard
- bpl detokenize5 ; brif not end of keyword (high bit set)
- deca ; at the right token?
- bne detokenize4 ; brif not - skip another
-detokenize6 lda ,u+ ; get keyword character
- bmi detokenize7 ; brif end of keyword
- jsr writechr ; output it
- bra detokenize6 ; go fetch another
-detokenize7 anda #0x7f ; lose the high bit
- bra detokenize0 ; write it and move on with the input
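LIST's detokenizer reverses the stored encoding: bytes below 0x80 are copied through, 0xff announces a two-byte secondary token, and any other high-bit byte selects a keyword by skipping that many bit-7-terminated strings in the primary dictionary. A rough Python equivalent, reusing the table layout from the sketch above and ignoring the colon-before-ELSE/' special case (an approximation, not the ROM routine):

    def detokenize(line, primary, secondary):
        out, i = [], 0
        while i < len(line) and line[i] != 0:          # NUL terminates the stored line
            b = line[i]; i += 1
            if b < 0x80:                               # ordinary character: copy through
                out.append(chr(b))
                continue
            table = primary
            if b == 0xff:                              # secondary token: real value follows
                table, b = secondary, line[i]; i += 1
            out.append(keyword_at(table, b & 0x7f))
        return "".join(out)

    def keyword_at(table, index):
        pos = 0
        for _ in range(index):                         # skip `index` keywords
            while (table[pos] & 0x80) == 0:
                pos += 1
            pos += 1
        word = []
        while True:                                    # copy until the bit-7 terminator
            word.append(chr(table[pos] & 0x7f))
            if table[pos] & 0x80:
                return "".join(word)
            pos += 1

With the tables from the first sketch, a line stored as 0x82 '"' 'H' 'I' '"' 0x00 comes back as PRINT"HI".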
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Canonicalize certain sequences; ALL the rewrite sequences must make the result shorter or keep it the same size
-makecanontab fcb tok_less,2
- fcb tok_greater,tok_notequal
- fcb tok_equal,tok_lessequal
- fcb tok_greater,2
- fcb tok_less,tok_notequal
- fcb tok_equal,tok_greaterequal
- fcb tok_equal,2
- fcb tok_greater,tok_greaterequal
- fcb tok_less,tok_lessequal
- fcb 0
-makecanon leay ,x ; point output to start of the buffer
-makecanon0 lda ,x+ ; get current byte
- sta ,y+ ; save in output
- bne makecanon1 ; brif not end of line
- rts
-makecanon1 bpl makecanon0 ; brif not a token
- cmpa #0xff ; is it secondary?
- bne makecanon2 ; brif not
- leax 1,x ; move past second half
- bra makecanon0 ; go handle next byte
-makecanon2 ldu #makecanontab ; point to replacement table
-makecanon3 cmpa ,u+ ; is it this entry?
- beq makecanon4 ; brif so
- ldb ,u+ ; get number of entries
- lslb ; 2 bytes per
- leau b,u ; move past entry
- ldb ,u ; end of table?
- bne makecanon3 ; brif not
- bra makecanon0 ; no substitutions found
-makecanon4 pshs x ; save original source pointer
-makecanon5 lda ,x+ ; get next character
- cmpa #0x20 ; is it space?
- beq makecanon5 ; brif so - skip it
- ldb ,u+ ; get number of replacement candidates
-makecanon6 cmpa ,u++ ; does it match?
- beq makecanon7 ; brif so
- decb ; seen all of them?
- bne makecanon6 ; brif not
- puls x ; restore input pointer
- bra makecanon0 ; go handle next input
-makecanon7 leas 2,s ; clear saved input pointer
- lda -1,u ; get replacement token
- sta -1,y ; put it in the output
- bra makecanon0 ; go handle more input
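makecanontab drives a small rewrite pass over the tokenized line: when a relational token is followed, spaces aside, by one of its listed partners, the pair collapses into the single combined token, and the result never grows. A hedged Python rendering of that substitution step (token values here are stand-ins, not the ROM's):

    # Stand-in token values; the real ones come from the keyword table order.
    LT, GT, EQ, LE, GE, NE = 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a

    CANON = {                      # leading token -> {following token: combined token}
        LT: {GT: NE, EQ: LE},      # "< >" -> "<>", "< =" -> "<="
        GT: {LT: NE, EQ: GE},      # "> <" -> "<>", "> =" -> ">="
        EQ: {GT: GE, LT: LE},      # "= >" -> ">=", "= <" -> "<="
    }

    def makecanon(tokens):
        out, i = [], 0
        while i < len(tokens):
            cur = tokens[i]; i += 1
            out.append(cur)
            rules = CANON.get(cur)
            if not rules:
                continue
            j = i
            while j < len(tokens) and tokens[j] == 0x20:   # skip spaces, as the asm does
                j += 1
            if j < len(tokens) and tokens[j] in rules:
                out[-1] = rules[tokens[j]]                 # overwrite with the combined token
                i = j + 1                                  # consume the partner and the spaces
        return out

So makecanon([LT, 0x20, GT]) yields [NE]: the two-token sequence collapses to the single <> token, which is why the rewrite can never lengthen the line.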
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Tokenize line to tokebuff
-;
-; Enter with X pointing to the text to tokenize.
-; Exit with X pointing to the start of the tokenized line and D holding the length of the tokenized line.
-tokenize clr tok_skipkw ; clear "not token" flag
- clr tok_skipdt ; clear the "in data" flag
- ldy #tokebuff ; point to destination buffer
- pshs y ; set return value
-tokenize0 lda ,x+ ; get input character
- bne tokenize3 ; brif not end of input
-tokenize1 sta ,y+ ; blank out final byte in result
-tokenize2 ldx #tokebuff ; point to start of tokenized line
- bsr makecanon ; canonicalize certain sequences
- tfr y,d ; get end address to accumulator
- subd #tokebuff ; subtract out start; gives length of result
- puls x,pc ; set return pointer and return
-tokenize3 tst tok_skipkw ; are we in the middle of a "not token"?
- beq tokenize6 ; brif not
- jsr setcifalpha ; is it alpha
- bcs tokenize4 ; brif so - store it and continue
- jsr setcifdigit ; is it numeric?
- bcc tokenize5 ; brif not
-tokenize4 sta ,y+ ; save output character
- bra tokenize0 ; check for another
-tokenize5 clr tok_skipkw ; clear the "not token" flag
-tokenize6 cmpa #'" ; is it a string?
- bne tokenize8 ; brif not
- sta ,y+ ; save string delimiter
-tokenize7 lda ,x+ ; get input character
- beq tokenize1 ; brif end of input
- sta ,y+ ; save it in output
- cmpa #'" ; end of string?
- bne tokenize7 ; brif not
- bra tokenize0 ; brif
-tokenize8 cmpa #': ; end of statement?
- bne tokenize9 ; brif not
- clr tok_skipdt ; reset "in data" flag
- bra tokenize4 ; stash it and continue
-tokenize9 cmpa #0x20 ; is it a space?
- beq tokenize4 ; brif so - stash it unmodified
- tst tok_skipdt ; are we "in data"?
- bne tokenize4 ; brif so - don't tokenize it
- cmpa #'? ; PRINT shortcut?
- bne tokenize10 ; brif not
- lda #tok_print ; load token for PRINT
- bra tokenize4 ; move stash it and move on
-tokenize10 cmpa #'' ; ' shortcut for remark?
- bne tokenize12 ; brif not
- ldd #':*256+tok_apos ; put token for ' and an implied colon
- std ,y++ ; stash it
-tokenize11 lda ,x+ ; fetch byte from input
- sta ,y+ ; stash in output
- bne tokenize11 ; brif not end of input
- bra tokenize2 ; go finish up
-tokenize12 jsr setcifdigit ; is it a digit?
- bcs tokenize4 ; brif so - pass it through
- tsta ; is the high bit set?
- bmi tokenize0 ; ignore it if so
- ldu #primarydict ; point to keyword table
- leax -1,x ; back up input to start of potential token
- clr tok_kwtype ; set secondary table flag to primary table
- clr tok_kwmatch ; clear the matched token
- clr tok_kwmatch+1
- clr tok_kwmatchl ; set length matched
- pshs x ; save start of input token
-tokenize13 clr tok_kwnum ; clear keyword number
-tokenize14 ldb ,u ; are we at the end of the table?
- bne tokenize16 ; brif not
- ldu #secondarydict ; point to secondary token dictionary
- com tok_kwtype ; flip to secondary token flag
- bne tokenize13 ; brif we haven't already done the secondaries
- puls x ; get back input pointer
- ldb tok_kwmatchl ; get length of best match
- beq tokenize15 ; brif we don't have a match
- abx ; move input pointer past matched token
- ldd tok_kwmatch ; get matched token number
- tsta ; is it a primary?
- beq tokenize24 ; brif so
- bra tokenize23 ; go stash two byte token
-tokenize15 com tok_skipkw ; set "not token flag"
- lda ,x+ ; get character
- bra tokenize4 ; stash it and continue
-tokenize16 ldx ,s ; get back start of input token
- clra ; initalize match length counter
-tokenize17 inca ; bump length counter
- ldb ,x+ ; get input character
- cmpb #'z ; is it above lower case Z?
- bhi tokenize18 ; brif so
- cmpb #'a ; is it below lower case A?
- blo tokenize18 ; brif so
- subb #0x20 ; convert to upper case
-tokenize18 subb ,u+ ; does it match?
- beq tokenize17 ; brif so - check another
- cmpb #0x80 ; did it match with high bit set?
- beq tokenize21 ; brif so - exact match
- leau -1,u ; back up to current test character
-tokenize19 ldb ,u+ ; end of token?
- bpl tokenize19 ; brif not
-tokenize20 inc tok_kwnum ; bump token counter
- bra tokenize14 ; go check another one
-tokenize21 cmpa tok_kwmatchl ; is it a longer match?
- bls tokenize20 ; brif not, ignore it
- sta tok_kwmatchl ; save new match length
- ldd tok_kwtype ; get the matched token count
- orb #0x80 ; set token flag
- std tok_kwmatch ; save matched token
- bra tokenize20 ; keep looking through the tables
-tokenize22 lda #': ; for putting implied colons in
-tokenize23 std ,y++ ; put output into buffer
- jmp tokenize0 ; go handle more input
-tokenize24 cmpb #tok_else ; is it ELSE?
- beq tokenize22 ; brif so - stash it with colon
- cmpb #tok_data ; is it DATA?
- bne tokenize26 ; brif not
- stb tok_skipdt ; set "in data" flag
-tokenize25 stb ,y+ ; stash token
- jmp tokenize0 ; go handle more
-tokenize26 cmpb #tok_rem ; is it REM?
- beq tokenize28 ; brif so
- cmpb #tok_apos ; apostrophe REM?
- bne tokenize25 ; brif not - stash token and continue
- lda #': ; stash the implied colon
- sta ,y+
- bra tokenize28
-tokenize27 ldb ,x+ ; fetch next input character
-tokenize28 stb ,y+ ; stash the character
- bne tokenize27 ; brif not end of input - do another
- jmp tokenize2 ; stash end of buffer and handle cleanup
- *pragmapop list
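The tokenizer retired here (superseded by the pre-parser in parse.s) worked by greedy longest match: at each candidate position it scanned both dictionaries, upper-casing the input as it compared, kept the longest keyword found, and emitted either a one-byte primary token or 0xff plus the secondary token; anything unmatched passed through as literal text, and strings, DATA items, and REM bodies were never scanned. A compact Python approximation of just the longest-match step, assuming the fcs-style tables from the first sketch (this models the removed code, not the new parse.s scheme):

    def longest_match(text, pos, primary, secondary):
        """Return (token_bytes, matched_length) for the longest keyword at text[pos],
        or (b"", 0) when nothing matches. Tables are fcs-style as sketched earlier."""
        best = (b"", 0)
        for prefix, table in ((b"", primary), (b"\xff", secondary)):
            number, i = 0x80, 0
            while i < len(table) and table[i] != 0:        # a zero byte ends each table
                word = bytearray()
                while True:                                # keyword ends on a bit-7 byte
                    word.append(table[i] & 0x7f)
                    i += 1
                    if table[i - 1] & 0x80:
                        break
                keyword = bytes(word)
                if text[pos:pos + len(keyword)].upper() == keyword and len(keyword) > best[1]:
                    best = (prefix + bytes([number]), len(keyword))   # 0xff prefix marks secondary tokens
                number += 1
        return best

With the dictionary from the first sketch, longest_match(b"printx", 0, dictionary, b"\x00") returns (b"\x82", 5), matching PRINT and leaving the trailing character to be emitted literally.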