Mercurial > hg > index.cgi
changeset 26:001b9ab63731
Add tokenization, keyword lists, and some interpretation loop bits
Add simple interpretation loop for immediate mode. Also implement
tokenization routine, nextchar/curchar routines, and a framework for
handling the keyword and jump tables.
The keyword and jump tables rely on features only available in lwasm
to create macros to make certain bits automatic rather than requiring
magic numbers all over the code, notably with token numbers when they
need to be referenced explicitly in the code.
author | William Astle <lost@l-w.ca> |
---|---|
date | Sun, 13 Nov 2022 23:42:09 -0700 |
parents | 9593401d23cf |
children | 5db76d113c37 |
files | src/lwbasic.s |
diffstat | 1 files changed, 216 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lwbasic.s Sun Nov 13 22:07:38 2022 -0700 +++ b/src/lwbasic.s Sun Nov 13 23:42:09 2022 -0700 @@ -219,6 +219,7 @@ rmb 0x71-* ; align RSTFLG/RSTVEC for stock ROM compatibility RSTFLG rmb 1 ; 0x55 if RSTVEC is valid RSTVEC rmb 2 ; points to warm start routine (must start with NOP) +inputptr rmb 2 ; pointer to current program execution location rmb 0x100-* ; make sure the stuff that isn't direct page is outside of it SW3VEC rmb 3 ; SWI3 vector (for compatibility) SW2VEC rmb 3 ; SWI2 vector (for compatibility) @@ -229,6 +230,7 @@ keyb_state rmb 8 ; rollover table state keyb_buff rmb keyb_bufflen ; the keyboard ring buffer linebuff rmb linebuffsize ; the line input buffer +tokebuff rmb linebuffsize+50 ; make it as long as line buffer plus a margin ifne *&0x1ff rmb 0x200-(*&0x1ff) endc @@ -963,6 +965,36 @@ suba #0x20 ; shift to upper case bra writechr ; go output it ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Fetch next input character, skip spaces. This is structured the way it is to avoid burning any register except A +; which is used for the returned value. Z will be set if the input character is NUL or a colon. C will be set if the +; input character is an ASCII digit. This allows testing Z to identify the end of a command due to either a colon or +; the end of a line. +; +; Compared to Color Basic, the instruction sequence only varies in the handling of the LDA. In Color Basic, the sequence +; is an LDA extended followed by a JMP extended. This totals to 9 cycles (5 for LDA, 4 for JMP). In LWBasic, an LDA +; with extended indirect addressing is used. This also totals 9 cycles. The only other difference is when a space is +; detected where the branch can be direct to the nextchar code instead of having to branch around a direct page JUMP +; which saves 3 cycles for the case where a space is detected. 
In other words, this is only slower by virtue of the
+; fact that it is called with an extended JSR instead of a direct JSR which causes one extra cycle to be used there
+; and one extra byte for each call to nextchar or curchar.
+;
+; On 6309, native mode saves an extra cycle in the LDA sequence using the LDA extended followed by JMP extended
+; sequence.
+;
+; This whole thing could be sped up by keeping the input pointer in a register. However, retaining the ability to
+; use Y without having to save it first is likely more beneficial.
+nextchar        inc inputptr+1                  ; bump LSB of input pointer
+                bne curchar                     ; brif no carry
+                inc inputptr                    ; bump MSB
+curchar         lda [inputptr]                  ; read the byte the input pointer refers to
+                cmpa #'9+1                      ; clear C if above ASCII digits, Z if colon
+                bhs curchar0                    ; brif above the ASCII digits
+                cmpa #0x20                      ; is it a space?
+                beq nextchar                    ; brif so - skip over it
+                suba #'0                        ; clever way to set C if >= ASCII 0, Z if zero
+                suba #-'0                       ; second subtract restores A and leaves the final C and Z
+curchar0        rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; The error handler
 ;
 ; Enter with the error number in B. This routine will do some cleanup and handle any ON ERROR GOTO handler that
@@ -994,15 +1026,196 @@
                 jsr console_outstrn
 immediate0      jsr readline                    ; read input line
                 bcs immediate0                  ; brif ended with BREAK
-                ; handle line
-                bra immediate
+                ldy #linebuff                   ; point Y at the start of the line buffer (tokenize takes its input in Y)
+                sty inputptr                    ; initialize input pointer - curchar reads through inputptr, not Y
+                jsr curchar                     ; get current input character
+                bcs immediate1                  ; brif it starts with a digit - we have a line number
+                tsta                            ; is there anything there at all (end of line)?
+                beq immediate0                  ; brif not - read another line
+                jsr tokenize                    ; tokenize the line at Y, return with pointer to result in Y
+;               jsr interpretline               ; go interpret the tokenized line
+                bra immediate                   ; go handle another line
+immediate1                                      ; handle line insert/delete/modify
+                bra immediate0                  ; placeholder: no line editing code here yet - just read another line
 prompt          fcn 'OK'
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Error messages
 errormsg        fcn 'NEXT without FOR'          ; 0
                 fcn 'Syntax error'              ; 1
-                ifndef COCO3
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Set carry if A is an upper/lower case ASCII letter; A is preserved. Enter at setcifualpha to test upper case only.
+setcifalpha     cmpa #'z+1                      ; is it above lower case Z?
+                bhs setcifalpha0                ; brif so, C clear
+                suba #'a                        ; set C if >= lower case A
+                suba #-'a                       ; second subtract restores A and leaves C set if A >= lower case A
+                bcs setcifalpha0                ; brif lower case alpha
+setcifualpha    cmpa #'Z+1                      ; is it above upper case Z?
+                bhs setcifalpha0                ; brif so, C clear
+                suba #'A                        ; set C if >= upper case A
+                suba #-'A                       ; second subtract restores A and leaves C set if A >= upper case A
+setcifalpha0    rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Set carry if A is an ASCII digit; A is preserved.
+setcifdigit     cmpa #'9+1                      ; is it above digit 9?
+                bhs setcifdigit0                ; brif so, C clear
+                suba #'0                        ; set C if >= digit 0
+                suba #-'0                       ; second subtract restores A and leaves C set if A >= digit 0
+setcifdigit0    rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Tokenize line to tokebuff
+;
+; Enter with Y pointing to the text to tokenize.
+; Exit with Y pointing to the start of the tokenized line and D holding the length of the tokenized line.
+tokenize        clra                            ; clear "not token" flag
+                clrb                            ; clear the "in data" flag
+                ldu #tokebuff                   ; point to destination buffer
+                pshs d,u                        ; stack: ,s = "not token" flag, 1,s = "in data" flag, 2,s = return value
+tokenize0       lda ,y+                         ; get input character
+                bne tokenize1                   ; brif not end of input
+tokenize0a      sta ,u+                         ; blank out final byte in result (falls through to the exit path)
+tokenize0b      leas 2,s                        ; clean up the two flag bytes on the stack
+                tfr u,d                         ; get end address to accumulator
+                subd #tokebuff                  ; subtract out start; gives length of result
+                puls y,pc                       ; set return pointer (start of tokebuff) and return
+tokenize1       tst ,s                          ; are we in the middle of a "not token"?
+                beq tokenize3                   ; brif not - go see if this starts a string, statement end, or keyword
+                bsr setcifalpha                 ; is it alpha
+                bcs tokenize2                   ; brif so - store it and continue
+                bsr setcifdigit                 ; is it numeric?
+                bcc tokenize3                   ; brif not - the "not token" run has ended
+tokenize2       sta ,u+                         ; save output character
+                bra tokenize0                   ; check for another
+tokenize3       clr ,s                          ; clear the "not token" flag
+                cmpa #'"                        ; is it a string?
+                bne tokenize5                   ; brif not
+                sta ,u+                         ; save string delimiter
+tokenize4       lda ,y+                         ; get input character
+                beq tokenize0a                  ; brif end of input - store the terminator and exit
+                sta ,u+                         ; save it in output
+                cmpa #'"                        ; end of string?
+                bne tokenize4                   ; brif not
+                bra tokenize0                   ; string is closed - go handle more input
+tokenize5       cmpa #':                        ; end of statement?
+                bne tokenize6                   ; brif not
+                clr 1,s                         ; reset "in data" flag
+                bra tokenize2                   ; stash it and continue
+tokenize6       cmpa #0x20                      ; is it a space?
+                beq tokenize2                   ; brif so - stash it unmodified
+                tst 1,s                         ; are we "in data"?
+                bne tokenize2                   ; brif so - don't tokenize it
+                cmpa #''                        ; ' shortcut for remark?
+                bne tokenize9                   ; brif not
+                ldd #':*256+tok_apos            ; put token for ' and an implied colon
+                std ,u++                        ; stash it
+tokenize8       lda ,y+                         ; fetch byte from input
+                sta ,u+                         ; stash in output
+                beq tokenize0b                  ; brif end of input - terminator is already stored
+                bra tokenize8                   ; keep copying line data over
+tokenize9       bsr setcifdigit                 ; is it a digit?
+                bcs tokenize2                   ; brif so - pass it through
+                tsta                            ; is the high bit set?
+                bmi tokenize0                   ; ignore it if so - drop the byte and keep going
+                ldx #primarydict                ; point to keyword table
+                leay -1,y                       ; back up input to start of potential token
+                clrb                            ; initialize the token number
+                clra                            ; initialize secondary table flag
+                pshs d,y                        ; stack: ,s = secondary flag, 1,s = token counter, 2,s = start of input token
+tokenize10      ldb ,x                          ; are we at the end of the table?
+                bne tokenize11                  ; brif not
+                ldx #secondarydict              ; point to secondary token dictionary
+                clr 1,s                         ; reset token counter
+                com ,s                          ; flip to secondary token flag
+                bne tokenize10                  ; brif we haven't already done the secondaries
+                puls d,y                        ; get back input pointer and clear stack temporaries
+                com ,s                          ; set "not token flag"
+                lda ,y+                         ; get character
+                bra tokenize2                   ; stash it and continue
+tokenize11      ldy 2,s                         ; get back start of input token
+tokenize12      ldb ,y+                         ; get input character
+                cmpb #'z                        ; is it above lower case Z?
+                bhi tokenize13                  ; brif so
+                cmpb #'a                        ; is it below lower case A?
+                blo tokenize13                  ; brif so
+                subb #0x20                      ; convert to upper case
+tokenize13      subb ,x+                        ; does it match?
+                beq tokenize12                  ; brif so - check another
+                cmpb #0x80                      ; did it match with high bit set?
+                beq tokenize15                  ; brif so - exact match
+                leax -1,x                       ; back up to current test character
+tokenize14      ldb ,x+                         ; end of token?
+                bpl tokenize14                  ; brif not
+                inc 1,s                         ; bump token counter
+                bra tokenize10                  ; go check another one (after testing for end of table)
+tokenize15      orb 1,s                         ; merge token number with the high bit (bit 7 set from above)
+                lda ,s++                        ; get back secondary flag, set flags on it, drop flag and counter
+                leas 2,s                        ; clean up saved input pointer from stack
+                bpl tokenize17                  ; brif primary token
+                skip2                           ; presumably skips the 2-byte LDA below so A keeps the secondary flag
+tokenize18      lda #':                         ; for putting implied colons in
+tokenize16      std ,u++                        ; put output into buffer
+                jmp tokenize0                   ; go handle more input
+tokenize17      cmpb #tok_else                  ; is it ELSE?
+                beq tokenize18                  ; brif so - stash it with colon
+                cmpb #tok_data                  ; is it DATA?
+                bne tokenize18a                 ; brif not
+                stb 1,s                         ; set "in data" flag
+tokenize20      stb ,u+                         ; stash token
+                jmp tokenize0                   ; go handle more
+tokenize18a     cmpb #tok_rem                   ; is it REM?
+                beq tokenize19                  ; brif so
+                cmpb #tok_apos                  ; apostrophe REM?
+                bne tokenize20                  ; brif not - stash token and continue
+                lda #':                         ; stash the implied colon
+                sta ,u+
+                bra tokenize19                  ; then stash the token and copy the rest of the line
+tokenize19a     ldb ,y+                         ; fetch next input character
+tokenize19      stb ,u+                         ; stash the character
+                bne tokenize19a                 ; brif not end of input - do another
+                jmp tokenize0b                  ; stash end of buffer and handle cleanup
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Keyword dictionaries and jump tables. These are defined by several macros which ensure that each command or function
+; entry has an associated jump table entry. These macros are:
+;
+; defcmd string,symbase
+; deffunc string,symbase,flags
+; cmdtab
+; functab
+; cmdjump
+; funcjump
+; defcmd and deffunc will add an entry into the relevant dictionary table as well as adding one to the relevant jump
+; tables. The cmdtab, functab, cmdjump, and funcjump will output the table definitions.
+                *pragmapush list
+                *pragma nolist
+__cmdnum        set 0x80
+__funcnum       set 0x80
+defcmd          macro noexpand
+                setstr __cmdtab="%(__cmdtab)\tfcs {1}\n"
+                setstr __cmdjump="%(__cmdjump)\tfdb cmd_{2}\n"
+tok_{2}         equ __cmdnum
+__cmdnum        set __cmdnum+1
+                endm
+deffunc         macro noexpand
+                setstr __functab="%(__functab)\tfcs {1}\n"
+                setstr __funcjump="%(__funcjump)\tfcb {3}\n\tfdb func_{2}\n"
+tok_{2}         equ __funcnum
+__funcnum       set __funcnum+1
+                endm
+cmdtab          macro
+                includestr "%(__cmdtab)"
+                fcb 0                           ; flag end of table
+                endm
+functab         macro
+                includestr "%(__functab)"
+                fcb 0                           ; flag end of table
+                endm
+                *pragmapop list
+                defcmd 'REM',rem
+                defcmd /'/,apos
+                defcmd 'DATA',data
+                defcmd 'ELSE',else
+primarydict     cmdtab
+secondarydict   functab
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Need to ensure the vectors are at 0xbff2
                 zmb 0xbff2-*                    ; pad ROM up to the vector point
                 fdb SW3VEC                      ; SWI3 vector