changeset 26:001b9ab63731

Add tokenization, keyword lists, and some interpretation loop bits. Add simple interpretation loop for immediate mode. Also implement tokenization routine, nextchar/curchar routines, and a framework for handling the keyword and jump tables. The keyword and jump tables rely on features only available in lwasm to create macros to make certain bits automatic rather than requiring magic numbers all over the code, notably with token numbers when they need to be referenced explicitly in the code.
author William Astle <lost@l-w.ca>
date Sun, 13 Nov 2022 23:42:09 -0700
parents 9593401d23cf
children 5db76d113c37
files src/lwbasic.s
diffstat 1 files changed, 216 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/src/lwbasic.s	Sun Nov 13 22:07:38 2022 -0700
+++ b/src/lwbasic.s	Sun Nov 13 23:42:09 2022 -0700
@@ -219,6 +219,7 @@
                 rmb 0x71-*                      ; align RSTFLG/RSTVEC for stock ROM compatibility
 RSTFLG          rmb 1                           ; 0x55 if RSTVEC is valid
 RSTVEC          rmb 2                           ; points to warm start routine (must start with NOP)
+inputptr        rmb 2                           ; pointer to current program execution location
                 rmb 0x100-*                     ; make sure the stuff that isn't direct page is outside of it
 SW3VEC          rmb 3                           ; SWI3 vector (for compatibility)
 SW2VEC          rmb 3                           ; SWI2 vector (for compatibility)
@@ -229,6 +230,7 @@
 keyb_state      rmb 8                           ; rollover table state
 keyb_buff       rmb keyb_bufflen                ; the keyboard ring buffer
 linebuff        rmb linebuffsize                ; the line input buffer
+tokebuff        rmb linebuffsize+50             ; make it as long as line buffer plus a margin
                 ifne *&0x1ff
                 rmb 0x200-(*&0x1ff)
                 endc
@@ -963,6 +965,36 @@
                 suba #0x20                      ; shift to upper case
                 bra writechr                    ; go output it
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Fetch next input character, skip spaces. This is structured the way it is to avoid burning any register except A
+; which is used for the returned value. Z will be set if the input character is NUL or a colon. C will be set if the
+; input character is an ASCII digit. This allows testing Z to identify the end of a command due to either a colon or
+; the end of a line.
+;
+; Entry points:
+;   nextchar - advance inputptr by one, then fetch as for curchar
+;   curchar  - fetch the byte inputptr currently points to (advancing past any spaces first)
+; In both cases the pointer is read from inputptr, so the caller must have stored a valid address there first. On
+; return, inputptr points at the character returned in A.
+;
+; Compared to Color Basic, the instruction sequence only varies in the handling of the LDA. In Color Basic, the sequence
+; is an LDA extended followed by a JMP extended. This totals to 9 cycles (5 for LDA, 4 for JMP). In LWBasic, an LDA
+; with extended indirect addressing is used. This also totals 9 cycles. The only other difference is when a space is
+; detected where the branch can be direct to the nextchar code instead of having to branch around a direct page JUMP
+; which saves 3 cycles for the case where a space is detected. In other words, this is only slower by virtue of the
+; fact that it is called with an extended JSR instead of a direct JSR which causes one extra cycle to be used there
+; and one extra byte for each call to nextchar or curchar.
+;
+; On 6309, native mode saves an extra cycle in the LDA sequence using the LDA extended followed by JMP extended
+; sequence.
+;
+; This whole thing could be sped up by keeping the input pointer in a register. However, retaining the ability to
+; use Y without having to save it first is likely more beneficial.
+nextchar        inc inputptr+1                  ; bump LSB of input pointer
+                bne curchar                     ; brif no carry
+                inc inputptr                    ; bump MSB
+curchar         lda [inputptr]                  ; read the byte
+                cmpa #'9+1                      ; C is clear if at or above colon; Z is set if it is a colon
+                bhs curchar0                    ; brif above the ASCII digits
+                cmpa #0x20                      ; is it a space?
+                beq nextchar                    ; brif so - skip over it
+                suba #'0                        ; clever way to set C if >= ASCII 0, Z if zero
+                suba #-'0                       ; the second subtract also puts the original value back in A
+curchar0        rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; The error handler
 ;
 ; Enter with the error number in B. This routine will do some cleanup and handle any ON ERROR GOTO handler that
@@ -994,15 +1026,195 @@
                 jsr console_outstrn
 immediate0      jsr readline                    ; read input line
                 bcs immediate0                  ; brif ended with BREAK
-                ; handle line
-                bra immediate
+                ldy #linebuff                   ; point to the start of the line that was just read
+                sty inputptr                    ; curchar fetches through inputptr, so it has to be set up first
+                jsr curchar                     ; get first non-space character (only A is changed; Y still -> linebuff)
+                bcs immediate1                  ; brif it starts with a digit - we have a line number
+                tsta                            ; is there anything there at all (end of line)?
+                beq immediate0                  ; brif not - read another line
+                jsr tokenize                    ; tokenize the line at Y, return with pointer to result in Y
+;                jsr interpretline               ; go interpret the tokenized line
+                bra immediate                   ; go handle another line
+immediate1      ; handle line insert/delete/modify
+                bra immediate0
 prompt          fcn 'OK'
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Error messages
 errormsg        fcn 'NEXT without FOR'          ; 0
                 fcn 'Syntax error'              ; 1
-                ifndef COCO3
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Set carry if upper/lower case alpha
+;
+; Enter with the character to test in A.
+; Exit with C set if A holds an ASCII letter and C clear otherwise. A is returned unchanged; each pair of SUBA
+; instructions subtracts a value and then subtracts its negative, so the net effect on A is zero and only the
+; carry from the second subtract survives.
+;
+; Entry points:
+;   setcifalpha  - accepts a-z or A-Z
+;   setcifualpha - accepts A-Z only
+setcifalpha     cmpa #'z+1                      ; is it above lower case Z?
+                bhs setcifalpha0                ; brif so, C clear
+                suba #'a                        ; set C if >= lower case A
+                suba #-'a                       ; (restores A; C is now set only for a-z)
+                bcs setcifalpha0                ; brif lower case alpha
+setcifualpha    cmpa #'Z+1                      ; is it above upper case Z?
+                bhs setcifalpha0                ; brif so, C clear
+                suba #'A                        ; set C if >= upper case A
+                suba #-'A                       ; (restores A; C is now set only for A-Z)
+setcifalpha0    rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Set carry if digit
+;
+; Enter with the character to test in A.
+; Exit with C set if A holds an ASCII digit (0-9) and C clear otherwise. A is returned unchanged because the second
+; SUBA adds back what the first one took away.
+setcifdigit     cmpa #'9+1                      ; is it above digit 9?
+                bhs setcifdigit0                ; brif so, C clear
+                suba #'0                        ; set C if >= digit 0
+                suba #-'0                       ; (restores A; C is now set only for 0-9)
+setcifdigit0    rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Tokenize line to tokebuff
+;
+; Enter with Y pointing to the NUL terminated text to tokenize.
+; Exit with Y pointing to the start of the tokenized line and D holding the length of the tokenized line (the
+; terminating NUL is counted). X and U are used as scratch.
+;
+; Output format: text that is not a keyword is copied through unchanged. A keyword found in primarydict becomes a
+; single byte (0x80 + its position in the table). A keyword found in secondarydict becomes 0xff followed by such a
+; byte. ELSE and the ' remark shortcut get an implied colon stored ahead of the token. Everything after REM or ' is
+; copied verbatim to the end of the line. Nothing is tokenized inside a quoted string or between DATA and the next
+; colon.
+;
+; Stack usage while running:
+;   0,s         "not token" flag; nonzero while the remainder of a word that failed keyword lookup is passed through
+;   1,s         "in data" flag; nonzero from a DATA token until the next colon
+;   2,s         address of tokebuff, pulled into Y on exit
+; During a dictionary search (tokenize10 through tokenize15) four more bytes sit below those:
+;   0,s         token counter (position in the current dictionary of the entry being tested)
+;   1,s         secondary dictionary flag (0 while searching primarydict, 0xff while searching secondarydict)
+;   2,s         address of the first input character of the candidate keyword
+;
+; NOTE(review): any character that fails the dictionary search sets the "not token" flag, including punctuation, so a
+; keyword that immediately follows such a character is passed through untokenized. Confirm whether that is intended.
+tokenize        clra                            ; clear "not token" flag
+                clrb                            ; clear the "in data" flag
+                ldu #tokebuff                   ; point to destination buffer
+                pshs d,u                        ; save both flags and the return value (see stack usage above)
+tokenize0       lda ,y+                         ; get input character
+                bne tokenize1                   ; brif not end of input
+tokenize0a      sta ,u+                         ; blank out final byte in result
+tokenize0b      leas 2,s                        ; clean up temporaries on stack
+                tfr u,d                         ; get end address to accumulator
+                subd #tokebuff                  ; subtract out start; gives length of result
+                puls y,pc                       ; set return pointer and return
+tokenize1       tst ,s                          ; are we in the middle of a "not token"?
+                beq tokenize3                   ; brif not - see if something tokenizable starts here
+                bsr setcifalpha                 ; is it alpha
+                bcs tokenize2                   ; brif so - store it and continue
+                bsr setcifdigit                 ; is it numeric?
+                bcc tokenize3                   ; brif not
+tokenize2       sta ,u+                         ; save output character
+                bra tokenize0                   ; check for another
+tokenize3       clr ,s                          ; clear the "not token" flag
+                cmpa #'"                        ; is it a string?
+                bne tokenize5                   ; brif not
+                sta ,u+                         ; save string delimiter
+tokenize4       lda ,y+                         ; get input character
+                beq tokenize0a                  ; brif end of input - terminate the result and return
+                sta ,u+                         ; save it in output
+                cmpa #'"                        ; end of string?
+                bne tokenize4                   ; brif not
+                bra tokenize0                   ; string is closed - back to regular processing
+tokenize5       cmpa #':                        ; end of statement?
+                bne tokenize6                   ; brif not
+                clr 1,s                         ; reset "in data" flag
+                bra tokenize2                   ; stash it and continue
+tokenize6       cmpa #0x20                      ; is it a space?
+                beq tokenize2                   ; brif so - stash it unmodified
+                tst 1,s                         ; are we "in data"?
+                bne tokenize2                   ; brif so - don't tokenize it
+                cmpa #''                        ; ' shortcut for remark?
+                bne tokenize9                   ; brif not
+                ldd #':*256+tok_apos            ; put token for ' and an implied colon
+                std ,u++                        ; stash it
+tokenize8       lda ,y+                         ; fetch byte from input
+                sta ,u+                         ; stash in output (the terminating NUL is copied too)
+                bne tokenize8                   ; brif not end of input - keep copying line data over
+                bra tokenize0b                  ; NUL already stored - go clean up and return
+tokenize9       bsr setcifdigit                 ; is it a digit?
+                bcs tokenize2                   ; brif so - pass it through
+                tsta                            ; is the high bit set?
+                bmi tokenize0                   ; ignore it if so - drop the byte and carry on with the next one
+                ldx #primarydict                ; point to keyword table
+                leay -1,y                       ; back up input to start of potential token
+                clra                            ; initialize the token counter (lands at 0,s)
+                clrb                            ; initialize secondary table flag (lands at 1,s)
+                pshs d,y                        ; save start of input token and the token counter
+tokenize10      ldb ,x                          ; are we at the end of the table?
+                bne tokenize11                  ; brif not
+                ldx #secondarydict              ; point to secondary token dictionary
+                clr ,s                          ; reset token counter
+                com 1,s                         ; flip to secondary token flag
+                bne tokenize10                  ; brif we haven't already done the secondaries
+                puls d,y                        ; get back input pointer and clear stack temporaries
+                com ,s                          ; set "not token flag"
+                lda ,y+                         ; get character
+                bra tokenize2                   ; stash it and continue
+tokenize11      ldy 2,s                         ; get back start of input token
+tokenize12      ldb ,y+                         ; get input character
+                cmpb #'z                        ; is it above lower case Z?
+                bhi tokenize13                  ; brif so
+                cmpb #'a                        ; is it below lower case A?
+                blo tokenize13                  ; brif so
+                subb #0x20                      ; convert to upper case
+tokenize13      subb ,x+                        ; does it match?
+                beq tokenize12                  ; brif so - check another
+                cmpb #0x80                      ; did it match with high bit set?
+                beq tokenize15                  ; brif so - exact match
+                leax -1,x                       ; back up to current test character
+tokenize14      ldb ,x+                         ; end of token?
+                bpl tokenize14                  ; brif not
+                inc ,s                          ; bump token counter
+                bra tokenize10                  ; go check another one (after making sure the table hasn't ended)
+tokenize15      orb ,s+                         ; merge token number with the high bit (bit 7 set from above)
+                lda ,s+                         ; get back secondary flag and set flags on it
+                leas 2,s                        ; clean up saved input pointer from stack
+                bpl tokenize17                  ; brif primary token
+                skip2                           ; secondary: keep A (0xff prefix); presumably skips the next 2 byte opcode
+tokenize18      lda #':                         ; for putting implied colons in
+tokenize16      std ,u++                        ; put output into buffer
+                jmp tokenize0                   ; go handle more input
+tokenize17      cmpb #tok_else                  ; is it ELSE?
+                beq tokenize18                  ; brif so - stash it with colon
+                cmpb #tok_data                  ; is it DATA?
+                bne tokenize18a                 ; brif not
+                stb 1,s                         ; set "in data" flag
+tokenize20      stb ,u+                         ; stash token
+                jmp tokenize0                   ; go handle more
+tokenize18a     cmpb #tok_rem                   ; is it REM?
+                beq tokenize19                  ; brif so
+                cmpb #tok_apos                  ; apostrophe REM?
+                bne tokenize20                  ; brif not - stash token and continue
+                lda #':                         ; stash the implied colon
+                sta ,u+
+                bra tokenize19
+tokenize19a     ldb ,y+                         ; fetch next input character
+tokenize19      stb ,u+                         ; stash the character
+                bne tokenize19a                 ; brif not end of input - do another
+                jmp tokenize0b                  ; stash end of buffer and handle cleanup
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Keyword dictionaries and jump tables. These are defined by several macros which ensure that each command or function
+; entry has an associated jump table entry. These macros are:
+;
+;               defcmd string,symbase
+;               deffunc string,symbase,flags
+;               cmdtab
+;               functab
+;               cmdjump
+;               funcjump
+; defcmd and deffunc will add an entry into the relevant dictionary table as well as adding one to the relevant jump
+; tables. The cmdtab, functab, cmdjump, and funcjump will output the table definitions.
+;
+; NOTE(review): only defcmd, deffunc, cmdtab, and functab are defined in this section. The jump table text is
+; accumulated in __cmdjump and __funcjump, but no cmdjump or funcjump macro appears here to emit it.
+;
+; Each defcmd/deffunc also defines tok_<symbase> as the token number for the keyword. Numbering starts at 0x80 and
+; goes up by one per definition, separately for commands and functions, so the token number is 0x80 plus the
+; keyword's position in its dictionary. Jump table entries refer to cmd_<symbase> or func_<symbase>.
+;
+; The dictionary text is emitted with fcs, and each table ends with a zero byte. The setstr and includestr directives
+; are lwasm features; presumably setstr appends to a named assembler string and includestr assembles that string's
+; contents as source - confirm against the lwasm manual.
+                *pragmapush list
+                *pragma nolist
+; token numbers to hand out next; bit 7 is always set
+__cmdnum        set 0x80
+__funcnum       set 0x80
+; defcmd string,symbase: register a command keyword
+defcmd          macro noexpand
+                setstr __cmdtab="%(__cmdtab)\tfcs {1}\n"
+                setstr __cmdjump="%(__cmdjump)\tfdb cmd_{2}\n"
+tok_{2}         equ __cmdnum
+__cmdnum        set __cmdnum+1
+                endm
+; deffunc string,symbase,flags: register a function keyword; the jump entry is a flags byte followed by the address
+deffunc         macro noexpand
+                setstr __functab="%(__functab)\tfcs {1}\n"
+                setstr __funcjump="%(__funcjump)\tfcb {3}\n\tfdb func_{2}\n"
+tok_{2}         equ __funcnum
+__funcnum       set __funcnum+1
+                endm
+; cmdtab: emit the accumulated command keyword dictionary
+cmdtab          macro
+                includestr "%(__cmdtab)"
+                fcb 0                           ; flag end of table
+                endm
+; functab: emit the accumulated function keyword dictionary
+functab         macro
+                includestr "%(__functab)"
+                fcb 0                           ; flag end of table
+                endm                
+                *pragmapop list
+; Keyword definitions. Order matters: it fixes the token numbers (REM = 0x80, ' = 0x81, DATA = 0x82, ELSE = 0x83).
+                defcmd 'REM',rem
+                defcmd /'/,apos
+                defcmd 'DATA',data
+                defcmd 'ELSE',else
+; No deffunc entries exist in this section, so secondarydict holds only its end of table marker.
+primarydict     cmdtab
+secondarydict   functab
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Need to ensure the vectors are at 0xbff2
                 zmb 0xbff2-*                    ; pad ROM up to the vector point
                 fdb SW3VEC                      ; SWI3 vector