view src/token.s @ 125:0607e4e20702

Correct offset error for keyword table lookup
author William Astle <lost@l-w.ca>
date Sun, 07 Jan 2024 20:35:51 -0700
parents eb2681108660
children
line wrap: on
line source

                *pragmapush list
                *pragma list
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The LIST command.
;
; Syntax:
; LIST
; LIST <line>
; LIST <line>-
; LIST -<line>
; LIST <start>-<end>
;
; Prints each program line in the requested range as: line number, a space, then the detokenized line text.
;
; Enter with the flags indicating whether the command has arguments (NE means it does); this is tested by the very
; first instruction. The highest line number to list is kept in binval while the listing runs.
;
; As read by this routine, each program line starts with a 16 bit pointer to the next line (a zero here marks the end
; of the program), followed by a 16 bit line number at offset 2 and the tokenized text at offset 4.
;
; NOTE(review): this code relies on X (set by prog_findline) surviving the calls to curchar, syncheckb and the second
; parse_lineno, and on parse_lineno leaving its result in binval - confirm against those routines.
cmd_list        bne cmd_list1                   ; brif we have arguments
                ldx progtext                    ; point to start of program
cmd_list0       ldd #65535                      ; set last line to list to max line number
                std binval
                bra cmd_list2                   ; go do the listing
cmd_list1       jsr parse_lineno                ; parse starting line number (will default to 0)
                jsr prog_findline               ; find the line or the one after where it would be
                jsr curchar                     ; are we at the end of the command?
                beq cmd_list2                   ; brif so - we have a single line (binval will have the start line #)
                ldb #tok_minus                  ; insist on a - for a range if more than one line number
                jsr syncheckb
                beq cmd_list0                   ; brif open ended ending - set to max line number
                jsr parse_lineno                ; parse ending of range
cmd_list2       ldd ,x                          ; are we at the end of the program? (next line pointer is zero)
                bne cmd_list4                   ; brif not
cmd_list3       rts
cmd_list4       ldd 2,x                         ; get line number
                cmpd binval                     ; have we reached the end of the range?
                bhi cmd_list3                   ; brif so - we're done (unsigned compare; line numbers are 0-65535)
                jsr print_uint16d               ; print out line number
                lda #0x20                       ; and a space
                jsr writechr
                pshs x                          ; save start of this line (in case detokenizing exits early)
                leax 4,x                        ; move past line header (next pointer and line number)
                bsr detokenize                  ; detokenize line to current output stream
                ldx [,s++]                      ; fetch next line pointer through the saved pointer and pop it
                ; need to add a break check here
                bra cmd_list2                   ; go handle another line
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Detokenize a line to the current output stream
;
; Enter with X pointing to the NUL terminated tokenized text of a line. Bytes with the high bit clear are written
; unchanged using writechr. Bytes with the high bit set are keyword tokens: 0xff selects the secondary dictionary and
; the token number is taken from the following byte; anything else is looked up in the primary dictionary. The low
; seven bits of the token select the keyword by position in the dictionary. Each keyword in a dictionary ends with a
; character that has its high bit set and the dictionary itself ends with a zero byte.
;
; A colon immediately followed by the apostrophe REM token or the ELSE token is not shown. A token that has no
; dictionary entry (including a token number exactly one past the last entry) is shown as "!". A 0xff prefix followed
; by the end of the line is also shown as "!" and the NUL is left to end the line normally.
;
; On reaching the NUL, control passes to writecondnl which also does the return to the caller.
;
; Uses A, B and U and advances X. This relies on writechr preserving X and U.
detokenize      lda ,x+                         ; get character from tokenized line
                bmi detokenize1                 ; brif it's a keyword token
                lbeq writecondnl                ; do a newline if needed and return
                cmpa #':                        ; is it a colon?
                bne detokenize0                 ; brif not
                ldb ,x                          ; fetch subsequent character
                cmpb #tok_apos                  ; apostrophe version of REM?
                beq detokenize                  ; brif so - skip the colon
                cmpb #tok_else                  ; ELSE?
                beq detokenize                  ; brif so - skip the colon
detokenize0     jsr writechr                    ; output it unmolested
                bra detokenize                  ; go handle another character
detokenize1     ldu #primarydict                ; point to primary dictionary table
                cmpa #0xff                      ; is it a secondary token?
                bne detokenize3                 ; brif not
                ldu #secondarydict              ; point to secondary dictionary table
                lda ,x+                         ; get secondary token value
                bne detokenize3                 ; brif not end of line
                leax -1,x                       ; don't consume the NUL
detokenize2     lda #'!                         ; invalid token flag
                bra detokenize0                 ; output it and continue
detokenize3     anda #0x7f                      ; lose the high bit; A is now the number of keywords to skip
detokenize4     ldb ,u                          ; end of dictionary table?
                beq detokenize2                 ; brif so - show invalid token flag (never print past the table)
                tsta                            ; are we at the right keyword?
                beq detokenize6                 ; brif so - go show it
detokenize5     ldb ,u+                         ; fetch character in this keyword
                bpl detokenize5                 ; brif not end of keyword (high bit set)
                deca                            ; count the keyword we just skipped
                bra detokenize4                 ; recheck for end of table before using the next entry
detokenize6     lda ,u+                         ; get keyword character
                bmi detokenize7                 ; brif end of keyword
                jsr writechr                    ; output it
                bra detokenize6                 ; go fetch another
detokenize7     anda #0x7f                      ; lose the high bit
                bra detokenize0                 ; write it and move on with the input
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Canonicalize certain sequences; ALL the rewrite sequences must make the result shorter or keep it the same size
;
; Table format: each entry is a first token, a count N, then N pairs of (second token, replacement token). The table
; ends with a zero byte. When the first token is followed (optionally after spaces) by one of its second tokens, the
; pair and any spaces between them are replaced by the single replacement token.
makecanontab    fcb tok_less,2
                fcb tok_greater,tok_notequal
                fcb tok_equal,tok_lessequal
                fcb tok_greater,2
                fcb tok_less,tok_notequal
                fcb tok_equal,tok_greaterequal
                fcb tok_equal,2
                fcb tok_greater,tok_greaterequal
                fcb tok_less,tok_lessequal
                fcb 0
; Enter with X pointing to the start of the NUL terminated tokenized line. The line is rewritten in place.
; Exit with Y pointing just past the NUL that ends the rewritten line. Uses A, B, U, X and Y.
;
; Two byte (0xff prefixed) tokens are copied through whole so the second byte is neither lost nor mistaken for a
; primary token. Should a NUL directly follow a 0xff byte, the NUL is treated as the end of the line.
makecanon       leay ,x                         ; point output to start of the buffer
makecanon0      lda ,x+                         ; get current byte
                sta ,y+                         ; save in output
                bne makecanon1                  ; brif not end of line
                rts
makecanon1      bpl makecanon0                  ; brif not a token (N is still set from the store above)
                cmpa #0xff                      ; is it secondary?
                bne makecanon2                  ; brif not
                lda ,x+                         ; get second half of the token
                sta ,y+                         ; copy it to the output too so it is not lost
                bne makecanon0                  ; brif it was not end of line - go handle next byte
                rts                             ; NUL right after the prefix - the line ends here
makecanon2      ldu #makecanontab               ; point to replacement table
makecanon3      cmpa ,u+                        ; is it this entry?
                beq makecanon4                  ; brif so
                ldb ,u+                         ; get number of entries
                lslb                            ; 2 bytes per
                leau b,u                        ; move past entry
                ldb ,u                          ; end of table?
                bne makecanon3                  ; brif not
                bra makecanon0                  ; no substitutions found
makecanon4      pshs x                          ; save original source pointer
makecanon5      lda ,x+                         ; get next character
                cmpa #0x20                      ; is it space?
                beq makecanon5                  ; brif so - skip it
                ldb ,u+                         ; get number of replacement candidates
makecanon6      cmpa ,u++                       ; does it match? (U steps past candidate and its replacement)
                beq makecanon7                  ; brif so
                decb                            ; seen all of them?
                bne makecanon6                  ; brif not
                puls x                          ; restore input pointer
                bra makecanon0                  ; go handle next input
makecanon7      leas 2,s                        ; clear saved input pointer
                lda -1,u                        ; get replacement token
                sta -1,y                        ; put it in the output (over the first token already copied)
                bra makecanon0                  ; go handle more input
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Tokenize line to tokebuff
;
; Enter with X pointing to the text to tokenize.
; Exit with X pointing to the start of the tokenized line and D holding the length of the tokenized line.
;
; The input is NUL terminated. The length returned is the end pointer handed back by makecanon in Y less the start
; of tokebuff.
;
; Rules applied while scanning the input:
; * Quoted strings are copied verbatim up to and including the closing quote (or to the end of input).
; * A colon ends the "in data" state that is set by the DATA keyword; while in that state nothing is tokenized.
; * Spaces and digits are copied unmodified.
; * "?" becomes the PRINT token.
; * "'" becomes a colon followed by the apostrophe REM token and the rest of the line is copied verbatim.
; * Bytes with the high bit set are dropped when they appear where a keyword could start.
; * Otherwise the longest keyword from the primary then secondary dictionaries that matches the input (ignoring case
;   of a-z) is replaced by its token. Secondary tokens are stored as 0xff followed by the token.
; * ELSE is stored with a colon before it. REM copies the rest of the line verbatim after the token.
; * If no keyword matches, the character is copied and the alphanumeric characters that follow it are copied without
;   attempting keyword matches (the "not token" state).
;
; An input byte with the high bit set can never be part of a keyword, so the match loop treats it as a mismatch up
; front. Without that, the subtraction used for matching could produce a false "match" or "end of keyword" result
; and leave the dictionary pointer and keyword counter out of step.
;
; NOTE(review): the code loads tok_kwtype and the following byte as a pair, so presumably tok_kwnum sits directly
; after tok_kwtype in memory - confirm against the variable definitions.
tokenize        clr tok_skipkw                  ; clear "not token" flag
                clr tok_skipdt                  ; clear the "in data" flag
                ldy #tokebuff                   ; point to destination buffer
                pshs y                          ; set return value
tokenize0       lda ,x+                         ; get input character
                bne tokenize3                   ; brif not end of input
tokenize1       sta ,y+                         ; blank out final byte in result
tokenize2       ldx #tokebuff                   ; point to start of tokenized line
                bsr makecanon                   ; canonicalize certain sequences
                tfr y,d                         ; get end address to accumulator
                subd #tokebuff                  ; subtract out start; gives length of result
                puls x,pc                       ; set return pointer and return
tokenize3       tst tok_skipkw                  ; are we in the middle of a "not token"?
                beq tokenize6                   ; brif not
                jsr setcifalpha                 ; is it alpha
                bcs tokenize4                   ; brif so - store it and continue
                jsr setcifdigit                 ; is it numeric?
                bcc tokenize5                   ; brif not
tokenize4       sta ,y+                         ; save output character
                bra tokenize0                   ; check for another
tokenize5       clr tok_skipkw                  ; clear the "not token" flag
tokenize6       cmpa #'"                        ; is it a string?
                bne tokenize8                   ; brif not
                sta ,y+                         ; save string delimiter
tokenize7       lda ,x+                         ; get input character
                beq tokenize1                   ; brif end of input
                sta ,y+                         ; save it in output
                cmpa #'"                        ; end of string?
                bne tokenize7                   ; brif not
                bra tokenize0                   ; string is complete - go handle more input
tokenize8       cmpa #':                        ; end of statement?
                bne tokenize9                   ; brif not
                clr tok_skipdt                  ; reset "in data" flag
                bra tokenize4                   ; stash it and continue
tokenize9       cmpa #0x20                      ; is it a space?
                beq tokenize4                   ; brif so - stash it unmodified
                tst tok_skipdt                  ; are we "in data"?
                bne tokenize4                   ; brif so - don't tokenize it
                cmpa #'?                        ; PRINT shortcut?
                bne tokenize10                  ; brif not
                lda #tok_print                  ; load token for PRINT
                bra tokenize4                   ; stash it and move on
tokenize10      cmpa #''                        ; ' shortcut for remark?
                bne tokenize12                  ; brif not
                ldd #':*256+tok_apos            ; put token for ' and an implied colon
                std ,y++                        ; stash it
tokenize11      lda ,x+                         ; fetch byte from input
                sta ,y+                         ; stash in output
                bne tokenize11                  ; brif not end of input
                bra tokenize2                   ; go finish up
tokenize12      jsr setcifdigit                 ; is it a digit?
                bcs tokenize4                   ; brif so - pass it through
                tsta                            ; is the high bit set?
                bmi tokenize0                   ; ignore it if so
                ldu #primarydict                ; point to keyword table
                leax -1,x                       ; back up input to start of potential token
                clr tok_kwtype                  ; set secondary table flag to primary table
                clr tok_kwmatch                 ; clear the matched token
                clr tok_kwmatch+1
                clr tok_kwmatchl                ; set length matched
                pshs x                          ; save start of input token
tokenize13      clr tok_kwnum                   ; clear keyword number
tokenize14      ldb ,u                          ; are we at the end of the table?
                bne tokenize16                  ; brif not
                ldu #secondarydict              ; point to secondary token dictionary
                com tok_kwtype                  ; flip to secondary token flag
                bne tokenize13                  ; brif we haven't already done the secondaries
                puls x                          ; get back input pointer
                ldb tok_kwmatchl                ; get length of best match
                beq tokenize15                  ; brif we don't have a match
                abx                             ; move input pointer past matched token
                ldd tok_kwmatch                 ; get matched token number
                tsta                            ; is it a primary?
                beq tokenize24                  ; brif so
                bra tokenize23                  ; go stash two byte token
tokenize15      com tok_skipkw                  ; set "not token flag"
                lda ,x+                         ; get character
                bra tokenize4                   ; stash it and continue
tokenize16      ldx ,s                          ; get back start of input token
                clra                            ; initialize match length counter
tokenize17      inca                            ; bump length counter
                ldb ,x+                         ; get input character
                bmi tokenize19                  ; brif high bit set - cannot match; skip the rest of this keyword
                cmpb #'z                        ; is it above lower case Z?
                bhi tokenize18                  ; brif so
                cmpb #'a                        ; is it below lower case A?
                blo tokenize18                  ; brif so
                subb #0x20                      ; convert to upper case
tokenize18      subb ,u+                        ; does it match?
                beq tokenize17                  ; brif so - check another
                cmpb #0x80                      ; did it match with high bit set?
                beq tokenize21                  ; brif so - exact match
                leau -1,u                       ; back up to current test character
tokenize19      ldb ,u+                         ; end of token?
                bpl tokenize19                  ; brif not
tokenize20      inc tok_kwnum                   ; bump token counter
                bra tokenize14                  ; go check another one
tokenize21      cmpa tok_kwmatchl               ; is it a longer match?
                bls tokenize20                  ; brif not, ignore it
                sta tok_kwmatchl                ; save new match length
                ldd tok_kwtype                  ; get the table flag (A) and matched token count (B)
                orb #0x80                       ; set token flag
                std tok_kwmatch                 ; save matched token
                bra tokenize20                  ; keep looking through the tables
tokenize22      lda #':                         ; for putting implied colons in
tokenize23      std ,y++                        ; put output into buffer
                jmp tokenize0                   ; go handle more input
tokenize24      cmpb #tok_else                  ; is it ELSE?
                beq tokenize22                  ; brif so - stash it with colon
                cmpb #tok_data                  ; is it DATA?
                bne tokenize26                  ; brif not
                stb tok_skipdt                  ; set "in data" flag (token value is nonzero)
tokenize25      stb ,y+                         ; stash token
                jmp tokenize0                   ; go handle more
tokenize26      cmpb #tok_rem                   ; is it REM?
                beq tokenize28                  ; brif so
                cmpb #tok_apos                  ; apostrophe REM?
                bne tokenize25                  ; brif not - stash token and continue
                lda #':                         ; stash the implied colon
                sta ,y+
                bra tokenize28
tokenize27      ldb ,x+                         ; fetch next input character
tokenize28      stb ,y+                         ; stash the character
                bne tokenize27                  ; brif not end of input - do another
                jmp tokenize2                   ; stash end of buffer and handle cleanup
                *pragmapop list