view src/token.s @ 76:eb2681108660

Split some code into separate files for easier management (4) Because the source for lwbasic is so large, split it into several different files to make it easier to navigate and modify. This is part four of the split.
author William Astle <lost@l-w.ca>
date Sun, 06 Aug 2023 00:51:22 -0600
parents
children
line wrap: on
line source

                *pragmapush list
                *pragma list
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The LIST command.
;
; Syntax:
; LIST
; LIST <line>
; LIST <line>-
; LIST -<line>
; LIST <start>-<end>
cmd_list        bne cmd_list1                   ; brif we have arguments
                ldx progtext                    ; point to start of program
cmd_list0       ldd #65535                      ; set last line to list to max line number
                std binval
                bra cmd_list2                   ; go do the listing
cmd_list1       jsr parse_lineno                ; parse starting line number (will default to 0)
                jsr prog_findline               ; find the line or the one after where it would be
                jsr curchar                     ; are we at the end of the command?
                beq cmd_list2                   ; brif so - we have a single line (binval will have the start line #)
                ldb #tok_minus                  ; insist on a - for a range if more than one line number
                jsr syncheckb
                beq cmd_list0                   ; brif open ended ending - set to max line number
                jsr parse_lineno                ; parse ending of range
cmd_list2       ldd ,x                          ; are we at the end of the program?
                bne cmd_list4                   ; brif not
cmd_list3       rts
cmd_list4       ldd 2,x                         ; get line number
                cmpd binval                     ; have we reached the end of the range?
                bhi cmd_list3                   ; brif so - we're done
                jsr print_uint16d               ; print out line number
                lda #0x20                       ; and a space
                jsr writechr
                pshs x                          ; save start of this line (in case detokenizing exits early)
                leax 4,x                        ; move past line header
                bsr detokenize                  ; detokenize line to current output stream
                ldx [,s++]                      ; point to next line using saved pointer and clear it from the stack
                ; need to add a break check here
                bra cmd_list2                   ; go handle another line
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Detokenize a line to the current output stream
detokenize      lda ,x+                         ; get character from tokenized line
                bmi detokenize1                 ; brif it's a keyword token
                lbeq writecondnl                ; do a newline if needed and return
                cmpa #':                        ; is it a colon?
                bne detokenize0                 ; brif not
                ldb ,x                          ; fetch subsequent character
                cmpb #tok_apos                  ; apostrophe version of REM?
                beq detokenize                  ; brif so - skip the colon
                cmpb #tok_else                  ; ELSE?
                beq detokenize                  ; brif so - skip the colon
detokenize0     jsr writechr                    ; output it unmolested
                bra detokenize                  ; go handle another character
detokenize1     ldu #primarydict                ; point to primary dictionary table
                cmpa #0xff                      ; is it a secondary token?
                bne detokenize3                 ; brif not
                ldu #secondarydict              ; point to secondary dictionary table
                lda ,x+                         ; get secondary token value
                bne detokenize3                 ; brif not end of line
                leax -1,x                       ; don't consume the NUL
detokenize2     lda #'!                         ; invalid token flag
                bra detokenize0                 ; output it and continue
detokenize3     anda #0x7f                      ; lose the high bit
                beq detokenize6                 ; brif already at the right place
detokenize4     ldb ,u                          ; end of dictionary table?
                beq detokenize2                 ; brif so - show invalid tokenf lag
detokenize5     ldb ,u+                         ; fetch character in this keyboard
                bpl detokenize5                 ; brif not end of keyword (high bit set)
                deca                            ; at the right token?
                bne detokenize4                 ; brif not - skip another
detokenize6     lda ,u+                         ; get keyword character
                bmi detokenize7                 ; brif end of keyword
                jsr writechr                    ; output it
                bra detokenize6                 ; go fetch another
detokenize7     anda #0x7f                      ; lose the high bit
                bra detokenize0                 ; write it and move on with the input
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Canonicalize certain sequences; ALL the rewrite sequences must make the result shorter or keep it the same size
makecanontab    fcb tok_less,2
                fcb tok_greater,tok_notequal
                fcb tok_equal,tok_lessequal
                fcb tok_greater,2
                fcb tok_less,tok_notequal
                fcb tok_equal,tok_greaterequal
                fcb tok_equal,2
                fcb tok_greater,tok_greaterequal
                fcb tok_less,tok_lessequal
                fcb 0
makecanon       leay ,x                         ; point output to start of the buffer
makecanon0      lda ,x+                         ; get current byte
                sta ,y+                         ; save in output
                bne makecanon1                  ; brif not end of line
                rts
makecanon1      bpl makecanon0                  ; brif not a token
                cmpa #0xff                      ; is it secondary?
                bne makecanon2                  ; brif not
                leax 1,x                        ; move past second half
                bra makecanon0                  ; go handle next byte
makecanon2      ldu #makecanontab               ; point to replacement table
makecanon3      cmpa ,u+                        ; is it this entry?
                beq makecanon4                  ; brif so
                ldb ,u+                         ; get number of entries
                lslb                            ; 2 bytes per
                leau b,u                        ; move past entry
                ldb ,u                          ; end of table?
                bne makecanon3                  ; brif not
                bra makecanon0                  ; no substitutions found
makecanon4      pshs x                          ; save original source pointer
makecanon5      lda ,x+                         ; get next character
                cmpa #0x20                      ; is it space?
                beq makecanon5                  ; brif so - skip it
                ldb ,u+                         ; get number of replacement candidates
makecanon6      cmpa ,u++                       ; does it match?
                beq makecanon7                  ; brif so
                decb                            ; seen all of them?
                bne makecanon6                  ; brif not
                puls x                          ; restore input pointer
                bra makecanon0                  ; go handle next input
makecanon7      leas 2,s                        ; clear saved input pointer
                lda -1,u                        ; get replacement token
                sta -1,y                        ; put it in the output
                bra makecanon0                  ; go handle more input
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Tokenize line to tokebuff
;
; Enter with X pointing to the text to tokenize.
; Exit with X pointing to the start of the tokenized line and D holding the length of the tokenized line.
tokenize        clr tok_skipkw                  ; clear "not token" flag
                clr tok_skipdt                  ; clear the "in data" flag
                ldy #tokebuff                   ; point to destination buffer
                pshs y                          ; set return value
tokenize0       lda ,x+                         ; get input character
                bne tokenize3                   ; brif not end of input
tokenize1       sta ,y+                         ; blank out final byte in result
tokenize2       ldx #tokebuff                   ; point to start of tokenized line
                bsr makecanon                   ; canonicalize certain sequences
                tfr y,d                         ; get end address to accumulator
                subd #tokebuff                  ; subtract out start; gives length of result
                puls x,pc                       ; set return pointer and return
tokenize3       tst tok_skipkw                  ; are we in the middle of a "not token"?
                beq tokenize6                   ; brif not
                jsr setcifalpha                 ; is it alpha
                bcs tokenize4                   ; brif so - store it and continue
                jsr setcifdigit                 ; is it numeric?
                bcc tokenize5                   ; brif not
tokenize4       sta ,y+                         ; save output character
                bra tokenize0                   ; check for another
tokenize5       clr tok_skipkw                  ; clear the "not token" flag
tokenize6       cmpa #'"                        ; is it a string?
                bne tokenize8                   ; brif not
                sta ,y+                         ; save string delimiter
tokenize7       lda ,x+                         ; get input character
                beq tokenize1                   ; brif end of input
                sta ,y+                         ; save it in output
                cmpa #'"                        ; end of string?
                bne tokenize7                   ; brif not
                bra tokenize0                   ; brif 
tokenize8       cmpa #':                        ; end of statement?
                bne tokenize9                   ; brif not
                clr tok_skipdt                  ; reset "in data" flag
                bra tokenize4                   ; stash it and continue
tokenize9       cmpa #0x20                      ; is it a space?
                beq tokenize4                   ; brif so - stash it unmodified
                tst tok_skipdt                  ; are we "in data"?
                bne tokenize4                   ; brif so - don't tokenize it
                cmpa #'?                        ; PRINT shortcut?
                bne tokenize10                  ; brif not
                lda #tok_print                  ; load token for PRINT
                bra tokenize4                   ; move stash it and move on
tokenize10      cmpa #''                        ; ' shortcut for remark?
                bne tokenize12                  ; brif not
                ldd #':*256+tok_apos            ; put token for ' and an implied colon
                std ,y++                        ; stash it
tokenize11      lda ,x+                         ; fetch byte from input
                sta ,y+                         ; stash in output
                bne tokenize11                  ; brif not end of input
                bra tokenize2                   ; go finish up
tokenize12      jsr setcifdigit                 ; is it a digit?
                bcs tokenize4                   ; brif so - pass it through
                tsta                            ; is the high bit set?
                bmi tokenize0                   ; ignore it if so
                ldu #primarydict                ; point to keyword table
                leax -1,x                       ; back up input to start of potential token
                clr tok_kwtype                  ; set secondary table flag to primary table
                clr tok_kwmatch                 ; clear the matched token
                clr tok_kwmatch+1
                clr tok_kwmatchl                ; set length matched
                pshs x                          ; save start of input token
tokenize13      clr tok_kwnum                   ; clear keyword number
tokenize14      ldb ,u                          ; are we at the end of the table?
                bne tokenize16                  ; brif not
                ldu #secondarydict              ; point to secondary token dictionary
                com tok_kwtype                  ; flip to secondary token flag
                bne tokenize13                  ; brif we haven't already done the secondaries
                puls x                          ; get back input pointer
                ldb tok_kwmatchl                ; get length of best match
                beq tokenize15                  ; brif we don't have a match
                abx                             ; move input pointer past matched token
                ldd tok_kwmatch                 ; get matched token number
                tsta                            ; is it a primary?
                beq tokenize24                  ; brif so
                bra tokenize23                  ; go stash two byte token
tokenize15      com tok_skipkw                  ; set "not token flag"
                lda ,x+                         ; get character
                bra tokenize4                   ; stash it and continue
tokenize16      ldx ,s                          ; get back start of input token
                clra                            ; initalize match length counter
tokenize17      inca                            ; bump length counter
                ldb ,x+                         ; get input character
                cmpb #'z                        ; is it above lower case Z?
                bhi tokenize18                  ; brif so
                cmpb #'a                        ; is it below lower case A?
                blo tokenize18                  ; brif so
                subb #0x20                      ; convert to upper case
tokenize18      subb ,u+                        ; does it match?
                beq tokenize17                  ; brif so - check another
                cmpb #0x80                      ; did it match with high bit set?
                beq tokenize21                  ; brif so - exact match
                leau -1,u                       ; back up to current test character
tokenize19      ldb ,u+                         ; end of token?
                bpl tokenize19                  ; brif not
tokenize20      inc tok_kwnum                   ; bump token counter
                bra tokenize14                  ; go check another one
tokenize21      cmpa tok_kwmatchl               ; is it a longer match?
                bls tokenize20                  ; brif not, ignore it
                sta tok_kwmatchl                ; save new match length
                ldd tok_kwtype                  ; get the matched token count
                orb #0x80                       ; set token flag
                std tok_kwmatch                 ; save matched token
                bra tokenize20                  ; keep looking through the tables
tokenize22      lda #':                         ; for putting implied colons in
tokenize23      std ,y++                        ; put output into buffer
                jmp tokenize0                   ; go handle more input
tokenize24      cmpb #tok_else                  ; is it ELSE?
                beq tokenize22                  ; brif so - stash it with colon
                cmpb #tok_data                  ; is it DATA?
                bne tokenize26                  ; brif not
                stb tok_skipdt                  ; set "in data" flag
tokenize25      stb ,y+                         ; stash token
                jmp tokenize0                   ; go handle more
tokenize26      cmpb #tok_rem                   ; is it REM?
                beq tokenize28                  ; brif so
                cmpb #tok_apos                  ; apostrophe REM?
                bne tokenize25                  ; brif not - stash token and continue
                lda #':                         ; stash the implied colon
                sta ,y+
                bra tokenize28
tokenize27      ldb ,x+                         ; fetch next input character
tokenize28      stb ,y+                         ; stash the character
                bne tokenize27                  ; brif not end of input - do another
                jmp tokenize2                   ; stash end of buffer and handle cleanup
                *pragmapop list