changeset 140:86f6f3a71e60 default tip

Fix some bugs in tokenization/parsing routine
author William Astle <lost@l-w.ca>
date Tue, 16 Jul 2024 22:30:07 -0600
parents 5d4801c0566d
children
files src/parse.s
diffstat 1 files changed, 11 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/src/parse.s	Mon Jul 15 23:26:15 2024 -0600
+++ b/src/parse.s	Tue Jul 16 22:30:07 2024 -0600
@@ -22,8 +22,8 @@
                 leay -stackheadroom,s           ; set the top of free memory
 parseto         lda #1                          ; flag to enable memory limit detection
                 pshs a,u,y                      ; save start and end addresses and OM error detection flag
-                leay ,x                         ; put the input pointer somewhere less useful                
-parsea          jsr parse_curchar               ; fetch an input character
+                leay -1,x                       ; put the input pointer somewhere less useful and back up one spot
+parsea          jsr parse_nextchar              ; fetch an input character
                 bne parseb                      ; brif not end of input
 parsez          tfr u,d                         ; get current output pointer
                 subd 3,s                        ; now D is the length
@@ -65,10 +65,12 @@
                 bra parsea                      ; we're done with DATA
 parsek          bsr parseout                    ; put the data value into the output
                 bra parsei                      ; go handle another character
-parsec          cmpb #'"                        ; did we encounter a quoted string?
+parsec          ldb ,y                          ; get back the current input in the right register
+                cmpb #'"                        ; did we encounter a quoted string?
                 bne parsel                      ; brif not
                 bsr parseout                    ; output delimiter
-parsem          ldb ,y+                         ; get string character
+parsem          leay 1,y                        ; move to next input character
+                ldb ,y                          ; get string character
                 beq parsez                      ; brif end of input
                 bsr parseout                    ; output it
                 cmpb #'"                        ; end delimiter?
@@ -87,9 +89,10 @@
                 cmpb #'z                        ; is it still a lower case letter?
                 bhi parsen                      ; brif not
 parseo          bsr parseout                    ; stash the character
-                ldb ,y+                         ; fetch next input
-                beq parsez                      ; brif end of input
-                bra parsep                      ; go see if we're still in an identifier
+                leay 1,y                        ; move to next character
+                ldb ,y                          ; fetch next input
+                bne parsep                      ; brif not end of input
+                jmp parsez                      ; go handle end of input
 parsen          bsr parseout                    ; output unknown character (number, unknown token)
                 jmp parsea                      ; go handle more
 parseoutw       exg a,b                         ; do MSB
@@ -104,9 +107,7 @@
                 rts                             ; return to original caller
 parseout0       stb ,u+                         ; stash in buffer
                 rts
-parse_nextchar  lda ,y                          ; at end of input already?
-                beq parse_curchar               ; brif so
-                leay 1,y                        ; move to next input character
+parse_nextchar  leay 1,y                        ; move to next input character
 parse_curchar   lda ,y                          ; fetch input character
                 rts
 parse_nextcharu bsr parse_nextchar              ; fetch next input character