changeset 130:9f23ddc5165f

Various updates to parsing scheme to handle errors and make it build
author William Astle <lost@l-w.ca>
date Wed, 15 May 2024 20:16:56 -0600
parents d5886daa4f65
children 95f174bf459b
files src/parse.s src/vars.s
diffstat 2 files changed, 64 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/src/parse.s	Sat May 04 15:18:51 2024 -0600
+++ b/src/parse.s	Wed May 15 20:16:56 2024 -0600
@@ -3,7 +3,7 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This is the overall parsing package. This is responsible for converting program text into the internal byte code and
 ; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated
-; code analysis.
+; code analysis. In almost all cases, the returned error will be a syntax error.
 ;
 ; This is a recursive descent parser.
 ;
@@ -20,16 +20,20 @@
 ; B             Error code
 ; U             Offset to error input
 ; CC.C          set
+parse_errorsn   ldb #err_sn                     ; entry point for raising a syntax error: set the error code in B
+parse_error     lds parse_stackptr              ; restore the stack pointer saved by parse so we can bail from down stack
+                puls u                          ; get back original free pointer (pushed by parse just before S was saved)
+                stu freestart                   ; deallocate any allocated result
+                ldu parse_tokenst               ; get start location of the token where the error was raised
+                coma                            ; make sure C is set for error (COMA always sets C; A is complemented)
+                rts                             ; S now points at the return address of whoever called parse
 parse           stb parse_noout                 ; save no-output flag
                 leay ,x                         ; save input pointer in a less useful register
                 ldu freestart                   ; point to start of free memory where we will build the output
                 pshs u                          ; save original free memory location
+                sts parse_stackptr              ; save the stack pointer for bailing out on errors
 parse_nextstmt  jsr parse_nexttok               ; fetch the next token, return type in D
-                bcc parse0                      ; brif we succeeded in parsing a token
-parse_error     puls u                          ; restore original free memory location - deallocate any encoding
-                stu freestart
-                ldu parse_tokenst               ; get start location we started parsing the token at
-                rts                             ; return error condition
+                bcs parse_error                 ; brif we failed at parsing a token
 parse0          ldx #parsetab_cmd               ; point to jump table for token type handler
                 cmpb #token_stmtsep             ; is it a statement separator?
                 beq parse_nextstmt              ; brif so - we can just skip it
@@ -38,8 +42,6 @@
                 leax 3,x                        ; move to next entry
                 cmpx #parsetab_cmde             ; end of table?
                 blo parse1                      ; brif not
-parse2          ldb #err_sn                     ; flag syntax error
-                bra parse_error                 ; and return the error
 parse3          jsr [1,x]                       ; call the handler
                 bcs parse_error                 ; brif the handler indicated error
                 bsr parse_curtoken              ; fetch the token we left off on
@@ -55,7 +57,7 @@
                 beq parse_nextstmt              ; brif so - do another statement
                 cmpb #token_remabbr             ; ' token?
                 beq parse0                      ; brif so - parse it as a new statement
-                bra parse2                      ; raise a syntax error
+                bra parse_errorsn               ; raise a syntax error
 parse_write     lda parse_noout                 ; are we doing output?
                 beq parse_write0                ; brif so
                 leau 1,u                        ; just count up the output and don't do anything
@@ -90,7 +92,7 @@
 parse_nexttok2  sty parse_tokenst               ; save start of current token after skipping spaces
                 bsr parse_toupper               ; make sure we have upper case letters for matching
                 ldx #parse_wt                   ; point to keyword parsing table
-                bsr parse_wordtab               ; go see if we have a match in the keyword table
+                jsr parse_wordtab               ; go see if we have a match in the keyword table
                 bcc parse_nexttok6              ; brif we do - return it
                 ldy parse_tokenst               ; return to the start of the token - pointer probably clobbered
                 bsr parse_curchar               ; get back input character (may have been clobbered)
@@ -134,6 +136,10 @@
                 bhi parse_toupper0              ; brif not
                 suba #0x20                      ; adjust to upper case alpha
 parse_toupper0  rts                             ; Z only set here if input was zero entering from parse_nextcharu
+; Check whether the token code in A marks the end of a statement. Returns Z set if A is token_stmtsep or token_eot
+; and Z clear otherwise. No registers are modified.
+; NOTE(review): the main parse loop tests the token code in B after parse_nexttok - confirm callers really have the
+; token code in A when calling this.
+parse_iseos     cmpa #token_stmtsep             ; end of statement?
+                beq parse_iseos0                ; brif so - return with Z set
+                cmpa #token_eot                 ; end of text? (Z set on return only if so)
+parse_iseos0    rts
 parse_number    jmp parse_tokerr
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Parse a statement that consists of just the command token
@@ -147,6 +153,49 @@
                 stb parse_curtok
                 rts                             ; return, pass back the C result from parse_write
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Parse an optional line number range which may be [lineno][-[lineno]]
+;
+; The command token is written first, followed by four bytes: the 16 bit start line number and then the 16 bit end
+; line number, each high byte first. A missing start line defaults to 0 and a missing end line defaults to 65535. A
+; lone line number gives a range covering only that line. Any other token, a line number that does not fit in 16
+; bits, or an end line lower than the start line raises a syntax error through parse_errorsn, which unwinds the stack
+; so the temporaries pushed here do not need to be cleaned up on that path.
+;
+; Register usage:
+;
+; * The token code is taken from B after parse_nexttok, which is where the main parse loop looks for it.
+; * Bytes handed to parse_write are placed in B because parse_write loads A with parse_noout on entry.
+; * U is not touched here because it is the output pointer maintained by parse and parse_write.
+;
+; Presumably entered with the command token code in B since it is handed straight to parse_write - TODO confirm.
+; Returns with whatever the final parse_write returns.
+;
+; NOTE(review): when an explicit end line number is given, no further token is fetched after it. Confirm that the
+; caller copes with the int32 token still being the current token.
+parse_range     jsr parse_write                 ; output the token
+                jsr parse_nexttok               ; fetch input token
+                ldx #0xffff                     ; default end line number - end of program
+                pshs x
+                ldx zero                        ; default start line number - start of program
+                pshs x                          ; stack is now: ,s = start line, 2,s = end line
+                tfr b,a                         ; parse_iseos tests A but the token code is in B
+                jsr parse_iseos                 ; are there arguments?
+                beq parse_range3                ; brif not - use the defaults
+                cmpb #token_int32               ; is it an integer (line number)?
+                bne parse_range0                ; brif not
+                ldx val0+val.int                ; is the upper 16 bits set?
+                bne parse_rangee                ; brif so - invalid line number
+                ldx val0+val.int+2              ; set the start line number
+                stx ,s
+                jsr parse_nexttok               ; see what's after the line number
+parse_range0    cmpb #token_minus               ; do we have a range?
+                beq parse_range1                ; brif so
+                tfr b,a                         ; parse_iseos tests A but the token code is in B
+                jsr parse_iseos                 ; end of statement?
+                bne parse_rangee                ; brif not - error
+                ldd ,s                          ; set end line to start line
+                std 2,s
+                bra parse_range3                ; go output things
+parse_range1    jsr parse_nexttok               ; skip the -
+                tfr b,a                         ; parse_iseos tests A but the token code is in B
+                jsr parse_iseos                 ; end of statement?
+                beq parse_range3                ; brif so - open ended range, keep the default end line
+                cmpb #token_int32               ; is it an integer?
+                bne parse_rangee                ; brif not
+                ldx val0+val.int                ; upper 16 bits set?
+                bne parse_rangee                ; brif so - invalid number
+                ldx val0+val.int+2              ; get end line number
+                stx 2,s                         ; save end line number
+                cmpx ,s                         ; is end line lower than start line?
+                blo parse_rangee                ; brif so - error
+parse_range3    puls b                          ; write out the range: start line high byte
+                jsr parse_write
+                puls b                          ; start line low byte
+                jsr parse_write
+                puls b                          ; end line high byte
+                jsr parse_write
+                puls b                          ; end line low byte
+                jmp parse_write                 ; write it and return the result from parse_write
+parse_rangee    jmp parse_errorsn               ; go raise the parse error
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
 ;
 ; * two bytes which contain the length of the table less the two bytes for this length value
@@ -172,7 +221,7 @@
                 bne parse_wordtab4              ; brif not
                 cmpb #token_eot                 ; is it indicating a sub table?
                 bne parse_wordtab6              ; brif not
-                bsr parse_nextcharu             ; fetch next input character (for sub table match)
+                jsr parse_nextcharu             ; fetch next input character (for sub table match)
                 bne parse_wordtab0              ; brif we are going to check the sub table
 parse_wordtab2  ldd ,x                          ; fetch length of sub table
                 leax d,x                        ; move past sub table
@@ -321,6 +370,9 @@
                 *pragmapop list
                 parse_tokendefp error           ; Used to mark errors; should always be first so it's token #0 
                 parse_tokendefp eot             ; End of input marker or special handling in word tables
+                parse_tokendefp int32           ; 32 bit integer (has special parsing)
+                parse_tokendefp float           ; floating point value (has special parsing)
+                parse_tokendefp ident           ; identifier (has special parsing)
                 parse_tokendefp stmtsep         ; statement separator
                 parse_tokendefp times           ; times (multiplication) operator (*)
                 parse_tokendefp plus            ; addition operator
@@ -346,7 +398,6 @@
                 parse_tokendefp sep             ; comma (separator)
                 parse_tokendefp semi            ; semicolon
                 parse_tokendefp at              ; @ symbol
-                parse_tokendefp ident           ; identifier (has special parsing)
                 parse_tokendefp else            ; ELSE
                 parse_tokendefp then            ; THEN
                 parse_tokendefp to              ; TO
--- a/src/vars.s	Sat May 04 15:18:51 2024 -0600
+++ b/src/vars.s	Wed May 15 20:16:56 2024 -0600
@@ -42,6 +42,7 @@
 parse_noout     rmb 1                           ; flag for whether we're outputting encoded lines when parsing
 parse_tokenst   rmb 2                           ; pointer into input buffer of start of currently parsed token
 parse_curtok    rmb 1                           ; current token type code
+parse_stackptr  rmb 2                           ; saved stack pointer for bailing out from random parse points
 ; General value accumulators used during expression evaluation. These are in the same format used for storing
 ; values in variables with the exception of having a type flag.
 val0            rmb val.size                    ; value accumulator 0 - current expression value