# HG changeset patch
# User William Astle
# Date 1715825816 21600
# Node ID 9f23ddc5165f104f8ad4f4b0e155a081e1d4c38a
# Parent  d5886daa4f65ae53593971f7a619d8c3f7348daf
Various updates to parsing scheme to handle errors and make it build

diff -r d5886daa4f65 -r 9f23ddc5165f src/parse.s
--- a/src/parse.s	Sat May 04 15:18:51 2024 -0600
+++ b/src/parse.s	Wed May 15 20:16:56 2024 -0600
@@ -3,7 +3,7 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This is the overall parsing package. This is responsible for converting program text into the internal byte code and
 ; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated
-; code analysis.
+; code analysis. In almost all cases, the returned error will be a syntax error.
 ;
 ; This is a recursive descent parser.
 ;
@@ -20,16 +20,20 @@
 ; B Error code
 ; U Offset to error input
 ; CC.C set
+parse_errorsn   ldb #err_sn                     ; raise a syntax error
+parse_error     lds parse_stackptr              ; restore the original stack pointer so we can call from down stack
+                puls u                          ; get back original free pointer
+                stu freestart                   ; deallocate any allocated result
+                ldu parse_tokenst               ; get start location of the token where the error was raised
+                coma                            ; make sure C is set for error
+                rts
 parse           stb parse_noout                 ; save no-output flag
                 leay ,x                         ; save input pointer in a less useful register
                 ldu freestart                   ; point to start of free memory where we will build the output
                 pshs u                          ; save original free memory location
+                sts parse_stackptr              ; save the stack pointer for bailing out on errors
 parse_nextstmt  jsr parse_nexttok               ; fetch the next token, return type in D
-                bcc parse0                      ; brif we succeeded in parsing a token
-parse_error     puls u                          ; restore original free memory location - deallocate any encoding
-                stu freestart
-                ldu parse_tokenst               ; get start location we started parsing the token at
-                rts                             ; return error condition
+                bcs parse_error                 ; brif we failed at parsing a token
 parse0          ldx #parsetab_cmd               ; point to jump table for token type handler
                 cmpb #token_stmtsep             ; is it a statement separator?
                 beq parse_nextstmt              ; brif so - we can just skip it
@@ -38,8 +42,6 @@
                 leax 3,x                        ; move to next entry
                 cmpx #parsetab_cmde             ; end of table?
                 blo parse1                      ; brif not
-parse2          ldb #err_sn                     ; flag syntax error
-                bra parse_error                 ; and return the error
 parse3          jsr [1,x]                       ; call the handler
                 bcs parse_error                 ; brif the handler indicated error
                 bsr parse_curtoken              ; fetch the token we left off on
@@ -55,7 +57,7 @@
                 beq parse_nextstmt              ; brif so - do another statement
                 cmpb #token_remabbr             ; ' token?
                 beq parse0                      ; brif so - parse it as a new statement
-                bra parse2                      ; raise a syntax error
+                bra parse_errorsn               ; raise a syntax error
 parse_write     lda parse_noout                 ; are we doing output?
                 beq parse_write0                ; brif so
                 leau 1,u                        ; just count up the output and don't do anything
@@ -90,7 +92,7 @@
 parse_nexttok2  sty parse_tokenst               ; save start of current token after skipping spaces
                 bsr parse_toupper               ; make sure we have upper case letters for matching
                 ldx #parse_wt                   ; point to keyword parsing table
-                bsr parse_wordtab               ; go see if we have a match in the keyword table
+                jsr parse_wordtab               ; go see if we have a match in the keyword table
                 bcc parse_nexttok6              ; brif we do - return it
                 ldy parse_tokenst               ; return to the start of the token - pointer probably clobbered
                 bsr parse_curchar               ; get back input character (may have been clobbered)
@@ -134,6 +136,12 @@
                 bhi parse_toupper0              ; brif not
                 suba #0x20                      ; adjust to upper case alpha
 parse_toupper0  rts                             ; Z only set here if input was zero entering from parse_nextcharu
+; Test whether the token in A ends a statement: Z is set for token_stmtsep or token_eot and clear otherwise.
+; NOTE(review): parse tests the token type in B after parse_nexttok - confirm A is the right register here.
+parse_iseos     cmpa #token_stmtsep             ; end of statement?
+                beq parse_iseos0                ; brif so
+                cmpa #token_eot                 ; end of text?
+parse_iseos0    rts
 parse_number    jmp parse_tokerr
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Parse a statement that consists of just the command token
@@ -147,6 +155,54 @@
                 stb parse_curtok
                 rts                             ; return, pass back the C result from parse_write
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Parse an optional line number range which may be [lineno][-[lineno]]
+;
+; The command token is written first, followed by four bytes: the 16 bit start line number and then the 16 bit end
+; line number. An omitted start defaults to the word at zero (presumably 0) and an omitted end to 65535; a lone line
+; number is used for both. Anything else raises a syntax error. NOTE(review): token types are tested in A - confirm.
+parse_range     jsr parse_write                 ; output the token
+                jsr parse_nexttok               ; fetch input token
+                ldx #0xffff                     ; default end line number - end of program
+                pshs x                          ; push via X only: U must survive, parse_write counts output in it
+                ldx zero                        ; default start line number - start of program
+                pshs x                          ; now ,s is the start line and 2,s is the end line
+                bsr parse_iseos                 ; end of statement (no arguments)?
+                beq parse_range3                ; brif so - write out the defaults
+                cmpa #token_int32               ; is it an integer (line number)?
+                bne parse_range0                ; brif not
+                ldd val0+val.int                ; are the upper 16 bits set?
+                bne parse_rangee                ; brif so - line number out of range, we have an error
+                ldd val0+val.int+2              ; set the start line number
+                std ,s
+                jsr parse_nexttok               ; see what's after the line number
+parse_range0    cmpa #token_minus               ; do we have a range?
+                beq parse_range1                ; brif so
+                bsr parse_iseos                 ; end of statement?
+                bne parse_rangee                ; brif not - error
+                ldd ,s                          ; set end line to start line
+                std 2,s
+                bra parse_range3                ; go output things
+parse_range1    jsr parse_nexttok               ; skip the -
+                bsr parse_iseos                 ; end of statement?
+                beq parse_range3                ; brif so - open ended range, keep the default end line
+                cmpa #token_int32               ; is it an integer?
+                bne parse_rangee                ; brif not
+                ldx val0+val.int                ; upper 16 bits set?
+                bne parse_rangee                ; brif so - invalid number
+                ldx val0+val.int+2              ; get end line number
+                stx 2,s                         ; save end line number
+                cmpx ,s                         ; is end line lower than start line?
+                blo parse_rangee                ; brif so - error
+parse_range3    puls b                          ; write out the range; the byte goes in B as parse_write reloads A
+                jsr parse_write
+                puls b
+                jsr parse_write
+                puls b
+                jsr parse_write
+                puls b
+                jmp parse_write                 ; write the last byte and return the C result from parse_write
+parse_rangee    jmp parse_errorsn               ; go raise the parse error
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
 ;
 ; * two bytes which contain the length of the table less the two bytes for this length value
@@ -172,7 +228,7 @@
                 bne parse_wordtab4              ; brif not
                 cmpb #token_eot                 ; is it indicating a sub table?
                 bne parse_wordtab6              ; brif not
-                bsr parse_nextcharu             ; fetch next input character (for sub table match)
+                jsr parse_nextcharu             ; fetch next input character (for sub table match)
                 bne parse_wordtab0              ; brif we are going to check the sub table
 parse_wordtab2  ldd ,x                          ; fetch length of sub table
                 leax d,x                        ; move past sub table
@@ -321,6 +377,9 @@
 *pragmapop list
                 parse_tokendefp error           ; Used to mark errors; should always be first so it's token #0
                 parse_tokendefp eot             ; End of input marker or special handling in word tables
+                parse_tokendefp int32           ; 32 bit integer (has special parsing)
+                parse_tokendefp float           ; floating point value (has special parsing)
+                parse_tokendefp ident           ; identifier (has special parsing)
                 parse_tokendefp stmtsep         ; statement separator
                 parse_tokendefp times           ; times (multiplication) operator (*)
                 parse_tokendefp plus            ; addition operator
@@ -346,7 +405,6 @@
                 parse_tokendefp sep             ; comma (separator)
                 parse_tokendefp semi            ; semicolon
                 parse_tokendefp at              ; @ symbol
-                parse_tokendefp ident           ; identifier (has special parsing)
                 parse_tokendefp else            ; ELSE
                 parse_tokendefp then            ; THEN
                 parse_tokendefp to              ; TO
diff -r d5886daa4f65 -r 9f23ddc5165f src/vars.s
--- a/src/vars.s	Sat May 04 15:18:51 2024 -0600
+++ b/src/vars.s	Wed May 15 20:16:56 2024 -0600
@@ -42,6 +42,7 @@
 parse_noout     rmb 1                           ; flag for whether we're outputting encoded lines when parsing
 parse_tokenst   rmb 2                           ; pointer into input buffer of start of currently parsed token
 parse_curtok    rmb 1                           ; current token type code
+parse_stackptr  rmb 2                           ; saved stack pointer for bailing out from random parse points
 ; General value accumulators used during expression evaluation. These are in the same format used for storing
 ; values in variables with the exception of having a type flag.
 val0            rmb val.size                    ; value accumulator 0 - current expression value