Mercurial > hg > index.cgi
diff src/parse.s @ 132:917b4893bb3d
Checkpoint before redoing a bunch of code for clarity
author | William Astle <lost@l-w.ca> |
---|---|
date | Mon, 24 Jun 2024 23:44:39 -0600 |
parents | 95f174bf459b |
children | 5d4801c0566d |
line wrap: on
line diff
--- a/src/parse.s Sat May 18 00:41:46 2024 -0600 +++ b/src/parse.s Mon Jun 24 23:44:39 2024 -0600 @@ -3,7 +3,13 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; This is the overall parsing package. This is responsible for converting program text into the internal byte code and ; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated -; code analysis. In almost all cases, the returned error will be a syntax error. +; code analysis. In almost all cases, the returned error will be a syntax error. The internal byte code shares the same +; token number allocations as the parser. Some allocated tokens cannot be identified by the lexer (parse_nexttok) but +; are used at runtime and when "decompiling" to text. +; +; In the event of a parse error, everything up to the next end of statement is retained as is using a special token +; that preserves the unparsable text and parsing resumes. Only the first error is referenced by the return error +; pointer. ; ; This is a recursive descent parser. ; @@ -12,14 +18,20 @@ ; B Nonzero to prevent generating any output (error check/length calculation only) ; ; Exit: -; U Points to the encoded line +; X Points to the encoded line ; D Length of the encoded line ; CC.C clear ; Error Exit: -; B Error code -; U Offset to error input +; X Points to the encoded line +; D Length of the encoded line +; Y Pointer to the first error location in the input +; U Error code ; CC.C set +; +; This is the error handler. It is responsible for resetting the stack to bail out to the top level +; parsing loop. It must also store the input pointer if this is the first error. Finally, it has to +; output all the text up to either the end of the line *or* the next valid statement separator. 
parse_errorsn   ldb #err_sn
 parse_error     lds parse_stackptr              ; restore the original stack pointer so we can call from down stack
                 puls u                          ; get back original free pointer
@@ -82,6 +94,11 @@
                 leay 1,y                        ; move to next input character
 parse_curchar   lda ,y                          ; fetch input character
                 rts
+parse_nexttokc  bsr parse_nexttok               ; fetch next token
+parse_iseos     cmpb #token_eot                 ; end of text?
+                beq parse_iseos0                ; brif so
+                cmpb #token_stmtsep             ; is it a statement separator
+parse_iseos0    rts
 parse_nexttok   bsr parse_curchar               ; fetch current input
                 beq parse_nexttok1              ; brif end of input
 parse_nexttok0  cmpa #0x20                      ; space?
@@ -137,10 +154,6 @@
                 bhi parse_toupper0              ; brif not
                 suba #0x20                      ; adjust to upper case alpha
 parse_toupper0  rts                             ; Z only set here if input was zero entering from parse_nextcharu
-parse_iseos     cmpa #token_stmtsep             ; end of statement?
-                beq parse_iseos0                ; brif so
-                cmpa #token_eot                 ; end of text?
-parse_iseos0    rts
 parse_number    jmp parse_tokerr
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Parse a statement that consists of just the command token
@@ -154,49 +167,6 @@
                 stb parse_curtok
                 rts                             ; return, pass back the C result from parse_write
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Parse an optional line number range which may be [lineno][-[lineno]]
-parse_range     jsr parse_write                 ; output the token
-                jsr parse_nexttok               ; fetch input token
-                ldx zero                        ; set default start and end line numbers - whole program
-                leau -1,x
-                pshs x,u
-                bsr parse_iseos                 ; are there arguments?
-                beq parse_range3                ; brif so
-                cmpa #token_int32               ; is it an integer (line number)?
-                bne parse_range0                ; brif not
-                ldd val0+val.int                ; is the upper 16 bits set?
-                beq parse_rangee                ; brif yes - we have an error
-                ldd val0+val.int+2              ; set the start line number
-                std ,s
-                jsr parse_nexttok               ; see what's after the line number
-parse_range0    cmpa #token_minus               ; do we have a range?
-                beq parse_range1                ; brif so
-                bsr parse_iseos                 ; end of statement?
-                bne parse_rangee                ; brif not - error
-                ldd ,s                          ; set end line to start line
-                std 2,s
-                bra parse_range3                ; go output things
-parse_range1    jsr parse_nexttok               ; skip the -
-                bsr parse_iseos                 ; end of statement?
-                beq parse_range3                ; brif so
-                cmpa #token_int32               ; is it an integer?
-                bne parse_rangee                ; brif not
-                ldx val0+val.int                ; upper 16 bits set?
-                bne parse_rangee                ; brif so - invalid number
-                ldx val0+val.int+2              ; get end line number
-                stx 2,s                         ; save end line number
-                cmpx ,s                         ; is end line lower than start line?
-                blo parse_rangee                ; brif so - error
-parse_range3    puls a                          ; write out the range
-                jsr parse_write
-                puls a
-                jsr parse_write
-                puls a
-                jsr parse_write
-                puls a
-                jmp parse_write
-parse_rangee    jmp parse_errorsn               ; go raise the parse error
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
 ;
 ; * two bytes which contain the length of the table less the two bytes for this length value
@@ -277,6 +247,63 @@
                 coma                            ; make sure C is set for no match
                 puls a,x,pc                     ; clean up stack and return
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Validate a line number. Must enter with the token type in B. Will return the line number in X. It will return a
+; syntax error if the line number is invalid or out of range. It will also consume a valid line number token.
+parse_linenum   cmpb #token_int32               ; is it an integer?
+                beq parse_linenum1              ; brif so
+parse_linenum0  ldb #err_sn                     ; flag syntax error
+                coma                            ; flag error
+                rts
+parse_linenum1  ldx val0+val.int                ; get high word of integer
+                bne parse_linenum0              ; brif not a valid line number
+                ldx val0+val.int+2              ; get actual line number
+                pshs x                          ; save it
+                jsr parse_nexttok               ; consume line number
+                puls x,pc                       ; get back line number and return it
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Parse a line number range which is one of the following forms:
+; <linenum1>
+; <linenum1>-
+; <linenum1>-<linenum2>
+; -<linenum2>
+; The result will store two line numbers. If no - token appears, then both line numbers will be the same. Otherwise,
+; if <linenum1> is omitted, it will be assumed to be 0. If <linenum2> is omitted, it will be assumed to be 65535. Those
+; are the minimum and maximum line numbers.
+;
+; Parsing works by first looking for an integer token that is in range. If it finds one, it looks for an optional -
+; followed by an optional integer token that is in range. If the first token is not an integer, it must be a - which may
+; be optionally followed by another integer in range.
+;
+; It is technically valid to have a single - with no line numbers.
+;
+; Enter with the current token in B.
+;
+; Exit: X = start line, U = end line, C clear (A is zeroed). On error: B = err_sn and C set.
+parse_linerange ldx zero                        ; default start line number
+                leau -1,x                       ; default end line number
+                pshs x,u                        ; save the return range (start at ,s and end at 2,s)
+                cmpb #token_minus               ; range with no start?
+                beq parse_linerang1             ; brif so
+                bsr parse_linenum               ; verify line number, return in X
+                bcs parse_linerang4             ; bail out on error
+                stx ,s                          ; save new start line number
+                jsr parse_iseos                 ; parse_linenum already fetched the following token into B; only test it
+                bne parse_linerang0             ; brif not end of line
+                ldx ,s                          ; get start line to use as end line
+                bra parse_linerang2             ; go set range end and return
+parse_linerang0 cmpb #token_minus               ; do we have a range character?
+                bne parse_linerang3             ; brif not - we have an error
+parse_linerang1 jsr parse_nexttokc              ; parse what comes after the range mark
+                beq parse_linerang5             ; brif end of statement - keep the default range end already stacked
+                bsr parse_linenum               ; make sure it's a valid line number
+                bcs parse_linerang4             ; bail out on error
+parse_linerang2 stx 2,s                         ; set range end
+parse_linerang5 clra                            ; make sure C is clear
+                puls x,u,pc                     ; fetch return values and return
+parse_linerang3 ldb #err_sn                     ; flag a syntax error
+                coma                            ; make sure C is set
+parse_linerang4 puls x,u,pc                     ; clean up stack and return error condition
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows:
 ; parse_tokdefT <sym>,<parse>,<list>,<exec>
 ; where:
@@ -369,11 +396,15 @@
                 *pragmapop nolist
                 endm
                 *pragmapop list
+                ; the tokens defined in this section all have special parsing or meaning
                 parse_tokendefp error           ; Used to mark errors; should always be first so it's token #0
                 parse_tokendefp eot             ; End of input marker or special handling in word tables
                 parse_tokendefp int32           ; 32 bit integer (has special parsing)
                 parse_tokendefp float           ; floating point value (has special parsing)
                 parse_tokendefp ident           ; identifier (has special parsing)
+                parse_tokendefp linenum         ; a 16 bit unsigned integer treated as a line number
+                parse_tokendefp linerange       ; a pair of 16 bit unsigned integers treated as line numbers
+                ; everything below here references keywords or particle characters
                 parse_tokendefp stmtsep         ; statement separator
                 parse_tokendefp times           ; times (multiplication) operator (*)
                 parse_tokendefp plus            ; addition operator