LWOS: src/parse.s comparison

comparison src/parse.s @ 132:917b4893bb3d

Checkpoint before redoing a bunch of code for clarity

author	William Astle <lost@l-w.ca>
date	Mon, 24 Jun 2024 23:44:39 -0600
parents	95f174bf459b
children	5d4801c0566d

comparison

equal deleted inserted replaced

-:95f174bf459b
+:917b4893bb3d
 *pragmapush list
 *pragma list
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This is the overall parsing package. This is responsible for converting program text into the internal byte code and
 ; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated
-; code analysis. In almost all cases, the returned error will be a syntax error.
+; code analysis. In almost all cases, the returned error will be a syntax error. The internal byte code shares the same
+; token number allocations as the parser. Some allocated tokens cannot be identified by the lexer (parse_nexttok) but
+; are used at runtime and when "decompiling" to text.
+;
+; In the event of a parse error, everything up to the next end of statement is retained as is using a special token
+; that preserves the unparsable text and parsing resumes. Only the first error is referenced by the return error
+; pointer.
 ;
 ; This is a recursive descent parser.
 ;
 ; Entry:
 ; X             Points to the text to encode
 ; B             Nonzero to prevent generating any output (error check/length calculation only)
 ;
 ; Exit:
-; U             Points to the encoded line
+; X             Points to the encoded line
 ; D             Length of the encoded line
 ; CC.C          clear
 ; Error Exit:
-; B             Error code
+; X             Points to the encoded line
-; U             Offset to error input
+; D             Length of the encoded line
+; Y             Pointer to the first error location in the input
+; U             Error code
 ; CC.C          set
+;
+; This is the error handler. It is responsible for resetting the stack to bail out to the top level
+; parsing loop. It must also store the input pointer if this is the first error. Finally, it has to
+; output all the text up to either the end of the line *or* the next valid statement separator.
 parse_errorsn   ldb #err_sn
 parse_error     lds parse_stackptr              ; restore the original stack pointer so we can call from down stack
 puls u                          ; get back original free pointer
 stu freestart                   ; deallocate any allocated result
 ldu parse_tokenst               ; get start location of the token where the error was raised
 ; parse_nextchar: advance the input pointer (Y) by one character unless it already points at a zero byte
 ; (end of input), then fall through to parse_curchar.
 ; parse_curchar: load the character at Y into A. The load sets Z when the character is zero (end of input).
 parse_nextchar  lda ,y                          ; at end of input already?
 beq parse_curchar               ; brif so
 leay 1,y                        ; move to next input character
 parse_curchar   lda ,y                          ; fetch input character
 rts
+; parse_nexttokc: call parse_nexttok and then fall through to parse_iseos to classify the token it left in B.
+; parse_iseos: test the token in B. Returns with Z set if B is token_eot or token_stmtsep and Z clear otherwise.
+; B is not modified by the test; the other condition flags reflect the last compare performed.
+parse_nexttokc  bsr parse_nexttok               ; fetch next token
+parse_iseos     cmpb #token_eot                 ; end of text?
+beq parse_iseos0                ; brif so
+cmpb #token_stmtsep             ; is it a statement separator
+parse_iseos0    rts
 parse_nexttok   bsr parse_curchar               ; fetch current input
 beq parse_nexttok1              ; brif end of input
 parse_nexttok0  cmpa #0x20                      ; space?
 bne parse_nexttok2              ; brif not
 bsr parse_nextchar              ; eat the space
 blo parse_toupper0              ; brif not
 cmpa #'z                        ; is it still lower case alpha?
 bhi parse_toupper0              ; brif not
 suba #0x20                      ; adjust to upper case alpha
 parse_toupper0  rts                             ; Z only set here if input was zero entering from parse_nextcharu
-parse_iseos     cmpa #token_stmtsep             ; end of statement?
-beq parse_iseos0                ; brif so
-cmpa #token_eot                 ; end of text?
-parse_iseos0    rts
 parse_number    jmp parse_tokerr
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Parse a statement that consists of just the command token
 parse_cmdsingle equ parse_write                 ; just write the token out and bail
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ldb ,y+                         ; get next input character
 bne parse_rem                   ; brif not at the end of the input
 ldb #token_eot                  ; flag end of input for mainline parser
 stb parse_curtok
 rts                             ; return, pass back the C result from parse_write
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Parse an optional line number range which may be [lineno][-[lineno]]
-parse_range     jsr parse_write                 ; output the token
-jsr parse_nexttok               ; fetch input token
-ldx zero                        ; set default start and end line numbers - whole program
-leau -1,x
-pshs x,u
-bsr parse_iseos                 ; are there arguments?
-beq parse_range3                ; brif so
-cmpa #token_int32               ; is it an integer (line number)?
-bne parse_range0                ; brif not
-ldd val0+val.int                ; is the upper 16 bits set?
-beq parse_rangee                ; brif yes - we have an error
-ldd val0+val.int+2              ; set the start line number
-std ,s
-jsr parse_nexttok               ; see what's after the line number
-parse_range0    cmpa #token_minus               ; do we have a range?
-beq parse_range1                ; brif so
-bsr parse_iseos                 ; end of statement?
-bne parse_rangee                ; brif not - error
-ldd ,s                          ; set end line to start line
-std 2,s
-bra parse_range3                ; go output things
-parse_range1    jsr parse_nexttok               ; skip the -
-bsr parse_iseos                 ; end of statement?
-beq parse_range3                ; brif so
-cmpa #token_int32               ; is it an integer?
-bne parse_rangee                ; brif not
-ldx val0+val.int                ; upper 16 bits set?
-bne parse_rangee                ; brif so - invalid number
-ldx val0+val.int+2              ; get end line number
-stx 2,s                         ; save end line number
-cmpx ,s                         ; is end line lower than start line?
-blo parse_rangee                ; brif so - error
-parse_range3    puls a                          ; write out the range
-jsr parse_write
-puls a
-jsr parse_write
-puls a
-jsr parse_write
-puls a
-jmp parse_write
-parse_rangee    jmp parse_errorsn               ; go raise the parse error
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
 ;
 ; * two bytes which contain the length of the table less the two bytes for this length value
 ; * a sequence of entries consisting of a single byte matching character and a token code followed
 parse_wtdc6     cmpx 1,s                        ; are we at the end of this table?
 bne parse_wtdc3                 ; brif not - handle another table entry
 coma                            ; make sure C is set for no match
 puls a,x,pc                     ; clean up stack and return
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Validate a line number. Must enter with the token type in B. Will return the line number in X. It will return a
+; syntax error if the line number is invalid or out of range. It will also consume a valid line number token.
+;
+; A token is accepted only if it is token_int32 and the upper 16 bits of the value in val0+val.int are zero.
+;
+; Exit (valid line number):
+; X             The line number (low 16 bits of val0+val.int)
+; B             Whatever parse_nexttok left there (the token following the line number)
+; Error exit:
+; B             err_sn
+; A             Complemented (side effect of using coma to set carry)
+; CC.C          set
+;
+; NOTE(review): on the valid path the carry flag is whatever parse_nexttok returns; nothing here clears it and
+; puls x,pc does not touch the condition codes. Callers test carry with bcs - confirm parse_nexttok returns with C clear.
+parse_linenum   cmpb #token_int32               ; is it an integer?
+beq parse_linenum1              ; brif so
+parse_linenum0  ldb #err_sn                     ; flag syntax error
+coma                            ; flag error
+rts
+parse_linenum1  ldx val0+val.int                ; get high word of integer
+bne parse_linenum0              ; brif not a valid line number
+ldx val0+val.int+2              ; get actual line number
+pshs x                          ; save it
+jsr parse_nexttok               ; consume line number
+puls x,pc                       ; get back line number and return it
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Parse a line number range which is one of the following forms:
+; <linenum1>
+; <linenum1>-
+; <linenum1>-<linenum2>
+; -<linenum2>
+; The result will store two line numbers. If no - token appears, then both line numbers will be the same. Otherwise,
+; if <linenum1> is omitted, it will be assumed to be 0. If <linenum2> is omitted, it will be assumed to be 65535. Those
+; are the minimum and maximum line numbers.
+;
+; Parsing works by first looking for an integer token that is in range. If it finds one, it looks for an optional -
+; followed by an optional integer token that is in range. If the first token is not an integer, it must be a - which may
+; be optionally followed by another integer in range.
+;
+; It is technically valid to have a single - with no line numbers.
+;
+; Enter with the current token in B.
+;
+; Exit:
+; X             Start line number
+; U             End line number
+; CC.C          clear
+; Error Exit:
+; B             Error code
+; CC.C          set
+;
+; A is modified on both exits (clra/coma are used to force the carry state).
+;
+; parse_linenum consumes the line number it validates, so on return from it B already holds the token that follows
+; the number. That token is classified with parse_iseos; fetching again at that point would skip over it.
+parse_linerange ldx zero                        ; default start line number
+leau -1,x                       ; default end line number
+pshs x,u                        ; save the return range (start at ,s and end at 2,s)
+cmpb #token_minus               ; range with no start?
+beq parse_linerang1             ; brif so
+bsr parse_linenum               ; verify line number, return in X, B is now the following token
+bcs parse_linerang4             ; bail out on error
+stx ,s                          ; save new start line number
+jsr parse_iseos                 ; set Z if the token after the line number ends the statement
+bne parse_linerang0             ; brif not end of statement
+ldx ,s                          ; get start line to use as end line
+bra parse_linerang2             ; go set range end and return
+parse_linerang0 cmpb #token_minus               ; do we have a range character?
+bne parse_linerang3             ; brif not - we have an error
+parse_linerang1 jsr parse_nexttokc              ; parse what comes after the range mark
+beq parse_linerang5             ; brif end of statement - keep the default range end already on the stack
+bsr parse_linenum               ; make sure it's a valid line number
+bcs parse_linerang4             ; bail out on error
+parse_linerang2 stx 2,s                         ; set range end
+parse_linerang5 clra                            ; make sure C is clear
+puls x,u,pc                     ; fetch return values and return
+parse_linerang3 ldb #err_sn                     ; flag a syntax error
+coma                            ; make sure C is set
+parse_linerang4 puls x,u,pc                     ; clean up stack and return error condition
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows:
 ;               parse_tokendefT <sym>,<parse>,<list>,<exec>
 ; where:
 ; T: c for command, f for function, p for particle
 ; <sym>: the symbol name without the "token_" prefix
 includestr "%(__fnexect)"
 token__maxfn    equ __toknumf-1
 *pragmapop nolist
 endm
 *pragmapop list
+; the tokens defined in this section all have special parsing or meaning
 parse_tokendefp error           ; Used to mark errors; should always be first so it's token #0
 parse_tokendefp eot             ; End of input marker or special handling in word tables
 parse_tokendefp int32           ; 32 bit integer (has special parsing)
 parse_tokendefp float           ; floating point value (has special parsing)
 parse_tokendefp ident           ; identifier (has special parsing)
+parse_tokendefp linenum         ; a 16 bit unsigned integer treated as a line number
+parse_tokendefp linerange       ; a pair of 16 bit unsigned integers treated as line numbers
+; everything below here references keywords or particle characters
 parse_tokendefp stmtsep         ; statement separator
 parse_tokendefp times           ; times (multiplication) operator (*)
 parse_tokendefp plus            ; addition operator
 parse_tokendefp divide          ; division operator (/)
 parse_tokendefp minus           ; subtraction operator

Mercurial > hg > index.cgi

comparison src/parse.s @ 132:917b4893bb3d