comparison src/parse.s @ 132:917b4893bb3d

Checkpoint before redoing a bunch of code for clarity
author William Astle <lost@l-w.ca>
date Mon, 24 Jun 2024 23:44:39 -0600
parents 95f174bf459b
children 5d4801c0566d
comparison
equal deleted inserted replaced
131:95f174bf459b 132:917b4893bb3d
1 *pragmapush list 1 *pragmapush list
2 *pragma list 2 *pragma list
3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4 ; This is the overall parsing package. This is responsible for converting program text into the internal byte code and 4 ; This is the overall parsing package. This is responsible for converting program text into the internal byte code and
5 ; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated 5 ; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated
6 ; code analysis. In almost all cases, the returned error will be a syntax error. 6 ; code analysis. In almost all cases, the returned error will be a syntax error. The internal byte code shares the same
7 ; token number allocations as the parser. Some allocated tokens cannot be identified by the lexer (parse_nexttok) but
8 ; are used at runtime and when "decompiling" to text.
9 ;
10 ; In the event of a parse error, everything up to the next end of statement is retained as is using a special token
11 ; that preserves the unparsable text and parsing resumes. Only the first error is referenced by the return error
12 ; pointer.
7 ; 13 ;
8 ; This is a recursive descent parser. 14 ; This is a recursive descent parser.
9 ; 15 ;
10 ; Entry: 16 ; Entry:
11 ; X Points to the text to encode 17 ; X Points to the text to encode
12 ; B Nonzero to prevent generating any output (error check/length calculation only) 18 ; B Nonzero to prevent generating any output (error check/length calculation only)
13 ; 19 ;
14 ; Exit: 20 ; Exit:
15 ; U Points to the encoded line 21 ; X Points to the encoded line
16 ; D Length of the encoded line 22 ; D Length of the encoded line
17 ; CC.C clear 23 ; CC.C clear
18 24
19 ; Error Exit: 25 ; Error Exit:
20 ; B Error code 26 ; X Points to the encoded line
21 ; U Offset to error input 27 ; D Length of the encoded line
28 ; Y Pointer to the first error location in the input
29 ; U Error code
22 ; CC.C set 30 ; CC.C set
31 ;
32 ; This is the error handler. It is responsible for resetting the stack to bail out to the top level
33 ; parsing loop. It must also store the input pointer if this is the first error. Finally, it has to
34 ; output all the text up to either the end of the line *or* the next valid statement separator.
23 parse_errorsn ldb #err_sn 35 parse_errorsn ldb #err_sn
24 parse_error lds parse_stackptr ; restore the original stack pointer so we can call from down stack 36 parse_error lds parse_stackptr ; restore the original stack pointer so we can call from down stack
25 puls u ; get back original free pointer 37 puls u ; get back original free pointer
26 stu freestart ; deallocate any allocated result 38 stu freestart ; deallocate any allocated result
27 ldu parse_tokenst ; get start location of the token where the error was raised 39 ldu parse_tokenst ; get start location of the token where the error was raised
80 parse_nextchar lda ,y ; at end of input already? 92 parse_nextchar lda ,y ; at end of input already?
81 beq parse_curchar ; brif so 93 beq parse_curchar ; brif so
82 leay 1,y ; move to next input character 94 leay 1,y ; move to next input character
83 parse_curchar lda ,y ; fetch input character 95 parse_curchar lda ,y ; fetch input character
84 rts 96 rts
97 parse_nexttokc bsr parse_nexttok ; fetch next token, then fall through to classify it
98 parse_iseos cmpb #token_eot ; end of text?
99 beq parse_iseos0 ; brif so - return with Z set
100 cmpb #token_stmtsep ; is it a statement separator? Z set if so, clear otherwise
101 parse_iseos0 rts ; Z set on return means the token in B ends the statement
85 parse_nexttok bsr parse_curchar ; fetch current input 102 parse_nexttok bsr parse_curchar ; fetch current input
86 beq parse_nexttok1 ; brif end of input 103 beq parse_nexttok1 ; brif end of input
87 parse_nexttok0 cmpa #0x20 ; space? 104 parse_nexttok0 cmpa #0x20 ; space?
88 bne parse_nexttok2 ; brif not 105 bne parse_nexttok2 ; brif not
89 bsr parse_nextchar ; eat the space 106 bsr parse_nextchar ; eat the space
135 blo parse_toupper0 ; brif not 152 blo parse_toupper0 ; brif not
136 cmpa #'z ; is it still lower case alpha? 153 cmpa #'z ; is it still lower case alpha?
137 bhi parse_toupper0 ; brif not 154 bhi parse_toupper0 ; brif not
138 suba #0x20 ; adjust to upper case alpha 155 suba #0x20 ; adjust to upper case alpha
139 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu 156 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu
140 parse_iseos cmpa #token_stmtsep ; end of statement?
141 beq parse_iseos0 ; brif so
142 cmpa #token_eot ; end of text?
143 parse_iseos0 rts
144 parse_number jmp parse_tokerr 157 parse_number jmp parse_tokerr
145 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 158 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
146 ; Parse a statement that consists of just the command token 159 ; Parse a statement that consists of just the command token
147 parse_cmdsingle equ parse_write ; just write the token out and bail 160 parse_cmdsingle equ parse_write ; just write the token out and bail
148 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 161 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
151 ldb ,y+ ; get next input character 164 ldb ,y+ ; get next input character
152 bne parse_rem ; brif not at the end of the input 165 bne parse_rem ; brif not at the end of the input
153 ldb #token_eot ; flag end of input for mainline parser 166 ldb #token_eot ; flag end of input for mainline parser
154 stb parse_curtok 167 stb parse_curtok
155 rts ; return, pass back the C result from parse_write 168 rts ; return, pass back the C result from parse_write
156 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
157 ; Parse an optional line number range which may be [lineno][-[lineno]]
158 parse_range jsr parse_write ; output the token
159 jsr parse_nexttok ; fetch input token
160 ldx zero ; set default start and end line numbers - whole program
161 leau -1,x
162 pshs x,u
163 bsr parse_iseos ; are there arguments?
164 beq parse_range3 ; brif so
165 cmpa #token_int32 ; is it an integer (line number)?
166 bne parse_range0 ; brif not
167 ldd val0+val.int ; is the upper 16 bits set?
168 beq parse_rangee ; brif yes - we have an error
169 ldd val0+val.int+2 ; set the start line number
170 std ,s
171 jsr parse_nexttok ; see what's after the line number
172 parse_range0 cmpa #token_minus ; do we have a range?
173 beq parse_range1 ; brif so
174 bsr parse_iseos ; end of statement?
175 bne parse_rangee ; brif not - error
176 ldd ,s ; set end line to start line
177 std 2,s
178 bra parse_range3 ; go output things
179 parse_range1 jsr parse_nexttok ; skip the -
180 bsr parse_iseos ; end of statement?
181 beq parse_range3 ; brif so
182 cmpa #token_int32 ; is it an integer?
183 bne parse_rangee ; brif not
184 ldx val0+val.int ; upper 16 bits set?
185 bne parse_rangee ; brif so - invalid number
186 ldx val0+val.int+2 ; get end line number
187 stx 2,s ; save end line number
188 cmpx ,s ; is end line lower than start line?
189 blo parse_rangee ; brif so - error
190 parse_range3 puls a ; write out the range
191 jsr parse_write
192 puls a
193 jsr parse_write
194 puls a
195 jsr parse_write
196 puls a
197 jmp parse_write
198 parse_rangee jmp parse_errorsn ; go raise the parse error
199 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 169 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
200 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows: 170 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
201 ; 171 ;
202 ; * two bytes which contain the length of the table less the two bytes for this length value 172 ; * two bytes which contain the length of the table less the two bytes for this length value
203 ; * a sequence of entries consisting of a single byte matching character and a token code followed 173 ; * a sequence of entries consisting of a single byte matching character and a token code followed
275 parse_wtdc6 cmpx 1,s ; are we at the end of this table? 245 parse_wtdc6 cmpx 1,s ; are we at the end of this table?
276 bne parse_wtdc3 ; brif not - handle another table entry 246 bne parse_wtdc3 ; brif not - handle another table entry
277 coma ; make sure C is set for no match 247 coma ; make sure C is set for no match
278 puls a,x,pc ; clean up stack and return 248 puls a,x,pc ; clean up stack and return
279 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 249 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
250 ; Validate a line number token. Entry: B = token type, with the integer value in val0. Exit: X = line number and the
251 ; token has been consumed via parse_nexttok. Error: B = err_sn, C set (token is not an integer or exceeds 16 bits).
252 parse_linenum cmpb #token_int32 ; is it an integer?
253 beq parse_linenum1 ; brif so
254 parse_linenum0 ldb #err_sn ; flag syntax error
255 coma ; set C to flag the error (A is modified as a side effect)
256 rts ; error return: no token has been consumed
257 parse_linenum1 ldx val0+val.int ; get high word of integer
258 bne parse_linenum0 ; brif nonzero - value does not fit in 16 bits
259 ldx val0+val.int+2 ; get actual line number (low word)
260 pshs x ; save it across the call below
261 jsr parse_nexttok ; consume line number; NOTE(review): C on success is whatever this leaves - confirm it is clear
262 puls x,pc ; get back line number and return it
263 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
264 ; Parse a line number range which is one of the following forms:
265 ; <linenum1>
266 ; <linenum1>-
267 ; <linenum1>-<linenum2>
268 ; -<linenum2>
269 ; The result holds two line numbers. If no - token appears, then both line numbers will be the same. Otherwise,
270 ; if <linenum1> is omitted, it will be assumed to be 0. If <linenum2> is omitted, it will be assumed to be 65535. Those
271 ; are the minimum and maximum line numbers.
272 ;
273 ; Parsing works by first looking for an integer token that is in range. If it finds one, it looks for an optional -
274 ; followed by an optional integer token that is in range. If the first token is not an integer, it must be a - which may
275 ; be optionally followed by another integer in range.
276 ;
277 ; It is technically valid to have a single - with no line numbers; that yields the full range 0 to 65535.
278 ;
279 ; Entry: B = current token.
280 ; Exit: C clear, X = start line number, U = end line number. Error exit: C set, B = error code, X and U not meaningful.
281 ; NOTE(review): nothing in this routine writes parse_buff; a caller wanting the range there must store X and U itself.
282 parse_linerange ldx zero ; X = 0, the default start line number
283 leau -1,x ; U = 65535, the default end line number
284 pshs x,u ; build the result frame: ,s = start line, 2,s = end line
285 cmpb #token_minus ; range with no start?
286 beq parse_linerang1 ; brif so - keep the default start
287 bsr parse_linenum ; verify line number, return in X; this also consumes the number
288 bcs parse_linerang4 ; bail out on error
289 stx ,s ; save new start line number
290 jsr parse_iseos ; parse_linenum already fetched the following token, so only test it here
291 bne parse_linerang0 ; brif not end of statement
292 ldx ,s ; single line number: use the start line as the end line
293 bra parse_linerang2 ; go set range end and return
294 parse_linerang0 cmpb #token_minus ; do we have a range character?
295 bne parse_linerang3 ; brif not - we have an error
296 parse_linerang1 jsr parse_nexttokc ; skip the range mark and fetch what comes after it
297 beq parse_linerang5 ; brif end of statement - keep the default range end already in the frame
298 bsr parse_linenum ; make sure it's a valid line number
299 bcs parse_linerang4 ; bail out on error
300 parse_linerang2 stx 2,s ; set range end
301 parse_linerang5 clra ; make sure C is clear
302 puls x,u,pc ; fetch return values and return
303 parse_linerang3 ldb #err_sn ; flag a syntax error
304 coma ; make sure C is set
305 parse_linerang4 puls x,u,pc ; clean up stack and return error condition
306 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
280 ; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows: 307 ; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows:
281 ; parse_tokendefT <sym>,<parse>,<list>,<exec> 308 ; parse_tokendefT <sym>,<parse>,<list>,<exec>
282 ; where: 309 ; where:
283 ; T: c for command, f for function, p for particle 310 ; T: c for command, f for function, p for particle
284 ; <sym>: the symbol name without the "token_" prefix 311 ; <sym>: the symbol name without the "token_" prefix
367 includestr "%(__fnexect)" 394 includestr "%(__fnexect)"
368 token__maxfn equ __toknumf-1 395 token__maxfn equ __toknumf-1
369 *pragmapop nolist 396 *pragmapop nolist
370 endm 397 endm
371 *pragmapop list 398 *pragmapop list
399 ; the tokens defined in this section all have special parsing or meaning
372 parse_tokendefp error ; Used to mark errors; should always be first so it's token #0 400 parse_tokendefp error ; Used to mark errors; should always be first so it's token #0
373 parse_tokendefp eot ; End of input marker or special handling in word tables 401 parse_tokendefp eot ; End of input marker or special handling in word tables
374 parse_tokendefp int32 ; 32 bit integer (has special parsing) 402 parse_tokendefp int32 ; 32 bit integer (has special parsing)
375 parse_tokendefp float ; floating point value (has special parsing) 403 parse_tokendefp float ; floating point value (has special parsing)
376 parse_tokendefp ident ; identifier (has special parsing) 404 parse_tokendefp ident ; identifier (has special parsing)
405 parse_tokendefp linenum ; a 16 bit unsigned integer treated as a line number
406 parse_tokendefp linerange ; a pair of 16 bit unsigned integers treated as line numbers
407 ; everything below here references keywords or particle characters
377 parse_tokendefp stmtsep ; statement separator 408 parse_tokendefp stmtsep ; statement separator
378 parse_tokendefp times ; times (multiplication) operator (*) 409 parse_tokendefp times ; times (multiplication) operator (*)
379 parse_tokendefp plus ; addition operator 410 parse_tokendefp plus ; addition operator
380 parse_tokendefp divide ; division operator (/) 411 parse_tokendefp divide ; division operator (/)
381 parse_tokendefp minus ; subtraction operator 412 parse_tokendefp minus ; subtraction operator