Mercurial > hg > index.cgi
comparison src/parse.s @ 132:917b4893bb3d
Checkpoint before redoing a bunch of code for clarity
author | William Astle <lost@l-w.ca> |
---|---|
date | Mon, 24 Jun 2024 23:44:39 -0600 |
parents | 95f174bf459b |
children | 5d4801c0566d |
comparison
equal
deleted
inserted
replaced
131:95f174bf459b | 132:917b4893bb3d |
---|---|
1 *pragmapush list | 1 *pragmapush list |
2 *pragma list | 2 *pragma list |
3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
4 ; This is the overall parsing package. This is responsible for converting program text into the internal byte code and | 4 ; This is the overall parsing package. This is responsible for converting program text into the internal byte code and |
5 ; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated | 5 ; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated |
6 ; code analysis. In almost all cases, the returned error will be a syntax error. | 6 ; code analysis. In almost all cases, the returned error will be a syntax error. The internal byte code shares the same |
7 ; token number allocations as the parser. Some allocated tokens cannot be identified by the lexer (parse_nexttok) but | |
8 ; are used at runtime and when "decompiling" to text. | |
9 ; | |
10 ; In the event of a parse error, everything up to the next end of statement is retained as is using a special token | |
11 ; that preserves the unparsable text and parsing resumes. Only the first error is referenced by the return error | |
12 ; pointer. | |
7 ; | 13 ; |
8 ; This is a recursive descent parser. | 14 ; This is a recursive descent parser. |
9 ; | 15 ; |
10 ; Entry: | 16 ; Entry: |
11 ; X Points to the text to encode | 17 ; X Points to the text to encode |
12 ; B Nonzero to prevent generating any output (error check/length calculation only) | 18 ; B Nonzero to prevent generating any output (error check/length calculation only) |
13 ; | 19 ; |
14 ; Exit: | 20 ; Exit: |
15 ; U Points to the encoded line | 21 ; X Points to the encoded line |
16 ; D Length of the encoded line | 22 ; D Length of the encoded line |
17 ; CC.C clear | 23 ; CC.C clear |
18 | 24 |
19 ; Error Exit: | 25 ; Error Exit: |
20 ; B Error code | 26 ; X Points to the encoded line |
21 ; U Offset to error input | 27 ; D Length of the encoded line |
28 ; Y Pointer to the first error location in the input | |
29 ; U Error code | |
22 ; CC.C set | 30 ; CC.C set |
31 ; | |
32 ; This is the error handler. It is responsible for resetting the stack to bail out to the top level | |
33 ; parsing loop. It must also store the input pointer if this is the first error. Finally, it has to | |
34 ; output all the text up to either the end of the line *or* the next valid statement separator. | |
23 parse_errorsn ldb #err_sn | 35 parse_errorsn ldb #err_sn |
24 parse_error lds parse_stackptr ; restore the original stack pointer so we can call from down stack | 36 parse_error lds parse_stackptr ; restore the original stack pointer so we can call from down stack |
25 puls u ; get back original free pointer | 37 puls u ; get back original free pointer |
26 stu freestart ; deallocate any allocated result | 38 stu freestart ; deallocate any allocated result |
27 ldu parse_tokenst ; get start location of the token where the error was raised | 39 ldu parse_tokenst ; get start location of the token where the error was raised |
80 parse_nextchar lda ,y ; at end of input already? | 92 parse_nextchar lda ,y ; at end of input already? |
81 beq parse_curchar ; brif so | 93 beq parse_curchar ; brif so |
82 leay 1,y ; move to next input character | 94 leay 1,y ; move to next input character |
83 parse_curchar lda ,y ; fetch input character | 95 parse_curchar lda ,y ; fetch input character |
84 rts | 96 rts |
97 parse_nexttokc bsr parse_nexttok ; fetch next token | |
98 parse_iseos cmpb #token_eot ; end of text? | |
99 beq parse_iseos0 ; brif so | |
100 cmpb #token_stmtsep ; is it a statement separator | |
101 parse_iseos0 rts | |
85 parse_nexttok bsr parse_curchar ; fetch current input | 102 parse_nexttok bsr parse_curchar ; fetch current input |
86 beq parse_nexttok1 ; brif end of input | 103 beq parse_nexttok1 ; brif end of input |
87 parse_nexttok0 cmpa #0x20 ; space? | 104 parse_nexttok0 cmpa #0x20 ; space? |
88 bne parse_nexttok2 ; brif not | 105 bne parse_nexttok2 ; brif not |
89 bsr parse_nextchar ; eat the space | 106 bsr parse_nextchar ; eat the space |
135 blo parse_toupper0 ; brif not | 152 blo parse_toupper0 ; brif not |
136 cmpa #'z ; is it still lower case alpha? | 153 cmpa #'z ; is it still lower case alpha? |
137 bhi parse_toupper0 ; brif not | 154 bhi parse_toupper0 ; brif not |
138 suba #0x20 ; adjust to upper case alpha | 155 suba #0x20 ; adjust to upper case alpha |
139 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu | 156 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu |
140 parse_iseos cmpa #token_stmtsep ; end of statement? | |
141 beq parse_iseos0 ; brif so | |
142 cmpa #token_eot ; end of text? | |
143 parse_iseos0 rts | |
144 parse_number jmp parse_tokerr | 157 parse_number jmp parse_tokerr |
145 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 158 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
146 ; Parse a statement that consists of just the command token | 159 ; Parse a statement that consists of just the command token |
147 parse_cmdsingle equ parse_write ; just write the token out and bail | 160 parse_cmdsingle equ parse_write ; just write the token out and bail |
148 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 161 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
151 ldb ,y+ ; get next input character | 164 ldb ,y+ ; get next input character |
152 bne parse_rem ; brif not at the end of the input | 165 bne parse_rem ; brif not at the end of the input |
153 ldb #token_eot ; flag end of input for mainline parser | 166 ldb #token_eot ; flag end of input for mainline parser |
154 stb parse_curtok | 167 stb parse_curtok |
155 rts ; return, pass back the C result from parse_write | 168 rts ; return, pass back the C result from parse_write |
156 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
157 ; Parse an optional line number range which may be [lineno][-[lineno]] | |
158 parse_range jsr parse_write ; output the token | |
159 jsr parse_nexttok ; fetch input token | |
160 ldx zero ; set default start and end line numbers - whole program | |
161 leau -1,x | |
162 pshs x,u | |
163 bsr parse_iseos ; are there arguments? | |
164 beq parse_range3 ; brif not - end of statement, keep the whole program defaults | |
165 cmpa #token_int32 ; is it an integer (line number)? | |
166 bne parse_range0 ; brif not | |
167 ldd val0+val.int ; is the upper 16 bits set? | |
168 beq parse_rangee ; brif yes - we have an error | |
169 ldd val0+val.int+2 ; set the start line number | |
170 std ,s | |
171 jsr parse_nexttok ; see what's after the line number | |
172 parse_range0 cmpa #token_minus ; do we have a range? | |
173 beq parse_range1 ; brif so | |
174 bsr parse_iseos ; end of statement? | |
175 bne parse_rangee ; brif not - error | |
176 ldd ,s ; set end line to start line | |
177 std 2,s | |
178 bra parse_range3 ; go output things | |
179 parse_range1 jsr parse_nexttok ; skip the - | |
180 bsr parse_iseos ; end of statement? | |
181 beq parse_range3 ; brif so | |
182 cmpa #token_int32 ; is it an integer? | |
183 bne parse_rangee ; brif not | |
184 ldx val0+val.int ; upper 16 bits set? | |
185 bne parse_rangee ; brif so - invalid number | |
186 ldx val0+val.int+2 ; get end line number | |
187 stx 2,s ; save end line number | |
188 cmpx ,s ; is end line lower than start line? | |
189 blo parse_rangee ; brif so - error | |
190 parse_range3 puls a ; write out the range | |
191 jsr parse_write | |
192 puls a | |
193 jsr parse_write | |
194 puls a | |
195 jsr parse_write | |
196 puls a | |
197 jmp parse_write | |
198 parse_rangee jmp parse_errorsn ; go raise the parse error | |
199 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 169 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
200 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows: | 170 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows: |
201 ; | 171 ; |
202 ; * two bytes which contain the length of the table less the two bytes for this length value | 172 ; * two bytes which contain the length of the table less the two bytes for this length value |
203 ; * a sequence of entries consisting of a single byte matching character and a token code followed | 173 ; * a sequence of entries consisting of a single byte matching character and a token code followed |
275 parse_wtdc6 cmpx 1,s ; are we at the end of this table? | 245 parse_wtdc6 cmpx 1,s ; are we at the end of this table? |
276 bne parse_wtdc3 ; brif not - handle another table entry | 246 bne parse_wtdc3 ; brif not - handle another table entry |
277 coma ; make sure C is set for no match | 247 coma ; make sure C is set for no match |
278 puls a,x,pc ; clean up stack and return | 248 puls a,x,pc ; clean up stack and return |
279 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 249 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
250 ; Validate a line number. Must enter with the token type in B. Will return the line number in X. It will return a | |
251 ; syntax error if the line number is invalid or out of range. It will also consume a valid line number token. | |
252 parse_linenum cmpb #token_int32 ; is it an integer? | |
253 beq parse_linenum1 ; brif so | |
254 parse_linenum0 ldb #err_sn ; flag syntax error | |
255 coma ; flag error | |
256 rts | |
257 parse_linenum1 ldx val0+val.int ; get high word of integer | |
258 bne parse_linenum0 ; brif not a valid line number | |
259 ldx val0+val.int+2 ; get actual line number | |
260 pshs x ; save it | |
261 jsr parse_nexttok ; consume line number | |
262 puls x,pc ; get back line number and return it | |
263 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
264 ; Parse a line number range which is one of the following forms: | |
265 ; <linenum1> | |
266 ; <linenum1>- | |
267 ; <linenum1>-<linenum2> | |
268 ; -<linenum2> | |
269 ; The result will store two line numbers. If no - token appears, then both line numbers will be the same. Otherwise, | |
270 ; if <linenum1> is omitted, it will be assumed to be 0. If <linenum2> is omitted, it will be assumed to be 65535. Those | |
271 ; are the minimum and maximum line numbers. | |
272 ; | |
273 ; Parsing works by first looking for an integer token that is in range. If it finds one, it looks for an optional - | |
274 ; followed by an optional integer token that is in range. If the first token is not an integer, it must be a - which may | |
275 ; be optionally followed by another integer in range. | |
276 ; | |
277 ; It is technically valid to have a single - with no line numbers. | |
278 ; | |
279 ; Enter with the current token in B. | |
280 ; | |
281 ; The resulting line numbers are returned in X (range start) and U (range end) | |
282 parse_linerange ldx zero ; default start line number | |
283 leau -1,x ; default end line number | |
284 pshs x,u ; save the return range | |
285 cmpb #token_minus ; range with no start? | |
286 beq parse_linerang1 ; brif so | |
287 bsr parse_linenum ; verify line number, return in X | |
288 bcs parse_linerang4 ; bail out on error | |
289 stx ,s ; save new start line number | |
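290 jsr parse_nexttokc ; fetch next token, set Z if end of statement - NOTE(review): parse_linenum already consumed the line number token, so this appears to skip the token that follows it; confirm | |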
291 bne parse_linerang0 ; brif not end of line | |
292 ldx ,s ; get start line to use as end line | |
293 bra parse_linerang2 ; go set range end and return | |
294 parse_linerang0 cmpb #token_minus ; do we have a range character? | |
295 bne parse_linerang3 ; brif not - we have an error | |
296 parse_linerang1 jsr parse_nexttokc ; parse what comes after the range mark | |
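297 beq parse_linerang2 ; brif end of statement - intended to keep the default range end, but NOTE(review): the stx 2,s at parse_linerang2 overwrites that default with whatever is in X | |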
298 bsr parse_linenum ; make sure it's a valid line number | |
299 bcs parse_linerang4 ; bail out on error | |
300 parse_linerang2 stx 2,s ; set range end | |
301 clra ; make sure C is clear | |
302 puls x,u,pc ; fetch return values and return | |
303 parse_linerang3 ldb #err_sn ; flag a syntax error | |
304 coma ; make sure C is set | |
305 parse_linerang4 puls x,u,pc ; clean up stack and return error condition | |
306 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
280 ; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows: | 307 ; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows: |
281 ; parse_tokdefT <sym>,<parse>,<list>,<exec> | 308 ; parse_tokdefT <sym>,<parse>,<list>,<exec> |
282 ; where: | 309 ; where: |
283 ; T: c for command, f for function, p for particle | 310 ; T: c for command, f for function, p for particle |
284 ; <sym>: the symbol name without the "token_" prefix | 311 ; <sym>: the symbol name without the "token_" prefix |
367 includestr "%(__fnexect)" | 394 includestr "%(__fnexect)" |
368 token__maxfn equ __toknumf-1 | 395 token__maxfn equ __toknumf-1 |
369 *pragmapop nolist | 396 *pragmapop nolist |
370 endm | 397 endm |
371 *pragmapop list | 398 *pragmapop list |
399 ; the tokens defined in this section all have special parsing or meaning | |
372 parse_tokendefp error ; Used to mark errors; should always be first so it's token #0 | 400 parse_tokendefp error ; Used to mark errors; should always be first so it's token #0 |
373 parse_tokendefp eot ; End of input marker or special handling in word tables | 401 parse_tokendefp eot ; End of input marker or special handling in word tables |
374 parse_tokendefp int32 ; 32 bit integer (has special parsing) | 402 parse_tokendefp int32 ; 32 bit integer (has special parsing) |
375 parse_tokendefp float ; floating point value (has special parsing) | 403 parse_tokendefp float ; floating point value (has special parsing) |
376 parse_tokendefp ident ; identifier (has special parsing) | 404 parse_tokendefp ident ; identifier (has special parsing) |
405 parse_tokendefp linenum ; a 16 bit unsigned integer treated as a line number | |
406 parse_tokendefp linerange ; a pair of 16 bit unsigned integers treated as line numbers | |
407 ; everything below here references keywords or particle characters | |
377 parse_tokendefp stmtsep ; statement separator | 408 parse_tokendefp stmtsep ; statement separator |
378 parse_tokendefp times ; times (multiplication) operator (*) | 409 parse_tokendefp times ; times (multiplication) operator (*) |
379 parse_tokendefp plus ; addition operator | 410 parse_tokendefp plus ; addition operator |
380 parse_tokendefp divide ; division operator (/) | 411 parse_tokendefp divide ; division operator (/) |
381 parse_tokendefp minus ; subtraction operator | 412 parse_tokendefp minus ; subtraction operator |