Mercurial > hg > index.cgi
comparison src/parse.s @ 126:ac183a519439
Update parsing scheme with a keyword lookup by token value and other framework
Add ability to turn a token code into a keyword string. Also correct some
details related to token table generation with some additional adjustments
for token symbols.
Also rework token symbol definitions and creation of some parsing tables as
well as the main statement parsing loop.
author | William Astle <lost@l-w.ca> |
---|---|
date | Mon, 08 Jan 2024 22:58:08 -0700 |
parents | 0607e4e20702 |
children | 527212870064 |
comparison
equal
deleted
inserted
replaced
125:0607e4e20702 | 126:ac183a519439 |
---|---|
28 bcc parse0 ; brif we succeeded in parsing a token | 28 bcc parse0 ; brif we succeeded in parsing a token |
29 parse_error puls u ; restore original free memory location - deallocate any encoding | 29 parse_error puls u ; restore original free memory location - deallocate any encoding |
30 stu freestart | 30 stu freestart |
31 ldu parse_tokenst ; get start location we started parsing the token at | 31 ldu parse_tokenst ; get start location we started parsing the token at |
32 rts ; return error condition | 32 rts ; return error condition |
33 parse0 ldx #parse_stmtjump ; point to jump table for token type handler | 33 parse0 ldx #parsetab_cmd ; point to jump table for token type handler |
34 abx ; offset to handler address | 34 parse1 cmpb ,x ; did we match a valid command token? |
35 abx | 35 beq parse3 ; brif so |
36 jsr [,x] ; call handler | 36 leax 3,x ; move to next entry |
37 bcs parse_error ; brif handler flagged error | 37 cmpx #parsetab_cmde ; end of table? |
38 blo parse1 ; brif not | |
39 parse2 ldb #err_sn ; flag syntax error | |
40 bra parse_error ; and return the error | |
41 parse3 jsr [1,x] ; call the handler | |
42 bcs parse_error ; brif the handler indicated error | |
38 jsr parse_curtoken ; get the token we terminated on | 43 jsr parse_curtoken ; get the token we terminated on |
39 cmpb #token_eot ; end of input? | 44 cmpb #token_eot ; end of input? |
40 bne parse1 ; brif not | 45 bne parse4 ; brif not |
41 ldb #bc_eol ; stash an end of line op | 46 ldb #bc_eol ; stash an end of line op |
42 bsr parse_write | 47 bsr parse_write |
43 bcs parse_error ; brif we errored out writing to the result (OM?) | 48 bcs parse_error ; brif we errored out writing to the result (OM?) |
44 tfr u,d ; calculate the length of the result | 49 tfr u,d ; calculate the length of the result |
45 subd ,s | 50 subd ,s |
46 puls u,pc ; get pointer to start of encoded result and return (C is already clear) | 51 puls u,pc ; get pointer to start of encoded result and return (C is already clear) |
47 parse1 cmpb #token_stmtsep ; statement separator? | 52 parse4 cmpb #token_stmtsep ; statement separator? |
48 beq parse_nextstmt ; brif so - do another statement | 53 beq parse_nextstmt ; brif so - do another statement |
49 cmpb #token_apos ; ' token? | 54 cmpb #token_remabbr ; ' token? |
50 beq parse0 ; brif so - parse it as a new statement | 55 beq parse0 ; brif so - parse it as a new statement |
51 comb ; set C for error | 56 bra parse2 ; raise a syntax error |
52 ldb #err_sn ; raise syntax error | |
53 bra parse_error | |
54 parse_write lda parse_noout ; are we doing output? | 57 parse_write lda parse_noout ; are we doing output? |
55 beq parse_write0 ; brif so | 58 beq parse_write0 ; brif so |
56 leau 1,u ; just count up the output and don't do anything | 59 leau 1,u ; just count up the output and don't do anything |
57 rts | 60 rts |
58 parse_write0 leax -stackheadroom,s ; calculate bottom of stack with headroom | 61 parse_write0 leax -stackheadroom,s ; calculate bottom of stack with headroom |
60 bhs parse_write1 ; brif not - we're good | 63 bhs parse_write1 ; brif not - we're good |
61 ldb #err_om ; raise out of memory error, C already set from comparison | 64 ldb #err_om ; raise out of memory error, C already set from comparison |
62 rts | 65 rts |
63 parse_write1 stb ,u+ ; save output byte | 66 parse_write1 stb ,u+ ; save output byte |
64 stu freestart ; save new to of used memory | 67 stu freestart ; save new to of used memory |
68 list_noop | |
65 parse_noop rts ; return all clear - C clear from comparison above | 69 parse_noop rts ; return all clear - C clear from comparison above |
66 parse_curtoken ldb parse_curtok ; fetch token code of current token | 70 parse_curtoken ldb parse_curtok ; fetch token code of current token |
67 rts | 71 rts |
68 parse_tokerr comb ; flag error - unexpected token | 72 parse_tokerr comb ; flag error - unexpected token |
69 ldb #err_sn ; raise syntax error | 73 ldb #err_sn ; raise syntax error |
126 blo parse_toupper0 ; brif not | 130 blo parse_toupper0 ; brif not |
127 cmpa #'z ; is it still lower case alpha? | 131 cmpa #'z ; is it still lower case alpha? |
128 bhi parse_toupper0 ; brif not | 132 bhi parse_toupper0 ; brif not |
129 suba #0x20 ; adjust to upper case alpha | 133 suba #0x20 ; adjust to upper case alpha |
130 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu | 134 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu |
135 parse_number jmp parse_tokerr | |
136 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
131 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows: | 137 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows: |
132 ; | 138 ; |
133 ; * two bytes which contain the length of the table less the two bytes for this length value | 139 ; * two bytes which contain the length of the table less the two bytes for this length value |
134 ; * a sequence of entries consisting of a single byte matching character and a token code followed | 140 ; * a sequence of entries consisting of a single byte matching character and a token code followed |
135 ; by an optional sub table, structured exactly the same way. | 141 ; by an optional sub table, structured exactly the same way. |
153 bne parse_wordtab4 ; brif not | 159 bne parse_wordtab4 ; brif not |
154 cmpb #token_eot ; is it indicating a sub table? | 160 cmpb #token_eot ; is it indicating a sub table? |
155 bne parse_wordtab6 ; brif not | 161 bne parse_wordtab6 ; brif not |
156 bsr parse_nextcharu ; fetch next input character (for sub table match) | 162 bsr parse_nextcharu ; fetch next input character (for sub table match) |
157 bne parse_wordtab0 ; brif we are going to check the sub table | 163 bne parse_wordtab0 ; brif we are going to check the sub table |
158 parse_wordtab2 ldd ,x++ ; fetch length of sub table | 164 parse_wordtab2 ldd ,x ; fetch length of sub table |
159 leax d,x ; move past sub table | 165 leax d,x ; move past sub table |
160 parse_wordtab3 lda ,s ; get back input character | 166 parse_wordtab3 lda ,s ; get back input character |
161 cmpx 1,s ; are we at the end of the table? | 167 cmpx 1,s ; are we at the end of the table? |
162 blo parse_wordtab1 ; brif not - check another entry | 168 blo parse_wordtab1 ; brif not - check another entry |
163 comb ; indicate no match | 169 comb ; indicate no match |
168 beq parse_wordtab2 ; brif so - skip sub table | 174 beq parse_wordtab2 ; brif so - skip sub table |
169 bra parse_wordtab3 ; otherwise just move to the next entry | 175 bra parse_wordtab3 ; otherwise just move to the next entry |
170 parse_wordtab5 leay a,y ; move back the specified number of characters | 176 parse_wordtab5 leay a,y ; move back the specified number of characters |
171 parse_wordtab6 clra ; clear C to indicate a match | 177 parse_wordtab6 clra ; clear C to indicate a match |
172 puls a,x,pc ; clean up stack and return | 178 puls a,x,pc ; clean up stack and return |
173 parse_number jmp parse_tokerr | 179 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
174 ; Parse tokens - define them in order using the macro parse_tokdef | 180 ; Convert a token number back to its keyword. This will use the same table used by parse_wordtab. Enter with a character |
181 ; output routine pointer in U which takes the character in A. The routine can assume that Y is preserved. Will return | |
182 ; with C set if the token does not exist in the word table and clear otherwise. | |
183 parse_wtdc pshs u ; save output routine pointer (the token code to decode arrives in A) | |
184 ldu #strbuff+20 ; point just past the scratch area used to build the keyword | |
185 clr ,-u ; put a NUL at the end of the string | |
186 ldx #parse_wt ; point to keyword parse table | |
187 bsr parse_wtdc2 ; walk the table; on a match the keyword is built backward through U | |
188 bcc parse_wtdc1 ; brif we do have a match | |
189 puls u,pc ; no match: restore routine pointer and return with C still set | |
190 parse_wtdc0 jsr [,s] ; output the character via the saved routine pointer (routine must preserve U) | |
191 parse_wtdc1 lda ,u+ ; get next keyword character | |
192 bne parse_wtdc0 ; brif we're not at the NUL yet | |
193 clra ; make sure C is clear | |
194 puls u,pc ; restore routine pointer and return | |
195 parse_wtdc2 pshs a,x ; save the token match value and the table pointer | |
196 ldd ,x++ ; get table length | |
197 addd 1,s ; calculate end address | |
198 std 1,s ; save it in place of the table pointer | |
199 parse_wtdc3 ldd ,x++ ; get this table entry: A = match character, B = token code | |
200 bmi parse_wtdc6 ; brif it's a backtracking entry - skip it | |
201 cmpb ,s ; does the entry's token code match the wanted token? | |
202 bne parse_wtdc5 ; brif not | |
203 parse_wtdc4 sta ,-u ; prepend the character to the keyword being built in the buffer | |
204 puls a,x,pc ; return up the call stack - C is clear from CMPB above or from the sub table match | |
205 parse_wtdc5 cmpb #token_eot ; does this entry have a sub table? | |
206 bne parse_wtdc6 ; brif not | |
207 pshs a ; save the matched character | |
208 lda 1,s ; get back the token we need | |
209 bsr parse_wtdc2 ; go handle the sub table | |
210 puls a ; get back the matched character (C from the sub table walk is unchanged) | |
211 bcc parse_wtdc4 ; brif it did match - record it and return | |
212 parse_wtdc6 cmpx 1,s ; are we at the end of this table? | |
213 bne parse_wtdc3 ; brif not - handle another table entry | |
214 coma ; make sure C is set for no match | |
215 puls a,x,pc ; clean up stack and return | |
216 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
217 ; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows: | |
218 ; parse_tokdefT <sym>,<parse>,<list>,<exec> | |
219 ; where: | |
220 ; T: c for command, f for function, p for particle | |
221 ; <sym>: the symbol name without the "token_" prefix | |
222 ; <parse>: parse handler for the type, ignored for particles | |
223 ; <list>: list handler for the type, ignored for particles | |
224 ; <exec>: execution handler for the type, ignored for particles | |
175 *pragmapush list | 225 *pragmapush list |
176 *pragma nolist | 226 *pragma nolist |
177 parse_toknum set 0 | 227 __toknump set 0 |
178 parse_tokdef macro noexpand | 228 __toknumc set 0x40 |
179 \1 equ parse_toknum | 229 __toknumf set 0xc0 |
180 parse_toknum set parse_toknum+1 | 230 setstr __cmdparset="" |
181 fdb \2 | 231 setstr __cmdlistt="" |
232 setstr __cmdexect="" | |
233 setstr __fnparset="" | |
234 setstr __fnlistt="" | |
235 setstr __fnexect="" | |
; parse_tokendefp <sym>: define token_<sym> as the next particle token number and | |
; advance the particle counter. No parse, list or exec table text is accumulated. | |
236 parse_tokendefp macro noexpand | |
237 token_\1 equ __toknump | |
238 __toknump set __toknump+1 | |
239 endm | |
; parse_tokendefc <sym>,<parse>,<list>,<exec>: define a command token. | |
; Defines token_<sym> as the next command token number and advances the counter. | |
; A non-empty <parse> or <list> handler appends a three byte entry (token code | |
; byte, handler address) to the command parse or list table text. The exec table | |
; text always gains exactly one handler address per token: <exec> when supplied, | |
; otherwise SNERROR. | |
240 parse_tokendefc macro noexpand | |
241 token_\1 equ __toknumc | |
242 __toknumc set __toknumc+1 | |
243 ifstr ne,"{2}","" | |
244 setstr __cmdparset="%(__cmdparset)\tfcb\ttoken_\1\n\tfdb {2}\n" | |
245 endc | |
246 ifstr ne,"{3}","" | |
247 setstr __cmdlistt="%(__cmdlistt)\tfcb\ttoken_\1\n\tfdb {3}\n" | |
248 endc | |
249 ifstr ne,"{4}","" | |
250 setstr __cmdexect="%(__cmdexect)\tfdb {4}\n" | |
251 else | |
252 setstr __cmdexect="%(__cmdexect)\tfdb SNERROR\n" | |
253 endc | |
254 endm | |
; parse_tokendeff <sym>,<parse>,<list>,<exec>: define a function token. | |
; Defines token_<sym> as the next function token number and advances the counter. | |
; A non-empty <parse> or <list> handler appends a three byte entry (token code | |
; byte, handler address) to the function parse or list table text. The exec table | |
; text always gains exactly one handler address per token: <exec> when supplied, | |
; otherwise SNERROR. | |
255 parse_tokendeff macro noexpand | |
256 token_\1 equ __toknumf | |
257 __toknumf set __toknumf+1 | |
258 ifstr ne,"{2}","" | |
259 setstr __fnparset="%(__fnparset)\tfcb\ttoken_\1\n\tfdb {2}\n" | |
260 endc | |
261 ifstr ne,"{3}","" | |
262 setstr __fnlistt="%(__fnlistt)\tfcb\ttoken_\1\n\tfdb {3}\n" | |
263 endc | |
264 ifstr ne,"{4}","" | |
265 setstr __fnexect="%(__fnexect)\tfdb {4}\n" | |
266 else | |
267 setstr __fnexect="%(__fnexect)\tfdb SNERROR\n" | |
268 endc | |
269 endm | |
; token_cmdparse: emit the command parse table by including the text accumulated | |
; in __cmdparset at the point of expansion. The nolist pragma is saved, set for | |
; the generated lines, and restored afterward. | |
270 token_cmdparse macro | |
271 *pragmapush nolist | |
272 *pragma nolist | |
273 includestr "%(__cmdparset)" | |
274 *pragmapop nolist | |
275 endm | |
; token_cmdlist: emit the command list table by including the text accumulated | |
; in __cmdlistt at the point of expansion. The nolist pragma is saved, set for | |
; the generated lines, and restored afterward. | |
276 token_cmdlist macro | |
277 *pragmapush nolist | |
278 *pragma nolist | |
279 includestr "%(__cmdlistt)" | |
280 *pragmapop nolist | |
281 endm | |
; token_cmdexec: emit the command execution table by including the text | |
; accumulated in __cmdexect, then define token__maxcmd as the highest command | |
; token number assigned so far. The nolist pragma is saved, set for the generated | |
; lines, and restored afterward. | |
282 token_cmdexec macro | |
283 *pragmapush nolist | |
284 *pragma nolist | |
285 includestr "%(__cmdexect)" | |
286 token__maxcmd equ __toknumc-1 | |
287 *pragmapop nolist | |
288 endm | |
; token_fnparse: emit the function parse table by including the text accumulated | |
; in __fnparset at the point of expansion. The nolist pragma is saved, set for | |
; the generated lines, and restored afterward. | |
289 token_fnparse macro | |
290 *pragmapush nolist | |
291 *pragma nolist | |
292 includestr "%(__fnparset)" | |
293 *pragmapop nolist | |
294 endm | |
; token_fnlist: emit the function list table by including the text accumulated | |
; in __fnlistt at the point of expansion. The nolist pragma is saved, set for | |
; the generated lines, and restored afterward. | |
295 token_fnlist macro | |
296 *pragmapush nolist | |
297 *pragma nolist | |
298 includestr "%(__fnlistt)" | |
299 *pragmapop nolist | |
300 endm | |
; token_fnexec: emit the function execution table by including the text | |
; accumulated in __fnexect, then define token__maxfn as the highest function | |
; token number assigned so far. The nolist pragma is saved, set for the generated | |
; lines, and restored afterward. | |
301 token_fnexec macro | |
302 *pragmapush nolist | |
303 *pragma nolist | |
304 includestr "%(__fnexect)" | |
305 token__maxfn equ __toknumf-1 | |
306 *pragmapop nolist | |
182 endm | 307 endm |
183 *pragmapop list | 308 *pragmapop list |
184 parse_stmtjump parse_tokdef token_error,parse_tokerr | 309 parse_tokendefp error ; Used to mark errors; should always be first so it's token #0 |
185 parse_tokdef token_eot,parse_noop | 310 parse_tokendefp eot ; End of input marker or special handling in word tables |
186 parse_tokdef token_lt,parse_noop | 311 parse_tokendefp stmtsep ; statement separator |
187 parse_tokdef token_le,parse_noop | 312 parse_tokendefp times ; times (multiplication) operator (*) |
188 parse_tokdef token_gt,parse_noop | 313 parse_tokendefp plus ; addition operator |
189 parse_tokdef token_ge,parse_noop | 314 parse_tokendefp divide ; division operator (/) |
190 parse_tokdef token_eq,parse_noop | 315 parse_tokendefp minus ; subtraction operator |
191 parse_tokdef token_ne,parse_noop | 316 parse_tokendefp exp ; exponentiation operator (^) |
192 parse_tokdef token_reltrue,parse_noop // always true relational operator | 317 parse_tokendefp lt ; less than operator |
193 parse_tokdef token_stmtsep,parse_noop | 318 parse_tokendefp le ; less than or equal operateor |
194 parse_tokdef token_apos,parse_rem | 319 parse_tokendefp gt ; greater than operator |
195 parse_tokdef token_special,parse_noop | 320 parse_tokendefp ge ; greater than or equal operator |
196 parse_tokdef token_bang,parse_noop | 321 parse_tokendefp eq ; equality operator |
197 parse_tokdef token_hash,parse_noop | 322 parse_tokendefp ne ; inequality operator |
198 parse_tokdef token_dollar,parse_noop | 323 parse_tokendefp not ; boolean NOT operator |
199 parse_tokdef token_percent,parse_noop | 324 parse_tokendefp and ; boolean AND operator |
200 parse_tokdef token_amp,parse_noop | 325 parse_tokendefp or ; boolean OR operator |
201 parse_tokdef token_oparen,parse_noop | 326 parse_tokendefp bang ; exclamation mark |
202 parse_tokdef token_cparen,parse_noop | 327 parse_tokendefp hash ; number sign |
203 parse_tokdef token_star,parse_noop | 328 parse_tokendefp dollar ; dollar sign (string sigil) |
204 parse_tokdef token_plus,parse_noop | 329 parse_tokendefp percent ; percent sign (integer sigil) |
205 parse_tokdef token_comma,parse_noop | 330 parse_tokendefp amp ; ampersand |
206 parse_tokdef token_minus,parse_noop | 331 parse_tokendefp oparen ; opening paren |
207 parse_tokdef token_slash,parse_noop | 332 parse_tokendefp cparen ; closing paren |
208 parse_tokdef token_semi,parse_noop | 333 parse_tokendefp sep ; comma (separator) |
209 parse_tokdef token_at,parse_noop | 334 parse_tokendefp semi ; semicolon |
210 parse_tokdef token_exp,parse_noop | 335 parse_tokendefp at ; @ symbol |
211 parse_tokdef token_ident,parse_noop | 336 parse_tokendefp ident ; identifier (has special parsing) |
212 parse_tokdef token_rem,parse_noop | 337 parse_tokendefp else ; ELSE |
213 parse_tokdef token_return,parse_noop | 338 parse_tokendefp then ; THEN |
214 parse_tokdef token_run,parse_noop | 339 parse_tokendefp to ; TO |
215 parse_tokdef token_data,parse_noop | 340 parse_tokendefp sub ; SUB |
216 parse_tokdef token_else,parse_noop | 341 parse_tokendefp as ; AS |
217 parse_tokdef token_end,parse_noop | |
218 parse_tokdef token_stop,parse_noop | |
219 parse_tokdef token_sub,parse_noop | |
220 parse_tokdef token_let,parse_noop | |
221 parse_tokdef token_list,parse_noop | |
222 parse_tokdef token_new,parse_noop | |
223 parse_tokdef token_not,parse_noop | |
224 parse_tokdef token_print,parse_noop | |
225 parse_tokdef token_pop,parse_noop | |
226 parse_tokdef token_to,parse_noop | |
227 parse_tokdef token_and,parse_noop | |
228 parse_tokdef token_or,parse_noop | |
229 parse_tokdef token_go,parse_noop | |
230 parse_tokdef token_as,parse_noop | |
231 parse_tokdef token_asc,parse_noop | |
232 parse_rem rts | |
233 | 342 |
; Command tokens. Each line defines token_<sym> and registers its parse, list and | |
; exec handlers; token numbers are assigned consecutively in the order written. | |
; NOTE(review): RETURN through GO pass parse_noop for all three handlers, | |
; presumably as placeholders until real handlers exist - confirm. | |
343 parse_tokendefc remabbr,parse_noop,list_noop,exec_noop ; abbreviated REM (') | |
344 parse_tokendefc rem,parse_noop,list_noop,exec_noop ; REM | |
345 parse_tokendefc return,parse_noop,parse_noop,parse_noop ; RETURN | |
346 parse_tokendefc run,parse_noop,parse_noop,parse_noop ; RUN | |
347 parse_tokendefc data,parse_noop,parse_noop,parse_noop ; DATA | |
348 parse_tokendefc end,parse_noop,parse_noop,parse_noop ; END | |
349 parse_tokendefc stop,parse_noop,parse_noop,parse_noop ; STOP | |
350 parse_tokendefc let,parse_noop,parse_noop,parse_noop ; LET | |
351 parse_tokendefc list,parse_noop,parse_noop,parse_noop ; LIST | |
352 parse_tokendefc new,parse_noop,parse_noop,parse_noop ; NEW | |
353 parse_tokendefc print,parse_noop,parse_noop,parse_noop ; PRINT | |
354 parse_tokendefc pop,parse_noop,parse_noop,parse_noop ; POP | |
355 parse_tokendefc goto,parse_noop,parse_noop,parse_noop ; GOTO | |
356 parse_tokendefc gosub,parse_noop,parse_noop,parse_noop ; GOSUB | |
357 parse_tokendefc go,parse_noop,parse_noop,parse_noop ; GO | |
358 | |
; Function tokens, numbered from their own counter in the order written. | |
359 parse_tokendeff asc,parse_noop,parse_noop,parse_noop ; ASC() | |
360 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
361 ; Parse handling tables | |
; Each entry is three bytes: a token code byte followed by the two byte address | |
; of the parse handler. parsetab_cmde and parsetab_fne label the first byte past | |
; the command and function tables respectively. | |
362 parsetab_cmd token_cmdparse | |
363 parsetab_cmde | |
364 parsetab_fn token_fnparse | |
365 parsetab_fne | |
366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
367 ; List handling tables | |
; Each entry is three bytes: a token code byte followed by the two byte address | |
; of the list handler. listtab_cmde and listtab_fne label the first byte past | |
; the command and function tables respectively. | |
368 listtab_cmd token_cmdlist | |
369 listtab_cmde | |
370 listtab_fn token_fnlist | |
371 listtab_fne | |
372 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
373 ; Execution handling tables | |
; Unlike the parse and list tables these hold no token code bytes: there is one | |
; two byte handler address per defined token, in definition order. Expanding the | |
; macros here also defines token__maxcmd and token__maxfn. | |
374 exectab_cmd token_cmdexec | |
375 exectab_fn token_fnexec | |
234 *pragmapop list | 376 *pragmapop list |