comparison src/parse.s @ 126:ac183a519439

Update parsing scheme with a keyword lookup by token value and other framework. Add ability to turn a token code into a keyword string. Also correct some details related to token table generation with some additional adjustments for token symbols. Also rework token symbol definitions and creation of some parsing tables as well as the main statement parsing loop.
author William Astle <lost@l-w.ca>
date Mon, 08 Jan 2024 22:58:08 -0700
parents 0607e4e20702
children 527212870064
comparison
equal deleted inserted replaced
125:0607e4e20702 126:ac183a519439
28 bcc parse0 ; brif we succeeded in parsing a token 28 bcc parse0 ; brif we succeeded in parsing a token
29 parse_error puls u ; restore original free memory location - deallocate any encoding 29 parse_error puls u ; restore original free memory location - deallocate any encoding
30 stu freestart 30 stu freestart
31 ldu parse_tokenst ; get start location we started parsing the token at 31 ldu parse_tokenst ; get start location we started parsing the token at
32 rts ; return error condition 32 rts ; return error condition
33 parse0 ldx #parse_stmtjump ; point to jump table for token type handler 33 parse0 ldx #parsetab_cmd ; point to jump table for token type handler
34 abx ; offset to handler address 34 parse1 cmpb ,x ; did we match a valid command token?
35 abx 35 beq parse3 ; brif so
36 jsr [,x] ; call handler 36 leax 3,x ; move to next entry
37 bcs parse_error ; brif handler flagged error 37 cmpx #parsetab_cmde ; end of table?
38 blo parse1 ; brif not
39 parse2 ldb #err_sn ; flag syntax error
40 bra parse_error ; and return the error
41 parse3 jsr [1,x] ; call the handler
42 bcs parse_error ; brif the handler indicated error
38 jsr parse_curtoken ; get the token we terminated on 43 jsr parse_curtoken ; get the token we terminated on
39 cmpb #token_eot ; end of input? 44 cmpb #token_eot ; end of input?
40 bne parse1 ; brif not 45 bne parse4 ; brif not
41 ldb #bc_eol ; stash an end of line op 46 ldb #bc_eol ; stash an end of line op
42 bsr parse_write 47 bsr parse_write
43 bcs parse_error ; brif we errored out writing to the result (OM?) 48 bcs parse_error ; brif we errored out writing to the result (OM?)
44 tfr u,d ; calculate the length of the result 49 tfr u,d ; calculate the length of the result
45 subd ,s 50 subd ,s
46 puls u,pc ; get pointer to start of encoded result and return (C is already clear) 51 puls u,pc ; get pointer to start of encoded result and return (C is already clear)
47 parse1 cmpb #token_stmtsep ; statement separator? 52 parse4 cmpb #token_stmtsep ; statement separator?
48 beq parse_nextstmt ; brif so - do another statement 53 beq parse_nextstmt ; brif so - do another statement
49 cmpb #token_apos ; ' token? 54 cmpb #token_remabbr ; ' token?
50 beq parse0 ; brif so - parse it as a new statement 55 beq parse0 ; brif so - parse it as a new statement
51 comb ; set C for error 56 bra parse2 ; raise a syntax error
52 ldb #err_sn ; raise syntax error
53 bra parse_error
54 parse_write lda parse_noout ; are we doing output? 57 parse_write lda parse_noout ; are we doing output?
55 beq parse_write0 ; brif so 58 beq parse_write0 ; brif so
56 leau 1,u ; just count up the output and don't do anything 59 leau 1,u ; just count up the output and don't do anything
57 rts 60 rts
58 parse_write0 leax -stackheadroom,s ; calculate bottom of stack with headroom 61 parse_write0 leax -stackheadroom,s ; calculate bottom of stack with headroom
60 bhs parse_write1 ; brif not - we're good 63 bhs parse_write1 ; brif not - we're good
61 ldb #err_om ; raise out of memory error, C already set from comparison 64 ldb #err_om ; raise out of memory error, C already set from comparison
62 rts 65 rts
63 parse_write1 stb ,u+ ; save output byte 66 parse_write1 stb ,u+ ; save output byte
64 stu freestart ; save new to of used memory 67 stu freestart ; save new to of used memory
68 list_noop
65 parse_noop rts ; return all clear - C clear from comparison above 69 parse_noop rts ; return all clear - C clear from comparison above
66 parse_curtoken ldb parse_curtok ; fetch token code of current token 70 parse_curtoken ldb parse_curtok ; fetch token code of current token
67 rts 71 rts
68 parse_tokerr comb ; flag error - unexpected token 72 parse_tokerr comb ; flag error - unexpected token
69 ldb #err_sn ; raise syntax error 73 ldb #err_sn ; raise syntax error
126 blo parse_toupper0 ; brif not 130 blo parse_toupper0 ; brif not
127 cmpa #'z ; is it still lower case alpha? 131 cmpa #'z ; is it still lower case alpha?
128 bhi parse_toupper0 ; brif not 132 bhi parse_toupper0 ; brif not
129 suba #0x20 ; adjust to upper case alpha 133 suba #0x20 ; adjust to upper case alpha
130 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu 134 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu
135 parse_number jmp parse_tokerr
136 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
131 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows: 137 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
132 ; 138 ;
133 ; * two bytes which contain the length of the table less the two bytes for this length value 139 ; * two bytes which contain the length of the table less the two bytes for this length value
134 ; * a sequence of entries consisting of a single byte matching character and a token code followed 140 ; * a sequence of entries consisting of a single byte matching character and a token code followed
135 ; by an optional sub table, structured exactly the same way. 141 ; by an optional sub table, structured exactly the same way.
153 bne parse_wordtab4 ; brif not 159 bne parse_wordtab4 ; brif not
154 cmpb #token_eot ; is it indicating a sub table? 160 cmpb #token_eot ; is it indicating a sub table?
155 bne parse_wordtab6 ; brif not 161 bne parse_wordtab6 ; brif not
156 bsr parse_nextcharu ; fetch next input character (for sub table match) 162 bsr parse_nextcharu ; fetch next input character (for sub table match)
157 bne parse_wordtab0 ; brif we are going to check the sub table 163 bne parse_wordtab0 ; brif we are going to check the sub table
158 parse_wordtab2 ldd ,x++ ; fetch length of sub table 164 parse_wordtab2 ldd ,x ; fetch length of sub table
159 leax d,x ; move past sub table 165 leax d,x ; move past sub table
160 parse_wordtab3 lda ,s ; get back input character 166 parse_wordtab3 lda ,s ; get back input character
161 cmpx 1,s ; are we at the end of the table? 167 cmpx 1,s ; are we at the end of the table?
162 blo parse_wordtab1 ; brif not - check another entry 168 blo parse_wordtab1 ; brif not - check another entry
163 comb ; indicate no match 169 comb ; indicate no match
168 beq parse_wordtab2 ; brif so - skip sub table 174 beq parse_wordtab2 ; brif so - skip sub table
169 bra parse_wordtab3 ; otherwise just move to the next entry 175 bra parse_wordtab3 ; otherwise just move to the next entry
170 parse_wordtab5 leay a,y ; move back the specified number of characters 176 parse_wordtab5 leay a,y ; move back the specified number of characters
171 parse_wordtab6 clra ; clear C to indicate a match 177 parse_wordtab6 clra ; clear C to indicate a match
172 puls a,x,pc ; clean up stack and return 178 puls a,x,pc ; clean up stack and return
173 parse_number jmp parse_tokerr 179 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
174 ; Parse tokens - define them in order using the macro parse_tokdef 180 ; Convert a token number back to its keyword. This will use the same table used by parse_wordtab. Enter with a character
181 ; output routine pointer in U which takes the character in A. The routine can assume that Y is preserved. Will return
182 ; with C set if the token does not exist in the word table and clear otherwise.
183 parse_wtdc pshs u ; save the character output routine pointer (it is called through [,s] below)
184 ldu #strbuff+20 ; point 20 bytes into the temporary string buffer
185 clr ,-u ; pre-decrement and put a NUL at the end of the string
186 ldx #parse_wt ; point to keyword parse table
187 bsr parse_wtdc2 ; call the tree walker function (it takes the wanted token value from A)
188 bcc parse_wtdc1 ; brif C clear - the walker reported a match
189 puls u,pc ; no match: restore U and return with C still set from the walker
190 parse_wtdc0 jsr [,s] ; call the saved output routine with the character in A
191 parse_wtdc1 lda ,u+ ; get next output byte from the string at U
192 bne parse_wtdc0 ; brif it is not the NUL terminator - output it
193 clra ; make sure C is clear to report success
194 puls u,pc ; restore U (the output routine pointer) and return
195 parse_wtdc2 pshs a,x ; save the token match value (A) and the table pointer (X)
196 ldd ,x++ ; get table length
197 addd 1,s ; calculate end address from the stacked table pointer
198 std 1,s ; save it in place of the stacked table pointer
199 parse_wtdc3 ldd ,x++ ; get this table entry: A = match character, B = token code
200 bmi parse_wtdc6 ; brif it's a backtracking entry (bit 7 of A set) - skip it
201 cmpb ,s ; does the token code of this entry match the wanted token?
202 bne parse_wtdc5 ; brif not
203 parse_wtdc4 sta ,-u ; add the character to the output buffer, building the string backward through U
204 puls a,x,pc ; return up the call stack - C is clear (equal CMPB above, or BCC taken below)
205 parse_wtdc5 cmpb #token_eot ; does this entry have a sub table?
206 bne parse_wtdc6 ; brif not
207 pshs a ; save the matched character
208 lda 1,s ; get back the token we need (it now sits one byte above the saved character)
209 bsr parse_wtdc2 ; go handle the sub table; it returns with X at the end of the sub table
210 puls a ; get back the matched character (PULS leaves C untouched)
211 bcc parse_wtdc4 ; brif it did match - record this character and return
212 parse_wtdc6 cmpx 1,s ; are we at the end of this table?
213 bne parse_wtdc3 ; brif not - handle another table entry
214 coma ; make sure C is set for no match (A is restored by the PULS below)
215 puls a,x,pc ; clean up stack and return; X comes back as this table's end address
216 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
217 ; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows:
218 ; parse_tokdefT <sym>,<parse>,<list>,<exec>
219 ; where:
220 ; T: c for command, f for function, p for particle
221 ; <sym>: the symbol name without the "token_" prefix
222 ; <parse>: parse handler for the type, ignored for particles
223 ; <list>: list handler for the type, ignored for particles
224 ; <exec>: execution handler for the type, ignored for particles
175 *pragmapush list 225 *pragmapush list
176 *pragma nolist 226 *pragma nolist
177 parse_toknum set 0 227 __toknump set 0
178 parse_tokdef macro noexpand 228 __toknumc set 0x40
179 \1 equ parse_toknum 229 __toknumf set 0xc0
180 parse_toknum set parse_toknum+1 230 setstr __cmdparset=""
181 fdb \2 231 setstr __cmdlistt=""
232 setstr __cmdexect=""
233 setstr __fnparset=""
234 setstr __fnlistt=""
235 setstr __fnexect=""
236 parse_tokendefp macro noexpand
237 token_\1 equ __toknump ; define token_<sym> as the next particle token number
238 __toknump set __toknump+1 ; advance the particle token counter
239 endm
240 parse_tokendefc macro noexpand
241 token_\1 equ __toknumc ; define token_<sym> as the next command token number
242 __toknumc set __toknumc+1 ; advance the command token counter
243 ifstr ne,"{2}",""
244 setstr __cmdparset="%(__cmdparset)\tfcb\ttoken_\1\n\tfdb {2}\n"
245 endc ; parse table gets a token byte plus handler address only when a parse handler is given
246 ifstr ne,"{3}",""
247 setstr __cmdlistt="%(__cmdlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
248 endc ; list table gets a token byte plus handler address only when a list handler is given
249 ifstr ne,"{4}",""
250 setstr __cmdexect="%(__cmdexect)\tfdb {4}\n"
251 else ; no execution handler supplied - point the entry at SNERROR instead
252 setstr __cmdexect="%(__cmdexect)\tfdb SNERROR\n"
253 endc ; exec table always gets exactly one address per command, in definition order
254 endm
255 parse_tokendeff macro noexpand
256 token_\1 equ __toknumf ; define token_<sym> as the next function token number
257 __toknumf set __toknumf+1 ; advance the function token counter
258 ifstr ne,"{2}",""
259 setstr __fnparset="%(__fnparset)\tfcb\ttoken_\1\n\tfdb {2}\n"
260 endc ; parse table gets a token byte plus handler address only when a parse handler is given
261 ifstr ne,"{3}",""
262 setstr __fnlistt="%(__fnlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
263 endc ; list table gets a token byte plus handler address only when a list handler is given
264 ifstr ne,"{4}",""
265 setstr __fnexect="%(__fnexect)\tfdb {4}\n"
266 else ; no execution handler supplied - point the entry at SNERROR instead
267 setstr __fnexect="%(__fnexect)\tfdb SNERROR\n"
268 endc ; exec table always gets exactly one address per function, in definition order
269 endm
270 token_cmdparse macro
271 *pragmapush nolist
272 *pragma nolist
273 includestr "%(__cmdparset)"
274 *pragmapop nolist
275 endm ; token_cmdparse: includes the command parse table text accumulated in __cmdparset by parse_tokendefc
276 token_cmdlist macro
277 *pragmapush nolist
278 *pragma nolist
279 includestr "%(__cmdlistt)"
280 *pragmapop nolist
281 endm ; token_cmdlist: includes the command list table text accumulated in __cmdlistt by parse_tokendefc
282 token_cmdexec macro
283 *pragmapush nolist
284 *pragma nolist
285 includestr "%(__cmdexect)"
286 token__maxcmd equ __toknumc-1 ; highest command token number assigned at the point this macro is expanded
287 *pragmapop nolist
288 endm ; token_cmdexec: includes the command execution table text accumulated in __cmdexect
289 token_fnparse macro
290 *pragmapush nolist
291 *pragma nolist
292 includestr "%(__fnparset)"
293 *pragmapop nolist
294 endm ; token_fnparse: includes the function parse table text accumulated in __fnparset by parse_tokendeff
295 token_fnlist macro
296 *pragmapush nolist
297 *pragma nolist
298 includestr "%(__fnlistt)"
299 *pragmapop nolist
300 endm ; token_fnlist: includes the function list table text accumulated in __fnlistt by parse_tokendeff
301 token_fnexec macro
302 *pragmapush nolist
303 *pragma nolist
304 includestr "%(__fnexect)"
305 token__maxfn equ __toknumf-1 ; highest function token number assigned at the point this macro is expanded
306 *pragmapop nolist
182 endm 307 endm ; token_fnexec: includes the function execution table text accumulated in __fnexect
183 *pragmapop list 308 *pragmapop list
184 parse_stmtjump parse_tokdef token_error,parse_tokerr 309 parse_tokendefp error ; Used to mark errors; should always be first so it's token #0
185 parse_tokdef token_eot,parse_noop 310 parse_tokendefp eot ; End of input marker or special handling in word tables
186 parse_tokdef token_lt,parse_noop 311 parse_tokendefp stmtsep ; statement separator
187 parse_tokdef token_le,parse_noop 312 parse_tokendefp times ; times (multiplication) operator (*)
188 parse_tokdef token_gt,parse_noop 313 parse_tokendefp plus ; addition operator
189 parse_tokdef token_ge,parse_noop 314 parse_tokendefp divide ; division operator (/)
190 parse_tokdef token_eq,parse_noop 315 parse_tokendefp minus ; subtraction operator
191 parse_tokdef token_ne,parse_noop 316 parse_tokendefp exp ; exponentiation operator (^)
192 parse_tokdef token_reltrue,parse_noop // always true relational operator 317 parse_tokendefp lt ; less than operator
193 parse_tokdef token_stmtsep,parse_noop 318 parse_tokendefp le ; less than or equal operator
194 parse_tokdef token_apos,parse_rem 319 parse_tokendefp gt ; greater than operator
195 parse_tokdef token_special,parse_noop 320 parse_tokendefp ge ; greater than or equal operator
196 parse_tokdef token_bang,parse_noop 321 parse_tokendefp eq ; equality operator
197 parse_tokdef token_hash,parse_noop 322 parse_tokendefp ne ; inequality operator
198 parse_tokdef token_dollar,parse_noop 323 parse_tokendefp not ; boolean NOT operator
199 parse_tokdef token_percent,parse_noop 324 parse_tokendefp and ; boolean AND operator
200 parse_tokdef token_amp,parse_noop 325 parse_tokendefp or ; boolean OR operator
201 parse_tokdef token_oparen,parse_noop 326 parse_tokendefp bang ; exclamation mark
202 parse_tokdef token_cparen,parse_noop 327 parse_tokendefp hash ; number sign
203 parse_tokdef token_star,parse_noop 328 parse_tokendefp dollar ; dollar sign (string sigil)
204 parse_tokdef token_plus,parse_noop 329 parse_tokendefp percent ; percent sign (integer sigil)
205 parse_tokdef token_comma,parse_noop 330 parse_tokendefp amp ; ampersand
206 parse_tokdef token_minus,parse_noop 331 parse_tokendefp oparen ; opening paren
207 parse_tokdef token_slash,parse_noop 332 parse_tokendefp cparen ; closing paren
208 parse_tokdef token_semi,parse_noop 333 parse_tokendefp sep ; comma (separator)
209 parse_tokdef token_at,parse_noop 334 parse_tokendefp semi ; semicolon
210 parse_tokdef token_exp,parse_noop 335 parse_tokendefp at ; @ symbol
211 parse_tokdef token_ident,parse_noop 336 parse_tokendefp ident ; identifier (has special parsing)
212 parse_tokdef token_rem,parse_noop 337 parse_tokendefp else ; ELSE
213 parse_tokdef token_return,parse_noop 338 parse_tokendefp then ; THEN
214 parse_tokdef token_run,parse_noop 339 parse_tokendefp to ; TO
215 parse_tokdef token_data,parse_noop 340 parse_tokendefp sub ; SUB
216 parse_tokdef token_else,parse_noop 341 parse_tokendefp as ; AS
217 parse_tokdef token_end,parse_noop
218 parse_tokdef token_stop,parse_noop
219 parse_tokdef token_sub,parse_noop
220 parse_tokdef token_let,parse_noop
221 parse_tokdef token_list,parse_noop
222 parse_tokdef token_new,parse_noop
223 parse_tokdef token_not,parse_noop
224 parse_tokdef token_print,parse_noop
225 parse_tokdef token_pop,parse_noop
226 parse_tokdef token_to,parse_noop
227 parse_tokdef token_and,parse_noop
228 parse_tokdef token_or,parse_noop
229 parse_tokdef token_go,parse_noop
230 parse_tokdef token_as,parse_noop
231 parse_tokdef token_asc,parse_noop
232 parse_rem rts
233 342
343 parse_tokendefc remabbr,parse_noop,list_noop,exec_noop ; abbreviated REM (')
344 parse_tokendefc rem,parse_noop,list_noop,exec_noop ; REM
345 parse_tokendefc return,parse_noop,parse_noop,parse_noop ; RETURN
346 parse_tokendefc run,parse_noop,parse_noop,parse_noop ; RUN
347 parse_tokendefc data,parse_noop,parse_noop,parse_noop ; DATA
348 parse_tokendefc end,parse_noop,parse_noop,parse_noop ; END
349 parse_tokendefc stop,parse_noop,parse_noop,parse_noop ; STOP
350 parse_tokendefc let,parse_noop,parse_noop,parse_noop ; LET
351 parse_tokendefc list,parse_noop,parse_noop,parse_noop ; LIST
352 parse_tokendefc new,parse_noop,parse_noop,parse_noop ; NEW
353 parse_tokendefc print,parse_noop,parse_noop,parse_noop ; PRINT
354 parse_tokendefc pop,parse_noop,parse_noop,parse_noop ; POP
355 parse_tokendefc goto,parse_noop,parse_noop,parse_noop ; GOTO
356 parse_tokendefc gosub,parse_noop,parse_noop,parse_noop ; GOSUB
357 parse_tokendefc go,parse_noop,parse_noop,parse_noop ; GO
358
359 parse_tokendeff asc,parse_noop,parse_noop,parse_noop ; ASC()
360 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
361 ; Parse handling tables
362 parsetab_cmd token_cmdparse
363 parsetab_cmde ; end of the command parse table; the statement parser stops its 3-byte-entry search when X reaches this
364 parsetab_fn token_fnparse
365 parsetab_fne ; label immediately following the function parse table
366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
367 ; List handling tables
368 listtab_cmd token_cmdlist
369 listtab_cmde ; label immediately following the command list table
370 listtab_fn token_fnlist
371 listtab_fne ; label immediately following the function list table
372 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
373 ; Execution handling tables
374 exectab_cmd token_cmdexec
375 exectab_fn token_fnexec
234 *pragmapop list 376 *pragmapop list