comparison src/parse.s @ 139:5d4801c0566d

Get things building again with the updated tokenization scheme
author William Astle <lost@l-w.ca>
date Mon, 15 Jul 2024 23:26:15 -0600
parents 917b4893bb3d
children 86f6f3a71e60
comparison
equal deleted inserted replaced
138:4983ba49f936 139:5d4801c0566d
1 *pragmapush list 1 *pragmapush list
2 *pragma list 2 *pragma list
3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4 ; This is the overall parsing package. This is responsible for converting program text into the internal byte code and 4 ; This is the overall parsing package. It is responsible for converting the input source code into the internal byte
5 ; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated 5 ; code.
6 ; code analysis. In almost all cases, the returned error will be a syntax error. The internal byte code shares the same 6 ;
7 ; token number allocations as the parser. Some allocated tokens cannot be identified by the lexer (parse_nexttok) but 7 ; This version only converts keywords to token codes. Additional conversions will be done in future versions.
8 ; are used at runtime and when "decompiling" to text. 8 ;
9 ; 9 ; Enter with X pointing to the text to parse. The encoded result will be placed at freestart. On return, X will point to
10 ; In the event of a parse error, everything up to the next end of statement is retained as is using a special token 10 ; the encoded result and D will contain the length in bytes of the result, and C will be clear.
11 ; that preserves the unparsable text and parsing resumes. Only the first error is referenced by the return error 11 ;
12 ; pointer. 12 ; In the event that there is insufficient memory between freestart and the bottom of the stack, C will be set. This
13 ; 13 ; routine does not immediately throw an "out of memory" error to allow the caller to clear up some memory and try
14 ; This is a recursive descent parser. 14 ; again.
15 ; 15 ;
16 ; Entry: 16 ; Enter at parseto with U set to the encoding destination and Y set to one byte past the end of the destination buffer
17 ; X Points to the text to encode 17 ; to specify the destination. Defaults to encoding to the buffer between freestart and the bottom of the stack (with
18 ; B Nonzero to prevent generating any output (error check/length calculation only) 18 ; headroom accounted for).
19 ; 19 ;
20 ; Exit: 20 ; The stuff below that has hard coded colon checks will eventually be replaced by more complete parsing.
21 ; X Points to the encoded line 21 parse ldu freestart ; default to the start of free memory for encoding
22 ; D Length of the encoded line 22 leay -stackheadroom,s ; set the top of free memory
23 ; CC.C clear 23 parseto lda #1 ; flag to enable memory limit detection
24 24 pshs a,u,y ; save start and end addresses and OM error detection flag
25 ; Error Exit: 25 leay ,x ; put the input pointer somewhere less useful
26 ; X Points to the encoded line 26 parsea jsr parse_curchar ; fetch an input character
27 ; D Length of the encoded line 27 bne parseb ; brif not end of input
28 ; Y Pointer to the first error location in the input 28 parsez tfr u,d ; get current output pointer
29 ; U Error code 29 subd 3,s ; now D is the length
30 ; CC.C set 30 leas 5,s ; clean up the stack
31 ; 31 rts ; return - C will be clear from subd above
32 ; This is the error handler. It is responsible for resetting the stack to bail out to the top level 32 parseb jsr parse_wordtab ; look up a keyword and see if we have a match
33 ; parsing loop. It must also store the input pointer if this is the first error. Finally, it has to 33 bcs parsec ; brif no match - handle unknown stuff
34 ; output all the text up to either the end of the line *or* the next valid statement separator. 34 tsta ; do we have a two byte token?
35 parse_errorsn ldb #err_sn 35 bne parseq ; brif so - just stash it
36 parse_error lds parse_stackptr ; restore the original stack pointer so we can call from down stack 36 cmpb #token_else ; ELSE?
37 puls u ; get back original free pointer 37 beq parsed ; brif so - gets a hidden statement separator
38 stu freestart ; deallocate any allocated result 38 cmpb #token_remabbr ; REM abbreviation?
39 ldu parse_tokenst ; get start location of the token where the error was raised 39 bne parsee ; brif not
40 coma ; make sure C is set for error 40 parsed lda #': ; add a statement separator before it
41 rts 41 parseq bsr parseoutw ; output a word
42 parse stb parse_noout ; save no-output flag 42 bra parsef
43 leay ,x ; save input pointer in a less useful register 43 parsee bsr parseout ; output the token code
44 ldu freestart ; point to start of free memory where we will build the output 44 parsef cmpb #token_remabbr ; REM abbreviation?
45 pshs u ; save original free memory location 45 beq parseg ; brif so
46 sts parse_stackptr ; save the stack pointer for bailing out on errors 46 cmpb #token_rem ; Actual REM?
47 parse_nextstmt jsr parse_nexttok ; fetch the next token, return type in D 47 bne parseh ; brif not
48 bcs parse_error ; brif we failed at parsing a token 48 parseg ldb ,y+ ; get current input character
49 parse0 ldx #parsetab_cmd ; point to jump table for token type handler 49 beq parsez ; brif end of input
50 cmpb #token_stmtsep ; is it a statement separator? 50 bsr parseout ; add unmodified characters to output
51 beq parse_nextstmt ; brif so - we can just skip it 51 bra parseg ; keep going until end of input
52 parse1 cmpb ,x ; did we match a valid command token? 52 parseh cmpb #token_data ; DATA command?
53 beq parse3 ; brif so 53 bne parsea ; brif not - continue normal handling
54 leax 3,x ; move to next entry 54 clra ; flag for not skipping quoted string
55 cmpx #parsetab_cmde ; end of table? 55 parsei ldb ,y+ ; get input character
56 blo parse1 ; brif not 56 beq parsez ; brif end of input
57 bra parse_errorsn ; fell off the end 57 cmpb #'" ; string delimiter?
58 parse3 jsr [1,x] ; call the handler 58 bne parsej ; brif not
59 bcs parse_error ; brif the handler indicated error 59 coma ; flip the quoted statement handler
60 bsr parse_curtoken ; fetch the token we left off on 60 parsej cmpb #': ; end of statement?
61 cmpb #token_eot ; end of input? 61 bne parsek ; brif not
62 bne parse4 ; brif not 62 tsta ; are we skipping them?
63 ldb #bc_eol ; stash an end of line op 63 bne parsek ; brif so
64 bsr parse_write 64 leay -1,y ; unconsume it
65 bcs parse_error ; brif we errored out writing to the result (OM?) 65 bra parsea ; we're done with DATA
66 tfr u,d ; calculate the length of the result 66 parsek bsr parseout ; put the data value into the output
67 subd ,s 67 bra parsei ; go handle another character
68 puls u,pc ; get pointer to start of encoded result and return (C is already clear) 68 parsec cmpb #'" ; did we encounter a quoted string?
69 parse4 cmpb #token_stmtsep ; statement separator? 69 bne parsel ; brif not
70 beq parse_nextstmt ; brif so - do another statement 70 bsr parseout ; output delimiter
71 cmpb #token_remabbr ; ' token? 71 parsem ldb ,y+ ; get string character
72 beq parse0 ; brif so - parse it as a new statement 72 beq parsez ; brif end of input
73 bra parse_errorsn ; raise a syntax error 73 bsr parseout ; output it
74 parse_write lda parse_noout ; are we doing output? 74 cmpb #'" ; end delimiter?
75 beq parse_write0 ; brif so 75 bne parsem ; brif not - keep looking
76 leau 1,u ; just count up the output and don't do anything 76 bra parsea ; go handle more stuff
77 rts 77 parsep cmpb #'0 ; is it a digit?
78 parse_write0 leax -stackheadroom,s ; calculate bottom of stack with headroom 78 blo parsen ; brif not
79 cmpx freestart ; did the stack run into the end of the output? 79 cmpb #'9 ; is it still a digit?
80 bhs parse_write1 ; brif not - we're good 80 bls parseo ; brif so
81 ldb #err_om ; raise out of memory error, C already set from comparison 81 parsel cmpb #'A ; is it a letter?
82 rts 82 blo parsen ; brif not
83 parse_write1 stb ,u+ ; save output byte 83 cmpb #'Z ; is it still a letter (UC)?
84 stu freestart ; save new to of used memory 84 bls parseo ; brif so
85 list_noop 85 cmpb #'a ; is it a lower case letter?
86 parse_noop rts ; return all clear - C clear from comparison above 86 blo parsen ; brif not
87 parse_curtoken ldb parse_curtok ; fetch token code of current token 87 cmpb #'z ; is it still a lower case letter?
88 rts 88 bhi parsen ; brif not
89 parse_tokerr comb ; flag error - unexpected token 89 parseo bsr parseout ; stash the character
90 ldb #err_sn ; raise syntax error 90 ldb ,y+ ; fetch next input
91 beq parsez ; brif end of input
92 bra parsep ; go see if we're still in an identifier
93 parsen bsr parseout ; output unknown character (number, unknown token)
94 jmp parsea ; go handle more
95 parseoutw exg a,b ; write the 16 bit value in D to the output: swap so B holds the MSB first
96 bsr parseout ; NOTE(review): this nested call stacks a second return address, so parseout's 2,s / 3,s / leas 7,s offsets look two bytes short on this path - confirm
97 exg a,b ; swap back so B holds the LSB, then fall through to write it
98 parseout tst 2,s ; write B to ,u+ - is the OM flag pushed at parseto set? (offset assumes one return address above that frame)
99 beq parseout0 ; brif not - store without a limit check
100 cmpu 3,s ; has U reached the buffer end (the Y saved by pshs a,u,y at parseto)?
101 blo parseout0 ; brif not - there is still room
102 coma ; set C for error (this also complements A)
103 leas 7,s ; drop our return address plus the 5 byte a,u,y frame
104 rts ; return to the caller of parse/parseto with C set
105 parseout0 stb ,u+ ; stash in buffer
91 rts 106 rts
92 parse_nextchar lda ,y ; at end of input already? 107 parse_nextchar lda ,y ; at end of input already?
93 beq parse_curchar ; brif so 108 beq parse_curchar ; brif so
94 leay 1,y ; move to next input character 109 leay 1,y ; move to next input character
95 parse_curchar lda ,y ; fetch input character 110 parse_curchar lda ,y ; fetch input character
96 rts 111 rts
97 parse_nexttokc bsr parse_nexttok ; fetch next token
98 parse_iseos cmpb #token_eot ; end of text?
99 beq parse_iseos0 ; brif so
100 cmpb #token_stmtsep ; is it a statement separator
101 parse_iseos0 rts
102 parse_nexttok bsr parse_curchar ; fetch current input
103 beq parse_nexttok1 ; brif end of input
104 parse_nexttok0 cmpa #0x20 ; space?
105 bne parse_nexttok2 ; brif not
106 bsr parse_nextchar ; eat the space
107 bne parse_nexttok0 ; brif not end of input
108 parse_nexttok1 ldb #token_eot ; flag end of input
109 bra parse_nexttok6 ; go return it
110 parse_nexttok2 sty parse_tokenst ; save start of current token after skipping spaces
111 bsr parse_toupper ; make sure we have upper case letters for matching
112 ldx #parse_wt ; point to keyword parsing table
113 jsr parse_wordtab ; go see if we have a match in the keyword table
114 bcc parse_nexttok6 ; brif we do - return it
115 ldy parse_tokenst ; return to the start of the token - pointer probably clobbered
116 bsr parse_curchar ; get back input character (may have been clobbered)
117 cmpa #'. ; leading decimal?
118 beq parse_nexttok3 ; brif so - parse number
119 cmpa #'0 ; is it a digit
120 blo parse_nexttok10 ; brif not
121 cmpa #'9 ; is it still a digit?
122 bhi parse_nexttok10 ; brif not
123 parse_nexttok3 jmp parse_number ; go parse a number
124 parse_nexttok6 stb parse_curtok ; save token type
125 leay 1,y ; eat the input character
126 clra ; clear C to indicate no error (and clear Z also)
127 rts
128 parse_nexttok10 cmpa #'A ; is it alpha?
129 blo parse_nexttok11 ; brif not
130 cmpa #'Z ; is it still alpha?
131 bls parse_nexttok12 ; brif so
132 parse_nexttok11 comb ; flag error - unrecognized token
133 ldb #token_error
134 rts
135 parse_nexttok12 bsr parse_nextcharu ; fetch next input character
136 cmpa #'0 ; is it alphanumeric?
137 blo parse_nexttok13 ; brif not
138 cmpa #'9 ; is it numeric?
139 bls parse_nexttok12 ; brif so - keep skipping it
140 cmpa #'A ; is it alpha?
141 blo parse_nexttok13 ; brif not
142 cmpa #'Z ; is it still alpha?
143 bls parse_nexttok12 ; brif so - keep skipping it
144 parse_nexttok13 tfr y,d ; calculate length of identifier
145 subd parse_tokenst
146 std val0+val.strlen ; save it for reference
147 ldb #token_ident ; indicate an identifier (variable name, etc.)
148 rts ; return result (C will be clear from SUBD above)
149 parse_nextcharu bsr parse_nextchar ; fetch next input character 112 parse_nextcharu bsr parse_nextchar ; fetch next input character
150 beq parse_toupper0 ; brif end of input 113 beq parse_toupper0 ; brif end of input
151 parse_toupper cmpa #'a ; is it lower case alpha? 114 parse_toupper cmpa #'a ; is it lower case alpha?
152 blo parse_toupper0 ; brif not 115 blo parse_toupper0 ; brif not
153 cmpa #'z ; is it still lower case alpha? 116 cmpa #'z ; is it still lower case alpha?
154 bhi parse_toupper0 ; brif not 117 bhi parse_toupper0 ; brif not
155 suba #0x20 ; adjust to upper case alpha 118 suba #0x20 ; adjust to upper case alpha
156 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu 119 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu
157 parse_number jmp parse_tokerr 120 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
158 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 121 ; This routine parses tokens using the table at parse_wt. The table is structured as follows:
159 ; Parse a statement that consists of just the command token
160 parse_cmdsingle equ parse_write ; just write the token out and bail
161 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
162 ; Parse a REM or ' statement. We just copy the comment out after the REM or ' token.
163 parse_rem jsr parse_write ; write the token/character out
164 ldb ,y+ ; get next input character
165 bne parse_rem ; brif not at the end of the input
166 ldb #token_eot ; flag end of input for mainline parser
167 stb parse_curtok
168 rts ; return, pass back the C result from parse_write
169 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
170 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows:
171 ; 122 ;
172 ; * two bytes which contain the length of the table less the two bytes for this length value 123 ; * two bytes which contain the length of the table less the two bytes for this length value
173 ; * a sequence of entries consisting of a single byte matching character and a token code followed 124 ; * a sequence of entries consisting of a single byte matching character and a token code followed
174 ; by an optional sub table, structured exactly the same way. 125 ; by an optional sub table, structured exactly the same way. The token code is 2 bytes.
175 ; 126 ;
176 ; The optional subtable will be present if the token code is token_eot 127 ; The optional subtable will be present if the token code is token_eot
177 ; 128 ;
178 ; If the character match is negative, it means a lookahead failed. The negative value is the number 129 ; If the character match is negative, it means a lookahead failed. The negative value is the number
179 ; of characters to unget and the token code is the token value to return. No other entries after this 130 ; of characters to unget and the token code is the token value to return. No other entries after this
180 ; in a table will be considered since this negative match is a global match. 131 ; in a table will be considered since this negative match is a global match.
181 ; 132 ;
182 ; When a token_eot match is found, if there are no further characters in the input, the match is 133 ; When a token_eot match is found, if there are no further characters in the input, the match is
183 ; determined to be invalid and processing continues with the next entry. 134 ; determined to be invalid and processing continues with the next entry.
135 parse_wordtab ldx #parse_wt ; point to main lookup table
136 skip2 ; move on into the main routine
184 parse_wordtab0 leas 3,s ; clean up stack for sub table handling 137 parse_wordtab0 leas 3,s ; clean up stack for sub table handling
185 parse_wordtab pshs a,x ; save input character and start of table 138 pshs a,x ; save input character and start of table
186 ldd ,x++ ; get length of this table 139 ldd ,x++ ; get length of this table
187 addd 1,s ; calculate the address of the end of the table 140 addd 1,s ; calculate the address of the end of the table
188 std 1,s ; save end address for comparison later 141 std 1,s ; save end address for comparison later
189 lda ,s ; get back input character 142 lda ,s ; get back input character
190 parse_wordtab1 ldb 1,x ; fetch token code for this entry 143 parse_wordtab1 leax 3,x ; move past this entry - this order to avoid Z effects from leax
191 cmpa ,x++ ; does this entry match? 144 cmpa -3,x ; does this entry match?
192 bne parse_wordtab4 ; brif not 145 bne parse_wordtab4 ; brif not
193 cmpb #token_eot ; is it indicating a sub table? 146 ldd -2,x ; get the matched token code
147 cmpd #tokenf_eot ; is it indicating a sub table?
194 bne parse_wordtab6 ; brif not 148 bne parse_wordtab6 ; brif not
195 jsr parse_nextcharu ; fetch next input character (for sub table match) 149 jsr parse_nextcharu ; fetch next input character (for sub table match)
196 bne parse_wordtab0 ; brif we are going to check the sub table 150 bne parse_wordtab0 ; brif we are going to check the sub table
197 parse_wordtab2 ldd ,x ; fetch length of sub table 151 parse_wordtab2 ldd ,x ; fetch length of sub table
198 leax d,x ; move past sub table 152 leax d,x ; move past sub table
199 parse_wordtab3 lda ,s ; get back input character 153 parse_wordtab3 lda ,s ; get back input character
200 cmpx 1,s ; are we at the end of the table? 154 cmpx 1,s ; are we at the end of the table?
201 blo parse_wordtab1 ; brif not - check another entry 155 blo parse_wordtab1 ; brif not - check another entry
202 comb ; indicate no match 156 comb ; indicate no match
203 puls a,x,pc ; clean up stack and return 157 puls a,x,pc ; clean up stack and return
204 parse_wordtab4 lda -2,x ; get the match character 158 parse_wordtab4 lda -3,x ; get the match character
205 bmi parse_wordtab5 ; brif negative - lookahead fail 159 bmi parse_wordtab5 ; brif negative - lookahead fail
206 cmpb #token_eot ; is there a sub table to skip? 160 ldd -2,x ; get the token match
161 cmpd #tokenf_eot ; is there a sub table to skip?
207 beq parse_wordtab2 ; brif so - skip sub table 162 beq parse_wordtab2 ; brif so - skip sub table
208 bra parse_wordtab3 ; otherwise just move to the next entry 163 bra parse_wordtab3 ; otherwise just move to the next entry
209 parse_wordtab5 leay a,y ; move back the specified number of characters 164 parse_wordtab5 leay a,y ; move back the specified number of characters
210 parse_wordtab6 clra ; clear C to indicate a match 165 ldd -2,x ; get the matched token
211 puls a,x,pc ; clean up stack and return 166 parse_wordtab6 sta ,s ; save MSB of match
167 clra ; clear carry to indicate match
168 puls a,x,pc ; clean up stack, restore return value and return
212 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 169 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
213 ; Convert a token number back to its keyword. This will use the same table used by parse_wordtab. Enter with a character 170 ; Convert a token number back to its keyword. This will use the same table used by parse_wordtab. Enter with a character
214 ; output routine pointer in U which takes the character in A. The routine can assume that Y is preserved. Will return 171 ; output routine pointer in U which takes the character in A. The routine can assume that Y is preserved. Will return
215 ; with C set if the token does not exist in the word table and clear otherwise. 172 ; with C set if the token does not exist in the word table and clear otherwise.
216 parse_wtdc pshs u ; save routine pointer 173 parse_wtdc pshs u ; save routine pointer
245 parse_wtdc6 cmpx 1,s ; are we at the end of this table? 202 parse_wtdc6 cmpx 1,s ; are we at the end of this table?
246 bne parse_wtdc3 ; brif not - handle another table entry 203 bne parse_wtdc3 ; brif not - handle another table entry
247 coma ; make sure C is set for no match 204 coma ; make sure C is set for no match
248 puls a,x,pc ; clean up stack and return 205 puls a,x,pc ; clean up stack and return
249 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 206 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
250 ; Validate a line number. Must enter with the token type in B. Will return the line number in X. It will return a 207 ; Definition of tokens used in the interpreter.
251 ; syntax error if the line number is invalid or out of range. It will also consume a valid line number token. 208 ;
252 parse_linenum cmpb #token_int32 ; is it an integer? 209 ; Each token is defined as follows:
253 beq parse_linenum1 ; brif so 210 ; parse_tokendefT <sym>[,<handler>]
254 parse_linenum0 ldb #err_sn ; flag syntax error 211 ; where T is one of:
255 coma ; flag error 212 ; p: particle - utility tokens and definitions, starting at 0x00
256 rts 213 ; c: command - a command keyword, starting at 0x80
257 parse_linenum1 ldx val0+val.int ; get high word of integer 214 ; f: function - a function keyword, start at 0x80 with a 0xFF prefix
258 bne parse_linenum0 ; brif not a valid line number 215 ; n: token width specific number/code, but otherwise a particle; in this case, the code replaces <handler>
259 ldx val0+val.int+2 ; get actual line number 216 ;
260 pshs x ; save it 217 ; <sym> is the base symbol name (such as "then" or "eot")
261 jsr parse_nexttok ; consume line number 218 ; <handler> is the address of the execution handler routine of the natural token type (command or function)
262 puls x,pc ; get back line number and return it 219 ;
263 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 220 ; <handler> is optional for particles. If it is omitted for command or function tokens, it defaults to SNERROR.
264 ; Parse a line number range which is one of the following forms:
265 ; <linenum1>
266 ; <linenum1>-
267 ; <linenum1>-<linenum2>
268 ; -<linenum2>
269 ; The result will store two line numbers. If no - token appears, then both line numbers will be the same. Otherwise,
270 ; if <linenum1> is omitted, it will be assumed to be 0. If <linenum2> is omitted, it will be assumed to be 65535. Those
271 ; are the minimum and maximum line numbers.
272 ;
273 ; Parsing works by first looking for an integer token that is in range. If it finds one, it looks for an optional -
274 ; followed by an optional integer token that is in range. If the first token is not an integer, it must be a - which may
275 ; be optionally followed by another integer in range.
276 ;
277 ; It is technically valid to have a single - with no line numbers.
278 ;
279 ; Enter with the current token in B.
280 ;
281 ; The resulting line numbers will be returned in parse_buff
282 parse_linerange ldx zero ; default start line number
283 leau -1,x ; default end line number
284 pshs x,u ; save the return range
285 cmpb #token_minus ; range with no start?
286 beq parse_linerang1 ; brif so
287 bsr parse_linenum ; verify line number, return in X
288 bcs parse_linerang4 ; bail out on error
289 stx ,s ; save new start line number
290 jsr parse_nexttokc ; fetch next token, set Z if end of statement
291 bne parse_linerang0 ; brif not end of line
292 ldx ,s ; get end line to use as start line
293 bra parse_linerang2 ; go set range end and return
294 parse_linerang0 cmpb #token_minus ; do we have a range character?
295 bne parse_linerang3 ; brif not - we have an error
296 parse_linerang1 jsr parse_nexttokc ; parse what comes after the range mark
297 beq parse_linerang2 ; brif end of statement - use the default range end
298 bsr parse_linenum ; make sure it's a valid line number
299 bcs parse_linerang4 ; bail out on error
300 parse_linerang2 stx 2,s ; set range end
301 clra ; make sure C is clear
302 puls x,u,pc ; fetch return values and return
303 parse_linerang3 ldb #err_sn ; flag a syntax error
304 coma ; make sure C is set
305 parse_linerang4 puls x,u,pc ; clean up stack and return error condition
306 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
307 ; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows:
308 ; parse_tokendefT <sym>,<parse>,<list>,<exec>
309 ; where:
310 ; T: c for command, f for function, p for particle
311 ; <sym>: the symbol name without the "token_" prefix
312 ; <parse>: parse handler for the type, ignored for particles
313 ; <list>: list handler for the type, ignored for particles
314 ; <exec>: execution handler for the type, ignored for particles
315 *pragmapush list 221 *pragmapush list
316 *pragma nolist 222 *pragma nolist
317 __toknump set 0 223 __toknump set 0
318 __toknumc set 0x40 224 __toknumc set 0x80
319 __toknumf set 0xc0 225 __toknumf set 0x80
320 setstr __cmdparset=""
321 setstr __cmdlistt=""
322 setstr __cmdexect=""
323 setstr __fnparset=""
324 setstr __fnlistt=""
325 setstr __fnexect=""
326 parse_tokendefp macro noexpand 226 parse_tokendefp macro noexpand
327 token_\1 equ __toknump 227 token_\1 equ __toknump
228 tokenf_\1 equ __toknump
328 __toknump set __toknump+1 229 __toknump set __toknump+1
329 endm 230 endm
231 parse_tokendefv macro noexpand
232 token_\1 equ \2
233 tokenf_\1 equ \2
234 endm
235 setstr __cmdexect=""
236 setstr __funcexect=""
330 parse_tokendefc macro noexpand 237 parse_tokendefc macro noexpand
331 token_\1 equ __toknumc 238 token_\1 equ __toknumc
239 tokenf_\1 equ __toknumc
332 __toknumc set __toknumc+1 240 __toknumc set __toknumc+1
333 ifstr ne,"{2}","" 241 ifstr ne,"{2}",""
334 setstr __cmdparset="%(__cmdparset)\tfcb\ttoken_\1\n\tfdb {2}\n" 242 setstr __cmdexect="%(__cmdexect)\tfdb {2}\n"
335 endc
336 ifstr ne,"{3}",""
337 setstr __cmdlistt="%(__cmdlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
338 endc
339 ifstr ne,"{4}",""
340 setstr __cmdexect="%(__cmdexect)\tfdb {3}\n"
341 else 243 else
342 setstr __cmdexect="%(__cmdexect)\tfdb SNERROR\n" 244 setstr __cmdexect="%(__cmdexect)\tfdb SNERROR\n"
343 endc 245 endc
344 endm 246 endm
345 parse_tokendeff macro noexpand 247 parse_tokendeff macro noexpand
346 token_\1 equ __toknumf 248 token_\1 equ __toknumf
249 tokenf_\1 equ 0xff00|__toknumf
347 __toknumf set __toknumf+1 250 __toknumf set __toknumf+1
348 ifstr ne,"{2}","" 251 ifstr ne,"{2}",""
349 setstr __fnparset="%(__fnparset)\tfcb\ttoken_\1\n\tfdb {2}\n" 252 setstr __fnexect="%(__fnexect)\tfdb {2}\n"
350 endc
351 ifstr ne,"{3}",""
352 setstr __fnlistt="%(__fnlistt)\tfcb\ttoken_\1\n\tfdb {3}\n"
353 endc
354 ifstr ne,"{4}",""
355 setstr __fnexect="%(__fnexect)\tfdb {3}\n"
356 else 253 else
357 setstr __fnexect="%(__fnexect)\tfdb SNERROR\n" 254 setstr __fnexect="%(__fnexect)\tfdb SNERROR\n"
358 endc 255 endc
359 endm
360 token_cmdparse macro
361 *pragmapush nolist
362 *pragma nolist
363 includestr "%(__cmdparset)"
364 *pragmapop nolist
365 endm
366 token_cmdlist macro
367 *pragmapush nolist
368 *pragma nolist
369 includestr "%(__cmdlistt)"
370 *pragmapop nolist
371 endm 256 endm
372 token_cmdexec macro 257 token_cmdexec macro
373 *pragmapush nolist 258 *pragmapush nolist
374 *pragma nolist 259 *pragma nolist
375 includestr "%(__cmdexect)" 260 includestr "%(__cmdexect)"
376 token__maxcmd equ __toknumc-1 261 token__maxcmd equ __toknumc-1
377 *pragmapop nolist 262 *pragmapop nolist
378 endm 263 endm
379 token_fnparse macro
380 *pragmapush nolist
381 *pragma nolist
382 includestr "%(__fnparset)"
383 *pragmapop nolist
384 endm
385 token_fnlist macro
386 *pragmapush nolist
387 *pragma nolist
388 includestr "%(__fnlistt)"
389 *pragmapop nolist
390 endm
391 token_fnexec macro 264 token_fnexec macro
392 *pragmapush nolist 265 *pragmapush nolist
393 *pragma nolist 266 *pragma nolist
394 includestr "%(__fnexect)" 267 includestr "%(__fnexect)"
395 token__maxfn equ __toknumf-1 268 token__maxfn equ __toknumf-1
396 *pragmapop nolist 269 *pragmapop nolist
397 endm 270 endm
398 *pragmapop list 271 *pragmapop list
399 ; the tokens defined in this section all have special parsing or meaning 272 ; special tokens
400 parse_tokendefp error ; Used to mark errors; should always be first so it's token #0 273 parse_tokendefp error ; Used to mark errors; should always be first so it's token #0
401 parse_tokendefp eot ; End of input marker or special handling in word tables 274 parse_tokendefp eot ; End of input marker or special handling in word tables
402 parse_tokendefp int32 ; 32 bit integer (has special parsing) 275 ; command (and simple non-command keywords)
403 parse_tokendefp float ; floating point value (has special parsing) 276 parse_tokendefc remabbr ; abbreviated REM (')
404 parse_tokendefp ident ; identifier (has special parsing) 277 parse_tokendefc rem ; REM
405 parse_tokendefp linenum ; a 16 bit unsigned integer treated as a line number 278 parse_tokendefc return ; RETURN
406 parse_tokendefp linerange ; a pair of 16 bit unsigned integers treated as line numbers 279 parse_tokendefc run ; RUN
407 ; everything below here references keywords or particle characters 280 parse_tokendefc data ; DATA
408 parse_tokendefp stmtsep ; statement separator 281 parse_tokendefc end ; END
409 parse_tokendefp times ; times (multiplication) operator (*) 282 parse_tokendefc stop ; STOP
410 parse_tokendefp plus ; addition operator 283 parse_tokendefc let ; LET
411 parse_tokendefp divide ; division operator (/) 284 parse_tokendefc list ; LIST
412 parse_tokendefp minus ; subtraction operator 285 parse_tokendefc new ; NEW
413 parse_tokendefp exp ; exponentiation operator (^) 286 parse_tokendefc print ; PRINT
414 parse_tokendefp lt ; less than operator 287 parse_tokendefc pop ; POP
415 parse_tokendefp le ; less than or equal operateor 288 parse_tokendefc goto ; GOTO
416 parse_tokendefp gt ; greater than operator 289 parse_tokendefc gosub ; GOSUB
417 parse_tokendefp ge ; greater than or equal operator 290 parse_tokendefc go ; GO
418 parse_tokendefp eq ; equality operator 291 parse_tokendefc times ; times (multiplication) operator (*)
419 parse_tokendefp ne ; inequality operator 292 parse_tokendefc plus ; addition operator
420 parse_tokendefp not ; boolean NOT operator 293 parse_tokendefc divide ; division operator (/)
421 parse_tokendefp and ; boolean AND operator 294 parse_tokendefc minus ; subtraction operator
422 parse_tokendefp or ; boolean OR operator 295 parse_tokendefc exp ; exponentiation operator (^)
423 parse_tokendefp bang ; exclamation mark 296 parse_tokendefc lt ; less than operator
424 parse_tokendefp hash ; number sign 297 parse_tokendefc le ; less than or equal operateor
425 parse_tokendefp dollar ; dollar sign (string sigil) 298 parse_tokendefc gt ; greater than operator
426 parse_tokendefp percent ; percent sign (integer sigil) 299 parse_tokendefc ge ; greater than or equal operator
427 parse_tokendefp amp ; ampersand 300 parse_tokendefc eq ; equality operator
428 parse_tokendefp oparen ; opening paren 301 parse_tokendefc ne ; inequality operator
429 parse_tokendefp cparen ; closing paren 302 parse_tokendefc not ; boolean NOT operator
430 parse_tokendefp sep ; comma (separator) 303 parse_tokendefc and ; boolean AND operator
431 parse_tokendefp semi ; semicolon 304 parse_tokendefc or ; boolean OR operator
432 parse_tokendefp at ; @ symbol 305 parse_tokendefc else ; ELSE
433 parse_tokendefp else ; ELSE 306 parse_tokendefc then ; THEN
434 parse_tokendefp then ; THEN 307 parse_tokendefc to ; TO
435 parse_tokendefp to ; TO 308 parse_tokendefc sub ; SUB
436 parse_tokendefp sub ; SUB 309 parse_tokendefc as ; AS
437 parse_tokendefp as ; AS 310 ; secondary tokens (functions)
438 311 parse_tokendeff asc ; ASC()
439 parse_tokendefc remabbr,parse_rem,list_noop,exec_noop ; abbreviated REM (')
440 parse_tokendefc rem,parse_rem,list_noop,exec_noop ; REM
441 parse_tokendefc return,parse_cmdsingle,parse_noop,parse_noop ; RETURN
442 parse_tokendefc run,parse_noop,parse_noop,parse_noop ; RUN
443 parse_tokendefc data,parse_noop,parse_noop,parse_noop ; DATA
444 parse_tokendefc end,parse_cmdsingle,parse_noop,parse_noop ; END
445 parse_tokendefc stop,parse_cmdsingle,parse_noop,parse_noop ; STOP
446 parse_tokendefc let,parse_noop,parse_noop,parse_noop ; LET
447 parse_tokendefc list,parse_noop,parse_noop,parse_noop ; LIST
448 parse_tokendefc new,parse_cmdsingle,parse_noop,parse_noop ; NEW
449 parse_tokendefc print,parse_noop,parse_noop,parse_noop ; PRINT
450 parse_tokendefc pop,parse_cmdsingle,parse_noop,parse_noop ; POP
451 parse_tokendefc goto,parse_noop,parse_noop,parse_noop ; GOTO
452 parse_tokendefc gosub,parse_noop,parse_noop,parse_noop ; GOSUB
453 parse_tokendefc go,parse_noop,parse_noop,parse_noop ; GO
454
455 parse_tokendeff asc,parse_noop,parse_noop,parse_noop ; ASC()
456 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
457 ; Parse handling tables
458 parsetab_cmd token_cmdparse
459 parsetab_cmde
460 parsetab_fn token_fnparse
461 parsetab_fne
462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
463 ; List handling tables
464 listtab_cmd token_cmdlist
465 listtab_cmde
466 listtab_fn token_fnlist
467 listtab_fne
468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
469 ; Execution handling tables 313 ; Execution handling tables
470 exectab_cmd token_cmdexec 314 exectab_cmd token_cmdexec
471 exectab_fn token_fnexec 315 exectab_fn token_fnexec
316
472 *pragmapop list 317 *pragmapop list