Mercurial > hg > index.cgi
comparison src/parse.s @ 124:8770e6f977c3
Rework parser to use parse_wordtab for symbols too
There's no reason not to use the parse_wordtab table thing to match the
symbols with their token codes. It takes less space than the combined code
and tables to do it separately.
author | William Astle <lost@l-w.ca> |
---|---|
date | Mon, 01 Jan 2024 15:57:59 -0700 |
parents | 5681cdada362 |
children | 0607e4e20702 |
comparison
equal
deleted
inserted
replaced
123:5681cdada362 | 124:8770e6f977c3 |
---|---|
80 bsr parse_nextchar ; eat the space | 80 bsr parse_nextchar ; eat the space |
81 bne parse_nexttok0 ; brif not end of input | 81 bne parse_nexttok0 ; brif not end of input |
82 parse_nexttok1 ldb #token_eot ; flag end of input | 82 parse_nexttok1 ldb #token_eot ; flag end of input |
83 bra parse_nexttok6 ; go return it | 83 bra parse_nexttok6 ; go return it |
84 parse_nexttok2 sty parse_tokenst ; save start of current token after skipping spaces | 84 parse_nexttok2 sty parse_tokenst ; save start of current token after skipping spaces |
85 bsr parse_toupper ; make sure we have upper case letters for matching | |
86 ldx #parse_wt ; point to keyword parsing table | |
87 bsr parse_wordtab ; go see if we have a match in the keyword table | |
88 bcc parse_nexttok6 ; brif we do - return it | |
89 ldy parse_tokenst ; return to the start of the token - pointer probably clobbered | |
90 bsr parse_curchar ; get back input character (may have been clobbered) | |
85 cmpa #'. ; leading decimal? | 91 cmpa #'. ; leading decimal? |
86 beq parse_nexttok3 ; brif so - parse number | 92 beq parse_nexttok3 ; brif so - parse number |
87 cmpa #'0 ; is it a digit | 93 cmpa #'0 ; is it a digit |
88 blo parse_nexttok4 ; brif not | 94 blo parse_nexttok10 ; brif not |
89 cmpa #'9 ; is it still a digit? | 95 cmpa #'9 ; is it still a digit? |
90 bhi parse_nexttok4 ; brif not | 96 bhi parse_nexttok10 ; brif not |
91 parse_nexttok3 jmp parse_number ; go parse a number | 97 parse_nexttok3 jmp parse_number ; go parse a number |
92 parse_nexttok4 ldx #parse_chartab ; point to list of single character tokens to recognize | |
93 parse_nexttok5 ldb 1,x ; get token value | |
94 cmpa ,x++ ; character match (and move to next entry) | |
95 bne parse_nexttok7 ; brif not | |
96 parse_nexttok6 stb parse_curtok ; save token type | 98 parse_nexttok6 stb parse_curtok ; save token type |
97 leay 1,y ; eat the input character | 99 leay 1,y ; eat the input character |
98 clra ; clear C to indicate no error (CLRA also sets Z) | 100 clra ; clear C to indicate no error (CLRA also sets Z) |
99 rts | 101 rts |
100 parse_nexttok7 cmpb #token_eot ; end of table? | 102 parse_nexttok10 cmpa #'A ; is it alpha? |
101 bne parse_nexttok5 ; brif not | |
102 clrb ; initialize relational flags | |
103 pshs d ; save input character and relational flags for later | |
104 parse_nexttok8 cmpa #'< ; less than? | |
105 blo parse_nexttok9 ; brif not <, =, or > | |
106 cmpa #'> ; still <, =, or >? | |
107 bhi parse_nexttok9 ; brif not | |
108 suba #'< ; adjust < to 0 | |
109 cmpa #1 ; set C if <, clear if = or > | |
110 rola ; now 4 if >, 2 if =, or 1 if < | |
111 eora 1,s ; merge with previous relational characters | |
112 cmpa 1,s ; if it doesn't match, we have a dupe | |
113 bne parse_nexttok9 ; brif it's not valid - we won't recognize more in the token | |
114 sta 1,s ; save new relational flags | |
115 bsr parse_nextchar ; fetch next input | |
116 sta ,s ; save input character | |
117 bne parse_nexttok8 ; brif there was one - go handle it | |
118 parse_nexttok9 puls d ; get back input character and relational flag | |
119 tstb ; was it a relational operator? | |
120 beq parse_nexttok10 ; brif not | |
121 ldx #parse_reltab ; point to relational operator token table | |
122 ldb b,x ; get the token code | |
123 clra ; flag no error | |
124 rts ; return - but don't advance - we already did looking for multiples | |
125 parse_nexttok10 bsr parse_toupper ; convert to upper case | |
126 cmpa #'A ; is it alpha? | |
127 blo parse_nexttok11 ; brif not | 103 blo parse_nexttok11 ; brif not |
128 cmpa #'Z ; is it still alpha? | 104 cmpa #'Z ; is it still alpha? |
129 bls parse_nexttok12 ; brif so | 105 bls parse_nexttok12 ; brif so |
130 parse_nexttok11 comb ; flag error - unrecognized token | 106 parse_nexttok11 comb ; flag error - unrecognized token |
131 ldb #token_error | 107 ldb #token_error |
132 rts | 108 rts |
109 parse_nexttok12 bsr parse_nextcharu ; fetch next input character | |
110 cmpa #'0 ; is it alphanumeric? | |
111 blo parse_nexttok13 ; brif not | |
112 cmpa #'9 ; is it numeric? | |
113 bls parse_nexttok12 ; brif so - keep skipping it | |
114 cmpa #'A ; is it alpha? | |
115 blo parse_nexttok13 ; brif not | |
116 cmpa #'Z ; is it still alpha? | |
117 bls parse_nexttok12 ; brif so - keep skipping it | |
118 parse_nexttok13 tfr y,d ; calculate length of identifier | |
119 subd parse_tokenst | |
120 std val0+val.strlen ; save it for reference | |
121 ldb #token_ident ; indicate an identifier (variable name, etc.) | |
122 rts ; return result (C will be clear from SUBD above) | |
133 parse_nextcharu bsr parse_nextchar ; fetch next input character | 123 parse_nextcharu bsr parse_nextchar ; fetch next input character |
134 beq parse_toupper0 ; brif end of input | 124 beq parse_toupper0 ; brif end of input |
135 parse_toupper cmpa #'a ; is it lower case alpha? | 125 parse_toupper cmpa #'a ; is it lower case alpha? |
136 blo parse_toupper0 ; brif not | 126 blo parse_toupper0 ; brif not |
137 cmpa #'z ; is it still lower case alpha? | 127 cmpa #'z ; is it still lower case alpha? |
138 bhi parse_toupper0 ; brif not | 128 bhi parse_toupper0 ; brif not |
139 suba #0x20 ; adjust to upper case alpha | 129 suba #0x20 ; adjust to upper case alpha |
140 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu | 130 parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu |
141 ; We parse alpha keywords and identifiers here, of the form [a-zA-Z][a-zA-Z0-9]* with possible nonalpha characters | |
142 ; in actual keywords. We use a table to parse keywords. As soon as we find a character that doesn't match a keyword | |
143 ; table entry, we fall back to looking for the end of an identifier and then returning that. | |
144 parse_nexttok12 ldx #parse_wordtab ; point to keyword table | |
145 bsr parse_nexttok16 ; process this table entry | |
146 cmpb #token_ident ; did we match a token? | |
147 bne parse_nexttok6 ; brif so - go return it | |
148 parse_nexttok13 cmpa #'0 ; was it alphanumeric? | |
149 blo parse_nexttok15 ; brif not | |
150 cmpa #'9 ; was it numeric? | |
151 bls parse_nexttok14 ; brif so | |
152 cmpa #'A ; was it alpha? | |
153 blo parse_nexttok15 ; brif not | |
154 cmpa #'Z ; is it still alpha? | |
155 bhi parse_nexttok15 ; brif not | |
156 parse_nexttok14 bsr parse_nextcharu ; fetch next character and force upper case | |
157 bne parse_nexttok13 ; if not end of input, see if we have alphanumeric | |
158 parse_nexttok15 tfr y,d ; fetch input location | |
159 subd parse_tokenst ; calculate length of token | |
160 std val0+val.strlen ; save the length of the identifier | |
161 ldb #token_ident ; set token type to identifier (variable name, probably) | |
162 rts ; return token type, do not advance since we already did above | |
163 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows: | 131 ; This routine parses tokens using the table at parse_wordtab. The table is structured as follows: |
164 ; | 132 ; |
165 ; * two bytes which contain the length of the table less the two bytes for this length value | 133 ; * two bytes which contain the length of the table less the two bytes for this length value |
166 ; * a sequence of entries consisting of a single byte matching character and a token code followed | 134 ; * a sequence of entries consisting of a single byte matching character and a token code followed |
167 ; by an optional sub table, structured exactly the same way. | 135 ; by an optional sub table, structured exactly the same way. |
201 bra parse_wordtab3 ; otherwise just move to the next entry | 169 bra parse_wordtab3 ; otherwise just move to the next entry |
202 parse_wordtab5 leay a,y ; move back the specified number of characters | 170 parse_wordtab5 leay a,y ; move back the specified number of characters |
203 parse_wordtab6 clra ; clear C to indicate a match | 171 parse_wordtab6 clra ; clear C to indicate a match |
204 puls a,x,pc ; clean up stack and return | 172 puls a,x,pc ; clean up stack and return |
205 parse_number jmp parse_tokerr | 173 parse_number jmp parse_tokerr |
206 ; Relational token table, bits are > = < | |
207 parse_reltab fcb token_error | |
208 fcb token_lt | |
209 fcb token_eq | |
210 fcb token_le | |
211 fcb token_gt | |
212 fcb token_ne | |
213 fcb token_ge | |
214 fcb token_reltrue | |
215 ; Single character token lookup table | |
216 parse_chartab fcb 0x21,token_bang ; ! | |
217 fcb 0x23,token_hash ; # | |
218 fcb 0x24,token_dollar ; $ | |
219 fcb 0x25,token_percent ; % | |
220 fcb 0x26,token_amp ; & | |
221 fcb 0x27,token_apos ; ' | |
222 fcb 0x28,token_oparen ; ( | |
223 fcb 0x29,token_cparen ; ) | |
224 fcb 0x2a,token_star ; * | |
225 fcb 0x2b,token_plus ; + | |
226 fcb 0x2c,token_comma ; , | |
227 fcb 0x2d,token_minus ; - | |
228 fcb 0x2f,token_slash ; / | |
229 fcb 0x3a,token_stmtsep ; : | |
230 fcb 0x3b,token_semi ; ; | |
231 fcb 0x3f,token_print ; ? - print shortcut | |
232 fcb 0x40,token_at ; @ | |
233 fcb 0x5e,token_exp ; ^ - exponentiation | |
234 fcb 0x00,token_eot ; end of table flag | |
235 ; Parse tokens - define them in order using the macro parse_tokdef | 174 ; Parse tokens - define them in order using the macro parse_tokdef |
236 *pragmapush list | 175 *pragmapush list |
237 *pragma nolist | 176 *pragma nolist |
238 parse_toknum set 0 | 177 parse_toknum set 0 |
239 parse_tokdef macro noexpand | 178 parse_tokdef macro noexpand |