# HG changeset patch # User William Astle # Date 1721107575 21600 # Node ID 5d4801c0566d4b9d654501b4adb105c066334bce # Parent 4983ba49f936a12048126ff7cbfbd503c4bc3e5d Get things building again with the updated tokenization scheme diff -r 4983ba49f936 -r 5d4801c0566d src/buildkeywordtab.c --- a/src/buildkeywordtab.c Sat Jul 13 17:33:01 2024 -0600 +++ b/src/buildkeywordtab.c Mon Jul 15 23:26:15 2024 -0600 @@ -34,7 +34,7 @@ // if there are child nodes, insert the sub tree if (tn1 -> firstchild) { - fprintf(fp, " fcb 0x%02x,token_eot\n", tn1 -> ccode); + fprintf(fp, " fcb 0x%02x\n fdb token_eot\n", tn1 -> ccode); if (tn1 -> toksym) { print_tree(fp, tn1, tn1 -> toksym, 255); @@ -54,13 +54,13 @@ // if there is also a terminal symbol here if (tn1 -> toksym) { - fprintf(fp, " fcb 0x%02x,%s\n", tn1 -> ccode, tn1 -> toksym); + fprintf(fp, " fcb 0x%02x\n fdb %s\n", tn1 -> ccode, tn1 -> toksym); } } // handle lookahead failure if (lookahead) { - fprintf(fp, " fcb 0x%02x,%s\n", lookaheaddepth, lookahead); + fprintf(fp, " fcb 0x%02x\n fdb %s\n", lookaheaddepth, lookahead); } fprintf(fp, "parse_wt%de\n", depth); diff -r 4983ba49f936 -r 5d4801c0566d src/keywordlist.txt --- a/src/keywordlist.txt Sat Jul 13 17:33:01 2024 -0600 +++ b/src/keywordlist.txt Mon Jul 15 23:26:15 2024 -0600 @@ -1,49 +1,38 @@ -! token_bang -# token_hash -$ token_dollar -% token_percent -& token_amp -' token_remabbr -( token_oparen -) token_cparen -* token_times -+ token_plus -, token_sep -- token_minus -/ token_divide -: token_stmtsep -; token_semi -? 
token_print -@ token_at -^ token_exp -< token_lt -<= token_le -=> token_le -> token_gt ->= token_ge -=> token_ge -<> token_ne ->< token_ne -= token_eq -AND token_and -AS token_as -ASC token_asc -DATA token_data -ELSE token_else -END token_end -GO token_go -GOTO token_goto -GOSUB token_gosub -LET token_let -LIST token_list -NEW token_new -NOT token_not -OR token_or -POP token_pop -PRINT token_print -REM token_rem -RETURN token_return -RUN token_run -STOP token_stop -SUB token_sub -TO token_to +' tokenf_remabbr +* tokenf_times ++ tokenf_plus +- tokenf_minus +/ tokenf_divide +? tokenf_print +^ tokenf_exp +< tokenf_lt +<= tokenf_le +=< tokenf_le +> tokenf_gt +>= tokenf_ge +=> tokenf_ge +<> tokenf_ne +>< tokenf_ne += tokenf_eq +AND tokenf_and +AS tokenf_as +ASC tokenf_asc +DATA tokenf_data +ELSE tokenf_else +END tokenf_end +GO tokenf_go +GOTO tokenf_goto +GOSUB tokenf_gosub +LET tokenf_let +LIST tokenf_list +NEW tokenf_new +NOT tokenf_not +OR tokenf_or +POP tokenf_pop +PRINT tokenf_print +REM tokenf_rem +RETURN tokenf_return +RUN tokenf_run +STOP tokenf_stop +SUB tokenf_sub +TO tokenf_to diff -r 4983ba49f936 -r 5d4801c0566d src/parse.s --- a/src/parse.s Sat Jul 13 17:33:01 2024 -0600 +++ b/src/parse.s Mon Jul 15 23:26:15 2024 -0600 @@ -1,151 +1,114 @@ *pragmapush list *pragma list ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; This is the overall parsing package. This is responsible for converting program text into the internal byte code and -; reporting any syntax errors and anything else reasonably detectable at parse time without having overly complicated -; code analysis. In almost all cases, the returned error will be a syntax error. The internal byte code shares the same -; token number allocations as the parser. Some allocated tokens cannot be identified by the lexer (parse_nexttok) but -; are used at runtime and when "decompiling" to text. +; This is the overall parsing package. 
It is responsible for converting the input source code into the internal byte +; code. ; -; In the event of a parse error, everything up to the next end of statement is retained as is using a special token -; that preserves the unparsable text and parsing resumes. Only the first error is referenced by the return error -; pointer. +; This version only converts keywords to token codes. Additional conversions will be done in future versions. ; -; This is a recursive descent parser. +; Enter with X pointing to the text to parse. The encoded result will be placed at freestart. On return, X will point to +; the encoded result and D will contain the length in bytes of the result, and C will be clear. ; -; Entry: -; X Points to the text to encode -; B Nonzero to prevent generating any output (error check/length calculation only) +; In the event that there is insufficient memory between freestart and the bottom of the stack, C will be set. This +; routine does not immediately throw an "out of memory" error to allow the caller to clear up some memory and try +; again. ; -; Exit: -; X Points to the encoded line -; D Length of the encoded line -; CC.C clear - -; Error Exit: -; X Points to the encoded line -; D Length of the encoded line -; Y Pointer to the first error location in the input -; U Error code -; CC.C set +; Enter at parseto with U set to the encoding destination and Y set to one byte past the end of the destination buffer +; to specify the destination. Defaults to encoding to the buffer between freestart and the bottom of the stack (with +; headroom accounted for). ; -; This is the error handler. It is responsible for resetting the stack to bail out to the top level -; parsing loop. It must also store the input pointer if this is the first error. Finally, it has to -; output all the text up to either the end of the line *or* the next valid statement separator. 
-parse_errorsn ldb #err_sn -parse_error lds parse_stackptr ; restore the original stack pointer so we can call from down stack - puls u ; get back original free pointer - stu freestart ; deallocate any allocated result - ldu parse_tokenst ; get start location of the token where the error was raised - coma ; make sure C is set for error - rts -parse stb parse_noout ; save no-output flag - leay ,x ; save input pointer in a less useful register - ldu freestart ; point to start of free memory where we will build the output - pshs u ; save original free memory location - sts parse_stackptr ; save the stack pointer for bailing out on errors -parse_nextstmt jsr parse_nexttok ; fetch the next token, return type in D - bcs parse_error ; brif we failed at parsing a token -parse0 ldx #parsetab_cmd ; point to jump table for token type handler - cmpb #token_stmtsep ; is it a statement separator? - beq parse_nextstmt ; brif so - we can just skip it -parse1 cmpb ,x ; did we match a valid command token? - beq parse3 ; brif so - leax 3,x ; move to next entry - cmpx #parsetab_cmde ; end of table? - blo parse1 ; brif not - bra parse_errorsn ; fell off the end -parse3 jsr [1,x] ; call the handler - bcs parse_error ; brif the handler indicated error - bsr parse_curtoken ; fetch the token we left off on - cmpb #token_eot ; end of input? - bne parse4 ; brif not - ldb #bc_eol ; stash an end of line op - bsr parse_write - bcs parse_error ; brif we errored out writing to the result (OM?) - tfr u,d ; calculate the length of the result - subd ,s - puls u,pc ; get pointer to start of encoded result and return (C is already clear) -parse4 cmpb #token_stmtsep ; statement separator? - beq parse_nextstmt ; brif so - do another statement - cmpb #token_remabbr ; ' token? - beq parse0 ; brif so - parse it as a new statement - bra parse_errorsn ; raise a syntax error -parse_write lda parse_noout ; are we doing output? 
- beq parse_write0 ; brif so - leau 1,u ; just count up the output and don't do anything - rts -parse_write0 leax -stackheadroom,s ; calculate bottom of stack with headroom - cmpx freestart ; did the stack run into the end of the output? - bhs parse_write1 ; brif not - we're good - ldb #err_om ; raise out of memory error, C already set from comparison - rts -parse_write1 stb ,u+ ; save output byte - stu freestart ; save new to of used memory -list_noop -parse_noop rts ; return all clear - C clear from comparison above -parse_curtoken ldb parse_curtok ; fetch token code of current token - rts -parse_tokerr comb ; flag error - unexpected token - ldb #err_sn ; raise syntax error +; The stuff below that has hard coded colon checks will eventually be replaced by more complete parsing. +parse ldu freestart ; default to the start of free memory for encoding + leay -stackheadroom,s ; set the top of free memory +parseto lda #1 ; flag to enable memory limit detection + pshs a,u,y ; save start and end addresses and OM error detection flag + leay ,x ; put the input pointer somewhere less useful +parsea jsr parse_curchar ; fetch an input character + bne parseb ; brif not end of input +parsez tfr u,d ; get current output pointer + subd 3,s ; now D is the length + leas 5,s ; clean up the stack + rts ; return - C will be clear from subd above +parseb jsr parse_wordtab ; look up a keyword and see if we have a match + bcs parsec ; brif no match - handle unknown stuff + tsta ; do we have a two byte token? + bne parseq ; brif so - just stash it + cmpb #token_else ; ELSE? + beq parsed ; brif so - gets a hidden statement separator + cmpb #token_remabbr ; REM abbreviation? + bne parsee ; brif not +parsed lda #': ; add a statement separator before it +parseq bsr parseoutw ; output a word + bra parsef +parsee bsr parseout ; output the token code +parsef cmpb #token_remabbr ; REM abbreviation? + beq parseg ; brif so + cmpb #token_rem ; Actual REM? 
+ bne parseh ; brif not +parseg ldb ,y+ ; get current input character + beq parsez ; brif end of input + bsr parseout ; add unmodified characters to output + bra parseg ; keep going until end of input +parseh cmpb #token_data ; DATA command? + bne parsea ; brif not - continue normal handling + clra ; flag for not skipping quoted string +parsei ldb ,y+ ; get input character + beq parsez ; brif end of input + cmpb #'" ; string delimiter? + bne parsej ; brif not + coma ; flip the quoted statement handler +parsej cmpb #': ; end of statement? + bne parsek ; brif not + tsta ; are we skipping them? + bne parsek ; brif so + leay -1,y ; unconsume it + bra parsea ; we're done with DATA +parsek bsr parseout ; put the data value into the output + bra parsei ; go handle another character +parsec cmpb #'" ; did we encounter a quoted string? + bne parsel ; brif not + bsr parseout ; output delimiter +parsem ldb ,y+ ; get string character + beq parsez ; brif end of input + bsr parseout ; output it + cmpb #'" ; end delimiter? + bne parsem ; brif not - keep looking + bra parsea ; go handle more stuff +parsep cmpb #'0 ; is it a digit? + blo parsen ; brif not + cmpb #'9 ; is it still a digit? + bls parseo ; brif so +parsel cmpb #'A ; is it a letter? + blo parsen ; brif not + cmpb #'Z ; is it still a letter (UC)? + bls parseo ; brif so + cmpb #'a ; is it a lower case letter? + blo parsen ; brif not + cmpb #'z ; is it still a lower case letter? + bhi parsen ; brif not +parseo bsr parseout ; stash the character + ldb ,y+ ; fetch next input + beq parsez ; brif end of input + bra parsep ; go see if we're still in an identifier +parsen bsr parseout ; output unknown character (number, unknown token) + jmp parsea ; go handle more +parseoutw exg a,b ; do MSB + bsr parseout + exg a,b ; and then LSB (fall through) +parseout tst 2,s ; need to test for OM? + beq parseout0 ; brif not + cmpu 3,s ; did we run into the end of the buffer? 
+ blo parseout0 ; brif not + coma ; set C for error + leas 7,s ; clean up stack + rts ; return to original caller +parseout0 stb ,u+ ; stash in buffer rts parse_nextchar lda ,y ; at end of input already? beq parse_curchar ; brif so leay 1,y ; move to next input character parse_curchar lda ,y ; fetch input character rts -parse_nexttokc bsr parse_nexttok ; fetch next token -parse_iseos cmpb #token_eot ; end of text? - beq parse_iseos0 ; brif so - cmpb #token_stmtsep ; is it a statement separator -parse_iseos0 rts -parse_nexttok bsr parse_curchar ; fetch current input - beq parse_nexttok1 ; brif end of input -parse_nexttok0 cmpa #0x20 ; space? - bne parse_nexttok2 ; brif not - bsr parse_nextchar ; eat the space - bne parse_nexttok0 ; brif not end of input -parse_nexttok1 ldb #token_eot ; flag end of input - bra parse_nexttok6 ; go return it -parse_nexttok2 sty parse_tokenst ; save start of current token after skipping spaces - bsr parse_toupper ; make sure we have upper case letters for matching - ldx #parse_wt ; point to keyword parsing table - jsr parse_wordtab ; go see if we have a match in the keyword table - bcc parse_nexttok6 ; brif we do - return it - ldy parse_tokenst ; return to the start of the token - pointer probably clobbered - bsr parse_curchar ; get back input character (may have been clobbered) - cmpa #'. ; leading decimal? - beq parse_nexttok3 ; brif so - parse number - cmpa #'0 ; is it a digit - blo parse_nexttok10 ; brif not - cmpa #'9 ; is it still a digit? - bhi parse_nexttok10 ; brif not -parse_nexttok3 jmp parse_number ; go parse a number -parse_nexttok6 stb parse_curtok ; save token type - leay 1,y ; eat the input character - clra ; clear C to indicate no error (and clear Z also) - rts -parse_nexttok10 cmpa #'A ; is it alpha? - blo parse_nexttok11 ; brif not - cmpa #'Z ; is it still alpha? 
- bls parse_nexttok12 ; brif so -parse_nexttok11 comb ; flag error - unrecognized token - ldb #token_error - rts -parse_nexttok12 bsr parse_nextcharu ; fetch next input character - cmpa #'0 ; is it alphanumeric? - blo parse_nexttok13 ; brif not - cmpa #'9 ; is it numeric? - bls parse_nexttok12 ; brif so - keep skipping it - cmpa #'A ; is it alpha? - blo parse_nexttok13 ; brif not - cmpa #'Z ; is it still alpha? - bls parse_nexttok12 ; brif so - keep skipping it -parse_nexttok13 tfr y,d ; calculate length of identifier - subd parse_tokenst - std val0+val.strlen ; save it for reference - ldb #token_ident ; indicate an identifier (variable name, etc.) - rts ; return result (C will be clear from SUBD above) parse_nextcharu bsr parse_nextchar ; fetch next input character beq parse_toupper0 ; brif end of input parse_toupper cmpa #'a ; is it lower case alpha? @@ -154,24 +117,12 @@ bhi parse_toupper0 ; brif not suba #0x20 ; adjust to upper case alpha parse_toupper0 rts ; Z only set here if input was zero entering from parse_nextcharu -parse_number jmp parse_tokerr ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Parse a statement that consists of just the command token -parse_cmdsingle equ parse_write ; just write the token out and bail -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Parse a REM or ' statement. We just copy the comment out after the REM or ' token. -parse_rem jsr parse_write ; write the token/character out - ldb ,y+ ; get next input character - bne parse_rem ; brif not at the end of the input - ldb #token_eot ; flag end of input for mainline parser - stb parse_curtok - rts ; return, pass back the C result from parse_write -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; This routine parses tokens using the table at parse_wordtab. 
The table is structured as follows: +; This routine parses tokens using the table at parse_wt. The table is structured as follows: ; ; * two bytes which contain the length of the table less the two bytes for this length value ; * a sequence of entries consisting of a single byte matching character and a token code followed -; by an optional sub table, structured exactly the same way. +; by an optional sub table, structured exactly the same way. The token code is 2 bytes. ; ; The optional subtable will be present if the token code is token_eot ; @@ -181,16 +132,19 @@ ; ; When a token_eot match is found, if there are no further characters in the input, the match is ; determined to be invalid and processing continues with the next entry. +parse_wordtab ldx #parse_wt ; point to main lookup table + skip2 ; move on into the main routine parse_wordtab0 leas 3,s ; clean up stack for sub table handling -parse_wordtab pshs a,x ; save input character and start of table + pshs a,x ; save input character and start of table ldd ,x++ ; get length of this table addd 1,s ; calculate the address of the end of the table std 1,s ; save end address for comparison later lda ,s ; get back input character -parse_wordtab1 ldb 1,x ; fetch token code for this entry - cmpa ,x++ ; does this entry match? +parse_wordtab1 leax 3,x ; move past this entry - this order to avoid Z effects from leax + cmpa -3,x ; does this entry match? bne parse_wordtab4 ; brif not - cmpb #token_eot ; is it indicating a sub table? + ldd -2,x ; get the matched token code + cmpd #tokenf_eot ; is it indicating a sub table? 
bne parse_wordtab6 ; brif not jsr parse_nextcharu ; fetch next input character (for sub table match) bne parse_wordtab0 ; brif we are going to check the sub table @@ -201,14 +155,17 @@ blo parse_wordtab1 ; brif not - check another entry comb ; indicate no match puls a,x,pc ; clean up stack and return -parse_wordtab4 lda -2,x ; get the match character +parse_wordtab4 lda -3,x ; get the match character bmi parse_wordtab5 ; brif negative - lookahead fail - cmpb #token_eot ; is there a sub table to skip? + ldd -2,x ; get the token match + cmpd #tokenf_eot ; is there a sub table to skip? beq parse_wordtab2 ; brif so - skip sub table bra parse_wordtab3 ; otherwise just move to the next entry parse_wordtab5 leay a,y ; move back the specified number of characters -parse_wordtab6 clra ; clear C to indicate a match - puls a,x,pc ; clean up stack and return + ldd -2,x ; get the matched token +parse_wordtab6 sta ,s ; save MSB of match + clra ; clear carry to indicate match + puls a,x,pc ; clean up stack, restore return value and return ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Convert a token number back to its keyword. This will use the same table used by parse_wordtab. Enter with a character ; output routine pointer in U which takes the character in A. The routine can assume that Y is preserved. Will return @@ -247,128 +204,56 @@ coma ; make sure C is set for no match puls a,x,pc ; clean up stack and return ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Validate a line number. Must enter with the token type in B. Will return the line number in X. It will return a -; syntax error if the line number is invalid or out of range. It will also consume a valid line number token. -parse_linenum cmpb #token_int32 ; is it an integer? 
- beq parse_linenum1 ; brif so -parse_linenum0 ldb #err_sn ; flag syntax error - coma ; flag error - rts -parse_linenum1 ldx val0+val.int ; get high word of integer - bne parse_linenum0 ; brif not a valid line number - ldx val0+val.int+2 ; get actual line number - pshs x ; save it - jsr parse_nexttok ; consume line number - puls x,pc ; get back line number and return it -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Parse a line number range which is one of the following forms: -; -; - -; - -; - -; The result will store two line numbers. If no - token appears, then both line numbers will be the same. Otherwise, -; if is omitted, it will be assumed to be 0. If is omitted, it will be assumed to be 65535. Those -; are the minimum and maximum line numbers. -; -; Parsing works by first looking for an integer token that is in range. If it finds one, it looks for an optional - -; followed by an optional integer token that is in range. If the first token is not an integer, it must be a - which may -; be optionally followed by another integer in range. -; -; It is technically valid to have a single - with no line numbers. -; -; Enter with the current token in B. +; Definition of tokens used in the interpreter. ; -; The resulting line numbers will be returned in parse_buff -parse_linerange ldx zero ; default start line number - leau -1,x ; default end line number - pshs x,u ; save the return range - cmpb #token_minus ; range with no start? - beq parse_linerang1 ; brif so - bsr parse_linenum ; verify line number, return in X - bcs parse_linerang4 ; bail out on error - stx ,s ; save new start line number - jsr parse_nexttokc ; fetch next token, set Z if end of statement - bne parse_linerang0 ; brif not end of line - ldx ,s ; get end line to use as start line - bra parse_linerang2 ; go set range end and return -parse_linerang0 cmpb #token_minus ; do we have a range character? 
- bne parse_linerang3 ; brif not - we have an error -parse_linerang1 jsr parse_nexttokc ; parse what comes after the range mark - beq parse_linerang2 ; brif end of statement - use the default range end - bsr parse_linenum ; make sure it's a valid line number - bcs parse_linerang4 ; bail out on error -parse_linerang2 stx 2,s ; set range end - clra ; make sure C is clear - puls x,u,pc ; fetch return values and return -parse_linerang3 ldb #err_sn ; flag a syntax error - coma ; make sure C is set -parse_linerang4 puls x,u,pc ; clean up stack and return error condition -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; This table defines the various handler routines for the various bytecode tokens. Each token is defined as follows: -; parse_tokdefT ,,, -; where: -; T: c for command, f for function, p for particle -; : the symbol name without the "token_" prefix -; : parse handler for the type, ignored for particles -; : list handler for the type, ingored for particles -; : execution handler for the type, ignored for particles +; Each token is defined as follows: +; parse_tokendefT <sym>[,<exec>] +; where T is one of: +; p: particle - utility tokens and definitions, starting at 0x00 +; c: command - a command keyword, starting at 0x80 +; f: function - a function keyword, starting at 0x80 with a 0xFF prefix +; v: token width specific number/code, but otherwise a particle; in this case, the code replaces <exec> +; +; <sym> is the base symbol name (such as "then" or "eot") +; <exec> is the address of the execution handler routine of the natural token type (command or function) +; +; <exec> is optional for particles. If it is omitted for command or function tokens, it defaults to SNERROR. 
*pragmapush list *pragma nolist __toknump set 0 -__toknumc set 0x40 -__toknumf set 0xc0 - setstr __cmdparset="" - setstr __cmdlistt="" - setstr __cmdexect="" - setstr __fnparset="" - setstr __fnlistt="" - setstr __fnexect="" +__toknumc set 0x80 +__toknumf set 0x80 parse_tokendefp macro noexpand token_\1 equ __toknump +tokenf_\1 equ __toknump __toknump set __toknump+1 endm +parse_tokendefv macro noexpand +token_\1 equ \2 +tokenf_\1 equ \2 + endm + setstr __cmdexect="" + setstr __fnexect="" parse_tokendefc macro noexpand token_\1 equ __toknumc +tokenf_\1 equ __toknumc __toknumc set __toknumc+1 ifstr ne,"{2}","" - setstr __cmdparset="%(__cmdparset)\tfcb\ttoken_\1\n\tfdb {2}\n" - endc - ifstr ne,"{3}","" - setstr __cmdlistt="%(__cmdlistt)\tfcb\ttoken_\1\n\tfdb {3}\n" - endc - ifstr ne,"{4}","" - setstr __cmdexect="%(__cmdexect)\tfdb {3}\n" + setstr __cmdexect="%(__cmdexect)\tfdb {2}\n" else setstr __cmdexect="%(__cmdexect)\tfdb SNERROR\n" endc endm parse_tokendeff macro noexpand token_\1 equ __toknumf +tokenf_\1 equ 0xff00|__toknumf __toknumf set __toknumf+1 ifstr ne,"{2}","" - setstr __fnparset="%(__fnparset)\tfcb\ttoken_\1\n\tfdb {2}\n" - endc - ifstr ne,"{3}","" - setstr __fnlistt="%(__fnlistt)\tfcb\ttoken_\1\n\tfdb {3}\n" - endc - ifstr ne,"{4}","" - setstr __fnexect="%(__fnexect)\tfdb {3}\n" + setstr __fnexect="%(__fnexect)\tfdb {2}\n" else setstr __fnexect="%(__fnexect)\tfdb SNERROR\n" endc endm -token_cmdparse macro - *pragmapush nolist - *pragma nolist - includestr "%(__cmdparset)" - *pragmapop nolist - endm -token_cmdlist macro - *pragmapush nolist - *pragma nolist - includestr "%(__cmdlistt)" - *pragmapop nolist - endm token_cmdexec macro *pragmapush nolist *pragma nolist @@ -376,18 +261,6 @@ token__maxcmd equ __toknumc-1 *pragmapop nolist endm -token_fnparse macro - *pragmapush nolist - *pragma nolist - includestr "%(__fnparset)" - *pragmapop nolist - endm -token_fnlist macro - *pragmapush nolist - *pragma nolist - includestr "%(__fnlistt)" - *pragmapop 
nolist - endm token_fnexec macro *pragmapush nolist *pragma nolist @@ -396,77 +269,49 @@ *pragmapop nolist endm *pragmapop list - ; the tokens defined in this section all have special parsing or meaning + ; special tokens parse_tokendefp error ; Used to mark errors; should always be first so it's token #0 parse_tokendefp eot ; End of input marker or special handling in word tables - parse_tokendefp int32 ; 32 bit integer (has special parsing) - parse_tokendefp float ; floating point value (has special parsing) - parse_tokendefp ident ; identifier (has special parsing) - parse_tokendefp linenum ; a 16 bit unsigned integer treated as a line number - parse_tokendefp linerange ; a pair of 16 bit unsigned integers treated as line numbers - ; everything below here references keywords or particle characters - parse_tokendefp stmtsep ; statement separator - parse_tokendefp times ; times (multiplication) operator (*) - parse_tokendefp plus ; addition operator - parse_tokendefp divide ; division operator (/) - parse_tokendefp minus ; subtraction operator - parse_tokendefp exp ; exponentiation operator (^) - parse_tokendefp lt ; less than operator - parse_tokendefp le ; less than or equal operateor - parse_tokendefp gt ; greater than operator - parse_tokendefp ge ; greater than or equal operator - parse_tokendefp eq ; equality operator - parse_tokendefp ne ; inequality operator - parse_tokendefp not ; boolean NOT operator - parse_tokendefp and ; boolean AND operator - parse_tokendefp or ; boolean OR operator - parse_tokendefp bang ; exclamation mark - parse_tokendefp hash ; number sign - parse_tokendefp dollar ; dollar sign (string sigil) - parse_tokendefp percent ; percent sign (integer sigil) - parse_tokendefp amp ; ampersand - parse_tokendefp oparen ; opening paren - parse_tokendefp cparen ; closing paren - parse_tokendefp sep ; comma (separator) - parse_tokendefp semi ; semicolon - parse_tokendefp at ; @ symbol - parse_tokendefp else ; ELSE - parse_tokendefp then ; THEN - 
parse_tokendefp to ; TO - parse_tokendefp sub ; SUB - parse_tokendefp as ; AS - - parse_tokendefc remabbr,parse_rem,list_noop,exec_noop ; abbreviated REM (') - parse_tokendefc rem,parse_rem,list_noop,exec_noop ; REM - parse_tokendefc return,parse_cmdsingle,parse_noop,parse_noop ; RETURN - parse_tokendefc run,parse_noop,parse_noop,parse_noop ; RUN - parse_tokendefc data,parse_noop,parse_noop,parse_noop ; DATA - parse_tokendefc end,parse_cmdsingle,parse_noop,parse_noop ; END - parse_tokendefc stop,parse_cmdsingle,parse_noop,parse_noop ; STOP - parse_tokendefc let,parse_noop,parse_noop,parse_noop ; LET - parse_tokendefc list,parse_noop,parse_noop,parse_noop ; LIST - parse_tokendefc new,parse_cmdsingle,parse_noop,parse_noop ; NEW - parse_tokendefc print,parse_noop,parse_noop,parse_noop ; PRINT - parse_tokendefc pop,parse_cmdsingle,parse_noop,parse_noop ; POP - parse_tokendefc goto,parse_noop,parse_noop,parse_noop ; GOTO - parse_tokendefc gosub,parse_noop,parse_noop,parse_noop ; GOSUB - parse_tokendefc go,parse_noop,parse_noop,parse_noop ; GO - - parse_tokendeff asc,parse_noop,parse_noop,parse_noop ; ASC() -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Parse handling tables -parsetab_cmd token_cmdparse -parsetab_cmde -parsetab_fn token_fnparse -parsetab_fne -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; List handling tables -listtab_cmd token_cmdlist -listtab_cmde -listtab_fn token_fnlist -listtab_fne + ; command (and simple non-command keywords) + parse_tokendefc remabbr ; abbreviated REM (') + parse_tokendefc rem ; REM + parse_tokendefc return ; RETURN + parse_tokendefc run ; RUN + parse_tokendefc data ; DATA + parse_tokendefc end ; END + parse_tokendefc stop ; STOP + parse_tokendefc let ; LET + parse_tokendefc list ; LIST + parse_tokendefc new ; NEW + parse_tokendefc print ; PRINT + parse_tokendefc pop ; POP + 
parse_tokendefc goto ; GOTO + parse_tokendefc gosub ; GOSUB + parse_tokendefc go ; GO + parse_tokendefc times ; times (multiplication) operator (*) + parse_tokendefc plus ; addition operator + parse_tokendefc divide ; division operator (/) + parse_tokendefc minus ; subtraction operator + parse_tokendefc exp ; exponentiation operator (^) + parse_tokendefc lt ; less than operator + parse_tokendefc le ; less than or equal operator + parse_tokendefc gt ; greater than operator + parse_tokendefc ge ; greater than or equal operator + parse_tokendefc eq ; equality operator + parse_tokendefc ne ; inequality operator + parse_tokendefc not ; boolean NOT operator + parse_tokendefc and ; boolean AND operator + parse_tokendefc or ; boolean OR operator + parse_tokendefc else ; ELSE + parse_tokendefc then ; THEN + parse_tokendefc to ; TO + parse_tokendefc sub ; SUB + parse_tokendefc as ; AS + ; secondary tokens (functions) + parse_tokendeff asc ; ASC() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Execution handling tables exectab_cmd token_cmdexec exectab_fn token_fnexec + *pragmapop list