changeset 63:a3122251b5fe

Add initial expression evaluator and some supporting details. To avoid complexity in the expression evaluator itself, the tokenization routine is modified to do some substitutions on the relational operators. Relational operator sequences potentially separated by spaces will be replaced by their canonical forms. These forms are: <, >, =, <=, >=, and <>. This is to maintain source compatibility with other basics, and also programmer compatibility. These replacements will be reflected in the program in memory and will show when LISTing the program. This also includes the operator precedence table for binary operators and binary operator handling in the main expression evaluator loop (eval_expr). Not yet implemented here is eval_term which actually handles things like numbers or function calls.
author William Astle <lost@l-w.ca>
date Thu, 23 Feb 2023 21:56:49 -0700
parents eba95ed43423
children 2205c3c59a33
files src/lwbasic.s
diffstat 1 files changed, 145 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/src/lwbasic.s	Tue Feb 21 20:57:25 2023 -0700
+++ b/src/lwbasic.s	Thu Feb 23 21:56:49 2023 -0700
@@ -230,6 +230,10 @@
 tok_kwnum       rmb 1                           ; the actual token number
 tok_kwmatchl    rmb 1                           ; the length of the best match during lookup
 tok_kwmatch     rmb 2                           ; the current best matched token number
+valtype0        rmb 1                           ; type of value in valaccum0
+valaccum0       rmb 6                           ; bucket of bytes for valaccum0
+valtype1        rmb 1                           ; type of value in valaccum1
+valaccum1       rmb 6                           ; bucket of bytes for valaccum1
                 rmb 0x71-*                      ; align RSTFLG/RSTVEC for stock ROM compatibility
 RSTFLG          rmb 1                           ; 0x55 if RSTVEC is valid
 RSTVEC          rmb 2                           ; points to warm start routine (must start with NOP)
@@ -1558,6 +1562,82 @@
 ; The LET command which is the default if no token begins a statement
 cmd_let         jmp SNERROR                     ; not yet implemented
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Expression Evaluation Package
+;
+; This is the expression evaluator. It handles everything from parsing numbers to dispatching function calls. The main
+; entry point is eval_expr which will evaluate an arbitrary expression. It returns as soon as it reaches something it
+; doesn't understand as part of an expression. eval_expraux is the recursive entry point: it is entered with B holding
+; the precedence of the pending operator and only consumes operators of higher precedence. A, B, X, Y, U are scratch.
+; The special handling for relational operators (see makecanon) is required because Basic allows them in all orders.
+eval_expr       clrb                            ; start at minimum precedence (end of expression) so any operator binds
+eval_expraux    jsr eval_term                   ; evaluate the leading term; NOTE(review): B must survive this call
+eval_expr0      jsr curchar                     ; fetch current input
+                beq eval_expr1                  ; brif end of expression - nothing more to do
+                cmpa #tok_or                    ; is it beyond the last binary operator token?
+                bhi eval_expr1                  ; brif so - not part of this expression
+                suba #tok_plus                  ; turn the token into a zero based operator number
+                blo eval_expr1                  ; brif below the first operator token - not an operator
+eval_expr2      pshs b                          ; remember the precedence of the pending (previous) operator
+                ldx #oper_tab                   ; point to operator table
+                tfr a,b                         ; operator number must be in B for "ABX"
+                abx                             ; add three times (3 bytes per entry)
+                abx                             ; OBS: TFR + ABX + ABX + ABX is faster than LDB + MUL + ABX
+                abx                             ; X now addresses this operator's table entry
+                ldb ,x                          ; fetch the precedence of this operator
+                cmpb ,s                         ; does it bind tighter than the pending operator?
+                bhi eval_expr3                  ; brif so - process this operator now
+                puls b,pc                       ; otherwise drop the saved byte and let the caller finish its operator
+eval_expr1      rts                             ; not an operator: expression (or sub-expression) is complete
+eval_expr3      jsr nextchar                    ; eat the operator token; NOTE(review): relies on B and X surviving
+                ldx 1,x                         ; fetch this operator's handler address from the table entry
+                pshs x                          ; keep it on the stack until the right operand is known
+                ldx valaccum0                   ; pick up the left operand (6 byte value accumulator)
+                ldy valaccum0+2
+                lda valtype0                    ; and its value type
+                ldu valaccum0+4
+                pshs a,x,y,u                    ; park the left operand (7 bytes) on the stack
+                jsr eval_expraux                ; evaluate the right operand including any tighter binding operators
+                puls a,x,y,u                    ; recover the left operand
+                stu valaccum1+4                 ; move it into the second value accumulator
+                sty valaccum1+2
+                stx valaccum1
+                sta valtype1                    ; along with its value type
+                jsr [,s++]                      ; pop the handler address and call it (left=valaccum1, right=valaccum0)
+                puls b                          ; get back the pending operator precedence
+                bra eval_expr0                  ; look for another operator or the end of the expression
+eval_term       jmp SNERROR                     ; term evaluation is not implemented yet - everything is an error
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Operator table, indexed by (token - tok_plus) in eval_expr: entries MUST follow the token order tok_plus ... tok_or.
+;
+; Each entry starts with the precedence value followed by the handler routine. Each handler will receive its left
+; operand in valaccum1 and its right operand in valaccum0 and should return its result in valaccum0.
+oper_tab        fcb 0x79                        ; addition (a higher precedence value binds tighter)
+                fdb SNERROR                     ; handler address; SNERROR is a placeholder in every entry for now
+                fcb 0x79                        ; subtraction
+                fdb SNERROR
+                fcb 0x7b                        ; multiplication
+                fdb SNERROR
+                fcb 0x7b                        ; division
+                fdb SNERROR
+                fcb 0x7f                        ; exponentiation
+                fdb SNERROR
+                fcb 0x64                        ; less than
+                fdb SNERROR
+                fcb 0x64                        ; greater than (token order is <, >, = so this slot is NOT "equal to")
+                fdb SNERROR
+                fcb 0x64                        ; equal to
+                fdb SNERROR
+                fcb 0x64                        ; less than or equal to
+                fdb SNERROR
+                fcb 0x64                        ; greater than or equal to
+                fdb SNERROR
+                fcb 0x64                        ; not equal to
+                fdb SNERROR
+                fcb 0x50                        ; boolean AND
+                fdb SNERROR
+                fcb 0x46                        ; boolean OR
+                fdb SNERROR
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Set carry if upper/lower case alpha
 setcifalpha     cmpa #'z+1                      ; is it above lower case Z?
                 bhs setcifalpha0                ; brif so, C clear
@@ -1651,6 +1731,52 @@
 detokenize7     anda #0x7f                      ; lose the high bit
                 bra detokenize0                 ; write it and move on with the input
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Canonicalize certain sequences in place; ALL the rewrite sequences must make the result shorter or keep it the same
+; size because the output overwrites the input. Enter with X pointing to the NUL terminated tokenized line; exits with
+; Y pointing just past the NUL of the result. A, B, X, and U are scratch. When a token listed in makecanontab is
+; followed, after any number of spaces, by one of its listed second tokens, the pair (and the spaces) is replaced by
+; the single replacement token. Secondary tokens (0xff prefix) are copied through as an intact two byte unit.
+; makecanontab entry: first token, number of pairs, then that many (second token, replacement) pairs; 0 ends the table.
+makecanontab    fcb tok_less,2,tok_greater,tok_notequal,tok_equal,tok_lessequal         ; "<" then ">" or "="
+                fcb tok_greater,2,tok_less,tok_notequal,tok_equal,tok_greaterequal      ; ">" then "<" or "="
+                fcb tok_equal,2,tok_greater,tok_greaterequal,tok_less,tok_lessequal     ; "=" then ">" or "<"
+                fcb 0                           ; end of table marker
+makecanon       leay ,x                         ; point output to start of the buffer (rewrite happens in place)
+makecanon0      lda ,x+                         ; get current byte
+                sta ,y+                         ; save in output (this also sets N and Z for the tests below)
+                bne makecanon1                  ; brif not end of line
+                rts                             ; the NUL has been copied - all done
+makecanon1      bpl makecanon0                  ; brif not a token
+                cmpa #0xff                      ; is it secondary?
+                bne makecanon2                  ; brif not
+                lda ,x+                         ; get the second half of the secondary token
+                sta ,y+                         ; copy it too - skipping it without copying would lose the token
+                bra makecanon0                  ; go handle next byte
+makecanon2      ldu #makecanontab               ; point to replacement table
+makecanon3      cmpa ,u+                        ; is it this entry?
+                beq makecanon4                  ; brif so
+                ldb ,u+                         ; get number of pairs in this entry
+                lslb                            ; 2 bytes per pair
+                leau b,u                        ; move past entry
+                ldb ,u                          ; end of table?
+                bne makecanon3                  ; brif not
+                bra makecanon0                  ; no substitutions found
+makecanon4      pshs x                          ; save original source pointer in case nothing matches
+makecanon5      lda ,x+                         ; get next character
+                cmpa #0x20                      ; is it space?
+                beq makecanon5                  ; brif so - skip it
+                ldb ,u+                         ; get number of replacement candidates
+makecanon6      cmpa ,u++                       ; does it match? (U steps to the next pair either way)
+                beq makecanon7                  ; brif so
+                decb                            ; seen all of them?
+                bne makecanon6                  ; brif not
+                puls x                          ; restore input pointer so the skipped bytes are copied normally
+                bra makecanon0                  ; go handle next input
+makecanon7      leas 2,s                        ; clear saved input pointer - input stays past the second token
+                lda -1,u                        ; get replacement token (second byte of the matched pair)
+                sta -1,y                        ; overwrite the first token already placed in the output
+                bra makecanon0                  ; go handle more input
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Tokenize line to tokebuff
 ;
 ; Enter with X pointing to the text to tokenize.
@@ -1662,7 +1788,9 @@
 tokenize0       lda ,x+                         ; get input character
                 bne tokenize3                   ; brif not end of input
 tokenize1       sta ,y+                         ; blank out final byte in result
-tokenize2       tfr y,d                         ; get end address to accumulator
+tokenize2       ldx #tokebuff                   ; point to start of tokenized line
+                bsr makecanon                   ; canonicalize certain sequences
+                tfr y,d                         ; get end address to accumulator
                 subd #tokebuff                  ; subtract out start; gives length of result
                 puls x,pc                       ; set return pointer and return
 tokenize3       tst tok_skipkw                  ; are we in the middle of a "not token"?
@@ -1778,6 +1906,8 @@
                 bne tokenize27                  ; brif not end of input - do another
                 jmp tokenize2                   ; stash end of buffer and handle cleanup
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Special tokenization handling
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Keyword dictionaries and jump tables. These are defined by several macros which ensure that each command or function
 ; entry has an associated jump table entry. These macros are:
 ;
@@ -1855,7 +1985,20 @@
                 defcmd 'GOSUB',gosub
                 defcmd 'RETURN',return
                 defcmd 'POP',pop
-                defcmd '-',minus,SNERROR
+                defcmd '+',plus,SNERROR         ; IMPORTANT: the operators from + to OR MUST stay in this exact sequence
+                defcmd '-',minus,SNERROR        ; with no gaps because a secondary lookup table is used for operator
+                defcmd '*',times,SNERROR        ; handling during binary operator handling.
+                defcmd '/',divide,SNERROR
+                defcmd '^',power,SNERROR
+                defcmd '<',less,SNERROR
+                defcmd '>',greater,SNERROR
+                defcmd '=',equal,SNERROR
+                defcmd '<=',lessequal,SNERROR
+                defcmd '>=',greaterequal,SNERROR
+                defcmd '<>',notequal,SNERROR
+                defcmd 'AND',and,SNERROR
+                defcmd 'OR',or,SNERROR
+                defcmd 'NOT',not,SNERROR
 primarydict     cmdtab
 secondarydict   functab
 primaryjump     cmdjump