Mercurial > hg > index.cgi

--- a/Makefile	Sun Sep 10 23:18:17 2023 -0600
+++ b/Makefile	Sat Oct 07 02:56:59 2023 -0600
@@ -1,15 +1,15 @@
 .PHONY: all
 all: bin/lwbasic.rom bin/lwbasic-coco2b.rom bin/lwbasic-coco3.rom bin/coco2.zip bin/coco2b.zip bin/coco3.zip

-lwb_srcs := consscr.s defs.s error.s expr.s genio.s init.s interp.s irq.s keyb.s keywords.s miscdata.s number.s print.s progctrl.s token.s vars.s
+lwb_srcs := consscr.s defs.s error.s expr.s fps.s genio.s init.s int.s interp.s irq.s keyb.s keywords.s miscdata.s number.s print.s progctrl.s token.s vars.s
 lwb_srcs := $(addprefix src/,$(lwb_srcs))

 bin/lwbasic.rom: src/lwbasic.s $(lwb_srcs)
-	lwasm --tabs=16 --raw --list=src/lwbasic-coco2.list --symbols --output=bin/lwbasic.rom src/lwbasic.s
+	lwasm --6809 --tabs=16 --raw --list=src/lwbasic-coco2.list --symbols --output=bin/lwbasic.rom src/lwbasic.s
 bin/lwbasic-coco2b.rom: src/lwbasic.s $(lwb_srcs)
-	lwasm --tabs=16 --raw --list=src/lwbasic-coco2b.list --symbols --output=bin/lwbasic-coco2b.rom -DCOCO2B=1 src/lwbasic.s
+	lwasm --6809 --tabs=16 --raw --list=src/lwbasic-coco2b.list --symbols --output=bin/lwbasic-coco2b.rom -DCOCO2B=1 src/lwbasic.s
 bin/lwbasic-coco3.rom: src/lwbasic.s $(lwb_srcs)
-	lwasm --tabs=16 --raw --list=src/lwbasic-coco3.list --symbols --output=bin/lwbasic-coco3.rom -DCOCO3=1 src/lwbasic.s
+	lwasm --6809 --tabs=16 --raw --list=src/lwbasic-coco3.list --symbols --output=bin/lwbasic-coco3.rom -DCOCO3=1 src/lwbasic.s

 .PHONY: clean
 clean:
--- a/src/defs.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/defs.s	Sat Oct 07 02:56:59 2023 -0600
@@ -45,20 +45,27 @@
 valtype_int     equ 1                           ; integer (32 bit) value (signed)
 valtype_float   equ 2                           ; float type (40 bit) value
 valtype_string  equ 3                           ; string type (16 bit length, 16(32) bit data pointer
-; Value accumulator structure definitions
-;
-; Notes:
-;
-; Much code using value accumulators depends on the specific layout of this structure so reorganizing it is dangerous.
-; Notably, the integer value and floating point mantissa must be at the same offset.
-val.type        equ 6                           ; value type offset
-val.fpexp       equ 0                           ; fp exponent offset
-val.fpmant      equ 1                           ; fp mantissa offset
-val.fpsign      equ 5                           ; fp sign offset
-val.int         equ 1                           ; integer offset
-val.strlen      equ 0                           ; string length offset
-val.strptr      equ 4                           ; string data pointer (low word)
-val.size        equ 7                           ; size of a value accumulator
+; Floating point accumulator structure definitions
+fps.exp         equ 0                           ; single precision exponent
+fps.sig         equ fps.exp+1                   ; single precision significand
+fps.sign        equ fps.sig+4                   ; single precision sign
+fps.size        equ fps.sign+1
+fpa.size        equ fps.size                    ; use the largest floating point accumulator size
+; String data definition
+str.len         equ 0                           ; string length (2 bytes)
+str.ptr         equ str.len+2                   ; string data pointer (3 bytes)
+str.size        equ str.ptr+3
+; Value accumulator structure definitions; note that the actual value data must be first and any
+; incidental meta data must follow
+val.value       equ 0                           ; offset of the value stored in the accumulator
+val.fpsexp      equ val.value+fps.exp           ; floating point exponent
+val.fpssig      equ val.value+fps.sig           ; floating point significand
+val.fpssign     equ val.value+fps.sign          ; floating point sign
+val.int         equ val.value                   ; integer offset
+val.strlen      equ val.value+str.len           ; string length offset
+val.strptr      equ val.value+str.ptr           ; string data pointer (low word)
+val.type        equ val.value+fps.size          ; use the largest of the data types here
+val.size        equ val.type+1                  ; size of a value accumulator
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
                 ifdef COCO3
 ; GIME INIT0
--- a/src/error.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/error.s	Sat Oct 07 02:56:59 2023 -0600
@@ -61,4 +61,6 @@
                 fcn 'Overflow'
                 deferr tm
                 fcn 'Type mismatch'
+                deferr div0
+                fcn 'Division by zero'
                 *pragmapop list
--- a/src/expr.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/expr.s	Sat Oct 07 02:56:59 2023 -0600
@@ -88,8 +88,10 @@
 ; 15. Read a character and go to step 7
 ;
 ; If the result ends up being larger than a floating point value can hold, return Overflow
-eval_number     ldb #valtype_int                ; flag result as an integer
-                stb val0_val.type
+eval_number
+                if 0
+                ldb #valtype_int                ; flag result as an integer
+                stb val0+val.type
                 ldx zero                        ; blank out the value except type
                 stx val0
                 stx val0+2
@@ -101,7 +103,7 @@
                 beq eval_number3                ; brif so
                 cmpa #tok_minus                 ; negative (operator negative)?
                 bne eval_number2                ; brif not
-eval_number3    com val0+val.fpsign             ; invert sign (multiple negatives will flip this multiple times)
+eval_number3    com val0+val.fpssign             ; invert sign (multiple negatives will flip this multiple times)
                 bra eval_number0                ; deal with next input
 eval_number2    cmpa #'+                        ; unary +?
                 beq eval_number0                ; brif so - skip it
@@ -122,12 +124,12 @@
                 rol val0+val.int+2
                 rol val0+val.int+1
                 rol val0+val.int
-                rol val0+val.fpexp              ; overflow into fp exponent
+                rol val0+val.fpsexp              ; overflow into fp exponent
                 lsl val0+val.int+3              ; times 4
                 rol val0+val.int+2
                 rol val0+val.int+1
                 rol val0+val.int
-                rol val0+val.fpexp              ; brif overflowed
+                rol val0+val.fpsexp              ; brif overflowed
                 ldd val0+val.int+2              ; times 5 (add original value)
                 addd ,s++
                 std val0+val.int+2
@@ -135,14 +137,14 @@
                 adcb 1,s
                 adca ,s++
                 std val0+val.int
-                ldb val0+val.fpexp              ; and handle overflow bits
+                ldb val0+val.fpsexp              ; and handle overflow bits
                 adcb #0
-                stb val0+val.fpexp
+                stb val0+val.fpsexp
                 lsl val0+val.int+3              ; times 10
                 rol val0+val.int+2
                 rol val0+val.int+1
                 rol val0+val.int
-                rol val0+val.fpexp
+                rol val0+val.fpsexp
                 ldd val0+val.int+2              ; get low word
                 addb ,s+                        ; add in current digit
                 adca #0
@@ -151,9 +153,9 @@
                 adcb #0
                 adca #0
                 std val0+val.int
-                lda val0+val.fpexp              ; and handle overflow
+                lda val0+val.fpsexp              ; and handle overflow
                 adca #0
-                sta val0+val.fpexp
+                sta val0+val.fpsexp
                 bne eval_number11               ; if we overflowed, go continue parsing as floating point
                 lda val0+val.int                ; get back high byte and check for overflow
                 bpl eval_number4                ; brif we haven't wrapped negative
@@ -167,28 +169,28 @@
                 beq eval_number8                ; brif so
                 cmpa #'e                        ; base 10 exponent in lower case?
                 beq eval_number8                ; brif so
-                ldb val0+val.fpsign             ; did we want a negative value?
+                ldb val0+val.fpssign             ; did we want a negative value?
                 beq eval_number7                ; brif not
                 jsr val_negint32                ; negate the 32 bit integer to correct two's complement
-eval_number7    clr val0+val.fpsign             ; clear sign bits for book keeping
+eval_number7    clr val0+val.fpssign             ; clear sign bits for book keeping
                 rts
 eval_number11   jsr nextchar                    ; each the character already processed
 eval_number8    lda #0x9f                       ; exponent if binary point is to the right of the mantissa
                 clr val0extra                   ; clear extra precision bits for val0
                 ldb #valtype_float              ; flag value as floating point
                 stb val0+val.type
-                ldb val0+val.fpexp              ; do we have overflow bits to shift?
+                ldb val0+val.fpsexp              ; do we have overflow bits to shift?
                 beq eval_number10               ; brif not
 eval_number9    inca                            ; bump exponent to account for extra bits
                 lsrb                            ; shift some bits over
-                ror val0+val.fpmant
-                ror val0+val.fpmant+1
-                ror val0+val.fpmant+2
-                ror val0+val.fpmant+3
+                ror val0+val.fpssig
+                ror val0+val.fpssig+1
+                ror val0+val.fpssig+2
+                ror val0+val.fpssig+3
                 ror val0extra
                 tstb                            ; all bits shifted into mantissa?
                 bne eval_number9                ; brif not
-eval_number10   sta val0+val.fpexp              ; save adjusted exponent
+eval_number10   sta val0+val.fpsexp              ; save adjusted exponent
                 ldx #val0                       ; normalize the result for further operations
                 jsr fp_normalize
                 clr ,-s                         ; flag for decimal point seen
@@ -261,6 +263,7 @@
                 jsr val_int32tofloat            ; convert to floating point
                 jsr fp_add                      ; add val1 to val0
                 bra eval_number40               ; go handle another character
+                endc
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Operator table
 ;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/fps.s	Sat Oct 07 02:56:59 2023 -0600
@@ -0,0 +1,425 @@
+                *pragmapush list
+                *pragma list
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision floating point arithmetic package
+;
+; Floating point values are stored in 6 byte packets (single precision) organized as follows:
+; Offset        Length          Contents
+; 0             1               8 bit binary exponent with a bias of 128; 0 means the number is 0
+; 1             4               32 bit significand
+; 5             1               sign flag; zero for positive, 0xff for negative
+;
+; Binary operateions take pointers to their arguments in X and U. Y contains the result location. In all cases, it is
+; safe to specify either one of the arguments as the result location. Unary operations take their operand pointer in
+; X and their result pointer in Y. In all cases, there is an in place version of the unary operation that operates on
+; its input and only needs the single pointer in X.
+;
+; On the topic of optimization: these routines copy their operands to accumulators in the direct page. This saves one
+; cycle per instruction which has a nonzero offset. In addition, this means that the input operands can remain
+; unmodified by the actual operations. For instance, addition requires denormalizing one operand which would otherwise
+; have to be done in place.
+;
+; NOTE: the input pointers X and U may be clobbered.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Convert 32 bit unsigned integer at (X) to single precision floating point at (U)
+fps_fromuint32  clr fpa0+fps.sign               ; set result sign to positive
+                bra fps_fromint32a              ; go do conversion
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Convert 32 bit signed integer at (X) to single precision floating point at value accumulator in (Y)
+fps_fromint32   ldb ,x                          ; set sign based on signed integer
+                sex
+                sta fpa0+fps.sign               ; set result sign to the two's complement sign
+                bpl fps_fromint32a              ; brif positive - no need to mess with bits
+                jsr int32_neg                   ; make the bits positive
+fps_fromint32a  ldb valtype_float               ; set output value type to floating point
+                stb val.type,y
+                ldd ,x                          ; copy integer bits to fpa0
+                std fpa0+fps.sig
+                ldd 2,x
+                std fpa0+fps.sig+2
+                ldb #0xa0                       ; put binary point to the right of the significand
+                stb fpa0+fps.exp
+                clr fpa0extra                   ; clear extra precision
+                jmp fps_add10                   ; go normalize the result and return
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Unary negation - negate (X) to (Y)
+fps_neg         ldd 2,x                         ; copy to output and keep exponent in A
+                std 2,y
+                ldd 4,x
+                std 4,y
+                ldd ,x
+                std ,y
+                tsta                            ; is the number zero?
+                beq fps_neg0                    ; brif so - do nothing
+                com fps.sign,y                  ; flip the sign
+fps_neg0        rts
+fps_negx        lda fps.exp,x                   ; is the number zero?
+                beq fps_negx0                   ; brif so - do nothing
+                com fps.sign,x                  ; flip the sign
+fps_negx0       rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copy (X) to fpa0 and (U) to fpa1
+fps_copyinputs  ldd ,x                          ; copy (X) to fpa0
+                std fpa0
+                ldd 2,x
+                std fpa0+2
+                ldd 4,x
+                std fpa0+4
+                ldd ,u                          ; copy (U) to fpa0
+                std fpa1
+                ldd 2,u
+                std fpa1+2
+                ldd 4,u
+                std fpa1+4
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Subtraction (X) - (U) to (Y)
+fps_sub         bsr fps_copyinputs              ; copy input operands
+                com fpa1+fps.sign               ; negate the subtrahend (don't need to handle zero here)
+                bra fps_add0                    ; go handle it as addition
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Addition (X) + (U) to (Y)
+fps_add         bsr fps_copyinputs              ; copy input operands
+fps_add0        lda fpa1+fps.exp                ; is the second operand 0?
+                bne fps_add1                    ; brif not
+                ldd fpa0                        ; copy first operand to output
+                std ,y
+                ldd fpa0+2
+                std 2,y
+                ldd fpa0+4
+                std 4,y
+                rts
+fps_add1        lda fpa0+fps.exp                ; get exponent of first operand
+                bne fps_add2                    ; brif not zero
+                ldd fpa1                        ; copy second operand to output
+                std ,y
+                ldd fpa1+2
+                std 2,y
+                ldd fpa1+4
+                std 4,y
+                rts
+fps_add2        clr fpa0extra                   ; clear extra precision bits
+                lda fpa0+fps.exp                ; get first operand exponent
+                suba fpa1+fps.exp               ; get difference in exponents
+                beq fps_add8                    ; brif we don't need to denormalize - they're the same
+                blo fps_add3                    ; brif we need to denormalize the first operand
+                ldx #fpa1                       ; point to second operand to denormalize
+                bra fps_add4                    ; go denormalize
+fps_add3        ldb fpa1+fps.exp                ; get exponent of second operand (result exponent)
+                stb fpa0+fps.exp                ; set result exponent
+                ldx #fpa0                       ; point to first operand to denormalize
+                nega                            ; get number of bits to shift as positive number
+fps_add4        suba #8                         ; is there 8 bits left?
+                blo fps_add5                    ; brif not
+                ldb fps.sig+3,x                 ; shift significand 8 bits right
+                stb fpa0extra                   ; save extra precision bits
+                ldb fps.sig+2,x
+                stb fps.sig+3,x
+                ldb fps.sig+1,x
+                stb fps.sig+2,x
+                ldb fps.sig,x
+                stb fps.sig+1,x
+                clr fps.sig,x
+                bra fps_add4                    ; see if we have another byte to shift
+fps_add5        adda #8                         ; adjust for extra subtract above
+                bra fps_add7                    ; do the bit shifting
+fps_add6        lsr fps.sig,x                   ; shift one bit right
+                ror fps.sig+1,x
+                ror fps.sig+2,x
+                ror fps.sig+3,x
+                ror fpa0extra
+                deca                            ; done all of the bits?
+fps_add7        bne fps_add6                    ; brif not
+fps_add8        lda fpa0+fps.sign               ; compare the signs of the operands
+                eora fpa1+fps.sign              ; set A if signs differ
+                bne fps_add9                    ; brif they differ - do subtraction
+                ldd fpa0+fps.sig+2              ; add low word of significands
+                addd fpa1+fps.sig+2
+                std fpa0+fps.sig+2
+                ldd fpa0+fps.sig                ; and the high word
+                adcb fpa1+fps.sig+1
+                adca fpa1+fps.sig
+                std fpa0+fps.sig
+                bcc fps_add9                    ; brif no carry
+                ror fpa0+fps.sig                ; shift carry into significand
+                ror fpa0+fps.sig+1
+                ror fpa0+fps.sig+2
+                ror fpa0+fps.sig+3
+                ror fpa0extra                   ; and the extra bits (for rounding)
+                inc fpa0+fps.exp                ; bump exponent to account for bit shift
+                bne fps_add14                   ; go check for round-off if not overflow
+OVERROR         ldb #err_ov                     ; raise overflow
+                jmp ERROR
+fps_add9        ldd fpa0+fps.sig+2              ; subtract low word
+                subd fpa1+fps.sig+2
+                std fpa0+fps.sig+2
+                ldd fpa0+fps.sig                ; and now the high word
+                sbcb fpa1+fps.sig+1
+                sbca fpa1+fps.sig
+                std fpa0+fps.sig
+                bcc fps_add10                   ; brif we didn't carry
+                com fpa0+fps.sign               ; flip result sign - other number was bigger
+                com fpa0+fps.sig+3              ; negate two's complement result to be just the magnitude
+                com fpa0+fps.sig+2
+                com fpa0+fps.sig+1
+                com fpa0+fps.sig
+                ldx fpa0+fps.sig+2              ; add 1 to complete negation
+                leax 1,x
+                stx fpa0+fps.sig+2
+                bne fps_add10                   ; brif carry doesn't propagate
+                ldx fpa0+fps.sig                ; propagate carry
+                leax 1,x
+                stx fpa0+fps.sig                ; NOTE: this cannot carry because magnitude got smaller
+fps_add10       clra                            ; initialize exponent offset
+fps_add11       ldb fpa0+fps.sig                ; do we have nonzero bits in high byte of significand?
+                bne fps_add13                   ; brif so
+                ldb fpa0+fps.sig+1              ; shift left 8 bits
+                stb fpa0+fps.sig
+                ldb fpa0+fps.sig+2
+                stb fpa0+fps.sig+1
+                ldb fpa0+fps.sig+3
+                stb fpa0+fps.sig+2
+                ldb fpa0extra                   ; and extra precision bits
+                stb fpa0+fps.sig+3
+                clr fpa0extra
+                addb #8                         ; account for number of bits shifted
+                cmpb #40                        ; done 40 bits?
+                blo fps_add11                   ; brif not - see if we have more bits to shift
+                clr fpa0+fps.exp                ; number underflowed to zero - set it so
+                clr fpa0+fps.sign
+                clr fpa0+fps.sig
+                clr fpa0+fps.sig+1
+                clr fpa0+fps.sig+2
+                clr fpa0+fps.sig+3
+                bra fps_add16                   ; go return result
+fps_add12       inca                            ; account for a bit shift
+                lsl fpa0extra                   ; shift significand and extra bits left
+                rol fpa0+fps.sig+3
+                rol fpa0+fps.sig+2
+                rol fpa0+fps.sig+1
+                rol fpa0+fps.sig
+fps_add13       bpl fps_add12                   ; brif we haven't normalized yet
+fps_add14       ldb fpa0extra                   ; do we need to round?
+                bpl fps_add16                   ; brif not
+                ldx fpa0+fps.sig+2              ; add one to significand
+                leax 1,x
+                stx fpa0+fps.sig+2
+                bne fps_add16                   ; brif no carry
+                ldx fpa0+fps.sig                ; bump the upper word of significand
+                leax 1,x
+                stx fpa0+fps.sig
+                bne fps_add16
+fps_add15       inc fpa0+fps.exp                ; bump exponent
+                beq OVERROR                     ; brif it overflowed
+                lda #0x80                       ; set high bit of significand (rest is zero)
+                sta fpa0+fps.sig
+fps_add16       ldd fpa0                        ; copy result to destination
+                std ,y
+                ldd fpa0+2
+                std 2,y
+                ldd fpa0+4
+                std 4,y
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision multiplication (X) × (U) to (Y)
+fps_mul         lda fps.exp,x                   ; is first operand zero?
+                beq fps_mul0                    ; brif so - return zero
+                lda fps.exp,u                   ; is second operand zero?
+                bne fps_mul1                    ; brif not - have to do the multiply
+fps_mul0        ldd zero                        ; return zero result
+                std ,y
+                std 2,y
+                std 4,y
+                rts
+fps_mul1        jsr fps_copyinputs              ; copy input operands
+                lda fpa0+fps.sign               ; calculate sign of result - xor of the two signs
+                eora fpa1+fps.sign
+                sta fpa0+fps.sign               ; save result sign
+                lda fpa0+fps.exp                ; get exponent of first value
+                adda fpa1+fps.exp               ; calculate new exponent; also cancels the bias
+                rora                            ; set V if C and N differ
+                rola
+                bvc fps_mul3                    ; brif maybe an overflow
+                adda #0x80                      ; add back the bias
+                sta fpa0+fps.sign               ; save new sign
+                beq fps_mul0                    ; brif we underflowed - zero out sign
+; This does a shift-and-add multiplication algorithm. This is slower than an equivalent using MUL but is smaller.
+; The high order bytes will be left in fpa0 with the low order bytes in fpa0extra and fpa0extra[1-3]. The low order
+; bytes are kept for the odd case where extra precision is useful. Uses fpa0extra[4-7] as temporaries.
+fps_mul2        ldd zero                        ; zero out temporary bytes
+                std fpa0extra4
+                std fpa0extra6
+                ldb fpa0+fps.sig+3              ; multiply by low byte of fpa0
+                bsr fps_mul4
+                ldb fpa0extra                   ; move low bites
+                stb fpa0extra3
+                ldb fpa0+fps.sig+2              ; multiply by next higher byte
+                bsr fps_mul4
+                ldb fpa0extra                   ; move low bits
+                stb fpa0extra2
+                ldb fpa0+fps.sig+1              ; and again for the next higher byte
+                bsr fps_mul4
+                ldb fpa0extra
+                stb fpa0extra1
+                ldb fpa0+fps.sig                ; and the high order byte
+                bsr fps_mul4
+                ldd fpa0extra4                  ; copy high order product bits to result
+                std fpa0+fps.sig
+                ldd fpa0extra6
+                std fpa0+fps.sig+2
+                jmp fps_add10                   ; go normalize the result and return
+fps_mul3        bpl fps_mul0                    ; brif we underflowed - return 0
+                jmp OVERROR                     ; raise overflow
+fps_mul4        bne fps_mul5                    ; brif not multiply by zero
+                lda fpa0+fps.sig+3              ; shift 8 bits right
+                sta fpa0extra
+                lda fpa0+fps.sig+2
+                sta fpa0+fps.sig+3
+                lda fpa0+fps.sig+1
+                sta fpa0+fps.sig+2
+                lda fpa0+fps.sig
+                sta fpa0+fps.sig+1
+                clr fpa0+fps.sig
+fps_mul8        rts
+fps_mul5        coma                            ; set C
+fps_mul6        lda fpa0extra4                  ; get high byte of result bytes
+                rorb                            ; is multiplier bit set?
+                beq fps_mul8                    ; brif 8 shifts done (C set above makes sure of that)
+                bcc fps_mul7                    ; brif bit not set - don't do addition
+                lda fpa0extra7                  ; add multiplier (fpa1) to result
+                adda fpa1+fps.sig+3
+                sta fpa0extra7
+                lda fpa0extra6
+                adca fpa1+fps.sig+2
+                sta fpa0extra6
+                lda fpa0extra5
+                adca fpa1+fps.sig+1
+                sta fpa0extra5
+                lda fpa0extra4
+                adca fpa1+fps.sig
+fps_mul7        rora                            ; rotate carry in (from add or 0)
+                sta fpa0extra4
+                ror fpa0extra5
+                ror fpa0extra6
+                ror fpa0extra7
+                ror fpa0extra                   ; and into the extra precision bits
+                clra                            ; clear carry - so shift above will terminate
+                bra fps_mul6                    ; go do another bit
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision division (X) ÷ (U) -> (Y)
+;
+; This is basically the same algorithm used in the Color Basic ROM
+fps_div         lda fps.exp,u                   ; is divisor 0?
+                bne fps_div0
+DIV0ERROR       ldb #err_div0                   ; raise division by zero
+                jmp ERROR
+fps_div0        lda fps.exp,x                   ; is dividend 0?
+                lbeq fps_mul0                   ; brif so - return 0
+                jsr fps_copyinputs              ; copy input values
+                lda fpa0+fps.sign               ; calculate result sign - xor of operand signs
+                eora fpa1+fps.sign
+                sta fpa0+fps.sign               ; save result sign
+                lda fpa1+fps.exp                ; get divisor exponent
+                nega                            ; negate for subtraction
+                adda fpa0+fps.exp               ; subtract it from dividend exponent
+                rora                            ; set V if C and N differ
+                rola
+                bvc fps_mul3                    ; brif overflow or underflow
+                adda #0x80                      ; add back the bias
+                sta fpa0+fps.sign               ; save new sign
+                lbeq fps_mul0                   ; brif we underflowed - zero out sign
+                inca                            ; bump exponent - why?? related to the bias stuff above?
+                lbeq OVERROR                    ; brif it overflows
+                sta fpa0+fps.exp                ; save result exponent
+                ldx #fpa0extra4                 ; point to temporary storage bytes for quotient
+                ldb #4                          ; counter for 4 significand bytes and one extra partial byte
+                stb fpa0extra1                  ; save counter since we need both accumulators
+                ldb #1                          ; shift counter flag and quotient byte
+fps_div1        lda fpa1+fps.sig                ; set C if fpa0 significand <= fpa1 significand
+                cmpa fpa0+fps.sig
+                bne fps_div2
+                lda fpa1+fps.sig+1
+                cmpa fpa0+fps.sig+1
+                bne fps_div2
+                lda fpa1+fps.sig+2
+                cmpa fpa0+fps.sig+2
+                bne fps_div2
+                lda fpa1+fps.sig+3
+                cmpa fpa0+fps.sig+3
+                bne fps_div2
+                coma                            ; set C if values are the same
+fps_div2        tfr cc,a                        ; save C for later, C clear if fpa1 > fpa0
+                rolb                            ; shift carry into quotient
+                bcc fps_div3                    ; brif carry clear - we haven't done 8 bits yet
+                stb ,x+                         ; save quotient byte after a full set of bits
+                dec fpa0extra1                  ; have we done all the bytes?
+                bmi fps_div7                    ; brif all bytes plus extra precision - done all
+                beq fps_div6                    ; brif all main sigificand bytes - do a couple extra bits
+                ldb #1                          ; reset the bit counter flag
+fps_div3        tfr cc,a                        ; get back original carry from compare
+                bcs fps_div5                    ; brif it "went"
+fps_div4        lsl fpa0+fps.sig+3              ; shift dividend left
+                rol fpa0+fps.sig+2
+                rol fpa0+fps.sig+1
+                rol fpa0+fps.sig
+                bcs fps_div2                    ; brif it carries - next bit "goes"
+                bmi fps_div1                    ; check magnitudes of next bit
+                bra fps_div2                    ; carry clear - check another bit
+fps_div5        lda fpa0+fps.sig+3              ; subtract divisor from dividend bits
+                suba fpa1+fps.sig+3
+                sta fpa0+fps.sig+3
+                lda fpa0+fps.sig+2
+                sbca fpa1+fps.sig+2
+                sta fpa0+fps.sig+2
+                lda fpa0+fps.sig+1
+                sbca fpa1+fps.sig+1
+                sta fpa0+fps.sig+1
+                lda fpa0+fps.sig
+                sbca fpa1+fps.sig
+                sta fpa0+fps.sig
+                bra fps_div4                    ; now do the bit shift to line things up
+fps_div6        ldb #0x40                       ; only do two bits of extra precision byte
+                bra fps_div2                    ; go handle these bitsd
+fps_div7        rorb                            ; get extra quotient bits to bit 7,6 and bit 5 set
+                rorb
+                rorb
+                stb fpa0extra                   ; save extra precision bits
+                ldd fpa0extra4                  ; copy result bits to fpa0
+                std fpa0+fps.sig
+                ldd fpa0extra6
+                std fpa0+fps.sig+2
+                jmp fps_add10                   ; go normalize the result
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Pack single precision number at (X) to (U)
+fps_pack        lda fps.sign,x                  ; get sign of number (will be 0 for 0)
+                ora #0x7f                       ; make sure low bits are set for merging
+                anda fps.sig,x                  ; merge with high bits of significand
+                ldb fps.sig+1,x                 ; get upper mid bits of significand
+                std fps.sig,u
+                ldd fps.sig+2,x
+                std fps.sig+2,u
+                lda fps.exp,x
+                sta fps.exp,u
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Unpack single precision number at (X) to (U)
+fps_unpack      lda fps.exp,x                   ; get exponent of value
+                beq fps_unpack0                 ; brif value is zero
+                sta fps.exp,u
+                ldb fps.sig,x                   ; get high byte of significand
+                sex                             ; make sign value in A
+                sta fps.sign,u                  ; set sign in result
+                ldd fps.sig,x                   ; get high word of sifnificand
+                ora #0x80                       ; make sure high bit is set
+                std fps.sig,u                   ; save high word in result
+                ldd fps.sig+2,x                 ; copy middle bytes of significand
+                std fps.sig+2,u
+                rts
+fps_unpack0     sta fps.exp,u                   ; zero out destination
+                sta fps.sig,u
+                sta fps.sig+1,u
+                sta fps.sig+2,u
+                sta fps.sig+3,u
+                sta fps.sign,u
+                rts
+                *pragmapop list
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/int.s	Sat Oct 07 02:56:59 2023 -0600
@@ -0,0 +1,323 @@
+                *pragmapush list
+                *pragma list
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; 32 bit integer handling package.
+;
+; Negate a 32 bit integer in (X); done by subtracting it from zero
+int32_neg       ldd zero                        ; subtract low word
+                subd val.int+2,x
+                std val.int+2,x
+                ldd zero                        ; and now the high word
+                sbcb val.int+1,x
+                sbca val.int,x
+                std val.int,x
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; 32 bit integer addition (X) + (U) -> (Y)
+int32_add       ldd val.int+2,x                 ; do low word
+                addd val.int+2,u
+                std val.int+2,y
+                ldd val.int,x                   ; and the high word
+                adcb val.int+1,u
+                adca val.int,u
+                std val.int,y
+                bvc int32_add0                  ; raise overflow if needed
+OVERROR2        jmp OVERROR
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; 32 bit integer subtraction (X) - (U) -> (Y)
+int32_sub       ldd val.int+2,x                 ; do low word
+                subd val.int+2,u
+                std val.int+2,y
+                ldd val.int,x                   ; and the high word
+                sbcb val.int+1,u
+                sbca val.int,u
+                std val.int,y
+                bvs OVERROR2                    ; raise overflow if needed
+int32_add0      rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Signed 32 bit integer multiply (X) * (U) -> (Y), overflow if exceeds signed 32 bit range
+int32_mul       ldd val.int+2,x                 ; copy left operand to temporary
+                std fpa0+fps.sig+2
+                ldd val.int,x
+                std fpa0+fps.sig
+                eora val.int,u                  ; set sign bit in A if signs differ
+                pshs a                          ; save result sign
+                ldd val.int+2,u                 ; copy right operand to temporary
+                std fpa1+fps.sig+2
+                ldd val.int,u
+                std fpa1+fps.sig
+                bpl int32_mul0                  ; brif right operand is positive
+                ldd zero                        ; negate right operand
+                subd fpa1+fps.sig+2
+                std fpa1+fps.sig+2
+                ldd zero
+                sbcb fpa1+fps.sig+1
+                sbca fpa1+fps.sig
+                std fpa1+fps.sig
+int32_mul0      lda fpa0+fps.sig                ; is left operand negative?
+                bpl int32_mul1                  ; brif not
+                ldd zero                        ; negate left operand
+                subd fpa0+fps.sig+2
+                std fpa0+fps.sig+2
+                ldd zero
+                sbcb fpa0+fps.sig+1
+                sbca fpa0+fps.sig
+                std fpa0+fps.sig
+int32_mul1      bsr util_mul32                  ; do the actual multiplication
+                ldb fpa0extra                   ; are upper bits all zero?
+                orb fpa0extra1
+                orb fpa0extra2
+                orb fpa0extra3
+                bne OVERROR2                    ; brif not - overflowed
+                ldb fpa0extra4                  ; is bit 31 set?
+                bpl int32_mul2                  ; brif not - no overflow
+                lda ,s                          ; negative result wanted?
+                bpl OVERROR2                    ; brif not - we overflowed
+                andb #0x7f                      ; lose extra sign bit
+                orb fpa0extra5                  ; "or" in other bytes to see if all but bit 31 are zero
+                orb fpa0extra6
+                orb fpa0extra7
+                bne OVERROR2                    ; brif any nonzero bits - we overflowed maximum negative number
+                ldb ,s+                         ; do we want a negative result?
+                bpl int32_mul2                  ; brif not
+                ldd zero                        ; negate result
+                subd fpa0extra6
+                std fpa0extra6
+                ldd zero
+                sbcb fpa0extra5
+                sbca fpa0extra4
+                std fpa0extra4
+int32_mul2      ldd fpa0extra4                  ; copy result to destination
+                std val.int,y
+                ldd fpa0extra6
+                std val.int+2,y
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; 32 bit multiply.
+;
+; Significands of fpa0 and fpa1, treated as unsigned, are multiplied with the product being stored in the fpa0extra
+; memory locations.
+;
+; The agorithm is simply this: zero out the result, then multiply fpa0 by each byte of fpa1 and then add the result
+; to the result location. This yields a 64 bit product which is somewhat wasteful.
+util_mul32      ldd zero                        ;* zero out result bits; low 16 bits don't need to be cleared and
+                stb fpa0extra3                  ;* upper 24 bits also don't
+                std fpa0extra4
+                ldb fpa1+fps.sig+3              ; multiply by low byte of fpa1 - no carries possible for this iteration
+                lda fpa0+fps.sig+3
+                mul
+                std fpa0extra6
+                ldb fpa1+fps.sig+3
+                lda fpa0+fps.sig+2
+                mul
+                addd fpa0extra5
+                std fpa0extra5
+                ldb fpa1+fps.sig+3
+                lda fpa0+fps.sig+1
+                mul
+                addd fpa0extra4
+                std fpa0extra4
+                ldb fpa1+fps.sig+3
+                lda fpa0+fps.sig
+                mul
+                addd fpa0extra3
+                std fpa0extra3
+; Now we potentially have cascading carries at every stage; it makes more sense to handle those in a separate
+; addition pass after each partial calculation. The partial calculations are identical to above. This is completely
+; unrolled for speed.
+                ldd zero                        ; zero out extra work bytes
+                std fpa0extra8
+                stb fpa0extra10
+                ldb fpa1+fps.sig+2              ; multiply by second low byte of fpa1
+                lda fpa0+fps.sig+3
+                mul
+                std fpa0extra11
+                ldb fpa1+fps.sig+2
+                lda fpa0+fps.sig+2
+                mul
+                addd fpa0extra10
+                std fpa0extra10
+                ldb fpa1+fps.sig+2
+                lda fpa0+fps.sig+1
+                mul
+                addd fpa0extra9
+                std fpa0extra9
+                ldb fpa1+fps.sig+2
+                lda fpa0+fps.sig
+                mul
+                addd fpa0extra8
+                std fpa0extra8
+                ldd fpa0extra11                 ; add to partial product (shifted left 8 bits)
+                addd fpa0extra5
+                std fpa0extra5
+                ldd fpa0extra9
+                adcb fpa0extra4
+                adca fpa0extra3
+                std fpa0extra3
+                ldb #0
+                adcb fpa0extra8
+                stb fpa0extra2
+                ldd zero                        ; and do it all again for next byte of fpa1
+                std fpa0extra8
+                stb fpa0extra10
+                ldb fpa1+fps.sig+1
+                lda fpa0+fps.sig+3
+                mul
+                std fpa0extra11
+                ldb fpa1+fps.sig+1
+                lda fpa0+fps.sig+2
+                mul
+                addd fpa0extra10
+                std fpa0extra10
+                ldb fpa1+fps.sig+1
+                lda fpa0+fps.sig+1
+                mul
+                addd fpa0extra9
+                std fpa0extra9
+                ldb fpa1+fps.sig+1
+                lda fpa0+fps.sig
+                mul
+                addd fpa0extra8
+                std fpa0extra8
+                ldd fpa0extra11
+                addd fpa0extra4
+                std fpa0extra4
+                ldd fpa0extra9
+                adcb fpa0extra3
+                adca fpa0extra2
+                std fpa0extra2
+                ldb #0
+                adcb fpa0extra8
+                stb fpa0extra1
+                ldd zero                        ; and the final sequence with the fpa1 high byte
+                std fpa0extra8
+                stb fpa0extra10
+                ldb fpa1+fps.sig
+                lda fpa0+fps.sig+3
+                mul
+                std fpa0extra11
+                ldb fpa1+fps.sig
+                lda fpa0+fps.sig+2
+                mul
+                addd fpa0extra10
+                std fpa0extra10
+                ldb fpa1+fps.sig
+                lda fpa0+fps.sig+1
+                mul
+                addd fpa0extra9
+                std fpa0extra9
+                ldb fpa1+fps.sig
+                lda fpa0+fps.sig
+                mul
+                addd fpa0extra8
+                std fpa0extra8
+                ldd fpa0extra11
+                addd fpa0extra3
+                std fpa0extra3
+                ldd fpa0extra9
+                adcb fpa0extra2
+                adca fpa0extra1
+                std fpa0extra1
+                ldb #0
+                adcb fpa0extra
+                stb fpa0extra
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; 32 bit division, integer only, truncate fraction without rounding. Note that there is exactly one case where integer
+; division can overflow: dividing -0x80000000 by -1 which yields 0x80000000. All other cases reduce the magnitude.
+int32_div       ldd val.int+2,x                 ; copy left operand to temporary
+                std fpa0+fps.sig+2
+                ldd val.int,x
+                std fpa0+fps.sig
+                eora val.int,u                  ; set sign bit in A if signs differ
+                pshs a                          ; save result sign
+                ldd val.int+2,u                 ; copy right operand to temporary
+                std fpa1+fps.sig+2
+                ldd val.int,u
+                std fpa1+fps.sig
+                bpl int32_div0                  ; brif right operand is positive
+                ldd zero                        ; negate right operand
+                subd fpa1+fps.sig+2
+                std fpa1+fps.sig+2
+                ldd zero
+                sbcb fpa1+fps.sig+1
+                sbca fpa1+fps.sig
+                std fpa1+fps.sig
+int32_div0      lda fpa0+fps.sig                ; is left operand negative?
+                bpl int32_div1                  ; brif not
+                ldd zero                        ; negate left operand
+                subd fpa0+fps.sig+2
+                std fpa0+fps.sig+2
+                ldd zero
+                sbcb fpa0+fps.sig+1
+                sbca fpa0+fps.sig
+                std fpa0+fps.sig
+int32_div1      ldb fpa1+fps.sig                ; check for division by zero
+                orb fpa1+fps.sig+1
+                orb fpa1+fps.sig+2
+                orb fpa1+fps.sig+3
+                lbne DIV0ERROR                  ; brif division by zero
+                bsr util_div32                  ; do the actual division
+                lda ,s+                         ; get desired sign
+                bmi int32_div2                  ; brif want negative - we can't overflow in that case
+                ldb fpa0extra                   ; get high byte of result
+                lbmi OVERROR2                   ; brif we ended up with 0x80000000 positive
+                bra int32_div3                  ; go return result
+int32_div2      ldd zero                        ; negate result to correct sign
+                subd fpa0extra+2
+                std fpa0extra+2
+                ldd zero
+                sbcb fpa0extra+1
+                sbca fpa0extra
+                std fpa0extra
+int32_div3      ldd fpa0extra                   ; copy result to destination
+                std val.int,y
+                ldd fpa0extra2
+                std val.int+2,y
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Divide 32 bit integer in fpa0 significand by 32 bit integer in fpa1 significand, both treated as unsigned. Leave
+; quotient at fpa0extra...fpa0extra3 and remainder at fpa0extra4...fpa0extra7; does not check for division by zero
+; which will result in a quotient of 0xffffffff and a remainder will be the dividend. It will not get suck in a loop.
+;
+; Algorithm is basically pencil and paper long division. We check to see if the divisor "goes" at each step by doing
+; a trial subtraction without saving the result. If it doesn't go, we just loop around again. If it does go, we stash
+; a 1 bit in the quotient and actually do the subtraction. Then go loop around again. Doing it this way rather than
+; with an actual subtraction and then undoing it with addition saves two store instructions on the comparison saves
+; having to do a restore in the no-go case which is going to be quite common with values whose upper bits are
+; mostly zeroes, thus it makes the operations faster in that case, for integers. (Floating point is a different
+; problem.)
+util_div32      ldd fpa0+fps.sig+2              ; copy dividend to result location
+                std fpa0extra6
+                ldd fpa0+fps.sig
+                std fpa0extra4
+                ldb #32                         ; do 32 bits
+                stb fpa0+fps.exp                ; save counter somewhere because we don't have enough registers
+                ldd zero                        ; zero out remainder
+                std fpa0extra4
+                std fpa0extra6
+util_div32a     lsl fpa0extra3                  ; shift dividend residue into remainder
+                rol fpa0extra2
+                rol fpa0extra1
+                rol fpa0extra
+                rol fpa0extra7
+                rol fpa0extra6
+                rol fpa0extra5
+                rol fpa0extra4
+                ldd fpa0extra6                  ; now subtract divisor from remainder
+                subd fpa1+fps.sig+2
+                ldd fpa0extra4
+                sbcb fpa1+fps.sig+1
+                sbca fpa1+fps.sig
+                bcs util_div32b                 ; brif it doesn't go - need to restore
+                inc fpa0extra3                  ; set quotient bit
+                ldd fpa0extra6                  ; actuall do the subtraction
+                subd fpa1+fps.sig+2
+                std fpa0extra6
+                ldd fpa0extra4
+                sbcb fpa1+fps.sig+1
+                sbca fpa1+fps.sig
+                std fpa0extra4
+util_div32b     dec fpa0+fps.exp                ; done all 32 bits?
+                bne util_div32a                 ; do another
+                *pragmapop list
--- a/src/lwbasic.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/lwbasic.s	Sat Oct 07 02:56:59 2023 -0600
@@ -48,6 +48,8 @@
                 include error.s
                 include expr.s
                 include number.s
+                include int.s
+                include fps.s
                 include token.s
                 include miscdata.s
                 include keywords.s
--- a/src/number.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/number.s	Sat Oct 07 02:56:59 2023 -0600
@@ -3,7 +3,8 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Arithmetic package
 ;
-; This section contains routines that handle floating point and integer arithmetic.
+; This section contains routines that handle floating point and integer arithmetic. It mostly delegates to int.s and
+; fps.s.
 ;
 ; Most routines take a pointer to a value accumulator in X. Some take two pointers with the second in U.
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -11,7 +12,7 @@
 ;
 ; * If both operands are the same, ensure the type is numeric and return
 ; * If one operand is floating point, convert the other to floating point, as long as it is numeric
-; * If one or both oeprands are not numeric, raise a type mismatch
+; * If one or both operands are not numeric, raise a type mismatch
 ; The operands are in (X) and (U)
 val_matchtypes  ldb val.type,x                  ; get the type of first argument
                 cmpb #valtype_int               ; is it integer?
@@ -23,244 +24,80 @@
 val_matchtypes0 ldb val.type,u                  ; get type of second operand
                 cmpb #valtype_int               ; is it integer?
                 bne val_matchtypes2             ; brif not
-val_matchtypes3 rts
+val_matchtypes3 rts                             ; both types int - we're good so return
 val_matchtypes2 cmpb #valtype_float             ; is it floating point?
                 bne TMERROR                     ; brif not - raise error
-                pshs u                          ; save pointer to second operand
-                bsr val_int32tofp               ; convert first argument to floating point
-                puls u,pc                       ; restore second operand pointer and return
+                pshs x                          ; save X which may be clobbered
+                leay ,x                         ; point to input operand as destination for conversion
+                jsr fps_fromint32               ; convert first argument to floating point
+                puls x,pc                       ; restore second operand pointer and return
 val_matchtypes1 ldb val.type,u                  ; get second argument type
                 cmpb #valtype_float             ; is it floating point?
                 beq val_matchtypes3             ; brif so - we're good
                 cmpb #valtype_int               ; is it integer?
                 bne TMERROR                     ; brif not - invalid type combination
-                pshs x,u                        ; save value pointers
+                pshs x                          ; save X which mill be clobbered
                 leax ,u                         ; convert (U) to floating point
-                bsr val_int32tofp
-                puls x,u,pc                     ; restore argument pointers and return
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Negate the 32 bit integer (for fp mantissa) at (X)
-val_negint32    ldd zero                        ; subtract integer value from zero
-                subd val.int+2,x
-                std val.int+2,x
-                ldd zero
-                sbcb val.int+1,x
-                sbca val.int,x
-                std val.int,x
-                rts
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Convert integer value at (X) to floating point value at (X). Enter at val_uint32tofp to treat the 32 bit value as
-; unsigned. Otherwise enter at val_int32tofp to treat it as signed.
-val_uint32tofp  clr val.fpsign,x                ; for positive sign
-                bra val_int32tofpp              ; go process as positive
-val_int32tofp   ldb val.int,x                   ; get sign to A
-                sex
-                sta val.fpsign,x                ; set sign of result
-                bpl val_int32tofpp              ; brif positive - don't need to do a two's complement adjustment
-                bsr val_negint32                ; negate the integer value
-val_int32tofpp  ldb valtype_float               ; set result to floating point
-                stb val.type,x
-                ldb #0xa0                       ; exponent to have binary point to the right of the mantissa
-                stb val.fpexp,x                 ; set the exponent
-                clrb                            ; clear out extra precision bits
-                ; fall through to normalize the value at (X)
+                leay ,u
+                jsr fps_fromint32
+                puls x,pc                       ; restore argument pointer and return
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Normalize floating point value at (X); this will shift the mantissa until there is a one in the leftmost
-; bit of the mantissa. The algorithm is as follows:
-;
-; 1. Shift the mantissa left until a 1 bit is found in the high bit of the mantissa.
-; 1a. If more than 40 bits of left shifts occur, determine that the value is zero and return
-; 2. Adjust exponent based on number of shifts
-; 2a. If new exponent went below -127, then underflow occurred and zero out value
-; 2b. If new exponent went above +127, raise an overflow
-; 3. If bit 7 of the extra precision byte is clear, return the resulting value
-; 4. Add one to the mantissa
-; 5. If a carry in (4) occurred, then set high bit of mantissa and bump exponent
-; 6. If new exponent carries, then raise overflow
-; 7. Return result.
-;
-; Note that if we carried in (4), the only possible result is that the mantissa
-; rolled over to all zeroes so there is no need to shift the entire mantissa right
-; nor is there any reason to check for additional rounding.
-;
-; The above algorithm has some optimizations in the code sequence below.
-fp_normalize    pshs b                          ; save extra bits
-                clrb                            ; set shift counter/exponent adjustment
-fp_normalize0   lda val.fpmant,x                ; set flags on high word of mantissa
-                bne fp_normalize2               ; brif we don't have a full byte to shift
-                addb #8                         ; account for a while byte of shifts
-                ldu val.fpmant+1,x              ; shift mantissa left 8 bits
-                stu val.fpmant,x
-                lda val.fpmant+3,x
-                sta val.fpmant+2,x
-                lda ,s                          ; and include extra bits
-                sta val.fpmant+3,x
-                clr ,s                          ; and blank extra bits
-                cmpb #40                        ; have we shifted 40 bits?
-                blo fp_normalize0               ; brif not - keep shifting
-                bra fp_normalize7               ; go zero out the value
-fp_normalize1   incb                            ; account for one bit of shifting
-                lsl ,s                          ; shift mantissa and extra bits left (will not be more than 7 shifts)
-                rol val.fpmant+3,x
-                rol val.fpmant+2,x
-                rol val.fpmant+1,x
-                rol val.fpmant,x
-fp_normalize2   bpl fp_normalize1               ; brif we have to do a bit shift
-                pshs b                          ; apply exponent counter to exponent
-                lda val.fpexp,x
-                suba ,s+
-                bls fp_normalize6               ; brif we underflowed to zero; overflow is impossible
-fp_normalize3   lsl ,s+                         ; set C if the high bit of extra precision is set
-                bcs fp_normalize5               ; brif bit set - we have to do rounding
-fp_normalize4   rts                             ; return if no rounding
-fp_normalize5   ldu val.fpmant+2,x              ; add one to mantissa
-                leau 1,u
-                stu val.fpmant+2,x
-                bne fp_normalize4               ; brif low word doesn't carry
-                ldu val.fpmant,x
-                leau 1,u
-                stu val.fpmant,x
-                bne fp_normalize4               ; brif high word doesn't carry
-                ror val.fpmant,x                ; shift right C in to high bit of mantissa (already set to get here)
-                inc val.fpexp,x                 ; bump exponent for a right shift
-                bne fp_normalize4               ; brif it doesn't overflow (> +127)
-OVERROR2        jmp OVERROR                     ; raise overflow
-fp_normalize6   clr val.fpmant,x                ; clear mantissa
-                clr val.fpmant+1,x
-                clr val.fpmant+2,x
-                clr val.fpmant+3,x
-fp_normalize7   clr val.fpexp,x                 ; clear exponent and sign
-                clr val.fpsign,x
-                puls b,pc                       ; clean up stack and return
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Addition and subtraction of values; must enter with values of matching types
+; Addition and subtraction of values; must enter with values of matching types and the result type already set
+; to the correct type.
 ;
 ; Calculates (X) + (U) -> (Y) (addition)
 ; Calculates (X) - (U) -> (Y) (subtraction)
 val_add         ldb val.type,x                  ; get type of left operand
-                stb val.type,y                  ; set result type
-                cmpb #valtype_float             ; is it float?
-                beq fp_add                      ; brif so
-                ldd val.int+2,x                 ; do the addition
-                addd val.int+2,u
-                std val.int+2,y
-                ldd val.int,x
-                adcb val.int+1,u
-                adca val.int,u
-                std val.int,y
-                bvs OVERROR2                    ; brif calculation overflowed
-                rts
+                cmpb valtype_int                ; is it integer?
+                lbeq int32_add                  ; brif so - do integer addition
+                cmpb #valtype_float             ; floating point?
+                lbeq fps_add                    ; brif so - do floating point addition
+                jmp TMERROR                     ; we have a type we don't understand
 val_sub         ldb val.type,x                  ; get type of left operand
-                stb val.type,y                  ; set result type
+                cmpb valtype_int                ; is it integer?
+                lbeq int32_sub                  ; brif so - do integer addition
                 cmpb #valtype_float             ; floating point?
-                beq fp_sub                      ; brif so
-                ldd val.int+2,x                 ; do the subtraction
-                subd val.int+2,u
-                std val.int+2,y
-                ldd val.int,x
-                sbcb val.int+1,u
-                sbca val.int,u
-                std val.int,y
-                bvs OVERROR2                    ; brif overflow
-                rts
+                lbeq fps_sub                    ; brif so - do floating point addition
+                jmp TMERROR                     ; we have a type we don't understand
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Multiplication
+;
+; Calculates (X) × (U) -> (Y)
+;
+; The result might overflow the integer type. In this case, an actual overflow error will occur.
+val_mul         ldb val.type,x                  ; get type of left operand
+                cmpb #valtype_int               ; integer?
+                lbeq int32_mul                  ; brif so - do integer multiplication
+                cmpb #valtype_float             ; is it float?
+                lbeq fps_mul                    ; brif so - do floating point multiplication
+                jmp TMERROR                     ; have an unhandled type - bail on it
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; FP subtraction: just invert the sign of the second operand and add; operands must be writable and they should be
-; considered to be clobbered
-fp_sub          com val.fpsign,u                ; negate right operand
-                ; fall through to addition
-; FP addition: this requires that *both operands* are writable and they may be clobbered
-fp_add          ldb val.fpexp,u                 ; is the second operand zero?
-                beq fp_add0                     ; brif so - it's a no-op - copy the left operand to the output
-                lda val.fpexp,x                 ; is left operand zero?
-                bne fp_add1                     ; brif not - we have to do the add
-                leau ,x                         ; copy the right operand to the output
-fp_add0         ldd ,u                          ; copy the value across
-                std ,y
-                ldd 2,u
-                std 2,y
-                ldd 4,u
-                std 4,y
-                rts
-fp_add1         subb val.fpexp,x                ; get difference in exponents
-                beq fp_add6                     ; brif they're the same - no denormalizing is needed
-                bhi fp_add2                     ; brif second one is bigger, need to right-shift the mantissa of first
-                exg x,u                         ; swap the operands (we can do that for addition)l second is now biggest
-                negb                            ; invert the shift count
-fp_add2         cmpb #32                        ; are we shifting more than 32 bits?
-                blo fp_add0                     ; brif so - we're effectively adding zero so bail out
-fp_add3         cmpb #8                         ; have 8 bits to move?
-                blo fp_add5                     ; brif not
-                lda val.fpmant+2,x              ; shift 8 bits right
-                sta val.fpmant+3,x
-                lda val.fpmant+1,x
-                sta val.fpmant+2,x
-                lda val.fpmant,x
-                sta val.fpmant+1,x
-                clr val.fpmant,x
-                subb #8                         ; account for 8 shifts
-                bra fp_add3                     ; see if we have a whole byte to shift
-fp_add4         lsr val.fpmant,x                ; shift right one bit
-                ror val.fpmant+1,x
-                ror val.fpmant+2,x
-                ror val.fpmant+3,x
-fp_add5         decb                            ; done all shifts?
-                bmi fp_add4                     ; brif not - do a shift
-fp_add6         ldb val.fpexp,u                 ; set exponent of result
-                stb val.fpexp,y
-                ldb val.fpsign,u                ; fetch sign of larger value
-                stb val.fpsign,y                ; set result sign
-                cmpb val.fpsign,x
-                bne fp_add8                     ; brif not - need to subtract the operands
-                ldd val.fpmant+2,u              ; add the mantissas
-                addd val.fpmant+2,x
-                std val.fpmant+2,y
-                ldd val.fpmant,u
-                adcb val.fpmant+1,x
-                adca val.fpmant,x
-                std val.fpmant,y
-                ldb #0                          ; clear extra precision bits (preserve carry)
-                bcc fp_add7                     ; brif no carry
-                ror val.fpmant,y                ; shift carry into mantissa
-                ror val.fpmant+1,y
-                ror val.fpmant+2,y
-                ror val.fpmant+3,y
-                rorb                            ; keep bits for founding
-                inc val.fpexp,y                 ; bump exponent to account for shift
-                beq OVERROR2                    ; brif it overflowed
-fp_add7         leax ,y                         ; point to result
-                jmp fp_normalize                ; go normalize the result
-fp_add8         ldd val.fpmant+2,u              ; subtract operands
-                subd val.fpmant+2,x
-                std val.fpmant+2,y
-                ldd val.fpmant,u
-                sbcb val.fpmant+1,x
-                sbca val.fpmant,x
-                std val.fpmant,y
-                bcc fp_add7                     ; brif we didn't carry - no need to fix up
-                ldd zero                        ; negate the mantissa bits since we use sign+magnitude
-                subd val.fpmant+2,y
-                std val.fpmant+2,y
-                ldd zero
-                sbcb val.fpmant+1,y
-                sbca val.fpmant,y
-                std val.fpmant,y
-                neg val.fpsign,y                ; invert sign of result since we went past zero
-                clrb                            ; clear extra precision bits
-                bra fp_add7                     ; go normalize the result and return
+; Division
+;
+; Calculates (X) ÷ (U) -> (Y)
+;
+; The integer operation simply truncates the result ("rounds toward zero")
+val_div         ldb val.type,x                  ; get type of left operand
+                cmpb #valtype_int               ; integer?
+                lbeq int32_div                  ; brif so - do integerdivision
+                cmpb #valtype_float             ; floating point?
+                lbeq fps_div                    ; brif so - do floating point division
+                jmp TMERROR                     ; unsupported type
+                if 0
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Pack a floating point value at (X)
-fp_packval      ldb val.fpsign,x                ; get sign
-                bmi fp_packval                  ; brif negative - the default 1 bit will do
-                ldb val.fpmant,x                ; clear high bit of mantissa for positive
-                andb #0x7f
-                stb val.fpmant,x
-fp_packval0     rts
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Unpack a floating point value at (X)
-fp_unpackval0   ldb val.fpmant,x                ; get high byte of mantissa
-                sex                             ; now A is value for sign byte
-                sta val.fpsign,x                ; set sign
-                orb #0x80                       ; set high bit of mantissa
-                stb val.fpmant,x
-                rts
+; Modulus - note that this is a division operator returning effectively the remainder, not an absolute value as is
+; sometimes meant by "modulus".
+;
+; Calculates (X) <MOD> (U) -> (Y)
+;
+; Note: modulus will have the same sign as the quotient so that (U) * [(X) / (U)] + [(X) MOD (U)] gives (X) (integer)
+; Note2: the modulus can be calculated on floating point values in which case it will represent the fraction part
+;    of the quotient multiplied by the divisor, again with the same sign as the quotient
+val_mod         ldb val.type,x                  ; get type of left operand
+                cmpb #valtype_int               ; integer?
+                lbeq int32_mod                  ; do integer modulus
+                cmpb #valtype_float             ; floating point?
+                lbeq fps_mod                    ; floating point modulus
+                jmp TMERROR                     ; unsupported type
+                endc
                 *pragmapop list
--- a/src/print.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/print.s	Sat Oct 07 02:56:59 2023 -0600
@@ -66,7 +66,7 @@
                 lda #0x20                       ; default sign (positive)
                 ldb val0+val.int                ; is it negative?
                 bpl cmd_printint0               ; brif not
-                jsr val_negint32                ; negate the integer
+                jsr int32_neg                   ; negate the integer
                 lda #'-                         ; negative sign
 cmd_printint0   sta ,y+                         ; save sign
                 ldu #cmd_printintpc             ; point to positive constant table
--- a/src/vars.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/vars.s	Sat Oct 07 02:56:59 2023 -0600
@@ -39,8 +39,29 @@
 tok_kwnum       rmb 1                           ; the actual token number
 tok_kwmatchl    rmb 1                           ; the length of the best match during lookup
 tok_kwmatch     rmb 2                           ; the current best matched token number
-val0            rmb val.size                    ; value accumulator 0
-val1            rmb val.size                    ; value accumulator 1
+; General value accumulators used during expression evaluation
+val0            rmb val.size                    ; value accumulator 0 - current expression value
+val1            rmb val.size                    ; value accumulator 1 - usually left operand of binary operator
+; The fpa0 and fpa1 areas are used for scratch work during floating point operations. They are only used
+; by floating point operations. This saves a fair fiew clock cycles over simply working off the index register
+; pointers passed into the routines and it also allows for being able to leave the input operands for the
+; routines unmodified, or to overlap the input and output operands. These floating point accumulators can hold
+; the maximum precision floating point values used by the system.
+fpa0            rmb fpa.size                    ; floating point accumulator 1
+fpa1            rmb fpa.size                    ; floating point accumulator 1
+fpa0extra       rmb 1                           ; "extra" bytes for calculations
+fpa0extra1      rmb 1
+fpa0extra2      rmb 1
+fpa0extra3      rmb 1
+fpa0extra4      rmb 1
+fpa0extra5      rmb 1
+fpa0extra6      rmb 1
+fpa0extra7      rmb 1
+fpa0extra8      rmb 1
+fpa0extra9      rmb 1
+fpa0extra10     rmb 1
+fpa0extra11     rmb 1
+fpa0extra12     rmb 1
                 rmb 0x71-*                      ; align RSTFLG/RSTVEC for stock ROM compatibility
 RSTFLG          rmb 1                           ; 0x55 if RSTVEC is valid
 RSTVEC          rmb 2                           ; points to warm start routine (must start with NOP)