# HG changeset patch
# User William Astle <lost@l-w.ca>
# Date 1696669019 21600
# Node ID bb50ac9fdf37b6fc3cebba2542b476efd7fc7613
# Parent  df86e6d64ce252edbdf025849678e1b8ead60fb8
Checkpoint with very basic integer and floating point arithmetic, untested

This commit has implementations for floating point add, subtract, multiply,
and divide, along with 32 bit signed integer equivalents. These can probably
be optimized and they are untested.

diff -r df86e6d64ce2 -r bb50ac9fdf37 Makefile
--- a/Makefile	Sun Sep 10 23:18:17 2023 -0600
+++ b/Makefile	Sat Oct 07 02:56:59 2023 -0600
@@ -1,15 +1,15 @@
 .PHONY: all
 all: bin/lwbasic.rom bin/lwbasic-coco2b.rom bin/lwbasic-coco3.rom bin/coco2.zip bin/coco2b.zip bin/coco3.zip
 
-lwb_srcs := consscr.s defs.s error.s expr.s genio.s init.s interp.s irq.s keyb.s keywords.s miscdata.s number.s print.s progctrl.s token.s vars.s
+lwb_srcs := consscr.s defs.s error.s expr.s fps.s genio.s init.s int.s interp.s irq.s keyb.s keywords.s miscdata.s number.s print.s progctrl.s token.s vars.s
 lwb_srcs := $(addprefix src/,$(lwb_srcs))
 
 bin/lwbasic.rom: src/lwbasic.s $(lwb_srcs)
-	lwasm --tabs=16 --raw --list=src/lwbasic-coco2.list --symbols --output=bin/lwbasic.rom src/lwbasic.s
+	lwasm --6809 --tabs=16 --raw --list=src/lwbasic-coco2.list --symbols --output=bin/lwbasic.rom src/lwbasic.s
 bin/lwbasic-coco2b.rom: src/lwbasic.s $(lwb_srcs)
-	lwasm --tabs=16 --raw --list=src/lwbasic-coco2b.list --symbols --output=bin/lwbasic-coco2b.rom -DCOCO2B=1 src/lwbasic.s
+	lwasm --6809 --tabs=16 --raw --list=src/lwbasic-coco2b.list --symbols --output=bin/lwbasic-coco2b.rom -DCOCO2B=1 src/lwbasic.s
 bin/lwbasic-coco3.rom: src/lwbasic.s $(lwb_srcs)
-	lwasm --tabs=16 --raw --list=src/lwbasic-coco3.list --symbols --output=bin/lwbasic-coco3.rom -DCOCO3=1 src/lwbasic.s
+	lwasm --6809 --tabs=16 --raw --list=src/lwbasic-coco3.list --symbols --output=bin/lwbasic-coco3.rom -DCOCO3=1 src/lwbasic.s
 
 .PHONY: clean
 clean:
diff -r df86e6d64ce2 -r bb50ac9fdf37 src/defs.s
--- a/src/defs.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/defs.s	Sat Oct 07 02:56:59 2023 -0600
@@ -45,20 +45,27 @@
 valtype_int     equ 1                           ; integer (32 bit) value (signed)
 valtype_float   equ 2                           ; float type (40 bit) value
 valtype_string  equ 3                           ; string type (16 bit length, 16(32) bit data pointer
-; Value accumulator structure definitions
-;
-; Notes:
-;
-; Much code using value accumulators depends on the specific layout of this structure so reorganizing it is dangerous.
-; Notably, the integer value and floating point mantissa must be at the same offset.
-val.type        equ 6                           ; value type offset
-val.fpexp       equ 0                           ; fp exponent offset
-val.fpmant      equ 1                           ; fp mantissa offset
-val.fpsign      equ 5                           ; fp sign offset
-val.int         equ 1                           ; integer offset
-val.strlen      equ 0                           ; string length offset
-val.strptr      equ 4                           ; string data pointer (low word)
-val.size        equ 7                           ; size of a value accumulator
+; Floating point accumulator structure definitions
+fps.exp         equ 0                           ; single precision exponent
+fps.sig         equ fps.exp+1                   ; single precision significand
+fps.sign        equ fps.sig+4                   ; single precision sign
+fps.size        equ fps.sign+1
+fpa.size        equ fps.size                    ; use the largest floating point accumulator size
+; String data definition
+str.len         equ 0                           ; string length (2 bytes)
+str.ptr         equ str.len+2                   ; string data pointer (3 bytes)
+str.size        equ str.ptr+3
+; Value accumulator structure definitions; note that the actual value data must be first and any
+; incidental meta data must follow
+val.value       equ 0                           ; offset of the value stored in the accumulator
+val.fpsexp      equ val.value+fps.exp           ; floating point exponent
+val.fpssig      equ val.value+fps.sig           ; floating point significand
+val.fpssign     equ val.value+fps.sign          ; floating point sign
+val.int         equ val.value                   ; integer offset
+val.strlen      equ val.value+str.len           ; string length offset
+val.strptr      equ val.value+str.ptr           ; string data pointer (low word)
+val.type        equ val.value+fps.size          ; use the largest of the data types here
+val.size        equ val.type+1                  ; size of a value accumulator
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
                 ifdef COCO3
 ; GIME INIT0
diff -r df86e6d64ce2 -r bb50ac9fdf37 src/error.s
--- a/src/error.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/error.s	Sat Oct 07 02:56:59 2023 -0600
@@ -61,4 +61,6 @@
                 fcn 'Overflow'
                 deferr tm
                 fcn 'Type mismatch'
+                deferr div0
+                fcn 'Division by zero'
                 *pragmapop list
diff -r df86e6d64ce2 -r bb50ac9fdf37 src/expr.s
--- a/src/expr.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/expr.s	Sat Oct 07 02:56:59 2023 -0600
@@ -88,8 +88,10 @@
 ; 15. Read a character and go to step 7
 ;
 ; If the result ends up being larger than a floating point value can hold, return Overflow
-eval_number     ldb #valtype_int                ; flag result as an integer
-                stb val0_val.type
+eval_number     
+                if 0
+                ldb #valtype_int                ; flag result as an integer
+                stb val0+val.type
                 ldx zero                        ; blank out the value except type
                 stx val0
                 stx val0+2
@@ -101,7 +103,7 @@
                 beq eval_number3                ; brif so
                 cmpa #tok_minus                 ; negative (operator negative)?
                 bne eval_number2                ; brif not
-eval_number3    com val0+val.fpsign             ; invert sign (multiple negatives will flip this multiple times)
+eval_number3    com val0+val.fpssign             ; invert sign (multiple negatives will flip this multiple times)
                 bra eval_number0                ; deal with next input
 eval_number2    cmpa #'+                        ; unary +?
                 beq eval_number0                ; brif so - skip it
@@ -122,12 +124,12 @@
                 rol val0+val.int+2
                 rol val0+val.int+1
                 rol val0+val.int
-                rol val0+val.fpexp              ; overflow into fp exponent
+                rol val0+val.fpsexp              ; overflow into fp exponent
                 lsl val0+val.int+3              ; times 4
                 rol val0+val.int+2
                 rol val0+val.int+1
                 rol val0+val.int
-                rol val0+val.fpexp              ; brif overflowed
+                rol val0+val.fpsexp              ; brif overflowed
                 ldd val0+val.int+2              ; times 5 (add original value)
                 addd ,s++
                 std val0+val.int+2
@@ -135,14 +137,14 @@
                 adcb 1,s
                 adca ,s++
                 std val0+val.int
-                ldb val0+val.fpexp              ; and handle overflow bits
+                ldb val0+val.fpsexp              ; and handle overflow bits
                 adcb #0
-                stb val0+val.fpexp
+                stb val0+val.fpsexp
                 lsl val0+val.int+3              ; times 10
                 rol val0+val.int+2
                 rol val0+val.int+1
                 rol val0+val.int
-                rol val0+val.fpexp
+                rol val0+val.fpsexp
                 ldd val0+val.int+2              ; get low word
                 addb ,s+                        ; add in current digit
                 adca #0
@@ -151,9 +153,9 @@
                 adcb #0
                 adca #0
                 std val0+val.int
-                lda val0+val.fpexp              ; and handle overflow
+                lda val0+val.fpsexp              ; and handle overflow
                 adca #0
-                sta val0+val.fpexp
+                sta val0+val.fpsexp
                 bne eval_number11               ; if we overflowed, go continue parsing as floating point
                 lda val0+val.int                ; get back high byte and check for overflow
                 bpl eval_number4                ; brif we haven't wrapped negative
@@ -167,28 +169,28 @@
                 beq eval_number8                ; brif so
                 cmpa #'e                        ; base 10 exponent in lower case?
                 beq eval_number8                ; brif so
-                ldb val0+val.fpsign             ; did we want a negative value?
+                ldb val0+val.fpssign             ; did we want a negative value?
                 beq eval_number7                ; brif not
                 jsr val_negint32                ; negate the 32 bit integer to correct two's complement
-eval_number7    clr val0+val.fpsign             ; clear sign bits for book keeping
+eval_number7    clr val0+val.fpssign             ; clear sign bits for book keeping
                 rts
 eval_number11   jsr nextchar                    ; each the character already processed
 eval_number8    lda #0x9f                       ; exponent if binary point is to the right of the mantissa                           
                 clr val0extra                   ; clear extra precision bits for val0
                 ldb #valtype_float              ; flag value as floating point
                 stb val0+val.type
-                ldb val0+val.fpexp              ; do we have overflow bits to shift?
+                ldb val0+val.fpsexp              ; do we have overflow bits to shift?
                 beq eval_number10               ; brif not
 eval_number9    inca                            ; bump exponent to account for extra bits
                 lsrb                            ; shift some bits over
-                ror val0+val.fpmant
-                ror val0+val.fpmant+1
-                ror val0+val.fpmant+2
-                ror val0+val.fpmant+3
+                ror val0+val.fpssig
+                ror val0+val.fpssig+1
+                ror val0+val.fpssig+2
+                ror val0+val.fpssig+3
                 ror val0extra
                 tstb                            ; all bits shifted into mantissa?
                 bne eval_number9                ; brif not
-eval_number10   sta val0+val.fpexp              ; save adjusted exponent
+eval_number10   sta val0+val.fpsexp              ; save adjusted exponent
                 ldx #val0                       ; normalize the result for further operations
                 jsr fp_normalize
                 clr ,-s                         ; flag for decimal point seen
@@ -261,6 +263,7 @@
                 jsr val_int32tofloat            ; convert to floating point
                 jsr fp_add                      ; add val1 to val0
                 bra eval_number40               ; go handle another character
+                endc
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Operator table
 ;
diff -r df86e6d64ce2 -r bb50ac9fdf37 src/fps.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/fps.s	Sat Oct 07 02:56:59 2023 -0600
@@ -0,0 +1,425 @@
+                *pragmapush list
+                *pragma list
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision floating point arithmetic package
+;
+; Floating point values are stored in 6 byte packets (single precision) organized as follows:
+; Offset        Length          Contents
+; 0             1               8 bit binary exponent with a bias of 128; 0 means the number is 0
+; 1             4               32 bit significand
+; 5             1               sign flag; zero for positive, 0xff for negative
+;
+; Binary operateions take pointers to their arguments in X and U. Y contains the result location. In all cases, it is
+; safe to specify either one of the arguments as the result location. Unary operations take their operand pointer in
+; X and their result pointer in Y. In all cases, there is an in place version of the unary operation that operates on
+; its input and only needs the single pointer in X.
+;
+; On the topic of optimization: these routines copy their operands to accumulators in the direct page. This saves one
+; cycle per instruction which has a nonzero offset. In addition, this means that the input operands can remain
+; unmodified by the actual operations. For instance, addition requires denormalizing one operand which would otherwise
+; have to be done in place.
+;
+; NOTE: the input pointers X and U may be clobbered.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Convert 32 bit unsigned integer at (X) to single precision floating point at (U)
+fps_fromuint32  clr fpa0+fps.sign               ; set result sign to positive
+                bra fps_fromint32a              ; go do conversion
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Convert 32 bit signed integer at (X) to single precision floating point at value accumulator in (Y)
+fps_fromint32   ldb ,x                          ; set sign based on signed integer
+                sex
+                sta fpa0+fps.sign               ; set result sign to the two's complement sign
+                bpl fps_fromint32a              ; brif positive - no need to mess with bits
+                jsr int32_neg                   ; make the bits positive
+fps_fromint32a  ldb valtype_float               ; set output value type to floating point
+                stb val.type,y
+                ldd ,x                          ; copy integer bits to fpa0
+                std fpa0+fps.sig
+                ldd 2,x
+                std fpa0+fps.sig+2
+                ldb #0xa0                       ; put binary point to the right of the significand
+                stb fpa0+fps.exp
+                clr fpa0extra                   ; clear extra precision
+                jmp fps_add10                   ; go normalize the result and return
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Unary negation - negate (X) to (Y)
+fps_neg         ldd 2,x                         ; copy to output and keep exponent in A
+                std 2,y
+                ldd 4,x
+                std 4,y
+                ldd ,x
+                std ,y
+                tsta                            ; is the number zero?
+                beq fps_neg0                    ; brif so - do nothing
+                com fps.sign,y                  ; flip the sign
+fps_neg0        rts
+fps_negx        lda fps.exp,x                   ; is the number zero?
+                beq fps_negx0                   ; brif so - do nothing
+                com fps.sign,x                  ; flip the sign
+fps_negx0       rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copy (X) to fpa0 and (U) to fpa1
+fps_copyinputs  ldd ,x                          ; copy (X) to fpa0
+                std fpa0
+                ldd 2,x
+                std fpa0+2
+                ldd 4,x
+                std fpa0+4
+                ldd ,u                          ; copy (U) to fpa0
+                std fpa1
+                ldd 2,u
+                std fpa1+2
+                ldd 4,u
+                std fpa1+4
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Subtraction (X) - (U) to (Y)
+fps_sub         bsr fps_copyinputs              ; copy input operands
+                com fpa1+fps.sign               ; negate the subtrahend (don't need to handle zero here)
+                bra fps_add0                    ; go handle it as addition
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Addition (X) + (U) to (Y)
+fps_add         bsr fps_copyinputs              ; copy input operands
+fps_add0        lda fpa1+fps.exp                ; is the second operand 0?
+                bne fps_add1                    ; brif not
+                ldd fpa0                        ; copy first operand to output
+                std ,y
+                ldd fpa0+2
+                std 2,y
+                ldd fpa0+4
+                std 4,y
+                rts
+fps_add1        lda fpa0+fps.exp                ; get exponent of first operand
+                bne fps_add2                    ; brif not zero
+                ldd fpa1                        ; copy second operand to output
+                std ,y
+                ldd fpa1+2
+                std 2,y
+                ldd fpa1+4
+                std 4,y
+                rts
+fps_add2        clr fpa0extra                   ; clear extra precision bits
+                lda fpa0+fps.exp                ; get first operand exponent
+                suba fpa1+fps.exp               ; get difference in exponents
+                beq fps_add8                    ; brif we don't need to denormalize - they're the same
+                blo fps_add3                    ; brif we need to denormalize the first operand
+                ldx #fpa1                       ; point to second operand to denormalize
+                bra fps_add4                    ; go denormalize
+fps_add3        ldb fpa1+fps.exp                ; get exponent of second operand (result exponent)
+                stb fpa0+fps.exp                ; set result exponent
+                ldx #fpa0                       ; point to first operand to denormalize
+                nega                            ; get number of bits to shift as positive number
+fps_add4        suba #8                         ; is there 8 bits left?
+                blo fps_add5                    ; brif not
+                ldb fps.sig+3,x                 ; shift significand 8 bits right
+                stb fpa0extra                   ; save extra precision bits
+                ldb fps.sig+2,x
+                stb fps.sig+3,x
+                ldb fps.sig+1,x
+                stb fps.sig+2,x
+                ldb fps.sig,x
+                stb fps.sig+1,x
+                clr fps.sig,x
+                bra fps_add4                    ; see if we have another byte to shift
+fps_add5        adda #8                         ; adjust for extra subtract above
+                bra fps_add7                    ; do the bit shifting
+fps_add6        lsr fps.sig,x                   ; shift one bit right
+                ror fps.sig+1,x
+                ror fps.sig+2,x
+                ror fps.sig+3,x
+                ror fpa0extra
+                deca                            ; done all of the bits?
+fps_add7        bne fps_add6                    ; brif not
+fps_add8        lda fpa0+fps.sign               ; compare the signs of the operands
+                eora fpa1+fps.sign              ; set A if signs differ
+                bne fps_add9                    ; brif they differ - do subtraction
+                ldd fpa0+fps.sig+2              ; add low word of significands
+                addd fpa1+fps.sig+2
+                std fpa0+fps.sig+2
+                ldd fpa0+fps.sig                ; and the high word
+                adcb fpa1+fps.sig+1
+                adca fpa1+fps.sig
+                std fpa0+fps.sig
+                bcc fps_add9                    ; brif no carry
+                ror fpa0+fps.sig                ; shift carry into significand
+                ror fpa0+fps.sig+1
+                ror fpa0+fps.sig+2
+                ror fpa0+fps.sig+3
+                ror fpa0extra                   ; and the extra bits (for rounding)
+                inc fpa0+fps.exp                ; bump exponent to account for bit shift
+                bne fps_add14                   ; go check for round-off if not overflow
+OVERROR         ldb #err_ov                     ; raise overflow
+                jmp ERROR 
+fps_add9        ldd fpa0+fps.sig+2              ; subtract low word
+                subd fpa1+fps.sig+2
+                std fpa0+fps.sig+2
+                ldd fpa0+fps.sig                ; and now the high word
+                sbcb fpa1+fps.sig+1
+                sbca fpa1+fps.sig
+                std fpa0+fps.sig
+                bcc fps_add10                   ; brif we didn't carry
+                com fpa0+fps.sign               ; flip result sign - other number was bigger
+                com fpa0+fps.sig+3              ; negate two's complement result to be just the magnitude
+                com fpa0+fps.sig+2
+                com fpa0+fps.sig+1
+                com fpa0+fps.sig
+                ldx fpa0+fps.sig+2              ; add 1 to complete negation
+                leax 1,x
+                stx fpa0+fps.sig+2
+                bne fps_add10                   ; brif carry doesn't propagate
+                ldx fpa0+fps.sig                ; propagate carry
+                leax 1,x
+                stx fpa0+fps.sig                ; NOTE: this cannot carry because magnitude got smaller
+fps_add10       clra                            ; initialize exponent offset
+fps_add11       ldb fpa0+fps.sig                ; do we have nonzero bits in high byte of significand?
+                bne fps_add13                   ; brif so
+                ldb fpa0+fps.sig+1              ; shift left 8 bits
+                stb fpa0+fps.sig
+                ldb fpa0+fps.sig+2
+                stb fpa0+fps.sig+1
+                ldb fpa0+fps.sig+3
+                stb fpa0+fps.sig+2
+                ldb fpa0extra                   ; and extra precision bits
+                stb fpa0+fps.sig+3
+                clr fpa0extra
+                addb #8                         ; account for number of bits shifted
+                cmpb #40                        ; done 40 bits?
+                blo fps_add11                   ; brif not - see if we have more bits to shift
+                clr fpa0+fps.exp                ; number underflowed to zero - set it so
+                clr fpa0+fps.sign
+                clr fpa0+fps.sig
+                clr fpa0+fps.sig+1
+                clr fpa0+fps.sig+2
+                clr fpa0+fps.sig+3
+                bra fps_add16                   ; go return result
+fps_add12       inca                            ; account for a bit shift
+                lsl fpa0extra                   ; shift significand and extra bits left
+                rol fpa0+fps.sig+3
+                rol fpa0+fps.sig+2
+                rol fpa0+fps.sig+1
+                rol fpa0+fps.sig
+fps_add13       bpl fps_add12                   ; brif we haven't normalized yet
+fps_add14       ldb fpa0extra                   ; do we need to round?
+                bpl fps_add16                   ; brif not
+                ldx fpa0+fps.sig+2              ; add one to significand
+                leax 1,x
+                stx fpa0+fps.sig+2
+                bne fps_add16                   ; brif no carry
+                ldx fpa0+fps.sig                ; bump the upper word of significand
+                leax 1,x
+                stx fpa0+fps.sig
+                bne fps_add16
+fps_add15       inc fpa0+fps.exp                ; bump exponent
+                beq OVERROR                     ; brif it overflowed
+                lda #0x80                       ; set high bit of significand (rest is zero)
+                sta fpa0+fps.sig
+fps_add16       ldd fpa0                        ; copy result to destination
+                std ,y
+                ldd fpa0+2
+                std 2,y
+                ldd fpa0+4
+                std 4,y
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision multiplication (X) × (U) to (Y)
+fps_mul         lda fps.exp,x                   ; is first operand zero?
+                beq fps_mul0                    ; brif so - return zero
+                lda fps.exp,u                   ; is second operand zero?
+                bne fps_mul1                    ; brif not - have to do the multiply
+fps_mul0        ldd zero                        ; return zero result
+                std ,y
+                std 2,y
+                std 4,y
+                rts
+fps_mul1        jsr fps_copyinputs              ; copy input operands
+                lda fpa0+fps.sign               ; calculate sign of result - xor of the two signs
+                eora fpa1+fps.sign
+                sta fpa0+fps.sign               ; save result sign
+                lda fpa0+fps.exp                ; get exponent of first value
+                adda fpa1+fps.exp               ; calculate new exponent; also cancels the bias
+                rora                            ; set V if C and N differ
+                rola
+                bvc fps_mul3                    ; brif maybe an overflow
+                adda #0x80                      ; add back the bias
+                sta fpa0+fps.sign               ; save new sign
+                beq fps_mul0                    ; brif we underflowed - zero out sign
+; This does a shift-and-add multiplication algorithm. This is slower than an equivalent using MUL but is smaller.
+; The high order bytes will be left in fpa0 with the low order bytes in fpa0extra and fpa0extra[1-3]. The low order
+; bytes are kept for the odd case where extra precision is useful. Uses fpa0extra[4-7] as temporaries.
+fps_mul2        ldd zero                        ; zero out temporary bytes
+                std fpa0extra4
+                std fpa0extra6
+                ldb fpa0+fps.sig+3              ; multiply by low byte of fpa0
+                bsr fps_mul4
+                ldb fpa0extra                   ; move low bites
+                stb fpa0extra3
+                ldb fpa0+fps.sig+2              ; multiply by next higher byte
+                bsr fps_mul4
+                ldb fpa0extra                   ; move low bits
+                stb fpa0extra2
+                ldb fpa0+fps.sig+1              ; and again for the next higher byte
+                bsr fps_mul4
+                ldb fpa0extra
+                stb fpa0extra1
+                ldb fpa0+fps.sig                ; and the high order byte
+                bsr fps_mul4
+                ldd fpa0extra4                  ; copy high order product bits to result
+                std fpa0+fps.sig
+                ldd fpa0extra6
+                std fpa0+fps.sig+2
+                jmp fps_add10                   ; go normalize the result and return
+fps_mul3        bpl fps_mul0                    ; brif we underflowed - return 0
+                jmp OVERROR                     ; raise overflow
+fps_mul4        bne fps_mul5                    ; brif not multiply by zero
+                lda fpa0+fps.sig+3              ; shift 8 bits right
+                sta fpa0extra
+                lda fpa0+fps.sig+2
+                sta fpa0+fps.sig+3
+                lda fpa0+fps.sig+1
+                sta fpa0+fps.sig+2
+                lda fpa0+fps.sig
+                sta fpa0+fps.sig+1
+                clr fpa0+fps.sig
+fps_mul8        rts
+fps_mul5        coma                            ; set C
+fps_mul6        lda fpa0extra4                  ; get high byte of result bytes
+                rorb                            ; is multiplier bit set?
+                beq fps_mul8                    ; brif 8 shifts done (C set above makes sure of that)
+                bcc fps_mul7                    ; brif bit not set - don't do addition
+                lda fpa0extra7                  ; add multiplier (fpa1) to result
+                adda fpa1+fps.sig+3
+                sta fpa0extra7
+                lda fpa0extra6
+                adca fpa1+fps.sig+2
+                sta fpa0extra6
+                lda fpa0extra5
+                adca fpa1+fps.sig+1
+                sta fpa0extra5
+                lda fpa0extra4
+                adca fpa1+fps.sig
+fps_mul7        rora                            ; rotate carry in (from add or 0)
+                sta fpa0extra4
+                ror fpa0extra5
+                ror fpa0extra6
+                ror fpa0extra7
+                ror fpa0extra                   ; and into the extra precision bits
+                clra                            ; clear carry - so shift above will terminate
+                bra fps_mul6                    ; go do another bit
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision division (X) ÷ (U) -> (Y)
+;
+; This is basically the same algorithm used in the Color Basic ROM
+fps_div         lda fps.exp,u                   ; is divisor 0?
+                bne fps_div0
+DIV0ERROR       ldb #err_div0                   ; raise division by zero
+                jmp ERROR
+fps_div0        lda fps.exp,x                   ; is dividend 0?
+                lbeq fps_mul0                   ; brif so - return 0
+                jsr fps_copyinputs              ; copy input values
+                lda fpa0+fps.sign               ; calculate result sign - xor of operand signs
+                eora fpa1+fps.sign
+                sta fpa0+fps.sign               ; save result sign
+                lda fpa1+fps.exp                ; get divisor exponent
+                nega                            ; negate for subtraction
+                adda fpa0+fps.exp               ; subtract it from dividend exponent
+                rora                            ; set V if C and N differ
+                rola
+                bvc fps_mul3                    ; brif overflow or underflow
+                adda #0x80                      ; add back the bias
+                sta fpa0+fps.sign               ; save new sign
+                lbeq fps_mul0                   ; brif we underflowed - zero out sign
+                inca                            ; bump exponent - why?? related to the bias stuff above?
+                lbeq OVERROR                    ; brif it overflows
+                sta fpa0+fps.exp                ; save result exponent
+                ldx #fpa0extra4                 ; point to temporary storage bytes for quotient
+                ldb #4                          ; counter for 4 significand bytes and one extra partial byte
+                stb fpa0extra1                  ; save counter since we need both accumulators
+                ldb #1                          ; shift counter flag and quotient byte
+fps_div1        lda fpa1+fps.sig                ; set C if fpa0 significand <= fpa1 significand
+                cmpa fpa0+fps.sig
+                bne fps_div2
+                lda fpa1+fps.sig+1
+                cmpa fpa0+fps.sig+1
+                bne fps_div2
+                lda fpa1+fps.sig+2
+                cmpa fpa0+fps.sig+2
+                bne fps_div2
+                lda fpa1+fps.sig+3
+                cmpa fpa0+fps.sig+3
+                bne fps_div2
+                coma                            ; set C if values are the same
+fps_div2        tfr cc,a                        ; save C for later, C clear if fpa1 > fpa0
+                rolb                            ; shift carry into quotient
+                bcc fps_div3                    ; brif carry clear - we haven't done 8 bits yet
+                stb ,x+                         ; save quotient byte after a full set of bits
+                dec fpa0extra1                  ; have we done all the bytes?
+                bmi fps_div7                    ; brif all bytes plus extra precision - done all
+                beq fps_div6                    ; brif all main sigificand bytes - do a couple extra bits
+                ldb #1                          ; reset the bit counter flag
+fps_div3        tfr cc,a                        ; get back original carry from compare
+                bcs fps_div5                    ; brif it "went"
+fps_div4        lsl fpa0+fps.sig+3              ; shift dividend left
+                rol fpa0+fps.sig+2
+                rol fpa0+fps.sig+1
+                rol fpa0+fps.sig
+                bcs fps_div2                    ; brif it carries - next bit "goes"
+                bmi fps_div1                    ; check magnitudes of next bit
+                bra fps_div2                    ; carry clear - check another bit
+fps_div5        lda fpa0+fps.sig+3              ; subtract divisor from dividend bits
+                suba fpa1+fps.sig+3
+                sta fpa0+fps.sig+3
+                lda fpa0+fps.sig+2
+                sbca fpa1+fps.sig+2
+                sta fpa0+fps.sig+2
+                lda fpa0+fps.sig+1
+                sbca fpa1+fps.sig+1
+                sta fpa0+fps.sig+1
+                lda fpa0+fps.sig
+                sbca fpa1+fps.sig
+                sta fpa0+fps.sig
+                bra fps_div4                    ; now do the bit shift to line things up
+fps_div6        ldb #0x40                       ; only do two bits of extra precision byte
+                bra fps_div2                    ; go handle these bitsd
+fps_div7        rorb                            ; get extra quotient bits to bit 7,6 and bit 5 set
+                rorb
+                rorb
+                stb fpa0extra                   ; save extra precision bits
+                ldd fpa0extra4                  ; copy result bits to fpa0
+                std fpa0+fps.sig
+                ldd fpa0extra6
+                std fpa0+fps.sig+2
+                jmp fps_add10                   ; go normalize the result
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Pack single precision number at (X) to (U)
+fps_pack        lda fps.sign,x                  ; get sign of number (will be 0 for 0)
+                ora #0x7f                       ; make sure low bits are set for merging
+                anda fps.sig,x                  ; merge with high bits of significand
+                ldb fps.sig+1,x                 ; get upper mid bits of significand
+                std fps.sig,u
+                ldd fps.sig+2,x
+                std fps.sig+2,u
+                lda fps.exp,x
+                sta fps.exp,u
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Unpack single precision number at (X) to (U)
+fps_unpack      lda fps.exp,x                   ; get exponent of value
+                beq fps_unpack0                 ; brif value is zero
+                sta fps.exp,u
+                ldb fps.sig,x                   ; get high byte of significand
+                sex                             ; make sign value in A
+                sta fps.sign,u                  ; set sign in result
+                ldd fps.sig,x                   ; get high word of sifnificand
+                ora #0x80                       ; make sure high bit is set
+                std fps.sig,u                   ; save high word in result
+                ldd fps.sig+2,x                 ; copy middle bytes of significand
+                std fps.sig+2,u
+                rts
+fps_unpack0     sta fps.exp,u                   ; zero out destination
+                sta fps.sig,u
+                sta fps.sig+1,u
+                sta fps.sig+2,u
+                sta fps.sig+3,u
+                sta fps.sign,u
+                rts
+                *pragmapop list
diff -r df86e6d64ce2 -r bb50ac9fdf37 src/int.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/int.s	Sat Oct 07 02:56:59 2023 -0600
@@ -0,0 +1,323 @@
+                *pragmapush list
+                *pragma list
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; 32 bit integer handling package.
+;
+; Negate a 32 bit integer in (X); done by subtracting it from zero
+int32_neg       ldd zero                        ; subtract low word
+                subd val.int+2,x
+                std val.int+2,x
+                ldd zero                        ; and now the high word
+                sbcb val.int+1,x
+                sbca val.int,x
+                std val.int,x
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; 32 bit integer addition (X) + (U) -> (Y)
+int32_add       ldd val.int+2,x                 ; do low word
+                addd val.int+2,u
+                std val.int+2,y
+                ldd val.int,x                   ; and the high word
+                adcb val.int+1,u
+                adca val.int,u
+                std val.int,y
+                bvc int32_add0                  ; raise overflow if needed
+OVERROR2        jmp OVERROR
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; 32 bit integer subtraction (X) - (U) -> (Y)
+int32_sub       ldd val.int+2,x                 ; do low word
+                subd val.int+2,u
+                std val.int+2,y
+                ldd val.int,x                   ; and the high word
+                sbcb val.int+1,u
+                sbca val.int,u
+                std val.int,y
+                bvs OVERROR2                    ; raise overflow if needed
+int32_add0      rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Signed 32 bit integer multiply (X) * (U) -> (Y), overflow if exceeds signed 32 bit range                
+int32_mul       ldd val.int+2,x                 ; copy left operand to temporary
+                std fpa0+fps.sig+2
+                ldd val.int,x
+                std fpa0+fps.sig
+                eora val.int,u                  ; set sign bit in A if signs differ
+                pshs a                          ; save result sign
+                ldd val.int+2,u                 ; copy right operand to temporary
+                std fpa1+fps.sig+2
+                ldd val.int,u
+                std fpa1+fps.sig
+                bpl int32_mul0                  ; brif right operand is positive
+                ldd zero                        ; negate right operand
+                subd fpa1+fps.sig+2
+                std fpa1+fps.sig+2
+                ldd zero
+                sbcb fpa1+fps.sig+1
+                sbca fpa1+fps.sig
+                std fpa1+fps.sig
+int32_mul0      lda fpa0+fps.sig                ; is left operand negative?
+                bpl int32_mul1                  ; brif not
+                ldd zero                        ; negate left operand
+                subd fpa0+fps.sig+2
+                std fpa0+fps.sig+2
+                ldd zero
+                sbcb fpa0+fps.sig+1
+                sbca fpa0+fps.sig
+                std fpa0+fps.sig
+int32_mul1      bsr util_mul32                  ; do the actual multiplication
+                ldb fpa0extra                   ; are upper bits all zero?
+                orb fpa0extra1
+                orb fpa0extra2
+                orb fpa0extra3
+                bne OVERROR2                    ; brif not - overflowed
+                ldb fpa0extra4                  ; is bit 31 set?
+                bpl int32_mul2                  ; brif not - no overflow
+                lda ,s                          ; negative result wanted?
+                bpl OVERROR2                    ; brif not - we overflowed
+                andb #0x7f                      ; lose extra sign bit
+                orb fpa0extra5                  ; "or" in other bytes to see if all but bit 31 are zero
+                orb fpa0extra6
+                orb fpa0extra7
+                bne OVERROR2                    ; brif any nonzero bits - we overflowed maximum negative number
+                ldb ,s+                         ; do we want a negative result?
+                bpl int32_mul2                  ; brif not
+                ldd zero                        ; negate result
+                subd fpa0extra6
+                std fpa0extra6
+                ldd zero
+                sbcb fpa0extra5
+                sbca fpa0extra4
+                std fpa0extra4
+int32_mul2      ldd fpa0extra4                  ; copy result to destination
+                std val.int,y
+                ldd fpa0extra6
+                std val.int+2,y
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; 32 bit multiply.
+;
+; Significands of fpa0 and fpa1, treated as unsigned, are multiplied with the product being stored in the fpa0extra
+; memory locations.
+;
+; The agorithm is simply this: zero out the result, then multiply fpa0 by each byte of fpa1 and then add the result
+; to the result location. This yields a 64 bit product which is somewhat wasteful.
+util_mul32      ldd zero                        ;* zero out result bits; low 16 bits don't need to be cleared and
+                stb fpa0extra3                  ;* upper 24 bits also don't
+                std fpa0extra4
+                ldb fpa1+fps.sig+3              ; multiply by low byte of fpa1 - no carries possible for this iteration
+                lda fpa0+fps.sig+3
+                mul
+                std fpa0extra6
+                ldb fpa1+fps.sig+3
+                lda fpa0+fps.sig+2
+                mul
+                addd fpa0extra5
+                std fpa0extra5
+                ldb fpa1+fps.sig+3
+                lda fpa0+fps.sig+1
+                mul
+                addd fpa0extra4
+                std fpa0extra4
+                ldb fpa1+fps.sig+3
+                lda fpa0+fps.sig
+                mul
+                addd fpa0extra3
+                std fpa0extra3
+; Now we potentially have cascading carries at every stage; it makes more sense to handle those in a separate
+; addition pass after each partial calculation. The partial calculations are identical to above. This is completely
+; unrolled for speed.
+                ldd zero                        ; zero out extra work bytes
+                std fpa0extra8
+                stb fpa0extra10
+                ldb fpa1+fps.sig+2              ; multiply by second low byte of fpa1
+                lda fpa0+fps.sig+3
+                mul
+                std fpa0extra11
+                ldb fpa1+fps.sig+2
+                lda fpa0+fps.sig+2
+                mul
+                addd fpa0extra10
+                std fpa0extra10
+                ldb fpa1+fps.sig+2
+                lda fpa0+fps.sig+1
+                mul
+                addd fpa0extra9
+                std fpa0extra9
+                ldb fpa1+fps.sig+2
+                lda fpa0+fps.sig
+                mul
+                addd fpa0extra8
+                std fpa0extra8
+                ldd fpa0extra11                 ; add to partial product (shifted left 8 bits)
+                addd fpa0extra5
+                std fpa0extra5
+                ldd fpa0extra9
+                adcb fpa0extra4
+                adca fpa0extra3
+                std fpa0extra3
+                ldb #0
+                adcb fpa0extra8
+                stb fpa0extra2
+                ldd zero                        ; and do it all again for next byte of fpa1
+                std fpa0extra8
+                stb fpa0extra10
+                ldb fpa1+fps.sig+1
+                lda fpa0+fps.sig+3
+                mul
+                std fpa0extra11
+                ldb fpa1+fps.sig+1
+                lda fpa0+fps.sig+2
+                mul
+                addd fpa0extra10
+                std fpa0extra10
+                ldb fpa1+fps.sig+1
+                lda fpa0+fps.sig+1
+                mul
+                addd fpa0extra9
+                std fpa0extra9
+                ldb fpa1+fps.sig+1
+                lda fpa0+fps.sig
+                mul
+                addd fpa0extra8
+                std fpa0extra8
+                ldd fpa0extra11
+                addd fpa0extra4
+                std fpa0extra4
+                ldd fpa0extra9
+                adcb fpa0extra3
+                adca fpa0extra2
+                std fpa0extra2
+                ldb #0
+                adcb fpa0extra8
+                stb fpa0extra1
+                ldd zero                        ; and the final sequence with the fpa1 high byte
+                std fpa0extra8
+                stb fpa0extra10
+                ldb fpa1+fps.sig
+                lda fpa0+fps.sig+3
+                mul
+                std fpa0extra11
+                ldb fpa1+fps.sig
+                lda fpa0+fps.sig+2
+                mul
+                addd fpa0extra10
+                std fpa0extra10
+                ldb fpa1+fps.sig
+                lda fpa0+fps.sig+1
+                mul
+                addd fpa0extra9
+                std fpa0extra9
+                ldb fpa1+fps.sig
+                lda fpa0+fps.sig
+                mul
+                addd fpa0extra8
+                std fpa0extra8
+                ldd fpa0extra11
+                addd fpa0extra3
+                std fpa0extra3
+                ldd fpa0extra9
+                adcb fpa0extra2
+                adca fpa0extra1
+                std fpa0extra1
+                ldb #0
+                adcb fpa0extra
+                stb fpa0extra
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; 32 bit division, integer only, truncate fraction without rounding. Note that there is exactly one case where integer
+; division can overflow: dividing -0x80000000 by -1 which yields 0x80000000. All other cases reduce the magnitude.
+int32_div       ldd val.int+2,x                 ; copy left operand to temporary
+                std fpa0+fps.sig+2
+                ldd val.int,x
+                std fpa0+fps.sig
+                eora val.int,u                  ; set sign bit in A if signs differ
+                pshs a                          ; save result sign
+                ldd val.int+2,u                 ; copy right operand to temporary
+                std fpa1+fps.sig+2
+                ldd val.int,u
+                std fpa1+fps.sig
+                bpl int32_div0                  ; brif right operand is positive
+                ldd zero                        ; negate right operand
+                subd fpa1+fps.sig+2
+                std fpa1+fps.sig+2
+                ldd zero
+                sbcb fpa1+fps.sig+1
+                sbca fpa1+fps.sig
+                std fpa1+fps.sig
+int32_div0      lda fpa0+fps.sig                ; is left operand negative?
+                bpl int32_div1                  ; brif not
+                ldd zero                        ; negate left operand
+                subd fpa0+fps.sig+2
+                std fpa0+fps.sig+2
+                ldd zero
+                sbcb fpa0+fps.sig+1
+                sbca fpa0+fps.sig
+                std fpa0+fps.sig
+int32_div1      ldb fpa1+fps.sig                ; check for division by zero
+                orb fpa1+fps.sig+1
+                orb fpa1+fps.sig+2
+                orb fpa1+fps.sig+3
+                lbne DIV0ERROR                  ; brif division by zero
+                bsr util_div32                  ; do the actual division
+                lda ,s+                         ; get desired sign
+                bmi int32_div2                  ; brif want negative - we can't overflow in that case
+                ldb fpa0extra                   ; get high byte of result
+                lbmi OVERROR2                   ; brif we ended up with 0x80000000 positive
+                bra int32_div3                  ; go return result
+int32_div2      ldd zero                        ; negate result to correct sign
+                subd fpa0extra+2
+                std fpa0extra+2
+                ldd zero
+                sbcb fpa0extra+1
+                sbca fpa0extra
+                std fpa0extra
+int32_div3      ldd fpa0extra                   ; copy result to destination
+                std val.int,y
+                ldd fpa0extra2
+                std val.int+2,y
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Divide 32 bit integer in fpa0 significand by 32 bit integer in fpa1 significand, both treated as unsigned. Leave
+; quotient at fpa0extra...fpa0extra3 and remainder at fpa0extra4...fpa0extra7; does not check for division by zero
+; which will result in a quotient of 0xffffffff and a remainder will be the dividend. It will not get suck in a loop.
+;
+; Algorithm is basically pencil and paper long division. We check to see if the divisor "goes" at each step by doing
+; a trial subtraction without saving the result. If it doesn't go, we just loop around again. If it does go, we stash
+; a 1 bit in the quotient and actually do the subtraction. Then go loop around again. Doing it this way rather than
+; with an actual subtraction and then undoing it with addition saves two store instructions on the comparison saves
+; having to do a restore in the no-go case which is going to be quite common with values whose upper bits are
+; mostly zeroes, thus it makes the operations faster in that case, for integers. (Floating point is a different
+; problem.) 
+util_div32      ldd fpa0+fps.sig+2              ; copy dividend to result location
+                std fpa0extra6
+                ldd fpa0+fps.sig
+                std fpa0extra4
+                ldb #32                         ; do 32 bits
+                stb fpa0+fps.exp                ; save counter somewhere because we don't have enough registers
+                ldd zero                        ; zero out remainder
+                std fpa0extra4
+                std fpa0extra6
+util_div32a     lsl fpa0extra3                  ; shift dividend residue into remainder
+                rol fpa0extra2
+                rol fpa0extra1
+                rol fpa0extra
+                rol fpa0extra7
+                rol fpa0extra6
+                rol fpa0extra5
+                rol fpa0extra4
+                ldd fpa0extra6                  ; now subtract divisor from remainder
+                subd fpa1+fps.sig+2
+                ldd fpa0extra4
+                sbcb fpa1+fps.sig+1
+                sbca fpa1+fps.sig
+                bcs util_div32b                 ; brif it doesn't go - need to restore
+                inc fpa0extra3                  ; set quotient bit
+                ldd fpa0extra6                  ; actuall do the subtraction
+                subd fpa1+fps.sig+2
+                std fpa0extra6
+                ldd fpa0extra4
+                sbcb fpa1+fps.sig+1
+                sbca fpa1+fps.sig
+                std fpa0extra4
+util_div32b     dec fpa0+fps.exp                ; done all 32 bits?
+                bne util_div32a                 ; do another
+                *pragmapop list
diff -r df86e6d64ce2 -r bb50ac9fdf37 src/lwbasic.s
--- a/src/lwbasic.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/lwbasic.s	Sat Oct 07 02:56:59 2023 -0600
@@ -48,6 +48,8 @@
                 include error.s
                 include expr.s
                 include number.s
+                include int.s
+                include fps.s
                 include token.s
                 include miscdata.s
                 include keywords.s
diff -r df86e6d64ce2 -r bb50ac9fdf37 src/number.s
--- a/src/number.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/number.s	Sat Oct 07 02:56:59 2023 -0600
@@ -3,7 +3,8 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Arithmetic package
 ;
-; This section contains routines that handle floating point and integer arithmetic.
+; This section contains routines that handle floating point and integer arithmetic. It mostly delegates to int.s and
+; fps.s.
 ;
 ; Most routines take a pointer to a value accumulator in X. Some take two pointers with the second in U.
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -11,7 +12,7 @@
 ;
 ; * If both operands are the same, ensure the type is numeric and return
 ; * If one operand is floating point, convert the other to floating point, as long as it is numeric
-; * If one or both oeprands are not numeric, raise a type mismatch
+; * If one or both operands are not numeric, raise a type mismatch
 ; The operands are in (X) and (U)
 val_matchtypes  ldb val.type,x                  ; get the type of first argument
                 cmpb #valtype_int               ; is it integer?
@@ -23,244 +24,80 @@
 val_matchtypes0 ldb val.type,u                  ; get type of second operand
                 cmpb #valtype_int               ; is it integer?
                 bne val_matchtypes2             ; brif not
-val_matchtypes3 rts
+val_matchtypes3 rts                             ; both types int - we're good so return
 val_matchtypes2 cmpb #valtype_float             ; is it floating point?
                 bne TMERROR                     ; brif not - raise error
-                pshs u                          ; save pointer to second operand
-                bsr val_int32tofp               ; convert first argument to floating point
-                puls u,pc                       ; restore second operand pointer and return
+                pshs x                          ; save X which may be clobbered
+                leay ,x                         ; point to input operand as destination for conversion
+                jsr fps_fromint32               ; convert first argument to floating point
+                puls x,pc                       ; restore second operand pointer and return
 val_matchtypes1 ldb val.type,u                  ; get second argument type
                 cmpb #valtype_float             ; is it floating point?
                 beq val_matchtypes3             ; brif so - we're good
                 cmpb #valtype_int               ; is it integer?
                 bne TMERROR                     ; brif not - invalid type combination
-                pshs x,u                        ; save value pointers
+                pshs x                          ; save X which mill be clobbered
                 leax ,u                         ; convert (U) to floating point
-                bsr val_int32tofp
-                puls x,u,pc                     ; restore argument pointers and return
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Negate the 32 bit integer (for fp mantissa) at (X)
-val_negint32    ldd zero                        ; subtract integer value from zero
-                subd val.int+2,x
-                std val.int+2,x
-                ldd zero
-                sbcb val.int+1,x
-                sbca val.int,x
-                std val.int,x
-                rts
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Convert integer value at (X) to floating point value at (X). Enter at val_uint32tofp to treat the 32 bit value as
-; unsigned. Otherwise enter at val_int32tofp to treat it as signed.
-val_uint32tofp  clr val.fpsign,x                ; for positive sign
-                bra val_int32tofpp              ; go process as positive
-val_int32tofp   ldb val.int,x                   ; get sign to A
-                sex
-                sta val.fpsign,x                ; set sign of result
-                bpl val_int32tofpp              ; brif positive - don't need to do a two's complement adjustment
-                bsr val_negint32                ; negate the integer value
-val_int32tofpp  ldb valtype_float               ; set result to floating point
-                stb val.type,x
-                ldb #0xa0                       ; exponent to have binary point to the right of the mantissa
-                stb val.fpexp,x                 ; set the exponent
-                clrb                            ; clear out extra precision bits
-                ; fall through to normalize the value at (X)
+                leay ,u
+                jsr fps_fromint32
+                puls x,pc                       ; restore argument pointer and return
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Normalize floating point value at (X); this will shift the mantissa until there is a one in the leftmost
-; bit of the mantissa. The algorithm is as follows:
-;
-; 1. Shift the mantissa left until a 1 bit is found in the high bit of the mantissa.
-; 1a. If more than 40 bits of left shifts occur, determine that the value is zero and return
-; 2. Adjust exponent based on number of shifts
-; 2a. If new exponent went below -127, then underflow occurred and zero out value
-; 2b. If new exponent went above +127, raise an overflow
-; 3. If bit 7 of the extra precision byte is clear, return the resulting value
-; 4. Add one to the mantissa
-; 5. If a carry in (4) occurred, then set high bit of mantissa and bump exponent
-; 6. If new exponent carries, then raise overflow
-; 7. Return result.
-;
-; Note that if we carried in (4), the only possible result is that the mantissa
-; rolled over to all zeroes so there is no need to shift the entire mantissa right
-; nor is there any reason to check for additional rounding.
-;
-; The above algorithm has some optimizations in the code sequence below.
-fp_normalize    pshs b                          ; save extra bits
-                clrb                            ; set shift counter/exponent adjustment
-fp_normalize0   lda val.fpmant,x                ; set flags on high word of mantissa
-                bne fp_normalize2               ; brif we don't have a full byte to shift
-                addb #8                         ; account for a while byte of shifts
-                ldu val.fpmant+1,x              ; shift mantissa left 8 bits
-                stu val.fpmant,x
-                lda val.fpmant+3,x
-                sta val.fpmant+2,x
-                lda ,s                          ; and include extra bits
-                sta val.fpmant+3,x
-                clr ,s                          ; and blank extra bits
-                cmpb #40                        ; have we shifted 40 bits?
-                blo fp_normalize0               ; brif not - keep shifting
-                bra fp_normalize7               ; go zero out the value
-fp_normalize1   incb                            ; account for one bit of shifting
-                lsl ,s                          ; shift mantissa and extra bits left (will not be more than 7 shifts)
-                rol val.fpmant+3,x
-                rol val.fpmant+2,x
-                rol val.fpmant+1,x
-                rol val.fpmant,x
-fp_normalize2   bpl fp_normalize1               ; brif we have to do a bit shift
-                pshs b                          ; apply exponent counter to exponent
-                lda val.fpexp,x
-                suba ,s+
-                bls fp_normalize6               ; brif we underflowed to zero; overflow is impossible
-fp_normalize3   lsl ,s+                         ; set C if the high bit of extra precision is set
-                bcs fp_normalize5               ; brif bit set - we have to do rounding
-fp_normalize4   rts                             ; return if no rounding
-fp_normalize5   ldu val.fpmant+2,x              ; add one to mantissa
-                leau 1,u
-                stu val.fpmant+2,x
-                bne fp_normalize4               ; brif low word doesn't carry
-                ldu val.fpmant,x
-                leau 1,u
-                stu val.fpmant,x
-                bne fp_normalize4               ; brif high word doesn't carry
-                ror val.fpmant,x                ; shift right C in to high bit of mantissa (already set to get here)
-                inc val.fpexp,x                 ; bump exponent for a right shift
-                bne fp_normalize4               ; brif it doesn't overflow (> +127)
-OVERROR2        jmp OVERROR                     ; raise overflow
-fp_normalize6   clr val.fpmant,x                ; clear mantissa
-                clr val.fpmant+1,x
-                clr val.fpmant+2,x
-                clr val.fpmant+3,x
-fp_normalize7   clr val.fpexp,x                 ; clear exponent and sign
-                clr val.fpsign,x
-                puls b,pc                       ; clean up stack and return
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Addition and subtraction of values; must enter with values of matching types
+; Addition and subtraction of values; must enter with values of matching types and the result type already set
+; to the correct type.
 ;
 ; Calculates (X) + (U) -> (Y) (addition)
 ; Calculates (X) - (U) -> (Y) (subtraction)
 val_add         ldb val.type,x                  ; get type of left operand
-                stb val.type,y                  ; set result type
-                cmpb #valtype_float             ; is it float?
-                beq fp_add                      ; brif so
-                ldd val.int+2,x                 ; do the addition
-                addd val.int+2,u
-                std val.int+2,y
-                ldd val.int,x
-                adcb val.int+1,u
-                adca val.int,u
-                std val.int,y
-                bvs OVERROR2                    ; brif calculation overflowed
-                rts
+                cmpb valtype_int                ; is it integer?
+                lbeq int32_add                  ; brif so - do integer addition
+                cmpb #valtype_float             ; floating point?
+                lbeq fps_add                    ; brif so - do floating point addition
+                jmp TMERROR                     ; we have a type we don't understand
 val_sub         ldb val.type,x                  ; get type of left operand
-                stb val.type,y                  ; set result type
+                cmpb valtype_int                ; is it integer?
+                lbeq int32_sub                  ; brif so - do integer addition
                 cmpb #valtype_float             ; floating point?
-                beq fp_sub                      ; brif so
-                ldd val.int+2,x                 ; do the subtraction
-                subd val.int+2,u
-                std val.int+2,y
-                ldd val.int,x
-                sbcb val.int+1,u
-                sbca val.int,u
-                std val.int,y
-                bvs OVERROR2                    ; brif overflow
-                rts
+                lbeq fps_sub                    ; brif so - do floating point addition
+                jmp TMERROR                     ; we have a type we don't understand
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Multiplication
+;
+; Calculates (X) × (U) -> (Y)
+;
+; The result might overflow the integer type. In this case, an actual overflow error will occur.
+val_mul         ldb val.type,x                  ; get type of left operand
+                cmpb #valtype_int               ; integer?
+                lbeq int32_mul                  ; brif so - do integer multiplication
+                cmpb #valtype_float             ; is it float?
+                lbeq fps_mul                    ; brif so - do floating point multiplication
+                jmp TMERROR                     ; have an unhandled type - bail on it
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; FP subtraction: just invert the sign of the second operand and add; operands must be writable and they should be
-; considered to be clobbered
-fp_sub          com val.fpsign,u                ; negate right operand
-                ; fall through to addition
-; FP addition: this requires that *both operands* are writable and they may be clobbered
-fp_add          ldb val.fpexp,u                 ; is the second operand zero?
-                beq fp_add0                     ; brif so - it's a no-op - copy the left operand to the output
-                lda val.fpexp,x                 ; is left operand zero?
-                bne fp_add1                     ; brif not - we have to do the add
-                leau ,x                         ; copy the right operand to the output
-fp_add0         ldd ,u                          ; copy the value across
-                std ,y
-                ldd 2,u
-                std 2,y
-                ldd 4,u
-                std 4,y
-                rts
-fp_add1         subb val.fpexp,x                ; get difference in exponents
-                beq fp_add6                     ; brif they're the same - no denormalizing is needed
-                bhi fp_add2                     ; brif second one is bigger, need to right-shift the mantissa of first
-                exg x,u                         ; swap the operands (we can do that for addition)l second is now biggest
-                negb                            ; invert the shift count
-fp_add2         cmpb #32                        ; are we shifting more than 32 bits?
-                blo fp_add0                     ; brif so - we're effectively adding zero so bail out
-fp_add3         cmpb #8                         ; have 8 bits to move?
-                blo fp_add5                     ; brif not
-                lda val.fpmant+2,x              ; shift 8 bits right
-                sta val.fpmant+3,x
-                lda val.fpmant+1,x
-                sta val.fpmant+2,x
-                lda val.fpmant,x
-                sta val.fpmant+1,x
-                clr val.fpmant,x
-                subb #8                         ; account for 8 shifts
-                bra fp_add3                     ; see if we have a whole byte to shift
-fp_add4         lsr val.fpmant,x                ; shift right one bit
-                ror val.fpmant+1,x
-                ror val.fpmant+2,x
-                ror val.fpmant+3,x
-fp_add5         decb                            ; done all shifts?
-                bmi fp_add4                     ; brif not - do a shift
-fp_add6         ldb val.fpexp,u                 ; set exponent of result
-                stb val.fpexp,y
-                ldb val.fpsign,u                ; fetch sign of larger value
-                stb val.fpsign,y                ; set result sign
-                cmpb val.fpsign,x
-                bne fp_add8                     ; brif not - need to subtract the operands
-                ldd val.fpmant+2,u              ; add the mantissas
-                addd val.fpmant+2,x
-                std val.fpmant+2,y
-                ldd val.fpmant,u
-                adcb val.fpmant+1,x
-                adca val.fpmant,x
-                std val.fpmant,y
-                ldb #0                          ; clear extra precision bits (preserve carry)
-                bcc fp_add7                     ; brif no carry
-                ror val.fpmant,y                ; shift carry into mantissa
-                ror val.fpmant+1,y
-                ror val.fpmant+2,y
-                ror val.fpmant+3,y
-                rorb                            ; keep bits for founding
-                inc val.fpexp,y                 ; bump exponent to account for shift
-                beq OVERROR2                    ; brif it overflowed
-fp_add7         leax ,y                         ; point to result
-                jmp fp_normalize                ; go normalize the result
-fp_add8         ldd val.fpmant+2,u              ; subtract operands
-                subd val.fpmant+2,x
-                std val.fpmant+2,y
-                ldd val.fpmant,u
-                sbcb val.fpmant+1,x
-                sbca val.fpmant,x
-                std val.fpmant,y
-                bcc fp_add7                     ; brif we didn't carry - no need to fix up
-                ldd zero                        ; negate the mantissa bits since we use sign+magnitude
-                subd val.fpmant+2,y
-                std val.fpmant+2,y
-                ldd zero
-                sbcb val.fpmant+1,y
-                sbca val.fpmant,y
-                std val.fpmant,y
-                neg val.fpsign,y                ; invert sign of result since we went past zero
-                clrb                            ; clear extra precision bits
-                bra fp_add7                     ; go normalize the result and return
+; Division
+;
+; Calculates (X) ÷ (U) -> (Y)
+;
+; The integer operation simply truncates the result ("rounds toward zero")
+val_div         ldb val.type,x                  ; get type of left operand
+                cmpb #valtype_int               ; integer?
+                lbeq int32_div                  ; brif so - do integerdivision
+                cmpb #valtype_float             ; floating point?
+                lbeq fps_div                    ; brif so - do floating point division
+                jmp TMERROR                     ; unsupported type
+                if 0
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Pack a floating point value at (X)
-fp_packval      ldb val.fpsign,x                ; get sign
-                bmi fp_packval                  ; brif negative - the default 1 bit will do
-                ldb val.fpmant,x                ; clear high bit of mantissa for positive
-                andb #0x7f
-                stb val.fpmant,x
-fp_packval0     rts
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Unpack a floating point value at (X)
-fp_unpackval0   ldb val.fpmant,x                ; get high byte of mantissa
-                sex                             ; now A is value for sign byte
-                sta val.fpsign,x                ; set sign
-                orb #0x80                       ; set high bit of mantissa
-                stb val.fpmant,x
-                rts
+; Modulus - note that this is a division operator returning effectively the remainder, not an absolute value as is
+; sometimes meant by "modulus".
+;
+; Calculates (X) <MOD> (U) -> (Y)
+;
+; Note: modulus will have the same sign as the quotient so that (U) * [(X) / (U)] + [(X) MOD (U)] gives (X) (integer)
+; Note2: the modulus can be calculated on floating point values in which case it will represent the fraction part
+;    of the quotient multiplied by the divisor, again with the same sign as the quotient
+val_mod         ldb val.type,x                  ; get type of left operand
+                cmpb #valtype_int               ; integer?
+                lbeq int32_mod                  ; do integer modulus
+                cmpb #valtype_float             ; floating point?
+                lbeq fps_mod                    ; floating point modulus
+                jmp TMERROR                     ; unsupported type
+                endc
                 *pragmapop list
diff -r df86e6d64ce2 -r bb50ac9fdf37 src/print.s
--- a/src/print.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/print.s	Sat Oct 07 02:56:59 2023 -0600
@@ -66,7 +66,7 @@
                 lda #0x20                       ; default sign (positive)
                 ldb val0+val.int                ; is it negative?
                 bpl cmd_printint0               ; brif not
-                jsr val_negint32                ; negate the integer
+                jsr int32_neg                   ; negate the integer
                 lda #'-                         ; negative sign
 cmd_printint0   sta ,y+                         ; save sign
                 ldu #cmd_printintpc             ; point to positive constant table
diff -r df86e6d64ce2 -r bb50ac9fdf37 src/vars.s
--- a/src/vars.s	Sun Sep 10 23:18:17 2023 -0600
+++ b/src/vars.s	Sat Oct 07 02:56:59 2023 -0600
@@ -39,8 +39,29 @@
 tok_kwnum       rmb 1                           ; the actual token number
 tok_kwmatchl    rmb 1                           ; the length of the best match during lookup
 tok_kwmatch     rmb 2                           ; the current best matched token number
-val0            rmb val.size                    ; value accumulator 0
-val1            rmb val.size                    ; value accumulator 1
+; General value accumulators used during expression evaluation
+val0            rmb val.size                    ; value accumulator 0 - current expression value
+val1            rmb val.size                    ; value accumulator 1 - usually left operand of binary operator
+; The fpa0 and fpa1 areas are used for scratch work during floating point operations. They are only used
+; by floating point operations. This saves a fair fiew clock cycles over simply working off the index register
+; pointers passed into the routines and it also allows for being able to leave the input operands for the
+; routines unmodified, or to overlap the input and output operands. These floating point accumulators can hold
+; the maximum precision floating point values used by the system.
+fpa0            rmb fpa.size                    ; floating point accumulator 1
+fpa1            rmb fpa.size                    ; floating point accumulator 1
+fpa0extra       rmb 1                           ; "extra" bytes for calculations
+fpa0extra1      rmb 1
+fpa0extra2      rmb 1
+fpa0extra3      rmb 1
+fpa0extra4      rmb 1
+fpa0extra5      rmb 1
+fpa0extra6      rmb 1
+fpa0extra7      rmb 1
+fpa0extra8      rmb 1
+fpa0extra9      rmb 1
+fpa0extra10     rmb 1
+fpa0extra11     rmb 1
+fpa0extra12     rmb 1
                 rmb 0x71-*                      ; align RSTFLG/RSTVEC for stock ROM compatibility
 RSTFLG          rmb 1                           ; 0x55 if RSTVEC is valid
 RSTVEC          rmb 2                           ; points to warm start routine (must start with NOP)