# HG changeset patch
# User William Astle <lost@l-w.ca>
# Date 1697429736 21600
# Node ID 663d8e77b579cb07accdd045039691d2cdc68585
# Parent  f959c92bc3294bb4cfbc0df30499a71755b7bfea
Implmement BCD floating point and update number parsing and printing

Implements a BCD floating point system with 10 decimal digits of precistion
and an exponent range of -63 to +63. Also include parsing integer and
floating point values and printing them out.

diff -r f959c92bc329 -r 663d8e77b579 README.txt
--- a/README.txt	Sun Oct 08 00:17:20 2023 -0600
+++ b/README.txt	Sun Oct 15 22:15:36 2023 -0600
@@ -43,3 +43,49 @@
 and, thus, will not autostart whatever Disk Basic you happen to have
 installed. This is most relevant on a Coco 3 where the upper 16K of the
 LWBasic ROM is internal instead of in a cartrige.
+
+
+Numbers
+=======
+
+LWBasic has three numeric types: a 32 bit signed integer, stored as two's
+complement, a decimal floating point type stored in packed BCD with
+10 digits of precision and a base 10 exponent range of -63 to +63, and a
+double precision decimal floating point type with 20 decimal digits of
+precision and a base 10 exponent range from -2047 to +2047.
+
+The BCD format using 48 bits is stored as follows:
+
+Offset	Size	Content
+0	1	Sign bit - 1 for negative
+1	7	Decimal exponent with a bias of 64; 0 indicates a value of 0
+8	40	10 BCD digits of the significand
+
+*** Planned but not implememted
+The BCD double format using 96 bits is stored as follows:
+
+Offset	Size	Content
+0	1	sign bit - 1 for negative
+1	3	<reserved>
+4	12	Decimal exponent with a bias of 2048; 0 indicates value of 0
+16	80	20 BCD digits of the significand
+
+It is worth noting the reason for using the BCD format instead of binary
+floating point. Because interactions with the computer are typically in base
+10, it makes sense to avoid the horrendous complications of attempting to
+maximize the accuracy of converting binary floating point to decimal and
+back again. While this is trivial for integers, it is non-trivial for
+floating point where the variations in accuracy and the need for rounding is
+unavoidable.
+
+At the expense of some extra CPU cycles for doing the calculations, base 10
+accuracy is preserved, at least to the precision limit of the data type.
+This also allows pre-parsing numbers and being able to display them again
+accurately when generating a program listing, even if extra nonsignificant
+digits like trailing decimal zeroes are not reproduced.
+
+It is, however, critical to note that despite the BCD representation, these
+values are still floating point and have the inherent inaccuracies of all
+limited precision data types. The simple difference is that it makes the
+numbers in the computer behave exactly like scientific notiation with no
+unexpected surprises.
diff -r f959c92bc329 -r 663d8e77b579 src/defs.s
--- a/src/defs.s	Sun Oct 08 00:17:20 2023 -0600
+++ b/src/defs.s	Sun Oct 15 22:15:36 2023 -0600
@@ -43,14 +43,18 @@
 ; Value type constants
 valtype_none    equ 0                           ; unknown value type
 valtype_int     equ 1                           ; integer (32 bit) value (signed)
-valtype_float   equ 2                           ; float type (40 bit) value
+valtype_float   equ 2                           ; BCD float type (48 bit) value
 valtype_string  equ 3                           ; string type (16 bit length, 16(32) bit data pointer
 ; Floating point accumulator structure definitions
-fps.exp         equ 0                           ; single precision exponent
-fps.sig         equ fps.exp+1                   ; single precision significand
-fps.sign        equ fps.sig+4                   ; single precision sign
-fps.size        equ fps.sign+1
-fpa.size        equ fps.size                    ; use the largest floating point accumulator size
+;
+; Note: the extra precision bytes are needed for general calculations and need to be at least the same
+; size as the significand itself; the exponent will be stored with the bias. The accumulators are
+; unpacked.
+fpa.exp         equ 0                           ; exponent - use largest size needed for any precision
+fpa.sig         equ fpa.exp+1                   ; significand - use largest size needed for any precision
+fpa.extra       equ fpa.sig+5                   ; extras; largest size needed for any precision, must follow significand
+fpa.sign        equ fpa.extra+5                 ; sign flag
+fpa.size        equ fpa.sign+1                  ; use the largest floating point accumulator size
 ; String data definition
 str.len         equ 0                           ; string length (2 bytes)
 str.ptr         equ str.len+2                   ; string data pointer (3 bytes)
@@ -58,13 +62,13 @@
 ; Value accumulator structure definitions; note that the actual value data must be first and any
 ; incidental meta data must follow
 val.value       equ 0                           ; offset of the value stored in the accumulator
-val.fpsexp      equ val.value+fps.exp           ; floating point exponent
-val.fpssig      equ val.value+fps.sig           ; floating point significand
-val.fpssign     equ val.value+fps.sign          ; floating point sign
+val.fpsexp      equ val.value                   ; floating point exponent
+val.fpssig      equ val.fpsexp+1                ; floating point significand
+val.fpssign     equ val.fpssig+5                ; floating point sign
 val.int         equ val.value                   ; integer offset
 val.strlen      equ val.value+str.len           ; string length offset
 val.strptr      equ val.value+str.ptr           ; string data pointer (low word)
-val.type        equ val.value+fps.size          ; use the largest of the data types here
+val.type        equ val.value+val.fpssign+1     ; use the largest of the data types here
 val.size        equ val.type+1                  ; size of a value accumulator
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
                 ifdef COCO3
diff -r f959c92bc329 -r 663d8e77b579 src/fps-bin.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/fps-bin.s	Sun Oct 15 22:15:36 2023 -0600
@@ -0,0 +1,488 @@
+                *pragmapush list
+                *pragma list
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision floating point arithmetic package
+;
+; Floating point values are stored in 6 byte packets (single precision) organized as follows:
+; Offset        Length          Contents
+; 0             1               8 bit binary exponent with a bias of 128; 0 means the number is 0
+; 1             4               32 bit significand
+; 5             1               sign flag; zero for positive, 0xff for negative
+;
+; Binary operateions take pointers to their arguments in X and U. Y contains the result location. In all cases, it is
+; safe to specify either one of the arguments as the result location. Unary operations take their operand pointer in
+; X and their result pointer in Y. In all cases, there is an in place version of the unary operation that operates on
+; its input and only needs the single pointer in X.
+;
+; On the topic of optimization: these routines copy their operands to accumulators in the direct page. This saves one
+; cycle per instruction which has a nonzero offset. In addition, this means that the input operands can remain
+; unmodified by the actual operations. For instance, addition requires denormalizing one operand which would otherwise
+; have to be done in place.
+;
+; NOTE: the input pointers X and U may be clobbered.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Convert 32 bit unsigned integer at (X) to single precision floating point at (U)
+fps_fromuint32  clr fpa0+fps.sign               ; set result sign to positive
+                bra fps_fromint32a              ; go do conversion
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Convert 32 bit signed integer at (X) to single precision floating point at value accumulator in (Y)
+fps_fromint32   ldb ,x                          ; set sign based on signed integer
+                sex
+                sta fpa0+fps.sign               ; set result sign to the two's complement sign
+                bpl fps_fromint32a              ; brif positive - no need to mess with bits
+                jsr int32_neg                   ; make the bits positive
+fps_fromint32a  ldb valtype_float               ; set output value type to floating point
+                stb val.type,y
+                ldd ,x                          ; copy integer bits to fpa0
+                std fpa0+fps.sig
+                ldd 2,x
+                std fpa0+fps.sig+2
+                ldb #0xa0                       ; put binary point to the right of the significand
+                stb fpa0+fps.exp
+                clr fpa0extra                   ; clear extra precision
+                jmp fps_add10                   ; go normalize the result and return
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Convert 64 bit unsigned value at (X) to single precision floating point in value accumulator at (Y)
+;
+; Cases:
+; * byte 0 is first nonzero - exponent at 64 bits right, use upper 0 to 4
+; * byte 1 is first nonzero - exponent at 56 bits right, use bytes 1 to 5
+; * byte 2 is first nonzero - exponent at 48 bits right, use bytes 2 to 6
+; * otherwise - exponent at 40 bits right, use bytes 3 to 7
+fps_fromuint64  clra                            ; set sign to positive
+fps_fromuint64s sta fpa0+fps.sign               ; save sign of result
+                ldb #0xc0                       ; exponent if binary point is 64 bits to the right
+                lda ,x+                         ; is the first byte zero?
+                bne fps_fromuint64a             ; brif not
+                subb #8                         ; lose a byte off exponent
+                lda ,x+                         ; is the second byte zero?
+                bne fps_fromuint64a             ; brif not
+                subb #8                         ; lose another byte off exponent
+                lda ,x+                         ; is third byte zero?
+                bne fps_fromuint64a             ; brif not
+                subb #8                         ; lose another byte
+                lda ,x+                         ; get first byte to copy
+fps_fromuint64a stb fpa0+fps.exp                ; save exponent
+                sta fpa0+fps.sig                ; save high byte of significand
+                ldd ,x                          ; copy next two bytes to significand
+                std fpa0+fps.sig+1
+                ldd 2,x                         ; and the final byte and extra precision
+                sta fpa0+fps.sig+3
+                stb fpa0extra
+                jmp fps_add10                   ; go normalize the result and return
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Fast multiply (X) by 10, in place.
+;
+; * first, save original value
+; * then, shift left by 2 bits (add 2 to exponent)
+; * then, add original value
+; * then, shift left one more (add 1 to exponent)
+;
+; This should be faster than multiplying by 10.
+fps_mul10       leas -fps.size,s                ; make a temporary to hold original value
+                ldd ,x                          ; copy original value
+                std ,s
+                ldd 2,x
+                std 2,s
+                ldd 4,x
+                std 4,s
+                lda fps.exp,x                   ; bump original exponent by 2 (times 4)
+                adda #2
+                bcc fps_mul10b                  ; brif it overflowed
+fps_mul10a      jmp OVERROR                     ; raise overflow
+fps_mul10b      sta fps.exp,x
+                leay ,x
+                leau ,s
+                bsr fps_add                     ; add original value (times 5)
+                leas fps.size,s                 ; clean up temporary
+                inc fps.exp,y                   ; bump exponent (times 10) in result
+                beq fps_mul10a                  ; brif it overflowed
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Unary negation - negate (X) to (Y)
+fps_neg         ldd 2,x                         ; copy to output and keep exponent in A
+                std 2,y
+                ldd 4,x
+                std 4,y
+                ldd ,x
+                std ,y
+                tsta                            ; is the number zero?
+                beq fps_neg0                    ; brif so - do nothing
+                com fps.sign,y                  ; flip the sign
+fps_neg0        rts
+fps_negx        lda fps.exp,x                   ; is the number zero?
+                beq fps_negx0                   ; brif so - do nothing
+                com fps.sign,x                  ; flip the sign
+fps_negx0       rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copy (X) to fpa0 and (U) to fpa1
+fps_copyinputs  ldd ,x                          ; copy (X) to fpa0
+                std fpa0
+                ldd 2,x
+                std fpa0+2
+                ldd 4,x
+                std fpa0+4
+                ldd ,u                          ; copy (U) to fpa0
+                std fpa1
+                ldd 2,u
+                std fpa1+2
+                ldd 4,u
+                std fpa1+4
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Subtraction (X) - (U) to (Y)
+fps_sub         bsr fps_copyinputs              ; copy input operands
+                com fpa1+fps.sign               ; negate the subtrahend (don't need to handle zero here)
+                bra fps_add0                    ; go handle it as addition
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Addition (X) + (U) to (Y)
+fps_add         bsr fps_copyinputs              ; copy input operands
+fps_add0        lda fpa1+fps.exp                ; is the second operand 0?
+                bne fps_add1                    ; brif not
+                ldd fpa0                        ; copy first operand to output
+                std ,y
+                ldd fpa0+2
+                std 2,y
+                ldd fpa0+4
+                std 4,y
+                rts
+fps_add1        lda fpa0+fps.exp                ; get exponent of first operand
+                bne fps_add2                    ; brif not zero
+                ldd fpa1                        ; copy second operand to output
+                std ,y
+                ldd fpa1+2
+                std 2,y
+                ldd fpa1+4
+                std 4,y
+                rts
+fps_add2        clr fpa0extra                   ; clear extra precision bits
+                lda fpa0+fps.exp                ; get first operand exponent
+                suba fpa1+fps.exp               ; get difference in exponents
+                beq fps_add8                    ; brif we don't need to denormalize - they're the same
+                blo fps_add3                    ; brif we need to denormalize the first operand
+                ldx #fpa1                       ; point to second operand to denormalize
+                bra fps_add4                    ; go denormalize
+fps_add3        ldb fpa1+fps.exp                ; get exponent of second operand (result exponent)
+                stb fpa0+fps.exp                ; set result exponent
+                ldx #fpa0                       ; point to first operand to denormalize
+                nega                            ; get number of bits to shift as positive number
+fps_add4        suba #8                         ; is there 8 bits left?
+                blo fps_add5                    ; brif not
+                ldb fps.sig+3,x                 ; shift significand 8 bits right
+                stb fpa0extra                   ; save extra precision bits
+                ldb fps.sig+2,x
+                stb fps.sig+3,x
+                ldb fps.sig+1,x
+                stb fps.sig+2,x
+                ldb fps.sig,x
+                stb fps.sig+1,x
+                clr fps.sig,x
+                bra fps_add4                    ; see if we have another byte to shift
+fps_add5        adda #8                         ; adjust for extra subtract above
+                bra fps_add7                    ; do the bit shifting
+fps_add6        lsr fps.sig,x                   ; shift one bit right
+                ror fps.sig+1,x
+                ror fps.sig+2,x
+                ror fps.sig+3,x
+                ror fpa0extra
+                deca                            ; done all of the bits?
+fps_add7        bne fps_add6                    ; brif not
+fps_add8        lda fpa0+fps.sign               ; compare the signs of the operands
+                eora fpa1+fps.sign              ; set A if signs differ
+                bne fps_add9                    ; brif they differ - do subtraction
+                ldd fpa0+fps.sig+2              ; add low word of significands
+                addd fpa1+fps.sig+2
+                std fpa0+fps.sig+2
+                ldd fpa0+fps.sig                ; and the high word
+                adcb fpa1+fps.sig+1
+                adca fpa1+fps.sig
+                std fpa0+fps.sig
+                bcc fps_add9                    ; brif no carry
+                ror fpa0+fps.sig                ; shift carry into significand
+                ror fpa0+fps.sig+1
+                ror fpa0+fps.sig+2
+                ror fpa0+fps.sig+3
+                ror fpa0extra                   ; and the extra bits (for rounding)
+                inc fpa0+fps.exp                ; bump exponent to account for bit shift
+                bne fps_add14                   ; go check for round-off if not overflow
+OVERROR         ldb #err_ov                     ; raise overflow
+                jmp ERROR 
+fps_add9        ldd fpa0+fps.sig+2              ; subtract low word
+                subd fpa1+fps.sig+2
+                std fpa0+fps.sig+2
+                ldd fpa0+fps.sig                ; and now the high word
+                sbcb fpa1+fps.sig+1
+                sbca fpa1+fps.sig
+                std fpa0+fps.sig
+                bcc fps_add10                   ; brif we didn't carry
+                com fpa0+fps.sign               ; flip result sign - other number was bigger
+                com fpa0+fps.sig+3              ; negate two's complement result to be just the magnitude
+                com fpa0+fps.sig+2
+                com fpa0+fps.sig+1
+                com fpa0+fps.sig
+                ldx fpa0+fps.sig+2              ; add 1 to complete negation
+                leax 1,x
+                stx fpa0+fps.sig+2
+                bne fps_add10                   ; brif carry doesn't propagate
+                ldx fpa0+fps.sig                ; propagate carry
+                leax 1,x
+                stx fpa0+fps.sig                ; NOTE: this cannot carry because magnitude got smaller
+fps_add10       clra                            ; initialize exponent offset
+fps_add11       ldb fpa0+fps.sig                ; do we have nonzero bits in high byte of significand?
+                bne fps_add13                   ; brif so
+                ldb fpa0+fps.sig+1              ; shift left 8 bits
+                stb fpa0+fps.sig
+                ldb fpa0+fps.sig+2
+                stb fpa0+fps.sig+1
+                ldb fpa0+fps.sig+3
+                stb fpa0+fps.sig+2
+                ldb fpa0extra                   ; and extra precision bits
+                stb fpa0+fps.sig+3
+                clr fpa0extra
+                addb #8                         ; account for number of bits shifted
+                cmpb #40                        ; done 40 bits?
+                blo fps_add11                   ; brif not - see if we have more bits to shift
+                clr fpa0+fps.exp                ; number underflowed to zero - set it so
+                clr fpa0+fps.sign
+                clr fpa0+fps.sig
+                clr fpa0+fps.sig+1
+                clr fpa0+fps.sig+2
+                clr fpa0+fps.sig+3
+                bra fps_add16                   ; go return result
+fps_add12       inca                            ; account for a bit shift
+                lsl fpa0extra                   ; shift significand and extra bits left
+                rol fpa0+fps.sig+3
+                rol fpa0+fps.sig+2
+                rol fpa0+fps.sig+1
+                rol fpa0+fps.sig
+fps_add13       bpl fps_add12                   ; brif we haven't normalized yet
+fps_add14       ldb fpa0extra                   ; do we need to round?
+                bpl fps_add16                   ; brif not
+                ldx fpa0+fps.sig+2              ; add one to significand
+                leax 1,x
+                stx fpa0+fps.sig+2
+                bne fps_add16                   ; brif no carry
+                ldx fpa0+fps.sig                ; bump the upper word of significand
+                leax 1,x
+                stx fpa0+fps.sig
+                bne fps_add16
+fps_add15       inc fpa0+fps.exp                ; bump exponent
+                beq OVERROR                     ; brif it overflowed
+                lda #0x80                       ; set high bit of significand (rest is zero)
+                sta fpa0+fps.sig
+fps_add16       ldd fpa0                        ; copy result to destination
+                std ,y
+                ldd fpa0+2
+                std 2,y
+                ldd fpa0+4
+                std 4,y
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision multiplication (X) × (U) to (Y)
+fps_mul         lda fps.exp,x                   ; is first operand zero?
+                beq fps_mul0                    ; brif so - return zero
+                lda fps.exp,u                   ; is second operand zero?
+                bne fps_mul1                    ; brif not - have to do the multiply
+fps_mul0        ldd zero                        ; return zero result
+                std ,y
+                std 2,y
+                std 4,y
+                rts
+fps_mul1        jsr fps_copyinputs              ; copy input operands
+                lda fpa0+fps.sign               ; calculate sign of result - xor of the two signs
+                eora fpa1+fps.sign
+                sta fpa0+fps.sign               ; save result sign
+                lda fpa0+fps.exp                ; get exponent of first value
+                adda fpa1+fps.exp               ; calculate new exponent; also cancels the bias
+                rora                            ; set V if C and N differ
+                rola
+                bvc fps_mul3                    ; brif maybe an overflow
+                adda #0x80                      ; add back the bias
+                sta fpa0+fps.sign               ; save new sign
+                beq fps_mul0                    ; brif we underflowed - zero out sign
+; This does a shift-and-add multiplication algorithm. This is slower than an equivalent using MUL but is smaller.
+; The high order bytes will be left in fpa0 with the low order bytes in fpa0extra and fpa0extra[1-3]. The low order
+; bytes are kept for the odd case where extra precision is useful. Uses fpa0extra[4-7] as temporaries.
+fps_mul2        ldd zero                        ; zero out temporary bytes
+                std fpa0extra4
+                std fpa0extra6
+                ldb fpa0+fps.sig+3              ; multiply by low byte of fpa0
+                bsr fps_mul4
+                ldb fpa0extra                   ; move low bites
+                stb fpa0extra3
+                ldb fpa0+fps.sig+2              ; multiply by next higher byte
+                bsr fps_mul4
+                ldb fpa0extra                   ; move low bits
+                stb fpa0extra2
+                ldb fpa0+fps.sig+1              ; and again for the next higher byte
+                bsr fps_mul4
+                ldb fpa0extra
+                stb fpa0extra1
+                ldb fpa0+fps.sig                ; and the high order byte
+                bsr fps_mul4
+                ldd fpa0extra4                  ; copy high order product bits to result
+                std fpa0+fps.sig
+                ldd fpa0extra6
+                std fpa0+fps.sig+2
+                jmp fps_add10                   ; go normalize the result and return
+fps_mul3        bpl fps_mul0                    ; brif we underflowed - return 0
+                jmp OVERROR                     ; raise overflow
+fps_mul4        bne fps_mul5                    ; brif not multiply by zero
+                lda fpa0+fps.sig+3              ; shift 8 bits right
+                sta fpa0extra
+                lda fpa0+fps.sig+2
+                sta fpa0+fps.sig+3
+                lda fpa0+fps.sig+1
+                sta fpa0+fps.sig+2
+                lda fpa0+fps.sig
+                sta fpa0+fps.sig+1
+                clr fpa0+fps.sig
+fps_mul8        rts
+fps_mul5        coma                            ; set C
+fps_mul6        lda fpa0extra4                  ; get high byte of result bytes
+                rorb                            ; is multiplier bit set?
+                beq fps_mul8                    ; brif 8 shifts done (C set above makes sure of that)
+                bcc fps_mul7                    ; brif bit not set - don't do addition
+                lda fpa0extra7                  ; add multiplier (fpa1) to result
+                adda fpa1+fps.sig+3
+                sta fpa0extra7
+                lda fpa0extra6
+                adca fpa1+fps.sig+2
+                sta fpa0extra6
+                lda fpa0extra5
+                adca fpa1+fps.sig+1
+                sta fpa0extra5
+                lda fpa0extra4
+                adca fpa1+fps.sig
+fps_mul7        rora                            ; rotate carry in (from add or 0)
+                sta fpa0extra4
+                ror fpa0extra5
+                ror fpa0extra6
+                ror fpa0extra7
+                ror fpa0extra                   ; and into the extra precision bits
+                clra                            ; clear carry - so shift above will terminate
+                bra fps_mul6                    ; go do another bit
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Divide (X) by 10 in place
+fps_const10     fcb 0x83,0xa0,0x00,0x00,0x00,0x00 ; single precision unpacked constant 10
+fps_div10       ldu #fps_const10                ; point to constant 10
+                leay ,x                         ; put output in input
+                ; fall through to regular division
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision division (X) ÷ (U) -> (Y)
+;
+; This is basically the same algorithm used in the Color Basic ROM
+fps_div         lda fps.exp,u                   ; is divisor 0?
+                bne fps_div0
+DIV0ERROR       ldb #err_div0                   ; raise division by zero
+                jmp ERROR
+fps_div0        lda fps.exp,x                   ; is dividend 0?
+                lbeq fps_mul0                   ; brif so - return 0
+                jsr fps_copyinputs              ; copy input values
+                lda fpa0+fps.sign               ; calculate result sign - xor of operand signs
+                eora fpa1+fps.sign
+                sta fpa0+fps.sign               ; save result sign
+                lda fpa1+fps.exp                ; get divisor exponent
+                nega                            ; negate for subtraction
+                adda fpa0+fps.exp               ; subtract it from dividend exponent
+                rora                            ; set V if C and N differ
+                rola
+                bvc fps_mul3                    ; brif overflow or underflow
+                adda #0x80                      ; add back the bias
+                sta fpa0+fps.sign               ; save new sign
+                lbeq fps_mul0                   ; brif we underflowed - zero out sign
+                inca                            ; bump exponent - why?? related to the bias stuff above?
+                lbeq OVERROR                    ; brif it overflows
+                sta fpa0+fps.exp                ; save result exponent
+                ldx #fpa0extra4                 ; point to temporary storage bytes for quotient
+                ldb #4                          ; counter for 4 significand bytes and one extra partial byte
+                stb fpa0extra1                  ; save counter since we need both accumulators
+                ldb #1                          ; shift counter flag and quotient byte
+fps_div1        lda fpa1+fps.sig                ; set C if fpa0 significand <= fpa1 significand
+                cmpa fpa0+fps.sig
+                bne fps_div2
+                lda fpa1+fps.sig+1
+                cmpa fpa0+fps.sig+1
+                bne fps_div2
+                lda fpa1+fps.sig+2
+                cmpa fpa0+fps.sig+2
+                bne fps_div2
+                lda fpa1+fps.sig+3
+                cmpa fpa0+fps.sig+3
+                bne fps_div2
+                coma                            ; set C if values are the same
+fps_div2        tfr cc,a                        ; save C for later, C clear if fpa1 > fpa0
+                rolb                            ; shift carry into quotient
+                bcc fps_div3                    ; brif carry clear - we haven't done 8 bits yet
+                stb ,x+                         ; save quotient byte after a full set of bits
+                dec fpa0extra1                  ; have we done all the bytes?
+                bmi fps_div7                    ; brif all bytes plus extra precision - done all
+                beq fps_div6                    ; brif all main sigificand bytes - do a couple extra bits
+                ldb #1                          ; reset the bit counter flag
+fps_div3        tfr cc,a                        ; get back original carry from compare
+                bcs fps_div5                    ; brif it "went"
+fps_div4        lsl fpa0+fps.sig+3              ; shift dividend left
+                rol fpa0+fps.sig+2
+                rol fpa0+fps.sig+1
+                rol fpa0+fps.sig
+                bcs fps_div2                    ; brif it carries - next bit "goes"
+                bmi fps_div1                    ; check magnitudes of next bit
+                bra fps_div2                    ; carry clear - check another bit
+fps_div5        lda fpa0+fps.sig+3              ; subtract divisor from dividend bits
+                suba fpa1+fps.sig+3
+                sta fpa0+fps.sig+3
+                lda fpa0+fps.sig+2
+                sbca fpa1+fps.sig+2
+                sta fpa0+fps.sig+2
+                lda fpa0+fps.sig+1
+                sbca fpa1+fps.sig+1
+                sta fpa0+fps.sig+1
+                lda fpa0+fps.sig
+                sbca fpa1+fps.sig
+                sta fpa0+fps.sig
+                bra fps_div4                    ; now do the bit shift to line things up
+fps_div6        ldb #0x40                       ; only do two bits of extra precision byte
+                bra fps_div2                    ; go handle these bitsd
+fps_div7        rorb                            ; get extra quotient bits to bit 7,6 and bit 5 set
+                rorb
+                rorb
+                stb fpa0extra                   ; save extra precision bits
+                ldd fpa0extra4                  ; copy result bits to fpa0
+                std fpa0+fps.sig
+                ldd fpa0extra6
+                std fpa0+fps.sig+2
+                jmp fps_add10                   ; go normalize the result
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Pack single precision number at (X) to (U)
+fps_pack        lda fps.sign,x                  ; get sign of number (will be 0 for 0)
+                ora #0x7f                       ; make sure low bits are set for merging
+                anda fps.sig,x                  ; merge with high bits of significand
+                ldb fps.sig+1,x                 ; get upper mid bits of significand
+                std fps.sig,u
+                ldd fps.sig+2,x
+                std fps.sig+2,u
+                lda fps.exp,x
+                sta fps.exp,u
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Unpack single precision number at (X) to (U)
+fps_unpack      lda fps.exp,x                   ; get exponent of value
+                beq fps_unpack0                 ; brif value is zero
+                sta fps.exp,u
+                ldb fps.sig,x                   ; get high byte of significand
+                sex                             ; make sign value in A
+                sta fps.sign,u                  ; set sign in result
+                ldd fps.sig,x                   ; get high word of sifnificand
+                ora #0x80                       ; make sure high bit is set
+                std fps.sig,u                   ; save high word in result
+                ldd fps.sig+2,x                 ; copy middle bytes of significand
+                std fps.sig+2,u
+                rts
+fps_unpack0     sta fps.exp,u                   ; zero out destination
+                sta fps.sig,u
+                sta fps.sig+1,u
+                sta fps.sig+2,u
+                sta fps.sig+3,u
+                sta fps.sign,u
+                rts
+                *pragmapop list
diff -r f959c92bc329 -r 663d8e77b579 src/fps.s
--- a/src/fps.s	Sun Oct 08 00:17:20 2023 -0600
+++ b/src/fps.s	Sun Oct 15 22:15:36 2023 -0600
@@ -3,486 +3,885 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Single precision floating point arithmetic package
 ;
-; Floating point values are stored in 6 byte packets (single precision) organized as follows:
-; Offset        Length          Contents
-; 0             1               8 bit binary exponent with a bias of 128; 0 means the number is 0
-; 1             4               32 bit significand
-; 5             1               sign flag; zero for positive, 0xff for negative
-;
-; Binary operateions take pointers to their arguments in X and U. Y contains the result location. In all cases, it is
-; safe to specify either one of the arguments as the result location. Unary operations take their operand pointer in
-; X and their result pointer in Y. In all cases, there is an in place version of the unary operation that operates on
-; its input and only needs the single pointer in X.
-;
-; On the topic of optimization: these routines copy their operands to accumulators in the direct page. This saves one
-; cycle per instruction which has a nonzero offset. In addition, this means that the input operands can remain
-; unmodified by the actual operations. For instance, addition requires denormalizing one operand which would otherwise
-; have to be done in place.
+; The single precision floating point values are stored as follows (unpacked):
 ;
-; NOTE: the input pointers X and U may be clobbered.
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Convert 32 bit unsigned integer at (X) to single precision floating point at (U)
-fps_fromuint32  clr fpa0+fps.sign               ; set result sign to positive
-                bra fps_fromint32a              ; go do conversion
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Convert 32 bit signed integer at (X) to single precision floating point at value accumulator in (Y)
-fps_fromint32   ldb ,x                          ; set sign based on signed integer
-                sex
-                sta fpa0+fps.sign               ; set result sign to the two's complement sign
-                bpl fps_fromint32a              ; brif positive - no need to mess with bits
-                jsr int32_neg                   ; make the bits positive
-fps_fromint32a  ldb valtype_float               ; set output value type to floating point
-                stb val.type,y
-                ldd ,x                          ; copy integer bits to fpa0
-                std fpa0+fps.sig
-                ldd 2,x
-                std fpa0+fps.sig+2
-                ldb #0xa0                       ; put binary point to the right of the significand
-                stb fpa0+fps.exp
-                clr fpa0extra                   ; clear extra precision
-                jmp fps_add10                   ; go normalize the result and return
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Convert 64 bit unsigned value at (X) to single precision floating point in value accumulator at (Y)
+; Byte          Length          What
+; 0             1               Exponent with a +63 bias, 0 = number is zero
+; 1             5               BCD significand (10 digits)
+; 6             1               Sign (00 = positive, FF = negative)
 ;
-; Cases:
-; * byte 0 is first nonzero - exponent at 64 bits right, use upper 0 to 4
-; * byte 1 is first nonzero - exponent at 56 bits right, use bytes 1 to 5
-; * byte 2 is first nonzero - exponent at 48 bits right, use bytes 2 to 6
-; * otherwise - exponent at 40 bits right, use bytes 3 to 7
-fps_fromuint64  clra                            ; set sign to positive
-fps_fromuint64s sta fpa0+fps.sign               ; save sign of result
-                ldb #0xc0                       ; exponent if binary point is 64 bits to the right
-                lda ,x+                         ; is the first byte zero?
-                bne fps_fromuint64a             ; brif not
-                subb #8                         ; lose a byte off exponent
-                lda ,x+                         ; is the second byte zero?
-                bne fps_fromuint64a             ; brif not
-                subb #8                         ; lose another byte off exponent
-                lda ,x+                         ; is third byte zero?
-                bne fps_fromuint64a             ; brif not
-                subb #8                         ; lose another byte
-                lda ,x+                         ; get first byte to copy
-fps_fromuint64a stb fpa0+fps.exp                ; save exponent
-                sta fpa0+fps.sig                ; save high byte of significand
-                ldd ,x                          ; copy next two bytes to significand
-                std fpa0+fps.sig+1
-                ldd 2,x                         ; and the final byte and extra precision
-                sta fpa0+fps.sig+3
-                stb fpa0extra
-                jmp fps_add10                   ; go normalize the result and return
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Fast multiply (X) by 10, in place.
-;
-; * first, save original value
-; * then, shift left by 2 bits (add 2 to exponent)
-; * then, add original value
-; * then, shift left one more (add 1 to exponent)
-;
-; This should be faster than multiplying by 10.
-fps_mul10       leas -fps.size,s                ; make a temporary to hold original value
-                ldd ,x                          ; copy original value
-                std ,s
-                ldd 2,x
-                std 2,s
-                ldd 4,x
-                std 4,s
-                lda fps.exp,x                   ; bump original exponent by 2 (times 4)
-                adda #2
-                bcc fps_mul10b                  ; brif it overflowed
-fps_mul10a      jmp OVERROR                     ; raise overflow
-fps_mul10b      sta fps.exp,x
-                leay ,x
-                leau ,s
-                bsr fps_add                     ; add original value (times 5)
-                leas fps.size,s                 ; clean up temporary
-                inc fps.exp,y                   ; bump exponent (times 10) in result
-                beq fps_mul10a                  ; brif it overflowed
+; Unpack single precision BCD floating point at (X) to fpa1
+fps_unpack1     ldb ,x                          ; get sign and exponent
+                bne fps_unpack1b                ; brif not zero
+                ldd zero                        ; zero out the entire value to represent 0
+                std fpa1
+                std fpa1+2
+                std fpa1+4
+                sta fpa1+6
+                rts
+fps_unpack1b    sex                             ; get sign flag to store
+                sta fpa1+fpa.sign               ; set accumulator
+                anda #0x7f                      ; lose sign from exponent
+                sta fpa1+fpa.exp                ; set exponent
+                ldd 1,x                         ; copy signficand over
+                std fpa1+fpa.sig
+                ldd 3,x
+                std fpa1+fpa.sig+2
+                lda 5,x
+                sta fpa1+fpa.sig+4
                 rts
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Unary negation - negate (X) to (Y)
-fps_neg         ldd 2,x                         ; copy to output and keep exponent in A
-                std 2,y
-                ldd 4,x
-                std 4,y
-                ldd ,x
-                std ,y
-                tsta                            ; is the number zero?
-                beq fps_neg0                    ; brif so - do nothing
-                com fps.sign,y                  ; flip the sign
-fps_neg0        rts
-fps_negx        lda fps.exp,x                   ; is the number zero?
-                beq fps_negx0                   ; brif so - do nothing
-                com fps.sign,x                  ; flip the sign
-fps_negx0       rts
+; Unpack single precision BCD floating point at (X) to fpa0
+fps_unpack0     ldb ,x                          ; get sign and exponent
+                bne fps_unpack0b                ; brif not zero
+                ldd zero                        ; zero out the entire value to represent 0
+                std fpa0
+                std fpa0+2
+                std fpa0+4
+                sta fpa0+6
+                rts
+fps_unpack0b    sex                             ; get sign flag to store
+                sta fpa0+fpa.sign               ; set accumulator
+                anda #0x7f                      ; lose sign from exponent
+                sta fpa0+fpa.exp                ; set exponent
+                ldd 1,x                         ; copy signficand over
+                std fpa0+fpa.sig
+                ldd 3,x
+                std fpa0+fpa.sig+2
+                lda 5,x
+                sta fpa0+fpa.sig+4
+                rts
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Copy (X) to fpa0 and (U) to fpa1
-fps_copyinputs  ldd ,x                          ; copy (X) to fpa0
-                std fpa0
-                ldd 2,x
-                std fpa0+2
-                ldd 4,x
-                std fpa0+4
-                ldd ,u                          ; copy (U) to fpa0
-                std fpa1
-                ldd 2,u
-                std fpa1+2
-                ldd 4,u
-                std fpa1+4
+; Pack single precision BCD floating point in fpa1 to (X)
+fps_pack1       lda fpa1+fpa.sign               ; get sign bits
+                anda #0x80                      ; only keep high bit
+                ora fpa1+fpa.exp                ; merge with exponent
+                sta ,x                          ; put in destination
+                ldd fpa1+fpa.sig                ; copy significand over
+                std 1,x
+                ldd fpa1+fpa.sig+2
+                std 3,x
+                lda fpa1+fpa.sig+4
+                sta 5,x
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Pack single precision BCD floating point in fpa0 to (X)
+fps_pack0       lda fpa0+fpa.sign               ; get sign bits
+                anda #0x80                      ; only keep high bit
+                ora fpa0+fpa.exp                ; merge with exponent
+                sta ,x                          ; put in destination
+                ldd fpa0+fpa.sig                ; copy significand over
+                std 1,x
+                ldd fpa0+fpa.sig+2
+                std 3,x
+                lda fpa0+fpa.sig+4
+                sta 5,x
                 rts
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Subtraction (X) - (U) to (Y)
-fps_sub         bsr fps_copyinputs              ; copy input operands
-                com fpa1+fps.sign               ; negate the subtrahend (don't need to handle zero here)
-                bra fps_add0                    ; go handle it as addition
+; Copy binary arguments
+fps_copyargs    clr fpa0+fpa.extra              ; clear extra precision bits
+                clr fpa1+fpa.extra
+                bsr fps_unpack0                 ; unpack first argument to fpa0
+                leax ,u                         ; point to second argument
+                bra fps_unpack1                 ; now unpack it to fpa1 and return
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Addition (X) + (U) to (Y)
-fps_add         bsr fps_copyinputs              ; copy input operands
-fps_add0        lda fpa1+fps.exp                ; is the second operand 0?
+; Single precision BCD floating point subtraction
+; (X) - (U) -> (Y)
+fps_sub         bsr fps_copyargs                ; copy input arguments
+                ldb fpa1+fpa.exp                ; subtracting zero?
+                beq fps_add0                    ; brif so - do nothing
+                com fpa1+fpa.sign               ; invert sign
+                bra fps_add1                    ; go handle addition of negative
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision BCD floating point addition
+; (X) + (U) -> (Y)
+;
+; Note: must denormalize the *shorter* value. However, we may end up with values the same length so we may not be able
+; to easily identify which one is larger.
+;
+; Note that the magnitude can get substantially smaller after addition of a negative so renormalization is necessary
+; after the operation. As a result, some additional precision is maintained during the addition operation to allow for
+; correct rounding.
+;
+; Rounding will be according to the standard rules: 0...4 go toward zero, 5...9 go away from zero.
+fps_add         bsr fps_copyargs                ; fetch arguments
+                ldb fpa1+fpa.exp                ; is second argument zero?
                 bne fps_add1                    ; brif not
-                ldd fpa0                        ; copy first operand to output
-                std ,y
-                ldd fpa0+2
-                std 2,y
-                ldd fpa0+4
-                std 4,y
-                rts
-fps_add1        lda fpa0+fps.exp                ; get exponent of first operand
-                bne fps_add2                    ; brif not zero
-                ldd fpa1                        ; copy second operand to output
-                std ,y
-                ldd fpa1+2
-                std 2,y
-                ldd fpa1+4
-                std 4,y
-                rts
-fps_add2        clr fpa0extra                   ; clear extra precision bits
-                lda fpa0+fps.exp                ; get first operand exponent
-                suba fpa1+fps.exp               ; get difference in exponents
-                beq fps_add8                    ; brif we don't need to denormalize - they're the same
-                blo fps_add3                    ; brif we need to denormalize the first operand
+fps_add0        leax ,y                         ; if second argument is zero, do nothing and return first operand
+                bra fps_pack0
+fps_add1        ldb fpa0+fpa.exp                ; is first argument zero?
+                bne fps_add3                    ; brif not
+fps_add2        leax ,y                         ; point to output argument
+                bra fps_pack1                   ; pack second argument to result and return
+fps_add3        ldd zero                        ;* initialize extra precision bytes following significand which we need
+                std fpa0+fpa.sig+5              ;* to handle denormalization of the smaller operand
+                std fpa0+fpa.sig+7
+                sta fpa0+fpa.sig+9
+                std fpa1+fpa.sig+5
+                std fpa1+fpa.sig+7
+                sta fpa1+fpa.sig+9
+                subb fpa0+fpa.exp               ; calculate exponent difference
+                lbeq fps_add8                   ; brif same exponent - no need to denormalize
+                bcs fps_add4                    ; brif second operand is bigger
                 ldx #fpa1                       ; point to second operand to denormalize
-                bra fps_add4                    ; go denormalize
-fps_add3        ldb fpa1+fps.exp                ; get exponent of second operand (result exponent)
-                stb fpa0+fps.exp                ; set result exponent
-                ldx #fpa0                       ; point to first operand to denormalize
-                nega                            ; get number of bits to shift as positive number
-fps_add4        suba #8                         ; is there 8 bits left?
-                blo fps_add5                    ; brif not
-                ldb fps.sig+3,x                 ; shift significand 8 bits right
-                stb fpa0extra                   ; save extra precision bits
-                ldb fps.sig+2,x
-                stb fps.sig+3,x
-                ldb fps.sig+1,x
-                stb fps.sig+2,x
-                ldb fps.sig,x
-                stb fps.sig+1,x
-                clr fps.sig,x
-                bra fps_add4                    ; see if we have another byte to shift
-fps_add5        adda #8                         ; adjust for extra subtract above
-                bra fps_add7                    ; do the bit shifting
-fps_add6        lsr fps.sig,x                   ; shift one bit right
-                ror fps.sig+1,x
-                ror fps.sig+2,x
-                ror fps.sig+3,x
-                ror fpa0extra
-                deca                            ; done all of the bits?
-fps_add7        bne fps_add6                    ; brif not
-fps_add8        lda fpa0+fps.sign               ; compare the signs of the operands
-                eora fpa1+fps.sign              ; set A if signs differ
-                bne fps_add9                    ; brif they differ - do subtraction
-                ldd fpa0+fps.sig+2              ; add low word of significands
-                addd fpa1+fps.sig+2
-                std fpa0+fps.sig+2
-                ldd fpa0+fps.sig                ; and the high word
-                adcb fpa1+fps.sig+1
-                adca fpa1+fps.sig
-                std fpa0+fps.sig
-                bcc fps_add9                    ; brif no carry
-                ror fpa0+fps.sig                ; shift carry into significand
-                ror fpa0+fps.sig+1
-                ror fpa0+fps.sig+2
-                ror fpa0+fps.sig+3
-                ror fpa0extra                   ; and the extra bits (for rounding)
-                inc fpa0+fps.exp                ; bump exponent to account for bit shift
-                bne fps_add14                   ; go check for round-off if not overflow
+                cmpb #10                        ; are we going to shift more than the precision?
+                bhs fps_add0                    ; brif so - return first operand
+                bra fps_add5                    ; go shift operand right by B places
+fps_add4        negb                            ; get positive number of shifts
+                ldx #fpa0                       ; shift left operand right B places
+                lda fpa1+fpa.exp                ; set exponent of result to larger of two
+                sta fpa0+fpa.exp
+                cmpb #10                        ; shifting more than precision?
+                bhs fps_add2                    ; brif so - return second operand
+fps_add5        subb #2                         ; do we need at least two places?
+                bcs fps_add6                    ; brif not
+                lda fpa.sig+8,x                 ; shift right into extra digits
+                sta fpa.sig+9,x
+                lda fpa.sig+7,x
+                sta fpa.sig+8,x
+                lda fpa.sig+6,x
+                sta fpa.sig+7,x
+                lda fpa.sig+5,x
+                sta fpa.sig+6,x
+                lda fpa.sig+4,x
+                sta fpa.sig+5,x
+                lda fpa.sig+3,x
+                sta fpa.sig+4,x
+                lda fpa.sig+2,x
+                sta fpa.sig+3,x
+                lda fpa.sig+1,x
+                sta fpa.sig+2,x
+                lda fpa.sig,x
+                sta fpa.sig+1,x
+                clr fpa.sig,x
+                bra fps_add5                    ; go see if we have more to shift
+fps_add6        incb                            ; do we still have a digit to shift?
+                bne fps_add8                    ; brif not
+                lsr fpa.sig,x                   ; shift a digit right
+                ror fpa.sig+1,x
+                ror fpa.sig+2,x
+                ror fpa.sig+3,x
+                ror fpa.sig+4,x
+                ror fpa.sig+5,x
+                ror fpa.sig+6,x
+                ror fpa.sig+7,x
+                ror fpa.sig+8,x
+                ror fpa.sig+9,x
+                lsr fpa.sig,x
+                ror fpa.sig+1,x
+                ror fpa.sig+2,x
+                ror fpa.sig+3,x
+                ror fpa.sig+4,x
+                ror fpa.sig+5,x
+                ror fpa.sig+6,x
+                ror fpa.sig+7,x
+                ror fpa.sig+8,x
+                ror fpa.sig+9,x
+                lsr fpa.sig,x
+                ror fpa.sig+1,x
+                ror fpa.sig+2,x
+                ror fpa.sig+3,x
+                ror fpa.sig+4,x
+                ror fpa.sig+5,x
+                ror fpa.sig+6,x
+                ror fpa.sig+7,x
+                ror fpa.sig+8,x
+                ror fpa.sig+9,x
+                lsr fpa.sig,x
+                ror fpa.sig+1,x
+                ror fpa.sig+2,x
+                ror fpa.sig+3,x
+                ror fpa.sig+4,x
+                ror fpa.sig+5,x
+                ror fpa.sig+6,x
+                ror fpa.sig+7,x
+                ror fpa.sig+8,x
+                ror fpa.sig+9,x
+fps_add8        clra                            ; clear carry so regular addition works below
+                lda fpa0+fpa.sign               ; do signs differ?
+                eora fpa1+fpa.sign
+                sta fpa1+fpa.exp                ; non-zero if signs differ; we don't need fpa1 exponent any more
+                beq fps_add11                   ; brif not - just add
+                ldx #fpa1                       ; default to second argument being smaller
+                lda fpa0+fpa.sig                ; compare high digits of significand
+                cmpa fpa1+fpa.sig
+                bne fps_add9                    ; brif top digits differ
+                lda fpa0+fpa.sig+1              ; next digits?
+                cmpa fpa1+fpa.sig+1
+                bne fps_add9                    ; brif digits differ; pattern continues
+                lda fpa0+fpa.sig+2
+                cmpa fpa1+fpa.sig+2
+                bne fps_add9
+                lda fpa0+fpa.sig+3
+                cmpa fpa1+fpa.sig+3
+                bne fps_add9
+                lda fpa0+fpa.sig+4
+                cmpa fpa1+fpa.sig+4
+                bne fps_add9
+                lda fpa0+fpa.sig+5
+                cmpa fpa1+fpa.sig+5             ; don't have to check other extras; only one set will be set
+fps_add9        bhs fps_add10                   ; brif first operand is bigger
+                ldx #fpa0                       ; point to first operand as smaller
+                lda fpa1+fpa.sign               ; set result sign to that of larger operand
+                sta fpa0+fpa.sign
+fps_add10       ldd #0x9999                     ;* calculate 9's complement of smaller operand (X must already point to it)
+                subd fpa.sig,x                  ;* we'll complete the 10's complement by setting C on the way into the
+                std fpa.sig,x                   ;* addition sequence
+                ldd #0x9999
+                subd fpa.sig+2,x
+                std fpa.sig+2,x
+                ldd #0x9999
+                subd fpa.sig+4,x
+                std fpa.sig+4,x
+                ldd #0x9999
+                subd fpa.sig+6,x
+                std fpa.sig+6,x
+                ldd #0x9999
+                subd fpa.sig+8,x
+                std fpa.sig+8,x
+                coma                            ; set carry going into add
+fps_add11       lda fpa0+fpa.sig+9              ; do the addition (10 bytes)
+                adca fpa1+fpa.sig+9
+                daa
+                sta fpa0+fpa.sig+9
+                lda fpa0+fpa.sig+8
+                adca fpa1+fpa.sig+8
+                daa
+                sta fpa0+fpa.sig+8
+                lda fpa0+fpa.sig+7
+                adca fpa1+fpa.sig+7
+                daa
+                sta fpa0+fpa.sig+7
+                lda fpa0+fpa.sig+6
+                adca fpa1+fpa.sig+6
+                daa
+                sta fpa0+fpa.sig+6
+                lda fpa0+fpa.sig+5
+                adca fpa1+fpa.sig+5
+                daa
+                sta fpa0+fpa.sig+5
+                lda fpa0+fpa.sig+4
+                adca fpa1+fpa.sig+4
+                daa
+                sta fpa0+fpa.sig+4
+                lda fpa0+fpa.sig+3
+                adca fpa1+fpa.sig+3
+                daa
+                sta fpa0+fpa.sig+3
+                lda fpa0+fpa.sig+2
+                adca fpa1+fpa.sig+2
+                daa
+                sta fpa0+fpa.sig+2
+                lda fpa0+fpa.sig+1
+                adca fpa1+fpa.sig+1
+                daa
+                sta fpa0+fpa.sig+1
+                lda fpa0+fpa.sig
+                adca fpa1+fpa.sig
+                daa
+                sta fpa0+fpa.sig
+                ror fpa1+fpa.exp                ;* do sign flag and carry differ? will set V if so; we will never have
+                rol fpa1+fpa.exp                ;* a real carry on the subtract case but might on the add case
+                bvc fps_normalize               ; brif we didn't overflow significand - we can normalize the result
+                inc fpa0+fpa.exp                ; bump exponent to account for overflow
+                lbmi OVERROR                    ; brif it overflowed
+                ror fpa0+fpa.sig                ; do a bit dance
+                ror fpa0+fpa.sig+1
+                ror fpa0+fpa.sig+2
+                ror fpa0+fpa.sig+3
+                ror fpa0+fpa.sig+4
+                ror fpa0+fpa.sig+5
+                ror fpa0+fpa.sig+6
+                ror fpa0+fpa.sig+7
+                ror fpa0+fpa.sig+8
+                ror fpa0+fpa.sig+9
+                ror fpa0+fpa.sig
+                ror fpa0+fpa.sig+1
+                ror fpa0+fpa.sig+2
+                ror fpa0+fpa.sig+3
+                ror fpa0+fpa.sig+4
+                ror fpa0+fpa.sig+5
+                ror fpa0+fpa.sig+6
+                ror fpa0+fpa.sig+7
+                ror fpa0+fpa.sig+8
+                ror fpa0+fpa.sig+9
+                ror fpa0+fpa.sig
+                ror fpa0+fpa.sig+1
+                ror fpa0+fpa.sig+2
+                ror fpa0+fpa.sig+3
+                ror fpa0+fpa.sig+4
+                ror fpa0+fpa.sig+5
+                ror fpa0+fpa.sig+6
+                ror fpa0+fpa.sig+7
+                ror fpa0+fpa.sig+8
+                ror fpa0+fpa.sig+9
+                ror fpa0+fpa.sig
+                ror fpa0+fpa.sig+1
+                ror fpa0+fpa.sig+2
+                ror fpa0+fpa.sig+3
+                ror fpa0+fpa.sig+4
+                ror fpa0+fpa.sig+5
+                ror fpa0+fpa.sig+6
+                ror fpa0+fpa.sig+7
+                ror fpa0+fpa.sig+8
+                ror fpa0+fpa.sig+9
+                ; fall through to normalization routine
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Normalize a floating point value in fpa0 with extra precision digits at fpaextra (up to 5 bytes) and return the
+; packed result at (Y)
+;
+; The first step is to shift the significand left until a nonzero digit is in the leftmost position. This will bring
+; in extra precision digits from fpaextra through fpaextra4 until either 10 shifts are done or the decimal exponent
+; underflows. In both of those cases, the result will be zero.
+;
+; Once the leftmost digit is nonzero, the leftmost extra precision digit is checked to see if it is >= 5. If so, the
+; significand will have 1 added to it. If that triggers a carry, the decimal exponent will be incremented and the
+; significand will have its leftmost digit set to 1.
+;
+; This will trigger an overflow if the decimal exponent exceeds the allowed range.
+fps_normalize   clrb                            ; initialize the exponent adjustment counter
+fps_normalize0  lda fpa0+fpa.sig                ; do we have a nonzero digit in the first pair?
+                bne fps_normalize1              ; brif so
+                lda fpa0+fpa.sig+1              ; shift everything left 2 spaces
+                sta fpa0+fpa.sig
+                lda fpa0+fpa.sig+2
+                sta fpa0+fpa.sig+1
+                lda fpa0+fpa.sig+3
+                sta fpa0+fpa.sig+2
+                lda fpa0+fpa.sig+4
+                sta fpa0+fpa.sig+3
+                lda fpaextra
+                sta fpa0+fpa.sig+4
+                lda fpaextra+1
+                sta fpaextra
+                lda fpaextra+2
+                sta fpaextra+1
+                lda fpaextra+3
+                sta fpaextra+2
+                lda fpaextra+4
+                sta fpaextra+3
+                clr fpaextra+4
+                subb #2                         ; account for two digit positions
+                cmpb #-10                       ; have we shifted the whole set of digits (assumes originally normalized)
+                bgt fps_normalize0              ; brif not
+fps_normalize4  clr fpa0+fpa.exp                ; set result to zero
+                clr fpa0+fpa.sign
+fps_normalize5  leax ,y                         ; point to return location
+                jmp fps_pack0                   ; return result
+fps_normalize1  bita #0xf0                      ; is the high digit zero?
+                bne fps_normalize3              ; brif not
+                lsl fpaextra                    ; only need to shift one extra position here since there won't be more shifts
+                rol fpa0+fpa.sig+4
+                rol fpa0+fpa.sig+3
+                rol fpa0+fpa.sig+2
+                rol fpa0+fpa.sig+1
+                rol fpa0+fpa.sig
+                lsl fpaextra
+                rol fpa0+fpa.sig+4
+                rol fpa0+fpa.sig+3
+                rol fpa0+fpa.sig+2
+                rol fpa0+fpa.sig+1
+                rol fpa0+fpa.sig
+                lsl fpaextra
+                rol fpa0+fpa.sig+4
+                rol fpa0+fpa.sig+3
+                rol fpa0+fpa.sig+2
+                rol fpa0+fpa.sig+1
+                rol fpa0+fpa.sig
+                lsl fpaextra
+                rol fpa0+fpa.sig+4
+                rol fpa0+fpa.sig+3
+                rol fpa0+fpa.sig+2
+                rol fpa0+fpa.sig+1
+                rol fpa0+fpa.sig
+                decb                            ; account for digit shift
+fps_normalize3  addb fpa0+fpa.exp               ; adjust exponent
+                stb fpa0+fpa.exp
+                ble fps_normalize4              ; brif we underflowed to zero
+                ldb fpaextra                    ; get extra precision digit
+                andb #0xf0                      ; keep only the highest extra precision digit
+                cmpb #0x50                      ; do we need to round?
+                blo fps_normalize5              ; brif not
+                lda fpa0+fpa.sig+4              ; bump low digits
+                adda #1
+                daa
+                sta fpa0+fpa.sig+4
+                bcc fps_normalize5              ; brif no carry - done
+                lda fpa0+fpa.sig+3              ; keep going until all significand bytes handled
+                adda #1
+                daa
+                sta fpa0+fpa.sig+3
+                bcc fps_normalize5
+                lda fpa0+fpa.sig+2
+                adda #1
+                daa
+                sta fpa0+fpa.sig+2
+                bcc fps_normalize5
+                lda fpa0+fpa.sig+1
+                adda #1
+                daa
+                sta fpa0+fpa.sig+1
+                bcc fps_normalize5
+                lda fpa0+fpa.sig
+                adda #1
+                daa
+                sta fpa0+fpa.sig
+                bcc fps_normalize5
+                lda #0x10                       ; overflowed the significand - shift a 1 digit in
+                sta fpa0+fpa.sig
+                inc fpa0+fpa.exp                ; and bump exponent to reflect that
+                bpl fps_normalize5              ; brif we didn't overflow - return result
 OVERROR         ldb #err_ov                     ; raise overflow
-                jmp ERROR 
-fps_add9        ldd fpa0+fps.sig+2              ; subtract low word
-                subd fpa1+fps.sig+2
-                std fpa0+fps.sig+2
-                ldd fpa0+fps.sig                ; and now the high word
-                sbcb fpa1+fps.sig+1
-                sbca fpa1+fps.sig
-                std fpa0+fps.sig
-                bcc fps_add10                   ; brif we didn't carry
-                com fpa0+fps.sign               ; flip result sign - other number was bigger
-                com fpa0+fps.sig+3              ; negate two's complement result to be just the magnitude
-                com fpa0+fps.sig+2
-                com fpa0+fps.sig+1
-                com fpa0+fps.sig
-                ldx fpa0+fps.sig+2              ; add 1 to complete negation
-                leax 1,x
-                stx fpa0+fps.sig+2
-                bne fps_add10                   ; brif carry doesn't propagate
-                ldx fpa0+fps.sig                ; propagate carry
-                leax 1,x
-                stx fpa0+fps.sig                ; NOTE: this cannot carry because magnitude got smaller
-fps_add10       clra                            ; initialize exponent offset
-fps_add11       ldb fpa0+fps.sig                ; do we have nonzero bits in high byte of significand?
-                bne fps_add13                   ; brif so
-                ldb fpa0+fps.sig+1              ; shift left 8 bits
-                stb fpa0+fps.sig
-                ldb fpa0+fps.sig+2
-                stb fpa0+fps.sig+1
-                ldb fpa0+fps.sig+3
-                stb fpa0+fps.sig+2
-                ldb fpa0extra                   ; and extra precision bits
-                stb fpa0+fps.sig+3
-                clr fpa0extra
-                addb #8                         ; account for number of bits shifted
-                cmpb #40                        ; done 40 bits?
-                blo fps_add11                   ; brif not - see if we have more bits to shift
-                clr fpa0+fps.exp                ; number underflowed to zero - set it so
-                clr fpa0+fps.sign
-                clr fpa0+fps.sig
-                clr fpa0+fps.sig+1
-                clr fpa0+fps.sig+2
-                clr fpa0+fps.sig+3
-                bra fps_add16                   ; go return result
-fps_add12       inca                            ; account for a bit shift
-                lsl fpa0extra                   ; shift significand and extra bits left
-                rol fpa0+fps.sig+3
-                rol fpa0+fps.sig+2
-                rol fpa0+fps.sig+1
-                rol fpa0+fps.sig
-fps_add13       bpl fps_add12                   ; brif we haven't normalized yet
-fps_add14       ldb fpa0extra                   ; do we need to round?
-                bpl fps_add16                   ; brif not
-                ldx fpa0+fps.sig+2              ; add one to significand
-                leax 1,x
-                stx fpa0+fps.sig+2
-                bne fps_add16                   ; brif no carry
-                ldx fpa0+fps.sig                ; bump the upper word of significand
-                leax 1,x
-                stx fpa0+fps.sig
-                bne fps_add16
-fps_add15       inc fpa0+fps.exp                ; bump exponent
-                beq OVERROR                     ; brif it overflowed
-                lda #0x80                       ; set high bit of significand (rest is zero)
-                sta fpa0+fps.sig
-fps_add16       ldd fpa0                        ; copy result to destination
-                std ,y
-                ldd fpa0+2
-                std 2,y
-                ldd fpa0+4
-                std 4,y
+                jmp ERROR
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision BCD multiply (X) by (U)
+;
+; Calculate: (X) * (U) -> (Y)
+;
+; First, the routine calculates the new exponent and sign. The exponents simply add together while the sign is the
+; exclusive OR of the two input signs. That gives negative for differing signs and positive for matching signs.
+; Then the multiplication of the significands works by initializing an accumulator large enough to hold twice as
+; many digits as each significand to zeroes. It also needs an extra byte to the left to handle overflow for each
+; digit operation. It then works through each digit of the multiplier and uses that as a count to add the
+; multiplicand to the *high digits* of the accumulator. Then, before moving to the next digit, the accumulator and
+; mulltiplier are shifted right and the process is repeated. Once the process is done, the result required for the
+; floating point calculation will be the high 10 digits of the accumulator with extra digits stored in the
+; remaining bytes.
+;
+; Once the multiplication is complete, the significand is normalized. See above.
+fps_mul         jsr fps_copyargs                ; fetch arguments
+                lda fpa0+fpa.exp                ; first argumennt zero?
+                bne fps_mul0                    ; brif not
+                leax ,y                         ; return first argument (0)
+                jmp fps_pack0
+fps_mul0        lda fpa1+fpa.exp                ; second argument zero?
+                bne fps_mul1                    ; brif not
+                leax ,y                         ; return second argument (0)
+                jmp fps_pack1
+fps_mul1        lda fpa0+fpa.sign               ; calculate result sign
+                eora fpa1+fpa.sign
+                sta fpa0+fpa.sign               ; result will go into fpa0
+                lda fpa0+fpa.exp                ; fetch exponent of multiplicand
+                suba #64                        ; remove bias
+                sta fpa0+fpa.exp                ; save it
+                lda fpa1+fpa.exp                ; get exponent of multplier
+                suba #64                        ; remove bias
+                adda fpa0+fpa.exp               ; add exponents
+                cmpa #63                        ; did we overflow upward?
+                bgt OVERROR                     ; brif so
+                cmpa #-63                       ; did we underflow?
+                bhs fps_mul3                    ; brif not
+fps_mul2        clr fpa0+fpa.exp                ; return zero if we underflow
+                leax ,y
+                jmp fps_pack0
+fps_mul3        adda #64                        ; add bias to resulting exponent
+                sta fpa0+fpa.exp                ; save result exponent
+                ldd zero                        ;* zero out result buffer high digits (extra digits + significand and
+                std fpaextra                    ;* one extra byte to make sure lowest extra digit is zero)
+                std fpaextra+2
+                std fpaextra+4
+                sta fpaextra+6
+                ldb fpa1+fpa.sig+4              ;* do each byte of multiplier significand in sequence
+                bsr fps_mul4
+                ldb fpa1+fpa.sig+3
+                bsr fps_mul4
+                ldb fpa1+fpa.sig+2
+                bsr fps_mul4
+                ldb fpa1+fpa.sig+1
+                bsr fps_mul4
+                ldb fpa1+fpa.sig
+                bsr fps_mul4
+                ldd fpaextra+1                  ; copy result into fpa0 significand (overflow byte will be zero)
+                std fpa0+fpa.sig
+                ldd fpaextra+3
+                std fpa0+fpa.sig+2
+                ldd fpaextra+5
+                sta fpa0+fpa.sig+4
+                ldd fpaextra+7
+                std fpa0+fpa.sig+6
+                ldd fpaextra+9
+                std fpa0+fpa.sig+8
+                jmp fps_normalize               ; go normalize the result
+fps_mul4        bne fps_mul6                    ; brif at least one digit is nonzero
+                ldd fpaextra+8                  ; shift right by 8 bits
+                std fpaextra+9
+                ldd fpaextra+6
+                std fpaextra+7
+                ldd fpaextra+4
+                std fpaextra+5
+                ldd fpaextra+2
+                sta fpaextra+3
+                ldd fpaextra
+                std fpaextra+1
+                clr fpaextra
+                rts
+fps_mul5        bsr fps_mul11                   ; add multiplicand to accumulator
+                decb                            ; account for iteration
+fps_mul6        bitb #0x0f                      ; done everything for this digit?
+                bne fps_mul7
+                bsr fps_mul9                    ; shift accumulator
+                bra fps_mul8
+fps_mul7        bsr fps_mul11                   ; add multiplicand to accumulator
+                subb #0x10                      ; account for iteration
+fps_mul8        bitb #0xf0                      ; done all iterations?
+                bne fps_mul7                    ; brif not
+fps_mul9        lsr fpaextra                    ; shift result
+                ror fpaextra+1
+                ror fpaextra+2
+                ror fpaextra+3
+                ror fpaextra+4
+                ror fpaextra+5
+                ror fpaextra+6
+                ror fpaextra+7
+                ror fpaextra+8
+                ror fpaextra+9
+                ror fpaextra+10
+                lsr fpaextra
+                ror fpaextra+1
+                ror fpaextra+2
+                ror fpaextra+3
+                ror fpaextra+4
+                ror fpaextra+5
+                ror fpaextra+6
+                ror fpaextra+7
+                ror fpaextra+8
+                ror fpaextra+9
+                ror fpaextra+10
+                lsr fpaextra
+                ror fpaextra+1
+                ror fpaextra+2
+                ror fpaextra+3
+                ror fpaextra+4
+                ror fpaextra+5
+                ror fpaextra+6
+                ror fpaextra+7
+                ror fpaextra+8
+                ror fpaextra+9
+                ror fpaextra+10
+                lsr fpaextra
+                ror fpaextra+1
+                ror fpaextra+2
+                ror fpaextra+3
+                ror fpaextra+4
+                ror fpaextra+5
+                ror fpaextra+6
+                ror fpaextra+7
+                ror fpaextra+8
+                ror fpaextra+9
+                ror fpaextra+10
+                rts
+fps_mul11       lda fpa0+fpa.sig+4              ; add digits starting at the bottom
+                adda fpaextra+5
+                daa
+                sta fpaextra+5
+                lda fpa0+fpa.sig+3
+                adca fpaextra+4
+                daa
+                sta fpaextra+4
+                lda fpa0+fpa.sig+2
+                adca fpaextra+3
+                daa
+                sta fpaextra+3
+                lda fpa0+fpa.sig+1
+                adca fpaextra+2
+                daa
+                sta fpaextra+2
+                lda fpa0+fpa.sig
+                adca fpaextra+1
+                daa
+                sta fpaextra+1
+                bcc fps_mul12
+                inc fpaextra                    ; handle carry out
+fps_mul12       rts 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Single precision BCD divide
+;
+; Calculate (X) ÷ (U) -> (Y)
+;
+; First, calculate the result exponent by subtracting the decimal exponent of (U) from that of (X). If the result goes
+; above the exponent range, raise overflow. If it goes below the exponent range, return zero.
+;
+; The calculate the result sign the same as for multiplcation. Also calculate the 10's complement of the divisor.
+;
+; First, we copy the divdend into the extra precision digits in fpa1 since they aren't needed for anything else
+; here. Then we set the desired nonzero digit counter to 11 to indicate 10 significant digits plus a digit for
+; rounding and set the flag indicating a nonzero digit was seen to zero to indicate it hasn't been seen. Also set
+; the extra carry byte to zero. Then we run through the digit pair routine 10 times to set 20 digits of the
+; significand and extra precision for fpa0. The digit pair routine just calls the digit loop twice while doing
+; some bookkeeping.
+;
+; We also pre-cacluate the 9's complement of the divisor and store that in fpaextra to save time during the
+; subtraction steps.
+;
+; For each digit, the process is as follows:
+;
+; 1. If the desired digit count is zero, do nothing and just return a zero for the quotient digit.
+; 2. Initialize the quotient digit to 0
+; 3. If the extra carry is nonzero, we know the result "goes" so go on to step 6
+; 4. Compare the divisor to the residue; this is faster on average than doing the subtraction and then
+;    reversing the last subtraction loop.
+; 5. If the divisor goes *exactly* into the residue, bump the quotient digit, clear the desired digit count
+;    and return the quotient digit; we have no need to do anything more
+; 6. If the divisor does go, subtract the divisor from the residue, bump the digit, and go back to step 3
+; 7. If the resulting digit is nonzero, set the "nonzero digit seen" flag to 1 (exactly one)
+; 8. Subtract the nonzero digit flag from the desired digits flag so after enough digits, we stop doing the
+;    division loops.
+; 9. Return the quotient digit.
+fps_div         jsr fps_copyargs                ; get arguments to the accumulators
+                lda fpa1+fpa.exp                ; get divisor exponent
+                bne fps_div0                    ; brif not zero
+DIV0ERROR       ldb #err_div0                   ; raise division by zero
+                jmp ERROR
+fps_div0        suba #64                        ; remove bias
+                sta fpa1+fpa.exp                ; save for later calculation
+                lda fpa0+fpa.exp                ; get quotient exponent
+                bne fps_div2                    ; brif not zero - we have to do work
+fps_div1        clr fpa0+fpa.exp                ; make sure result is zero
+                leax ,y                         ; return zero result
+                jmp fps_pack0
+fps_div2        suba #64                        ; remove bias
+                suba fpa1+fpa.exp               ; subtract divisor exponent
+                cmpa #64                        ; did we overflow upward?
+                lbge OVERROR                    ; brif so
+                cmpa #-64                       ; did we overflow downward (underflow)?
+                bls fps_div1                    ; brif we underflow
+                adda #64                        ; add back the bias
+                sta fpa0+fpa.exp                ; set result exponent
+                lda fpa0+fpa.sign               ; calculate result sign (XOR of argument signs)
+                eora fpa1+fpa.sign
+                sta fpa0+fpa.sign
+                ldd fpa0+fpa.sig                ; initialize residue to dividend
+                std fpa1+fpa.sig
+                ldd fpa0+fpa.sig+2
+                std fpa1+fpa.sig+2
+                lda fpa0+fpa.sig+4
+                sta fpa1+fpa.sig+4
+                ldd #11                         ; initialize digit counter and nonzero seen flag
+                std fpaextra+5
+                sta fpaextra+7                  ; set ongoing extra carry digits to zero 
+                ldd #0x9999                     ;* calculate 9's complement of divisor for later; we'll introduce a carry
+                subd fpa1+fpa.sig               ;* to the first byte to complete the 10's complement's +1 to save doing
+                std fpaextra                    ;* the extra work here
+                ldd #0x9999
+                subd fpa1+fpa.sig+2
+                std fpaextra+2
+                lda #0x99
+                suba fpa1+fpa.sig+4
+                sta fpaextra+4
+                bsr fps_div3                    ; calculate the quotient byte by byte
+                stb fpa0+fpa.sig
+                bsr fps_div3
+                stb fpa0+fpa.sig+1
+                bsr fps_div3
+                stb fpa0+fpa.sig+2
+                bsr fps_div3
+                stb fpa0+fpa.sig+3
+                bsr fps_div3
+                stb fpa0+fpa.sig+4
+                bsr fps_div3
+                stb fpa0+fpa.sig+5
+                bsr fps_div3
+                stb fpa0+fpa.sig+6
+                bsr fps_div3
+                stb fpa0+fpa.sig+7
+                bsr fps_div3
+                stb fpa0+fpa.sig+8
+                bsr fps_div3
+                stb fpa0+fpa.sig+9
+                jmp fps_normalize               ; go normalize the result and return
+fps_div3        bsr fps_div5                    ; do a digit
+                lslb                            ; shift it over
+                lslb
+                lslb
+                lslb
+                sta fpaextra+8                  ; save it
+                bsr fps_div5                    ; do next digit
+                addb fpaextra+8                 ; combine the two quotient digits
+fps_div4        rts
+fps_div5        ldb fpaextra+6                  ; do we even need to do the division?
+                beq fps_div4                    ; brif not - return 0
+                clrb                            ; initialize quotient digit
+fps_div6        lda fpaextra+7                  ; did we have a carry last time?
+                bne fps_div7                    ; brif so - we know it "goes"
+                lda fpa1+fpa.sig                ; is the divisor less than the dividend residue?
+                cmpa fpa1+fpa.sig+5
+                bhi fps_div8                    ; brif high byte is larger than residue
+                lda fpa1+fpa.sig+1              ; and keep going for all 5 bytes
+                cmpa fpa1+fpa.sig+6
+                bhi fps_div8
+                lda fpa1+fpa.sig+2
+                cmpa fpa1+fpa.sig+7
+                bhi fps_div8
+                lda fpa1+fpa.sig+3
+                cmpa fpa1+fpa.sig+8
+                bhi fps_div8
+                lda fpa1+fpa.sig+4
+                cmpa fpa1+fpa.sig+9
+                bhi fps_div8                    ; brif divisor is greater than the residue
+                bne fps_div7                    ; brif it didn't go exactly
+                incb                            ; bump quotient for this "go"
+                clr fpaextra+6                  ; indicate no more digits needed
+                rts
+fps_div7        coma                            ; set carry to complete 10's complement of divisor
+                lda fpa1+fpa.sig+9              ; do the "subtraction"
+                adca fpaextra+4
+                daa
+                sta fpa1+fpa.sig+9
+                lda fpa1+fpa.sig+8
+                adca fpaextra+3
+                daa
+                sta fpa1+fpa.sig+8
+                lda fpa1+fpa.sig+7
+                adca fpaextra+2
+                daa
+                sta fpa1+fpa.sig+7
+                lda fpa1+fpa.sig+6
+                adca fpaextra+1
+                daa
+                sta fpa1+fpa.sig+6
+                lda fpa1+fpa.sig+5
+                adca fpaextra
+                daa
+                sta fpa1+fpa.sig+5
+                lda fpaextra+7                  ; and handle the carry byte
+                adca #0x99
+                sta fpaextra+7
+                incb                            ; bump digit count
+                bra fps_div6                    ; go see if we need another subtraction
+fps_div8        tstb                            ; nonzero digit?
+                beq fps_div9                    ; brif not
+                lda #1                          ; set nonzero flag
+                sta fpaextra+5
+fps_div9        lda fpaextra+6                  ; adjust digit count
+                suba fpaextra+5
+                sta fpaextra+6
+                lsl fpa1+fpa.sig+9              ; shift residue one decimal digit
+                rol fpa1+fpa.sig+8
+                rol fpa1+fpa.sig+7
+                rol fpa1+fpa.sig+6
+                rol fpa1+fpa.sig+5
+                rol fpaextra+7
+                lsl fpa1+fpa.sig+9
+                rol fpa1+fpa.sig+8
+                rol fpa1+fpa.sig+7
+                rol fpa1+fpa.sig+6
+                rol fpa1+fpa.sig+5
+                rol fpaextra+7
+                lsl fpa1+fpa.sig+9
+                rol fpa1+fpa.sig+8
+                rol fpa1+fpa.sig+7
+                rol fpa1+fpa.sig+6
+                rol fpa1+fpa.sig+5
+                rol fpaextra+7
+                lsl fpa1+fpa.sig+9
+                rol fpa1+fpa.sig+8
+                rol fpa1+fpa.sig+7
+                rol fpa1+fpa.sig+6
+                rol fpa1+fpa.sig+5
+                rol fpaextra+7
                 rts
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Single precision multiplication (X) × (U) to (Y)
-fps_mul         lda fps.exp,x                   ; is first operand zero?
-                beq fps_mul0                    ; brif so - return zero
-                lda fps.exp,u                   ; is second operand zero?
-                bne fps_mul1                    ; brif not - have to do the multiply
-fps_mul0        ldd zero                        ; return zero result
-                std ,y
-                std 2,y
-                std 4,y
+; Convert a floating point number in val0 to a string in strbuff
+
+; The maximum size of a string generated here is 19 bytes representing 10 significant digits, 5 leading zeroes after the
+; decimal point, the decimal point, a leading zero, a leading sign, and a trailing NUL.
+fps_toascii     ldu #strbuff+1                  ; point to output buffer
+                lda #0x20                       ; set sign to "blank"
+                sta -1,u
+                clr fpaextra+1                  ; set decimal point offset; zero means none
+                clr fpaextra+3                  ; disable "E" notation
+                ldx #val0+val.value             ; unpack value to fpa0 so we can mess with it
+                jsr fps_unpack0
+                lda fpa0+fpa.exp                ; check exponent
+                bne fps_toascii0                ; brif not zero
+                lda #'0                         ; make the number zero and print it
+                sta ,u
+                clr 1,u
                 rts
-fps_mul1        jsr fps_copyinputs              ; copy input operands
-                lda fpa0+fps.sign               ; calculate sign of result - xor of the two signs
-                eora fpa1+fps.sign
-                sta fpa0+fps.sign               ; save result sign
-                lda fpa0+fps.exp                ; get exponent of first value
-                adda fpa1+fps.exp               ; calculate new exponent; also cancels the bias
-                rora                            ; set V if C and N differ
-                rola
-                bvc fps_mul3                    ; brif maybe an overflow
-                adda #0x80                      ; add back the bias
-                sta fpa0+fps.sign               ; save new sign
-                beq fps_mul0                    ; brif we underflowed - zero out sign
-; This does a shift-and-add multiplication algorithm. This is slower than an equivalent using MUL but is smaller.
-; The high order bytes will be left in fpa0 with the low order bytes in fpa0extra and fpa0extra[1-3]. The low order
-; bytes are kept for the odd case where extra precision is useful. Uses fpa0extra[4-7] as temporaries.
-fps_mul2        ldd zero                        ; zero out temporary bytes
-                std fpa0extra4
-                std fpa0extra6
-                ldb fpa0+fps.sig+3              ; multiply by low byte of fpa0
-                bsr fps_mul4
-                ldb fpa0extra                   ; move low bites
-                stb fpa0extra3
-                ldb fpa0+fps.sig+2              ; multiply by next higher byte
-                bsr fps_mul4
-                ldb fpa0extra                   ; move low bits
-                stb fpa0extra2
-                ldb fpa0+fps.sig+1              ; and again for the next higher byte
-                bsr fps_mul4
-                ldb fpa0extra
-                stb fpa0extra1
-                ldb fpa0+fps.sig                ; and the high order byte
-                bsr fps_mul4
-                ldd fpa0extra4                  ; copy high order product bits to result
-                std fpa0+fps.sig
-                ldd fpa0extra6
-                std fpa0+fps.sig+2
-                jmp fps_add10                   ; go normalize the result and return
-fps_mul3        bpl fps_mul0                    ; brif we underflowed - return 0
-                jmp OVERROR                     ; raise overflow
-fps_mul4        bne fps_mul5                    ; brif not multiply by zero
-                lda fpa0+fps.sig+3              ; shift 8 bits right
-                sta fpa0extra
-                lda fpa0+fps.sig+2
-                sta fpa0+fps.sig+3
-                lda fpa0+fps.sig+1
-                sta fpa0+fps.sig+2
-                lda fpa0+fps.sig
-                sta fpa0+fps.sig+1
-                clr fpa0+fps.sig
-fps_mul8        rts
-fps_mul5        coma                            ; set C
-fps_mul6        lda fpa0extra4                  ; get high byte of result bytes
-                rorb                            ; is multiplier bit set?
-                beq fps_mul8                    ; brif 8 shifts done (C set above makes sure of that)
-                bcc fps_mul7                    ; brif bit not set - don't do addition
-                lda fpa0extra7                  ; add multiplier (fpa1) to result
-                adda fpa1+fps.sig+3
-                sta fpa0extra7
-                lda fpa0extra6
-                adca fpa1+fps.sig+2
-                sta fpa0extra6
-                lda fpa0extra5
-                adca fpa1+fps.sig+1
-                sta fpa0extra5
-                lda fpa0extra4
-                adca fpa1+fps.sig
-fps_mul7        rora                            ; rotate carry in (from add or 0)
-                sta fpa0extra4
-                ror fpa0extra5
-                ror fpa0extra6
-                ror fpa0extra7
-                ror fpa0extra                   ; and into the extra precision bits
-                clra                            ; clear carry - so shift above will terminate
-                bra fps_mul6                    ; go do another bit
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Divide (X) by 10 in place
-fps_const10     fcb 0x83,0xa0,0x00,0x00,0x00,0x00 ; single precision unpacked constant 10
-fps_div10       ldu #fps_const10                ; point to constant 10
-                leay ,x                         ; put output in input
-                ; fall through to regular division
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Single precision division (X) ÷ (U) -> (Y)
-;
-; This is basically the same algorithm used in the Color Basic ROM
-fps_div         lda fps.exp,u                   ; is divisor 0?
-                bne fps_div0
-DIV0ERROR       ldb #err_div0                   ; raise division by zero
-                jmp ERROR
-fps_div0        lda fps.exp,x                   ; is dividend 0?
-                lbeq fps_mul0                   ; brif so - return 0
-                jsr fps_copyinputs              ; copy input values
-                lda fpa0+fps.sign               ; calculate result sign - xor of operand signs
-                eora fpa1+fps.sign
-                sta fpa0+fps.sign               ; save result sign
-                lda fpa1+fps.exp                ; get divisor exponent
-                nega                            ; negate for subtraction
-                adda fpa0+fps.exp               ; subtract it from dividend exponent
-                rora                            ; set V if C and N differ
-                rola
-                bvc fps_mul3                    ; brif overflow or underflow
-                adda #0x80                      ; add back the bias
-                sta fpa0+fps.sign               ; save new sign
-                lbeq fps_mul0                   ; brif we underflowed - zero out sign
-                inca                            ; bump exponent - why?? related to the bias stuff above?
-                lbeq OVERROR                    ; brif it overflows
-                sta fpa0+fps.exp                ; save result exponent
-                ldx #fpa0extra4                 ; point to temporary storage bytes for quotient
-                ldb #4                          ; counter for 4 significand bytes and one extra partial byte
-                stb fpa0extra1                  ; save counter since we need both accumulators
-                ldb #1                          ; shift counter flag and quotient byte
-fps_div1        lda fpa1+fps.sig                ; set C if fpa0 significand <= fpa1 significand
-                cmpa fpa0+fps.sig
-                bne fps_div2
-                lda fpa1+fps.sig+1
-                cmpa fpa0+fps.sig+1
-                bne fps_div2
-                lda fpa1+fps.sig+2
-                cmpa fpa0+fps.sig+2
-                bne fps_div2
-                lda fpa1+fps.sig+3
-                cmpa fpa0+fps.sig+3
-                bne fps_div2
-                coma                            ; set C if values are the same
-fps_div2        tfr cc,a                        ; save C for later, C clear if fpa1 > fpa0
-                rolb                            ; shift carry into quotient
-                bcc fps_div3                    ; brif carry clear - we haven't done 8 bits yet
-                stb ,x+                         ; save quotient byte after a full set of bits
-                dec fpa0extra1                  ; have we done all the bytes?
-                bmi fps_div7                    ; brif all bytes plus extra precision - done all
-                beq fps_div6                    ; brif all main sigificand bytes - do a couple extra bits
-                ldb #1                          ; reset the bit counter flag
-fps_div3        tfr cc,a                        ; get back original carry from compare
-                bcs fps_div5                    ; brif it "went"
-fps_div4        lsl fpa0+fps.sig+3              ; shift dividend left
-                rol fpa0+fps.sig+2
-                rol fpa0+fps.sig+1
-                rol fpa0+fps.sig
-                bcs fps_div2                    ; brif it carries - next bit "goes"
-                bmi fps_div1                    ; check magnitudes of next bit
-                bra fps_div2                    ; carry clear - check another bit
-fps_div5        lda fpa0+fps.sig+3              ; subtract divisor from dividend bits
-                suba fpa1+fps.sig+3
-                sta fpa0+fps.sig+3
-                lda fpa0+fps.sig+2
-                sbca fpa1+fps.sig+2
-                sta fpa0+fps.sig+2
-                lda fpa0+fps.sig+1
-                sbca fpa1+fps.sig+1
-                sta fpa0+fps.sig+1
-                lda fpa0+fps.sig
-                sbca fpa1+fps.sig
-                sta fpa0+fps.sig
-                bra fps_div4                    ; now do the bit shift to line things up
-fps_div6        ldb #0x40                       ; only do two bits of extra precision byte
-                bra fps_div2                    ; go handle these bitsd
-fps_div7        rorb                            ; get extra quotient bits to bit 7,6 and bit 5 set
-                rorb
-                rorb
-                stb fpa0extra                   ; save extra precision bits
-                ldd fpa0extra4                  ; copy result bits to fpa0
-                std fpa0+fps.sig
-                ldd fpa0extra6
-                std fpa0+fps.sig+2
-                jmp fps_add10                   ; go normalize the result
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Pack single precision number at (X) to (U)
-fps_pack        lda fps.sign,x                  ; get sign of number (will be 0 for 0)
-                ora #0x7f                       ; make sure low bits are set for merging
-                anda fps.sig,x                  ; merge with high bits of significand
-                ldb fps.sig+1,x                 ; get upper mid bits of significand
-                std fps.sig,u
-                ldd fps.sig+2,x
-                std fps.sig+2,u
-                lda fps.exp,x
-                sta fps.exp,u
+fps_toascii0    lda fpa0+fpa.sign               ; negative?
+                bpl fps_toascii1                ; brif not
+                lda #'-                         ; negative sign
+                sta -1,u
+fps_toascii1    clra                            ; set number of significant digits that exist
+                ldx #fpa0+fpa.sig               ; point to significand
+fps_toascii2    ldb ,x+                         ; get digit pair
+                bitb #0xf0                      ; is left digit set?
+                beq fps_toascii3                ; brif not - we've counted all the significant digits
+                inca                            ; count digit
+                bitb #0x0f                      ; is right digit set?
+                beq fps_toascii3                ; brif not - we've counted all the sigificant digits
+                deca                            ; done all significant digits?
+                bne fps_toascii2                ; brif not
+fps_toascii3    stb fpaextra                    ; save significant digits that exist
+                lda fpa0+fpa.exp                ; fetch exponent
+                suba #64                        ; remove bias
+                sta fpa0+fpa.exp                ; save for later
+                bpl fps_toascii14               ; brif no leading zeroes
+                suba fpaextra                   ; get number of significant digits plus decimal count
+                cmpa #-15                       ; do we end up with too many digits (leading 0 + significant digits)
+                blt fps_toascii15               ; brif too many - do scientific notation
+                ldd #'0*256+'.                  ; put a leading "0."
+                std ,u++
+                ldb fpa0+fpa.exp                ; get exponent back
+                addb fpaextra                   ; add number of available significant digits
+                beq fps_toascii5                ; brif no leading digits
+fps_toascii4    sta ,u+                         ; put a zero
+                incb                            ; done all 
+                bne fps_toascii4                ; brif not
+fps_toascii5    ldx #fpa0+fpa.sig               ; point to significand
+                ldd #0xf000                     ; set digit mask and counter
+                sta fpaextra+2
+fps_toascii6    lda ,x                          ; get digit pair
+                anda fpaextra+2                 ; keep the desired digit
+                bita #0xf0                      ; is it the left hand digit?
+                beq fps_toascii7                ; brif not or digit is 0
+                lsra                            ; right justify digit
+                lsra
+                lsra
+fps_toascii7    adda #0x30                      ; turn it into ascii
+                sta ,u+                         ; stuff it in the output
+                dec fpaextra                    ; done all digits?
+                beq fps_toascii9                ; brif so
+                cmpa fpaextra+1                 ; are we at the decimal point?
+                bne fps_toascii8                ; brif not
+                lda #'.                         ; put a decimal
+                sta ,u+
+fps_toascii8    com fpaextra+2                  ; flip digit mask
+                bpl fps_toascii6                ; handle another digit
+                leax 1,x                        ; move to next digit byte
+                bra fps_toascii6                ; now go handle next digit
+fps_toascii9    ldb fpaextra+3                  ; get decimal exponent to display
+                beq fps_toascii13
+                lda #'E                         ; output "E"
+                sta ,u+
+                tstb                            ; negative?
+                bpl fps_toascii10                ; brif not
+                negb                            ; positivize it
+                lda #'-                         ; put a minus for the exponent
+                sta ,u+
+fps_toascii10   cmpb #10                        ; do we have two digits for exponent?
+                blo fps_toascii12               ; brif not
+                lda #0x2f                       ; initialize left digit
+fps_toascii11   inca                            ; bump digit
+                subb #10                        ; are we at the right digit?
+                bhi fps_toascii11               ; brif not yet
+                addb #10                        ; undo extra subtraction
+                sta ,u+                         ; save left digit
+fps_toascii12   addb #0x30                      ; turn right digit to ascii
+                stb ,u+                         ; save right digit
+fps_toascii13   clr ,u                          ; put a NUL at the end of the result
                 rts
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Unpack single precision number at (X) to (U)
-fps_unpack      lda fps.exp,x                   ; get exponent of value
-                beq fps_unpack0                 ; brif value is zero
-                sta fps.exp,u
-                ldb fps.sig,x                   ; get high byte of significand
-                sex                             ; make sign value in A
-                sta fps.sign,u                  ; set sign in result
-                ldd fps.sig,x                   ; get high word of sifnificand
-                ora #0x80                       ; make sure high bit is set
-                std fps.sig,u                   ; save high word in result
-                ldd fps.sig+2,x                 ; copy middle bytes of significand
-                std fps.sig+2,u
-                rts
-fps_unpack0     sta fps.exp,u                   ; zero out destination
-                sta fps.sig,u
-                sta fps.sig+1,u
-                sta fps.sig+2,u
-                sta fps.sig+3,u
-                sta fps.sign,u
-                rts
+fps_toascii14   cmpa #9                         ; is it in range for number of significant digits?
+                bgt fps_toascii15               ; brif not - do scientific notation
+                inca                            ; exponent 0 has decimal point after first digit
+                sta fpaextra+1                  ; save decimal point location
+                cmpa fpaextra                   ; is it more than the number of significant digits?
+                ble fps_toascii5                ; brif not - just convert the significand
+                sta fpaextra                    ; make sure we include the pre-decimal zeroes
+                bra fps_toascii5                ; go convert the significand
+fps_toascii15   ldb #1                          ; put decimal after the first digit
+                stb fpaextra+1
+                sta fpaextra+3                  ; enable the "E" notation with the correct exponent
+                bra fps_toascii5                ; actually convert the number
                 *pragmapop list
diff -r f959c92bc329 -r 663d8e77b579 src/int.s
--- a/src/int.s	Sun Oct 08 00:17:20 2023 -0600
+++ b/src/int.s	Sun Oct 15 22:15:36 2023 -0600
@@ -1,6 +1,74 @@
                 *pragmapush list
                 *pragma list
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Convert a signed integer in val0 to a string in strbuff
+;
+; The maximum size of a string generated here is 12 bytes representing 10 significant digits, a leading sign, and the
+; trailing NUL.
+int_toascii     ldu #strbuff+1                  ; point to start of digits
+                lda #0x20                       ; default sign to space
+                sta -1,u
+                ldx #int_toascii5               ; point to digit constant table
+                ldd #10                         ; do 10 digits, no nonzero seen yet
+                stb fpaextra                    ; save digit count
+                ldd val0+val.int+2              ; copy to temporary accumulator
+                std fpaextra+4
+                ldd val0+val.int
+                std fpaextra+2                  ; (will set N if the number is negative)
+                bpl int_toascii0                ; brif so
+                lda #'-                         ; negative sign
+                sta -1,u                        ; set sign
+                ldd zero                        ; negate the value
+                subd fpaextra+4
+                std fpaextra+4
+                ldd zero
+                sbcb fpaextra+3
+                sbca fpaextra+2
+                std fpaextra+2
+int_toascii0    lda #0x2f                       ; initialize digit (account for inc below)
+                sta ,u
+int_toascii1    inc ,u                          ; bump digit
+                ldd fpaextra+4                  ; subtract digit constnat
+                subd 2,x
+                std fpaextra+4
+                ldd fpaextra+2
+                sbcb 1,x
+                sbca ,x
+                std fpaextra+2
+                bcc int_toascii1                ; brif we aren't at the right digit
+                ldd fpaextra+4                  ; undo extra subtraction
+                addd 2,x
+                std fpaextra+4
+                ldd fpaextra+2
+                adcb 1,x
+                adca ,x
+                std fpaextra+2
+                leax 4,x                        ; move to next constant
+                lda ,u                          ; get digit count
+                cmpa #'0                        ; is it zero?
+                bne int_toascii2                ; brif not
+                lda fpaextra+1                  ; get nonzero digit count
+                beq int_toascii3                ; brif we haven't already got a nonzero - don't count the zero
+int_toascii2    inc fpaextra+1                  ; bump nonzero digit count
+                leau 1,u                        ; move to next digit position                
+int_toascii3    dec fpaextra                    ; done all digits?
+                bne int_toascii0                ; brif not
+                ldb fpaextra+1                  ; did we have any nonzero digits?
+                bne int_toascii4                ; brif so
+                leau 1,u                        ; move past the only zero in the output
+int_toascii4    clr ,u                          ; NUL terminate the string
+                rts
+int_toascii5    fqb 1000000000                  ; digit place values
+                fqb 100000000
+                fqb 10000000
+                fqb 1000000
+                fqb 100000
+                fqb 10000
+                fqb 1000
+                fqb 100
+                fqb 10
+                fqb 1
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; 32 bit integer handling package.
 ;
 ; Negate a 32 bit integer in (X); done by subtracting it from zero
@@ -69,194 +137,194 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Signed 32 bit integer multiply (X) * (U) -> (Y), overflow if exceeds signed 32 bit range                
 int32_mul       ldd val.int+2,x                 ; copy left operand to temporary
-                std fpa0+fps.sig+2
+                std fpa0+fpa.sig+2
                 ldd val.int,x
-                std fpa0+fps.sig
+                std fpa0+fpa.sig
                 eora val.int,u                  ; set sign bit in A if signs differ
                 pshs a                          ; save result sign
                 ldd val.int+2,u                 ; copy right operand to temporary
-                std fpa1+fps.sig+2
+                std fpa1+fpa.sig+2
                 ldd val.int,u
-                std fpa1+fps.sig
+                std fpa1+fpa.sig
                 bpl int32_mul0                  ; brif right operand is positive
                 ldd zero                        ; negate right operand
-                subd fpa1+fps.sig+2
-                std fpa1+fps.sig+2
+                subd fpa1+fpa.sig+2
+                std fpa1+fpa.sig+2
                 ldd zero
-                sbcb fpa1+fps.sig+1
-                sbca fpa1+fps.sig
-                std fpa1+fps.sig
-int32_mul0      lda fpa0+fps.sig                ; is left operand negative?
+                sbcb fpa1+fpa.sig+1
+                sbca fpa1+fpa.sig
+                std fpa1+fpa.sig
+int32_mul0      lda fpa0+fpa.sig                ; is left operand negative?
                 bpl int32_mul1                  ; brif not
                 ldd zero                        ; negate left operand
-                subd fpa0+fps.sig+2
-                std fpa0+fps.sig+2
+                subd fpa0+fpa.sig+2
+                std fpa0+fpa.sig+2
                 ldd zero
-                sbcb fpa0+fps.sig+1
-                sbca fpa0+fps.sig
-                std fpa0+fps.sig
+                sbcb fpa0+fpa.sig+1
+                sbca fpa0+fpa.sig
+                std fpa0+fpa.sig
 int32_mul1      bsr util_mul32                  ; do the actual multiplication
-                ldb fpa0extra                   ; are upper bits all zero?
-                orb fpa0extra1
-                orb fpa0extra2
-                orb fpa0extra3
+                ldb fpaextra                   ; are upper bits all zero?
+                orb fpaextra+1
+                orb fpaextra+2
+                orb fpaextra+3
                 bne int32_mul4                  ; brif not - overflow to floating point
-                ldb fpa0extra4                  ; is bit 31 set?
+                ldb fpaextra+4                  ; is bit 31 set?
                 bpl int32_mul2                  ; brif not - no overflow
                 lda ,s                          ; negative result wanted?
                 bpl int32_mul4                  ; brif not - overflow to floating point
                 andb #0x7f                      ; lose extra sign bit
-                orb fpa0extra2                  ; "or" in other bytes to see if all but bit 31 are zero
-                orb fpa0extra6
-                orb fpa0extra7
+                orb fpaextra+2                  ; "or" in other bytes to see if all but bit 31 are zero
+                orb fpaextra+6
+                orb fpaextra+7
                 bne int32_mul4                  ; brif any nonzero bits - we overflowed maximum negative number
 int32_mul2      ldb ,s+                         ; do we want a negative result?
                 bpl int32_mul3                  ; brif not - don't negate result
                 ldd zero                        ; negate result
-                subd fpa0extra6
-                std fpa0extra6
+                subd fpaextra+6
+                std fpaextra+6
                 ldd zero
-                sbcb fpa0extra5
-                sbca fpa0extra4
-                std fpa0extra4
-int32_mul3      ldd fpa0extra4                  ; copy result to destination
+                sbcb fpaextra+5
+                sbca fpaextra+4
+                std fpaextra+4
+int32_mul3      ldd fpaextra+4                  ; copy result to destination
                 std val.int,y
-                ldd fpa0extra6
+                ldd fpaextra+6
                 std val.int+2,y
                 rts
 int32_mul4      puls b                          ; get back desired sign
                 sex                             ; set proper sign for floating point result
-                ldx #fpa0extra                  ; point to 64 bit unsigned result
-                jmp fps_fromuint64s             ; go convert to floating point using the sign in A
+                ldx #fpaextra                  ; point to 64 bit unsigned result
+;                jmp fps_fromuint64s             ; go convert to floating point using the sign in A
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; 32 bit multiply.
 ;
-; Significands of fpa0 and fpa1, treated as unsigned, are multiplied with the product being stored in the fpa0extra
+; Significands of fpa0 and fpa1, treated as unsigned, are multiplied with the product being stored in the fpaextra
 ; memory locations.
 ;
 ; The agorithm is simply this: zero out the result, then multiply fpa0 by each byte of fpa1 and then add the result
 ; to the result location. This yields a 64 bit product which is somewhat wasteful.
 util_mul32      ldd zero                        ;* zero out result bits; low 16 bits don't need to be cleared and
-                stb fpa0extra3                  ;* upper 24 bits also don't
-                std fpa0extra4
-                ldb fpa1+fps.sig+3              ; multiply by low byte of fpa1 - no carries possible for this iteration
-                lda fpa0+fps.sig+3
+                stb fpaextra+3                  ;* upper 24 bits also don't
+                std fpaextra+4
+                ldb fpa1+fpa.sig+3              ; multiply by low byte of fpa1 - no carries possible for this iteration
+                lda fpa0+fpa.sig+3
                 mul
-                std fpa0extra6
-                ldb fpa1+fps.sig+3
-                lda fpa0+fps.sig+2
+                std fpaextra+6
+                ldb fpa1+fpa.sig+3
+                lda fpa0+fpa.sig+2
                 mul
-                addd fpa0extra5
-                std fpa0extra5
-                ldb fpa1+fps.sig+3
-                lda fpa0+fps.sig+1
+                addd fpaextra+5
+                std fpaextra+5
+                ldb fpa1+fpa.sig+3
+                lda fpa0+fpa.sig+1
                 mul
-                addd fpa0extra4
-                std fpa0extra4
-                ldb fpa1+fps.sig+3
-                lda fpa0+fps.sig
+                addd fpaextra+4
+                std fpaextra+4
+                ldb fpa1+fpa.sig+3
+                lda fpa0+fpa.sig
                 mul
-                addd fpa0extra3
-                std fpa0extra3
+                addd fpaextra+3
+                std fpaextra+3
 ; Now we potentially have cascading carries at every stage; it makes more sense to handle those in a separate
 ; addition pass after each partial calculation. The partial calculations are identical to above. This is completely
 ; unrolled for speed.
                 ldd zero                        ; zero out extra work bytes
-                std fpa0extra8
-                stb fpa0extra10
-                ldb fpa1+fps.sig+2              ; multiply by second low byte of fpa1
-                lda fpa0+fps.sig+3
+                std fpaextra+8
+                stb fpaextra+10
+                ldb fpa1+fpa.sig+2              ; multiply by second low byte of fpa1
+                lda fpa0+fpa.sig+3
                 mul
-                std fpa0extra11
-                ldb fpa1+fps.sig+2
-                lda fpa0+fps.sig+2
+                std fpaextra+11
+                ldb fpa1+fpa.sig+2
+                lda fpa0+fpa.sig+2
                 mul
-                addd fpa0extra10
-                std fpa0extra10
-                ldb fpa1+fps.sig+2
-                lda fpa0+fps.sig+1
+                addd fpaextra+10
+                std fpaextra+10
+                ldb fpa1+fpa.sig+2
+                lda fpa0+fpa.sig+1
                 mul
-                addd fpa0extra9
-                std fpa0extra9
-                ldb fpa1+fps.sig+2
-                lda fpa0+fps.sig
+                addd fpaextra+9
+                std fpaextra+9
+                ldb fpa1+fpa.sig+2
+                lda fpa0+fpa.sig
                 mul
-                addd fpa0extra8
-                std fpa0extra8
-                ldd fpa0extra11                 ; add to partial product (shifted left 8 bits)
-                addd fpa0extra5
-                std fpa0extra5
-                ldd fpa0extra9
-                adcb fpa0extra4
-                adca fpa0extra3
-                std fpa0extra3
+                addd fpaextra+8
+                std fpaextra+8
+                ldd fpaextra+11                 ; add to partial product (shifted left 8 bits)
+                addd fpaextra+5
+                std fpaextra+5
+                ldd fpaextra+9
+                adcb fpaextra+4
+                adca fpaextra+3
+                std fpaextra+3
                 ldb #0
-                adcb fpa0extra8
-                stb fpa0extra2
+                adcb fpaextra+8
+                stb fpaextra+2
                 ldd zero                        ; and do it all again for next byte of fpa1
-                std fpa0extra8
-                stb fpa0extra10
-                ldb fpa1+fps.sig+1
-                lda fpa0+fps.sig+3
+                std fpaextra+8
+                stb fpaextra+10
+                ldb fpa1+fpa.sig+1
+                lda fpa0+fpa.sig+3
                 mul
-                std fpa0extra11
-                ldb fpa1+fps.sig+1
-                lda fpa0+fps.sig+2
+                std fpaextra+11
+                ldb fpa1+fpa.sig+1
+                lda fpa0+fpa.sig+2
                 mul
-                addd fpa0extra10
-                std fpa0extra10
-                ldb fpa1+fps.sig+1
-                lda fpa0+fps.sig+1
+                addd fpaextra+10
+                std fpaextra+10
+                ldb fpa1+fpa.sig+1
+                lda fpa0+fpa.sig+1
                 mul
-                addd fpa0extra9
-                std fpa0extra9
-                ldb fpa1+fps.sig+1
-                lda fpa0+fps.sig
+                addd fpaextra+9
+                std fpaextra+9
+                ldb fpa1+fpa.sig+1
+                lda fpa0+fpa.sig
                 mul
-                addd fpa0extra8
-                std fpa0extra8
-                ldd fpa0extra11
-                addd fpa0extra4
-                std fpa0extra4
-                ldd fpa0extra9
-                adcb fpa0extra3
-                adca fpa0extra2
-                std fpa0extra2
+                addd fpaextra+8
+                std fpaextra+8
+                ldd fpaextra+11
+                addd fpaextra+4
+                std fpaextra+4
+                ldd fpaextra+9
+                adcb fpaextra+3
+                adca fpaextra+2
+                std fpaextra+2
                 ldb #0
-                adcb fpa0extra8
-                stb fpa0extra1
+                adcb fpaextra+8
+                stb fpaextra+1
                 ldd zero                        ; and the final sequence with the fpa1 high byte
-                std fpa0extra8
-                stb fpa0extra10
-                ldb fpa1+fps.sig
-                lda fpa0+fps.sig+3
+                std fpaextra+8
+                stb fpaextra+10
+                ldb fpa1+fpa.sig
+                lda fpa0+fpa.sig+3
                 mul
-                std fpa0extra11
-                ldb fpa1+fps.sig
-                lda fpa0+fps.sig+2
+                std fpaextra+11
+                ldb fpa1+fpa.sig
+                lda fpa0+fpa.sig+2
                 mul
-                addd fpa0extra10
-                std fpa0extra10
-                ldb fpa1+fps.sig
-                lda fpa0+fps.sig+1
+                addd fpaextra+10
+                std fpaextra+10
+                ldb fpa1+fpa.sig
+                lda fpa0+fpa.sig+1
                 mul
-                addd fpa0extra9
-                std fpa0extra9
-                ldb fpa1+fps.sig
-                lda fpa0+fps.sig
+                addd fpaextra+9
+                std fpaextra+9
+                ldb fpa1+fpa.sig
+                lda fpa0+fpa.sig
                 mul
-                addd fpa0extra8
-                std fpa0extra8
-                ldd fpa0extra11
-                addd fpa0extra3
-                std fpa0extra3
-                ldd fpa0extra9
-                adcb fpa0extra2
-                adca fpa0extra1
-                std fpa0extra1
+                addd fpaextra+8
+                std fpaextra+8
+                ldd fpaextra+11
+                addd fpaextra+3
+                std fpaextra+3
+                ldd fpaextra+9
+                adcb fpaextra+2
+                adca fpaextra+1
+                std fpaextra+1
                 ldb #0
-                adcb fpa0extra
-                stb fpa0extra
+                adcb fpaextra
+                stb fpaextra
                 rts
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Integer divide (X) by 10 *in place*
@@ -268,58 +336,58 @@
 ; 32 bit division, integer only, truncate fraction without rounding. Note that there is exactly one case where integer
 ; division can overflow: dividing -0x80000000 by -1 which yields 0x80000000. All other cases reduce the magnitude.
 int32_div       ldd val.int+2,x                 ; copy left operand to temporary
-                std fpa0+fps.sig+2
+                std fpa0+fpa.sig+2
                 ldd val.int,x
-                std fpa0+fps.sig
+                std fpa0+fpa.sig
                 eora val.int,u                  ; set sign bit in A if signs differ
                 pshs a                          ; save result sign
                 ldd val.int+2,u                 ; copy right operand to temporary
-                std fpa1+fps.sig+2
+                std fpa1+fpa.sig+2
                 ldd val.int,u
-                std fpa1+fps.sig
+                std fpa1+fpa.sig
                 bpl int32_div0                  ; brif right operand is positive
                 ldd zero                        ; negate right operand
-                subd fpa1+fps.sig+2
-                std fpa1+fps.sig+2
+                subd fpa1+fpa.sig+2
+                std fpa1+fpa.sig+2
                 ldd zero
-                sbcb fpa1+fps.sig+1
-                sbca fpa1+fps.sig
-                std fpa1+fps.sig
-int32_div0      lda fpa0+fps.sig                ; is left operand negative?
+                sbcb fpa1+fpa.sig+1
+                sbca fpa1+fpa.sig
+                std fpa1+fpa.sig
+int32_div0      lda fpa0+fpa.sig                ; is left operand negative?
                 bpl int32_div1                  ; brif not
                 ldd zero                        ; negate left operand
-                subd fpa0+fps.sig+2
-                std fpa0+fps.sig+2
+                subd fpa0+fpa.sig+2
+                std fpa0+fpa.sig+2
                 ldd zero
-                sbcb fpa0+fps.sig+1
-                sbca fpa0+fps.sig
-                std fpa0+fps.sig
-int32_div1      ldb fpa1+fps.sig                ; check for division by zero
-                orb fpa1+fps.sig+1
-                orb fpa1+fps.sig+2
-                orb fpa1+fps.sig+3
+                sbcb fpa0+fpa.sig+1
+                sbca fpa0+fpa.sig
+                std fpa0+fpa.sig
+int32_div1      ldb fpa1+fpa.sig                ; check for division by zero
+                orb fpa1+fpa.sig+1
+                orb fpa1+fpa.sig+2
+                orb fpa1+fpa.sig+3
                 lbne DIV0ERROR                  ; brif division by zero
                 bsr util_div32                  ; do the actual division
                 lda ,s+                         ; get desired sign
                 bmi int32_div2                  ; brif want negative - we can't overflow in that case
-                ldb fpa0extra                   ; get high byte of result
+                ldb fpaextra                   ; get high byte of result
                 lbmi OVERROR2                   ; brif we ended up with 0x80000000 positive
                 bra int32_div3                  ; go return result
 int32_div2      ldd zero                        ; negate result to correct sign
-                subd fpa0extra+2
-                std fpa0extra+2
+                subd fpaextra+2
+                std fpaextra+2
                 ldd zero
-                sbcb fpa0extra+1
-                sbca fpa0extra
-                std fpa0extra
-int32_div3      ldd fpa0extra                   ; copy result to destination
+                sbcb fpaextra+1
+                sbca fpaextra
+                std fpaextra
+int32_div3      ldd fpaextra                   ; copy result to destination
                 std val.int,y
-                ldd fpa0extra2
+                ldd fpaextra+2
                 std val.int+2,y
                 rts
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Divide 32 bit integer in fpa0 significand by 32 bit integer in fpa1 significand, both treated as unsigned. Leave
-; quotient at fpa0extra...fpa0extra3 and remainder at fpa0extra4...fpa0extra7; does not check for division by zero
+; quotient at fpaextra...fpaextra+3 and remainder at fpaextra+4...fpaextra+7; does not check for division by zero
 ; which will result in a quotient of 0xffffffff and a remainder will be the dividend. It will not get suck in a loop.
 ;
 ; Algorithm is basically pencil and paper long division. We check to see if the divisor "goes" at each step by doing
@@ -329,37 +397,37 @@
 ; having to do a restore in the no-go case which is going to be quite common with values whose upper bits are
 ; mostly zeroes, thus it makes the operations faster in that case, for integers. (Floating point is a different
 ; problem.) 
-util_div32      ldd fpa0+fps.sig+2              ; copy dividend to result location
-                std fpa0extra6
-                ldd fpa0+fps.sig
-                std fpa0extra4
+util_div32      ldd fpa0+fpa.sig+2              ; copy dividend to result location
+                std fpaextra+6
+                ldd fpa0+fpa.sig
+                std fpaextra+4
                 ldb #32                         ; do 32 bits
-                stb fpa0+fps.exp                ; save counter somewhere because we don't have enough registers
+                stb fpa0+fpa.exp                ; save counter somewhere because we don't have enough registers
                 ldd zero                        ; zero out remainder
-                std fpa0extra4
-                std fpa0extra6
-util_div32a     lsl fpa0extra3                  ; shift dividend residue into remainder
-                rol fpa0extra2
-                rol fpa0extra1
-                rol fpa0extra
-                rol fpa0extra7
-                rol fpa0extra6
-                rol fpa0extra5
-                rol fpa0extra4
-                ldd fpa0extra6                  ; now subtract divisor from remainder
-                subd fpa1+fps.sig+2
-                ldd fpa0extra4
-                sbcb fpa1+fps.sig+1
-                sbca fpa1+fps.sig
+                std fpaextra+4
+                std fpaextra+6
+util_div32a     lsl fpaextra+3                  ; shift dividend residue into remainder
+                rol fpaextra+2
+                rol fpaextra+1
+                rol fpaextra
+                rol fpaextra+7
+                rol fpaextra+6
+                rol fpaextra+5
+                rol fpaextra+4
+                ldd fpaextra+6                  ; now subtract divisor from remainder
+                subd fpa1+fpa.sig+2
+                ldd fpaextra+4
+                sbcb fpa1+fpa.sig+1
+                sbca fpa1+fpa.sig
                 bcs util_div32b                 ; brif it doesn't go - don't subtract or set bit
-                inc fpa0extra3                  ; set quotient bit
-                ldd fpa0extra6                  ; actually do the subtraction
-                subd fpa1+fps.sig+2
-                std fpa0extra6
-                ldd fpa0extra4
-                sbcb fpa1+fps.sig+1
-                sbca fpa1+fps.sig
-                std fpa0extra4
-util_div32b     dec fpa0+fps.exp                ; done all 32 bits?
+                inc fpaextra+3                  ; set quotient bit
+                ldd fpaextra+6                  ; actually do the subtraction
+                subd fpa1+fpa.sig+2
+                std fpaextra+6
+                ldd fpaextra+4
+                sbcb fpa1+fpa.sig+1
+                sbca fpa1+fpa.sig
+                std fpaextra+4
+util_div32b     dec fpa0+fpa.exp                ; done all 32 bits?
                 bne util_div32a                 ; do another
                 *pragmapop list
diff -r f959c92bc329 -r 663d8e77b579 src/number.s
--- a/src/number.s	Sun Oct 08 00:17:20 2023 -0600
+++ b/src/number.s	Sun Oct 15 22:15:36 2023 -0600
@@ -29,7 +29,7 @@
                 bne TMERROR                     ; brif not - raise error
                 pshs x                          ; save X which may be clobbered
                 leay ,x                         ; point to input operand as destination for conversion
-                jsr fps_fromint32               ; convert first argument to floating point
+;                jsr fps_fromint32               ; convert first argument to floating point
                 puls x,pc                       ; restore second operand pointer and return
 val_matchtypes1 ldb val.type,u                  ; get second argument type
                 cmpb #valtype_float             ; is it floating point?
@@ -39,7 +39,7 @@
                 pshs x                          ; save X which mill be clobbered
                 leax ,u                         ; convert (U) to floating point
                 leay ,u
-                jsr fps_fromint32
+;                jsr fps_fromint32
                 puls x,pc                       ; restore argument pointer and return
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Addition and subtraction of values; must enter with values of matching types and the result type already set
@@ -101,196 +101,195 @@
                 jmp TMERROR                     ; unsupported type
                 endc
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Parse a number to either an integer or a floating point value
+; Parse a number to either an integer or a floating point value and return the result in val0
+;
+; This works by first detecting any sign indicators and handling those. Multiple prefix signs are supported. Note that
+; in the regular expression evaluation sequence, unary minus and plus will be handled by the expression evaluator so
+; in that case, the number evaluator would not need to care about those. However, in the case of an arbitrary string
+; fed into the evaluator, those must be handled. Note that there is no need to handle tokenized sign indicators because
+; the only place where they would be tokenized is in a proper expression.
+;
+; Once leading signs are handled, any base specifiers or other modifiers are handled. If none of those intercept the
+; parsing, the regular number parsing continues as follows.
 ;
-; First, identify any sign present. Then parse the remainder as an integer until either a decimal point, an exponential
-; indicator, or the value gets larger than 32 bits. If any of those eventualities happens, convert to floating point
-; and then continue parsing the number as floating point. The result will be stored to (Y).
+; 1. Read a sequence of digits. The digits are stored in backed BCD form in the significand of fpa0. An arbitrary number
+;    of leading zeroes will be skipped. A count of significant digits is maintained while reading digits as is the
+;    position of a decimal point if one is encountered. Once one digit more than the maximum possible precision of any
+;    supported number type is read, subsequent digits will not be stored, but the counters will still be updated.
+; 2. Any subsequent "E" or "e" followed by either a positive or negative decimal exponent is read, with the sign
+;    indicator being optional.
+; 3. The decimal offset calculated in (1) is adjusted by the exponent read in (2) if any.
+; 4. Range checks are completed as follows:
+; 4a. If the calculated decimal exponent is beyond the supported range of any floating point or integer type, raise
+;    an overflow error.
+; 4b. If the number is an integer in the range of -0x80000000 to 0x7fffffff, it is converted to a signed binary integer
+;    and the result is returned
+; 4b. Set the exponent correctly then normalize the result to val0
 val_parsenum    lbeq SNERROR                    ; brif no numberr to parse
-                ldd zero                        ; zero out integer value accumulator
-                std fpa0extra
-                std fpa0extra+2
-                std fpa0extra+4
-                std fpa0extra+6
-                sta fpa0extra12                 ; zero out result sign to default positive
-                jsr curchar                     ; get current input character
-                bra val_parsenum1               ; parse flags
-val_parsenum0   jsr nextchar                    ; get next input character
-val_parsenum1   bcs val_parsenum5               ; brif digit
-                beq val_parsenum                ; brif end of input
-                cmpa #'.                        ; decimal?
-                lbeq val_parsefloat             ; switch to parsing floating point
-                cmpa #'-                        ; minus?
-                beq val_parsenum2               ; brif so
-                cmpa #tok_minus                 ; unary minus operator?
+                ldd zero                        ; zero out digit accumulator
+                stx fpa0+fpa.sig
+                stx fpa0+fpa.sig+2
+                stx fpa0+fpa.sig+4
+                stx fpa0+fpa.sig+6
+                stx fpa0+fpa.sig+8
+                sta fpa0+fpa.sign               ; set number sign to positive
+                std fpaextra+4                  ; clear out decimal exponent and sign
+                std fpaextra                    ; set digit count and decimal flag to zero and no decimal
+                sta fpaextra+2                  ; set decimal position to 0 - unused if decimal not seen
+                ldx #fpa0+fpa.sig               ; point to digit storage location
+                lda #0xf0                       ; set digit mask
+                sta fpaextra+3
+                jsr curchar                     ; get back current input
+                bra val_parsenum1
+val_parsenum0   jsr nextchar                    ; fetch next input
+val_parsenum1   bcs val_parsenum3               ; brif digit - short ciruit other checks
+                cmpa #'+                        ; unary plus?
+                beq val_parsenum0               ; brif so - it's a no-op but supported for symmetry with unary minus
+                cmpa #'-                        ; negative?
+                bne val_parsenum5               ; brif not
+                com fpa0+fpa.sign               ; invert the sign
+                bra val_parsenum0               ; eat the sign and see if there's more signs
+val_parsenum2   jsr nextchar                    ; fetch next character in number
+                bcc val_parsenum5               ; brif not a digit
+val_parsenum3   ldb fpaextra                    ; is it within the digit count?
+                cmpb #11                        ; (11 digits holds both 10 digit fp and 32 bit integer)
+                bhs val_parsenum4               ; brif so - don't convert it
+                suba #0x30                      ; binary-ize the digit
+                bne val_parsenum3a              ; brif not zero
+                tstb                            ; no digits?
+                beq val_parsenum2               ; brif so - skip leading zeroes
+val_parsenum3a  ldb #0x11                       ; put in both digit spots
+                mul
+                andb fpaextra+3                 ; only keep the one we need
+                orb ,x                          ; merge with existing digit
+                stb ,x                          ; put in digit location
+                com fpaextra+3                  ; flip digit position mask
+                bpl val_parsenum4               ; brif not moving to new location
+                leax 1,x                        ; move to new digit storage location
+val_parsenum4   inc fpaextra                    ; bump digit count
+                lbmi OVERROR                    ; brif it overflowed - we can't parse more than 127 digits!
+                ldb fpaextra+2                  ; get decimal position counter
+                subb fpaextra+1                 ; subtract decimal flag (will be 0xff or -1 if decimal seen)
+                stb fpaextra+2
+                bra val_parsenum2               ; go handle another digit or whatever
+val_parsenum5   cmpa #'.                        ; decimal?
                 bne val_parsenum3               ; brif not
-val_parsenum2   com fpa0extra12                 ; invert current sign
-                bra val_parsenum0               ; go handle more stuff at the start of the number
-val_parsenum3   cmpa #'+                        ; unary +?
-                beq val_parsenum0               ; brif so - skip it
-                cmpa #tok_plus                  ; unary + operator?
-                beq val_parsenum0               ; brif so - skip it
-val_parsenum4   lda fpa0extra4                  ; is bit 7 of high byte set?
-                bpl val_parsenum4a              ; brif not - no overflow
-                ldb fpa0extra12                 ; do we want negative?
-                lbpl val_parsefloat             ; brif not - we overflowed so convert to floating point
-                anda #0x7f                      ; lose sign bit then see if any other bits are set
-                ora fpa0extra5
-                ora fpa0extra6
-                ora fpa0extra7
-                lbne val_parsefloat             ; brif nonzero bits - too big for max negative 2's complement
-val_parsenum4a  lda fpa0extra12                 ; do we want negative?
-                bpl val_parsenum4b              ; brif not
-                ldd zero                        ; negate it
-                subd fpa0extra6
-                std fpa0extra6
+                com fpaextra                    ; flag decimal seen
+                lbeq SNERROR                    ; brif already seen a decimal point - syntax error
+                bra val_parsenum2               ; go parse more digits
+val_parsenum6   cmpa #'E                        ; decimal exponent?
+                beq val_parsenum7               ; brif so
+                cmpa #'e                        ; lower case exponent indicator?
+                bne val_parsenum11              ; brif not - we have the end of the number here
+val_parsenum7   jsr nextchar                    ; eat exponent indicator
+                bcs val_parsenum9               ; brif digit
+                cmpa #'+                        ; positive?
+                beq val_parsenum8               ; brif no
+                cmpa #'-                        ; negative?
+                lbne SNERROR                    ; brif not positive, negative, or digit
+                com fpaextra+5                  ; make sign of exponent negative
+val_parsenum8   jsr nextchar                    ; eat exponent sign/get next digit
+                bcc val_parsenum10              ; brif not a digit - done with number
+val_parsenum9   suba #0x30                      ; binary-ize the digit
+                sta fpaextra+6                  ; save digit value
+                ldb fpaextra+4                  ; get calculated exponent
+                lda #10                         ; multiply by 10
+                mul
+                lbcs OVERROR                    ; brif decimal exponent overlows ±127 - we just don't handle that
+                addb fpaextra+6                 ; add digit in
+                lbmi OVERROR                    ; same as above - make sure exponent in range
+                stb fpaextra+4                  ; save new decimal exponent
+                bra val_parsenum8               ; handle another digit
+val_parsenum10  lda fpaextra+5                  ; get sign of exponent
+                bpl val_parsenum11              ; brif positive
+                neg fpaextra+4                  ; negate resulting exponent
+val_parsenum11  ldb fpaextra                    ; get number of digits provided
+                subb fpaextra+2                 ; subtract out count of fractional digits giving whole number digits
+                addb fpaextra+4                 ; add in decimal exponent adjustment
+                stb fpa0+fpa.exp                ; set result exponent
+; Normalization is not required here though rounding might be. Rounding will be handled during floating point return.
+; By ensuring there were no leading zeroes converted, the result is already pre-normalized without losing precision due
+; to an aribtrary number of leading zeroes.
+                cmpb fpaextra                   ; is the exponent less than the number of digits?
+                blt val_parsenum13              ; brif so - return floating point (signed comparison!)
+                cmpb #10                        ; is exponent in the range for a binary integer?
+                bgt val_parsenum13              ; brif not - return floating point
+; Compare with 2147483648, the maximum *negative* value; note that this is a floating point comparison because we
+; already normalized everything above and it handles exponents properly
+                lda fpa0+fpa.exp                ; compare exponents (unbiased)
+                cmpa #9
+                bne val_parsenum12
+                ldx fpa0+fpa.sig                ; compare top of significand
+                cmpx #0x2147
+                bne val_parsenum12
+                ldx fpa0+fpa.sig+2              ; compare middle of significand
+                cmpx #0x4836
+                bne val_parsenum12
+                ldx fpa0+fpa.sig+4              ; compare bottom of significand plus extra digits
+                cmpx #0x4800
+val_parsenum12  bgt val_parsenum13              ; brif too big for integer
+                blt val_parsenum14              ; brif it fits in a positive integer
+                ldb fpa0+fpa.sign               ; negative?
+                bpl val_parsenum14              ; brif not - doesn't fit in integer
+val_parsenum13  lda #valtype_float              ; set return value to floating point
+                sta val0+val.type
+                lda fpa0+fpa.exp                ; put the bias into the exponent
+                adda #64
+                sta fpa0+fpa.exp
+                ldy #val0+val.value             ; normalize/round and return the result
+                jmp fps_normalize
+val_parsenum14  lda #valtype_int                ; set value type to integer
+                sta val0+val.type
+                ldb #9                          ; exponent needed for decimal point to the right of significand
+                subb fpa0+fpa.exp               ; number of digit shifts needed to denormalize
+                beq val_parsenum16              ; brif already denormalized
+                lslb                            ; do 4 shifts per digit
+                lslb
+val_parsenum15  lsr fpa0+fpa.sig                ; shift a digit right
+                ror fpa0+fpa.sig+1
+                ror fpa0+fpa.sig+2
+                ror fpa0+fpa.sig+3
+                ror fpa0+fpa.sig+4
+                decb                            ; done all shifts?
+                bne val_parsenum15
+; Now convert BCD digit sequence in fpa0 significand to binary value in val0
+val_parsenum16  ldb #40                         ; 40 bit shifts needed for whole significand
+                stb fpa0+fpa.extra              ; use extra precision byte as counter
+val_parsenum17  lsl fpa0+fpa.sig+4              ; shift a bit into the binary result
+                rol fpa0+fpa.sig+3
+                rol fpa0+fpa.sig+2
+                rol fpa0+fpa.sig+1
+                rol fpa0+fpa.sig
+                rol val0+val.int+3
+                rol val0+val.int+2
+                rol val0+val.int+1
+                rol val0+val.int
+                ldx #fpa0+fpa.sig               ; point to BCD digits
+val_parsenum18  lda ,x                          ; get byte to check
+                beq val_parsenum20              ; short circuit check if digits are 0
+                anda #0x88                      ; keep bit 3 of each digit; adjustment on >= 8
+                lsra                            ; shift over and mulply by adjustment factor
+                lsra
+                lsra
+                ldb #3                          ; the adjustment is a subtraction by 3
+                mul
+                negb                            ; now subtract from digit
+                addb ,x
+                stb ,x+
+val_parsenum18a cmpx #fpa0+fpa.sig+5            ; done all 5 bytes?
+                blo val_parsenum18              ; brif not
+                dec fpa0+fpa.extra              ; done all bits?
+                bne val_parsenum17              ; brif not
+                ldb fpa0+fpa.sign               ; do we want negative?
+                bpl val_parsenum19              ; brif not
+                ldd zero                        ; negate the value through subtracting from 0
+                subd val0+val.int+2
+                std val0+val.int+2
                 ldd zero
-                sbcb fpa0extra5
-                sbca fpa0extra4
-                std fpa0extra4
-val_parsenum4b  ldd fpa0extra6                  ; copy value to result location
-                std val.int+2,y
-                ldd fpa0extra4
-                std val.int,y
-                lda #valtype_int                ; set value type to integer
-                sta val.type,y
-                rts
-val_parsenum4c  jsr nextchar                    ; fetch next character (after a digit)
-                bcs val_parsenum5               ; it's a digit
-                cmpa #'.                        ; decimal?
-                beq val_parsefloat              ; brif so - handle floating point
-                cmpa #'E                        ; exponent?
-                beq val_parsefloat              ; brif so - handle floating point
-                cmpa #'e                        ; exponent but lower case?
-                beq val_parsefloat              ; brif so - handle floating point
-                bra val_parsenum4               ; unrecognized character - treat as end of number
-val_parsenum5   suba #'0                        ; offset digit to binary
-                pshs a                          ; save it for later addition
-                ldx fpa0extra4                  ; save original value
-                stx fpa0extra8
-                ldx fpa0extra6
-                stx fpa0extra10
-                lsl fpa0extra7                  ; shift partial result left (times 2)
-                rol fpa0extra6
-                rol fpa0extra5
-                rol fpa0extra4
-                rol fpa0extra3
-                lsl fpa0extra7                  ; shift partial result left (times 4)
-                rol fpa0extra6
-                rol fpa0extra5
-                rol fpa0extra4
-                rol fpa0extra3
-                ldd fpa0extra6                  ; add in original value (time 5)
-                addd fpa0extra10
-                std fpa0extra6
-                ldd fpa0extra8
-                adcb fpa0extra5
-                adca fpa0extra4
-                std fpa0extra4
-                ldb fpa0extra3
-                adcb #0
-                stb fpa0extra3
-                lsl fpa0extra7                  ; shift partial result left (times 10)
-                rol fpa0extra6
-                rol fpa0extra5
-                rol fpa0extra4
-                rol fpa0extra3
-                ldd fpa0extra6                  ; add in new digit
-                addb ,s+
-                adca #0
-                std fpa0extra6
-                ldd fpa0extra4                  ; and propagate carry
-                adcb #0
-                adca #0
-                std fpa0extra4
-                ldb fpa0extra3
-                adcb #0
-                stb fpa0extra3
-                beq val_parsenum4c              ; go handle next digit if we didn't overflow past 32 bits
-                jsr nextchar                    ; eat the digit we just handled
-val_parsefloat  pshs y                          ; save destination pointer
-                lda #valtype_float              ; set return type to floating point
-                sta val.type,y
-                ldx #fpa0extra                  ; point to integer accumulator
-                jsr fps_fromuint64              ; convert to floating point
-                clr fpa0extra11                 ; zero out decimal counter
-                clr fpa0extra10                 ; zero out decimal exponent counter
-                clr fpa0extra9                  ; flag for decimal seen
-                jsr curchar                     ; fetch current character
-                bra val_parsefloat1             ; go handle character
-val_parsefloat0 jsr nextchar                    ; fetch next character
-val_parsefloat1 bcc val_parsefloat2             ; brif not digit
-                suba #'0                        ; adjust digit to binary
-                sta fpa0extra3                  ; save it for later (upper 3 bytes of 32 bit value already 0)
-                ldx ,s                          ; get destination value
-                jsr fps_mul10                   ; do a quick multiply by 10
-                ldx #fpa0extra                  ; convert digit to floating point
-                ldy #fpa1
-                jsr fps_fromuint32
-                ldu #fpa1                       ; add digit to accumulated value
-                ldx ,s
-                leay ,x
-                jsr fps_add
-                lda fpa0extra11                 ; update decimal counter
-                suba fpa0extra9
-                sta fpa0extra11
-                bra val_parsefloat0             ; go handle another digit
-val_parsefloat2 cmpa #'.                        ; decimal?
-                bne val_parsefloat7             ; brif not
-                com fpa0extra9                  ; flag for decimal
-                bne val_parsefloat0             ; brif not two decimals - keep parsing
-val_parsefloat3 ldb fpa0extra10                 ; fetch decimal exponent counter
-                subb fpa0extra11                ; subtract out decimal places provided
-                beq val_parsefloat6             ; brif no adjustment needed
-                stb fpa0extra9                  ; save counter
-                bmi val_parsefloat5             ; brif negative exponent - need to do divisions
-val_parsefloat4 ldx ,s                          ; point to destination value
-                jsr fps_mul10                   ; multiply by 10
-                dec fpa0extra9                  ; done all of them?
-                bne val_parsefloat4             ; brif not
-                bra val_parsefloat6
-val_parsefloat5 ldx ,s                          ; point to destination value
-                jsr fps_div10                   ; divide by 10
-                inc fpa0extra9                  ; done all of them?
-                bne val_parsefloat5             ; brif not
-val_parsefloat6 puls y                          ; get back destination pointer
-                lda fpa0extra12                 ; get desired sign
-                sta val.fpssign,y               ; set in result
-                rts
-val_parsefloat7 cmpa #'E                        ; decimal exponent?
-                beq val_parsefloat8             ; brif so
-                cmpa #'e                        ; decimal exponent, lower case edition?
-                bne val_parsefloat3             ; brif not - must be end of number
-val_parsefloat8 clr fpa0extra9                  ; set sign of exponent to positive
-                jsr nextchar                    ; fetch exponent character
-                bcs val_parsefloat11            ; brif digit
-                cmpa #'+                        ; positive exponent?
-                beq val_parsefloat10            ; brif so - skip it
-                cmpa #tok_plus                  ; positive exponent, operator style?
-                beq val_parsefloat10            ; brif so - skip it
-                cmpa #'-                        ; negative exponent?
-                beq val_parsefloat9             ; brif so
-                cmpa #tok_minus                 ; negative exponent, operator style?
-                bne val_parsefloat3             ; brif not - must be end of exponent
-val_parsefloat9 com fpa0extra9                  ; set exponent to negative
-val_parsefloat10
-                jsr nextchar                    ; eat exponent sign
-                bcc val_parsefloat12            ; brif end of exponent - apply sign
-val_parsefloat11
-                suba #'0                        ; binary-ize digit
-                sta fpa0extra8                  ; save digit for later
-                lda #10                         ; mutiply current decimal exponent by 10
-                ldb fpa0extra10                 ; get current exponent
-                mul
-                adca #0                         ; set A if we overflowed *or* bit 7 of B is set
-                lbne OVERROR                    ; brif exponent overflow
-                addb fpa0extra8                 ; add in digit
-                lbvs OVERROR                    ; brif exponent overflow
-                stb fpa0extra10                 ; save new exponent
-                bra val_parsefloat10            ; go handle next exponent digit
-val_parsefloat12
-                ldb fpa0extra9                  ; do we have a negative exponent?
-                beq val_parsefloat3             ; brif not, go adjust value by exponent and return
-                neg fpa0extra10                 ; set base 10 exponent negative
-                bra val_parsefloat3             ; go adjust value by exponent and return
+                sbcb val0+val.int+1
+                sbca val0+val.int
+                std val0+val.int
+val_parsenum19  rts
+val_parsenum20  leax 1,x                        ; move to next digit
+                bra val_parsenum18a             ; go back to mainline
                 *pragmapop list
diff -r f959c92bc329 -r 663d8e77b579 src/print.s
--- a/src/print.s	Sun Oct 08 00:17:20 2023 -0600
+++ b/src/print.s	Sun Oct 15 22:15:36 2023 -0600
@@ -54,6 +54,8 @@
                 ldb val0+val.type               ; get value type
                 cmpb #valtype_int               ; integer?
                 beq cmd_printint                ; brif so - print integer
+                cmpb #valtype_float             ; floatingp point?
+                beq cmd_printfps                ; brif so - print floating point
                 lda #'!                         ; flag unknown expression type
                 jsr console_outchr
                 jsr console_outchr
@@ -61,60 +63,10 @@
 cmd_printnext   jsr curchar                     ; see what we have here
                 bra cmd_print                   ; and go process
 cmd_printeol    jmp console_outnl               ; do a newline and return
-cmd_printint    leas -12,s                      ; make a buffer
-                leay ,s                         ; point to buffer
-                lda #0x20                       ; default sign (positive)
-                ldb val0+val.int                ; is it negative?
-                bpl cmd_printint0               ; brif not
-                jsr int32_neg                   ; negate the integer
-                lda #'-                         ; negative sign
-cmd_printint0   sta ,y+                         ; save sign
-                ldu #cmd_printintpc             ; point to positive constant table
-                ldx #10                         ; there are 10 constants to process
-; subtraction loop - positive residue
-cmd_printint1   lda #'0-1                       ; initialize digit
-                sta ,y
-cmd_printint2   inc ,y                          ; bump digit
-                ldd val0+val.int+2              ; subtract constant
-                subd 2,u
-                std val0+val.int+2
-                ldd val0+val.int
-                sbcb 1,u
-                sbca ,u
-                std val0+val.int
-                bcc cmd_printint2               ; brif we didn't go negative
-                ldd val0+val.int+2              ; undo last subtract
-                addd 2,u
-                std val0+val.int+2
-                ldd val0+val.int
-                adcb 1,u
-                adca ,u
-                std val0+val.int
-                leay 1,y                        ; move to next digit in buffer
-                leau 4,u                        ; move to next constant
-                leax -1,x                       ; done all constants?
-                bne cmd_printint1               ; brif not - done all
-cmd_printint5   clr ,y                          ; NUL terminate the string
-                leax 1,s                        ; point past the sign
-cmd_printint6   lda ,x+                         ; get digit
-                beq cmd_printint8               ; brif end of number
-                cmpa #'0                        ; is it a zero?
-                beq cmd_printint6               ; brif so - skip it
-cmd_printint7   lda ,s                          ; get the sign
-                sta ,--x                        ; put it at the start of the number
-                jsr console_outstr              ; display the number
-                leas 12,s                       ; clean up stack
-                bra cmd_printnext               ; go print the next thing
-cmd_printint8   leax -1,x                       ; restore one of the zeros
-                bra cmd_printint7               ; go finish up
-cmd_printintpc  fqb 1000000000                  ; 10^9
-                fqb 100000000                   ; 10^8
-                fqb 10000000                    ; 10^7
-                fqb 1000000                     ; 10^6
-                fqb 100000                      ; 10^5
-                fqb 10000                       ; 10^4
-                fqb 1000                        ; 10^3
-                fqb 100                         ; 10^2
-                fqb 10                          ; 10^1
-                fqb 1                           ; 10^0
+cmd_printint    jsr int_toascii                 ; convert val0 to string
+cmd_printstrb   ldx #strbuff                    ; point to resulting string
+                jsr console_outstr              ; output the string
+                bra cmd_printnext               ; go handle next stuff
+cmd_printfps    jsr fps_toascii                 ; convert val0 to string
+                bra cmd_printstrb               ; go output the resulting string
                 *pragmapop list
diff -r f959c92bc329 -r 663d8e77b579 src/vars.s
--- a/src/vars.s	Sun Oct 08 00:17:20 2023 -0600
+++ b/src/vars.s	Sun Oct 15 22:15:36 2023 -0600
@@ -49,19 +49,7 @@
 ; the maximum precision floating point values used by the system.
 fpa0            rmb fpa.size                    ; floating point accumulator 1
 fpa1            rmb fpa.size                    ; floating point accumulator 1
-fpa0extra       rmb 1                           ; "extra" bytes for calculations
-fpa0extra1      rmb 1
-fpa0extra2      rmb 1
-fpa0extra3      rmb 1
-fpa0extra4      rmb 1
-fpa0extra5      rmb 1
-fpa0extra6      rmb 1
-fpa0extra7      rmb 1
-fpa0extra8      rmb 1
-fpa0extra9      rmb 1
-fpa0extra10     rmb 1
-fpa0extra11     rmb 1
-fpa0extra12     rmb 1
+fpaextra        rmb 12                          ; "extra" bytes for calculations
                 rmb 0x71-*                      ; align RSTFLG/RSTVEC for stock ROM compatibility
 RSTFLG          rmb 1                           ; 0x55 if RSTVEC is valid
 RSTVEC          rmb 2                           ; points to warm start routine (must start with NOP)
@@ -79,6 +67,7 @@
 tokebuff        rmb linebuffsize+50             ; make it as long as line buffer plus a margin
 stringstack     rmb 5*stringstacknum            ; reserve space for the anonymous string descriptor stack           
 stringstackend  equ *                           ; end of string stack buffer
+strbuff         rmb 20                          ; temporary string buffer for converting numbers and other things
                 ifne *&0x1ff
                 rmb 0x200-(*&0x1ff)
                 endc