view src/fps.s @ 110:00c8df0b61f5

Fix some logic errors in floating point multiplication Floating point muliplication now yields correct results for a range of tests which is promising. Among the errors were not handling the overflow situation when an extra digit is needed and at least one inverted branch condition.
author William Astle <lost@l-w.ca>
date Mon, 25 Dec 2023 22:37:55 -0700
parents 1a5da3594a9e
children df803556bfae
line wrap: on
line source

                *pragmapush list
                *pragma list
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision floating point arithmetic package
;
; The single precision floating point values are stored as follows (unpacked):
;
; Byte          Length          What
; 0             1               Exponent with a +63 bias, 0 = number is zero
; 1             5               BCD significand (10 digits)
; 6             1               Sign (00 = positive, FF = negative)
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Unpack single precision BCD floating point at (X) to fpa1
fps_unpack1     ldb ,x                          ; get sign and exponent
                bne fps_unpack1b                ; brif not zero
                ldd zero                        ; zero out the entire value to represent 0
                std fpa1
                std fpa1+2
                std fpa1+4
                sta fpa1+6
                rts
fps_unpack1b    sex                             ; get sign flag to store
                sta fpa1+fpa.sign               ; set accumulator
                andb #0x7f                      ; lose sign from exponent
                stb fpa1+fpa.exp                ; set exponent
                ldd 1,x                         ; copy signficand over
                std fpa1+fpa.sig
                ldd 3,x
                std fpa1+fpa.sig+2
                lda 5,x
                sta fpa1+fpa.sig+4
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Unpack single precision BCD floating point at (X) to fpa0
fps_unpack0     ldb ,x                          ; get sign and exponent
                bne fps_unpack0b                ; brif not zero
                ldd zero                        ; zero out the entire value to represent 0
                std fpa0
                std fpa0+2
                std fpa0+4
                sta fpa0+6
                rts
fps_unpack0b    sex                             ; get sign flag to store
                sta fpa0+fpa.sign               ; set accumulator
                andb #0x7f                      ; lose sign from exponent
                stb fpa0+fpa.exp                ; set exponent
                ldd 1,x                         ; copy signficand over
                std fpa0+fpa.sig
                ldd 3,x
                std fpa0+fpa.sig+2
                lda 5,x
                sta fpa0+fpa.sig+4
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Pack single precision BCD floating point in fpa1 to (X)
fps_pack1       lda fpa1+fpa.sign               ; get sign bits
                anda #0x80                      ; only keep high bit
                ora fpa1+fpa.exp                ; merge with exponent
                sta ,x                          ; put in destination
                ldd fpa1+fpa.sig                ; copy significand over
                std 1,x
                ldd fpa1+fpa.sig+2
                std 3,x
                lda fpa1+fpa.sig+4
                sta 5,x
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Pack single precision BCD floating point in fpa0 to (X)
fps_pack0       lda fpa0+fpa.sign               ; get sign bits
                anda #0x80                      ; only keep high bit
                ora fpa0+fpa.exp                ; merge with exponent
                sta ,x                          ; put in destination
                ldd fpa0+fpa.sig                ; copy significand over
                std 1,x
                ldd fpa0+fpa.sig+2
                std 3,x
                lda fpa0+fpa.sig+4
                sta 5,x
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copy binary arguments
fps_copyargs    clr fpa0+fpa.extra              ; clear extra precision bits
                clr fpa1+fpa.extra
                bsr fps_unpack0                 ; unpack first argument to fpa0
                leax ,u                         ; point to second argument
                bra fps_unpack1                 ; now unpack it to fpa1 and return
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision BCD floating point subtraction
; (X) - (U) -> (Y)
fps_sub         bsr fps_copyargs                ; copy input arguments
                ldb fpa1+fpa.exp                ; subtracting zero?
                beq fps_add0                    ; brif so - do nothing
                com fpa1+fpa.sign               ; invert sign
                bra fps_add1                    ; go handle addition of negative
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision BCD floating point addition
; (X) + (U) -> (Y)
;
; Note: must denormalize the *shorter* value. However, we may end up with values the same length so we may not be able
; to easily identify which one is larger.
;
; Note that the magnitude can get substantially smaller after addition of a negative so renormalization is necessary
; after the operation. As a result, some additional precision is maintained during the addition operation to allow for
; correct rounding.
;
; Rounding will be according to the standard rules: 0...4 go toward zero, 5...9 go away from zero.
fps_add         bsr fps_copyargs                ; fetch arguments
                ldb fpa1+fpa.exp                ; is second argument zero?
                bne fps_add1                    ; brif not
fps_add0        leax ,y                         ; if second argument is zero, do nothing and return first operand
                bra fps_pack0
fps_add1        ldb fpa0+fpa.exp                ; is first argument zero?
                bne fps_add3                    ; brif not
fps_add2        leax ,y                         ; point to output argument
                bra fps_pack1                   ; pack second argument to result and return
fps_add3        ldd zero                        ;* initialize extra precision bytes following significand which we need
                std fpa0+fpa.sig+5              ;* to handle denormalization of the smaller operand
                std fpa0+fpa.sig+7
                sta fpa0+fpa.sig+9
                std fpa1+fpa.sig+5
                std fpa1+fpa.sig+7
                sta fpa1+fpa.sig+9
                ldb fpa0+fpa.exp                ; fetch exponent of second argument
                subb fpa1+fpa.exp               ; calculate exponent difference
                lbeq fps_add8                   ; brif same exponent - no need to denormalize
                bcs fps_add4                    ; brif second operand is bigger
                ldx #fpa1                       ; point to second operand to denormalize
                cmpb #10                        ; are we going to shift more than the precision?
                bhs fps_add0                    ; brif so - return first operand
                bra fps_add5                    ; go shift operand right by B places
fps_add4        negb                            ; get positive number of shifts
                cmpb #10                        ; shifting more than precision?
                bhs fps_add2                    ; brif so - return second operand
                ldx #fpa0                       ; shift left operand right B places
                lda fpa1+fpa.exp                ; set exponent of result to larger of two
                sta fpa0+fpa.exp
fps_add5        subb #2                         ; do we need at least two places?
                bcs fps_add6                    ; brif not
                lda fpa.sig+8,x                 ; shift right into extra digits
                sta fpa.sig+9,x
                lda fpa.sig+7,x
                sta fpa.sig+8,x
                lda fpa.sig+6,x
                sta fpa.sig+7,x
                lda fpa.sig+5,x
                sta fpa.sig+6,x
                lda fpa.sig+4,x
                sta fpa.sig+5,x
                lda fpa.sig+3,x
                sta fpa.sig+4,x
                lda fpa.sig+2,x
                sta fpa.sig+3,x
                lda fpa.sig+1,x
                sta fpa.sig+2,x
                lda fpa.sig,x
                sta fpa.sig+1,x
                clr fpa.sig,x
                bra fps_add5                    ; go see if we have more to shift
fps_add6        incb                            ; do we still have a digit to shift?
                bne fps_add8                    ; brif not
                lsr fpa.sig,x                   ; shift a digit right
                ror fpa.sig+1,x
                ror fpa.sig+2,x
                ror fpa.sig+3,x
                ror fpa.sig+4,x
                ror fpa.sig+5,x
                ror fpa.sig+6,x
                ror fpa.sig+7,x
                ror fpa.sig+8,x
                ror fpa.sig+9,x
                lsr fpa.sig,x
                ror fpa.sig+1,x
                ror fpa.sig+2,x
                ror fpa.sig+3,x
                ror fpa.sig+4,x
                ror fpa.sig+5,x
                ror fpa.sig+6,x
                ror fpa.sig+7,x
                ror fpa.sig+8,x
                ror fpa.sig+9,x
                lsr fpa.sig,x
                ror fpa.sig+1,x
                ror fpa.sig+2,x
                ror fpa.sig+3,x
                ror fpa.sig+4,x
                ror fpa.sig+5,x
                ror fpa.sig+6,x
                ror fpa.sig+7,x
                ror fpa.sig+8,x
                ror fpa.sig+9,x
                lsr fpa.sig,x
                ror fpa.sig+1,x
                ror fpa.sig+2,x
                ror fpa.sig+3,x
                ror fpa.sig+4,x
                ror fpa.sig+5,x
                ror fpa.sig+6,x
                ror fpa.sig+7,x
                ror fpa.sig+8,x
                ror fpa.sig+9,x
fps_add8        clra                            ; clear carry so regular addition works below
                lda fpa0+fpa.sign               ; do signs differ?
                eora fpa1+fpa.sign
                sta fpa1+fpa.exp                ; non-zero if signs differ; we don't need fpa1 exponent any more
                beq fps_add11                   ; brif not - just add
                ldx #fpa1                       ; default to second argument being smaller
                lda fpa0+fpa.sig                ; compare high digits of significand
                cmpa fpa1+fpa.sig
                bne fps_add9                    ; brif top digits differ
                lda fpa0+fpa.sig+1              ; next digits?
                cmpa fpa1+fpa.sig+1
                bne fps_add9                    ; brif digits differ; pattern continues
                lda fpa0+fpa.sig+2
                cmpa fpa1+fpa.sig+2
                bne fps_add9
                lda fpa0+fpa.sig+3
                cmpa fpa1+fpa.sig+3
                bne fps_add9
                lda fpa0+fpa.sig+4
                cmpa fpa1+fpa.sig+4
                bne fps_add9
                lda fpa0+fpa.sig+5
                cmpa fpa1+fpa.sig+5             ; don't have to check other extras; the first extra will be decisive
fps_add9        bhs fps_add10                   ; brif first operand is bigger
                ldx #fpa0                       ; point to first operand as smaller
                lda fpa1+fpa.sign               ; set result sign to that of larger operand
                sta fpa0+fpa.sign
fps_add10       ldd #0x9999                     ;* calculate 9's complement of smaller operand (X must already point to it)
                subd fpa.sig,x                  ;* we'll complete the 10's complement by setting C on the way into the
                std fpa.sig,x                   ;* addition sequence
                ldd #0x9999
                subd fpa.sig+2,x
                std fpa.sig+2,x
                ldd #0x9999
                subd fpa.sig+4,x
                std fpa.sig+4,x
                ldd #0x9999
                subd fpa.sig+6,x
                std fpa.sig+6,x
                ldd #0x9999
                subd fpa.sig+8,x
                std fpa.sig+8,x
                coma                            ; set carry going into add
fps_add11       lda fpa0+fpa.sig+9              ; do the addition (10 bytes)
                adca fpa1+fpa.sig+9
                daa
                sta fpa0+fpa.sig+9
                lda fpa0+fpa.sig+8
                adca fpa1+fpa.sig+8
                daa
                sta fpa0+fpa.sig+8
                lda fpa0+fpa.sig+7
                adca fpa1+fpa.sig+7
                daa
                sta fpa0+fpa.sig+7
                lda fpa0+fpa.sig+6
                adca fpa1+fpa.sig+6
                daa
                sta fpa0+fpa.sig+6
                lda fpa0+fpa.sig+5
                adca fpa1+fpa.sig+5
                daa
                sta fpa0+fpa.sig+5
                lda fpa0+fpa.sig+4
                adca fpa1+fpa.sig+4
                daa
                sta fpa0+fpa.sig+4
                lda fpa0+fpa.sig+3
                adca fpa1+fpa.sig+3
                daa
                sta fpa0+fpa.sig+3
                lda fpa0+fpa.sig+2
                adca fpa1+fpa.sig+2
                daa
                sta fpa0+fpa.sig+2
                lda fpa0+fpa.sig+1
                adca fpa1+fpa.sig+1
                daa
                sta fpa0+fpa.sig+1
                lda fpa0+fpa.sig
                adca fpa1+fpa.sig
                daa
                sta fpa0+fpa.sig
                ror fpa1+fpa.exp                ;* do sign flag and carry differ? will set V if so; we will never have
                rol fpa1+fpa.exp                ;* a real carry on the subtract case but might on the add case
                bvc fps_normalize               ; brif we didn't overflow significand - we can normalize the result
                inc fpa0+fpa.exp                ; bump exponent to account for overflow
                lbmi OVERROR                    ; brif it overflowed
                ror fpa0+fpa.sig                ; do a bit dance
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                ror fpa0+fpa.sig+5
                ror fpa0+fpa.sig+6
                ror fpa0+fpa.sig+7
                ror fpa0+fpa.sig+8
                ror fpa0+fpa.sig+9
                ror fpa0+fpa.sig
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                ror fpa0+fpa.sig+5
                ror fpa0+fpa.sig+6
                ror fpa0+fpa.sig+7
                ror fpa0+fpa.sig+8
                ror fpa0+fpa.sig+9
                ror fpa0+fpa.sig
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                ror fpa0+fpa.sig+5
                ror fpa0+fpa.sig+6
                ror fpa0+fpa.sig+7
                ror fpa0+fpa.sig+8
                ror fpa0+fpa.sig+9
                ror fpa0+fpa.sig
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                ror fpa0+fpa.sig+5
                ror fpa0+fpa.sig+6
                ror fpa0+fpa.sig+7
                ror fpa0+fpa.sig+8
                ror fpa0+fpa.sig+9
                ; fall through to normalization routine
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Normalize a floating point value in fpa0 with extra precision digits at fpaextra (up to 5 bytes) and return the
; packed result at (Y)
;
; The first step is to shift the significand left until a nonzero digit is in the leftmost position. This will bring
; in extra precision digits from fpaextra through fpaextra4 until either 10 shifts are done or the decimal exponent
; underflows. In both of those cases, the result will be zero.
;
; Once the leftmost digit is nonzero, the leftmost extra precision digit is checked to see if it is >= 5. If so, the
; significand will have 1 added to it. If that triggers a carry, the decimal exponent will be incremented and the
; significand will have its leftmost digit set to 1.
;
; This will trigger an overflow if the decimal exponent exceeds the allowed range.
fps_normalize   bsr fps_normalizea0             ; do the normalization in fpa0
                leax ,y                         ; point to the desired destination
                jmp fps_pack0                   ; go pack the result to the correct destination
fps_normalizea0 clrb                            ; initialize the exponent adjustment counter
fps_normalize0  lda fpa0+fpa.sig                ; do we have a nonzero digit in the first pair?
                bne fps_normalize1              ; brif so
                lda fpa0+fpa.sig+1              ; shift everything left 2 spaces
                sta fpa0+fpa.sig
                lda fpa0+fpa.sig+2
                sta fpa0+fpa.sig+1
                lda fpa0+fpa.sig+3
                sta fpa0+fpa.sig+2
                lda fpa0+fpa.sig+4
                sta fpa0+fpa.sig+3
                lda fpa0+fpa.extra
                sta fpa0+fpa.sig+4
                lda fpa0+fpa.extra+1
                sta fpa0+fpa.extra
                lda fpa0+fpa.extra+2
                sta fpa0+fpa.extra+1
                lda fpa0+fpa.extra+3
                sta fpa0+fpa.extra+2
                lda fpa0+fpa.extra+4
                sta fpa0+fpa.extra+3
                clr fpa0+fpa.extra+4
                subb #2                         ; account for two digit positions
                cmpb #-10                       ; have we shifted the whole set of digits (assumes originally normalized)
                bgt fps_normalize0              ; brif not
fps_normalize4  clr fpa0+fpa.exp                ; set result to zero
                clr fpa0+fpa.sign
fps_normalize5  rts
fps_normalize1  bita #0xf0                      ; is the high digit zero?
                bne fps_normalize3              ; brif not
                lsl fpa0+fpa.extra              ; only need to shift one extra position here since there won't be more shifts
                rol fpa0+fpa.sig+4
                rol fpa0+fpa.sig+3
                rol fpa0+fpa.sig+2
                rol fpa0+fpa.sig+1
                rol fpa0+fpa.sig
                lsl fpa0+fpa.extra
                rol fpa0+fpa.sig+4
                rol fpa0+fpa.sig+3
                rol fpa0+fpa.sig+2
                rol fpa0+fpa.sig+1
                rol fpa0+fpa.sig
                lsl fpa0+fpa.extra
                rol fpa0+fpa.sig+4
                rol fpa0+fpa.sig+3
                rol fpa0+fpa.sig+2
                rol fpa0+fpa.sig+1
                rol fpa0+fpa.sig
                lsl fpa0+fpa.extra
                rol fpa0+fpa.sig+4
                rol fpa0+fpa.sig+3
                rol fpa0+fpa.sig+2
                rol fpa0+fpa.sig+1
                rol fpa0+fpa.sig
                decb                            ; account for digit shift
fps_normalize3  addb fpa0+fpa.exp               ; adjust exponent
                stb fpa0+fpa.exp
                ble fps_normalize4              ; brif we underflowed to zero
                ldb fpaextra                    ; get extra precision digit
                andb #0xf0                      ; keep only the highest extra precision digit
                cmpb #0x50                      ; do we need to round?
                blo fps_normalize5              ; brif not
                lda fpa0+fpa.sig+4              ; bump low digits
                adda #1
                daa
                sta fpa0+fpa.sig+4
                bcc fps_normalize5              ; brif no carry - done
                lda fpa0+fpa.sig+3              ; keep going until all significand bytes handled
                adda #1
                daa
                sta fpa0+fpa.sig+3
                bcc fps_normalize5
                lda fpa0+fpa.sig+2
                adda #1
                daa
                sta fpa0+fpa.sig+2
                bcc fps_normalize5
                lda fpa0+fpa.sig+1
                adda #1
                daa
                sta fpa0+fpa.sig+1
                bcc fps_normalize5
                lda fpa0+fpa.sig
                adda #1
                daa
                sta fpa0+fpa.sig
                bcc fps_normalize5
                lda #0x10                       ; overflowed the significand - shift a 1 digit in
                sta fpa0+fpa.sig
                inc fpa0+fpa.exp                ; and bump exponent to reflect that
                bpl fps_normalize5              ; brif we didn't overflow - return result
OVERROR         ldb #err_ov                     ; raise overflow
                jmp ERROR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision BCD multiply (X) by (U)
;
; Calculate: (X) * (U) -> (Y)
;
; First, the routine calculates the new exponent and sign. The exponents simply add together while the sign is the
; exclusive OR of the two input signs. That gives negative for differing signs and positive for matching signs.
; Then the multiplication of the significands works by initializing an accumulator large enough to hold twice as
; many digits as each significand to zeroes. It also needs an extra byte to the left to handle overflow for each
; digit operation. It then works through each digit of the multiplier and uses that as a count to add the
; multiplicand to the *high digits* of the accumulator. Then, before moving to the next digit, the accumulator and
; mulltiplier are shifted right and the process is repeated. Once the process is done, the result required for the
; floating point calculation will be the high 10 digits of the accumulator with extra digits stored in the
; remaining bytes.
;
; Once the multiplication is complete, the significand is normalized. See above.
fps_mul         jsr fps_copyargs                ; fetch arguments
                lda fpa0+fpa.exp                ; first argumennt zero?
                bne fps_mul0                    ; brif not
                leax ,y                         ; return first argument (0)
                jmp fps_pack0
fps_mul0        lda fpa1+fpa.exp                ; second argument zero?
                bne fps_mul1                    ; brif not
                leax ,y                         ; return second argument (0)
                jmp fps_pack1
fps_mul1        lda fpa0+fpa.sign               ; calculate result sign
                eora fpa1+fpa.sign
                sta fpa0+fpa.sign               ; result will go into fpa0
                lda fpa0+fpa.exp                ; fetch exponent of multiplicand
                suba #64                        ; remove bias
                sta fpa0+fpa.exp                ; save it
                lda fpa1+fpa.exp                ; get exponent of multplier
                suba #64                        ; remove bias
                adda fpa0+fpa.exp               ; add exponents
                cmpa #63                        ; did we overflow upward?
                bgt OVERROR                     ; brif so
                cmpa #-63                       ; did we underflow?
                bge fps_mul3                    ; brif not
fps_mul2        clr fpa0+fpa.exp                ; return zero if we underflow
                leax ,y
                jmp fps_pack0
fps_mul3        adda #64                        ; add bias to resulting exponent
                sta fpa0+fpa.exp                ; save result exponent
                ldd zero                        ;* zero out result buffer high digits (extra digits + significand and
                std fpaextra                    ;* one extra byte to make sure lowest extra digit is zero)
                std fpaextra+2
                std fpaextra+4
                sta fpaextra+6
                ldb fpa1+fpa.sig+4              ;* do each byte of multiplier significand in sequence
                bsr fps_mul5                    ;* don't shift for first digit
                ldb fpa1+fpa.sig+3
                bsr fps_mul4
                ldb fpa1+fpa.sig+2
                bsr fps_mul4
                ldb fpa1+fpa.sig+1
                bsr fps_mul4
                ldb fpa1+fpa.sig
                bsr fps_mul4
                ldb fpaextra                    ; did we have an overflow on the last digit?
                beq fps_mul3a                   ; brif not
                bsr fps_mul9                    ; shift digits right one digit to move extra digit in
                inc fpa0+fpa.exp                ; bump exponent for result
                bmi OVERROR                     ; brif we went out of range (went > 127)
fps_mul3a       ldd fpaextra+1                  ; copy result into fpa0 significand (overflow byte will be zero)
                std fpa0+fpa.sig
                ldd fpaextra+3
                std fpa0+fpa.sig+2
                ldd fpaextra+5
                sta fpa0+fpa.sig+4
                ldd fpaextra+7
                std fpa0+fpa.sig+6
                ldd fpaextra+9
                std fpa0+fpa.sig+8
                jmp fps_normalize               ; go normalize the result
fps_mul4        bsr fps_mul9                    ; shift result right
fps_mul5        bitb #0x0f                      ; are we finished this digit?
                beq fps_mul6                    ; brif so
                bsr fps_mul11                   ; add multiplicand to accumulator
                decb                            ; adjust digit
                bra fps_mul5                    ; see if we're done yet
fps_mul6        bsr fps_mul9                    ; shift the result right
fps_mul7        bitb #0xf0                      ; done this digit?
                bne fps_mul8                    ; brif not
                rts                             ; we're done now so return
fps_mul8        bsr fps_mul11                   ; add multiplicand to accumulator
                subb #0x010                     ; adjust digit
                bra fps_mul7                    ; see if we're done yet
fps_mul9        lsr fpaextra                    ; shift result
                ror fpaextra+1
                ror fpaextra+2
                ror fpaextra+3
                ror fpaextra+4
                ror fpaextra+5
                ror fpaextra+6
                ror fpaextra+7
                ror fpaextra+8
                ror fpaextra+9
                ror fpaextra+10
                lsr fpaextra
                ror fpaextra+1
                ror fpaextra+2
                ror fpaextra+3
                ror fpaextra+4
                ror fpaextra+5
                ror fpaextra+6
                ror fpaextra+7
                ror fpaextra+8
                ror fpaextra+9
                ror fpaextra+10
                lsr fpaextra
                ror fpaextra+1
                ror fpaextra+2
                ror fpaextra+3
                ror fpaextra+4
                ror fpaextra+5
                ror fpaextra+6
                ror fpaextra+7
                ror fpaextra+8
                ror fpaextra+9
                ror fpaextra+10
                lsr fpaextra
                ror fpaextra+1
                ror fpaextra+2
                ror fpaextra+3
                ror fpaextra+4
                ror fpaextra+5
                ror fpaextra+6
                ror fpaextra+7
                ror fpaextra+8
                ror fpaextra+9
                ror fpaextra+10
                rts
fps_mul11       lda fpa0+fpa.sig+4              ; add digits starting at the bottom
                adda fpaextra+5
                daa
                sta fpaextra+5
                lda fpa0+fpa.sig+3
                adca fpaextra+4
                daa
                sta fpaextra+4
                lda fpa0+fpa.sig+2
                adca fpaextra+3
                daa
                sta fpaextra+3
                lda fpa0+fpa.sig+1
                adca fpaextra+2
                daa
                sta fpaextra+2
                lda fpa0+fpa.sig
                adca fpaextra+1
                daa
                sta fpaextra+1
                bcc fps_mul12
                inc fpaextra                    ; handle carry out
fps_mul12       rts 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision BCD divide
;
; Calculate (X) รท (U) -> (Y)
;
; First, calculate the result exponent by subtracting the decimal exponent of (U) from that of (X). If the result goes
; above the exponent range, raise overflow. If it goes below the exponent range, return zero.
;
; The calculate the result sign the same as for multiplcation. Also calculate the 10's complement of the divisor.
;
; First, we copy the divdend into the extra precision digits in fpa1 since they aren't needed for anything else
; here. Then we set the desired nonzero digit counter to 11 to indicate 10 significant digits plus a digit for
; rounding and set the flag indicating a nonzero digit was seen to zero to indicate it hasn't been seen. Also set
; the extra carry byte to zero. Then we run through the digit pair routine 10 times to set 20 digits of the
; significand and extra precision for fpa0. The digit pair routine just calls the digit loop twice while doing
; some bookkeeping.
;
; We also pre-cacluate the 9's complement of the divisor and store that in fpaextra to save time during the
; subtraction steps.
;
; For each digit, the process is as follows:
;
; 1. If the desired digit count is zero, do nothing and just return a zero for the quotient digit.
; 2. Initialize the quotient digit to 0
; 3. If the extra carry is nonzero, we know the result "goes" so go on to step 6
; 4. Compare the divisor to the residue; this is faster on average than doing the subtraction and then
;    reversing the last subtraction loop.
; 5. If the divisor goes *exactly* into the residue, bump the quotient digit, clear the desired digit count
;    and return the quotient digit; we have no need to do anything more
; 6. If the divisor does go, subtract the divisor from the residue, bump the digit, and go back to step 3
; 7. If the resulting digit is nonzero, set the "nonzero digit seen" flag to 1 (exactly one)
; 8. Subtract the nonzero digit flag from the desired digits flag so after enough digits, we stop doing the
;    division loops.
; 9. Return the quotient digit.
fps_div         jsr fps_copyargs                ; get arguments to the accumulators
                lda fpa1+fpa.exp                ; get divisor exponent
                bne fps_div0                    ; brif not zero
DIV0ERROR       ldb #err_div0                   ; raise division by zero
                jmp ERROR
fps_div0        suba #64                        ; remove bias
                sta fpa1+fpa.exp                ; save for later calculation
                lda fpa0+fpa.exp                ; get quotient exponent
                bne fps_div2                    ; brif not zero - we have to do work
fps_div1        clr fpa0+fpa.exp                ; make sure result is zero
                leax ,y                         ; return zero result
                jmp fps_pack0
fps_div2        suba #64                        ; remove bias
                suba fpa1+fpa.exp               ; subtract divisor exponent
                cmpa #64                        ; did we overflow upward?
                lbge OVERROR                    ; brif so
                cmpa #-64                       ; did we overflow downward (underflow)?
                bls fps_div1                    ; brif we underflow
                adda #64                        ; add back the bias
                sta fpa0+fpa.exp                ; set result exponent
                lda fpa0+fpa.sign               ; calculate result sign (XOR of argument signs)
                eora fpa1+fpa.sign
                sta fpa0+fpa.sign
                ldd fpa0+fpa.sig                ; initialize residue to dividend
                std fpa1+fpa.sig
                ldd fpa0+fpa.sig+2
                std fpa1+fpa.sig+2
                lda fpa0+fpa.sig+4
                sta fpa1+fpa.sig+4
                ldd #11                         ; initialize digit counter and nonzero seen flag
                std fpaextra+5
                sta fpaextra+7                  ; set ongoing extra carry digits to zero 
                ldd #0x9999                     ;* calculate 9's complement of divisor for later; we'll introduce a carry
                subd fpa1+fpa.sig               ;* to the first byte to complete the 10's complement's +1 to save doing
                std fpaextra                    ;* the extra work here
                ldd #0x9999
                subd fpa1+fpa.sig+2
                std fpaextra+2
                lda #0x99
                suba fpa1+fpa.sig+4
                sta fpaextra+4
                bsr fps_div3                    ; calculate the quotient byte by byte
                stb fpa0+fpa.sig
                bsr fps_div3
                stb fpa0+fpa.sig+1
                bsr fps_div3
                stb fpa0+fpa.sig+2
                bsr fps_div3
                stb fpa0+fpa.sig+3
                bsr fps_div3
                stb fpa0+fpa.sig+4
                bsr fps_div3
                stb fpa0+fpa.sig+5
                bsr fps_div3
                stb fpa0+fpa.sig+6
                bsr fps_div3
                stb fpa0+fpa.sig+7
                bsr fps_div3
                stb fpa0+fpa.sig+8
                bsr fps_div3
                stb fpa0+fpa.sig+9
                jmp fps_normalize               ; go normalize the result and return
fps_div3        bsr fps_div5                    ; do a digit
                lslb                            ; shift it over
                lslb
                lslb
                lslb
                sta fpaextra+8                  ; save it
                bsr fps_div5                    ; do next digit
                addb fpaextra+8                 ; combine the two quotient digits
fps_div4        rts
fps_div5        ldb fpaextra+6                  ; do we even need to do the division?
                beq fps_div4                    ; brif not - return 0
                clrb                            ; initialize quotient digit
fps_div6        lda fpaextra+7                  ; did we have a carry last time?
                bne fps_div7                    ; brif so - we know it "goes"
                lda fpa1+fpa.sig                ; is the divisor less than the dividend residue?
                cmpa fpa1+fpa.sig+5
                bhi fps_div8                    ; brif high byte is larger than residue
                lda fpa1+fpa.sig+1              ; and keep going for all 5 bytes
                cmpa fpa1+fpa.sig+6
                bhi fps_div8
                lda fpa1+fpa.sig+2
                cmpa fpa1+fpa.sig+7
                bhi fps_div8
                lda fpa1+fpa.sig+3
                cmpa fpa1+fpa.sig+8
                bhi fps_div8
                lda fpa1+fpa.sig+4
                cmpa fpa1+fpa.sig+9
                bhi fps_div8                    ; brif divisor is greater than the residue
                bne fps_div7                    ; brif it didn't go exactly
                incb                            ; bump quotient for this "go"
                clr fpaextra+6                  ; indicate no more digits needed
                rts
fps_div7        coma                            ; set carry to complete 10's complement of divisor
                lda fpa1+fpa.sig+9              ; do the "subtraction"
                adca fpaextra+4
                daa
                sta fpa1+fpa.sig+9
                lda fpa1+fpa.sig+8
                adca fpaextra+3
                daa
                sta fpa1+fpa.sig+8
                lda fpa1+fpa.sig+7
                adca fpaextra+2
                daa
                sta fpa1+fpa.sig+7
                lda fpa1+fpa.sig+6
                adca fpaextra+1
                daa
                sta fpa1+fpa.sig+6
                lda fpa1+fpa.sig+5
                adca fpaextra
                daa
                sta fpa1+fpa.sig+5
                lda fpaextra+7                  ; and handle the carry byte
                adca #0x99
                sta fpaextra+7
                incb                            ; bump digit count
                bra fps_div6                    ; go see if we need another subtraction
fps_div8        tstb                            ; nonzero digit?
                beq fps_div9                    ; brif not
                lda #1                          ; set nonzero flag
                sta fpaextra+5
fps_div9        lda fpaextra+6                  ; adjust digit count
                suba fpaextra+5
                sta fpaextra+6
                lsl fpa1+fpa.sig+9              ; shift residue one decimal digit
                rol fpa1+fpa.sig+8
                rol fpa1+fpa.sig+7
                rol fpa1+fpa.sig+6
                rol fpa1+fpa.sig+5
                rol fpaextra+7
                lsl fpa1+fpa.sig+9
                rol fpa1+fpa.sig+8
                rol fpa1+fpa.sig+7
                rol fpa1+fpa.sig+6
                rol fpa1+fpa.sig+5
                rol fpaextra+7
                lsl fpa1+fpa.sig+9
                rol fpa1+fpa.sig+8
                rol fpa1+fpa.sig+7
                rol fpa1+fpa.sig+6
                rol fpa1+fpa.sig+5
                rol fpaextra+7
                lsl fpa1+fpa.sig+9
                rol fpa1+fpa.sig+8
                rol fpa1+fpa.sig+7
                rol fpa1+fpa.sig+6
                rol fpa1+fpa.sig+5
                rol fpaextra+7
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Convert a floating point number in val0 to a string in strbuff

; The maximum size of a string generated here is 19 bytes representing 10 significant digits, 5 leading zeroes after the
; decimal point, the decimal point, a leading zero, a leading sign, and a trailing NUL.
fps_toascii     ldu #strbuff+1                  ; point to output buffer
                lda #0x20                       ; set sign to "blank"
                sta -1,u
                clr fpaextra+1                  ; set decimal point offset; zero means none
                clr fpaextra+3                  ; disable "E" notation
                ldx #val0+val.value             ; unpack value to fpa0 so we can mess with it
                jsr fps_unpack0
                lda fpa0+fpa.exp                ; check exponent
                bne fps_toascii0                ; brif not zero
                lda #'0                         ; make the number zero and print it
                sta ,u
                clr 1,u
                rts
fps_toascii0    lda fpa0+fpa.sign               ; negative?
                bpl fps_toascii1                ; brif not
                lda #'-                         ; negative sign
                sta -1,u
fps_toascii1    lda #10                         ; assume 10 significant digits exist
                ldx #fpa0+fpa.sig+5             ; point to significand
fps_toascii2    ldb ,-x                         ; get digit pair
                bitb #0x0f                      ; is right digit zero?
                bne fps_toascii3                ; brif not - we've counted all the significant digits
                deca                            ; trailing zero - uncount it
                bitb #0xf0                      ; is left digit zero?
                bne fps_toascii3                ; brif not - we've counted all the sigificant digits
                deca                            ; trailing zero - uncount it
                cmpx #fpa0+fpa.sig              ; done all significant digits?
                bhi fps_toascii2                ; brif not
fps_toascii3    sta fpaextra                    ; save significant digits that exist
                lda fpa0+fpa.exp                ; fetch exponent
                suba #64                        ; remove bias
                sta fpa0+fpa.exp                ; save for later
                bpl fps_toascii14               ; brif no leading zeroes
                suba fpaextra                   ; get number of significant digits plus decimal count
                cmpa #-15                       ; do we end up with too many digits (leading 0 + significant digits)
                blt fps_toascii15               ; brif too many - do scientific notation
                ldd #'0*256+'.                  ; put a leading "0."
                std ,u++
                ldb fpa0+fpa.exp                ; get exponent back, which is the number of leading digits (negative)
                bra fps_toascii4a               ; go add leading zeroes if needed
fps_toascii4    sta ,u+                         ; put a zero
fps_toascii4a   incb                            ; done all 
                bne fps_toascii4                ; brif not
fps_toascii5    ldx #fpa0+fpa.sig               ; point to significand
                ldd #0xf000                     ; set digit mask and counter
                sta fpaextra+2
fps_toascii6    lda ,x                          ; get digit pair
                anda fpaextra+2                 ; keep the desired digit
                bita #0xf0                      ; is it the left hand digit?
                beq fps_toascii7                ; brif not or digit is 0
                lsra                            ; right justify digit
                lsra
                lsra
                lsra
fps_toascii7    adda #0x30                      ; turn it into ascii
                sta ,u+                         ; stuff it in the output
                dec fpaextra                    ; done all digits?
                beq fps_toascii9                ; brif so
                dec fpaextra+1                  ; are we at the decimal point?
                bne fps_toascii8                ; brif not
                lda #'.                         ; put a decimal
                sta ,u+
fps_toascii8    com fpaextra+2                  ; flip digit mask
                bpl fps_toascii6                ; handle another digit
                leax 1,x                        ; move to next digit byte
                bra fps_toascii6                ; now go handle next digit
fps_toascii9    ldb fpaextra+3                  ; get decimal exponent to display
                beq fps_toascii13
                lda #'E                         ; output "E"
                sta ,u+
                tstb                            ; negative?
                bpl fps_toascii10               ; brif not
                comb                            ; positivize it, but also subtract 1?
                lda #'-                         ; put a minus for the exponent
                sta ,u+
fps_toascii10   cmpb #10                        ; do we have two digits for exponent?
                blo fps_toascii12               ; brif not
                lda #0x2f                       ; initialize left digit
fps_toascii11   inca                            ; bump digit
                subb #10                        ; are we at the right digit?
                bpl fps_toascii11               ; brif not yet
                addb #10                        ; undo extra subtraction
                sta ,u+                         ; save left digit
fps_toascii12   addb #0x30                      ; turn right digit to ascii
                stb ,u+                         ; save right digit
fps_toascii13   clr ,u                          ; put a NUL at the end of the result
                rts
fps_toascii14   cmpa #9                         ; is it in range for number of significant digits?
                bgt fps_toascii15               ; brif not - do scientific notation
                inca                            ; exponent 0 has decimal point after first digit
                sta fpaextra+1                  ; save decimal point location
                cmpa fpaextra                   ; is it more than the number of significant digits?
                ble fps_toascii5                ; brif not - just convert the significand
                sta fpaextra                    ; make sure we include the pre-decimal zeroes
                bra fps_toascii5                ; go convert the significand
fps_toascii15   ldb #1                          ; put decimal after the first digit
                stb fpaextra+1
                sta fpaextra+3                  ; enable the "E" notation with the correct exponent
                bra fps_toascii5                ; actually convert the number
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Convert a 32 bit integer at (X) to BCD floating point. This requires converting the 32 bit binary value to BCD which
; can be done using the double/dabble algorithm. fpa0 and fpaextra is used as temporary storage. X must point to a value.
; Enter at fps_fromuint to treat the 32 value as unsigned.
fps_fromint     ldb val.int,x                   ; is the integer negative?
                bpl fps_fromuint                ; brif not
                sex                             ; set sign to negative
                sta fpa0+fpa.sign
                ldd zero                        ; negate the value and store it to temporary location
                subd val.int+2,x
                std fpa0+fpa.extra+2
                ldd zero
                sbcb val.int+1,x
                sbca val.int,x
                std fpa0+fpa.extra
                bra fps_fromint0                ; go finish the conversion
fps_fromuint    clr fpa0+fpa.sign               ; set sign to positive
                ldd val.int+2,x                 ; copy value to temporary accumulator
                std fpa0+fpa.extra+2
                ldd val.int,x
                std fpa0+fpa.extra
fps_fromint0    ldd zero                        ; zero out destination
                std fpa0+fpa.sig
                std fpa0+fpa.sig+2
                sta fpa0+fpa.sig+4
                ldd #0x4920                     ; set exponent for decimal right of 10 digit significand
                sta fpa0+fpa.exp                ; save exponent
                stb fpa0+fpa.extra+4            ; save counter
                bra fps_fromint2                ; skip digit check on the first iteration since none need adjustment
fps_fromint1    ldu #fpa0+fpa.sig               ; point to significand
                bsr fps_fromint3                ; do adjustments, 5 bytes worth of digits
                bsr fps_fromint3
                bsr fps_fromint3
                bsr fps_fromint3
                bsr fps_fromint3
fps_fromint2    lsl fpa0+fpa.extra+3            ; shift left
                rol fpa0+fpa.extra+2
                rol fpa0+fpa.extra+1
                rol fpa0+fpa.extra
                rol fpa0+fpa.sig+4
                rol fpa0+fpa.sig+3
                rol fpa0+fpa.sig+2
                rol fpa0+fpa.sig+1
                rol fpa0+fpa.sig
                dec fpa0+fpa.extra+4            ; done all digits?
                bne fps_fromint1                ; brif not
                lda #valtype_float              ; set result type to floating point
                sta val.type,x
                jsr fps_normalizea0             ; go normalize fpa0
                jmp fps_pack0                   ; pack the result to the original accumulator
; This is the digit check for the double-dabble algorith. The left digit check is only concerned if the left digit
; is 5 or higher. Since we can make that comparison without masking the bits, we don't bother. For the right digit,
; however, we do have to mask the upper digit off. It saves and ANDA and a subsequent indexed reload.
fps_fromint3    clrb                            ; set adjustment to none
                lda ,u                          ; get digits
                cmpa #0x50                      ; is digit 5 or higher?
                blo fps_fromint4                ; brif not
                ldb #0x30                       ; set adjustment factor for first digit
fps_fromint4    anda #0x0f                      ; keep low digit
                cmpa #5                         ; is it 5 or higher?
                blo fps_fromint5                ; brif not
                orb #0x03                       ; set adjustment for second digit
fps_fromint5    addb ,u                         ; add adjustment to digits
                stb ,u+                         ; save new digit values and move on
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Convert the floating point value in the value accumulator at (X) to a 32 bit signed integer. Will trigger an overflow
; if the magnitude is too large and it will truncate (round toward zero) any fractional value.
fps_toint       jsr fps_unpack0                 ; unpack the floating point value to fpa0
                bsr fps_tointa0                 ; convert value to integer in fpa0 significand
                lda #valtype_int                ; set result to integer
                sta val.type,x
                ldd fpa0+fpa.sig+1
                std val.int,x
                ldd fpa0+fpa.sig+3
                std val.int+2,x
                rts
fps_tointa0     ldb fpa0+fpa.exp                ; compare exponent (is it too big?)
                cmpb #64+9
                bne fps_toint0
                ldd fpa0+fpa.sig                ; compare top of significand
                cmpd #0x2147
                bne fps_toint0
                ldd fpa0+fpa.sig+2              ; compare middle of significand
                cmpd #0x4836
                bne fps_toint0
                ldd fpa0+fpa.sig+4              ; compare bottom of significand plus extra digits
                cmpd #0x4800
fps_toint0      blt fps_toint1                  ; brif it fits positive or negative
                lbgt OVERROR                    ; brif it doesn't fit negative either
                ldb fpa0+fpa.sign               ; do we want negative?
                lbpl OVERROR                    ; brif not - it doesn't fit
; Enter here if we already know that the value will fit
fps_toint1      ldb #64+9                       ; biased exponent needed for decimal point to the right of significand
                subb fpa0+fpa.exp               ; number of digit shifts needed to denormalize
                beq fps_toint3                  ; brif already denormalized
                lslb                            ; do 4 shifts per digit; we're going to simply lose extra digits
                lslb
fps_toint2      lsr fpa0+fpa.sig                ; shift a digit right
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                decb                            ; done all shifts?
                bne fps_toint2
; Now convert BCD digit sequence in fpa0 significand to binary value in the low 32 bits of the significand
fps_toint3      ldb #32                         ; 32 bit shifts needed for whole significand
                stb fpa0+fpa.exp                ; use extra precision byte as counter
fps_toint4      lsr fpa0+fpa.sig                ; shift a bit into the binary result
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                ror fpa0+fpa.extra
                ror fpa0+fpa.extra+1
                ror fpa0+fpa.extra+2
                ror fpa0+fpa.extra+3
                ldu #fpa0+fpa.sig               ; point to BCD digits
fps_toint5      lda ,u                          ; get byte to check
                beq fps_toint8                  ; short circuit check if digits are 0
                anda #0x88                      ; keep bit 3 of each digit; adjustment on >= 8
                lsra                            ; shift over and mulply by adjustment factor
                lsra
                lsra
                ldb #3                          ; the adjustment is a subtraction by 3
                mul
                negb                            ; now subtract from digit
                addb ,u
                stb ,u+
fps_toint6      cmpu #fpa0+fpa.sig+5            ; done all 5 bytes?
                blo fps_toint5                  ; brif not
                dec fpa0+fpa.exp                ; done all bits?
                bne fps_toint4                  ; brif not
                ldb fpa0+fpa.sign               ; do we want negative?
                bpl fps_toint7                  ; brif not
                ldd zero                        ; negate the value through subtracting from 0
                subd fpa0+fpa.extra+2
                std fpa0+fpa.extra+2
                ldd zero
                sbcb fpa0+fpa.extra+1
                sbca fpa0+fpa.extra
                std fpa0+fpa.extra
fps_toint7      ldd fpa0+fpa.extra             ; put result in the significand
                std fpa0+fpa.sig+1
                ldd fpa0+fpa.extra+2
                std fpa0+fpa.sig+3
                rts
fps_toint8      leau 1,u                        ; move to next digit
                bra fps_toint6                  ; go back to mainline
                *pragmapop list