view src/fps.s @ 114:df803556bfae

Fix use of unsigned branch for signed comparison in floating point division
author William Astle <lost@l-w.ca>
date Wed, 27 Dec 2023 00:05:31 -0700
parents 00c8df0b61f5
children 03eb6d6b49b4
line wrap: on
line source

                *pragmapush list
                *pragma list
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision floating point arithmetic package
;
; The single precision floating point values are stored as follows (unpacked):
;
; Byte          Length          What
; 0             1               Exponent with a +63 bias, 0 = number is zero
; 1             5               BCD significand (10 digits)
; 6             1               Sign (00 = positive, FF = negative)
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Unpack single precision BCD floating point at (X) to fpa1
fps_unpack1     ldb ,x                          ; get sign and exponent
                bne fps_unpack1b                ; brif not zero
                ldd zero                        ; zero out the entire value to represent 0
                std fpa1
                std fpa1+2
                std fpa1+4
                sta fpa1+6
                rts
fps_unpack1b    sex                             ; get sign flag to store
                sta fpa1+fpa.sign               ; set accumulator
                andb #0x7f                      ; lose sign from exponent
                stb fpa1+fpa.exp                ; set exponent
                ldd 1,x                         ; copy signficand over
                std fpa1+fpa.sig
                ldd 3,x
                std fpa1+fpa.sig+2
                lda 5,x
                sta fpa1+fpa.sig+4
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Unpack single precision BCD floating point at (X) to fpa0
fps_unpack0     ldb ,x                          ; get sign and exponent
                bne fps_unpack0b                ; brif not zero
                ldd zero                        ; zero out the entire value to represent 0
                std fpa0
                std fpa0+2
                std fpa0+4
                sta fpa0+6
                rts
fps_unpack0b    sex                             ; get sign flag to store
                sta fpa0+fpa.sign               ; set accumulator
                andb #0x7f                      ; lose sign from exponent
                stb fpa0+fpa.exp                ; set exponent
                ldd 1,x                         ; copy signficand over
                std fpa0+fpa.sig
                ldd 3,x
                std fpa0+fpa.sig+2
                lda 5,x
                sta fpa0+fpa.sig+4
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Pack single precision BCD floating point in fpa1 to (X)
fps_pack1       lda fpa1+fpa.sign               ; get sign bits
                anda #0x80                      ; only keep high bit
                ora fpa1+fpa.exp                ; merge with exponent
                sta ,x                          ; put in destination
                ldd fpa1+fpa.sig                ; copy significand over
                std 1,x
                ldd fpa1+fpa.sig+2
                std 3,x
                lda fpa1+fpa.sig+4
                sta 5,x
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Pack single precision BCD floating point in fpa0 to (X)
fps_pack0       lda fpa0+fpa.sign               ; get sign bits
                anda #0x80                      ; only keep high bit
                ora fpa0+fpa.exp                ; merge with exponent
                sta ,x                          ; put in destination
                ldd fpa0+fpa.sig                ; copy significand over
                std 1,x
                ldd fpa0+fpa.sig+2
                std 3,x
                lda fpa0+fpa.sig+4
                sta 5,x
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copy binary arguments
fps_copyargs    clr fpa0+fpa.extra              ; clear extra precision bits
                clr fpa1+fpa.extra
                bsr fps_unpack0                 ; unpack first argument to fpa0
                leax ,u                         ; point to second argument
                bra fps_unpack1                 ; now unpack it to fpa1 and return
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision BCD floating point subtraction
; (X) - (U) -> (Y)
fps_sub         bsr fps_copyargs                ; copy input arguments
                ldb fpa1+fpa.exp                ; subtracting zero?
                beq fps_add0                    ; brif so - do nothing
                com fpa1+fpa.sign               ; invert sign
                bra fps_add1                    ; go handle addition of negative
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision BCD floating point addition
; (X) + (U) -> (Y)
;
; Note: must denormalize the *shorter* value. However, we may end up with values the same length so we may not be able
; to easily identify which one is larger.
;
; Note that the magnitude can get substantially smaller after addition of a negative so renormalization is necessary
; after the operation. As a result, some additional precision is maintained during the addition operation to allow for
; correct rounding.
;
; Rounding will be according to the standard rules: 0...4 go toward zero, 5...9 go away from zero.
fps_add         bsr fps_copyargs                ; fetch arguments
                ldb fpa1+fpa.exp                ; is second argument zero?
                bne fps_add1                    ; brif not
fps_add0        leax ,y                         ; if second argument is zero, do nothing and return first operand
                bra fps_pack0
fps_add1        ldb fpa0+fpa.exp                ; is first argument zero?
                bne fps_add3                    ; brif not
fps_add2        leax ,y                         ; point to output argument
                bra fps_pack1                   ; pack second argument to result and return
fps_add3        ldd zero                        ;* initialize extra precision bytes following significand which we need
                std fpa0+fpa.sig+5              ;* to handle denormalization of the smaller operand
                std fpa0+fpa.sig+7
                sta fpa0+fpa.sig+9
                std fpa1+fpa.sig+5
                std fpa1+fpa.sig+7
                sta fpa1+fpa.sig+9
                ldb fpa0+fpa.exp                ; fetch exponent of second argument
                subb fpa1+fpa.exp               ; calculate exponent difference
                lbeq fps_add8                   ; brif same exponent - no need to denormalize
                bcs fps_add4                    ; brif second operand is bigger
                ldx #fpa1                       ; point to second operand to denormalize
                cmpb #10                        ; are we going to shift more than the precision?
                bhs fps_add0                    ; brif so - return first operand
                bra fps_add5                    ; go shift operand right by B places
fps_add4        negb                            ; get positive number of shifts
                cmpb #10                        ; shifting more than precision?
                bhs fps_add2                    ; brif so - return second operand
                ldx #fpa0                       ; shift left operand right B places
                lda fpa1+fpa.exp                ; set exponent of result to larger of two
                sta fpa0+fpa.exp
fps_add5        subb #2                         ; do we need at least two places?
                bcs fps_add6                    ; brif not
                lda fpa.sig+8,x                 ; shift right into extra digits
                sta fpa.sig+9,x
                lda fpa.sig+7,x
                sta fpa.sig+8,x
                lda fpa.sig+6,x
                sta fpa.sig+7,x
                lda fpa.sig+5,x
                sta fpa.sig+6,x
                lda fpa.sig+4,x
                sta fpa.sig+5,x
                lda fpa.sig+3,x
                sta fpa.sig+4,x
                lda fpa.sig+2,x
                sta fpa.sig+3,x
                lda fpa.sig+1,x
                sta fpa.sig+2,x
                lda fpa.sig,x
                sta fpa.sig+1,x
                clr fpa.sig,x
                bra fps_add5                    ; go see if we have more to shift
fps_add6        incb                            ; do we still have a digit to shift?
                bne fps_add8                    ; brif not
                lsr fpa.sig,x                   ; shift a digit right
                ror fpa.sig+1,x
                ror fpa.sig+2,x
                ror fpa.sig+3,x
                ror fpa.sig+4,x
                ror fpa.sig+5,x
                ror fpa.sig+6,x
                ror fpa.sig+7,x
                ror fpa.sig+8,x
                ror fpa.sig+9,x
                lsr fpa.sig,x
                ror fpa.sig+1,x
                ror fpa.sig+2,x
                ror fpa.sig+3,x
                ror fpa.sig+4,x
                ror fpa.sig+5,x
                ror fpa.sig+6,x
                ror fpa.sig+7,x
                ror fpa.sig+8,x
                ror fpa.sig+9,x
                lsr fpa.sig,x
                ror fpa.sig+1,x
                ror fpa.sig+2,x
                ror fpa.sig+3,x
                ror fpa.sig+4,x
                ror fpa.sig+5,x
                ror fpa.sig+6,x
                ror fpa.sig+7,x
                ror fpa.sig+8,x
                ror fpa.sig+9,x
                lsr fpa.sig,x
                ror fpa.sig+1,x
                ror fpa.sig+2,x
                ror fpa.sig+3,x
                ror fpa.sig+4,x
                ror fpa.sig+5,x
                ror fpa.sig+6,x
                ror fpa.sig+7,x
                ror fpa.sig+8,x
                ror fpa.sig+9,x
fps_add8        clra                            ; clear carry so regular addition works below
                lda fpa0+fpa.sign               ; do signs differ?
                eora fpa1+fpa.sign
                sta fpa1+fpa.exp                ; non-zero if signs differ; we don't need fpa1 exponent any more
                beq fps_add11                   ; brif not - just add
                ldx #fpa1                       ; default to second argument being smaller
                lda fpa0+fpa.sig                ; compare high digits of significand
                cmpa fpa1+fpa.sig
                bne fps_add9                    ; brif top digits differ
                lda fpa0+fpa.sig+1              ; next digits?
                cmpa fpa1+fpa.sig+1
                bne fps_add9                    ; brif digits differ; pattern continues
                lda fpa0+fpa.sig+2
                cmpa fpa1+fpa.sig+2
                bne fps_add9
                lda fpa0+fpa.sig+3
                cmpa fpa1+fpa.sig+3
                bne fps_add9
                lda fpa0+fpa.sig+4
                cmpa fpa1+fpa.sig+4
                bne fps_add9
                lda fpa0+fpa.sig+5
                cmpa fpa1+fpa.sig+5             ; don't have to check other extras; the first extra will be decisive
fps_add9        bhs fps_add10                   ; brif first operand is bigger
                ldx #fpa0                       ; point to first operand as smaller
                lda fpa1+fpa.sign               ; set result sign to that of larger operand
                sta fpa0+fpa.sign
fps_add10       ldd #0x9999                     ;* calculate 9's complement of smaller operand (X must already point to it)
                subd fpa.sig,x                  ;* we'll complete the 10's complement by setting C on the way into the
                std fpa.sig,x                   ;* addition sequence
                ldd #0x9999
                subd fpa.sig+2,x
                std fpa.sig+2,x
                ldd #0x9999
                subd fpa.sig+4,x
                std fpa.sig+4,x
                ldd #0x9999
                subd fpa.sig+6,x
                std fpa.sig+6,x
                ldd #0x9999
                subd fpa.sig+8,x
                std fpa.sig+8,x
                coma                            ; set carry going into add
fps_add11       lda fpa0+fpa.sig+9              ; do the addition (10 bytes)
                adca fpa1+fpa.sig+9
                daa
                sta fpa0+fpa.sig+9
                lda fpa0+fpa.sig+8
                adca fpa1+fpa.sig+8
                daa
                sta fpa0+fpa.sig+8
                lda fpa0+fpa.sig+7
                adca fpa1+fpa.sig+7
                daa
                sta fpa0+fpa.sig+7
                lda fpa0+fpa.sig+6
                adca fpa1+fpa.sig+6
                daa
                sta fpa0+fpa.sig+6
                lda fpa0+fpa.sig+5
                adca fpa1+fpa.sig+5
                daa
                sta fpa0+fpa.sig+5
                lda fpa0+fpa.sig+4
                adca fpa1+fpa.sig+4
                daa
                sta fpa0+fpa.sig+4
                lda fpa0+fpa.sig+3
                adca fpa1+fpa.sig+3
                daa
                sta fpa0+fpa.sig+3
                lda fpa0+fpa.sig+2
                adca fpa1+fpa.sig+2
                daa
                sta fpa0+fpa.sig+2
                lda fpa0+fpa.sig+1
                adca fpa1+fpa.sig+1
                daa
                sta fpa0+fpa.sig+1
                lda fpa0+fpa.sig
                adca fpa1+fpa.sig
                daa
                sta fpa0+fpa.sig
                ror fpa1+fpa.exp                ;* do sign flag and carry differ? will set V if so; we will never have
                rol fpa1+fpa.exp                ;* a real carry on the subtract case but might on the add case
                bvc fps_normalize               ; brif we didn't overflow significand - we can normalize the result
                inc fpa0+fpa.exp                ; bump exponent to account for overflow
                lbmi OVERROR                    ; brif it overflowed
                ror fpa0+fpa.sig                ; do a bit dance
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                ror fpa0+fpa.sig+5
                ror fpa0+fpa.sig+6
                ror fpa0+fpa.sig+7
                ror fpa0+fpa.sig+8
                ror fpa0+fpa.sig+9
                ror fpa0+fpa.sig
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                ror fpa0+fpa.sig+5
                ror fpa0+fpa.sig+6
                ror fpa0+fpa.sig+7
                ror fpa0+fpa.sig+8
                ror fpa0+fpa.sig+9
                ror fpa0+fpa.sig
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                ror fpa0+fpa.sig+5
                ror fpa0+fpa.sig+6
                ror fpa0+fpa.sig+7
                ror fpa0+fpa.sig+8
                ror fpa0+fpa.sig+9
                ror fpa0+fpa.sig
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                ror fpa0+fpa.sig+5
                ror fpa0+fpa.sig+6
                ror fpa0+fpa.sig+7
                ror fpa0+fpa.sig+8
                ror fpa0+fpa.sig+9
                ; fall through to normalization routine
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Normalize a floating point value in fpa0 with extra precision digits at fpaextra (up to 5 bytes) and return the
; packed result at (Y)
;
; The first step is to shift the significand left until a nonzero digit is in the leftmost position. This will bring
; in extra precision digits from fpaextra through fpaextra4 until either 10 shifts are done or the decimal exponent
; underflows. In both of those cases, the result will be zero.
;
; Once the leftmost digit is nonzero, the leftmost extra precision digit is checked to see if it is >= 5. If so, the
; significand will have 1 added to it. If that triggers a carry, the decimal exponent will be incremented and the
; significand will have its leftmost digit set to 1.
;
; This will trigger an overflow if the decimal exponent exceeds the allowed range.
fps_normalize   bsr fps_normalizea0             ; do the normalization in fpa0
                leax ,y                         ; point to the desired destination
                jmp fps_pack0                   ; go pack the result to the correct destination
fps_normalizea0 clrb                            ; initialize the exponent adjustment counter
fps_normalize0  lda fpa0+fpa.sig                ; do we have a nonzero digit in the first pair?
                bne fps_normalize1              ; brif so
                lda fpa0+fpa.sig+1              ; shift everything left 2 spaces
                sta fpa0+fpa.sig
                lda fpa0+fpa.sig+2
                sta fpa0+fpa.sig+1
                lda fpa0+fpa.sig+3
                sta fpa0+fpa.sig+2
                lda fpa0+fpa.sig+4
                sta fpa0+fpa.sig+3
                lda fpa0+fpa.extra
                sta fpa0+fpa.sig+4
                lda fpa0+fpa.extra+1
                sta fpa0+fpa.extra
                lda fpa0+fpa.extra+2
                sta fpa0+fpa.extra+1
                lda fpa0+fpa.extra+3
                sta fpa0+fpa.extra+2
                lda fpa0+fpa.extra+4
                sta fpa0+fpa.extra+3
                clr fpa0+fpa.extra+4
                subb #2                         ; account for two digit positions
                cmpb #-10                       ; have we shifted the whole set of digits (assumes originally normalized)
                bgt fps_normalize0              ; brif not
fps_normalize4  clr fpa0+fpa.exp                ; set result to zero
                clr fpa0+fpa.sign
fps_normalize5  rts
fps_normalize1  bita #0xf0                      ; is the high digit zero?
                bne fps_normalize3              ; brif not
                lsl fpa0+fpa.extra              ; only need to shift one extra position here since there won't be more shifts
                rol fpa0+fpa.sig+4
                rol fpa0+fpa.sig+3
                rol fpa0+fpa.sig+2
                rol fpa0+fpa.sig+1
                rol fpa0+fpa.sig
                lsl fpa0+fpa.extra
                rol fpa0+fpa.sig+4
                rol fpa0+fpa.sig+3
                rol fpa0+fpa.sig+2
                rol fpa0+fpa.sig+1
                rol fpa0+fpa.sig
                lsl fpa0+fpa.extra
                rol fpa0+fpa.sig+4
                rol fpa0+fpa.sig+3
                rol fpa0+fpa.sig+2
                rol fpa0+fpa.sig+1
                rol fpa0+fpa.sig
                lsl fpa0+fpa.extra
                rol fpa0+fpa.sig+4
                rol fpa0+fpa.sig+3
                rol fpa0+fpa.sig+2
                rol fpa0+fpa.sig+1
                rol fpa0+fpa.sig
                decb                            ; account for digit shift
fps_normalize3  addb fpa0+fpa.exp               ; adjust exponent
                stb fpa0+fpa.exp
                ble fps_normalize4              ; brif we underflowed to zero
                ldb fpaextra                    ; get extra precision digit
                andb #0xf0                      ; keep only the highest extra precision digit
                cmpb #0x50                      ; do we need to round?
                blo fps_normalize5              ; brif not
                lda fpa0+fpa.sig+4              ; bump low digits
                adda #1
                daa
                sta fpa0+fpa.sig+4
                bcc fps_normalize5              ; brif no carry - done
                lda fpa0+fpa.sig+3              ; keep going until all significand bytes handled
                adda #1
                daa
                sta fpa0+fpa.sig+3
                bcc fps_normalize5
                lda fpa0+fpa.sig+2
                adda #1
                daa
                sta fpa0+fpa.sig+2
                bcc fps_normalize5
                lda fpa0+fpa.sig+1
                adda #1
                daa
                sta fpa0+fpa.sig+1
                bcc fps_normalize5
                lda fpa0+fpa.sig
                adda #1
                daa
                sta fpa0+fpa.sig
                bcc fps_normalize5
                lda #0x10                       ; overflowed the significand - shift a 1 digit in
                sta fpa0+fpa.sig
                inc fpa0+fpa.exp                ; and bump exponent to reflect that
                bpl fps_normalize5              ; brif we didn't overflow - return result
OVERROR         ldb #err_ov                     ; raise overflow
                jmp ERROR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision BCD multiply (X) by (U)
;
; Calculate: (X) * (U) -> (Y)
;
; First, the routine calculates the new exponent and sign. The exponents simply add together while the sign is the
; exclusive OR of the two input signs. That gives negative for differing signs and positive for matching signs.
; Then the multiplication of the significands works by initializing an accumulator large enough to hold twice as
; many digits as each significand to zeroes. It also needs an extra byte to the left to handle overflow for each
; digit operation. It then works through each digit of the multiplier and uses that as a count to add the
; multiplicand to the *high digits* of the accumulator. Then, before moving to the next digit, the accumulator and
; mulltiplier are shifted right and the process is repeated. Once the process is done, the result required for the
; floating point calculation will be the high 10 digits of the accumulator with extra digits stored in the
; remaining bytes.
;
; Once the multiplication is complete, the significand is normalized. See above.
fps_mul         jsr fps_copyargs                ; fetch arguments
                lda fpa0+fpa.exp                ; first argumennt zero?
                bne fps_mul0                    ; brif not
                leax ,y                         ; return first argument (0)
                jmp fps_pack0
fps_mul0        lda fpa1+fpa.exp                ; second argument zero?
                bne fps_mul1                    ; brif not
                leax ,y                         ; return second argument (0)
                jmp fps_pack1
fps_mul1        lda fpa0+fpa.sign               ; calculate result sign
                eora fpa1+fpa.sign
                sta fpa0+fpa.sign               ; result will go into fpa0
                lda fpa0+fpa.exp                ; fetch exponent of multiplicand
                suba #64                        ; remove bias
                sta fpa0+fpa.exp                ; save it
                lda fpa1+fpa.exp                ; get exponent of multplier
                suba #64                        ; remove bias
                adda fpa0+fpa.exp               ; add exponents
                cmpa #63                        ; did we overflow upward?
                bgt OVERROR                     ; brif so
                cmpa #-63                       ; did we underflow?
                bge fps_mul3                    ; brif not
fps_mul2        clr fpa0+fpa.exp                ; return zero if we underflow
                leax ,y
                jmp fps_pack0
fps_mul3        adda #64                        ; add bias to resulting exponent
                sta fpa0+fpa.exp                ; save result exponent
                ldd zero                        ;* zero out result buffer high digits (extra digits + significand and
                std fpaextra                    ;* one extra byte to make sure lowest extra digit is zero)
                std fpaextra+2
                std fpaextra+4
                sta fpaextra+6
                ldb fpa1+fpa.sig+4              ;* do each byte of multiplier significand in sequence
                bsr fps_mul5                    ;* don't shift for first digit
                ldb fpa1+fpa.sig+3
                bsr fps_mul4
                ldb fpa1+fpa.sig+2
                bsr fps_mul4
                ldb fpa1+fpa.sig+1
                bsr fps_mul4
                ldb fpa1+fpa.sig
                bsr fps_mul4
                ldb fpaextra                    ; did we have an overflow on the last digit?
                beq fps_mul3a                   ; brif not
                bsr fps_mul9                    ; shift digits right one digit to move extra digit in
                inc fpa0+fpa.exp                ; bump exponent for result
                bmi OVERROR                     ; brif we went out of range (went > 127)
fps_mul3a       ldd fpaextra+1                  ; copy result into fpa0 significand (overflow byte will be zero)
                std fpa0+fpa.sig
                ldd fpaextra+3
                std fpa0+fpa.sig+2
                ldd fpaextra+5
                sta fpa0+fpa.sig+4
                ldd fpaextra+7
                std fpa0+fpa.sig+6
                ldd fpaextra+9
                std fpa0+fpa.sig+8
                jmp fps_normalize               ; go normalize the result
fps_mul4        bsr fps_mul9                    ; shift result right
fps_mul5        bitb #0x0f                      ; are we finished this digit?
                beq fps_mul6                    ; brif so
                bsr fps_mul11                   ; add multiplicand to accumulator
                decb                            ; adjust digit
                bra fps_mul5                    ; see if we're done yet
fps_mul6        bsr fps_mul9                    ; shift the result right
fps_mul7        bitb #0xf0                      ; done this digit?
                bne fps_mul8                    ; brif not
                rts                             ; we're done now so return
fps_mul8        bsr fps_mul11                   ; add multiplicand to accumulator
                subb #0x010                     ; adjust digit
                bra fps_mul7                    ; see if we're done yet
fps_mul9        lsr fpaextra                    ; shift result
                ror fpaextra+1
                ror fpaextra+2
                ror fpaextra+3
                ror fpaextra+4
                ror fpaextra+5
                ror fpaextra+6
                ror fpaextra+7
                ror fpaextra+8
                ror fpaextra+9
                ror fpaextra+10
                lsr fpaextra
                ror fpaextra+1
                ror fpaextra+2
                ror fpaextra+3
                ror fpaextra+4
                ror fpaextra+5
                ror fpaextra+6
                ror fpaextra+7
                ror fpaextra+8
                ror fpaextra+9
                ror fpaextra+10
                lsr fpaextra
                ror fpaextra+1
                ror fpaextra+2
                ror fpaextra+3
                ror fpaextra+4
                ror fpaextra+5
                ror fpaextra+6
                ror fpaextra+7
                ror fpaextra+8
                ror fpaextra+9
                ror fpaextra+10
                lsr fpaextra
                ror fpaextra+1
                ror fpaextra+2
                ror fpaextra+3
                ror fpaextra+4
                ror fpaextra+5
                ror fpaextra+6
                ror fpaextra+7
                ror fpaextra+8
                ror fpaextra+9
                ror fpaextra+10
                rts
fps_mul11       lda fpa0+fpa.sig+4              ; add digits starting at the bottom
                adda fpaextra+5
                daa
                sta fpaextra+5
                lda fpa0+fpa.sig+3
                adca fpaextra+4
                daa
                sta fpaextra+4
                lda fpa0+fpa.sig+2
                adca fpaextra+3
                daa
                sta fpaextra+3
                lda fpa0+fpa.sig+1
                adca fpaextra+2
                daa
                sta fpaextra+2
                lda fpa0+fpa.sig
                adca fpaextra+1
                daa
                sta fpaextra+1
                bcc fps_mul12
                inc fpaextra                    ; handle carry out
fps_mul12       rts 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision BCD divide
;
; Calculate (X) รท (U) -> (Y)
;
; First, calculate the result exponent by subtracting the decimal exponent of (U) from that of (X). If the result goes
; above the exponent range, raise overflow. If it goes below the exponent range, return zero.
;
; The calculate the result sign the same as for multiplcation. Also calculate the 10's complement of the divisor.
;
; First, we copy the divdend into the extra precision digits in fpa1 since they aren't needed for anything else
; here. Then we set the desired nonzero digit counter to 11 to indicate 10 significant digits plus a digit for
; rounding and set the flag indicating a nonzero digit was seen to zero to indicate it hasn't been seen. Also set
; the extra carry byte to zero. Then we run through the digit pair routine 10 times to set 20 digits of the
; significand and extra precision for fpa0. The digit pair routine just calls the digit loop twice while doing
; some bookkeeping.
;
; We also pre-cacluate the 9's complement of the divisor and store that in fpaextra to save time during the
; subtraction steps.
;
; For each digit, the process is as follows:
;
; 1. If the desired digit count is zero, do nothing and just return a zero for the quotient digit.
; 2. Initialize the quotient digit to 0
; 3. If the extra carry is nonzero, we know the result "goes" so go on to step 6
; 4. Compare the divisor to the residue; this is faster on average than doing the subtraction and then
;    reversing the last subtraction loop.
; 5. If the divisor goes *exactly* into the residue, bump the quotient digit, clear the desired digit count
;    and return the quotient digit; we have no need to do anything more
; 6. If the divisor does go, subtract the divisor from the residue, bump the digit, and go back to step 3
; 7. If the resulting digit is nonzero, set the "nonzero digit seen" flag to 1 (exactly one)
; 8. Subtract the nonzero digit flag from the desired digits flag so after enough digits, we stop doing the
;    division loops.
; 9. Return the quotient digit.
fps_div         jsr fps_copyargs                ; get arguments to the accumulators
                lda fpa1+fpa.exp                ; get divisor exponent
                bne fps_div0                    ; brif not zero
DIV0ERROR       ldb #err_div0                   ; raise division by zero
                jmp ERROR
fps_div0        suba #64                        ; remove bias
                sta fpa1+fpa.exp                ; save for later calculation
                lda fpa0+fpa.exp                ; get quotient exponent
                bne fps_div2                    ; brif not zero - we have to do work
fps_div1        clr fpa0+fpa.exp                ; make sure result is zero
                leax ,y                         ; return zero result
                jmp fps_pack0
fps_div2        suba #64                        ; remove bias
                suba fpa1+fpa.exp               ; subtract divisor exponent
                cmpa #64                        ; did we overflow upward?
                lbge OVERROR                    ; brif so
                cmpa #-64                       ; did we overflow downward (underflow)?
                ble fps_div1                    ; brif we underflow
                adda #64                        ; add back the bias
                sta fpa0+fpa.exp                ; set result exponent
                lda fpa0+fpa.sign               ; calculate result sign (XOR of argument signs)
                eora fpa1+fpa.sign
                sta fpa0+fpa.sign
                ldd fpa0+fpa.sig                ; initialize residue to dividend
                std fpa1+fpa.sig
                ldd fpa0+fpa.sig+2
                std fpa1+fpa.sig+2
                lda fpa0+fpa.sig+4
                sta fpa1+fpa.sig+4
                ldd #11                         ; initialize digit counter and nonzero seen flag
                std fpaextra+5
                sta fpaextra+7                  ; set ongoing extra carry digits to zero 
                ldd #0x9999                     ;* calculate 9's complement of divisor for later; we'll introduce a carry
                subd fpa1+fpa.sig               ;* to the first byte to complete the 10's complement's +1 to save doing
                std fpaextra                    ;* the extra work here
                ldd #0x9999
                subd fpa1+fpa.sig+2
                std fpaextra+2
                lda #0x99
                suba fpa1+fpa.sig+4
                sta fpaextra+4
                bsr fps_div3                    ; calculate the quotient byte by byte
                stb fpa0+fpa.sig
                bsr fps_div3
                stb fpa0+fpa.sig+1
                bsr fps_div3
                stb fpa0+fpa.sig+2
                bsr fps_div3
                stb fpa0+fpa.sig+3
                bsr fps_div3
                stb fpa0+fpa.sig+4
                bsr fps_div3
                stb fpa0+fpa.sig+5
                bsr fps_div3
                stb fpa0+fpa.sig+6
                bsr fps_div3
                stb fpa0+fpa.sig+7
                bsr fps_div3
                stb fpa0+fpa.sig+8
                bsr fps_div3
                stb fpa0+fpa.sig+9
                jmp fps_normalize               ; go normalize the result and return
fps_div3        bsr fps_div5                    ; do a digit
                lslb                            ; shift it over
                lslb
                lslb
                lslb
                sta fpaextra+8                  ; save it
                bsr fps_div5                    ; do next digit
                addb fpaextra+8                 ; combine the two quotient digits
fps_div4        rts
fps_div5        ldb fpaextra+6                  ; do we even need to do the division?
                beq fps_div4                    ; brif not - return 0
                clrb                            ; initialize quotient digit
fps_div6        lda fpaextra+7                  ; did we have a carry last time?
                bne fps_div7                    ; brif so - we know it "goes"
                lda fpa1+fpa.sig                ; is the divisor less than the dividend residue?
                cmpa fpa1+fpa.sig+5
                bhi fps_div8                    ; brif high byte is larger than residue
                lda fpa1+fpa.sig+1              ; and keep going for all 5 bytes
                cmpa fpa1+fpa.sig+6
                bhi fps_div8
                lda fpa1+fpa.sig+2
                cmpa fpa1+fpa.sig+7
                bhi fps_div8
                lda fpa1+fpa.sig+3
                cmpa fpa1+fpa.sig+8
                bhi fps_div8
                lda fpa1+fpa.sig+4
                cmpa fpa1+fpa.sig+9
                bhi fps_div8                    ; brif divisor is greater than the residue
                bne fps_div7                    ; brif it didn't go exactly
                incb                            ; bump quotient for this "go"
                clr fpaextra+6                  ; indicate no more digits needed
                rts
fps_div7        coma                            ; set carry to complete 10's complement of divisor
                lda fpa1+fpa.sig+9              ; do the "subtraction"
                adca fpaextra+4
                daa
                sta fpa1+fpa.sig+9
                lda fpa1+fpa.sig+8
                adca fpaextra+3
                daa
                sta fpa1+fpa.sig+8
                lda fpa1+fpa.sig+7
                adca fpaextra+2
                daa
                sta fpa1+fpa.sig+7
                lda fpa1+fpa.sig+6
                adca fpaextra+1
                daa
                sta fpa1+fpa.sig+6
                lda fpa1+fpa.sig+5
                adca fpaextra
                daa
                sta fpa1+fpa.sig+5
                lda fpaextra+7                  ; and handle the carry byte
                adca #0x99
                sta fpaextra+7
                incb                            ; bump digit count
                bra fps_div6                    ; go see if we need another subtraction
fps_div8        tstb                            ; nonzero digit?
                beq fps_div9                    ; brif not
                lda #1                          ; set nonzero flag
                sta fpaextra+5
fps_div9        lda fpaextra+6                  ; adjust digit count
                suba fpaextra+5
                sta fpaextra+6
                lsl fpa1+fpa.sig+9              ; shift residue one decimal digit
                rol fpa1+fpa.sig+8
                rol fpa1+fpa.sig+7
                rol fpa1+fpa.sig+6
                rol fpa1+fpa.sig+5
                rol fpaextra+7
                lsl fpa1+fpa.sig+9
                rol fpa1+fpa.sig+8
                rol fpa1+fpa.sig+7
                rol fpa1+fpa.sig+6
                rol fpa1+fpa.sig+5
                rol fpaextra+7
                lsl fpa1+fpa.sig+9
                rol fpa1+fpa.sig+8
                rol fpa1+fpa.sig+7
                rol fpa1+fpa.sig+6
                rol fpa1+fpa.sig+5
                rol fpaextra+7
                lsl fpa1+fpa.sig+9
                rol fpa1+fpa.sig+8
                rol fpa1+fpa.sig+7
                rol fpa1+fpa.sig+6
                rol fpa1+fpa.sig+5
                rol fpaextra+7
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Convert a floating point number in val0 to a string in strbuff

; The maximum size of a string generated here is 19 bytes representing 10 significant digits, 5 leading zeroes after the
; decimal point, the decimal point, a leading zero, a leading sign, and a trailing NUL.
fps_toascii     ldu #strbuff+1                  ; point to output buffer
                lda #0x20                       ; set sign to "blank"
                sta -1,u
                clr fpaextra+1                  ; set decimal point offset; zero means none
                clr fpaextra+3                  ; disable "E" notation
                ldx #val0+val.value             ; unpack value to fpa0 so we can mess with it
                jsr fps_unpack0
                lda fpa0+fpa.exp                ; check exponent
                bne fps_toascii0                ; brif not zero
                lda #'0                         ; make the number zero and print it
                sta ,u
                clr 1,u
                rts
fps_toascii0    lda fpa0+fpa.sign               ; negative?
                bpl fps_toascii1                ; brif not
                lda #'-                         ; negative sign
                sta -1,u
fps_toascii1    lda #10                         ; assume 10 significant digits exist
                ldx #fpa0+fpa.sig+5             ; point to significand
fps_toascii2    ldb ,-x                         ; get digit pair
                bitb #0x0f                      ; is right digit zero?
                bne fps_toascii3                ; brif not - we've counted all the significant digits
                deca                            ; trailing zero - uncount it
                bitb #0xf0                      ; is left digit zero?
                bne fps_toascii3                ; brif not - we've counted all the sigificant digits
                deca                            ; trailing zero - uncount it
                cmpx #fpa0+fpa.sig              ; done all significant digits?
                bhi fps_toascii2                ; brif not
fps_toascii3    sta fpaextra                    ; save significant digits that exist
                lda fpa0+fpa.exp                ; fetch exponent
                suba #64                        ; remove bias
                sta fpa0+fpa.exp                ; save for later
                bpl fps_toascii14               ; brif no leading zeroes
                suba fpaextra                   ; get number of significant digits plus decimal count
                cmpa #-15                       ; do we end up with too many digits (leading 0 + significant digits)
                blt fps_toascii15               ; brif too many - do scientific notation
                ldd #'0*256+'.                  ; put a leading "0."
                std ,u++
                ldb fpa0+fpa.exp                ; get exponent back, which is the number of leading digits (negative)
                bra fps_toascii4a               ; go add leading zeroes if needed
fps_toascii4    sta ,u+                         ; put a zero
fps_toascii4a   incb                            ; done all 
                bne fps_toascii4                ; brif not
fps_toascii5    ldx #fpa0+fpa.sig               ; point to significand
                ldd #0xf000                     ; set digit mask and counter
                sta fpaextra+2
fps_toascii6    lda ,x                          ; get digit pair
                anda fpaextra+2                 ; keep the desired digit
                bita #0xf0                      ; is it the left hand digit?
                beq fps_toascii7                ; brif not or digit is 0
                lsra                            ; right justify digit
                lsra
                lsra
                lsra
fps_toascii7    adda #0x30                      ; turn it into ascii
                sta ,u+                         ; stuff it in the output
                dec fpaextra                    ; done all digits?
                beq fps_toascii9                ; brif so
                dec fpaextra+1                  ; are we at the decimal point?
                bne fps_toascii8                ; brif not
                lda #'.                         ; put a decimal
                sta ,u+
fps_toascii8    com fpaextra+2                  ; flip digit mask
                bpl fps_toascii6                ; handle another digit
                leax 1,x                        ; move to next digit byte
                bra fps_toascii6                ; now go handle next digit
fps_toascii9    ldb fpaextra+3                  ; get decimal exponent to display
                beq fps_toascii13
                lda #'E                         ; output "E"
                sta ,u+
                tstb                            ; negative?
                bpl fps_toascii10               ; brif not
                comb                            ; positivize it, but also subtract 1?
                lda #'-                         ; put a minus for the exponent
                sta ,u+
fps_toascii10   cmpb #10                        ; do we have two digits for exponent?
                blo fps_toascii12               ; brif not
                lda #0x2f                       ; initialize left digit
fps_toascii11   inca                            ; bump digit
                subb #10                        ; are we at the right digit?
                bpl fps_toascii11               ; brif not yet
                addb #10                        ; undo extra subtraction
                sta ,u+                         ; save left digit
fps_toascii12   addb #0x30                      ; turn right digit to ascii
                stb ,u+                         ; save right digit
fps_toascii13   clr ,u                          ; put a NUL at the end of the result
                rts
fps_toascii14   cmpa #9                         ; is it in range for number of significant digits?
                bgt fps_toascii15               ; brif not - do scientific notation
                inca                            ; exponent 0 has decimal point after first digit
                sta fpaextra+1                  ; save decimal point location
                cmpa fpaextra                   ; is it more than the number of significant digits?
                ble fps_toascii5                ; brif not - just convert the significand
                sta fpaextra                    ; make sure we include the pre-decimal zeroes
                bra fps_toascii5                ; go convert the significand
fps_toascii15   ldb #1                          ; put decimal after the first digit
                stb fpaextra+1
                sta fpaextra+3                  ; enable the "E" notation with the correct exponent
                bra fps_toascii5                ; actually convert the number
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Convert a 32 bit integer at (X) to BCD floating point. This requires converting the 32 bit binary value to BCD which
; can be done using the double/dabble algorithm. fpa0 and fpaextra is used as temporary storage. X must point to a value.
; Enter at fps_fromuint to treat the 32 value as unsigned.
fps_fromint     ldb val.int,x                   ; is the integer negative?
                bpl fps_fromuint                ; brif not
                sex                             ; set sign to negative
                sta fpa0+fpa.sign
                ldd zero                        ; negate the value and store it to temporary location
                subd val.int+2,x
                std fpa0+fpa.extra+2
                ldd zero
                sbcb val.int+1,x
                sbca val.int,x
                std fpa0+fpa.extra
                bra fps_fromint0                ; go finish the conversion
fps_fromuint    clr fpa0+fpa.sign               ; set sign to positive
                ldd val.int+2,x                 ; copy value to temporary accumulator
                std fpa0+fpa.extra+2
                ldd val.int,x
                std fpa0+fpa.extra
fps_fromint0    ldd zero                        ; zero out destination
                std fpa0+fpa.sig
                std fpa0+fpa.sig+2
                sta fpa0+fpa.sig+4
                ldd #0x4920                     ; set exponent for decimal right of 10 digit significand
                sta fpa0+fpa.exp                ; save exponent
                stb fpa0+fpa.extra+4            ; save counter
                bra fps_fromint2                ; skip digit check on the first iteration since none need adjustment
fps_fromint1    ldu #fpa0+fpa.sig               ; point to significand
                bsr fps_fromint3                ; do adjustments, 5 bytes worth of digits
                bsr fps_fromint3
                bsr fps_fromint3
                bsr fps_fromint3
                bsr fps_fromint3
fps_fromint2    lsl fpa0+fpa.extra+3            ; shift left
                rol fpa0+fpa.extra+2
                rol fpa0+fpa.extra+1
                rol fpa0+fpa.extra
                rol fpa0+fpa.sig+4
                rol fpa0+fpa.sig+3
                rol fpa0+fpa.sig+2
                rol fpa0+fpa.sig+1
                rol fpa0+fpa.sig
                dec fpa0+fpa.extra+4            ; done all digits?
                bne fps_fromint1                ; brif not
                lda #valtype_float              ; set result type to floating point
                sta val.type,x
                jsr fps_normalizea0             ; go normalize fpa0
                jmp fps_pack0                   ; pack the result to the original accumulator
; This is the digit check for the double-dabble algorith. The left digit check is only concerned if the left digit
; is 5 or higher. Since we can make that comparison without masking the bits, we don't bother. For the right digit,
; however, we do have to mask the upper digit off. It saves and ANDA and a subsequent indexed reload.
fps_fromint3    clrb                            ; set adjustment to none
                lda ,u                          ; get digits
                cmpa #0x50                      ; is digit 5 or higher?
                blo fps_fromint4                ; brif not
                ldb #0x30                       ; set adjustment factor for first digit
fps_fromint4    anda #0x0f                      ; keep low digit
                cmpa #5                         ; is it 5 or higher?
                blo fps_fromint5                ; brif not
                orb #0x03                       ; set adjustment for second digit
fps_fromint5    addb ,u                         ; add adjustment to digits
                stb ,u+                         ; save new digit values and move on
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Convert the floating point value in the value accumulator at (X) to a 32 bit signed integer. Will trigger an overflow
; if the magnitude is too large and it will truncate (round toward zero) any fractional value.
fps_toint       jsr fps_unpack0                 ; unpack the floating point value to fpa0
                bsr fps_tointa0                 ; convert value to integer in fpa0 significand
                lda #valtype_int                ; set result to integer
                sta val.type,x
                ldd fpa0+fpa.sig+1
                std val.int,x
                ldd fpa0+fpa.sig+3
                std val.int+2,x
                rts
fps_tointa0     ldb fpa0+fpa.exp                ; compare exponent (is it too big?)
                cmpb #64+9
                bne fps_toint0
                ldd fpa0+fpa.sig                ; compare top of significand
                cmpd #0x2147
                bne fps_toint0
                ldd fpa0+fpa.sig+2              ; compare middle of significand
                cmpd #0x4836
                bne fps_toint0
                ldd fpa0+fpa.sig+4              ; compare bottom of significand plus extra digits
                cmpd #0x4800
fps_toint0      blt fps_toint1                  ; brif it fits positive or negative
                lbgt OVERROR                    ; brif it doesn't fit negative either
                ldb fpa0+fpa.sign               ; do we want negative?
                lbpl OVERROR                    ; brif not - it doesn't fit
; Enter here if we already know that the value will fit
fps_toint1      ldb #64+9                       ; biased exponent needed for decimal point to the right of significand
                subb fpa0+fpa.exp               ; number of digit shifts needed to denormalize
                beq fps_toint3                  ; brif already denormalized
                lslb                            ; do 4 shifts per digit; we're going to simply lose extra digits
                lslb
fps_toint2      lsr fpa0+fpa.sig                ; shift a digit right
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                decb                            ; done all shifts?
                bne fps_toint2
; Now convert BCD digit sequence in fpa0 significand to binary value in the low 32 bits of the significand
fps_toint3      ldb #32                         ; 32 bit shifts needed for whole significand
                stb fpa0+fpa.exp                ; use extra precision byte as counter
fps_toint4      lsr fpa0+fpa.sig                ; shift a bit into the binary result
                ror fpa0+fpa.sig+1
                ror fpa0+fpa.sig+2
                ror fpa0+fpa.sig+3
                ror fpa0+fpa.sig+4
                ror fpa0+fpa.extra
                ror fpa0+fpa.extra+1
                ror fpa0+fpa.extra+2
                ror fpa0+fpa.extra+3
                ldu #fpa0+fpa.sig               ; point to BCD digits
fps_toint5      lda ,u                          ; get byte to check
                beq fps_toint8                  ; short circuit check if digits are 0
                anda #0x88                      ; keep bit 3 of each digit; adjustment on >= 8
                lsra                            ; shift over and mulply by adjustment factor
                lsra
                lsra
                ldb #3                          ; the adjustment is a subtraction by 3
                mul
                negb                            ; now subtract from digit
                addb ,u
                stb ,u+
fps_toint6      cmpu #fpa0+fpa.sig+5            ; done all 5 bytes?
                blo fps_toint5                  ; brif not
                dec fpa0+fpa.exp                ; done all bits?
                bne fps_toint4                  ; brif not
                ldb fpa0+fpa.sign               ; do we want negative?
                bpl fps_toint7                  ; brif not
                ldd zero                        ; negate the value through subtracting from 0
                subd fpa0+fpa.extra+2
                std fpa0+fpa.extra+2
                ldd zero
                sbcb fpa0+fpa.extra+1
                sbca fpa0+fpa.extra
                std fpa0+fpa.extra
fps_toint7      ldd fpa0+fpa.extra             ; put result in the significand
                std fpa0+fpa.sig+1
                ldd fpa0+fpa.extra+2
                std fpa0+fpa.sig+3
                rts
fps_toint8      leau 1,u                        ; move to next digit
                bra fps_toint6                  ; go back to mainline
                *pragmapop list