view src/fps.s @ 82:9a4e2364a966

Fix logic in int32_mul and overflow integer multiply to floating point It seems logical to allow integer multiplication to overflow to floating point. If this turns out to be unfortunate, it can be changed. In this update, 32 bit integer multiplication will overflow to floating point.
author William Astle <lost@l-w.ca>
date Sat, 07 Oct 2023 13:39:25 -0600
parents bb50ac9fdf37
children a492441bfc56
line wrap: on
line source

                *pragmapush list
                *pragma list
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision floating point arithmetic package
;
; Floating point values are stored in 6 byte packets (single precision) organized as follows:
; Offset        Length          Contents
; 0             1               8 bit binary exponent with a bias of 128; 0 means the number is 0
; 1             4               32 bit significand
; 5             1               sign flag; zero for positive, 0xff for negative
;
; Binary operateions take pointers to their arguments in X and U. Y contains the result location. In all cases, it is
; safe to specify either one of the arguments as the result location. Unary operations take their operand pointer in
; X and their result pointer in Y. In all cases, there is an in place version of the unary operation that operates on
; its input and only needs the single pointer in X.
;
; On the topic of optimization: these routines copy their operands to accumulators in the direct page. This saves one
; cycle per instruction which has a nonzero offset. In addition, this means that the input operands can remain
; unmodified by the actual operations. For instance, addition requires denormalizing one operand which would otherwise
; have to be done in place.
;
; NOTE: the input pointers X and U may be clobbered.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Convert 32 bit unsigned integer at (X) to single precision floating point at (U)
fps_fromuint32  clr fpa0+fps.sign               ; set result sign to positive
                bra fps_fromint32a              ; go do conversion
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Convert 32 bit signed integer at (X) to single precision floating point at value accumulator in (Y)
fps_fromint32   ldb ,x                          ; set sign based on signed integer
                sex
                sta fpa0+fps.sign               ; set result sign to the two's complement sign
                bpl fps_fromint32a              ; brif positive - no need to mess with bits
                jsr int32_neg                   ; make the bits positive
fps_fromint32a  ldb valtype_float               ; set output value type to floating point
                stb val.type,y
                ldd ,x                          ; copy integer bits to fpa0
                std fpa0+fps.sig
                ldd 2,x
                std fpa0+fps.sig+2
                ldb #0xa0                       ; put binary point to the right of the significand
                stb fpa0+fps.exp
                clr fpa0extra                   ; clear extra precision
                jmp fps_add10                   ; go normalize the result and return
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Convert 64 bit unsigned value at (X) to single precision floating point in value accumulator at (Y)
;
; Cases:
; * byte 0 is first nonzero - exponent at 64 bits right, use upper 0 to 4
; * byte 1 is first nonzero - exponent at 56 bits right, use bytes 1 to 5
; * byte 2 is first nonzero - exponent at 48 bits right, use bytes 2 to 6
; * otherwise - exponent at 40 bits right, use bytes 3 to 7
fps_fromuint64  clra                            ; set sign to positive
fps_fromuint64s sta fpa0+fps.sign               ; save sign of result
                ldb #0xc0                       ; exponent if binary point is 64 bits to the right
                lda ,x+                         ; is the first byte zero?
                bne fps_fromuint64a             ; brif not
                subb #8                         ; lose a byte off exponent
                lda ,x+                         ; is the second byte zero?
                bne fps_fromuint64a             ; brif not
                subb #8                         ; lose another byte off exponent
                lda ,x+                         ; is third byte zero?
                bne fps_fromuint64a             ; brif not
                subb #8                         ; lose another byte
                lda ,x+                         ; get first byte to copy
fps_fromuint64a stb fpa0+fps.exp                ; save exponent
                sta fpa0+fps.sig                ; save high byte of significand
                ldd ,x                          ; copy next two bytes to significand
                std fpa0+fps.sig+1
                ldd 2,x                         ; and the final byte and extra precision
                sta fpa0+fps.sig+3
                stb fpa0extra
                jmp fps_add10                   ; go normalize the result and return
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Unary negation - negate (X) to (Y)
fps_neg         ldd 2,x                         ; copy to output and keep exponent in A
                std 2,y
                ldd 4,x
                std 4,y
                ldd ,x
                std ,y
                tsta                            ; is the number zero?
                beq fps_neg0                    ; brif so - do nothing
                com fps.sign,y                  ; flip the sign
fps_neg0        rts
fps_negx        lda fps.exp,x                   ; is the number zero?
                beq fps_negx0                   ; brif so - do nothing
                com fps.sign,x                  ; flip the sign
fps_negx0       rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copy (X) to fpa0 and (U) to fpa1
fps_copyinputs  ldd ,x                          ; copy (X) to fpa0
                std fpa0
                ldd 2,x
                std fpa0+2
                ldd 4,x
                std fpa0+4
                ldd ,u                          ; copy (U) to fpa0
                std fpa1
                ldd 2,u
                std fpa1+2
                ldd 4,u
                std fpa1+4
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Subtraction (X) - (U) to (Y)
fps_sub         bsr fps_copyinputs              ; copy input operands
                com fpa1+fps.sign               ; negate the subtrahend (don't need to handle zero here)
                bra fps_add0                    ; go handle it as addition
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Addition (X) + (U) to (Y)
fps_add         bsr fps_copyinputs              ; copy input operands
fps_add0        lda fpa1+fps.exp                ; is the second operand 0?
                bne fps_add1                    ; brif not
                ldd fpa0                        ; copy first operand to output
                std ,y
                ldd fpa0+2
                std 2,y
                ldd fpa0+4
                std 4,y
                rts
fps_add1        lda fpa0+fps.exp                ; get exponent of first operand
                bne fps_add2                    ; brif not zero
                ldd fpa1                        ; copy second operand to output
                std ,y
                ldd fpa1+2
                std 2,y
                ldd fpa1+4
                std 4,y
                rts
fps_add2        clr fpa0extra                   ; clear extra precision bits
                lda fpa0+fps.exp                ; get first operand exponent
                suba fpa1+fps.exp               ; get difference in exponents
                beq fps_add8                    ; brif we don't need to denormalize - they're the same
                blo fps_add3                    ; brif we need to denormalize the first operand
                ldx #fpa1                       ; point to second operand to denormalize
                bra fps_add4                    ; go denormalize
fps_add3        ldb fpa1+fps.exp                ; get exponent of second operand (result exponent)
                stb fpa0+fps.exp                ; set result exponent
                ldx #fpa0                       ; point to first operand to denormalize
                nega                            ; get number of bits to shift as positive number
fps_add4        suba #8                         ; is there 8 bits left?
                blo fps_add5                    ; brif not
                ldb fps.sig+3,x                 ; shift significand 8 bits right
                stb fpa0extra                   ; save extra precision bits
                ldb fps.sig+2,x
                stb fps.sig+3,x
                ldb fps.sig+1,x
                stb fps.sig+2,x
                ldb fps.sig,x
                stb fps.sig+1,x
                clr fps.sig,x
                bra fps_add4                    ; see if we have another byte to shift
fps_add5        adda #8                         ; adjust for extra subtract above
                bra fps_add7                    ; do the bit shifting
fps_add6        lsr fps.sig,x                   ; shift one bit right
                ror fps.sig+1,x
                ror fps.sig+2,x
                ror fps.sig+3,x
                ror fpa0extra
                deca                            ; done all of the bits?
fps_add7        bne fps_add6                    ; brif not
fps_add8        lda fpa0+fps.sign               ; compare the signs of the operands
                eora fpa1+fps.sign              ; set A if signs differ
                bne fps_add9                    ; brif they differ - do subtraction
                ldd fpa0+fps.sig+2              ; add low word of significands
                addd fpa1+fps.sig+2
                std fpa0+fps.sig+2
                ldd fpa0+fps.sig                ; and the high word
                adcb fpa1+fps.sig+1
                adca fpa1+fps.sig
                std fpa0+fps.sig
                bcc fps_add9                    ; brif no carry
                ror fpa0+fps.sig                ; shift carry into significand
                ror fpa0+fps.sig+1
                ror fpa0+fps.sig+2
                ror fpa0+fps.sig+3
                ror fpa0extra                   ; and the extra bits (for rounding)
                inc fpa0+fps.exp                ; bump exponent to account for bit shift
                bne fps_add14                   ; go check for round-off if not overflow
OVERROR         ldb #err_ov                     ; raise overflow
                jmp ERROR 
fps_add9        ldd fpa0+fps.sig+2              ; subtract low word
                subd fpa1+fps.sig+2
                std fpa0+fps.sig+2
                ldd fpa0+fps.sig                ; and now the high word
                sbcb fpa1+fps.sig+1
                sbca fpa1+fps.sig
                std fpa0+fps.sig
                bcc fps_add10                   ; brif we didn't carry
                com fpa0+fps.sign               ; flip result sign - other number was bigger
                com fpa0+fps.sig+3              ; negate two's complement result to be just the magnitude
                com fpa0+fps.sig+2
                com fpa0+fps.sig+1
                com fpa0+fps.sig
                ldx fpa0+fps.sig+2              ; add 1 to complete negation
                leax 1,x
                stx fpa0+fps.sig+2
                bne fps_add10                   ; brif carry doesn't propagate
                ldx fpa0+fps.sig                ; propagate carry
                leax 1,x
                stx fpa0+fps.sig                ; NOTE: this cannot carry because magnitude got smaller
fps_add10       clra                            ; initialize exponent offset
fps_add11       ldb fpa0+fps.sig                ; do we have nonzero bits in high byte of significand?
                bne fps_add13                   ; brif so
                ldb fpa0+fps.sig+1              ; shift left 8 bits
                stb fpa0+fps.sig
                ldb fpa0+fps.sig+2
                stb fpa0+fps.sig+1
                ldb fpa0+fps.sig+3
                stb fpa0+fps.sig+2
                ldb fpa0extra                   ; and extra precision bits
                stb fpa0+fps.sig+3
                clr fpa0extra
                addb #8                         ; account for number of bits shifted
                cmpb #40                        ; done 40 bits?
                blo fps_add11                   ; brif not - see if we have more bits to shift
                clr fpa0+fps.exp                ; number underflowed to zero - set it so
                clr fpa0+fps.sign
                clr fpa0+fps.sig
                clr fpa0+fps.sig+1
                clr fpa0+fps.sig+2
                clr fpa0+fps.sig+3
                bra fps_add16                   ; go return result
fps_add12       inca                            ; account for a bit shift
                lsl fpa0extra                   ; shift significand and extra bits left
                rol fpa0+fps.sig+3
                rol fpa0+fps.sig+2
                rol fpa0+fps.sig+1
                rol fpa0+fps.sig
fps_add13       bpl fps_add12                   ; brif we haven't normalized yet
fps_add14       ldb fpa0extra                   ; do we need to round?
                bpl fps_add16                   ; brif not
                ldx fpa0+fps.sig+2              ; add one to significand
                leax 1,x
                stx fpa0+fps.sig+2
                bne fps_add16                   ; brif no carry
                ldx fpa0+fps.sig                ; bump the upper word of significand
                leax 1,x
                stx fpa0+fps.sig
                bne fps_add16
fps_add15       inc fpa0+fps.exp                ; bump exponent
                beq OVERROR                     ; brif it overflowed
                lda #0x80                       ; set high bit of significand (rest is zero)
                sta fpa0+fps.sig
fps_add16       ldd fpa0                        ; copy result to destination
                std ,y
                ldd fpa0+2
                std 2,y
                ldd fpa0+4
                std 4,y
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision multiplication (X) × (U) to (Y)
fps_mul         lda fps.exp,x                   ; is first operand zero?
                beq fps_mul0                    ; brif so - return zero
                lda fps.exp,u                   ; is second operand zero?
                bne fps_mul1                    ; brif not - have to do the multiply
fps_mul0        ldd zero                        ; return zero result
                std ,y
                std 2,y
                std 4,y
                rts
fps_mul1        jsr fps_copyinputs              ; copy input operands
                lda fpa0+fps.sign               ; calculate sign of result - xor of the two signs
                eora fpa1+fps.sign
                sta fpa0+fps.sign               ; save result sign
                lda fpa0+fps.exp                ; get exponent of first value
                adda fpa1+fps.exp               ; calculate new exponent; also cancels the bias
                rora                            ; set V if C and N differ
                rola
                bvc fps_mul3                    ; brif maybe an overflow
                adda #0x80                      ; add back the bias
                sta fpa0+fps.sign               ; save new sign
                beq fps_mul0                    ; brif we underflowed - zero out sign
; This does a shift-and-add multiplication algorithm. This is slower than an equivalent using MUL but is smaller.
; The high order bytes will be left in fpa0 with the low order bytes in fpa0extra and fpa0extra[1-3]. The low order
; bytes are kept for the odd case where extra precision is useful. Uses fpa0extra[4-7] as temporaries.
fps_mul2        ldd zero                        ; zero out temporary bytes
                std fpa0extra4
                std fpa0extra6
                ldb fpa0+fps.sig+3              ; multiply by low byte of fpa0
                bsr fps_mul4
                ldb fpa0extra                   ; move low bites
                stb fpa0extra3
                ldb fpa0+fps.sig+2              ; multiply by next higher byte
                bsr fps_mul4
                ldb fpa0extra                   ; move low bits
                stb fpa0extra2
                ldb fpa0+fps.sig+1              ; and again for the next higher byte
                bsr fps_mul4
                ldb fpa0extra
                stb fpa0extra1
                ldb fpa0+fps.sig                ; and the high order byte
                bsr fps_mul4
                ldd fpa0extra4                  ; copy high order product bits to result
                std fpa0+fps.sig
                ldd fpa0extra6
                std fpa0+fps.sig+2
                jmp fps_add10                   ; go normalize the result and return
fps_mul3        bpl fps_mul0                    ; brif we underflowed - return 0
                jmp OVERROR                     ; raise overflow
fps_mul4        bne fps_mul5                    ; brif not multiply by zero
                lda fpa0+fps.sig+3              ; shift 8 bits right
                sta fpa0extra
                lda fpa0+fps.sig+2
                sta fpa0+fps.sig+3
                lda fpa0+fps.sig+1
                sta fpa0+fps.sig+2
                lda fpa0+fps.sig
                sta fpa0+fps.sig+1
                clr fpa0+fps.sig
fps_mul8        rts
fps_mul5        coma                            ; set C
fps_mul6        lda fpa0extra4                  ; get high byte of result bytes
                rorb                            ; is multiplier bit set?
                beq fps_mul8                    ; brif 8 shifts done (C set above makes sure of that)
                bcc fps_mul7                    ; brif bit not set - don't do addition
                lda fpa0extra7                  ; add multiplier (fpa1) to result
                adda fpa1+fps.sig+3
                sta fpa0extra7
                lda fpa0extra6
                adca fpa1+fps.sig+2
                sta fpa0extra6
                lda fpa0extra5
                adca fpa1+fps.sig+1
                sta fpa0extra5
                lda fpa0extra4
                adca fpa1+fps.sig
fps_mul7        rora                            ; rotate carry in (from add or 0)
                sta fpa0extra4
                ror fpa0extra5
                ror fpa0extra6
                ror fpa0extra7
                ror fpa0extra                   ; and into the extra precision bits
                clra                            ; clear carry - so shift above will terminate
                bra fps_mul6                    ; go do another bit
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision division (X) ÷ (U) -> (Y)
;
; This is basically the same algorithm used in the Color Basic ROM
fps_div         lda fps.exp,u                   ; is divisor 0?
                bne fps_div0
DIV0ERROR       ldb #err_div0                   ; raise division by zero
                jmp ERROR
fps_div0        lda fps.exp,x                   ; is dividend 0?
                lbeq fps_mul0                   ; brif so - return 0
                jsr fps_copyinputs              ; copy input values
                lda fpa0+fps.sign               ; calculate result sign - xor of operand signs
                eora fpa1+fps.sign
                sta fpa0+fps.sign               ; save result sign
                lda fpa1+fps.exp                ; get divisor exponent
                nega                            ; negate for subtraction
                adda fpa0+fps.exp               ; subtract it from dividend exponent
                rora                            ; set V if C and N differ
                rola
                bvc fps_mul3                    ; brif overflow or underflow
                adda #0x80                      ; add back the bias
                sta fpa0+fps.sign               ; save new sign
                lbeq fps_mul0                   ; brif we underflowed - zero out sign
                inca                            ; bump exponent - why?? related to the bias stuff above?
                lbeq OVERROR                    ; brif it overflows
                sta fpa0+fps.exp                ; save result exponent
                ldx #fpa0extra4                 ; point to temporary storage bytes for quotient
                ldb #4                          ; counter for 4 significand bytes and one extra partial byte
                stb fpa0extra1                  ; save counter since we need both accumulators
                ldb #1                          ; shift counter flag and quotient byte
fps_div1        lda fpa1+fps.sig                ; set C if fpa0 significand <= fpa1 significand
                cmpa fpa0+fps.sig
                bne fps_div2
                lda fpa1+fps.sig+1
                cmpa fpa0+fps.sig+1
                bne fps_div2
                lda fpa1+fps.sig+2
                cmpa fpa0+fps.sig+2
                bne fps_div2
                lda fpa1+fps.sig+3
                cmpa fpa0+fps.sig+3
                bne fps_div2
                coma                            ; set C if values are the same
fps_div2        tfr cc,a                        ; save C for later, C clear if fpa1 > fpa0
                rolb                            ; shift carry into quotient
                bcc fps_div3                    ; brif carry clear - we haven't done 8 bits yet
                stb ,x+                         ; save quotient byte after a full set of bits
                dec fpa0extra1                  ; have we done all the bytes?
                bmi fps_div7                    ; brif all bytes plus extra precision - done all
                beq fps_div6                    ; brif all main sigificand bytes - do a couple extra bits
                ldb #1                          ; reset the bit counter flag
fps_div3        tfr cc,a                        ; get back original carry from compare
                bcs fps_div5                    ; brif it "went"
fps_div4        lsl fpa0+fps.sig+3              ; shift dividend left
                rol fpa0+fps.sig+2
                rol fpa0+fps.sig+1
                rol fpa0+fps.sig
                bcs fps_div2                    ; brif it carries - next bit "goes"
                bmi fps_div1                    ; check magnitudes of next bit
                bra fps_div2                    ; carry clear - check another bit
fps_div5        lda fpa0+fps.sig+3              ; subtract divisor from dividend bits
                suba fpa1+fps.sig+3
                sta fpa0+fps.sig+3
                lda fpa0+fps.sig+2
                sbca fpa1+fps.sig+2
                sta fpa0+fps.sig+2
                lda fpa0+fps.sig+1
                sbca fpa1+fps.sig+1
                sta fpa0+fps.sig+1
                lda fpa0+fps.sig
                sbca fpa1+fps.sig
                sta fpa0+fps.sig
                bra fps_div4                    ; now do the bit shift to line things up
fps_div6        ldb #0x40                       ; only do two bits of extra precision byte
                bra fps_div2                    ; go handle these bitsd
fps_div7        rorb                            ; get extra quotient bits to bit 7,6 and bit 5 set
                rorb
                rorb
                stb fpa0extra                   ; save extra precision bits
                ldd fpa0extra4                  ; copy result bits to fpa0
                std fpa0+fps.sig
                ldd fpa0extra6
                std fpa0+fps.sig+2
                jmp fps_add10                   ; go normalize the result
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Pack single precision number at (X) to (U)
fps_pack        lda fps.sign,x                  ; get sign of number (will be 0 for 0)
                ora #0x7f                       ; make sure low bits are set for merging
                anda fps.sig,x                  ; merge with high bits of significand
                ldb fps.sig+1,x                 ; get upper mid bits of significand
                std fps.sig,u
                ldd fps.sig+2,x
                std fps.sig+2,u
                lda fps.exp,x
                sta fps.exp,u
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Unpack single precision number at (X) to (U)
fps_unpack      lda fps.exp,x                   ; get exponent of value
                beq fps_unpack0                 ; brif value is zero
                sta fps.exp,u
                ldb fps.sig,x                   ; get high byte of significand
                sex                             ; make sign value in A
                sta fps.sign,u                  ; set sign in result
                ldd fps.sig,x                   ; get high word of sifnificand
                ora #0x80                       ; make sure high bit is set
                std fps.sig,u                   ; save high word in result
                ldd fps.sig+2,x                 ; copy middle bytes of significand
                std fps.sig+2,u
                rts
fps_unpack0     sta fps.exp,u                   ; zero out destination
                sta fps.sig,u
                sta fps.sig+1,u
                sta fps.sig+2,u
                sta fps.sig+3,u
                sta fps.sign,u
                rts
                *pragmapop list