Mercurial > hg > index.cgi

                *pragmapush list
                *pragma list
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision floating point arithmetic package
;
; Floating point values are stored in 6 byte packets (single precision) organized as follows:
; Offset        Length          Contents
; 0             1               8 bit binary exponent with a bias of 128; 0 means the number is 0
; 1             4               32 bit significand
; 5             1               sign flag; zero for positive, 0xff for negative
;
; Binary operateions take pointers to their arguments in X and U. Y contains the result location. In all cases, it is
; safe to specify either one of the arguments as the result location. Unary operations take their operand pointer in
; X and their result pointer in Y. In all cases, there is an in place version of the unary operation that operates on
; its input and only needs the single pointer in X.
;
; On the topic of optimization: these routines copy their operands to accumulators in the direct page. This saves one
; cycle per instruction which has a nonzero offset. In addition, this means that the input operands can remain
; unmodified by the actual operations. For instance, addition requires denormalizing one operand which would otherwise
; have to be done in place.
;
; NOTE: the input pointers X and U may be clobbered.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Convert 32 bit unsigned integer at (X) to single precision floating point at (U)
fps_fromuint32  clr fpa0+fps.sign               ; set result sign to positive
                bra fps_fromint32a              ; go do conversion
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Convert 32 bit signed integer at (X) to single precision floating point at value accumulator in (Y)
fps_fromint32   ldb ,x                          ; set sign based on signed integer
                sex
                sta fpa0+fps.sign               ; set result sign to the two's complement sign
                bpl fps_fromint32a              ; brif positive - no need to mess with bits
                jsr int32_neg                   ; make the bits positive
fps_fromint32a  ldb valtype_float               ; set output value type to floating point
                stb val.type,y
                ldd ,x                          ; copy integer bits to fpa0
                std fpa0+fps.sig
                ldd 2,x
                std fpa0+fps.sig+2
                ldb #0xa0                       ; put binary point to the right of the significand
                stb fpa0+fps.exp
                clr fpa0extra                   ; clear extra precision
                jmp fps_add10                   ; go normalize the result and return
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Unary negation - negate (X) to (Y)
fps_neg         ldd 2,x                         ; copy to output and keep exponent in A
                std 2,y
                ldd 4,x
                std 4,y
                ldd ,x
                std ,y
                tsta                            ; is the number zero?
                beq fps_neg0                    ; brif so - do nothing
                com fps.sign,y                  ; flip the sign
fps_neg0        rts
fps_negx        lda fps.exp,x                   ; is the number zero?
                beq fps_negx0                   ; brif so - do nothing
                com fps.sign,x                  ; flip the sign
fps_negx0       rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copy (X) to fpa0 and (U) to fpa1
fps_copyinputs  ldd ,x                          ; copy (X) to fpa0
                std fpa0
                ldd 2,x
                std fpa0+2
                ldd 4,x
                std fpa0+4
                ldd ,u                          ; copy (U) to fpa0
                std fpa1
                ldd 2,u
                std fpa1+2
                ldd 4,u
                std fpa1+4
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Subtraction (X) - (U) to (Y)
fps_sub         bsr fps_copyinputs              ; copy input operands
                com fpa1+fps.sign               ; negate the subtrahend (don't need to handle zero here)
                bra fps_add0                    ; go handle it as addition
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Addition (X) + (U) to (Y)
fps_add         bsr fps_copyinputs              ; copy input operands
fps_add0        lda fpa1+fps.exp                ; is the second operand 0?
                bne fps_add1                    ; brif not
                ldd fpa0                        ; copy first operand to output
                std ,y
                ldd fpa0+2
                std 2,y
                ldd fpa0+4
                std 4,y
                rts
fps_add1        lda fpa0+fps.exp                ; get exponent of first operand
                bne fps_add2                    ; brif not zero
                ldd fpa1                        ; copy second operand to output
                std ,y
                ldd fpa1+2
                std 2,y
                ldd fpa1+4
                std 4,y
                rts
fps_add2        clr fpa0extra                   ; clear extra precision bits
                lda fpa0+fps.exp                ; get first operand exponent
                suba fpa1+fps.exp               ; get difference in exponents
                beq fps_add8                    ; brif we don't need to denormalize - they're the same
                blo fps_add3                    ; brif we need to denormalize the first operand
                ldx #fpa1                       ; point to second operand to denormalize
                bra fps_add4                    ; go denormalize
fps_add3        ldb fpa1+fps.exp                ; get exponent of second operand (result exponent)
                stb fpa0+fps.exp                ; set result exponent
                ldx #fpa0                       ; point to first operand to denormalize
                nega                            ; get number of bits to shift as positive number
fps_add4        suba #8                         ; is there 8 bits left?
                blo fps_add5                    ; brif not
                ldb fps.sig+3,x                 ; shift significand 8 bits right
                stb fpa0extra                   ; save extra precision bits
                ldb fps.sig+2,x
                stb fps.sig+3,x
                ldb fps.sig+1,x
                stb fps.sig+2,x
                ldb fps.sig,x
                stb fps.sig+1,x
                clr fps.sig,x
                bra fps_add4                    ; see if we have another byte to shift
fps_add5        adda #8                         ; adjust for extra subtract above
                bra fps_add7                    ; do the bit shifting
fps_add6        lsr fps.sig,x                   ; shift one bit right
                ror fps.sig+1,x
                ror fps.sig+2,x
                ror fps.sig+3,x
                ror fpa0extra
                deca                            ; done all of the bits?
fps_add7        bne fps_add6                    ; brif not
fps_add8        lda fpa0+fps.sign               ; compare the signs of the operands
                eora fpa1+fps.sign              ; set A if signs differ
                bne fps_add9                    ; brif they differ - do subtraction
                ldd fpa0+fps.sig+2              ; add low word of significands
                addd fpa1+fps.sig+2
                std fpa0+fps.sig+2
                ldd fpa0+fps.sig                ; and the high word
                adcb fpa1+fps.sig+1
                adca fpa1+fps.sig
                std fpa0+fps.sig
                bcc fps_add9                    ; brif no carry
                ror fpa0+fps.sig                ; shift carry into significand
                ror fpa0+fps.sig+1
                ror fpa0+fps.sig+2
                ror fpa0+fps.sig+3
                ror fpa0extra                   ; and the extra bits (for rounding)
                inc fpa0+fps.exp                ; bump exponent to account for bit shift
                bne fps_add14                   ; go check for round-off if not overflow
OVERROR         ldb #err_ov                     ; raise overflow
                jmp ERROR
fps_add9        ldd fpa0+fps.sig+2              ; subtract low word
                subd fpa1+fps.sig+2
                std fpa0+fps.sig+2
                ldd fpa0+fps.sig                ; and now the high word
                sbcb fpa1+fps.sig+1
                sbca fpa1+fps.sig
                std fpa0+fps.sig
                bcc fps_add10                   ; brif we didn't carry
                com fpa0+fps.sign               ; flip result sign - other number was bigger
                com fpa0+fps.sig+3              ; negate two's complement result to be just the magnitude
                com fpa0+fps.sig+2
                com fpa0+fps.sig+1
                com fpa0+fps.sig
                ldx fpa0+fps.sig+2              ; add 1 to complete negation
                leax 1,x
                stx fpa0+fps.sig+2
                bne fps_add10                   ; brif carry doesn't propagate
                ldx fpa0+fps.sig                ; propagate carry
                leax 1,x
                stx fpa0+fps.sig                ; NOTE: this cannot carry because magnitude got smaller
fps_add10       clra                            ; initialize exponent offset
fps_add11       ldb fpa0+fps.sig                ; do we have nonzero bits in high byte of significand?
                bne fps_add13                   ; brif so
                ldb fpa0+fps.sig+1              ; shift left 8 bits
                stb fpa0+fps.sig
                ldb fpa0+fps.sig+2
                stb fpa0+fps.sig+1
                ldb fpa0+fps.sig+3
                stb fpa0+fps.sig+2
                ldb fpa0extra                   ; and extra precision bits
                stb fpa0+fps.sig+3
                clr fpa0extra
                addb #8                         ; account for number of bits shifted
                cmpb #40                        ; done 40 bits?
                blo fps_add11                   ; brif not - see if we have more bits to shift
                clr fpa0+fps.exp                ; number underflowed to zero - set it so
                clr fpa0+fps.sign
                clr fpa0+fps.sig
                clr fpa0+fps.sig+1
                clr fpa0+fps.sig+2
                clr fpa0+fps.sig+3
                bra fps_add16                   ; go return result
fps_add12       inca                            ; account for a bit shift
                lsl fpa0extra                   ; shift significand and extra bits left
                rol fpa0+fps.sig+3
                rol fpa0+fps.sig+2
                rol fpa0+fps.sig+1
                rol fpa0+fps.sig
fps_add13       bpl fps_add12                   ; brif we haven't normalized yet
fps_add14       ldb fpa0extra                   ; do we need to round?
                bpl fps_add16                   ; brif not
                ldx fpa0+fps.sig+2              ; add one to significand
                leax 1,x
                stx fpa0+fps.sig+2
                bne fps_add16                   ; brif no carry
                ldx fpa0+fps.sig                ; bump the upper word of significand
                leax 1,x
                stx fpa0+fps.sig
                bne fps_add16
fps_add15       inc fpa0+fps.exp                ; bump exponent
                beq OVERROR                     ; brif it overflowed
                lda #0x80                       ; set high bit of significand (rest is zero)
                sta fpa0+fps.sig
fps_add16       ldd fpa0                        ; copy result to destination
                std ,y
                ldd fpa0+2
                std 2,y
                ldd fpa0+4
                std 4,y
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision multiplication (X) × (U) to (Y)
fps_mul         lda fps.exp,x                   ; is first operand zero?
                beq fps_mul0                    ; brif so - return zero
                lda fps.exp,u                   ; is second operand zero?
                bne fps_mul1                    ; brif not - have to do the multiply
fps_mul0        ldd zero                        ; return zero result
                std ,y
                std 2,y
                std 4,y
                rts
fps_mul1        jsr fps_copyinputs              ; copy input operands
                lda fpa0+fps.sign               ; calculate sign of result - xor of the two signs
                eora fpa1+fps.sign
                sta fpa0+fps.sign               ; save result sign
                lda fpa0+fps.exp                ; get exponent of first value
                adda fpa1+fps.exp               ; calculate new exponent; also cancels the bias
                rora                            ; set V if C and N differ
                rola
                bvc fps_mul3                    ; brif maybe an overflow
                adda #0x80                      ; add back the bias
                sta fpa0+fps.sign               ; save new sign
                beq fps_mul0                    ; brif we underflowed - zero out sign
; This does a shift-and-add multiplication algorithm. This is slower than an equivalent using MUL but is smaller.
; The high order bytes will be left in fpa0 with the low order bytes in fpa0extra and fpa0extra[1-3]. The low order
; bytes are kept for the odd case where extra precision is useful. Uses fpa0extra[4-7] as temporaries.
fps_mul2        ldd zero                        ; zero out temporary bytes
                std fpa0extra4
                std fpa0extra6
                ldb fpa0+fps.sig+3              ; multiply by low byte of fpa0
                bsr fps_mul4
                ldb fpa0extra                   ; move low bites
                stb fpa0extra3
                ldb fpa0+fps.sig+2              ; multiply by next higher byte
                bsr fps_mul4
                ldb fpa0extra                   ; move low bits
                stb fpa0extra2
                ldb fpa0+fps.sig+1              ; and again for the next higher byte
                bsr fps_mul4
                ldb fpa0extra
                stb fpa0extra1
                ldb fpa0+fps.sig                ; and the high order byte
                bsr fps_mul4
                ldd fpa0extra4                  ; copy high order product bits to result
                std fpa0+fps.sig
                ldd fpa0extra6
                std fpa0+fps.sig+2
                jmp fps_add10                   ; go normalize the result and return
fps_mul3        bpl fps_mul0                    ; brif we underflowed - return 0
                jmp OVERROR                     ; raise overflow
fps_mul4        bne fps_mul5                    ; brif not multiply by zero
                lda fpa0+fps.sig+3              ; shift 8 bits right
                sta fpa0extra
                lda fpa0+fps.sig+2
                sta fpa0+fps.sig+3
                lda fpa0+fps.sig+1
                sta fpa0+fps.sig+2
                lda fpa0+fps.sig
                sta fpa0+fps.sig+1
                clr fpa0+fps.sig
fps_mul8        rts
fps_mul5        coma                            ; set C
fps_mul6        lda fpa0extra4                  ; get high byte of result bytes
                rorb                            ; is multiplier bit set?
                beq fps_mul8                    ; brif 8 shifts done (C set above makes sure of that)
                bcc fps_mul7                    ; brif bit not set - don't do addition
                lda fpa0extra7                  ; add multiplier (fpa1) to result
                adda fpa1+fps.sig+3
                sta fpa0extra7
                lda fpa0extra6
                adca fpa1+fps.sig+2
                sta fpa0extra6
                lda fpa0extra5
                adca fpa1+fps.sig+1
                sta fpa0extra5
                lda fpa0extra4
                adca fpa1+fps.sig
fps_mul7        rora                            ; rotate carry in (from add or 0)
                sta fpa0extra4
                ror fpa0extra5
                ror fpa0extra6
                ror fpa0extra7
                ror fpa0extra                   ; and into the extra precision bits
                clra                            ; clear carry - so shift above will terminate
                bra fps_mul6                    ; go do another bit
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Single precision division (X) ÷ (U) -> (Y)
;
; This is basically the same algorithm used in the Color Basic ROM
fps_div         lda fps.exp,u                   ; is divisor 0?
                bne fps_div0
DIV0ERROR       ldb #err_div0                   ; raise division by zero
                jmp ERROR
fps_div0        lda fps.exp,x                   ; is dividend 0?
                lbeq fps_mul0                   ; brif so - return 0
                jsr fps_copyinputs              ; copy input values
                lda fpa0+fps.sign               ; calculate result sign - xor of operand signs
                eora fpa1+fps.sign
                sta fpa0+fps.sign               ; save result sign
                lda fpa1+fps.exp                ; get divisor exponent
                nega                            ; negate for subtraction
                adda fpa0+fps.exp               ; subtract it from dividend exponent
                rora                            ; set V if C and N differ
                rola
                bvc fps_mul3                    ; brif overflow or underflow
                adda #0x80                      ; add back the bias
                sta fpa0+fps.sign               ; save new sign
                lbeq fps_mul0                   ; brif we underflowed - zero out sign
                inca                            ; bump exponent - why?? related to the bias stuff above?
                lbeq OVERROR                    ; brif it overflows
                sta fpa0+fps.exp                ; save result exponent
                ldx #fpa0extra4                 ; point to temporary storage bytes for quotient
                ldb #4                          ; counter for 4 significand bytes and one extra partial byte
                stb fpa0extra1                  ; save counter since we need both accumulators
                ldb #1                          ; shift counter flag and quotient byte
fps_div1        lda fpa1+fps.sig                ; set C if fpa0 significand <= fpa1 significand
                cmpa fpa0+fps.sig
                bne fps_div2
                lda fpa1+fps.sig+1
                cmpa fpa0+fps.sig+1
                bne fps_div2
                lda fpa1+fps.sig+2
                cmpa fpa0+fps.sig+2
                bne fps_div2
                lda fpa1+fps.sig+3
                cmpa fpa0+fps.sig+3
                bne fps_div2
                coma                            ; set C if values are the same
fps_div2        tfr cc,a                        ; save C for later, C clear if fpa1 > fpa0
                rolb                            ; shift carry into quotient
                bcc fps_div3                    ; brif carry clear - we haven't done 8 bits yet
                stb ,x+                         ; save quotient byte after a full set of bits
                dec fpa0extra1                  ; have we done all the bytes?
                bmi fps_div7                    ; brif all bytes plus extra precision - done all
                beq fps_div6                    ; brif all main sigificand bytes - do a couple extra bits
                ldb #1                          ; reset the bit counter flag
fps_div3        tfr cc,a                        ; get back original carry from compare
                bcs fps_div5                    ; brif it "went"
fps_div4        lsl fpa0+fps.sig+3              ; shift dividend left
                rol fpa0+fps.sig+2
                rol fpa0+fps.sig+1
                rol fpa0+fps.sig
                bcs fps_div2                    ; brif it carries - next bit "goes"
                bmi fps_div1                    ; check magnitudes of next bit
                bra fps_div2                    ; carry clear - check another bit
fps_div5        lda fpa0+fps.sig+3              ; subtract divisor from dividend bits
                suba fpa1+fps.sig+3
                sta fpa0+fps.sig+3
                lda fpa0+fps.sig+2
                sbca fpa1+fps.sig+2
                sta fpa0+fps.sig+2
                lda fpa0+fps.sig+1
                sbca fpa1+fps.sig+1
                sta fpa0+fps.sig+1
                lda fpa0+fps.sig
                sbca fpa1+fps.sig
                sta fpa0+fps.sig
                bra fps_div4                    ; now do the bit shift to line things up
fps_div6        ldb #0x40                       ; only do two bits of extra precision byte
                bra fps_div2                    ; go handle these bitsd
fps_div7        rorb                            ; get extra quotient bits to bit 7,6 and bit 5 set
                rorb
                rorb
                stb fpa0extra                   ; save extra precision bits
                ldd fpa0extra4                  ; copy result bits to fpa0
                std fpa0+fps.sig
                ldd fpa0extra6
                std fpa0+fps.sig+2
                jmp fps_add10                   ; go normalize the result
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Pack single precision number at (X) to (U)
fps_pack        lda fps.sign,x                  ; get sign of number (will be 0 for 0)
                ora #0x7f                       ; make sure low bits are set for merging
                anda fps.sig,x                  ; merge with high bits of significand
                ldb fps.sig+1,x                 ; get upper mid bits of significand
                std fps.sig,u
                ldd fps.sig+2,x
                std fps.sig+2,u
                lda fps.exp,x
                sta fps.exp,u
                rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Unpack single precision number at (X) to (U)
fps_unpack      lda fps.exp,x                   ; get exponent of value
                beq fps_unpack0                 ; brif value is zero
                sta fps.exp,u
                ldb fps.sig,x                   ; get high byte of significand
                sex                             ; make sign value in A
                sta fps.sign,u                  ; set sign in result
                ldd fps.sig,x                   ; get high word of sifnificand
                ora #0x80                       ; make sure high bit is set
                std fps.sig,u                   ; save high word in result
                ldd fps.sig+2,x                 ; copy middle bytes of significand
                std fps.sig+2,u
                rts
fps_unpack0     sta fps.exp,u                   ; zero out destination
                sta fps.sig,u
                sta fps.sig+1,u
                sta fps.sig+2,u
                sta fps.sig+3,u
                sta fps.sign,u
                rts
                *pragmapop list
author	William Astle <lost@l-w.ca>
date	Sat, 07 Oct 2023 02:56:59 -0600
parents
children	9a4e2364a966