LWOS: src/fps.s comparison

comparison src/fps.s @ 83:a492441bfc56

Add utility multiply and divide by 10 routines. Add a fast multiply by 10 routine for both integer and floating point (shift left twice, add original, shift left). Also add a simple call to divide by 10 for both, though there is no fast shortcut for that.

author	William Astle <lost@l-w.ca>
date	Sat, 07 Oct 2023 15:17:44 -0600
parents	9a4e2364a966
children	663d8e77b579

comparison

equal deleted inserted replaced

-:9a4e2364a966
+:a492441bfc56
 std fpa0+fps.sig+1
 ldd 2,x                         ; and the final byte and extra precision
 sta fpa0+fps.sig+3
 stb fpa0extra
 jmp fps_add10                   ; go normalize the result and return
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Fast multiply (X) by 10, in place.
+;
+; * first, save original value
+; * then, shift left by 2 bits (add 2 to exponent)
+; * then, add original value
+; * then, shift left one more (add 1 to exponent)
+;
+; A value of zero (exponent byte of zero) is returned untouched. Bumping the exponent of a zero would otherwise
+; hand back a nonzero exponent, which the rest of the package would no longer recognize as zero.
+;
+; Exponent overflow at either bump is reported by jumping to OVERROR.
+;
+; This should be faster than multiplying by 10.
+fps_mul10       leay ,x                         ; result pointer is the input; set first so both exits leave Y the same
+                lda fps.exp,x                   ; is the value zero?
+                beq fps_mul10c                  ; brif so - zero times ten is still zero, nothing to do
+                leas -fps.size,s                ; make a temporary to hold original value
+                ldd ,x                          ; copy original value (copy assumes fps.size is 6 bytes)
+                std ,s
+                ldd 2,x
+                std 2,s
+                ldd 4,x
+                std 4,s
+                lda fps.exp,x                   ; bump original exponent by 2 (times 4)
+                adda #2
+                bcc fps_mul10b                  ; brif no overflow
+; NOTE(review): when reached by falling through from above, the temporary is still on the stack; presumably
+; OVERROR resets the stack pointer - confirm.
+fps_mul10a      jmp OVERROR                     ; raise overflow
+fps_mul10b      sta fps.exp,x                   ; (X) is now four times the original
+                leau ,s                         ; second operand is the saved original
+                bsr fps_add                     ; add original value (times 5)
+                leas fps.size,s                 ; clean up temporary
+                inc fps.exp,y                   ; bump exponent (times 10) in result
+                beq fps_mul10a                  ; brif it overflowed (exponent wrapped around to zero)
+fps_mul10c      rts
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Unary negation - negate (X) to (Y)
 fps_neg         ldd 2,x                         ; copy to output and keep exponent in A
 std 2,y
 ldd 4,x
 ror fpa0extra7
 ror fpa0extra                   ; and into the extra precision bits
 clra                            ; clear carry - so shift above will terminate
 bra fps_mul6                    ; go do another bit
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; In-place division of (X) by 10
+;
+; Sets up the operands for fps_div - the divisor (U) is the constant below and the result (Y) overwrites the
+; dividend (X) - then runs straight into fps_div, which must immediately follow this routine.
+fps_const10     fcb 0x83,0xa0,0x00,0x00,0x00,0x00 ; the value 10 in unpacked single precision form
+fps_div10       leay ,x                         ; result goes back where the dividend came from
+                ldu #fps_const10                ; divisor is the constant 10
+; no rts or branch here: execution continues into the regular division below
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Single precision division (X) ÷ (U) -> (Y)
 ;
 ; This is basically the same algorithm used in the Color Basic ROM
 fps_div         lda fps.exp,u                   ; is divisor 0?
 bne fps_div0

Mercurial > hg > index.cgi

comparison src/fps.s @ 83:a492441bfc56