# HG changeset patch # User William Astle # Date 1696713464 21600 # Node ID a492441bfc5640b7b70c163e0477be5a701888ce # Parent 9a4e2364a96621dbf7af10a3bd3d2ed04272196b Add utility multiply and divide by 10 routines Add a fast multiply by 10 routine for both integer and floating point (shift left twice, add original, shift left). Also add a simple call to divide by 10 for both though there is no fast shortcut for that. diff -r 9a4e2364a966 -r a492441bfc56 src/fps.s --- a/src/fps.s Sat Oct 07 13:39:25 2023 -0600 +++ b/src/fps.s Sat Oct 07 15:17:44 2023 -0600 @@ -71,6 +71,34 @@ stb fpa0extra jmp fps_add10 ; go normalize the result and return ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Fast multiply (X) by 10, in place. +; +; * first, save original value +; * then, shift left by 2 bits (add 2 to exponent) +; * then, add original value +; * then, shift left one more (add 1 to exponent) +; +; This should be faster than multiplying by 10. 
+fps_mul10       leas -fps.size,s                ; make a temporary to hold original value
+                ldd ,x                          ; copy original value (three words, offsets 0 through 5)
+                std ,s
+                ldd 2,x
+                std 2,s
+                ldd 4,x
+                std 4,s
+                lda fps.exp,x                   ; bump original exponent by 2 (times 4)
+                adda #2
+                bcc fps_mul10b                  ; brif no carry - exponent did NOT overflow
+fps_mul10a      jmp OVERROR                     ; raise overflow (NOTE(review): temporary still stacked on fall-in; confirm OVERROR resets S)
+fps_mul10b      sta fps.exp,x                   ; store bumped exponent so (X) is now times 4
+                leay ,x                         ; Y = X (presumably fps_add puts its result at (Y), so in place - confirm)
+                leau ,s                         ; U = saved copy of the original value
+                bsr fps_add                     ; add original value (times 5)
+                leas fps.size,s                 ; clean up temporary
+                inc fps.exp,y                   ; bump exponent (times 10) in result (assumes fps_add preserves Y - TODO confirm)
+                beq fps_mul10a                  ; brif it overflowed (exponent wrapped around to zero)
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Unary negation - negate (X) to (Y)
 fps_neg         ldd 2,x                         ; copy to output and keep exponent in A
                 std 2,y
@@ -335,6 +363,12 @@
                 clra                            ; clear carry - so shift above will terminate
                 bra fps_mul6                    ; go do another bit
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Divide (X) by 10 in place
+fps_const10     fcb 0x83,0xa0,0x00,0x00,0x00,0x00 ; single precision unpacked constant 10
+fps_div10       ldu #fps_const10                ; point to constant 10
+                leay ,x                         ; put output in input
+                ; fall through to regular division
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Single precision division (X) ÷ (U) -> (Y)
 ;
 ; This is basically the same algorithm used in the Color Basic ROM
diff -r 9a4e2364a966 -r a492441bfc56 src/int.s
--- a/src/int.s	Sat Oct 07 13:39:25 2023 -0600
+++ b/src/int.s	Sat Oct 07 15:17:44 2023 -0600
@@ -35,6 +35,38 @@
                 bvs OVERROR2                    ; raise overflow if needed
 int32_add0      rts
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Fast multiply 32 bit at (X) by 10
+;
+; This will work for signed because the left shift will double it even if it is negative and V is set correctly after
+; left shifts.
The add will have the same sign so the magnitude will still increase, not decrease.
+uint32_mul10    ldd val.int,x                   ; make copy of original (NOTE(review): checks below use V, i.e. signed range)
+                ldu val.int+2,x
+                pshs d,u                        ; save original (upper word at 0,s; lower word at 2,s)
+                lsl val.int+3,x                 ; shift left (times 2)
+                rol val.int+2,x
+                rol val.int+1,x
+                rol val.int,x
+                bvs OVERROR2                    ; brif overflow (V comes from the shift of the top byte)
+                lsl val.int+3,x                 ; shift left (times 4)
+                rol val.int+2,x
+                rol val.int+1,x
+                rol val.int,x
+                bvs OVERROR2                    ; brif overflow
+                ldd val.int+2,x                 ; add original (times 5)
+                addd 2,s                        ; lower words first; carry feeds the upper word add below
+                std val.int+2,x                 ; (STD leaves C alone)
+                puls d,u                        ; (get upper word and clean stack; C still intact)
+                adcb val.int+1,x
+                adca val.int,x
+                bvs OVERROR2                    ; brif overflow - must test before the store because STD clears V
+                std val.int,x
+                lsl val.int+3,x                 ; shift left again (times 10)
+                rol val.int+2,x
+                rol val.int+1,x
+                rol val.int,x
+                bvs OVERROR2                    ; brif overflow
+                rts
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Signed 32 bit integer multiply (X) * (U) -> (Y), overflow if exceeds signed 32 bit range
 int32_mul       ldd val.int+2,x                 ; copy left operand to temporary
                 std fpa0+fps.sig+2
@@ -227,6 +259,12 @@
                 stb fpa0extra
                 rts
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Integer divide (X) by 10 *in place*
+int32_const10   fqb 10                          ; integer constant 10
+int32_div10     ldu #int32_const10              ; point to integer constant 10
+                leay ,x                         ; point to output location (same as input, so in place)
+                ; fall through to integer division
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; 32 bit division, integer only, truncate fraction without rounding. Note that there is exactly one case where integer
 ; division can overflow: dividing -0x80000000 by -1 which yields 0x80000000. All other cases reduce the magnitude.
 int32_div       ldd val.int+2,x                 ; copy left operand to temporary