# HG changeset patch
# User William Astle <lost@l-w.ca>
# Date 1703659065 25200
# Node ID 0aac264538491968b64dfc2a2934e80cdaac8472
# Parent  98b0646360e198a2d3b5092599a1b9ff578012b5
Actually implement the 32 bit integer division algorithm correctly

diff -r 98b0646360e1 -r 0aac26453849 src/int.s
--- a/src/int.s	Tue Dec 26 21:50:09 2023 -0700
+++ b/src/int.s	Tue Dec 26 23:37:45 2023 -0700
@@ -384,44 +384,59 @@
 ; quotient at fpaextra...fpaextra+3 and remainder at fpaextra+4...fpaextra+7; does not check for division by zero
 ; which will result in a quotient of 0xffffffff and a remainder will be the dividend. It will not get suck in a loop.
 ;
-; Algorithm is basically pencil and paper long division. We check to see if the divisor "goes" at each step by doing
-; a trial subtraction without saving the result. If it doesn't go, we just loop around again. If it does go, we stash
-; a 1 bit in the quotient and actually do the subtraction. Then go loop around again. Doing it this way rather than
-; with an actual subtraction and then undoing it with addition saves two store instructions on the comparison saves
-; having to do a restore in the no-go case which is going to be quite common with values whose upper bits are
-; mostly zeroes, thus it makes the operations faster in that case, for integers. (Floating point is a different
-; problem.) 
+; Uses a 96 bit temporary at fpaextra. The high 32 bits are the result and the mid 32 bits will be the remainder. The
+; remaining 32 bits are used to avoid modifing the fpa0 significand. The extra 4 bytes could be avoided by simply
+; clobbering the fpa0 significand.
+;
+; The algorithm is basically the pencil and paper long division algorithm. First, the dividend is shifted left one
+; bit into the remainder. If a carry occurs from that, the subtraction will succeed. If not, do a trial subtraction
+; which is just the subtraction without saving the result. If that results in a carry, then the divisor doesn't "go"
+; at this bit position so the quotient bit will be zero. If there is no carry here or there was a carry on the shift,
+; it does "go" so the quotient bit will be one. If it went, actually do the subtraction. In either event, shift the
+; quotient bit into the accumulated quotient. Do this for all 32 bits.
+;
+; The carry has to be checked on the shift to simulate doing what is basically a 33 bit subtraction.
 util_div32      ldd fpa0+fpa.sig+2              ; copy dividend to result location
-                std fpaextra+6
+                std fpaextra+10
                 ldd fpa0+fpa.sig
-                std fpaextra+4
-                ldb #32                         ; do 32 bits
+                std fpaextra+8
+                ldb #32                         ; do 64 bits - will give us a remainder
                 stb fpa0+fpa.exp                ; save counter somewhere because we don't have enough registers
                 ldd zero                        ; zero out remainder
+                std fpaextra
+                std fpaextra+2
                 std fpaextra+4
                 std fpaextra+6
-util_div32a     lsl fpaextra+3                  ; shift dividend residue into remainder
-                rol fpaextra+2
-                rol fpaextra+1
-                rol fpaextra
+util_div32a     lsl fpaextra+11                 ; shift residue and remainder over
+                rol fpaextra+10
+                rol fpaextra+9
+                rol fpaextra+8
                 rol fpaextra+7
                 rol fpaextra+6
                 rol fpaextra+5
                 rol fpaextra+4
-                ldd fpaextra+6                  ; now subtract divisor from remainder
+                pshs cc                         ; save "goes" status from shift
+                bcs util_div32b                 ; brif it definitely goes
+                ldd fpaextra+6                  ; do a trial subtraction
                 subd fpa1+fpa.sig+2
                 ldd fpaextra+4
                 sbcb fpa1+fpa.sig+1
                 sbca fpa1+fpa.sig
-                bcs util_div32b                 ; brif it doesn't go - don't subtract or set bit
-                inc fpaextra+3                  ; set quotient bit
-                ldd fpaextra+6                  ; actually do the subtraction
+                bcs util_div32c                 ; brif it doesn't go - goes status on stack is right
+                inc ,s                          ; set "C" on the stack
+util_div32b     ldd fpaextra+6                  ; actually do the subtraction
                 subd fpa1+fpa.sig+2
                 std fpaextra+6
                 ldd fpaextra+4
                 sbcb fpa1+fpa.sig+1
                 sbca fpa1+fpa.sig
                 std fpaextra+4
-util_div32b     dec fpa0+fpa.exp                ; done all 32 bits?
-                bne util_div32a                 ; do another
+util_div32c     puls cc                         ; get back "goes" status
+                rol fpaextra+3                  ; shift quotient over and put the new bit in
+                rol fpaextra+2
+                rol fpaextra+1
+                rol fpaextra
+                dec fpa0+fpa.exp                ; done 32 bits?
+                bne util_div32a                 ; brif not - handle another bit position
+                rts
                 *pragmapop list