# HG changeset patch # User William Astle # Date 1697429736 21600 # Node ID 663d8e77b579cb07accdd045039691d2cdc68585 # Parent f959c92bc3294bb4cfbc0df30499a71755b7bfea Implmement BCD floating point and update number parsing and printing Implements a BCD floating point system with 10 decimal digits of precistion and an exponent range of -63 to +63. Also include parsing integer and floating point values and printing them out. diff -r f959c92bc329 -r 663d8e77b579 README.txt --- a/README.txt Sun Oct 08 00:17:20 2023 -0600 +++ b/README.txt Sun Oct 15 22:15:36 2023 -0600 @@ -43,3 +43,49 @@ and, thus, will not autostart whatever Disk Basic you happen to have installed. This is most relevant on a Coco 3 where the upper 16K of the LWBasic ROM is internal instead of in a cartrige. + + +Numbers +======= + +LWBasic has three numeric types: a 32 bit signed integer, stored as two's +complement, a decimal floating point type stored in packed BCD with +10 digits of precision and a base 10 exponent range of -63 to +63, and a +double precision decimal floating point type with 20 decimal digits of +precision and a base 10 exponent range from -2047 to +2047. + +The BCD format using 48 bits is stored as follows: + +Offset Size Content +0 1 Sign bit - 1 for negative +1 7 Decimal exponent with a bias of 64; 0 indicates a value of 0 +8 40 10 BCD digits of the significand + +*** Planned but not implememted +The BCD double format using 96 bits is stored as follows: + +Offset Size Content +0 1 sign bit - 1 for negative +1 3 +4 12 Decimal exponent with a bias of 2048; 0 indicates value of 0 +16 80 20 BCD digits of the significand + +It is worth noting the reason for using the BCD format instead of binary +floating point. Because interactions with the computer are typically in base +10, it makes sense to avoid the horrendous complications of attempting to +maximize the accuracy of converting binary floating point to decimal and +back again. While this is trivial for integers, it is non-trivial for +floating point where the variations in accuracy and the need for rounding is +unavoidable. + +At the expense of some extra CPU cycles for doing the calculations, base 10 +accuracy is preserved, at least to the precision limit of the data type. +This also allows pre-parsing numbers and being able to display them again +accurately when generating a program listing, even if extra nonsignificant +digits like trailing decimal zeroes are not reproduced. + +It is, however, critical to note that despite the BCD representation, these +values are still floating point and have the inherent inaccuracies of all +limited precision data types. The simple difference is that it makes the +numbers in the computer behave exactly like scientific notiation with no +unexpected surprises. diff -r f959c92bc329 -r 663d8e77b579 src/defs.s --- a/src/defs.s Sun Oct 08 00:17:20 2023 -0600 +++ b/src/defs.s Sun Oct 15 22:15:36 2023 -0600 @@ -43,14 +43,18 @@ ; Value type constants valtype_none equ 0 ; unknown value type valtype_int equ 1 ; integer (32 bit) value (signed) -valtype_float equ 2 ; float type (40 bit) value +valtype_float equ 2 ; BCD float type (48 bit) value valtype_string equ 3 ; string type (16 bit length, 16(32) bit data pointer ; Floating point accumulator structure definitions -fps.exp equ 0 ; single precision exponent -fps.sig equ fps.exp+1 ; single precision significand -fps.sign equ fps.sig+4 ; single precision sign -fps.size equ fps.sign+1 -fpa.size equ fps.size ; use the largest floating point accumulator size +; +; Note: the extra precision bytes are needed for general calculations and need to be at least the same +; size as the significand itself; the exponent will be stored with the bias. The accumulators are +; unpacked. +fpa.exp equ 0 ; exponent - use largest size needed for any precision +fpa.sig equ fpa.exp+1 ; significand - use largest size needed for any precision +fpa.extra equ fpa.sig+5 ; extras; largest size needed for any precision, must follow significand +fpa.sign equ fpa.extra+5 ; sign flag +fpa.size equ fpa.sign+1 ; use the largest floating point accumulator size ; String data definition str.len equ 0 ; string length (2 bytes) str.ptr equ str.len+2 ; string data pointer (3 bytes) @@ -58,13 +62,13 @@ ; Value accumulator structure definitions; note that the actual value data must be first and any ; incidental meta data must follow val.value equ 0 ; offset of the value stored in the accumulator -val.fpsexp equ val.value+fps.exp ; floating point exponent -val.fpssig equ val.value+fps.sig ; floating point significand -val.fpssign equ val.value+fps.sign ; floating point sign +val.fpsexp equ val.value ; floating point exponent +val.fpssig equ val.fpsexp+1 ; floating point significand +val.fpssign equ val.fpssig+5 ; floating point sign val.int equ val.value ; integer offset val.strlen equ val.value+str.len ; string length offset val.strptr equ val.value+str.ptr ; string data pointer (low word) -val.type equ val.value+fps.size ; use the largest of the data types here +val.type equ val.value+val.fpssign+1 ; use the largest of the data types here val.size equ val.type+1 ; size of a value accumulator ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ifdef COCO3 diff -r f959c92bc329 -r 663d8e77b579 src/fps-bin.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/fps-bin.s Sun Oct 15 22:15:36 2023 -0600 @@ -0,0 +1,488 @@ + *pragmapush list + *pragma list +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Single precision floating point arithmetic package +; +; Floating point values are stored in 6 byte packets (single precision) organized as follows: +; Offset Length Contents +; 0 1 8 bit binary exponent with a bias of 128; 0 means the number is 0 +; 1 4 32 bit significand +; 5 1 sign flag; zero for positive, 0xff for negative +; +; Binary operateions take pointers to their arguments in X and U. Y contains the result location. In all cases, it is +; safe to specify either one of the arguments as the result location. Unary operations take their operand pointer in +; X and their result pointer in Y. In all cases, there is an in place version of the unary operation that operates on +; its input and only needs the single pointer in X. +; +; On the topic of optimization: these routines copy their operands to accumulators in the direct page. This saves one +; cycle per instruction which has a nonzero offset. In addition, this means that the input operands can remain +; unmodified by the actual operations. For instance, addition requires denormalizing one operand which would otherwise +; have to be done in place. +; +; NOTE: the input pointers X and U may be clobbered. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Convert 32 bit unsigned integer at (X) to single precision floating point at (U) +fps_fromuint32 clr fpa0+fps.sign ; set result sign to positive + bra fps_fromint32a ; go do conversion +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Convert 32 bit signed integer at (X) to single precision floating point at value accumulator in (Y) +fps_fromint32 ldb ,x ; set sign based on signed integer + sex + sta fpa0+fps.sign ; set result sign to the two's complement sign + bpl fps_fromint32a ; brif positive - no need to mess with bits + jsr int32_neg ; make the bits positive +fps_fromint32a ldb valtype_float ; set output value type to floating point + stb val.type,y + ldd ,x ; copy integer bits to fpa0 + std fpa0+fps.sig + ldd 2,x + std fpa0+fps.sig+2 + ldb #0xa0 ; put binary point to the right of the significand + stb fpa0+fps.exp + clr fpa0extra ; clear extra precision + jmp fps_add10 ; go normalize the result and return +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Convert 64 bit unsigned value at (X) to single precision floating point in value accumulator at (Y) +; +; Cases: +; * byte 0 is first nonzero - exponent at 64 bits right, use upper 0 to 4 +; * byte 1 is first nonzero - exponent at 56 bits right, use bytes 1 to 5 +; * byte 2 is first nonzero - exponent at 48 bits right, use bytes 2 to 6 +; * otherwise - exponent at 40 bits right, use bytes 3 to 7 +fps_fromuint64 clra ; set sign to positive +fps_fromuint64s sta fpa0+fps.sign ; save sign of result + ldb #0xc0 ; exponent if binary point is 64 bits to the right + lda ,x+ ; is the first byte zero? + bne fps_fromuint64a ; brif not + subb #8 ; lose a byte off exponent + lda ,x+ ; is the second byte zero? + bne fps_fromuint64a ; brif not + subb #8 ; lose another byte off exponent + lda ,x+ ; is third byte zero? + bne fps_fromuint64a ; brif not + subb #8 ; lose another byte + lda ,x+ ; get first byte to copy +fps_fromuint64a stb fpa0+fps.exp ; save exponent + sta fpa0+fps.sig ; save high byte of significand + ldd ,x ; copy next two bytes to significand + std fpa0+fps.sig+1 + ldd 2,x ; and the final byte and extra precision + sta fpa0+fps.sig+3 + stb fpa0extra + jmp fps_add10 ; go normalize the result and return +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Fast multiply (X) by 10, in place. +; +; * first, save original value +; * then, shift left by 2 bits (add 2 to exponent) +; * then, add original value +; * then, shift left one more (add 1 to exponent) +; +; This should be faster than multiplying by 10. +fps_mul10 leas -fps.size,s ; make a temporary to hold original value + ldd ,x ; copy original value + std ,s + ldd 2,x + std 2,s + ldd 4,x + std 4,s + lda fps.exp,x ; bump original exponent by 2 (times 4) + adda #2 + bcc fps_mul10b ; brif it overflowed +fps_mul10a jmp OVERROR ; raise overflow +fps_mul10b sta fps.exp,x + leay ,x + leau ,s + bsr fps_add ; add original value (times 5) + leas fps.size,s ; clean up temporary + inc fps.exp,y ; bump exponent (times 10) in result + beq fps_mul10a ; brif it overflowed + rts +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Unary negation - negate (X) to (Y) +fps_neg ldd 2,x ; copy to output and keep exponent in A + std 2,y + ldd 4,x + std 4,y + ldd ,x + std ,y + tsta ; is the number zero? + beq fps_neg0 ; brif so - do nothing + com fps.sign,y ; flip the sign +fps_neg0 rts +fps_negx lda fps.exp,x ; is the number zero? + beq fps_negx0 ; brif so - do nothing + com fps.sign,x ; flip the sign +fps_negx0 rts +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copy (X) to fpa0 and (U) to fpa1 +fps_copyinputs ldd ,x ; copy (X) to fpa0 + std fpa0 + ldd 2,x + std fpa0+2 + ldd 4,x + std fpa0+4 + ldd ,u ; copy (U) to fpa0 + std fpa1 + ldd 2,u + std fpa1+2 + ldd 4,u + std fpa1+4 + rts +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Subtraction (X) - (U) to (Y) +fps_sub bsr fps_copyinputs ; copy input operands + com fpa1+fps.sign ; negate the subtrahend (don't need to handle zero here) + bra fps_add0 ; go handle it as addition +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Addition (X) + (U) to (Y) +fps_add bsr fps_copyinputs ; copy input operands +fps_add0 lda fpa1+fps.exp ; is the second operand 0? + bne fps_add1 ; brif not + ldd fpa0 ; copy first operand to output + std ,y + ldd fpa0+2 + std 2,y + ldd fpa0+4 + std 4,y + rts +fps_add1 lda fpa0+fps.exp ; get exponent of first operand + bne fps_add2 ; brif not zero + ldd fpa1 ; copy second operand to output + std ,y + ldd fpa1+2 + std 2,y + ldd fpa1+4 + std 4,y + rts +fps_add2 clr fpa0extra ; clear extra precision bits + lda fpa0+fps.exp ; get first operand exponent + suba fpa1+fps.exp ; get difference in exponents + beq fps_add8 ; brif we don't need to denormalize - they're the same + blo fps_add3 ; brif we need to denormalize the first operand + ldx #fpa1 ; point to second operand to denormalize + bra fps_add4 ; go denormalize +fps_add3 ldb fpa1+fps.exp ; get exponent of second operand (result exponent) + stb fpa0+fps.exp ; set result exponent + ldx #fpa0 ; point to first operand to denormalize + nega ; get number of bits to shift as positive number +fps_add4 suba #8 ; is there 8 bits left? + blo fps_add5 ; brif not + ldb fps.sig+3,x ; shift significand 8 bits right + stb fpa0extra ; save extra precision bits + ldb fps.sig+2,x + stb fps.sig+3,x + ldb fps.sig+1,x + stb fps.sig+2,x + ldb fps.sig,x + stb fps.sig+1,x + clr fps.sig,x + bra fps_add4 ; see if we have another byte to shift +fps_add5 adda #8 ; adjust for extra subtract above + bra fps_add7 ; do the bit shifting +fps_add6 lsr fps.sig,x ; shift one bit right + ror fps.sig+1,x + ror fps.sig+2,x + ror fps.sig+3,x + ror fpa0extra + deca ; done all of the bits? +fps_add7 bne fps_add6 ; brif not +fps_add8 lda fpa0+fps.sign ; compare the signs of the operands + eora fpa1+fps.sign ; set A if signs differ + bne fps_add9 ; brif they differ - do subtraction + ldd fpa0+fps.sig+2 ; add low word of significands + addd fpa1+fps.sig+2 + std fpa0+fps.sig+2 + ldd fpa0+fps.sig ; and the high word + adcb fpa1+fps.sig+1 + adca fpa1+fps.sig + std fpa0+fps.sig + bcc fps_add9 ; brif no carry + ror fpa0+fps.sig ; shift carry into significand + ror fpa0+fps.sig+1 + ror fpa0+fps.sig+2 + ror fpa0+fps.sig+3 + ror fpa0extra ; and the extra bits (for rounding) + inc fpa0+fps.exp ; bump exponent to account for bit shift + bne fps_add14 ; go check for round-off if not overflow +OVERROR ldb #err_ov ; raise overflow + jmp ERROR +fps_add9 ldd fpa0+fps.sig+2 ; subtract low word + subd fpa1+fps.sig+2 + std fpa0+fps.sig+2 + ldd fpa0+fps.sig ; and now the high word + sbcb fpa1+fps.sig+1 + sbca fpa1+fps.sig + std fpa0+fps.sig + bcc fps_add10 ; brif we didn't carry + com fpa0+fps.sign ; flip result sign - other number was bigger + com fpa0+fps.sig+3 ; negate two's complement result to be just the magnitude + com fpa0+fps.sig+2 + com fpa0+fps.sig+1 + com fpa0+fps.sig + ldx fpa0+fps.sig+2 ; add 1 to complete negation + leax 1,x + stx fpa0+fps.sig+2 + bne fps_add10 ; brif carry doesn't propagate + ldx fpa0+fps.sig ; propagate carry + leax 1,x + stx fpa0+fps.sig ; NOTE: this cannot carry because magnitude got smaller +fps_add10 clra ; initialize exponent offset +fps_add11 ldb fpa0+fps.sig ; do we have nonzero bits in high byte of significand? + bne fps_add13 ; brif so + ldb fpa0+fps.sig+1 ; shift left 8 bits + stb fpa0+fps.sig + ldb fpa0+fps.sig+2 + stb fpa0+fps.sig+1 + ldb fpa0+fps.sig+3 + stb fpa0+fps.sig+2 + ldb fpa0extra ; and extra precision bits + stb fpa0+fps.sig+3 + clr fpa0extra + addb #8 ; account for number of bits shifted + cmpb #40 ; done 40 bits? + blo fps_add11 ; brif not - see if we have more bits to shift + clr fpa0+fps.exp ; number underflowed to zero - set it so + clr fpa0+fps.sign + clr fpa0+fps.sig + clr fpa0+fps.sig+1 + clr fpa0+fps.sig+2 + clr fpa0+fps.sig+3 + bra fps_add16 ; go return result +fps_add12 inca ; account for a bit shift + lsl fpa0extra ; shift significand and extra bits left + rol fpa0+fps.sig+3 + rol fpa0+fps.sig+2 + rol fpa0+fps.sig+1 + rol fpa0+fps.sig +fps_add13 bpl fps_add12 ; brif we haven't normalized yet +fps_add14 ldb fpa0extra ; do we need to round? + bpl fps_add16 ; brif not + ldx fpa0+fps.sig+2 ; add one to significand + leax 1,x + stx fpa0+fps.sig+2 + bne fps_add16 ; brif no carry + ldx fpa0+fps.sig ; bump the upper word of significand + leax 1,x + stx fpa0+fps.sig + bne fps_add16 +fps_add15 inc fpa0+fps.exp ; bump exponent + beq OVERROR ; brif it overflowed + lda #0x80 ; set high bit of significand (rest is zero) + sta fpa0+fps.sig +fps_add16 ldd fpa0 ; copy result to destination + std ,y + ldd fpa0+2 + std 2,y + ldd fpa0+4 + std 4,y + rts +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Single precision multiplication (X) × (U) to (Y) +fps_mul lda fps.exp,x ; is first operand zero? + beq fps_mul0 ; brif so - return zero + lda fps.exp,u ; is second operand zero? + bne fps_mul1 ; brif not - have to do the multiply +fps_mul0 ldd zero ; return zero result + std ,y + std 2,y + std 4,y + rts +fps_mul1 jsr fps_copyinputs ; copy input operands + lda fpa0+fps.sign ; calculate sign of result - xor of the two signs + eora fpa1+fps.sign + sta fpa0+fps.sign ; save result sign + lda fpa0+fps.exp ; get exponent of first value + adda fpa1+fps.exp ; calculate new exponent; also cancels the bias + rora ; set V if C and N differ + rola + bvc fps_mul3 ; brif maybe an overflow + adda #0x80 ; add back the bias + sta fpa0+fps.sign ; save new sign + beq fps_mul0 ; brif we underflowed - zero out sign +; This does a shift-and-add multiplication algorithm. This is slower than an equivalent using MUL but is smaller. +; The high order bytes will be left in fpa0 with the low order bytes in fpa0extra and fpa0extra[1-3]. The low order +; bytes are kept for the odd case where extra precision is useful. Uses fpa0extra[4-7] as temporaries. +fps_mul2 ldd zero ; zero out temporary bytes + std fpa0extra4 + std fpa0extra6 + ldb fpa0+fps.sig+3 ; multiply by low byte of fpa0 + bsr fps_mul4 + ldb fpa0extra ; move low bites + stb fpa0extra3 + ldb fpa0+fps.sig+2 ; multiply by next higher byte + bsr fps_mul4 + ldb fpa0extra ; move low bits + stb fpa0extra2 + ldb fpa0+fps.sig+1 ; and again for the next higher byte + bsr fps_mul4 + ldb fpa0extra + stb fpa0extra1 + ldb fpa0+fps.sig ; and the high order byte + bsr fps_mul4 + ldd fpa0extra4 ; copy high order product bits to result + std fpa0+fps.sig + ldd fpa0extra6 + std fpa0+fps.sig+2 + jmp fps_add10 ; go normalize the result and return +fps_mul3 bpl fps_mul0 ; brif we underflowed - return 0 + jmp OVERROR ; raise overflow +fps_mul4 bne fps_mul5 ; brif not multiply by zero + lda fpa0+fps.sig+3 ; shift 8 bits right + sta fpa0extra + lda fpa0+fps.sig+2 + sta fpa0+fps.sig+3 + lda fpa0+fps.sig+1 + sta fpa0+fps.sig+2 + lda fpa0+fps.sig + sta fpa0+fps.sig+1 + clr fpa0+fps.sig +fps_mul8 rts +fps_mul5 coma ; set C +fps_mul6 lda fpa0extra4 ; get high byte of result bytes + rorb ; is multiplier bit set? + beq fps_mul8 ; brif 8 shifts done (C set above makes sure of that) + bcc fps_mul7 ; brif bit not set - don't do addition + lda fpa0extra7 ; add multiplier (fpa1) to result + adda fpa1+fps.sig+3 + sta fpa0extra7 + lda fpa0extra6 + adca fpa1+fps.sig+2 + sta fpa0extra6 + lda fpa0extra5 + adca fpa1+fps.sig+1 + sta fpa0extra5 + lda fpa0extra4 + adca fpa1+fps.sig +fps_mul7 rora ; rotate carry in (from add or 0) + sta fpa0extra4 + ror fpa0extra5 + ror fpa0extra6 + ror fpa0extra7 + ror fpa0extra ; and into the extra precision bits + clra ; clear carry - so shift above will terminate + bra fps_mul6 ; go do another bit +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Divide (X) by 10 in place +fps_const10 fcb 0x83,0xa0,0x00,0x00,0x00,0x00 ; single precision unpacked constant 10 +fps_div10 ldu #fps_const10 ; point to constant 10 + leay ,x ; put output in input + ; fall through to regular division +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Single precision division (X) ÷ (U) -> (Y) +; +; This is basically the same algorithm used in the Color Basic ROM +fps_div lda fps.exp,u ; is divisor 0? + bne fps_div0 +DIV0ERROR ldb #err_div0 ; raise division by zero + jmp ERROR +fps_div0 lda fps.exp,x ; is dividend 0? + lbeq fps_mul0 ; brif so - return 0 + jsr fps_copyinputs ; copy input values + lda fpa0+fps.sign ; calculate result sign - xor of operand signs + eora fpa1+fps.sign + sta fpa0+fps.sign ; save result sign + lda fpa1+fps.exp ; get divisor exponent + nega ; negate for subtraction + adda fpa0+fps.exp ; subtract it from dividend exponent + rora ; set V if C and N differ + rola + bvc fps_mul3 ; brif overflow or underflow + adda #0x80 ; add back the bias + sta fpa0+fps.sign ; save new sign + lbeq fps_mul0 ; brif we underflowed - zero out sign + inca ; bump exponent - why?? related to the bias stuff above? + lbeq OVERROR ; brif it overflows + sta fpa0+fps.exp ; save result exponent + ldx #fpa0extra4 ; point to temporary storage bytes for quotient + ldb #4 ; counter for 4 significand bytes and one extra partial byte + stb fpa0extra1 ; save counter since we need both accumulators + ldb #1 ; shift counter flag and quotient byte +fps_div1 lda fpa1+fps.sig ; set C if fpa0 significand <= fpa1 significand + cmpa fpa0+fps.sig + bne fps_div2 + lda fpa1+fps.sig+1 + cmpa fpa0+fps.sig+1 + bne fps_div2 + lda fpa1+fps.sig+2 + cmpa fpa0+fps.sig+2 + bne fps_div2 + lda fpa1+fps.sig+3 + cmpa fpa0+fps.sig+3 + bne fps_div2 + coma ; set C if values are the same +fps_div2 tfr cc,a ; save C for later, C clear if fpa1 > fpa0 + rolb ; shift carry into quotient + bcc fps_div3 ; brif carry clear - we haven't done 8 bits yet + stb ,x+ ; save quotient byte after a full set of bits + dec fpa0extra1 ; have we done all the bytes? + bmi fps_div7 ; brif all bytes plus extra precision - done all + beq fps_div6 ; brif all main sigificand bytes - do a couple extra bits + ldb #1 ; reset the bit counter flag +fps_div3 tfr cc,a ; get back original carry from compare + bcs fps_div5 ; brif it "went" +fps_div4 lsl fpa0+fps.sig+3 ; shift dividend left + rol fpa0+fps.sig+2 + rol fpa0+fps.sig+1 + rol fpa0+fps.sig + bcs fps_div2 ; brif it carries - next bit "goes" + bmi fps_div1 ; check magnitudes of next bit + bra fps_div2 ; carry clear - check another bit +fps_div5 lda fpa0+fps.sig+3 ; subtract divisor from dividend bits + suba fpa1+fps.sig+3 + sta fpa0+fps.sig+3 + lda fpa0+fps.sig+2 + sbca fpa1+fps.sig+2 + sta fpa0+fps.sig+2 + lda fpa0+fps.sig+1 + sbca fpa1+fps.sig+1 + sta fpa0+fps.sig+1 + lda fpa0+fps.sig + sbca fpa1+fps.sig + sta fpa0+fps.sig + bra fps_div4 ; now do the bit shift to line things up +fps_div6 ldb #0x40 ; only do two bits of extra precision byte + bra fps_div2 ; go handle these bitsd +fps_div7 rorb ; get extra quotient bits to bit 7,6 and bit 5 set + rorb + rorb + stb fpa0extra ; save extra precision bits + ldd fpa0extra4 ; copy result bits to fpa0 + std fpa0+fps.sig + ldd fpa0extra6 + std fpa0+fps.sig+2 + jmp fps_add10 ; go normalize the result +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Pack single precision number at (X) to (U) +fps_pack lda fps.sign,x ; get sign of number (will be 0 for 0) + ora #0x7f ; make sure low bits are set for merging + anda fps.sig,x ; merge with high bits of significand + ldb fps.sig+1,x ; get upper mid bits of significand + std fps.sig,u + ldd fps.sig+2,x + std fps.sig+2,u + lda fps.exp,x + sta fps.exp,u + rts +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Unpack single precision number at (X) to (U) +fps_unpack lda fps.exp,x ; get exponent of value + beq fps_unpack0 ; brif value is zero + sta fps.exp,u + ldb fps.sig,x ; get high byte of significand + sex ; make sign value in A + sta fps.sign,u ; set sign in result + ldd fps.sig,x ; get high word of sifnificand + ora #0x80 ; make sure high bit is set + std fps.sig,u ; save high word in result + ldd fps.sig+2,x ; copy middle bytes of significand + std fps.sig+2,u + rts +fps_unpack0 sta fps.exp,u ; zero out destination + sta fps.sig,u + sta fps.sig+1,u + sta fps.sig+2,u + sta fps.sig+3,u + sta fps.sign,u + rts + *pragmapop list diff -r f959c92bc329 -r 663d8e77b579 src/fps.s --- a/src/fps.s Sun Oct 08 00:17:20 2023 -0600 +++ b/src/fps.s Sun Oct 15 22:15:36 2023 -0600 @@ -3,486 +3,885 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Single precision floating point arithmetic package ; -; Floating point values are stored in 6 byte packets (single precision) organized as follows: -; Offset Length Contents -; 0 1 8 bit binary exponent with a bias of 128; 0 means the number is 0 -; 1 4 32 bit significand -; 5 1 sign flag; zero for positive, 0xff for negative -; -; Binary operateions take pointers to their arguments in X and U. Y contains the result location. In all cases, it is -; safe to specify either one of the arguments as the result location. Unary operations take their operand pointer in -; X and their result pointer in Y. In all cases, there is an in place version of the unary operation that operates on -; its input and only needs the single pointer in X. -; -; On the topic of optimization: these routines copy their operands to accumulators in the direct page. This saves one -; cycle per instruction which has a nonzero offset. In addition, this means that the input operands can remain -; unmodified by the actual operations. For instance, addition requires denormalizing one operand which would otherwise -; have to be done in place. +; The single precision floating point values are stored as follows (unpacked): ; -; NOTE: the input pointers X and U may be clobbered. -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Convert 32 bit unsigned integer at (X) to single precision floating point at (U) -fps_fromuint32 clr fpa0+fps.sign ; set result sign to positive - bra fps_fromint32a ; go do conversion -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Convert 32 bit signed integer at (X) to single precision floating point at value accumulator in (Y) -fps_fromint32 ldb ,x ; set sign based on signed integer - sex - sta fpa0+fps.sign ; set result sign to the two's complement sign - bpl fps_fromint32a ; brif positive - no need to mess with bits - jsr int32_neg ; make the bits positive -fps_fromint32a ldb valtype_float ; set output value type to floating point - stb val.type,y - ldd ,x ; copy integer bits to fpa0 - std fpa0+fps.sig - ldd 2,x - std fpa0+fps.sig+2 - ldb #0xa0 ; put binary point to the right of the significand - stb fpa0+fps.exp - clr fpa0extra ; clear extra precision - jmp fps_add10 ; go normalize the result and return -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Convert 64 bit unsigned value at (X) to single precision floating point in value accumulator at (Y) +; Byte Length What +; 0 1 Exponent with a +63 bias, 0 = number is zero +; 1 5 BCD significand (10 digits) +; 6 1 Sign (00 = positive, FF = negative) ; -; Cases: -; * byte 0 is first nonzero - exponent at 64 bits right, use upper 0 to 4 -; * byte 1 is first nonzero - exponent at 56 bits right, use bytes 1 to 5 -; * byte 2 is first nonzero - exponent at 48 bits right, use bytes 2 to 6 -; * otherwise - exponent at 40 bits right, use bytes 3 to 7 -fps_fromuint64 clra ; set sign to positive -fps_fromuint64s sta fpa0+fps.sign ; save sign of result - ldb #0xc0 ; exponent if binary point is 64 bits to the right - lda ,x+ ; is the first byte zero? - bne fps_fromuint64a ; brif not - subb #8 ; lose a byte off exponent - lda ,x+ ; is the second byte zero? - bne fps_fromuint64a ; brif not - subb #8 ; lose another byte off exponent - lda ,x+ ; is third byte zero? - bne fps_fromuint64a ; brif not - subb #8 ; lose another byte - lda ,x+ ; get first byte to copy -fps_fromuint64a stb fpa0+fps.exp ; save exponent - sta fpa0+fps.sig ; save high byte of significand - ldd ,x ; copy next two bytes to significand - std fpa0+fps.sig+1 - ldd 2,x ; and the final byte and extra precision - sta fpa0+fps.sig+3 - stb fpa0extra - jmp fps_add10 ; go normalize the result and return ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Fast multiply (X) by 10, in place. -; -; * first, save original value -; * then, shift left by 2 bits (add 2 to exponent) -; * then, add original value -; * then, shift left one more (add 1 to exponent) -; -; This should be faster than multiplying by 10. -fps_mul10 leas -fps.size,s ; make a temporary to hold original value - ldd ,x ; copy original value - std ,s - ldd 2,x - std 2,s - ldd 4,x - std 4,s - lda fps.exp,x ; bump original exponent by 2 (times 4) - adda #2 - bcc fps_mul10b ; brif it overflowed -fps_mul10a jmp OVERROR ; raise overflow -fps_mul10b sta fps.exp,x - leay ,x - leau ,s - bsr fps_add ; add original value (times 5) - leas fps.size,s ; clean up temporary - inc fps.exp,y ; bump exponent (times 10) in result - beq fps_mul10a ; brif it overflowed +; Unpack single precision BCD floating point at (X) to fpa1 +fps_unpack1 ldb ,x ; get sign and exponent + bne fps_unpack1b ; brif not zero + ldd zero ; zero out the entire value to represent 0 + std fpa1 + std fpa1+2 + std fpa1+4 + sta fpa1+6 + rts +fps_unpack1b sex ; get sign flag to store + sta fpa1+fpa.sign ; set accumulator + anda #0x7f ; lose sign from exponent + sta fpa1+fpa.exp ; set exponent + ldd 1,x ; copy signficand over + std fpa1+fpa.sig + ldd 3,x + std fpa1+fpa.sig+2 + lda 5,x + sta fpa1+fpa.sig+4 rts ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Unary negation - negate (X) to (Y) -fps_neg ldd 2,x ; copy to output and keep exponent in A - std 2,y - ldd 4,x - std 4,y - ldd ,x - std ,y - tsta ; is the number zero? - beq fps_neg0 ; brif so - do nothing - com fps.sign,y ; flip the sign -fps_neg0 rts -fps_negx lda fps.exp,x ; is the number zero? - beq fps_negx0 ; brif so - do nothing - com fps.sign,x ; flip the sign -fps_negx0 rts +; Unpack single precision BCD floating point at (X) to fpa0 +fps_unpack0 ldb ,x ; get sign and exponent + bne fps_unpack0b ; brif not zero + ldd zero ; zero out the entire value to represent 0 + std fpa0 + std fpa0+2 + std fpa0+4 + sta fpa0+6 + rts +fps_unpack0b sex ; get sign flag to store + sta fpa0+fpa.sign ; set accumulator + anda #0x7f ; lose sign from exponent + sta fpa0+fpa.exp ; set exponent + ldd 1,x ; copy signficand over + std fpa0+fpa.sig + ldd 3,x + std fpa0+fpa.sig+2 + lda 5,x + sta fpa0+fpa.sig+4 + rts ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Copy (X) to fpa0 and (U) to fpa1 -fps_copyinputs ldd ,x ; copy (X) to fpa0 - std fpa0 - ldd 2,x - std fpa0+2 - ldd 4,x - std fpa0+4 - ldd ,u ; copy (U) to fpa0 - std fpa1 - ldd 2,u - std fpa1+2 - ldd 4,u - std fpa1+4 +; Pack single precision BCD floating point in fpa1 to (X) +fps_pack1 lda fpa1+fpa.sign ; get sign bits + anda #0x80 ; only keep high bit + ora fpa1+fpa.exp ; merge with exponent + sta ,x ; put in destination + ldd fpa1+fpa.sig ; copy significand over + std 1,x + ldd fpa1+fpa.sig+2 + std 3,x + lda fpa1+fpa.sig+4 + sta 5,x + rts +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Pack single precision BCD floating point in fpa0 to (X) +fps_pack0 lda fpa0+fpa.sign ; get sign bits + anda #0x80 ; only keep high bit + ora fpa0+fpa.exp ; merge with exponent + sta ,x ; put in destination + ldd fpa0+fpa.sig ; copy significand over + std 1,x + ldd fpa0+fpa.sig+2 + std 3,x + lda fpa0+fpa.sig+4 + sta 5,x rts ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Subtraction (X) - (U) to (Y) -fps_sub bsr fps_copyinputs ; copy input operands - com fpa1+fps.sign ; negate the subtrahend (don't need to handle zero here) - bra fps_add0 ; go handle it as addition +; Copy binary arguments +fps_copyargs clr fpa0+fpa.extra ; clear extra precision bits + clr fpa1+fpa.extra + bsr fps_unpack0 ; unpack first argument to fpa0 + leax ,u ; point to second argument + bra fps_unpack1 ; now unpack it to fpa1 and return ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Addition (X) + (U) to (Y) -fps_add bsr fps_copyinputs ; copy input operands -fps_add0 lda fpa1+fps.exp ; is the second operand 0? +; Single precision BCD floating point subtraction +; (X) - (U) -> (Y) +fps_sub bsr fps_copyargs ; copy input arguments + ldb fpa1+fpa.exp ; subtracting zero? + beq fps_add0 ; brif so - do nothing + com fpa1+fpa.sign ; invert sign + bra fps_add1 ; go handle addition of negative +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Single precision BCD floating point addition +; (X) + (U) -> (Y) +; +; Note: must denormalize the *shorter* value. However, we may end up with values the same length so we may not be able +; to easily identify which one is larger. +; +; Note that the magnitude can get substantially smaller after addition of a negative so renormalization is necessary +; after the operation. As a result, some additional precision is maintained during the addition operation to allow for +; correct rounding. +; +; Rounding will be according to the standard rules: 0...4 go toward zero, 5...9 go away from zero. +fps_add bsr fps_copyargs ; fetch arguments + ldb fpa1+fpa.exp ; is second argument zero? bne fps_add1 ; brif not - ldd fpa0 ; copy first operand to output - std ,y - ldd fpa0+2 - std 2,y - ldd fpa0+4 - std 4,y - rts -fps_add1 lda fpa0+fps.exp ; get exponent of first operand - bne fps_add2 ; brif not zero - ldd fpa1 ; copy second operand to output - std ,y - ldd fpa1+2 - std 2,y - ldd fpa1+4 - std 4,y - rts -fps_add2 clr fpa0extra ; clear extra precision bits - lda fpa0+fps.exp ; get first operand exponent - suba fpa1+fps.exp ; get difference in exponents - beq fps_add8 ; brif we don't need to denormalize - they're the same - blo fps_add3 ; brif we need to denormalize the first operand +fps_add0 leax ,y ; if second argument is zero, do nothing and return first operand + bra fps_pack0 +fps_add1 ldb fpa0+fpa.exp ; is first argument zero? + bne fps_add3 ; brif not +fps_add2 leax ,y ; point to output argument + bra fps_pack1 ; pack second argument to result and return +fps_add3 ldd zero ;* initialize extra precision bytes following significand which we need + std fpa0+fpa.sig+5 ;* to handle denormalization of the smaller operand + std fpa0+fpa.sig+7 + sta fpa0+fpa.sig+9 + std fpa1+fpa.sig+5 + std fpa1+fpa.sig+7 + sta fpa1+fpa.sig+9 + subb fpa0+fpa.exp ; calculate exponent difference + lbeq fps_add8 ; brif same exponent - no need to denormalize + bcs fps_add4 ; brif second operand is bigger ldx #fpa1 ; point to second operand to denormalize - bra fps_add4 ; go denormalize -fps_add3 ldb fpa1+fps.exp ; get exponent of second operand (result exponent) - stb fpa0+fps.exp ; set result exponent - ldx #fpa0 ; point to first operand to denormalize - nega ; get number of bits to shift as positive number -fps_add4 suba #8 ; is there 8 bits left? - blo fps_add5 ; brif not - ldb fps.sig+3,x ; shift significand 8 bits right - stb fpa0extra ; save extra precision bits - ldb fps.sig+2,x - stb fps.sig+3,x - ldb fps.sig+1,x - stb fps.sig+2,x - ldb fps.sig,x - stb fps.sig+1,x - clr fps.sig,x - bra fps_add4 ; see if we have another byte to shift -fps_add5 adda #8 ; adjust for extra subtract above - bra fps_add7 ; do the bit shifting -fps_add6 lsr fps.sig,x ; shift one bit right - ror fps.sig+1,x - ror fps.sig+2,x - ror fps.sig+3,x - ror fpa0extra - deca ; done all of the bits? -fps_add7 bne fps_add6 ; brif not -fps_add8 lda fpa0+fps.sign ; compare the signs of the operands - eora fpa1+fps.sign ; set A if signs differ - bne fps_add9 ; brif they differ - do subtraction - ldd fpa0+fps.sig+2 ; add low word of significands - addd fpa1+fps.sig+2 - std fpa0+fps.sig+2 - ldd fpa0+fps.sig ; and the high word - adcb fpa1+fps.sig+1 - adca fpa1+fps.sig - std fpa0+fps.sig - bcc fps_add9 ; brif no carry - ror fpa0+fps.sig ; shift carry into significand - ror fpa0+fps.sig+1 - ror fpa0+fps.sig+2 - ror fpa0+fps.sig+3 - ror fpa0extra ; and the extra bits (for rounding) - inc fpa0+fps.exp ; bump exponent to account for bit shift - bne fps_add14 ; go check for round-off if not overflow + cmpb #10 ; are we going to shift more than the precision? + bhs fps_add0 ; brif so - return first operand + bra fps_add5 ; go shift operand right by B places +fps_add4 negb ; get positive number of shifts + ldx #fpa0 ; shift left operand right B places + lda fpa1+fpa.exp ; set exponent of result to larger of two + sta fpa0+fpa.exp + cmpb #10 ; shifting more than precision? + bhs fps_add2 ; brif so - return second operand +fps_add5 subb #2 ; do we need at least two places? + bcs fps_add6 ; brif not + lda fpa.sig+8,x ; shift right into extra digits + sta fpa.sig+9,x + lda fpa.sig+7,x + sta fpa.sig+8,x + lda fpa.sig+6,x + sta fpa.sig+7,x + lda fpa.sig+5,x + sta fpa.sig+6,x + lda fpa.sig+4,x + sta fpa.sig+5,x + lda fpa.sig+3,x + sta fpa.sig+4,x + lda fpa.sig+2,x + sta fpa.sig+3,x + lda fpa.sig+1,x + sta fpa.sig+2,x + lda fpa.sig,x + sta fpa.sig+1,x + clr fpa.sig,x + bra fps_add5 ; go see if we have more to shift +fps_add6 incb ; do we still have a digit to shift? + bne fps_add8 ; brif not + lsr fpa.sig,x ; shift a digit right + ror fpa.sig+1,x + ror fpa.sig+2,x + ror fpa.sig+3,x + ror fpa.sig+4,x + ror fpa.sig+5,x + ror fpa.sig+6,x + ror fpa.sig+7,x + ror fpa.sig+8,x + ror fpa.sig+9,x + lsr fpa.sig,x + ror fpa.sig+1,x + ror fpa.sig+2,x + ror fpa.sig+3,x + ror fpa.sig+4,x + ror fpa.sig+5,x + ror fpa.sig+6,x + ror fpa.sig+7,x + ror fpa.sig+8,x + ror fpa.sig+9,x + lsr fpa.sig,x + ror fpa.sig+1,x + ror fpa.sig+2,x + ror fpa.sig+3,x + ror fpa.sig+4,x + ror fpa.sig+5,x + ror fpa.sig+6,x + ror fpa.sig+7,x + ror fpa.sig+8,x + ror fpa.sig+9,x + lsr fpa.sig,x + ror fpa.sig+1,x + ror fpa.sig+2,x + ror fpa.sig+3,x + ror fpa.sig+4,x + ror fpa.sig+5,x + ror fpa.sig+6,x + ror fpa.sig+7,x + ror fpa.sig+8,x + ror fpa.sig+9,x +fps_add8 clra ; clear carry so regular addition works below + lda fpa0+fpa.sign ; do signs differ? + eora fpa1+fpa.sign + sta fpa1+fpa.exp ; non-zero if signs differ; we don't need fpa1 exponent any more + beq fps_add11 ; brif not - just add + ldx #fpa1 ; default to second argument being smaller + lda fpa0+fpa.sig ; compare high digits of significand + cmpa fpa1+fpa.sig + bne fps_add9 ; brif top digits differ + lda fpa0+fpa.sig+1 ; next digits? + cmpa fpa1+fpa.sig+1 + bne fps_add9 ; brif digits differ; pattern continues + lda fpa0+fpa.sig+2 + cmpa fpa1+fpa.sig+2 + bne fps_add9 + lda fpa0+fpa.sig+3 + cmpa fpa1+fpa.sig+3 + bne fps_add9 + lda fpa0+fpa.sig+4 + cmpa fpa1+fpa.sig+4 + bne fps_add9 + lda fpa0+fpa.sig+5 + cmpa fpa1+fpa.sig+5 ; don't have to check other extras; only one set will be set +fps_add9 bhs fps_add10 ; brif first operand is bigger + ldx #fpa0 ; point to first operand as smaller + lda fpa1+fpa.sign ; set result sign to that of larger operand + sta fpa0+fpa.sign +fps_add10 ldd #0x9999 ;* calculate 9's complement of smaller operand (X must already point to it) + subd fpa.sig,x ;* we'll complete the 10's complement by setting C on the way into the + std fpa.sig,x ;* addition sequence + ldd #0x9999 + subd fpa.sig+2,x + std fpa.sig+2,x + ldd #0x9999 + subd fpa.sig+4,x + std fpa.sig+4,x + ldd #0x9999 + subd fpa.sig+6,x + std fpa.sig+6,x + ldd #0x9999 + subd fpa.sig+8,x + std fpa.sig+8,x + coma ; set carry going into add +fps_add11 lda fpa0+fpa.sig+9 ; do the addition (10 bytes) + adca fpa1+fpa.sig+9 + daa + sta fpa0+fpa.sig+9 + lda fpa0+fpa.sig+8 + adca fpa1+fpa.sig+8 + daa + sta fpa0+fpa.sig+8 + lda fpa0+fpa.sig+7 + adca fpa1+fpa.sig+7 + daa + sta fpa0+fpa.sig+7 + lda fpa0+fpa.sig+6 + adca fpa1+fpa.sig+6 + daa + sta fpa0+fpa.sig+6 + lda fpa0+fpa.sig+5 + adca fpa1+fpa.sig+5 + daa + sta fpa0+fpa.sig+5 + lda fpa0+fpa.sig+4 + adca fpa1+fpa.sig+4 + daa + sta fpa0+fpa.sig+4 + lda fpa0+fpa.sig+3 + adca fpa1+fpa.sig+3 + daa + sta fpa0+fpa.sig+3 + lda fpa0+fpa.sig+2 + adca fpa1+fpa.sig+2 + daa + sta fpa0+fpa.sig+2 + lda fpa0+fpa.sig+1 + adca fpa1+fpa.sig+1 + daa + sta fpa0+fpa.sig+1 + lda fpa0+fpa.sig + adca fpa1+fpa.sig + daa + sta fpa0+fpa.sig + ror fpa1+fpa.exp ;* do sign flag and carry differ? will set V if so; we will never have + rol fpa1+fpa.exp ;* a real carry on the subtract case but might on the add case + bvc fps_normalize ; brif we didn't overflow significand - we can normalize the result + inc fpa0+fpa.exp ; bump exponent to account for overflow + lbmi OVERROR ; brif it overflowed + ror fpa0+fpa.sig ; do a bit dance + ror fpa0+fpa.sig+1 + ror fpa0+fpa.sig+2 + ror fpa0+fpa.sig+3 + ror fpa0+fpa.sig+4 + ror fpa0+fpa.sig+5 + ror fpa0+fpa.sig+6 + ror fpa0+fpa.sig+7 + ror fpa0+fpa.sig+8 + ror fpa0+fpa.sig+9 + ror fpa0+fpa.sig + ror fpa0+fpa.sig+1 + ror fpa0+fpa.sig+2 + ror fpa0+fpa.sig+3 + ror fpa0+fpa.sig+4 + ror fpa0+fpa.sig+5 + ror fpa0+fpa.sig+6 + ror fpa0+fpa.sig+7 + ror fpa0+fpa.sig+8 + ror fpa0+fpa.sig+9 + ror fpa0+fpa.sig + ror fpa0+fpa.sig+1 + ror fpa0+fpa.sig+2 + ror fpa0+fpa.sig+3 + ror fpa0+fpa.sig+4 + ror fpa0+fpa.sig+5 + ror fpa0+fpa.sig+6 + ror fpa0+fpa.sig+7 + ror fpa0+fpa.sig+8 + ror fpa0+fpa.sig+9 + ror fpa0+fpa.sig + ror fpa0+fpa.sig+1 + ror fpa0+fpa.sig+2 + ror fpa0+fpa.sig+3 + ror fpa0+fpa.sig+4 + ror fpa0+fpa.sig+5 + ror fpa0+fpa.sig+6 + ror fpa0+fpa.sig+7 + ror fpa0+fpa.sig+8 + ror fpa0+fpa.sig+9 + ; fall through to normalization routine +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Normalize a floating point value in fpa0 with extra precision digits at fpaextra (up to 5 bytes) and return the +; packed result at (Y) +; +; The first step is to shift the significand left until a nonzero digit is in the leftmost position. This will bring +; in extra precision digits from fpaextra through fpaextra4 until either 10 shifts are done or the decimal exponent +; underflows. In both of those cases, the result will be zero. +; +; Once the leftmost digit is nonzero, the leftmost extra precision digit is checked to see if it is >= 5. If so, the +; significand will have 1 added to it. If that triggers a carry, the decimal exponent will be incremented and the +; significand will have its leftmost digit set to 1. +; +; This will trigger an overflow if the decimal exponent exceeds the allowed range. +fps_normalize clrb ; initialize the exponent adjustment counter +fps_normalize0 lda fpa0+fpa.sig ; do we have a nonzero digit in the first pair? + bne fps_normalize1 ; brif so + lda fpa0+fpa.sig+1 ; shift everything left 2 spaces + sta fpa0+fpa.sig + lda fpa0+fpa.sig+2 + sta fpa0+fpa.sig+1 + lda fpa0+fpa.sig+3 + sta fpa0+fpa.sig+2 + lda fpa0+fpa.sig+4 + sta fpa0+fpa.sig+3 + lda fpaextra + sta fpa0+fpa.sig+4 + lda fpaextra+1 + sta fpaextra + lda fpaextra+2 + sta fpaextra+1 + lda fpaextra+3 + sta fpaextra+2 + lda fpaextra+4 + sta fpaextra+3 + clr fpaextra+4 + subb #2 ; account for two digit positions + cmpb #-10 ; have we shifted the whole set of digits (assumes originally normalized) + bgt fps_normalize0 ; brif not +fps_normalize4 clr fpa0+fpa.exp ; set result to zero + clr fpa0+fpa.sign +fps_normalize5 leax ,y ; point to return location + jmp fps_pack0 ; return result +fps_normalize1 bita #0xf0 ; is the high digit zero? + bne fps_normalize3 ; brif not + lsl fpaextra ; only need to shift one extra position here since there won't be more shifts + rol fpa0+fpa.sig+4 + rol fpa0+fpa.sig+3 + rol fpa0+fpa.sig+2 + rol fpa0+fpa.sig+1 + rol fpa0+fpa.sig + lsl fpaextra + rol fpa0+fpa.sig+4 + rol fpa0+fpa.sig+3 + rol fpa0+fpa.sig+2 + rol fpa0+fpa.sig+1 + rol fpa0+fpa.sig + lsl fpaextra + rol fpa0+fpa.sig+4 + rol fpa0+fpa.sig+3 + rol fpa0+fpa.sig+2 + rol fpa0+fpa.sig+1 + rol fpa0+fpa.sig + lsl fpaextra + rol fpa0+fpa.sig+4 + rol fpa0+fpa.sig+3 + rol fpa0+fpa.sig+2 + rol fpa0+fpa.sig+1 + rol fpa0+fpa.sig + decb ; account for digit shift +fps_normalize3 addb fpa0+fpa.exp ; adjust exponent + stb fpa0+fpa.exp + ble fps_normalize4 ; brif we underflowed to zero + ldb fpaextra ; get extra precision digit + andb #0xf0 ; keep only the highest extra precision digit + cmpb #0x50 ; do we need to round? + blo fps_normalize5 ; brif not + lda fpa0+fpa.sig+4 ; bump low digits + adda #1 + daa + sta fpa0+fpa.sig+4 + bcc fps_normalize5 ; brif no carry - done + lda fpa0+fpa.sig+3 ; keep going until all significand bytes handled + adda #1 + daa + sta fpa0+fpa.sig+3 + bcc fps_normalize5 + lda fpa0+fpa.sig+2 + adda #1 + daa + sta fpa0+fpa.sig+2 + bcc fps_normalize5 + lda fpa0+fpa.sig+1 + adda #1 + daa + sta fpa0+fpa.sig+1 + bcc fps_normalize5 + lda fpa0+fpa.sig + adda #1 + daa + sta fpa0+fpa.sig + bcc fps_normalize5 + lda #0x10 ; overflowed the significand - shift a 1 digit in + sta fpa0+fpa.sig + inc fpa0+fpa.exp ; and bump exponent to reflect that + bpl fps_normalize5 ; brif we didn't overflow - return result OVERROR ldb #err_ov ; raise overflow - jmp ERROR -fps_add9 ldd fpa0+fps.sig+2 ; subtract low word - subd fpa1+fps.sig+2 - std fpa0+fps.sig+2 - ldd fpa0+fps.sig ; and now the high word - sbcb fpa1+fps.sig+1 - sbca fpa1+fps.sig - std fpa0+fps.sig - bcc fps_add10 ; brif we didn't carry - com fpa0+fps.sign ; flip result sign - other number was bigger - com fpa0+fps.sig+3 ; negate two's complement result to be just the magnitude - com fpa0+fps.sig+2 - com fpa0+fps.sig+1 - com fpa0+fps.sig - ldx fpa0+fps.sig+2 ; add 1 to complete negation - leax 1,x - stx fpa0+fps.sig+2 - bne fps_add10 ; brif carry doesn't propagate - ldx fpa0+fps.sig ; propagate carry - leax 1,x - stx fpa0+fps.sig ; NOTE: this cannot carry because magnitude got smaller -fps_add10 clra ; initialize exponent offset -fps_add11 ldb fpa0+fps.sig ; do we have nonzero bits in high byte of significand? - bne fps_add13 ; brif so - ldb fpa0+fps.sig+1 ; shift left 8 bits - stb fpa0+fps.sig - ldb fpa0+fps.sig+2 - stb fpa0+fps.sig+1 - ldb fpa0+fps.sig+3 - stb fpa0+fps.sig+2 - ldb fpa0extra ; and extra precision bits - stb fpa0+fps.sig+3 - clr fpa0extra - addb #8 ; account for number of bits shifted - cmpb #40 ; done 40 bits? - blo fps_add11 ; brif not - see if we have more bits to shift - clr fpa0+fps.exp ; number underflowed to zero - set it so - clr fpa0+fps.sign - clr fpa0+fps.sig - clr fpa0+fps.sig+1 - clr fpa0+fps.sig+2 - clr fpa0+fps.sig+3 - bra fps_add16 ; go return result -fps_add12 inca ; account for a bit shift - lsl fpa0extra ; shift significand and extra bits left - rol fpa0+fps.sig+3 - rol fpa0+fps.sig+2 - rol fpa0+fps.sig+1 - rol fpa0+fps.sig -fps_add13 bpl fps_add12 ; brif we haven't normalized yet -fps_add14 ldb fpa0extra ; do we need to round? - bpl fps_add16 ; brif not - ldx fpa0+fps.sig+2 ; add one to significand - leax 1,x - stx fpa0+fps.sig+2 - bne fps_add16 ; brif no carry - ldx fpa0+fps.sig ; bump the upper word of significand - leax 1,x - stx fpa0+fps.sig - bne fps_add16 -fps_add15 inc fpa0+fps.exp ; bump exponent - beq OVERROR ; brif it overflowed - lda #0x80 ; set high bit of significand (rest is zero) - sta fpa0+fps.sig -fps_add16 ldd fpa0 ; copy result to destination - std ,y - ldd fpa0+2 - std 2,y - ldd fpa0+4 - std 4,y + jmp ERROR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Single precision BCD multiply (X) by (U) +; +; Calculate: (X) * (U) -> (Y) +; +; First, the routine calculates the new exponent and sign. The exponents simply add together while the sign is the +; exclusive OR of the two input signs. That gives negative for differing signs and positive for matching signs. +; Then the multiplication of the significands works by initializing an accumulator large enough to hold twice as +; many digits as each significand to zeroes. It also needs an extra byte to the left to handle overflow for each +; digit operation. It then works through each digit of the multiplier and uses that as a count to add the +; multiplicand to the *high digits* of the accumulator. Then, before moving to the next digit, the accumulator and +; mulltiplier are shifted right and the process is repeated. Once the process is done, the result required for the +; floating point calculation will be the high 10 digits of the accumulator with extra digits stored in the +; remaining bytes. +; +; Once the multiplication is complete, the significand is normalized. See above. +fps_mul jsr fps_copyargs ; fetch arguments + lda fpa0+fpa.exp ; first argumennt zero? + bne fps_mul0 ; brif not + leax ,y ; return first argument (0) + jmp fps_pack0 +fps_mul0 lda fpa1+fpa.exp ; second argument zero? + bne fps_mul1 ; brif not + leax ,y ; return second argument (0) + jmp fps_pack1 +fps_mul1 lda fpa0+fpa.sign ; calculate result sign + eora fpa1+fpa.sign + sta fpa0+fpa.sign ; result will go into fpa0 + lda fpa0+fpa.exp ; fetch exponent of multiplicand + suba #64 ; remove bias + sta fpa0+fpa.exp ; save it + lda fpa1+fpa.exp ; get exponent of multplier + suba #64 ; remove bias + adda fpa0+fpa.exp ; add exponents + cmpa #63 ; did we overflow upward? + bgt OVERROR ; brif so + cmpa #-63 ; did we underflow? + bhs fps_mul3 ; brif not +fps_mul2 clr fpa0+fpa.exp ; return zero if we underflow + leax ,y + jmp fps_pack0 +fps_mul3 adda #64 ; add bias to resulting exponent + sta fpa0+fpa.exp ; save result exponent + ldd zero ;* zero out result buffer high digits (extra digits + significand and + std fpaextra ;* one extra byte to make sure lowest extra digit is zero) + std fpaextra+2 + std fpaextra+4 + sta fpaextra+6 + ldb fpa1+fpa.sig+4 ;* do each byte of multiplier significand in sequence + bsr fps_mul4 + ldb fpa1+fpa.sig+3 + bsr fps_mul4 + ldb fpa1+fpa.sig+2 + bsr fps_mul4 + ldb fpa1+fpa.sig+1 + bsr fps_mul4 + ldb fpa1+fpa.sig + bsr fps_mul4 + ldd fpaextra+1 ; copy result into fpa0 significand (overflow byte will be zero) + std fpa0+fpa.sig + ldd fpaextra+3 + std fpa0+fpa.sig+2 + ldd fpaextra+5 + sta fpa0+fpa.sig+4 + ldd fpaextra+7 + std fpa0+fpa.sig+6 + ldd fpaextra+9 + std fpa0+fpa.sig+8 + jmp fps_normalize ; go normalize the result +fps_mul4 bne fps_mul6 ; brif at least one digit is nonzero + ldd fpaextra+8 ; shift right by 8 bits + std fpaextra+9 + ldd fpaextra+6 + std fpaextra+7 + ldd fpaextra+4 + std fpaextra+5 + ldd fpaextra+2 + sta fpaextra+3 + ldd fpaextra + std fpaextra+1 + clr fpaextra + rts +fps_mul5 bsr fps_mul11 ; add multiplicand to accumulator + decb ; account for iteration +fps_mul6 bitb #0x0f ; done everything for this digit? + bne fps_mul7 + bsr fps_mul9 ; shift accumulator + bra fps_mul8 +fps_mul7 bsr fps_mul11 ; add multiplicand to accumulator + subb #0x10 ; account for iteration +fps_mul8 bitb #0xf0 ; done all iterations? + bne fps_mul7 ; brif not +fps_mul9 lsr fpaextra ; shift result + ror fpaextra+1 + ror fpaextra+2 + ror fpaextra+3 + ror fpaextra+4 + ror fpaextra+5 + ror fpaextra+6 + ror fpaextra+7 + ror fpaextra+8 + ror fpaextra+9 + ror fpaextra+10 + lsr fpaextra + ror fpaextra+1 + ror fpaextra+2 + ror fpaextra+3 + ror fpaextra+4 + ror fpaextra+5 + ror fpaextra+6 + ror fpaextra+7 + ror fpaextra+8 + ror fpaextra+9 + ror fpaextra+10 + lsr fpaextra + ror fpaextra+1 + ror fpaextra+2 + ror fpaextra+3 + ror fpaextra+4 + ror fpaextra+5 + ror fpaextra+6 + ror fpaextra+7 + ror fpaextra+8 + ror fpaextra+9 + ror fpaextra+10 + lsr fpaextra + ror fpaextra+1 + ror fpaextra+2 + ror fpaextra+3 + ror fpaextra+4 + ror fpaextra+5 + ror fpaextra+6 + ror fpaextra+7 + ror fpaextra+8 + ror fpaextra+9 + ror fpaextra+10 + rts +fps_mul11 lda fpa0+fpa.sig+4 ; add digits starting at the bottom + adda fpaextra+5 + daa + sta fpaextra+5 + lda fpa0+fpa.sig+3 + adca fpaextra+4 + daa + sta fpaextra+4 + lda fpa0+fpa.sig+2 + adca fpaextra+3 + daa + sta fpaextra+3 + lda fpa0+fpa.sig+1 + adca fpaextra+2 + daa + sta fpaextra+2 + lda fpa0+fpa.sig + adca fpaextra+1 + daa + sta fpaextra+1 + bcc fps_mul12 + inc fpaextra ; handle carry out +fps_mul12 rts +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Single precision BCD divide +; +; Calculate (X) ÷ (U) -> (Y) +; +; First, calculate the result exponent by subtracting the decimal exponent of (U) from that of (X). If the result goes +; above the exponent range, raise overflow. If it goes below the exponent range, return zero. +; +; The calculate the result sign the same as for multiplcation. Also calculate the 10's complement of the divisor. +; +; First, we copy the divdend into the extra precision digits in fpa1 since they aren't needed for anything else +; here. Then we set the desired nonzero digit counter to 11 to indicate 10 significant digits plus a digit for +; rounding and set the flag indicating a nonzero digit was seen to zero to indicate it hasn't been seen. Also set +; the extra carry byte to zero. Then we run through the digit pair routine 10 times to set 20 digits of the +; significand and extra precision for fpa0. The digit pair routine just calls the digit loop twice while doing +; some bookkeeping. +; +; We also pre-cacluate the 9's complement of the divisor and store that in fpaextra to save time during the +; subtraction steps. +; +; For each digit, the process is as follows: +; +; 1. If the desired digit count is zero, do nothing and just return a zero for the quotient digit. +; 2. Initialize the quotient digit to 0 +; 3. If the extra carry is nonzero, we know the result "goes" so go on to step 6 +; 4. Compare the divisor to the residue; this is faster on average than doing the subtraction and then +; reversing the last subtraction loop. +; 5. If the divisor goes *exactly* into the residue, bump the quotient digit, clear the desired digit count +; and return the quotient digit; we have no need to do anything more +; 6. If the divisor does go, subtract the divisor from the residue, bump the digit, and go back to step 3 +; 7. If the resulting digit is nonzero, set the "nonzero digit seen" flag to 1 (exactly one) +; 8. Subtract the nonzero digit flag from the desired digits flag so after enough digits, we stop doing the +; division loops. +; 9. Return the quotient digit. +fps_div jsr fps_copyargs ; get arguments to the accumulators + lda fpa1+fpa.exp ; get divisor exponent + bne fps_div0 ; brif not zero +DIV0ERROR ldb #err_div0 ; raise division by zero + jmp ERROR +fps_div0 suba #64 ; remove bias + sta fpa1+fpa.exp ; save for later calculation + lda fpa0+fpa.exp ; get quotient exponent + bne fps_div2 ; brif not zero - we have to do work +fps_div1 clr fpa0+fpa.exp ; make sure result is zero + leax ,y ; return zero result + jmp fps_pack0 +fps_div2 suba #64 ; remove bias + suba fpa1+fpa.exp ; subtract divisor exponent + cmpa #64 ; did we overflow upward? + lbge OVERROR ; brif so + cmpa #-64 ; did we overflow downward (underflow)? + bls fps_div1 ; brif we underflow + adda #64 ; add back the bias + sta fpa0+fpa.exp ; set result exponent + lda fpa0+fpa.sign ; calculate result sign (XOR of argument signs) + eora fpa1+fpa.sign + sta fpa0+fpa.sign + ldd fpa0+fpa.sig ; initialize residue to dividend + std fpa1+fpa.sig + ldd fpa0+fpa.sig+2 + std fpa1+fpa.sig+2 + lda fpa0+fpa.sig+4 + sta fpa1+fpa.sig+4 + ldd #11 ; initialize digit counter and nonzero seen flag + std fpaextra+5 + sta fpaextra+7 ; set ongoing extra carry digits to zero + ldd #0x9999 ;* calculate 9's complement of divisor for later; we'll introduce a carry + subd fpa1+fpa.sig ;* to the first byte to complete the 10's complement's +1 to save doing + std fpaextra ;* the extra work here + ldd #0x9999 + subd fpa1+fpa.sig+2 + std fpaextra+2 + lda #0x99 + suba fpa1+fpa.sig+4 + sta fpaextra+4 + bsr fps_div3 ; calculate the quotient byte by byte + stb fpa0+fpa.sig + bsr fps_div3 + stb fpa0+fpa.sig+1 + bsr fps_div3 + stb fpa0+fpa.sig+2 + bsr fps_div3 + stb fpa0+fpa.sig+3 + bsr fps_div3 + stb fpa0+fpa.sig+4 + bsr fps_div3 + stb fpa0+fpa.sig+5 + bsr fps_div3 + stb fpa0+fpa.sig+6 + bsr fps_div3 + stb fpa0+fpa.sig+7 + bsr fps_div3 + stb fpa0+fpa.sig+8 + bsr fps_div3 + stb fpa0+fpa.sig+9 + jmp fps_normalize ; go normalize the result and return +fps_div3 bsr fps_div5 ; do a digit + lslb ; shift it over + lslb + lslb + lslb + sta fpaextra+8 ; save it + bsr fps_div5 ; do next digit + addb fpaextra+8 ; combine the two quotient digits +fps_div4 rts +fps_div5 ldb fpaextra+6 ; do we even need to do the division? + beq fps_div4 ; brif not - return 0 + clrb ; initialize quotient digit +fps_div6 lda fpaextra+7 ; did we have a carry last time? + bne fps_div7 ; brif so - we know it "goes" + lda fpa1+fpa.sig ; is the divisor less than the dividend residue? + cmpa fpa1+fpa.sig+5 + bhi fps_div8 ; brif high byte is larger than residue + lda fpa1+fpa.sig+1 ; and keep going for all 5 bytes + cmpa fpa1+fpa.sig+6 + bhi fps_div8 + lda fpa1+fpa.sig+2 + cmpa fpa1+fpa.sig+7 + bhi fps_div8 + lda fpa1+fpa.sig+3 + cmpa fpa1+fpa.sig+8 + bhi fps_div8 + lda fpa1+fpa.sig+4 + cmpa fpa1+fpa.sig+9 + bhi fps_div8 ; brif divisor is greater than the residue + bne fps_div7 ; brif it didn't go exactly + incb ; bump quotient for this "go" + clr fpaextra+6 ; indicate no more digits needed + rts +fps_div7 coma ; set carry to complete 10's complement of divisor + lda fpa1+fpa.sig+9 ; do the "subtraction" + adca fpaextra+4 + daa + sta fpa1+fpa.sig+9 + lda fpa1+fpa.sig+8 + adca fpaextra+3 + daa + sta fpa1+fpa.sig+8 + lda fpa1+fpa.sig+7 + adca fpaextra+2 + daa + sta fpa1+fpa.sig+7 + lda fpa1+fpa.sig+6 + adca fpaextra+1 + daa + sta fpa1+fpa.sig+6 + lda fpa1+fpa.sig+5 + adca fpaextra + daa + sta fpa1+fpa.sig+5 + lda fpaextra+7 ; and handle the carry byte + adca #0x99 + sta fpaextra+7 + incb ; bump digit count + bra fps_div6 ; go see if we need another subtraction +fps_div8 tstb ; nonzero digit? + beq fps_div9 ; brif not + lda #1 ; set nonzero flag + sta fpaextra+5 +fps_div9 lda fpaextra+6 ; adjust digit count + suba fpaextra+5 + sta fpaextra+6 + lsl fpa1+fpa.sig+9 ; shift residue one decimal digit + rol fpa1+fpa.sig+8 + rol fpa1+fpa.sig+7 + rol fpa1+fpa.sig+6 + rol fpa1+fpa.sig+5 + rol fpaextra+7 + lsl fpa1+fpa.sig+9 + rol fpa1+fpa.sig+8 + rol fpa1+fpa.sig+7 + rol fpa1+fpa.sig+6 + rol fpa1+fpa.sig+5 + rol fpaextra+7 + lsl fpa1+fpa.sig+9 + rol fpa1+fpa.sig+8 + rol fpa1+fpa.sig+7 + rol fpa1+fpa.sig+6 + rol fpa1+fpa.sig+5 + rol fpaextra+7 + lsl fpa1+fpa.sig+9 + rol fpa1+fpa.sig+8 + rol fpa1+fpa.sig+7 + rol fpa1+fpa.sig+6 + rol fpa1+fpa.sig+5 + rol fpaextra+7 rts ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Single precision multiplication (X) × (U) to (Y) -fps_mul lda fps.exp,x ; is first operand zero? - beq fps_mul0 ; brif so - return zero - lda fps.exp,u ; is second operand zero? - bne fps_mul1 ; brif not - have to do the multiply -fps_mul0 ldd zero ; return zero result - std ,y - std 2,y - std 4,y +; Convert a floating point number in val0 to a string in strbuff + +; The maximum size of a string generated here is 19 bytes representing 10 significant digits, 5 leading zeroes after the +; decimal point, the decimal point, a leading zero, a leading sign, and a trailing NUL. +fps_toascii ldu #strbuff+1 ; point to output buffer + lda #0x20 ; set sign to "blank" + sta -1,u + clr fpaextra+1 ; set decimal point offset; zero means none + clr fpaextra+3 ; disable "E" notation + ldx #val0+val.value ; unpack value to fpa0 so we can mess with it + jsr fps_unpack0 + lda fpa0+fpa.exp ; check exponent + bne fps_toascii0 ; brif not zero + lda #'0 ; make the number zero and print it + sta ,u + clr 1,u rts -fps_mul1 jsr fps_copyinputs ; copy input operands - lda fpa0+fps.sign ; calculate sign of result - xor of the two signs - eora fpa1+fps.sign - sta fpa0+fps.sign ; save result sign - lda fpa0+fps.exp ; get exponent of first value - adda fpa1+fps.exp ; calculate new exponent; also cancels the bias - rora ; set V if C and N differ - rola - bvc fps_mul3 ; brif maybe an overflow - adda #0x80 ; add back the bias - sta fpa0+fps.sign ; save new sign - beq fps_mul0 ; brif we underflowed - zero out sign -; This does a shift-and-add multiplication algorithm. This is slower than an equivalent using MUL but is smaller. -; The high order bytes will be left in fpa0 with the low order bytes in fpa0extra and fpa0extra[1-3]. The low order -; bytes are kept for the odd case where extra precision is useful. Uses fpa0extra[4-7] as temporaries. -fps_mul2 ldd zero ; zero out temporary bytes - std fpa0extra4 - std fpa0extra6 - ldb fpa0+fps.sig+3 ; multiply by low byte of fpa0 - bsr fps_mul4 - ldb fpa0extra ; move low bites - stb fpa0extra3 - ldb fpa0+fps.sig+2 ; multiply by next higher byte - bsr fps_mul4 - ldb fpa0extra ; move low bits - stb fpa0extra2 - ldb fpa0+fps.sig+1 ; and again for the next higher byte - bsr fps_mul4 - ldb fpa0extra - stb fpa0extra1 - ldb fpa0+fps.sig ; and the high order byte - bsr fps_mul4 - ldd fpa0extra4 ; copy high order product bits to result - std fpa0+fps.sig - ldd fpa0extra6 - std fpa0+fps.sig+2 - jmp fps_add10 ; go normalize the result and return -fps_mul3 bpl fps_mul0 ; brif we underflowed - return 0 - jmp OVERROR ; raise overflow -fps_mul4 bne fps_mul5 ; brif not multiply by zero - lda fpa0+fps.sig+3 ; shift 8 bits right - sta fpa0extra - lda fpa0+fps.sig+2 - sta fpa0+fps.sig+3 - lda fpa0+fps.sig+1 - sta fpa0+fps.sig+2 - lda fpa0+fps.sig - sta fpa0+fps.sig+1 - clr fpa0+fps.sig -fps_mul8 rts -fps_mul5 coma ; set C -fps_mul6 lda fpa0extra4 ; get high byte of result bytes - rorb ; is multiplier bit set? - beq fps_mul8 ; brif 8 shifts done (C set above makes sure of that) - bcc fps_mul7 ; brif bit not set - don't do addition - lda fpa0extra7 ; add multiplier (fpa1) to result - adda fpa1+fps.sig+3 - sta fpa0extra7 - lda fpa0extra6 - adca fpa1+fps.sig+2 - sta fpa0extra6 - lda fpa0extra5 - adca fpa1+fps.sig+1 - sta fpa0extra5 - lda fpa0extra4 - adca fpa1+fps.sig -fps_mul7 rora ; rotate carry in (from add or 0) - sta fpa0extra4 - ror fpa0extra5 - ror fpa0extra6 - ror fpa0extra7 - ror fpa0extra ; and into the extra precision bits - clra ; clear carry - so shift above will terminate - bra fps_mul6 ; go do another bit -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Divide (X) by 10 in place -fps_const10 fcb 0x83,0xa0,0x00,0x00,0x00,0x00 ; single precision unpacked constant 10 -fps_div10 ldu #fps_const10 ; point to constant 10 - leay ,x ; put output in input - ; fall through to regular division -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Single precision division (X) ÷ (U) -> (Y) -; -; This is basically the same algorithm used in the Color Basic ROM -fps_div lda fps.exp,u ; is divisor 0? - bne fps_div0 -DIV0ERROR ldb #err_div0 ; raise division by zero - jmp ERROR -fps_div0 lda fps.exp,x ; is dividend 0? - lbeq fps_mul0 ; brif so - return 0 - jsr fps_copyinputs ; copy input values - lda fpa0+fps.sign ; calculate result sign - xor of operand signs - eora fpa1+fps.sign - sta fpa0+fps.sign ; save result sign - lda fpa1+fps.exp ; get divisor exponent - nega ; negate for subtraction - adda fpa0+fps.exp ; subtract it from dividend exponent - rora ; set V if C and N differ - rola - bvc fps_mul3 ; brif overflow or underflow - adda #0x80 ; add back the bias - sta fpa0+fps.sign ; save new sign - lbeq fps_mul0 ; brif we underflowed - zero out sign - inca ; bump exponent - why?? related to the bias stuff above? - lbeq OVERROR ; brif it overflows - sta fpa0+fps.exp ; save result exponent - ldx #fpa0extra4 ; point to temporary storage bytes for quotient - ldb #4 ; counter for 4 significand bytes and one extra partial byte - stb fpa0extra1 ; save counter since we need both accumulators - ldb #1 ; shift counter flag and quotient byte -fps_div1 lda fpa1+fps.sig ; set C if fpa0 significand <= fpa1 significand - cmpa fpa0+fps.sig - bne fps_div2 - lda fpa1+fps.sig+1 - cmpa fpa0+fps.sig+1 - bne fps_div2 - lda fpa1+fps.sig+2 - cmpa fpa0+fps.sig+2 - bne fps_div2 - lda fpa1+fps.sig+3 - cmpa fpa0+fps.sig+3 - bne fps_div2 - coma ; set C if values are the same -fps_div2 tfr cc,a ; save C for later, C clear if fpa1 > fpa0 - rolb ; shift carry into quotient - bcc fps_div3 ; brif carry clear - we haven't done 8 bits yet - stb ,x+ ; save quotient byte after a full set of bits - dec fpa0extra1 ; have we done all the bytes? - bmi fps_div7 ; brif all bytes plus extra precision - done all - beq fps_div6 ; brif all main sigificand bytes - do a couple extra bits - ldb #1 ; reset the bit counter flag -fps_div3 tfr cc,a ; get back original carry from compare - bcs fps_div5 ; brif it "went" -fps_div4 lsl fpa0+fps.sig+3 ; shift dividend left - rol fpa0+fps.sig+2 - rol fpa0+fps.sig+1 - rol fpa0+fps.sig - bcs fps_div2 ; brif it carries - next bit "goes" - bmi fps_div1 ; check magnitudes of next bit - bra fps_div2 ; carry clear - check another bit -fps_div5 lda fpa0+fps.sig+3 ; subtract divisor from dividend bits - suba fpa1+fps.sig+3 - sta fpa0+fps.sig+3 - lda fpa0+fps.sig+2 - sbca fpa1+fps.sig+2 - sta fpa0+fps.sig+2 - lda fpa0+fps.sig+1 - sbca fpa1+fps.sig+1 - sta fpa0+fps.sig+1 - lda fpa0+fps.sig - sbca fpa1+fps.sig - sta fpa0+fps.sig - bra fps_div4 ; now do the bit shift to line things up -fps_div6 ldb #0x40 ; only do two bits of extra precision byte - bra fps_div2 ; go handle these bitsd -fps_div7 rorb ; get extra quotient bits to bit 7,6 and bit 5 set - rorb - rorb - stb fpa0extra ; save extra precision bits - ldd fpa0extra4 ; copy result bits to fpa0 - std fpa0+fps.sig - ldd fpa0extra6 - std fpa0+fps.sig+2 - jmp fps_add10 ; go normalize the result -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Pack single precision number at (X) to (U) -fps_pack lda fps.sign,x ; get sign of number (will be 0 for 0) - ora #0x7f ; make sure low bits are set for merging - anda fps.sig,x ; merge with high bits of significand - ldb fps.sig+1,x ; get upper mid bits of significand - std fps.sig,u - ldd fps.sig+2,x - std fps.sig+2,u - lda fps.exp,x - sta fps.exp,u +fps_toascii0 lda fpa0+fpa.sign ; negative? + bpl fps_toascii1 ; brif not + lda #'- ; negative sign + sta -1,u +fps_toascii1 clra ; set number of significant digits that exist + ldx #fpa0+fpa.sig ; point to significand +fps_toascii2 ldb ,x+ ; get digit pair + bitb #0xf0 ; is left digit set? + beq fps_toascii3 ; brif not - we've counted all the significant digits + inca ; count digit + bitb #0x0f ; is right digit set? + beq fps_toascii3 ; brif not - we've counted all the sigificant digits + deca ; done all significant digits? + bne fps_toascii2 ; brif not +fps_toascii3 stb fpaextra ; save significant digits that exist + lda fpa0+fpa.exp ; fetch exponent + suba #64 ; remove bias + sta fpa0+fpa.exp ; save for later + bpl fps_toascii14 ; brif no leading zeroes + suba fpaextra ; get number of significant digits plus decimal count + cmpa #-15 ; do we end up with too many digits (leading 0 + significant digits) + blt fps_toascii15 ; brif too many - do scientific notation + ldd #'0*256+'. ; put a leading "0." + std ,u++ + ldb fpa0+fpa.exp ; get exponent back + addb fpaextra ; add number of available significant digits + beq fps_toascii5 ; brif no leading digits +fps_toascii4 sta ,u+ ; put a zero + incb ; done all + bne fps_toascii4 ; brif not +fps_toascii5 ldx #fpa0+fpa.sig ; point to significand + ldd #0xf000 ; set digit mask and counter + sta fpaextra+2 +fps_toascii6 lda ,x ; get digit pair + anda fpaextra+2 ; keep the desired digit + bita #0xf0 ; is it the left hand digit? + beq fps_toascii7 ; brif not or digit is 0 + lsra ; right justify digit + lsra + lsra +fps_toascii7 adda #0x30 ; turn it into ascii + sta ,u+ ; stuff it in the output + dec fpaextra ; done all digits? + beq fps_toascii9 ; brif so + cmpa fpaextra+1 ; are we at the decimal point? + bne fps_toascii8 ; brif not + lda #'. ; put a decimal + sta ,u+ +fps_toascii8 com fpaextra+2 ; flip digit mask + bpl fps_toascii6 ; handle another digit + leax 1,x ; move to next digit byte + bra fps_toascii6 ; now go handle next digit +fps_toascii9 ldb fpaextra+3 ; get decimal exponent to display + beq fps_toascii13 + lda #'E ; output "E" + sta ,u+ + tstb ; negative? + bpl fps_toascii10 ; brif not + negb ; positivize it + lda #'- ; put a minus for the exponent + sta ,u+ +fps_toascii10 cmpb #10 ; do we have two digits for exponent? + blo fps_toascii12 ; brif not + lda #0x2f ; initialize left digit +fps_toascii11 inca ; bump digit + subb #10 ; are we at the right digit? + bhi fps_toascii11 ; brif not yet + addb #10 ; undo extra subtraction + sta ,u+ ; save left digit +fps_toascii12 addb #0x30 ; turn right digit to ascii + stb ,u+ ; save right digit +fps_toascii13 clr ,u ; put a NUL at the end of the result rts -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Unpack single precision number at (X) to (U) -fps_unpack lda fps.exp,x ; get exponent of value - beq fps_unpack0 ; brif value is zero - sta fps.exp,u - ldb fps.sig,x ; get high byte of significand - sex ; make sign value in A - sta fps.sign,u ; set sign in result - ldd fps.sig,x ; get high word of sifnificand - ora #0x80 ; make sure high bit is set - std fps.sig,u ; save high word in result - ldd fps.sig+2,x ; copy middle bytes of significand - std fps.sig+2,u - rts -fps_unpack0 sta fps.exp,u ; zero out destination - sta fps.sig,u - sta fps.sig+1,u - sta fps.sig+2,u - sta fps.sig+3,u - sta fps.sign,u - rts +fps_toascii14 cmpa #9 ; is it in range for number of significant digits? + bgt fps_toascii15 ; brif not - do scientific notation + inca ; exponent 0 has decimal point after first digit + sta fpaextra+1 ; save decimal point location + cmpa fpaextra ; is it more than the number of significant digits? + ble fps_toascii5 ; brif not - just convert the significand + sta fpaextra ; make sure we include the pre-decimal zeroes + bra fps_toascii5 ; go convert the significand +fps_toascii15 ldb #1 ; put decimal after the first digit + stb fpaextra+1 + sta fpaextra+3 ; enable the "E" notation with the correct exponent + bra fps_toascii5 ; actually convert the number *pragmapop list diff -r f959c92bc329 -r 663d8e77b579 src/int.s --- a/src/int.s Sun Oct 08 00:17:20 2023 -0600 +++ b/src/int.s Sun Oct 15 22:15:36 2023 -0600 @@ -1,6 +1,74 @@ *pragmapush list *pragma list ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Convert a signed integer in val0 to a string in strbuff +; +; The maximum size of a string generated here is 12 bytes representing 10 significant digits, a leading sign, and the +; trailing NUL. +int_toascii ldu #strbuff+1 ; point to start of digits + lda #0x20 ; default sign to space + sta -1,u + ldx #int_toascii5 ; point to digit constant table + ldd #10 ; do 10 digits, no nonzero seen yet + stb fpaextra ; save digit count + ldd val0+val.int+2 ; copy to temporary accumulator + std fpaextra+4 + ldd val0+val.int + std fpaextra+2 ; (will set N if the number is negative) + bpl int_toascii0 ; brif so + lda #'- ; negative sign + sta -1,u ; set sign + ldd zero ; negate the value + subd fpaextra+4 + std fpaextra+4 + ldd zero + sbcb fpaextra+3 + sbca fpaextra+2 + std fpaextra+2 +int_toascii0 lda #0x2f ; initialize digit (account for inc below) + sta ,u +int_toascii1 inc ,u ; bump digit + ldd fpaextra+4 ; subtract digit constnat + subd 2,x + std fpaextra+4 + ldd fpaextra+2 + sbcb 1,x + sbca ,x + std fpaextra+2 + bcc int_toascii1 ; brif we aren't at the right digit + ldd fpaextra+4 ; undo extra subtraction + addd 2,x + std fpaextra+4 + ldd fpaextra+2 + adcb 1,x + adca ,x + std fpaextra+2 + leax 4,x ; move to next constant + lda ,u ; get digit count + cmpa #'0 ; is it zero? + bne int_toascii2 ; brif not + lda fpaextra+1 ; get nonzero digit count + beq int_toascii3 ; brif we haven't already got a nonzero - don't count the zero +int_toascii2 inc fpaextra+1 ; bump nonzero digit count + leau 1,u ; move to next digit position +int_toascii3 dec fpaextra ; done all digits? + bne int_toascii0 ; brif not + ldb fpaextra+1 ; did we have any nonzero digits? + bne int_toascii4 ; brif so + leau 1,u ; move past the only zero in the output +int_toascii4 clr ,u ; NUL terminate the string + rts +int_toascii5 fqb 1000000000 ; digit place values + fqb 100000000 + fqb 10000000 + fqb 1000000 + fqb 100000 + fqb 10000 + fqb 1000 + fqb 100 + fqb 10 + fqb 1 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; 32 bit integer handling package. ; ; Negate a 32 bit integer in (X); done by subtracting it from zero @@ -69,194 +137,194 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Signed 32 bit integer multiply (X) * (U) -> (Y), overflow if exceeds signed 32 bit range int32_mul ldd val.int+2,x ; copy left operand to temporary - std fpa0+fps.sig+2 + std fpa0+fpa.sig+2 ldd val.int,x - std fpa0+fps.sig + std fpa0+fpa.sig eora val.int,u ; set sign bit in A if signs differ pshs a ; save result sign ldd val.int+2,u ; copy right operand to temporary - std fpa1+fps.sig+2 + std fpa1+fpa.sig+2 ldd val.int,u - std fpa1+fps.sig + std fpa1+fpa.sig bpl int32_mul0 ; brif right operand is positive ldd zero ; negate right operand - subd fpa1+fps.sig+2 - std fpa1+fps.sig+2 + subd fpa1+fpa.sig+2 + std fpa1+fpa.sig+2 ldd zero - sbcb fpa1+fps.sig+1 - sbca fpa1+fps.sig - std fpa1+fps.sig -int32_mul0 lda fpa0+fps.sig ; is left operand negative? + sbcb fpa1+fpa.sig+1 + sbca fpa1+fpa.sig + std fpa1+fpa.sig +int32_mul0 lda fpa0+fpa.sig ; is left operand negative? bpl int32_mul1 ; brif not ldd zero ; negate left operand - subd fpa0+fps.sig+2 - std fpa0+fps.sig+2 + subd fpa0+fpa.sig+2 + std fpa0+fpa.sig+2 ldd zero - sbcb fpa0+fps.sig+1 - sbca fpa0+fps.sig - std fpa0+fps.sig + sbcb fpa0+fpa.sig+1 + sbca fpa0+fpa.sig + std fpa0+fpa.sig int32_mul1 bsr util_mul32 ; do the actual multiplication - ldb fpa0extra ; are upper bits all zero? - orb fpa0extra1 - orb fpa0extra2 - orb fpa0extra3 + ldb fpaextra ; are upper bits all zero? + orb fpaextra+1 + orb fpaextra+2 + orb fpaextra+3 bne int32_mul4 ; brif not - overflow to floating point - ldb fpa0extra4 ; is bit 31 set? + ldb fpaextra+4 ; is bit 31 set? bpl int32_mul2 ; brif not - no overflow lda ,s ; negative result wanted? bpl int32_mul4 ; brif not - overflow to floating point andb #0x7f ; lose extra sign bit - orb fpa0extra2 ; "or" in other bytes to see if all but bit 31 are zero - orb fpa0extra6 - orb fpa0extra7 + orb fpaextra+2 ; "or" in other bytes to see if all but bit 31 are zero + orb fpaextra+6 + orb fpaextra+7 bne int32_mul4 ; brif any nonzero bits - we overflowed maximum negative number int32_mul2 ldb ,s+ ; do we want a negative result? bpl int32_mul3 ; brif not - don't negate result ldd zero ; negate result - subd fpa0extra6 - std fpa0extra6 + subd fpaextra+6 + std fpaextra+6 ldd zero - sbcb fpa0extra5 - sbca fpa0extra4 - std fpa0extra4 -int32_mul3 ldd fpa0extra4 ; copy result to destination + sbcb fpaextra+5 + sbca fpaextra+4 + std fpaextra+4 +int32_mul3 ldd fpaextra+4 ; copy result to destination std val.int,y - ldd fpa0extra6 + ldd fpaextra+6 std val.int+2,y rts int32_mul4 puls b ; get back desired sign sex ; set proper sign for floating point result - ldx #fpa0extra ; point to 64 bit unsigned result - jmp fps_fromuint64s ; go convert to floating point using the sign in A + ldx #fpaextra ; point to 64 bit unsigned result +; jmp fps_fromuint64s ; go convert to floating point using the sign in A ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; 32 bit multiply. ; -; Significands of fpa0 and fpa1, treated as unsigned, are multiplied with the product being stored in the fpa0extra +; Significands of fpa0 and fpa1, treated as unsigned, are multiplied with the product being stored in the fpaextra ; memory locations. ; ; The agorithm is simply this: zero out the result, then multiply fpa0 by each byte of fpa1 and then add the result ; to the result location. This yields a 64 bit product which is somewhat wasteful. util_mul32 ldd zero ;* zero out result bits; low 16 bits don't need to be cleared and - stb fpa0extra3 ;* upper 24 bits also don't - std fpa0extra4 - ldb fpa1+fps.sig+3 ; multiply by low byte of fpa1 - no carries possible for this iteration - lda fpa0+fps.sig+3 + stb fpaextra+3 ;* upper 24 bits also don't + std fpaextra+4 + ldb fpa1+fpa.sig+3 ; multiply by low byte of fpa1 - no carries possible for this iteration + lda fpa0+fpa.sig+3 mul - std fpa0extra6 - ldb fpa1+fps.sig+3 - lda fpa0+fps.sig+2 + std fpaextra+6 + ldb fpa1+fpa.sig+3 + lda fpa0+fpa.sig+2 mul - addd fpa0extra5 - std fpa0extra5 - ldb fpa1+fps.sig+3 - lda fpa0+fps.sig+1 + addd fpaextra+5 + std fpaextra+5 + ldb fpa1+fpa.sig+3 + lda fpa0+fpa.sig+1 mul - addd fpa0extra4 - std fpa0extra4 - ldb fpa1+fps.sig+3 - lda fpa0+fps.sig + addd fpaextra+4 + std fpaextra+4 + ldb fpa1+fpa.sig+3 + lda fpa0+fpa.sig mul - addd fpa0extra3 - std fpa0extra3 + addd fpaextra+3 + std fpaextra+3 ; Now we potentially have cascading carries at every stage; it makes more sense to handle those in a separate ; addition pass after each partial calculation. The partial calculations are identical to above. This is completely ; unrolled for speed. ldd zero ; zero out extra work bytes - std fpa0extra8 - stb fpa0extra10 - ldb fpa1+fps.sig+2 ; multiply by second low byte of fpa1 - lda fpa0+fps.sig+3 + std fpaextra+8 + stb fpaextra+10 + ldb fpa1+fpa.sig+2 ; multiply by second low byte of fpa1 + lda fpa0+fpa.sig+3 mul - std fpa0extra11 - ldb fpa1+fps.sig+2 - lda fpa0+fps.sig+2 + std fpaextra+11 + ldb fpa1+fpa.sig+2 + lda fpa0+fpa.sig+2 mul - addd fpa0extra10 - std fpa0extra10 - ldb fpa1+fps.sig+2 - lda fpa0+fps.sig+1 + addd fpaextra+10 + std fpaextra+10 + ldb fpa1+fpa.sig+2 + lda fpa0+fpa.sig+1 mul - addd fpa0extra9 - std fpa0extra9 - ldb fpa1+fps.sig+2 - lda fpa0+fps.sig + addd fpaextra+9 + std fpaextra+9 + ldb fpa1+fpa.sig+2 + lda fpa0+fpa.sig mul - addd fpa0extra8 - std fpa0extra8 - ldd fpa0extra11 ; add to partial product (shifted left 8 bits) - addd fpa0extra5 - std fpa0extra5 - ldd fpa0extra9 - adcb fpa0extra4 - adca fpa0extra3 - std fpa0extra3 + addd fpaextra+8 + std fpaextra+8 + ldd fpaextra+11 ; add to partial product (shifted left 8 bits) + addd fpaextra+5 + std fpaextra+5 + ldd fpaextra+9 + adcb fpaextra+4 + adca fpaextra+3 + std fpaextra+3 ldb #0 - adcb fpa0extra8 - stb fpa0extra2 + adcb fpaextra+8 + stb fpaextra+2 ldd zero ; and do it all again for next byte of fpa1 - std fpa0extra8 - stb fpa0extra10 - ldb fpa1+fps.sig+1 - lda fpa0+fps.sig+3 + std fpaextra+8 + stb fpaextra+10 + ldb fpa1+fpa.sig+1 + lda fpa0+fpa.sig+3 mul - std fpa0extra11 - ldb fpa1+fps.sig+1 - lda fpa0+fps.sig+2 + std fpaextra+11 + ldb fpa1+fpa.sig+1 + lda fpa0+fpa.sig+2 mul - addd fpa0extra10 - std fpa0extra10 - ldb fpa1+fps.sig+1 - lda fpa0+fps.sig+1 + addd fpaextra+10 + std fpaextra+10 + ldb fpa1+fpa.sig+1 + lda fpa0+fpa.sig+1 mul - addd fpa0extra9 - std fpa0extra9 - ldb fpa1+fps.sig+1 - lda fpa0+fps.sig + addd fpaextra+9 + std fpaextra+9 + ldb fpa1+fpa.sig+1 + lda fpa0+fpa.sig mul - addd fpa0extra8 - std fpa0extra8 - ldd fpa0extra11 - addd fpa0extra4 - std fpa0extra4 - ldd fpa0extra9 - adcb fpa0extra3 - adca fpa0extra2 - std fpa0extra2 + addd fpaextra+8 + std fpaextra+8 + ldd fpaextra+11 + addd fpaextra+4 + std fpaextra+4 + ldd fpaextra+9 + adcb fpaextra+3 + adca fpaextra+2 + std fpaextra+2 ldb #0 - adcb fpa0extra8 - stb fpa0extra1 + adcb fpaextra+8 + stb fpaextra+1 ldd zero ; and the final sequence with the fpa1 high byte - std fpa0extra8 - stb fpa0extra10 - ldb fpa1+fps.sig - lda fpa0+fps.sig+3 + std fpaextra+8 + stb fpaextra+10 + ldb fpa1+fpa.sig + lda fpa0+fpa.sig+3 mul - std fpa0extra11 - ldb fpa1+fps.sig - lda fpa0+fps.sig+2 + std fpaextra+11 + ldb fpa1+fpa.sig + lda fpa0+fpa.sig+2 mul - addd fpa0extra10 - std fpa0extra10 - ldb fpa1+fps.sig - lda fpa0+fps.sig+1 + addd fpaextra+10 + std fpaextra+10 + ldb fpa1+fpa.sig + lda fpa0+fpa.sig+1 mul - addd fpa0extra9 - std fpa0extra9 - ldb fpa1+fps.sig - lda fpa0+fps.sig + addd fpaextra+9 + std fpaextra+9 + ldb fpa1+fpa.sig + lda fpa0+fpa.sig mul - addd fpa0extra8 - std fpa0extra8 - ldd fpa0extra11 - addd fpa0extra3 - std fpa0extra3 - ldd fpa0extra9 - adcb fpa0extra2 - adca fpa0extra1 - std fpa0extra1 + addd fpaextra+8 + std fpaextra+8 + ldd fpaextra+11 + addd fpaextra+3 + std fpaextra+3 + ldd fpaextra+9 + adcb fpaextra+2 + adca fpaextra+1 + std fpaextra+1 ldb #0 - adcb fpa0extra - stb fpa0extra + adcb fpaextra + stb fpaextra rts ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Integer divide (X) by 10 *in place* @@ -268,58 +336,58 @@ ; 32 bit division, integer only, truncate fraction without rounding. Note that there is exactly one case where integer ; division can overflow: dividing -0x80000000 by -1 which yields 0x80000000. All other cases reduce the magnitude. int32_div ldd val.int+2,x ; copy left operand to temporary - std fpa0+fps.sig+2 + std fpa0+fpa.sig+2 ldd val.int,x - std fpa0+fps.sig + std fpa0+fpa.sig eora val.int,u ; set sign bit in A if signs differ pshs a ; save result sign ldd val.int+2,u ; copy right operand to temporary - std fpa1+fps.sig+2 + std fpa1+fpa.sig+2 ldd val.int,u - std fpa1+fps.sig + std fpa1+fpa.sig bpl int32_div0 ; brif right operand is positive ldd zero ; negate right operand - subd fpa1+fps.sig+2 - std fpa1+fps.sig+2 + subd fpa1+fpa.sig+2 + std fpa1+fpa.sig+2 ldd zero - sbcb fpa1+fps.sig+1 - sbca fpa1+fps.sig - std fpa1+fps.sig -int32_div0 lda fpa0+fps.sig ; is left operand negative? + sbcb fpa1+fpa.sig+1 + sbca fpa1+fpa.sig + std fpa1+fpa.sig +int32_div0 lda fpa0+fpa.sig ; is left operand negative? bpl int32_div1 ; brif not ldd zero ; negate left operand - subd fpa0+fps.sig+2 - std fpa0+fps.sig+2 + subd fpa0+fpa.sig+2 + std fpa0+fpa.sig+2 ldd zero - sbcb fpa0+fps.sig+1 - sbca fpa0+fps.sig - std fpa0+fps.sig -int32_div1 ldb fpa1+fps.sig ; check for division by zero - orb fpa1+fps.sig+1 - orb fpa1+fps.sig+2 - orb fpa1+fps.sig+3 + sbcb fpa0+fpa.sig+1 + sbca fpa0+fpa.sig + std fpa0+fpa.sig +int32_div1 ldb fpa1+fpa.sig ; check for division by zero + orb fpa1+fpa.sig+1 + orb fpa1+fpa.sig+2 + orb fpa1+fpa.sig+3 lbne DIV0ERROR ; brif division by zero bsr util_div32 ; do the actual division lda ,s+ ; get desired sign bmi int32_div2 ; brif want negative - we can't overflow in that case - ldb fpa0extra ; get high byte of result + ldb fpaextra ; get high byte of result lbmi OVERROR2 ; brif we ended up with 0x80000000 positive bra int32_div3 ; go return result int32_div2 ldd zero ; negate result to correct sign - subd fpa0extra+2 - std fpa0extra+2 + subd fpaextra+2 + std fpaextra+2 ldd zero - sbcb fpa0extra+1 - sbca fpa0extra - std fpa0extra -int32_div3 ldd fpa0extra ; copy result to destination + sbcb fpaextra+1 + sbca fpaextra + std fpaextra +int32_div3 ldd fpaextra ; copy result to destination std val.int,y - ldd fpa0extra2 + ldd fpaextra+2 std val.int+2,y rts ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Divide 32 bit integer in fpa0 significand by 32 bit integer in fpa1 significand, both treated as unsigned. Leave -; quotient at fpa0extra...fpa0extra3 and remainder at fpa0extra4...fpa0extra7; does not check for division by zero +; quotient at fpaextra...fpaextra+3 and remainder at fpaextra+4...fpaextra+7; does not check for division by zero ; which will result in a quotient of 0xffffffff and a remainder will be the dividend. It will not get suck in a loop. ; ; Algorithm is basically pencil and paper long division. We check to see if the divisor "goes" at each step by doing @@ -329,37 +397,37 @@ ; having to do a restore in the no-go case which is going to be quite common with values whose upper bits are ; mostly zeroes, thus it makes the operations faster in that case, for integers. (Floating point is a different ; problem.) -util_div32 ldd fpa0+fps.sig+2 ; copy dividend to result location - std fpa0extra6 - ldd fpa0+fps.sig - std fpa0extra4 +util_div32 ldd fpa0+fpa.sig+2 ; copy dividend to result location + std fpaextra+6 + ldd fpa0+fpa.sig + std fpaextra+4 ldb #32 ; do 32 bits - stb fpa0+fps.exp ; save counter somewhere because we don't have enough registers + stb fpa0+fpa.exp ; save counter somewhere because we don't have enough registers ldd zero ; zero out remainder - std fpa0extra4 - std fpa0extra6 -util_div32a lsl fpa0extra3 ; shift dividend residue into remainder - rol fpa0extra2 - rol fpa0extra1 - rol fpa0extra - rol fpa0extra7 - rol fpa0extra6 - rol fpa0extra5 - rol fpa0extra4 - ldd fpa0extra6 ; now subtract divisor from remainder - subd fpa1+fps.sig+2 - ldd fpa0extra4 - sbcb fpa1+fps.sig+1 - sbca fpa1+fps.sig + std fpaextra+4 + std fpaextra+6 +util_div32a lsl fpaextra+3 ; shift dividend residue into remainder + rol fpaextra+2 + rol fpaextra+1 + rol fpaextra + rol fpaextra+7 + rol fpaextra+6 + rol fpaextra+5 + rol fpaextra+4 + ldd fpaextra+6 ; now subtract divisor from remainder + subd fpa1+fpa.sig+2 + ldd fpaextra+4 + sbcb fpa1+fpa.sig+1 + sbca fpa1+fpa.sig bcs util_div32b ; brif it doesn't go - don't subtract or set bit - inc fpa0extra3 ; set quotient bit - ldd fpa0extra6 ; actually do the subtraction - subd fpa1+fps.sig+2 - std fpa0extra6 - ldd fpa0extra4 - sbcb fpa1+fps.sig+1 - sbca fpa1+fps.sig - std fpa0extra4 -util_div32b dec fpa0+fps.exp ; done all 32 bits? + inc fpaextra+3 ; set quotient bit + ldd fpaextra+6 ; actually do the subtraction + subd fpa1+fpa.sig+2 + std fpaextra+6 + ldd fpaextra+4 + sbcb fpa1+fpa.sig+1 + sbca fpa1+fpa.sig + std fpaextra+4 +util_div32b dec fpa0+fpa.exp ; done all 32 bits? bne util_div32a ; do another *pragmapop list diff -r f959c92bc329 -r 663d8e77b579 src/number.s --- a/src/number.s Sun Oct 08 00:17:20 2023 -0600 +++ b/src/number.s Sun Oct 15 22:15:36 2023 -0600 @@ -29,7 +29,7 @@ bne TMERROR ; brif not - raise error pshs x ; save X which may be clobbered leay ,x ; point to input operand as destination for conversion - jsr fps_fromint32 ; convert first argument to floating point +; jsr fps_fromint32 ; convert first argument to floating point puls x,pc ; restore second operand pointer and return val_matchtypes1 ldb val.type,u ; get second argument type cmpb #valtype_float ; is it floating point? @@ -39,7 +39,7 @@ pshs x ; save X which mill be clobbered leax ,u ; convert (U) to floating point leay ,u - jsr fps_fromint32 +; jsr fps_fromint32 puls x,pc ; restore argument pointer and return ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Addition and subtraction of values; must enter with values of matching types and the result type already set @@ -101,196 +101,195 @@ jmp TMERROR ; unsupported type endc ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Parse a number to either an integer or a floating point value +; Parse a number to either an integer or a floating point value and return the result in val0 +; +; This works by first detecting any sign indicators and handling those. Multiple prefix signs are supported. Note that +; in the regular expression evaluation sequence, unary minus and plus will be handled by the expression evaluator so +; in that case, the number evaluator would not need to care about those. However, in the case of an arbitrary string +; fed into the evaluator, those must be handled. Note that there is no need to handle tokenized sign indicators because +; the only place where they would be tokenized is in a proper expression. +; +; Once leading signs are handled, any base specifiers or other modifiers are handled. If none of those intercept the +; parsing, the regular number parsing continues as follows. ; -; First, identify any sign present. Then parse the remainder as an integer until either a decimal point, an exponential -; indicator, or the value gets larger than 32 bits. If any of those eventualities happens, convert to floating point -; and then continue parsing the number as floating point. The result will be stored to (Y). +; 1. Read a sequence of digits. The digits are stored in backed BCD form in the significand of fpa0. An arbitrary number +; of leading zeroes will be skipped. A count of significant digits is maintained while reading digits as is the +; position of a decimal point if one is encountered. Once one digit more than the maximum possible precision of any +; supported number type is read, subsequent digits will not be stored, but the counters will still be updated. +; 2. Any subsequent "E" or "e" followed by either a positive or negative decimal exponent is read, with the sign +; indicator being optional. +; 3. The decimal offset calculated in (1) is adjusted by the exponent read in (2) if any. +; 4. Range checks are completed as follows: +; 4a. If the calculated decimal exponent is beyond the supported range of any floating point or integer type, raise +; an overflow error. +; 4b. If the number is an integer in the range of -0x80000000 to 0x7fffffff, it is converted to a signed binary integer +; and the result is returned +; 4b. Set the exponent correctly then normalize the result to val0 val_parsenum lbeq SNERROR ; brif no numberr to parse - ldd zero ; zero out integer value accumulator - std fpa0extra - std fpa0extra+2 - std fpa0extra+4 - std fpa0extra+6 - sta fpa0extra12 ; zero out result sign to default positive - jsr curchar ; get current input character - bra val_parsenum1 ; parse flags -val_parsenum0 jsr nextchar ; get next input character -val_parsenum1 bcs val_parsenum5 ; brif digit - beq val_parsenum ; brif end of input - cmpa #'. ; decimal? - lbeq val_parsefloat ; switch to parsing floating point - cmpa #'- ; minus? - beq val_parsenum2 ; brif so - cmpa #tok_minus ; unary minus operator? + ldd zero ; zero out digit accumulator + stx fpa0+fpa.sig + stx fpa0+fpa.sig+2 + stx fpa0+fpa.sig+4 + stx fpa0+fpa.sig+6 + stx fpa0+fpa.sig+8 + sta fpa0+fpa.sign ; set number sign to positive + std fpaextra+4 ; clear out decimal exponent and sign + std fpaextra ; set digit count and decimal flag to zero and no decimal + sta fpaextra+2 ; set decimal position to 0 - unused if decimal not seen + ldx #fpa0+fpa.sig ; point to digit storage location + lda #0xf0 ; set digit mask + sta fpaextra+3 + jsr curchar ; get back current input + bra val_parsenum1 +val_parsenum0 jsr nextchar ; fetch next input +val_parsenum1 bcs val_parsenum3 ; brif digit - short ciruit other checks + cmpa #'+ ; unary plus? + beq val_parsenum0 ; brif so - it's a no-op but supported for symmetry with unary minus + cmpa #'- ; negative? + bne val_parsenum5 ; brif not + com fpa0+fpa.sign ; invert the sign + bra val_parsenum0 ; eat the sign and see if there's more signs +val_parsenum2 jsr nextchar ; fetch next character in number + bcc val_parsenum5 ; brif not a digit +val_parsenum3 ldb fpaextra ; is it within the digit count? + cmpb #11 ; (11 digits holds both 10 digit fp and 32 bit integer) + bhs val_parsenum4 ; brif so - don't convert it + suba #0x30 ; binary-ize the digit + bne val_parsenum3a ; brif not zero + tstb ; no digits? + beq val_parsenum2 ; brif so - skip leading zeroes +val_parsenum3a ldb #0x11 ; put in both digit spots + mul + andb fpaextra+3 ; only keep the one we need + orb ,x ; merge with existing digit + stb ,x ; put in digit location + com fpaextra+3 ; flip digit position mask + bpl val_parsenum4 ; brif not moving to new location + leax 1,x ; move to new digit storage location +val_parsenum4 inc fpaextra ; bump digit count + lbmi OVERROR ; brif it overflowed - we can't parse more than 127 digits! + ldb fpaextra+2 ; get decimal position counter + subb fpaextra+1 ; subtract decimal flag (will be 0xff or -1 if decimal seen) + stb fpaextra+2 + bra val_parsenum2 ; go handle another digit or whatever +val_parsenum5 cmpa #'. ; decimal? bne val_parsenum3 ; brif not -val_parsenum2 com fpa0extra12 ; invert current sign - bra val_parsenum0 ; go handle more stuff at the start of the number -val_parsenum3 cmpa #'+ ; unary +? - beq val_parsenum0 ; brif so - skip it - cmpa #tok_plus ; unary + operator? - beq val_parsenum0 ; brif so - skip it -val_parsenum4 lda fpa0extra4 ; is bit 7 of high byte set? - bpl val_parsenum4a ; brif not - no overflow - ldb fpa0extra12 ; do we want negative? - lbpl val_parsefloat ; brif not - we overflowed so convert to floating point - anda #0x7f ; lose sign bit then see if any other bits are set - ora fpa0extra5 - ora fpa0extra6 - ora fpa0extra7 - lbne val_parsefloat ; brif nonzero bits - too big for max negative 2's complement -val_parsenum4a lda fpa0extra12 ; do we want negative? - bpl val_parsenum4b ; brif not - ldd zero ; negate it - subd fpa0extra6 - std fpa0extra6 + com fpaextra ; flag decimal seen + lbeq SNERROR ; brif already seen a decimal point - syntax error + bra val_parsenum2 ; go parse more digits +val_parsenum6 cmpa #'E ; decimal exponent? + beq val_parsenum7 ; brif so + cmpa #'e ; lower case exponent indicator? + bne val_parsenum11 ; brif not - we have the end of the number here +val_parsenum7 jsr nextchar ; eat exponent indicator + bcs val_parsenum9 ; brif digit + cmpa #'+ ; positive? + beq val_parsenum8 ; brif no + cmpa #'- ; negative? + lbne SNERROR ; brif not positive, negative, or digit + com fpaextra+5 ; make sign of exponent negative +val_parsenum8 jsr nextchar ; eat exponent sign/get next digit + bcc val_parsenum10 ; brif not a digit - done with number +val_parsenum9 suba #0x30 ; binary-ize the digit + sta fpaextra+6 ; save digit value + ldb fpaextra+4 ; get calculated exponent + lda #10 ; multiply by 10 + mul + lbcs OVERROR ; brif decimal exponent overlows ±127 - we just don't handle that + addb fpaextra+6 ; add digit in + lbmi OVERROR ; same as above - make sure exponent in range + stb fpaextra+4 ; save new decimal exponent + bra val_parsenum8 ; handle another digit +val_parsenum10 lda fpaextra+5 ; get sign of exponent + bpl val_parsenum11 ; brif positive + neg fpaextra+4 ; negate resulting exponent +val_parsenum11 ldb fpaextra ; get number of digits provided + subb fpaextra+2 ; subtract out count of fractional digits giving whole number digits + addb fpaextra+4 ; add in decimal exponent adjustment + stb fpa0+fpa.exp ; set result exponent +; Normalization is not required here though rounding might be. Rounding will be handled during floating point return. +; By ensuring there were no leading zeroes converted, the result is already pre-normalized without losing precision due +; to an aribtrary number of leading zeroes. + cmpb fpaextra ; is the exponent less than the number of digits? + blt val_parsenum13 ; brif so - return floating point (signed comparison!) + cmpb #10 ; is exponent in the range for a binary integer? + bgt val_parsenum13 ; brif not - return floating point +; Compare with 2147483648, the maximum *negative* value; note that this is a floating point comparison because we +; already normalized everything above and it handles exponents properly + lda fpa0+fpa.exp ; compare exponents (unbiased) + cmpa #9 + bne val_parsenum12 + ldx fpa0+fpa.sig ; compare top of significand + cmpx #0x2147 + bne val_parsenum12 + ldx fpa0+fpa.sig+2 ; compare middle of significand + cmpx #0x4836 + bne val_parsenum12 + ldx fpa0+fpa.sig+4 ; compare bottom of significand plus extra digits + cmpx #0x4800 +val_parsenum12 bgt val_parsenum13 ; brif too big for integer + blt val_parsenum14 ; brif it fits in a positive integer + ldb fpa0+fpa.sign ; negative? + bpl val_parsenum14 ; brif not - doesn't fit in integer +val_parsenum13 lda #valtype_float ; set return value to floating point + sta val0+val.type + lda fpa0+fpa.exp ; put the bias into the exponent + adda #64 + sta fpa0+fpa.exp + ldy #val0+val.value ; normalize/round and return the result + jmp fps_normalize +val_parsenum14 lda #valtype_int ; set value type to integer + sta val0+val.type + ldb #9 ; exponent needed for decimal point to the right of significand + subb fpa0+fpa.exp ; number of digit shifts needed to denormalize + beq val_parsenum16 ; brif already denormalized + lslb ; do 4 shifts per digit + lslb +val_parsenum15 lsr fpa0+fpa.sig ; shift a digit right + ror fpa0+fpa.sig+1 + ror fpa0+fpa.sig+2 + ror fpa0+fpa.sig+3 + ror fpa0+fpa.sig+4 + decb ; done all shifts? + bne val_parsenum15 +; Now convert BCD digit sequence in fpa0 significand to binary value in val0 +val_parsenum16 ldb #40 ; 40 bit shifts needed for whole significand + stb fpa0+fpa.extra ; use extra precision byte as counter +val_parsenum17 lsl fpa0+fpa.sig+4 ; shift a bit into the binary result + rol fpa0+fpa.sig+3 + rol fpa0+fpa.sig+2 + rol fpa0+fpa.sig+1 + rol fpa0+fpa.sig + rol val0+val.int+3 + rol val0+val.int+2 + rol val0+val.int+1 + rol val0+val.int + ldx #fpa0+fpa.sig ; point to BCD digits +val_parsenum18 lda ,x ; get byte to check + beq val_parsenum20 ; short circuit check if digits are 0 + anda #0x88 ; keep bit 3 of each digit; adjustment on >= 8 + lsra ; shift over and mulply by adjustment factor + lsra + lsra + ldb #3 ; the adjustment is a subtraction by 3 + mul + negb ; now subtract from digit + addb ,x + stb ,x+ +val_parsenum18a cmpx #fpa0+fpa.sig+5 ; done all 5 bytes? + blo val_parsenum18 ; brif not + dec fpa0+fpa.extra ; done all bits? + bne val_parsenum17 ; brif not + ldb fpa0+fpa.sign ; do we want negative? + bpl val_parsenum19 ; brif not + ldd zero ; negate the value through subtracting from 0 + subd val0+val.int+2 + std val0+val.int+2 ldd zero - sbcb fpa0extra5 - sbca fpa0extra4 - std fpa0extra4 -val_parsenum4b ldd fpa0extra6 ; copy value to result location - std val.int+2,y - ldd fpa0extra4 - std val.int,y - lda #valtype_int ; set value type to integer - sta val.type,y - rts -val_parsenum4c jsr nextchar ; fetch next character (after a digit) - bcs val_parsenum5 ; it's a digit - cmpa #'. ; decimal? - beq val_parsefloat ; brif so - handle floating point - cmpa #'E ; exponent? - beq val_parsefloat ; brif so - handle floating point - cmpa #'e ; exponent but lower case? - beq val_parsefloat ; brif so - handle floating point - bra val_parsenum4 ; unrecognized character - treat as end of number -val_parsenum5 suba #'0 ; offset digit to binary - pshs a ; save it for later addition - ldx fpa0extra4 ; save original value - stx fpa0extra8 - ldx fpa0extra6 - stx fpa0extra10 - lsl fpa0extra7 ; shift partial result left (times 2) - rol fpa0extra6 - rol fpa0extra5 - rol fpa0extra4 - rol fpa0extra3 - lsl fpa0extra7 ; shift partial result left (times 4) - rol fpa0extra6 - rol fpa0extra5 - rol fpa0extra4 - rol fpa0extra3 - ldd fpa0extra6 ; add in original value (time 5) - addd fpa0extra10 - std fpa0extra6 - ldd fpa0extra8 - adcb fpa0extra5 - adca fpa0extra4 - std fpa0extra4 - ldb fpa0extra3 - adcb #0 - stb fpa0extra3 - lsl fpa0extra7 ; shift partial result left (times 10) - rol fpa0extra6 - rol fpa0extra5 - rol fpa0extra4 - rol fpa0extra3 - ldd fpa0extra6 ; add in new digit - addb ,s+ - adca #0 - std fpa0extra6 - ldd fpa0extra4 ; and propagate carry - adcb #0 - adca #0 - std fpa0extra4 - ldb fpa0extra3 - adcb #0 - stb fpa0extra3 - beq val_parsenum4c ; go handle next digit if we didn't overflow past 32 bits - jsr nextchar ; eat the digit we just handled -val_parsefloat pshs y ; save destination pointer - lda #valtype_float ; set return type to floating point - sta val.type,y - ldx #fpa0extra ; point to integer accumulator - jsr fps_fromuint64 ; convert to floating point - clr fpa0extra11 ; zero out decimal counter - clr fpa0extra10 ; zero out decimal exponent counter - clr fpa0extra9 ; flag for decimal seen - jsr curchar ; fetch current character - bra val_parsefloat1 ; go handle character -val_parsefloat0 jsr nextchar ; fetch next character -val_parsefloat1 bcc val_parsefloat2 ; brif not digit - suba #'0 ; adjust digit to binary - sta fpa0extra3 ; save it for later (upper 3 bytes of 32 bit value already 0) - ldx ,s ; get destination value - jsr fps_mul10 ; do a quick multiply by 10 - ldx #fpa0extra ; convert digit to floating point - ldy #fpa1 - jsr fps_fromuint32 - ldu #fpa1 ; add digit to accumulated value - ldx ,s - leay ,x - jsr fps_add - lda fpa0extra11 ; update decimal counter - suba fpa0extra9 - sta fpa0extra11 - bra val_parsefloat0 ; go handle another digit -val_parsefloat2 cmpa #'. ; decimal? - bne val_parsefloat7 ; brif not - com fpa0extra9 ; flag for decimal - bne val_parsefloat0 ; brif not two decimals - keep parsing -val_parsefloat3 ldb fpa0extra10 ; fetch decimal exponent counter - subb fpa0extra11 ; subtract out decimal places provided - beq val_parsefloat6 ; brif no adjustment needed - stb fpa0extra9 ; save counter - bmi val_parsefloat5 ; brif negative exponent - need to do divisions -val_parsefloat4 ldx ,s ; point to destination value - jsr fps_mul10 ; multiply by 10 - dec fpa0extra9 ; done all of them? - bne val_parsefloat4 ; brif not - bra val_parsefloat6 -val_parsefloat5 ldx ,s ; point to destination value - jsr fps_div10 ; divide by 10 - inc fpa0extra9 ; done all of them? - bne val_parsefloat5 ; brif not -val_parsefloat6 puls y ; get back destination pointer - lda fpa0extra12 ; get desired sign - sta val.fpssign,y ; set in result - rts -val_parsefloat7 cmpa #'E ; decimal exponent? - beq val_parsefloat8 ; brif so - cmpa #'e ; decimal exponent, lower case edition? - bne val_parsefloat3 ; brif not - must be end of number -val_parsefloat8 clr fpa0extra9 ; set sign of exponent to positive - jsr nextchar ; fetch exponent character - bcs val_parsefloat11 ; brif digit - cmpa #'+ ; positive exponent? - beq val_parsefloat10 ; brif so - skip it - cmpa #tok_plus ; positive exponent, operator style? - beq val_parsefloat10 ; brif so - skip it - cmpa #'- ; negative exponent? - beq val_parsefloat9 ; brif so - cmpa #tok_minus ; negative exponent, operator style? - bne val_parsefloat3 ; brif not - must be end of exponent -val_parsefloat9 com fpa0extra9 ; set exponent to negative -val_parsefloat10 - jsr nextchar ; eat exponent sign - bcc val_parsefloat12 ; brif end of exponent - apply sign -val_parsefloat11 - suba #'0 ; binary-ize digit - sta fpa0extra8 ; save digit for later - lda #10 ; mutiply current decimal exponent by 10 - ldb fpa0extra10 ; get current exponent - mul - adca #0 ; set A if we overflowed *or* bit 7 of B is set - lbne OVERROR ; brif exponent overflow - addb fpa0extra8 ; add in digit - lbvs OVERROR ; brif exponent overflow - stb fpa0extra10 ; save new exponent - bra val_parsefloat10 ; go handle next exponent digit -val_parsefloat12 - ldb fpa0extra9 ; do we have a negative exponent? - beq val_parsefloat3 ; brif not, go adjust value by exponent and return - neg fpa0extra10 ; set base 10 exponent negative - bra val_parsefloat3 ; go adjust value by exponent and return + sbcb val0+val.int+1 + sbca val0+val.int + std val0+val.int +val_parsenum19 rts +val_parsenum20 leax 1,x ; move to next digit + bra val_parsenum18a ; go back to mainline *pragmapop list diff -r f959c92bc329 -r 663d8e77b579 src/print.s --- a/src/print.s Sun Oct 08 00:17:20 2023 -0600 +++ b/src/print.s Sun Oct 15 22:15:36 2023 -0600 @@ -54,6 +54,8 @@ ldb val0+val.type ; get value type cmpb #valtype_int ; integer? beq cmd_printint ; brif so - print integer + cmpb #valtype_float ; floatingp point? + beq cmd_printfps ; brif so - print floating point lda #'! ; flag unknown expression type jsr console_outchr jsr console_outchr @@ -61,60 +63,10 @@ cmd_printnext jsr curchar ; see what we have here bra cmd_print ; and go process cmd_printeol jmp console_outnl ; do a newline and return -cmd_printint leas -12,s ; make a buffer - leay ,s ; point to buffer - lda #0x20 ; default sign (positive) - ldb val0+val.int ; is it negative? - bpl cmd_printint0 ; brif not - jsr int32_neg ; negate the integer - lda #'- ; negative sign -cmd_printint0 sta ,y+ ; save sign - ldu #cmd_printintpc ; point to positive constant table - ldx #10 ; there are 10 constants to process -; subtraction loop - positive residue -cmd_printint1 lda #'0-1 ; initialize digit - sta ,y -cmd_printint2 inc ,y ; bump digit - ldd val0+val.int+2 ; subtract constant - subd 2,u - std val0+val.int+2 - ldd val0+val.int - sbcb 1,u - sbca ,u - std val0+val.int - bcc cmd_printint2 ; brif we didn't go negative - ldd val0+val.int+2 ; undo last subtract - addd 2,u - std val0+val.int+2 - ldd val0+val.int - adcb 1,u - adca ,u - std val0+val.int - leay 1,y ; move to next digit in buffer - leau 4,u ; move to next constant - leax -1,x ; done all constants? - bne cmd_printint1 ; brif not - done all -cmd_printint5 clr ,y ; NUL terminate the string - leax 1,s ; point past the sign -cmd_printint6 lda ,x+ ; get digit - beq cmd_printint8 ; brif end of number - cmpa #'0 ; is it a zero? - beq cmd_printint6 ; brif so - skip it -cmd_printint7 lda ,s ; get the sign - sta ,--x ; put it at the start of the number - jsr console_outstr ; display the number - leas 12,s ; clean up stack - bra cmd_printnext ; go print the next thing -cmd_printint8 leax -1,x ; restore one of the zeros - bra cmd_printint7 ; go finish up -cmd_printintpc fqb 1000000000 ; 10^9 - fqb 100000000 ; 10^8 - fqb 10000000 ; 10^7 - fqb 1000000 ; 10^6 - fqb 100000 ; 10^5 - fqb 10000 ; 10^4 - fqb 1000 ; 10^3 - fqb 100 ; 10^2 - fqb 10 ; 10^1 - fqb 1 ; 10^0 +cmd_printint jsr int_toascii ; convert val0 to string +cmd_printstrb ldx #strbuff ; point to resulting string + jsr console_outstr ; output the string + bra cmd_printnext ; go handle next stuff +cmd_printfps jsr fps_toascii ; convert val0 to string + bra cmd_printstrb ; go output the resulting string *pragmapop list diff -r f959c92bc329 -r 663d8e77b579 src/vars.s --- a/src/vars.s Sun Oct 08 00:17:20 2023 -0600 +++ b/src/vars.s Sun Oct 15 22:15:36 2023 -0600 @@ -49,19 +49,7 @@ ; the maximum precision floating point values used by the system. fpa0 rmb fpa.size ; floating point accumulator 1 fpa1 rmb fpa.size ; floating point accumulator 1 -fpa0extra rmb 1 ; "extra" bytes for calculations -fpa0extra1 rmb 1 -fpa0extra2 rmb 1 -fpa0extra3 rmb 1 -fpa0extra4 rmb 1 -fpa0extra5 rmb 1 -fpa0extra6 rmb 1 -fpa0extra7 rmb 1 -fpa0extra8 rmb 1 -fpa0extra9 rmb 1 -fpa0extra10 rmb 1 -fpa0extra11 rmb 1 -fpa0extra12 rmb 1 +fpaextra rmb 12 ; "extra" bytes for calculations rmb 0x71-* ; align RSTFLG/RSTVEC for stock ROM compatibility RSTFLG rmb 1 ; 0x55 if RSTVEC is valid RSTVEC rmb 2 ; points to warm start routine (must start with NOP) @@ -79,6 +67,7 @@ tokebuff rmb linebuffsize+50 ; make it as long as line buffer plus a margin stringstack rmb 5*stringstacknum ; reserve space for the anonymous string descriptor stack stringstackend equ * ; end of string stack buffer +strbuff rmb 20 ; temporary string buffer for converting numbers and other things ifne *&0x1ff rmb 0x200-(*&0x1ff) endc