#*****
#
# Michael Riff
# Multiprecision division, version 1.0, February 2022.
# 128-bit by 16-bit division implemented with the divwu instruction.
# A 96-bit division can easily be derived from it (see the
# "stop here" marker in the code).
#
# Entry point: .div128_16[PR], exported under the name 'div_128_16'.
#
# Register usage, as read from the code below:
#	r3	address of the dividend: four 32-bit words, the word at
#		offset 0 being the most significant one
#	r4	divisor
#	r5	address of the quotient: four 32-bit words, stored in the
#		same word order as the dividend
#	r6	address of one 32-bit word that receives the final remainder
#
# Scratch registers written: r0, r7-r12; the andi. instructions also
# update CR0. No stack frame is set up and nothing is saved.
#
# Method: the most significant word is divided directly. After that the
# dividend is consumed 16 bits at a time: each step divides
# (previous remainder << 16) | (next 16 dividend bits) by r4, and two
# successive 16-bit quotients are packed into one 32-bit quotient word.
#
# r4 is not validated. Because every remainder is shifted left by 16
# bits inside a 32-bit register, the method relies on each remainder
# fitting in 16 bits, i.e. on the 16-bit divisor announced above.
# NOTE(review): a zero divisor is not handled either - confirm that
# callers never pass one.
#
#****/

	dialect PowerPC
	file 'div3.s'

# Only the sections can be exported (does not work with labels)
#
# NOTE(review): the descriptor csect div128_16[DS] declared further down
# is empty - confirm that exporting it is intended.
	export div128_16[DS]
	export .div128_16[PR] => 'div_128_16'

# Scratch register aliases. rz (r0) is only ever used as an arithmetic
# operand below, never as the base register of a load or store.
rx	equ	r12
ry	equ	r11
rz	equ	r0

	csect div128_16[DS]

	csect .div128_16[PR]

# Load the four dividend words, most significant first. All loads are
# done before the first store to the quotient buffer.
	lwz	r7, 0(r3)
	lwz	r8, 4(r3)
	lwz	r9, 8(r3)
	lwz	r10, 12(r3)

# Perform the division for the high-order 32-bit word
	divwu	rx, r7, r4		# quotient 1, 32 bits
	stw	rx, 0(r5)		# quotient word 0
	mullw	rx, rx, r4
	subf	rx, rx, r7		# remainder 1 = r7 - quotient 1 * r4; from here r7 is free

# Here start the cycles in which, each time, the remainder concatenated
# with the next lower 16 bits of the original dividend is processed
	slwi	rx, rx, 16		# remainder 1 into the upper halfword
	srwi	ry, r8, 16		# upper halfword of dividend word 1
	or	ry, rx, ry		# next dividend 2
	divwu	rz, ry, r4		# quotient 2, 16 bits
	mullw	rx, rz, r4
	subf	rx, rx, ry		# remainder 2
	slwi	rx, rx, 16
	andi.	r8, r8, 0xFFFF		# keep lower halfword of word 1; from here r8 is free
	or	r8, r8, rx		# next dividend 3
	divwu	ry, r8, r4		# quotient 3, 16 bits
# Generate next 32 bit quotient part: (quotient 2 << 16) | quotient 3
	slwi	r7, rz, 16
	or	r7, r7, ry
	stw	r7, 4(r5)		# quotient word 1
	mullw	r7, ry, r4
	subf	r8, r7, r8		# remainder 3
#
	slwi	r8, r8, 16
	srwi	ry, r9, 16		# upper halfword of dividend word 2
	or	ry, r8, ry		# next dividend 4
	divwu	rx, ry, r4		# quotient 4
	mullw	rz, rx, r4
	subf	ry, rz, ry		# remainder 4
	slwi	ry, ry, 16
	andi.	r9, r9, 0xFFFF		# keep lower halfword of word 2; from here r9 is free
	or	r9, ry, r9		# next dividend 5
	divwu	ry, r9, r4		# quotient 5
# Generate next 32 bit quotient part: (quotient 4 << 16) | quotient 5
	slwi	r8, rx, 16
	or	r8, r8, ry
	stw	r8, 8(r5)		# quotient word 2
	mullw	r8, ry, r4
	subf	r9, r8, r9		# remainder 5
# For 96 bits division stop here
#
	slwi	r9, r9, 16
	srwi	rx, r10, 16		# upper halfword of dividend word 3
	or	rx, rx, r9		# next dividend 6
	divwu	ry, rx, r4		# quotient 6
	mullw	rz, ry, r4
	subf	rx, rz, rx		# remainder 6
	slwi	rx, rx, 16
	andi.	r10, r10, 0xFFFF	# keep lower halfword of word 3; from here r10 is free
	or	r10, r10, rx		# next dividend 7
	divwu	rz, r10, r4		# quotient 7
# Generate last 32 bit quotient part: (quotient 6 << 16) | quotient 7
	slwi	r9, ry, 16
	or	r9, r9, rz
	stw	r9, 12(r5)		# quotient word 3
	mullw	r9, rz, r4
	subf	r10, r9, r10		# remainder 7 and final
	stw	r10, 0(r6)		# store the final remainder
	blr