mirror of
https://github.com/oxen-io/session-android.git
synced 2024-12-30 03:37:46 +00:00
2176 lines
33 KiB
ArmAsm
2176 lines
33 KiB
ArmAsm
# File-level assembler setup for the 32-bit MIPS big-number primitives below
# (GNU as syntax; all data accesses in this file are 4-byte lw/sw).
        .set    mips2                   # select the MIPS II instruction set

# Identification strings kept in read-only data.
        .rdata
        .asciiz "mips3.s, Version 1.2"
        .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"

        .text
        .set    noat                    # the code uses $1 ($at) explicitly as scratch

#-----------------------------------------------------------------------
# bn_mul_add_words: multiply-accumulate over an array of 32-bit words.
#   for i in [0, count):  dst[i] += src[i] * w   (carry word chained)
# In:   $4 = dst (read and written), $5 = src, $6 = count (signed),
#       $7 = w
# Out:  $2 = final carry word; 0 when count <= 0.  The result is also
#       copied into $4 in the return delay slot (presumably for an ABI
#       variant that returns in $4 -- confirm against the generator).
# Uses: $1 ($at), $3, $8-$15, HI/LO.  $4, $5 and $6 are modified.
#-----------------------------------------------------------------------
        .align  5
        .globl  bn_mul_add_words
        .ent    bn_mul_add_words
bn_mul_add_words:
        .set    noreorder
        bgtz    $6,bn_mul_add_words_internal
        move    $2,$0                   # delay slot: carry-in = 0 on both paths
        jr      $31
        move    $4,$2
        .end    bn_mul_add_words

# Body of bn_mul_add_words.  Expects $2 = 0 (carry-in) and $6 > 0 on entry.
        .align  5
        .ent    bn_mul_add_words_internal
bn_mul_add_words_internal:
        .set    reorder
        li      $3,-4                   # mask that clears the low two count bits
        and     $8,$6,$3
        beqz    $8,.L_bn_mul_add_words_tail     # fewer than 4 words: tail only

# Main loop: four words per iteration; each multu is issued early so the
# multiplier runs while the previous product is being accumulated.
.L_bn_mul_add_words_loop:
        lw      $12,0($5)
        multu   $12,$7
        lw      $13,0($4)
        lw      $14,4($5)
        lw      $15,4($4)
        lw      $8,2*4($5)
        lw      $9,2*4($4)
        addu    $13,$2
        sltu    $2,$13,$2       # All manuals say it "compares 32-bit
                                # values", but it seems to work fine
                                # even on 64-bit registers.
        mflo    $1
        mfhi    $12
        addu    $13,$1
        addu    $2,$12
        multu   $14,$7
        sltu    $1,$13,$1
        sw      $13,0($4)
        addu    $2,$1

        lw      $10,3*4($5)
        lw      $11,3*4($4)
        addu    $15,$2
        sltu    $2,$15,$2
        mflo    $1
        mfhi    $14
        addu    $15,$1
        addu    $2,$14
        multu   $8,$7
        sltu    $1,$15,$1
        sw      $15,4($4)
        addu    $2,$1

        subu    $6,4
        addu    $4,4*4                  # pointers advance here, so the last
        addu    $5,4*4                  # two stores use negative offsets
        addu    $9,$2
        sltu    $2,$9,$2
        mflo    $1
        mfhi    $8
        addu    $9,$1
        addu    $2,$8
        multu   $10,$7
        sltu    $1,$9,$1
        sw      $9,-2*4($4)
        addu    $2,$1

        and     $8,$6,$3                # another full group of four left?
        addu    $11,$2
        sltu    $2,$11,$2
        mflo    $1
        mfhi    $10
        addu    $11,$1
        addu    $2,$10
        sltu    $1,$11,$1
        sw      $11,-4($4)
        .set    noreorder
        bgtz    $8,.L_bn_mul_add_words_loop
        addu    $2,$1                   # delay slot: fold in the last carry

        beqz    $6,.L_bn_mul_add_words_return
        nop

# Tail: one to three remaining words, handled one at a time.
.L_bn_mul_add_words_tail:
        .set    reorder
        lw      $12,0($5)
        multu   $12,$7
        lw      $13,0($4)
        subu    $6,1
        addu    $13,$2
        sltu    $2,$13,$2
        mflo    $1
        mfhi    $12
        addu    $13,$1
        addu    $2,$12
        sltu    $1,$13,$1
        sw      $13,0($4)
        addu    $2,$1
        beqz    $6,.L_bn_mul_add_words_return

        lw      $12,4($5)
        multu   $12,$7
        lw      $13,4($4)
        subu    $6,1
        addu    $13,$2
        sltu    $2,$13,$2
        mflo    $1
        mfhi    $12
        addu    $13,$1
        addu    $2,$12
        sltu    $1,$13,$1
        sw      $13,4($4)
        addu    $2,$1
        beqz    $6,.L_bn_mul_add_words_return

        lw      $12,2*4($5)
        multu   $12,$7
        lw      $13,2*4($4)
        addu    $13,$2
        sltu    $2,$13,$2
        mflo    $1
        mfhi    $12
        addu    $13,$1
        addu    $2,$12
        sltu    $1,$13,$1
        sw      $13,2*4($4)
        addu    $2,$1

.L_bn_mul_add_words_return:
        .set    noreorder
        jr      $31
        move    $4,$2
        .end    bn_mul_add_words_internal

#-----------------------------------------------------------------------
# bn_mul_words: multiply an array of 32-bit words by a single word.
#   for i in [0, count):  dst[i] = low32(src[i] * w + carry)
# In:   $4 = dst (written only), $5 = src, $6 = count (signed), $7 = w
# Out:  $2 = final carry word; 0 when count <= 0.  The result is also
#       copied into $4 in the return delay slot.
# Uses: $1 ($at), $3, $8-$15, HI/LO.  $4, $5 and $6 are modified.
#-----------------------------------------------------------------------
        .align  5
        .globl  bn_mul_words
        .ent    bn_mul_words
bn_mul_words:
        .set    noreorder
        bgtz    $6,bn_mul_words_internal
        move    $2,$0                   # delay slot: carry-in = 0 on both paths
        jr      $31
        move    $4,$2
        .end    bn_mul_words

# Body of bn_mul_words.  Expects $2 = 0 (carry-in) and $6 > 0 on entry.
        .align  5
        .ent    bn_mul_words_internal
bn_mul_words_internal:
        .set    reorder
        li      $3,-4                   # mask that clears the low two count bits
        and     $8,$6,$3
        beqz    $8,.L_bn_mul_words_tail # fewer than 4 words: tail only

# Main loop: four words per iteration, next multu overlapped with the
# accumulation of the previous product.
.L_bn_mul_words_loop:
        lw      $12,0($5)
        multu   $12,$7
        lw      $14,4($5)
        lw      $8,2*4($5)
        lw      $10,3*4($5)
        mflo    $1
        mfhi    $12
        addu    $2,$1
        sltu    $13,$2,$1
        multu   $14,$7
        sw      $2,0($4)
        addu    $2,$13,$12              # next carry = high word + overflow bit

        subu    $6,4
        addu    $4,4*4                  # pointers advance here, so the
        addu    $5,4*4                  # remaining stores use negative offsets
        mflo    $1
        mfhi    $14
        addu    $2,$1
        sltu    $15,$2,$1
        multu   $8,$7
        sw      $2,-3*4($4)
        addu    $2,$15,$14

        mflo    $1
        mfhi    $8
        addu    $2,$1
        sltu    $9,$2,$1
        multu   $10,$7
        sw      $2,-2*4($4)
        addu    $2,$9,$8

        and     $8,$6,$3                # another full group of four left?
        mflo    $1
        mfhi    $10
        addu    $2,$1
        sltu    $11,$2,$1
        sw      $2,-4($4)
        .set    noreorder
        bgtz    $8,.L_bn_mul_words_loop
        addu    $2,$11,$10              # delay slot: carry for the next word

        beqz    $6,.L_bn_mul_words_return
        nop

# Tail: one to three remaining words.
.L_bn_mul_words_tail:
        .set    reorder
        lw      $12,0($5)
        multu   $12,$7
        subu    $6,1
        mflo    $1
        mfhi    $12
        addu    $2,$1
        sltu    $13,$2,$1
        sw      $2,0($4)
        addu    $2,$13,$12
        beqz    $6,.L_bn_mul_words_return

        lw      $12,4($5)
        multu   $12,$7
        subu    $6,1
        mflo    $1
        mfhi    $12
        addu    $2,$1
        sltu    $13,$2,$1
        sw      $2,4($4)
        addu    $2,$13,$12
        beqz    $6,.L_bn_mul_words_return

        lw      $12,2*4($5)
        multu   $12,$7
        mflo    $1
        mfhi    $12
        addu    $2,$1
        sltu    $13,$2,$1
        sw      $2,2*4($4)
        addu    $2,$13,$12

.L_bn_mul_words_return:
        .set    noreorder
        jr      $31
        move    $4,$2
        .end    bn_mul_words_internal

#-----------------------------------------------------------------------
# bn_sqr_words: square each 32-bit word of an array independently.
#   for i in [0, count):  dst[2i] = low32(src[i]^2), dst[2i+1] = high32
# In:   $4 = dst (2*count words written), $5 = src, $6 = count (signed)
# Out:  $2 = 0 (set in the entry delay slot and never changed); also
#       copied into $4 in the return delay slot.
# Uses: $3, $8-$15, HI/LO.  $4, $5 and $6 are modified.
#-----------------------------------------------------------------------
        .align  5
        .globl  bn_sqr_words
        .ent    bn_sqr_words
bn_sqr_words:
        .set    noreorder
        bgtz    $6,bn_sqr_words_internal
        move    $2,$0                   # delay slot: return value = 0
        jr      $31
        move    $4,$2
        .end    bn_sqr_words

# Body of bn_sqr_words.  Expects $6 > 0 on entry.
        .align  5
        .ent    bn_sqr_words_internal
bn_sqr_words_internal:
        .set    reorder
        li      $3,-4                   # mask that clears the low two count bits
        and     $8,$6,$3
        beqz    $8,.L_bn_sqr_words_tail # fewer than 4 words: tail only

# Main loop: four input words (eight output words) per iteration.
.L_bn_sqr_words_loop:
        lw      $12,0($5)
        multu   $12,$12
        lw      $14,4($5)
        lw      $8,2*4($5)
        lw      $10,3*4($5)
        mflo    $13
        mfhi    $12
        sw      $13,0($4)
        sw      $12,4($4)

        multu   $14,$14
        subu    $6,4
        addu    $4,8*4                  # dst advances by eight words,
        addu    $5,4*4                  # src by four
        mflo    $15
        mfhi    $14
        sw      $15,-6*4($4)
        sw      $14,-5*4($4)

        multu   $8,$8
        mflo    $9
        mfhi    $8
        sw      $9,-4*4($4)
        sw      $8,-3*4($4)


        multu   $10,$10
        and     $8,$6,$3                # another full group of four left?
        mflo    $11
        mfhi    $10
        sw      $11,-2*4($4)

        .set    noreorder
        bgtz    $8,.L_bn_sqr_words_loop
        sw      $10,-4($4)              # delay slot: last high word

        beqz    $6,.L_bn_sqr_words_return
        nop

# Tail: one to three remaining words.
.L_bn_sqr_words_tail:
        .set    reorder
        lw      $12,0($5)
        multu   $12,$12
        subu    $6,1
        mflo    $13
        mfhi    $12
        sw      $13,0($4)
        sw      $12,4($4)
        beqz    $6,.L_bn_sqr_words_return

        lw      $12,4($5)
        multu   $12,$12
        subu    $6,1
        mflo    $13
        mfhi    $12
        sw      $13,2*4($4)
        sw      $12,3*4($4)
        beqz    $6,.L_bn_sqr_words_return

        lw      $12,2*4($5)
        multu   $12,$12
        mflo    $13
        mfhi    $12
        sw      $13,4*4($4)
        sw      $12,5*4($4)

.L_bn_sqr_words_return:
        .set    noreorder
        jr      $31
        move    $4,$2

        .end    bn_sqr_words_internal

#-----------------------------------------------------------------------
# bn_add_words: add two arrays of 32-bit words with carry propagation.
#   for i in [0, count):  dst[i] = a[i] + b[i] + carry
# In:   $4 = dst, $5 = a, $6 = b, $7 = count (signed)
# Out:  $2 = carry out of the last word; 0 when count <= 0.  The result
#       is also copied into $4 in the return delay slot.
# Uses: $1 ($at), $3, $8-$15, $24, $25.  $4-$7 are modified.
#-----------------------------------------------------------------------
        .align  5
        .globl  bn_add_words
        .ent    bn_add_words
bn_add_words:
        .set    noreorder
        bgtz    $7,bn_add_words_internal
        move    $2,$0                   # delay slot: carry-in = 0 on both paths
        jr      $31
        move    $4,$2
        .end    bn_add_words

# Body of bn_add_words.  Expects $2 = 0 (carry-in) and $7 > 0 on entry.
        .align  5
        .ent    bn_add_words_internal
bn_add_words_internal:
        .set    reorder
        li      $3,-4                   # mask that clears the low two count bits
        and     $1,$7,$3
        beqz    $1,.L_bn_add_words_tail # fewer than 4 words: tail only

# Main loop: four words per iteration.  Pointer bumps are interleaved
# with the loads, so later accesses use negative offsets.
.L_bn_add_words_loop:
        lw      $12,0($5)
        lw      $8,0($6)
        subu    $7,4
        lw      $13,4($5)
        and     $1,$7,$3                # loop condition, tested at the bottom
        lw      $14,2*4($5)
        addu    $6,4*4
        lw      $15,3*4($5)
        addu    $4,4*4
        lw      $9,-3*4($6)
        addu    $5,4*4
        lw      $10,-2*4($6)
        lw      $11,-4($6)
        addu    $8,$12                  # a + b
        sltu    $24,$8,$12              # carry from a + b
        addu    $12,$8,$2               # + carry-in
        sltu    $2,$12,$8               # carry from adding carry-in
        sw      $12,-4*4($4)
        addu    $2,$24                  # combined carry-out

        addu    $9,$13
        sltu    $25,$9,$13
        addu    $13,$9,$2
        sltu    $2,$13,$9
        sw      $13,-3*4($4)
        addu    $2,$25

        addu    $10,$14
        sltu    $24,$10,$14
        addu    $14,$10,$2
        sltu    $2,$14,$10
        sw      $14,-2*4($4)
        addu    $2,$24

        addu    $11,$15
        sltu    $25,$11,$15
        addu    $15,$11,$2
        sltu    $2,$15,$11
        sw      $15,-4($4)

        .set    noreorder
        bgtz    $1,.L_bn_add_words_loop
        addu    $2,$25                  # delay slot: combined carry-out

        beqz    $7,.L_bn_add_words_return
        nop

# Tail: one to three remaining words.
.L_bn_add_words_tail:
        .set    reorder
        lw      $12,0($5)
        lw      $8,0($6)
        addu    $8,$12
        subu    $7,1
        sltu    $24,$8,$12
        addu    $12,$8,$2
        sltu    $2,$12,$8
        sw      $12,0($4)
        addu    $2,$24
        beqz    $7,.L_bn_add_words_return

        lw      $13,4($5)
        lw      $9,4($6)
        addu    $9,$13
        subu    $7,1
        sltu    $25,$9,$13
        addu    $13,$9,$2
        sltu    $2,$13,$9
        sw      $13,4($4)
        addu    $2,$25
        beqz    $7,.L_bn_add_words_return

        lw      $14,2*4($5)
        lw      $10,2*4($6)
        addu    $10,$14
        sltu    $24,$10,$14
        addu    $14,$10,$2
        sltu    $2,$14,$10
        sw      $14,2*4($4)
        addu    $2,$24

.L_bn_add_words_return:
        .set    noreorder
        jr      $31
        move    $4,$2

        .end    bn_add_words_internal

#-----------------------------------------------------------------------
# bn_sub_words: subtract two arrays of 32-bit words with borrow.
#   for i in [0, count):  dst[i] = a[i] - b[i] - borrow
# In:   $4 = dst, $5 = a, $6 = b, $7 = count (signed)
# Out:  $2 = borrow out of the last word; 0 when count <= 0.  The result
#       is also placed in $4 in the return delay slot.
# Uses: $1 ($at), $3, $8-$15, $24, $25.  $4-$7 are modified.
#-----------------------------------------------------------------------
        .align  5
        .globl  bn_sub_words
        .ent    bn_sub_words
bn_sub_words:
        .set    noreorder
        bgtz    $7,bn_sub_words_internal
        move    $2,$0                   # delay slot: borrow-in = 0 on both paths
        jr      $31
        move    $4,$0                   # same value as $2 here (both zero)
        .end    bn_sub_words

# Body of bn_sub_words.  Expects $2 = 0 (borrow-in) and $7 > 0 on entry.
        .align  5
        .ent    bn_sub_words_internal
bn_sub_words_internal:
        .set    reorder
        li      $3,-4                   # mask that clears the low two count bits
        and     $1,$7,$3
        beqz    $1,.L_bn_sub_words_tail # fewer than 4 words: tail only

# Main loop: four words per iteration.  Pointer bumps are interleaved
# with the loads, so later accesses use negative offsets.
.L_bn_sub_words_loop:
        lw      $12,0($5)
        lw      $8,0($6)
        subu    $7,4
        lw      $13,4($5)
        and     $1,$7,$3                # loop condition, tested at the bottom
        lw      $14,2*4($5)
        addu    $6,4*4
        lw      $15,3*4($5)
        addu    $4,4*4
        lw      $9,-3*4($6)
        addu    $5,4*4
        lw      $10,-2*4($6)
        lw      $11,-4($6)
        sltu    $24,$12,$8              # borrow from a - b
        subu    $8,$12,$8               # a - b
        subu    $12,$8,$2               # - borrow-in
        sgtu    $2,$12,$8               # borrow from subtracting borrow-in
        sw      $12,-4*4($4)
        addu    $2,$24                  # combined borrow-out

        sltu    $25,$13,$9
        subu    $9,$13,$9
        subu    $13,$9,$2
        sgtu    $2,$13,$9
        sw      $13,-3*4($4)
        addu    $2,$25


        sltu    $24,$14,$10
        subu    $10,$14,$10
        subu    $14,$10,$2
        sgtu    $2,$14,$10
        sw      $14,-2*4($4)
        addu    $2,$24

        sltu    $25,$15,$11
        subu    $11,$15,$11
        subu    $15,$11,$2
        sgtu    $2,$15,$11
        sw      $15,-4($4)

        .set    noreorder
        bgtz    $1,.L_bn_sub_words_loop
        addu    $2,$25                  # delay slot: combined borrow-out

        beqz    $7,.L_bn_sub_words_return
        nop

# Tail: one to three remaining words.
.L_bn_sub_words_tail:
        .set    reorder
        lw      $12,0($5)
        lw      $8,0($6)
        subu    $7,1
        sltu    $24,$12,$8
        subu    $8,$12,$8
        subu    $12,$8,$2
        sgtu    $2,$12,$8
        sw      $12,0($4)
        addu    $2,$24
        beqz    $7,.L_bn_sub_words_return

        lw      $13,4($5)
        subu    $7,1
        lw      $9,4($6)
        sltu    $25,$13,$9
        subu    $9,$13,$9
        subu    $13,$9,$2
        sgtu    $2,$13,$9
        sw      $13,4($4)
        addu    $2,$25
        beqz    $7,.L_bn_sub_words_return

        lw      $14,2*4($5)
        lw      $10,2*4($6)
        sltu    $24,$14,$10
        subu    $10,$14,$10
        subu    $14,$10,$2
        sgtu    $2,$14,$10
        sw      $14,2*4($4)
        addu    $2,$24

.L_bn_sub_words_return:
        .set    noreorder
        jr      $31
        move    $4,$2
        .end    bn_sub_words_internal

#-----------------------------------------------------------------------
# bn_div_3_words: estimate one quotient word from the top three dividend
# words and the top two divisor words.
# In:   $4 = m, pointer to the top dividend word; m[0], m[-1] and m[-2]
#            are read
#       $5 = d1, multiplied by the estimate and compared against the
#            remainder (presumably the second divisor word -- confirm)
#       $6 = d0, the divisor passed to bn_div_words_internal
# Out:  $2 = quotient estimate (all ones when m[0] == d0); also copied
#       into $4 in the return delay slot.
# Uses: $1 ($at), $7, $8, $10-$15, $24, $25, HI/LO, plus everything
#       bn_div_words_internal uses.
#-----------------------------------------------------------------------
        .align  5
        .globl  bn_div_3_words
        .ent    bn_div_3_words
bn_div_3_words:
        .set    noreorder
        move    $7,$4           # we know that bn_div_words does not
                                # touch $7, $10, $11 and preserves $6
                                # so that we can save two arguments
                                # and return address in registers
                                # instead of stack:-)

        lw      $4,($7)                 # $4 = m[0]
        move    $10,$5                  # $10 = d1
        bne     $4,$6,bn_div_3_words_internal
        lw      $5,-4($7)               # delay slot: $5 = m[-1]
        li      $2,-1                   # m[0] == d0: return the maximum word
        jr      $31
        move    $4,$2
        .end    bn_div_3_words

# Slow path: q = (m[0]:m[-1]) / d0 via bn_div_words_internal, then q is
# decremented while q*d1 exceeds (remainder : m[-2]).
        .align  5
        .ent    bn_div_3_words_internal
bn_div_3_words_internal:
        .set    reorder
        move    $11,$31                 # keep the return address across the call
        bal     bn_div_words_internal   # $2 = quotient, $5 = remainder
        move    $31,$11
        multu   $10,$2                  # d1 * q
        lw      $14,-2*4($7)            # $14 = m[-2]
        move    $8,$0
        mfhi    $13                     # $13:$12 = d1 * q
        mflo    $12
        sltu    $24,$13,$5              # high(product) < remainder: done
.L_bn_div_3_words_inner_loop:
        bnez    $24,.L_bn_div_3_words_inner_loop_done
        sgeu    $1,$14,$12
        seq     $25,$13,$5
        and     $1,$25                  # $1 = product <= (remainder : m[-2])
        sltu    $15,$12,$10
        addu    $5,$6                   # remainder += d0
        subu    $13,$15
        subu    $12,$10                 # product -= d1
        sltu    $24,$13,$5
        sltu    $8,$5,$6                # remainder wrapped past a word
        or      $24,$8
        .set    noreorder
        beqz    $1,.L_bn_div_3_words_inner_loop
        subu    $2,1                    # delay slot: q-- (runs on both paths)
        addu    $2,1                    # loop exit: undo the extra decrement
        .set    reorder
.L_bn_div_3_words_inner_loop_done:
        .set    noreorder
        jr      $31
        move    $4,$2
        .end    bn_div_3_words_internal

#-----------------------------------------------------------------------
# bn_div_words: divide the two-word value ($4:$5) by the word $6.
# In:   $4 = high word, $5 = low word, $6 = divisor
# Out:  $2 = quotient (also copied into $4); remainder in $3 and $5.
#       A zero divisor returns $2 = -1 without dividing.
#       'break 6' is executed when normalising the dividend would shift
#       set bits out of the high word.
# Uses: $1 ($at), $3, $8, $9, $12-$15, $24, $25, HI/LO.  $6 is shifted
#       during the computation and restored before return.
# The quotient is produced as two 16-bit digits, one per round below.
#-----------------------------------------------------------------------
        .align  5
        .globl  bn_div_words
        .ent    bn_div_words
bn_div_words:
        .set    noreorder
        bnez    $6,bn_div_words_internal
        li      $2,-1           # I would rather signal div-by-zero
                                # which can be done with 'break 7'
        jr      $31
        move    $4,$2
        .end    bn_div_words

# Entered with .set noreorder still in effect from bn_div_words above:
# the instruction after each branch here is its delay slot.
        .align  5
        .ent    bn_div_words_internal
bn_div_words_internal:
        move    $3,$0
        bltz    $6,.L_bn_div_words_body # top bit already set: no shift needed
        move    $25,$3                  # delay slot: shift count = 0
        sll     $6,1                    # normalise: shift the divisor left ...
        bgtz    $6,.-4                  # ... until its top bit is set
        addu    $25,1                   # delay slot: count every shift

        .set    reorder
        negu    $13,$25                 # used as shift amount 32 - $25
        li      $14,-1
        sll     $14,$13
        and     $14,$4                  # high-word bits that would be shifted out
        srl     $1,$5,$13               # low-word bits that move into the high word
        .set    noreorder
        beqz    $14,.+12                # nothing lost: skip the break
        nop
        break   6               # signal overflow
        .set    reorder
        sll     $4,$25
        sll     $5,$25
        or      $4,$1
.L_bn_div_words_body:
        srl     $3,$6,4*4               # $3 = upper 16 bits of the divisor
        sgeu    $1,$4,$6
        .set    noreorder
        beqz    $1,.+12                 # high word < divisor: skip the subtract
        nop
        subu    $4,$6
        .set    reorder

# Round 1: upper 16-bit quotient digit.
        li      $8,-1
        srl     $9,$4,4*4               # $9 = upper 16 bits of the high word
        srl     $8,4*4                  # digit estimate = 0xffff
        beq     $3,$9,.L_bn_div_words_skip_div1
        divu    $0,$4,$3                # otherwise estimate = high word / $3
        mflo    $8
.L_bn_div_words_skip_div1:
        multu   $6,$8                   # $13:$12 = divisor * estimate
        sll     $15,$4,4*4              # $15 = next 32 bits of the dividend
        srl     $1,$5,4*4
        or      $15,$1
        mflo    $12
        mfhi    $13
.L_bn_div_words_inner_loop1:
        sltu    $14,$15,$12
        seq     $24,$9,$13
        sltu    $1,$9,$13
        and     $14,$24
        sltu    $2,$12,$6
        or      $1,$14                  # $1 = product > ($9:$15), estimate too big
        .set    noreorder
        beqz    $1,.L_bn_div_words_inner_loop1_done
        subu    $13,$2                  # delay slot: borrow for product -= divisor
        subu    $12,$6
        b       .L_bn_div_words_inner_loop1
        subu    $8,1                    # delay slot: estimate--
        .set    reorder
.L_bn_div_words_inner_loop1_done:

        sll     $5,4*4                  # expose the low 16 dividend bits
        subu    $4,$15,$12              # partial remainder
        sll     $2,$8,4*4               # upper quotient digit into position

# Round 2: lower 16-bit quotient digit.
        li      $8,-1
        srl     $9,$4,4*4               # $9 = upper 16 bits of the remainder
        srl     $8,4*4                  # digit estimate = 0xffff
        beq     $3,$9,.L_bn_div_words_skip_div2
        divu    $0,$4,$3
        mflo    $8
.L_bn_div_words_skip_div2:
        multu   $6,$8
        sll     $15,$4,4*4
        srl     $1,$5,4*4
        or      $15,$1
        mflo    $12
        mfhi    $13
.L_bn_div_words_inner_loop2:
        sltu    $14,$15,$12
        seq     $24,$9,$13
        sltu    $1,$9,$13
        and     $14,$24
        sltu    $3,$12,$6               # $3 is free to reuse from here on
        or      $1,$14
        .set    noreorder
        beqz    $1,.L_bn_div_words_inner_loop2_done
        subu    $13,$3
        subu    $12,$6
        b       .L_bn_div_words_inner_loop2
        subu    $8,1
        .set    reorder
.L_bn_div_words_inner_loop2_done:

        subu    $4,$15,$12
        or      $2,$8                   # merge the lower quotient digit
        srl     $3,$4,$25       # $3 contains remainder if anybody wants it
        srl     $6,$25          # restore $6

        .set    noreorder
        move    $5,$3
        jr      $31
        move    $4,$2
        .end    bn_div_words_internal

#-----------------------------------------------------------------------
# bn_mul_comba8: 8x8-word product, r[0..15] = a[0..7] * b[0..7], computed
# one result column at a time with a three-word accumulator (c1,c2,c3)
# that rotates between $2, $3 and $7.
# In:   $4 = r (16 words written), $5 = a, $6 = b
# Register map once loaded:
#       a[0..7] = $12,$13,$14,$15,$16,$18,$20,$5
#       b[0..7] = $8,$9,$10,$11,$17,$19,$21,$6
# Saves and restores $16-$21 in a 6-word stack frame.
# Uses: $1 ($at), $2, $3, $7-$15, $24, $25, HI/LO; $5 and $6 are
#       overwritten with a[7] and b[7].
# Every multu is issued one step early, so its comment names the product
# accumulated by the following mflo/mfhi group.
#-----------------------------------------------------------------------
        .align  5
        .globl  bn_mul_comba8
        .ent    bn_mul_comba8
bn_mul_comba8:
        .set    noreorder
        .frame  $29,6*4,$31
        .mask   0x003f0000,-4
        subu    $29,6*4
        sw      $21,5*4($29)
        sw      $20,4*4($29)
        sw      $19,3*4($29)
        sw      $18,2*4($29)
        sw      $17,1*4($29)
        sw      $16,0*4($29)

        .set    reorder
        lw      $12,0($5)       # If compiled with -mips3 option on
                                # R5000 box assembler barks on this
                                # line with "should not have mult/div
                                # as last instruction in bb (R10K
                                # bug)" warning. If anybody out there
                                # has a clue about how to circumvent
                                # this do send me a note.
                                #       <appro@fy.chalmers.se>

        lw      $8,0($6)
        lw      $13,4($5)
        lw      $14,2*4($5)
        multu   $12,$8          # mul_add_c(a[0],b[0],c1,c2,c3);
        lw      $15,3*4($5)
        lw      $9,4($6)
        lw      $10,2*4($6)
        lw      $11,3*4($6)
        mflo    $2
        mfhi    $3

        lw      $16,4*4($5)
        lw      $18,5*4($5)
        multu   $12,$9          # mul_add_c(a[0],b[1],c2,c3,c1);
        lw      $20,6*4($5)
        lw      $5,7*4($5)
        lw      $17,4*4($6)
        lw      $19,5*4($6)
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $13,$8          # mul_add_c(a[1],b[0],c2,c3,c1);
        addu    $7,$25,$1
        lw      $21,6*4($6)
        lw      $6,7*4($6)
        sw      $2,0($4)        # r[0]=c1;
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $14,$8          # mul_add_c(a[2],b[0],c3,c1,c2);
        addu    $25,$1
        addu    $7,$25
        sltu    $2,$7,$25
        sw      $3,4($4)        # r[1]=c2;

        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $13,$9          # mul_add_c(a[1],b[1],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $12,$10         # mul_add_c(a[0],b[2],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $3,$2,$25
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $12,$11         # mul_add_c(a[0],b[3],c1,c2,c3);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        sw      $7,2*4($4)      # r[2]=c3;

        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $13,$10         # mul_add_c(a[1],b[2],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $7,$3,$25
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $14,$9          # mul_add_c(a[2],b[1],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $15,$8          # mul_add_c(a[3],b[0],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $16,$8          # mul_add_c(a[4],b[0],c2,c3,c1);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        sw      $2,3*4($4)      # r[3]=c1;

        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $15,$9          # mul_add_c(a[3],b[1],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $2,$7,$25
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $14,$10         # mul_add_c(a[2],b[2],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $13,$11         # mul_add_c(a[1],b[3],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $12,$17         # mul_add_c(a[0],b[4],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $12,$19         # mul_add_c(a[0],b[5],c3,c1,c2);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        sw      $3,4*4($4)      # r[4]=c2;

        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $13,$17         # mul_add_c(a[1],b[4],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $3,$2,$25
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $14,$11         # mul_add_c(a[2],b[3],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $15,$10         # mul_add_c(a[3],b[2],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $16,$9          # mul_add_c(a[4],b[1],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $18,$8          # mul_add_c(a[5],b[0],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $20,$8          # mul_add_c(a[6],b[0],c1,c2,c3);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        sw      $7,5*4($4)      # r[5]=c3;

        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $18,$9          # mul_add_c(a[5],b[1],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $7,$3,$25
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $16,$10         # mul_add_c(a[4],b[2],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $15,$11         # mul_add_c(a[3],b[3],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $14,$17         # mul_add_c(a[2],b[4],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $13,$19         # mul_add_c(a[1],b[5],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $12,$21         # mul_add_c(a[0],b[6],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $12,$6          # mul_add_c(a[0],b[7],c2,c3,c1);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        sw      $2,6*4($4)      # r[6]=c1;

        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $13,$21         # mul_add_c(a[1],b[6],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $2,$7,$25
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $14,$19         # mul_add_c(a[2],b[5],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $15,$17         # mul_add_c(a[3],b[4],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $16,$11         # mul_add_c(a[4],b[3],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $18,$10         # mul_add_c(a[5],b[2],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $20,$9          # mul_add_c(a[6],b[1],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $5,$8           # mul_add_c(a[7],b[0],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $5,$9           # mul_add_c(a[7],b[1],c3,c1,c2);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        sw      $3,7*4($4)      # r[7]=c2;

        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $20,$10         # mul_add_c(a[6],b[2],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $3,$2,$25
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $18,$11         # mul_add_c(a[5],b[3],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $16,$17         # mul_add_c(a[4],b[4],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $15,$19         # mul_add_c(a[3],b[5],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $14,$21         # mul_add_c(a[2],b[6],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $13,$6          # mul_add_c(a[1],b[7],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $14,$6          # mul_add_c(a[2],b[7],c1,c2,c3);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        sw      $7,8*4($4)      # r[8]=c3;

        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $15,$21         # mul_add_c(a[3],b[6],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $7,$3,$25
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $16,$19         # mul_add_c(a[4],b[5],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $18,$17         # mul_add_c(a[5],b[4],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $20,$11         # mul_add_c(a[6],b[3],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $5,$10          # mul_add_c(a[7],b[2],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $5,$11          # mul_add_c(a[7],b[3],c2,c3,c1);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        sw      $2,9*4($4)      # r[9]=c1;

        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $20,$17         # mul_add_c(a[6],b[4],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $2,$7,$25
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $18,$19         # mul_add_c(a[5],b[5],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $16,$21         # mul_add_c(a[4],b[6],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $15,$6          # mul_add_c(a[3],b[7],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $16,$6          # mul_add_c(a[4],b[7],c3,c1,c2);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        sw      $3,10*4($4)     # r[10]=c2;

        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $18,$21         # mul_add_c(a[5],b[6],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $3,$2,$25
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $20,$19         # mul_add_c(a[6],b[5],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $5,$17          # mul_add_c(a[7],b[4],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $5,$19          # mul_add_c(a[7],b[5],c1,c2,c3);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        sw      $7,11*4($4)     # r[11]=c3;

        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $20,$21         # mul_add_c(a[6],b[6],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $7,$3,$25
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $18,$6          # mul_add_c(a[5],b[7],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $20,$6          # mul_add_c(a[6],b[7],c2,c3,c1);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        sw      $2,12*4($4)     # r[12]=c1;

        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $5,$21          # mul_add_c(a[7],b[6],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $2,$7,$25
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $5,$6           # mul_add_c(a[7],b[7],c3,c1,c2);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        sw      $3,13*4($4)     # r[13]=c2;

        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        addu    $25,$1
        addu    $2,$25
        sw      $7,14*4($4)     # r[14]=c3;
        sw      $2,15*4($4)     # r[15]=c1;

        .set    noreorder
        lw      $21,5*4($29)
        lw      $20,4*4($29)
        lw      $19,3*4($29)
        lw      $18,2*4($29)
        lw      $17,1*4($29)
        lw      $16,0*4($29)
        jr      $31
        addu    $29,6*4                 # delay slot: release the frame
        .end    bn_mul_comba8

#-----------------------------------------------------------------------
# bn_mul_comba4: 4x4-word product, r[0..7] = a[0..3] * b[0..3], computed
# one result column at a time with a three-word accumulator (c1,c2,c3)
# that rotates between $2, $3 and $7.
# In:   $4 = r (8 words written), $5 = a, $6 = b
# Register map once loaded:
#       a[0..3] = $12,$13,$14,$15      b[0..3] = $8,$9,$10,$11
# Uses: $1 ($at), $2, $3, $7-$15, $24, $25, HI/LO.  No stack frame.
# Every multu is issued one step early, so its comment names the product
# accumulated by the following mflo/mfhi group.
#-----------------------------------------------------------------------
        .align  5
        .globl  bn_mul_comba4
        .ent    bn_mul_comba4
bn_mul_comba4:
        .set    reorder
        lw      $12,0($5)
        lw      $8,0($6)
        lw      $13,4($5)
        lw      $14,2*4($5)
        multu   $12,$8          # mul_add_c(a[0],b[0],c1,c2,c3);
        lw      $15,3*4($5)
        lw      $9,4($6)
        lw      $10,2*4($6)
        lw      $11,3*4($6)
        mflo    $2
        mfhi    $3
        sw      $2,0($4)                # r[0]

        multu   $12,$9          # mul_add_c(a[0],b[1],c2,c3,c1);
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $13,$8          # mul_add_c(a[1],b[0],c2,c3,c1);
        addu    $7,$25,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $14,$8          # mul_add_c(a[2],b[0],c3,c1,c2);
        addu    $25,$1
        addu    $7,$25
        sltu    $2,$7,$25
        sw      $3,4($4)                # r[1]

        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $13,$9          # mul_add_c(a[1],b[1],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $12,$10         # mul_add_c(a[0],b[2],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $3,$2,$25
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $12,$11         # mul_add_c(a[0],b[3],c1,c2,c3);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        sw      $7,2*4($4)              # r[2]

        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $13,$10         # mul_add_c(a[1],b[2],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $7,$3,$25
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $14,$9          # mul_add_c(a[2],b[1],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $15,$8          # mul_add_c(a[3],b[0],c1,c2,c3);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        multu   $15,$9          # mul_add_c(a[3],b[1],c2,c3,c1);
        addu    $25,$1
        addu    $3,$25
        sltu    $1,$3,$25
        addu    $7,$1
        sw      $2,3*4($4)              # r[3]

        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $14,$10         # mul_add_c(a[2],b[2],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $2,$7,$25
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $13,$11         # mul_add_c(a[1],b[3],c2,c3,c1);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        mflo    $24
        mfhi    $25
        addu    $3,$24
        sltu    $1,$3,$24
        multu   $14,$11         # mul_add_c(a[2],b[3],c3,c1,c2);
        addu    $25,$1
        addu    $7,$25
        sltu    $1,$7,$25
        addu    $2,$1
        sw      $3,4*4($4)              # r[4]

        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $15,$10         # mul_add_c(a[3],b[2],c3,c1,c2);
        addu    $25,$1
        addu    $2,$25
        sltu    $3,$2,$25
        mflo    $24
        mfhi    $25
        addu    $7,$24
        sltu    $1,$7,$24
        multu   $15,$11         # mul_add_c(a[3],b[3],c1,c2,c3);
        addu    $25,$1
        addu    $2,$25
        sltu    $1,$2,$25
        addu    $3,$1
        sw      $7,5*4($4)              # r[5]

        mflo    $24
        mfhi    $25
        addu    $2,$24
        sltu    $1,$2,$24
        addu    $25,$1
        addu    $3,$25
        sw      $2,6*4($4)              # r[6]
        sw      $3,7*4($4)              # r[7]

        .set    noreorder
        jr      $31
        nop
        .end    bn_mul_comba4

#-----------------------------------------------------------------------
# bn_sqr_comba8 -- square an eight-word number, column by column.
#
# In:    $5 -> eight 32-bit words a[0..7], least significant first
#        $4 -> room for sixteen 32-bit result words r[0..15]
# Out:   r[0..15] = a[0..7] * a[0..7]
# Clobb: $1 (at), $2, $3, $7, $8-$15, $24, $25, HI, LO
#
# Register roles:
#        $12-$15 = a[0..3], $8-$11 = a[4..7]
#        $2,$3,$7 = rotating three-word column accumulator (low, mid,
#        high); after a column is stored the roles rotate by one.
#        $24/$25 = low/high half of the current product, $1 = carry.
#
# Each cross product a[i]*a[j] (i != j) has to be counted twice.  It is
# added to the accumulator two times with a carry check after every
# addition.  The earlier form doubled the high half first (2*hi plus
# the top bit of lo) and then added the low-word carry to it; that sum
# wraps to zero when the low 31 bits of hi are all ones, the top bit of
# lo is set and the low-word addition carries, which silently dropped
# a carry into the next column.
#
# hi + carry cannot wrap: the high half of a 32x32-bit product is at
# most 0xFFFFFFFE.
#
# Every multu is issued one step early ("next:"), so the multiplier
# runs while the previous product is being accumulated.
#-----------------------------------------------------------------------
	.align	5
	.globl	bn_sqr_comba8
	.ent	bn_sqr_comba8
bn_sqr_comba8:
	.set	reorder
	lw	$12,0($5)		# a[0]
	lw	$13,4($5)		# a[1]
	lw	$14,2*4($5)		# a[2]
	lw	$15,3*4($5)		# a[3]

	multu	$12,$12			# a[0]*a[0]
	lw	$8,4*4($5)		# a[4]
	lw	$9,5*4($5)		# a[5]
	lw	$10,6*4($5)		# a[6]
	lw	$11,7*4($5)		# a[7]
	mflo	$2
	mfhi	$3
	sw	$2,0($4)		# r[0]

	multu	$12,$13			# next: a[0]*a[1]

	# column 1: ($3,$7,$2) += 2*a[0]*a[1]; $7 and $2 start here
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24		# carry of first low add
	multu	$14,$12			# next: a[2]*a[0]
	addu	$3,$24
	addu	$7,$25,$1		# mid = hi + first carry
	sltu	$24,$3,$24		# carry of second low add
	addu	$25,$24
	addu	$7,$25			# mid += hi + second carry
	sltu	$2,$7,$25		# high = carry out of mid
	sw	$3,4($4)		# r[1]

	# column 2: ($7,$2,$3) += 2*a[2]*a[0]; $3 restarts here
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$13,$13			# next: a[1]*a[1]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$3,$2,$1
	addu	$2,$25
	sltu	$25,$2,$25
	addu	$3,$25
	# column 2: += a[1]*a[1]
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$15			# next: a[0]*a[3]
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)		# r[2]

	# column 3: ($2,$3,$7) += 2*a[0]*a[3]; $7 restarts here
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$13,$14			# next: a[1]*a[2]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$7,$3,$1
	addu	$3,$25
	sltu	$25,$3,$25
	addu	$7,$25
	# column 3: += 2*a[1]*a[2]
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$8,$12			# next: a[4]*a[0]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$1,$3,$1
	addu	$3,$25
	addu	$7,$1
	sltu	$25,$3,$25
	addu	$7,$25
	sw	$2,3*4($4)		# r[3]

	# column 4: ($3,$7,$2) += 2*a[4]*a[0]; $2 restarts here
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$15,$13			# next: a[3]*a[1]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$2,$7,$1
	addu	$7,$25
	sltu	$25,$7,$25
	addu	$2,$25
	# column 4: += 2*a[3]*a[1]
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$14			# next: a[2]*a[2]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$1,$7,$1
	addu	$7,$25
	addu	$2,$1
	sltu	$25,$7,$25
	addu	$2,$25
	# column 4: += a[2]*a[2]
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$12,$9			# next: a[0]*a[5]
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)		# r[4]

	# column 5: ($7,$2,$3) += 2*a[0]*a[5]; $3 restarts here
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$13,$8			# next: a[1]*a[4]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$3,$2,$1
	addu	$2,$25
	sltu	$25,$2,$25
	addu	$3,$25
	# column 5: += 2*a[1]*a[4]
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$14,$15			# next: a[2]*a[3]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$1,$2,$1
	addu	$2,$25
	addu	$3,$1
	sltu	$25,$2,$25
	addu	$3,$25
	# column 5: += 2*a[2]*a[3]
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$10,$12			# next: a[6]*a[0]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$1,$2,$1
	addu	$2,$25
	addu	$3,$1
	sltu	$25,$2,$25
	addu	$3,$25
	sw	$7,5*4($4)		# r[5]

	# column 6: ($2,$3,$7) += 2*a[6]*a[0]; $7 restarts here
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$9,$13			# next: a[5]*a[1]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$7,$3,$1
	addu	$3,$25
	sltu	$25,$3,$25
	addu	$7,$25
	# column 6: += 2*a[5]*a[1]
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$8,$14			# next: a[4]*a[2]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$1,$3,$1
	addu	$3,$25
	addu	$7,$1
	sltu	$25,$3,$25
	addu	$7,$25
	# column 6: += 2*a[4]*a[2]
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$15			# next: a[3]*a[3]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$1,$3,$1
	addu	$3,$25
	addu	$7,$1
	sltu	$25,$3,$25
	addu	$7,$25
	# column 6: += a[3]*a[3]
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$12,$11			# next: a[0]*a[7]
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,6*4($4)		# r[6]

	# column 7: ($3,$7,$2) += 2*a[0]*a[7]; $2 restarts here
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$13,$10			# next: a[1]*a[6]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$2,$7,$1
	addu	$7,$25
	sltu	$25,$7,$25
	addu	$2,$25
	# column 7: += 2*a[1]*a[6]
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$9			# next: a[2]*a[5]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$1,$7,$1
	addu	$7,$25
	addu	$2,$1
	sltu	$25,$7,$25
	addu	$2,$25
	# column 7: += 2*a[2]*a[5]
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$15,$8			# next: a[3]*a[4]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$1,$7,$1
	addu	$7,$25
	addu	$2,$1
	sltu	$25,$7,$25
	addu	$2,$25
	# column 7: += 2*a[3]*a[4]
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$11,$13			# next: a[7]*a[1]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$1,$7,$1
	addu	$7,$25
	addu	$2,$1
	sltu	$25,$7,$25
	addu	$2,$25
	sw	$3,7*4($4)		# r[7]

	# column 8: ($7,$2,$3) += 2*a[7]*a[1]; $3 restarts here
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$10,$14			# next: a[6]*a[2]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$3,$2,$1
	addu	$2,$25
	sltu	$25,$2,$25
	addu	$3,$25
	# column 8: += 2*a[6]*a[2]
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$9,$15			# next: a[5]*a[3]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$1,$2,$1
	addu	$2,$25
	addu	$3,$1
	sltu	$25,$2,$25
	addu	$3,$25
	# column 8: += 2*a[5]*a[3]
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$8,$8			# next: a[4]*a[4]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$1,$2,$1
	addu	$2,$25
	addu	$3,$1
	sltu	$25,$2,$25
	addu	$3,$25
	# column 8: += a[4]*a[4]
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$14,$11			# next: a[2]*a[7]
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,8*4($4)		# r[8]

	# column 9: ($2,$3,$7) += 2*a[2]*a[7]; $7 restarts here
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$10			# next: a[3]*a[6]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$7,$3,$1
	addu	$3,$25
	sltu	$25,$3,$25
	addu	$7,$25
	# column 9: += 2*a[3]*a[6]
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$8,$9			# next: a[4]*a[5]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$1,$3,$1
	addu	$3,$25
	addu	$7,$1
	sltu	$25,$3,$25
	addu	$7,$25
	# column 9: += 2*a[4]*a[5]
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$11,$15			# next: a[7]*a[3]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$1,$3,$1
	addu	$3,$25
	addu	$7,$1
	sltu	$25,$3,$25
	addu	$7,$25
	sw	$2,9*4($4)		# r[9]

	# column 10: ($3,$7,$2) += 2*a[7]*a[3]; $2 restarts here
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$10,$8			# next: a[6]*a[4]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$2,$7,$1
	addu	$7,$25
	sltu	$25,$7,$25
	addu	$2,$25
	# column 10: += 2*a[6]*a[4]
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$9,$9			# next: a[5]*a[5]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$1,$7,$1
	addu	$7,$25
	addu	$2,$1
	sltu	$25,$7,$25
	addu	$2,$25
	# column 10: += a[5]*a[5]
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$8,$11			# next: a[4]*a[7]
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,10*4($4)		# r[10]

	# column 11: ($7,$2,$3) += 2*a[4]*a[7]; $3 restarts here
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$9,$10			# next: a[5]*a[6]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$3,$2,$1
	addu	$2,$25
	sltu	$25,$2,$25
	addu	$3,$25
	# column 11: += 2*a[5]*a[6]
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$11,$9			# next: a[7]*a[5]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$1,$2,$1
	addu	$2,$25
	addu	$3,$1
	sltu	$25,$2,$25
	addu	$3,$25
	sw	$7,11*4($4)		# r[11]

	# column 12: ($2,$3,$7) += 2*a[7]*a[5]; $7 restarts here
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$10,$10			# next: a[6]*a[6]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$7,$3,$1
	addu	$3,$25
	sltu	$25,$3,$25
	addu	$7,$25
	# column 12: += a[6]*a[6]
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$10,$11			# next: a[6]*a[7]
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,12*4($4)		# r[12]

	# column 13: ($3,$7,$2) += 2*a[6]*a[7]; $2 restarts here
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$11,$11			# next: a[7]*a[7]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$2,$7,$1
	addu	$7,$25
	sltu	$25,$7,$25
	addu	$2,$25
	sw	$3,13*4($4)		# r[13]

	# column 14: ($7,$2) += a[7]*a[7]; no further carry-out is possible
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sw	$7,14*4($4)		# r[14]
	sw	$2,15*4($4)		# r[15]

	.set	noreorder
	jr	$31
	nop				# branch delay slot
	.end	bn_sqr_comba8
|
||
|
|
||
|
#-----------------------------------------------------------------------
# bn_sqr_comba4 -- square a four-word number, column by column.
#
# In:    $5 -> four 32-bit words a[0..3], least significant first
#        $4 -> room for eight 32-bit result words r[0..7]
# Out:   r[0..7] = a[0..3] * a[0..3]
# Clobb: $1 (at), $2, $3, $7, $12-$15, $24, $25, HI, LO
#
# Register roles:
#        $12-$15 = a[0..3]
#        $2,$3,$7 = rotating three-word column accumulator (low, mid,
#        high); after a column is stored the roles rotate by one.
#        $24/$25 = low/high half of the current product, $1 = carry.
#
# Each cross product a[i]*a[j] (i != j) has to be counted twice.  It is
# added to the accumulator two times with a carry check after every
# addition.  The earlier form doubled the high half first (2*hi plus
# the top bit of lo) and then added the low-word carry to it; that sum
# wraps to zero when the low 31 bits of hi are all ones, the top bit of
# lo is set and the low-word addition carries, which silently dropped
# a carry into the next column.
#
# hi + carry cannot wrap: the high half of a 32x32-bit product is at
# most 0xFFFFFFFE.
#
# Every multu is issued one step early ("next:"), so the multiplier
# runs while the previous product is being accumulated.
#-----------------------------------------------------------------------
	.align	5
	.globl	bn_sqr_comba4
	.ent	bn_sqr_comba4
bn_sqr_comba4:
	.set	reorder
	lw	$12,0($5)		# a[0]
	lw	$13,4($5)		# a[1]
	multu	$12,$12			# a[0]*a[0]
	lw	$14,2*4($5)		# a[2]
	lw	$15,3*4($5)		# a[3]
	mflo	$2
	mfhi	$3
	sw	$2,0($4)		# r[0]

	multu	$12,$13			# next: a[0]*a[1]

	# column 1: ($3,$7,$2) += 2*a[0]*a[1]; $7 and $2 start here
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24		# carry of first low add
	multu	$14,$12			# next: a[2]*a[0]
	addu	$3,$24
	addu	$7,$25,$1		# mid = hi + first carry
	sltu	$24,$3,$24		# carry of second low add
	addu	$25,$24
	addu	$7,$25			# mid += hi + second carry
	sltu	$2,$7,$25		# high = carry out of mid
	sw	$3,4($4)		# r[1]

	# column 2: ($7,$2,$3) += 2*a[2]*a[0]; $3 restarts here
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$13,$13			# next: a[1]*a[1]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$3,$2,$1
	addu	$2,$25
	sltu	$25,$2,$25
	addu	$3,$25
	# column 2: += a[1]*a[1]
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$15			# next: a[0]*a[3]
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)		# r[2]

	# column 3: ($2,$3,$7) += 2*a[0]*a[3]; $7 restarts here
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$13,$14			# next: a[1]*a[2]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$7,$3,$1
	addu	$3,$25
	sltu	$25,$3,$25
	addu	$7,$25
	# column 3: += 2*a[1]*a[2]
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$13			# next: a[3]*a[1]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$1,$3,$1
	addu	$3,$25
	addu	$7,$1
	sltu	$25,$3,$25
	addu	$7,$25
	sw	$2,3*4($4)		# r[3]

	# column 4: ($3,$7,$2) += 2*a[3]*a[1]; $2 restarts here
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$14			# next: a[2]*a[2]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$2,$7,$1
	addu	$7,$25
	sltu	$25,$7,$25
	addu	$2,$25
	# column 4: += a[2]*a[2]
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$15			# next: a[2]*a[3]
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)		# r[4]

	# column 5: ($7,$2,$3) += 2*a[2]*a[3]; $3 restarts here
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$15,$15			# next: a[3]*a[3]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$3,$2,$1
	addu	$2,$25
	sltu	$25,$2,$25
	addu	$3,$25
	sw	$7,5*4($4)		# r[5]

	# column 6: ($2,$3) += a[3]*a[3]; no further carry-out is possible
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sw	$2,6*4($4)		# r[6]
	sw	$3,7*4($4)		# r[7]

	.set	noreorder
	jr	$31
	nop				# branch delay slot
	.end	bn_sqr_comba4
|