// Mirror of https://github.com/oxen-io/session-android.git
// (synced 2024-12-24 16:57:50 +00:00) -- 116 lines, 2.7 KiB, ArmAsm
// GHASH routines for ARMv8 (AArch64, GNU as syntax). The file relies on the
// C preprocessor (#include / #ifndef), and the PMULL/PMULL2 instructions used
// further down require the crypto extension selected by .arch below.
#include "arm_arch.h"

.text
.arch	armv8-a+crypto
//-----------------------------------------------------------------------
// gcm_init_v8 -- derive the "twisted" hash key from H.
//
// In:    x0 = destination, receives one 16-byte value
//        x1 = H, 16 bytes read as two 64-bit lanes; the lane at [x1] is
//             treated as the upper half (its top bit is the carry source)
// Out:   [x0]   = low  64 bits of the result
//        [x0+8] = high 64 bits of the result
//        result = (H << 1) as a 128-bit value, XORed with 0xc2....01 when
//                 the bit shifted out of the top was set
// Clobb: v3, v16-v19. No general registers, flags or stack are touched.
// NOTE(review): presumably called from C as
//        gcm_init_v8(Htable, H) -- confirm the prototype at the call site.
//-----------------------------------------------------------------------
.global	gcm_init_v8
.type	gcm_init_v8,%function
.align	4
gcm_init_v8:
	ld1	{v17.2d},[x1]			//load H
	movi	v16.16b,#0xe1
	ext	v3.16b,v17.16b,v17.16b,#8	//v3 = H with 64-bit halves swapped
	shl	v16.2d,v16.2d,#57		//each lane = 0xc200000000000000
	ushr	v18.2d,v16.2d,#63		//each lane = 1
	ext	v16.16b,v18.16b,v16.16b,#8	//t0=0xc2....01
	dup	v17.4s,v17.s[1]			//replicate the top 32 bits of H
	ushr	v19.2d,v3.2d,#63		//top bit of each half
	sshr	v17.4s,v17.4s,#31		//broadcast carry bit
	and	v19.16b,v19.16b,v16.16b		//keep only the low half's carry-out
	shl	v3.2d,v3.2d,#1
	ext	v19.16b,v19.16b,v19.16b,#8	//line it up with the high half
	and	v16.16b,v16.16b,v17.16b		//t0 if H's top bit was set, else 0
	orr	v3.16b,v3.16b,v19.16b		//H<<<=1
	eor	v3.16b,v3.16b,v16.16b		//twisted H
	st1	{v3.2d},[x0]

	ret
.size	gcm_init_v8,.-gcm_init_v8

//-----------------------------------------------------------------------
// gcm_gmult_v8 -- one multiply-and-reduce of Xi by the twisted H, in place.
//
// In:    x0 = Xi, 16 bytes, read here and overwritten by the shared tail
//        x1 = twisted H, 16 bytes
// Clobb: x2, x3, x12, v0-v3, v17-v21, flags (via the shared tail). No stack.
//
// This routine only prepares registers and then branches to .Lgmult_v8
// inside gcm_ghash_v8; the `ret` there returns to our caller.
//   v17 = Xi (byte-reversed per 64-bit lane on little-endian)
//   v3  = v17 with its halves swapped
//   v19 = 0xc200000000000000 in both lanes (reduction constant)
//   v20 = twisted H, v21 = H.lo^H.hi
//   x3  = 0   so the tail's `subs x3,x3,#16` borrows and the loop exits
//   x2  = x0, x12 = 0   so the tail's look-ahead load re-reads Xi without
//                       advancing (the loaded value is not used)
//-----------------------------------------------------------------------
.global	gcm_gmult_v8
.type	gcm_gmult_v8,%function
.align	4
gcm_gmult_v8:
	ld1	{v17.2d},[x0]			//load Xi
	movi	v19.16b,#0xe1
	ld1	{v20.2d},[x1]			//load twisted H
	shl	v19.2d,v19.2d,#57		//each lane = 0xc200000000000000
#ifndef __ARMEB__
	rev64	v17.16b,v17.16b
#endif
	ext	v21.16b,v20.16b,v20.16b,#8
	mov	x3,#0				//no further blocks
	ext	v3.16b,v17.16b,v17.16b,#8
	mov	x12,#0				//look-ahead load must not advance
	eor	v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing
	mov	x2,x0				//look-ahead load reads Xi itself
	b	.Lgmult_v8
.size	gcm_gmult_v8,.-gcm_gmult_v8

//-----------------------------------------------------------------------
// gcm_ghash_v8 -- fold len bytes at inp into the GHASH accumulator Xi.
//
// In:    x0 = Xi, 16 bytes, read on entry and overwritten with the result
//        x1 = twisted H, 16 bytes
//        x2 = inp
//        x3 = len in bytes, consumed 16 at a time. len < 16 is a no-op
//             (Xi is left untouched). len should be a multiple of 16: the
//             tail of a partial block is not hashed, and the look-ahead
//             load may read past it.
// Clobb: x2, x3, x12, v0-v3, v17-v21, flags. No stack use.
//
// Register roles inside the loop:
//        v0  = running Xi              v3  = next block, halves swapped
//        v17 = next block              v19 = 0xc200000000000000 per lane
//        v20 = twisted H               v21 = H.lo^H.hi (Karatsuba)
//        x12 = post-increment of the look-ahead load: 16, forced to 0 once
//              the block just counted is the last one, so for a multiple
//              of 16 the load never steps beyond the input
//
// .Lgmult_v8 is a second entry point: gcm_gmult_v8 branches there with
// x3 = 0, so exactly one multiply+reduce runs before Xi is stored.
//-----------------------------------------------------------------------
.global	gcm_ghash_v8
.type	gcm_ghash_v8,%function
.align	4
gcm_ghash_v8:
	cmp	x3,#16
	b.lo	.Lghash_done_v8			//no complete block: without this the
						//wrapped counter would keep the loop
						//running far past the input
	ld1	{v0.2d},[x0]			//load [rotated] Xi
	subs	x3,x3,#16
	movi	v19.16b,#0xe1
	mov	x12,#16
	ld1	{v20.2d},[x1]			//load twisted H
	csel	x12,xzr,x12,eq			//single block: do not advance inp
	ext	v0.16b,v0.16b,v0.16b,#8
	shl	v19.2d,v19.2d,#57		//each lane = 0xc200000000000000
	ld1	{v17.2d},[x2],x12		//load [rotated] inp
	ext	v21.16b,v20.16b,v20.16b,#8
#ifndef __ARMEB__
	rev64	v0.16b,v0.16b
	rev64	v17.16b,v17.16b
#endif
	eor	v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing
	ext	v3.16b,v17.16b,v17.16b,#8
	b	.Loop_v8

.align	4
.Loop_v8:
	ext	v18.16b,v0.16b,v0.16b,#8
	eor	v3.16b,v3.16b,v0.16b		//inp^=Xi
	eor	v17.16b,v17.16b,v18.16b		//v17.16b is rotated inp^Xi

.Lgmult_v8:
	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
	subs	x3,x3,#16			//flags consumed by b.hs below
	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
	csel	x12,xzr,x12,eq			//last block ahead: stop advancing

	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	eor	v1.16b,v1.16b,v17.16b
	ld1	{v17.2d},[x2],x12		//load [rotated] inp
	eor	v1.16b,v1.16b,v18.16b
	pmull	v18.1q,v0.1d,v19.1d		//1st phase

	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
#ifndef __ARMEB__
	rev64	v17.16b,v17.16b
#endif
	eor	v0.16b,v1.16b,v18.16b
	ext	v3.16b,v17.16b,v17.16b,#8

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase
	pmull	v0.1q,v0.1d,v19.1d
	eor	v18.16b,v18.16b,v2.16b
	eor	v0.16b,v0.16b,v18.16b
	b.hs	.Loop_v8			//no borrow: another block to hash

#ifndef __ARMEB__
	rev64	v0.16b,v0.16b
#endif
	ext	v0.16b,v0.16b,v0.16b,#8
	st1	{v0.2d},[x0]			//write out Xi

.Lghash_done_v8:
	ret
.size	gcm_ghash_v8,.-gcm_ghash_v8
// Attribution string emitted into the current section, followed by
// re-alignment.
.asciz	"GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align	2