mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-05 21:39:19 +08:00
8a4c0d4a3f
issue #28657 follow https://pkg.go.dev/cmd/go#hdr-Build_constraints to compile go assembly with different cpu arch Signed-off-by: chasingegg <chao.gao@zilliz.com>
65 lines
1.4 KiB
ArmAsm
65 lines
1.4 KiB
ArmAsm
// Code generated by command: go run l2.go -out l2_amd64.s -stubs l2_stub_amd64.go. DO NOT EDIT.
|
|
|
|
#include "textflag.h"
|
|
|
|
// func L2(x []float32, y []float32) float32
// Requires: AVX, FMA3, SSE
//
// L2 returns the sum of (x[i]-y[i])^2 for i in [0, len(x)). No square root
// is taken. Only x's length is read; y_len is never loaded, so the caller
// must guarantee len(y) >= len(x).
//
// Register roles:
//   AX           cursor into x
//   CX           cursor into y
//   DX           number of elements still to process
//   Y0,Y2,Y4,Y6  four independent 8-lane partial sums (vector loop)
//   Y1,Y3,Y5,Y7  per-iteration differences (vector loop)
//   X1           partial sum of the scalar tail, lane 0 only
//   X3           per-iteration difference (scalar tail)
//
// NOTE(review): the file header states this code is generated from l2.go;
// mirror any change made here in the generator so a regeneration keeps it.
TEXT ·L2(SB), NOSPLIT, $0-52
	MOVQ x_base+0(FP), AX
	MOVQ y_base+24(FP), CX
	MOVQ x_len+8(FP), DX

	// Clear the accumulators. The difference registers (Y1/Y3/Y5/Y7) need
	// no clearing: each is fully overwritten by a load before it is read.
	VXORPS Y0, Y0, Y0
	VXORPS Y2, Y2, Y2
	VXORPS Y4, Y4, Y4
	VXORPS Y6, Y6, Y6

blockloop:
	// Vector path: 32 floats (4 x 8 lanes, 128 bytes) per iteration.
	CMPQ DX, $32
	JL   tail
	VMOVUPS (AX), Y1
	VMOVUPS 32(AX), Y3
	VMOVUPS 64(AX), Y5
	VMOVUPS 96(AX), Y7
	VSUBPS  (CX), Y1, Y1            // Y1 = x[0:8]   - y[0:8]
	VSUBPS  32(CX), Y3, Y3          // Y3 = x[8:16]  - y[8:16]
	VSUBPS  64(CX), Y5, Y5          // Y5 = x[16:24] - y[16:24]
	VSUBPS  96(CX), Y7, Y7          // Y7 = x[24:32] - y[24:32]
	VFMADD231PS Y1, Y1, Y0          // Y0 += Y1 * Y1
	VFMADD231PS Y3, Y3, Y2          // Y2 += Y3 * Y3
	VFMADD231PS Y5, Y5, Y4          // Y4 += Y5 * Y5
	VFMADD231PS Y7, Y7, Y6          // Y6 += Y7 * Y7
	ADDQ $128, AX
	ADDQ $128, CX
	SUBQ $32, DX
	JMP  blockloop

tail:
	// Fewer than 32 elements remain. The VEX-encoded write clears all of
	// Y1, so only lane 0 of X1 is ever non-zero after the tail loop.
	VXORPS X1, X1, X1

tailloop:
	// Scalar path: one float per iteration until DX reaches zero.
	TESTQ DX, DX
	JE    reduce
	VMOVSS (AX), X3
	VSUBSS (CX), X3, X3             // X3 = x[i] - y[i]
	VFMADD231SS X3, X3, X1          // X1[0] += X3[0] * X3[0]
	ADDQ $4, AX
	ADDQ $4, CX
	DECQ DX
	JMP  tailloop

reduce:
	// Fold the four vector accumulators into one, then 256 -> 128 bits,
	// add the scalar tail sum, and collapse the four lanes into lane 0.
	VADDPS Y0, Y2, Y0
	VADDPS Y4, Y6, Y4
	VADDPS Y0, Y4, Y0
	VEXTRACTF128 $0x01, Y0, X2
	VADDPS X0, X2, X0
	VADDPS X0, X1, X0
	VHADDPS X0, X0, X0
	VHADDPS X0, X0, X0

	// Clear the dirty upper YMM halves before the legacy-SSE store below
	// and before returning to callers that use non-VEX SSE instructions;
	// this avoids AVX/SSE transition penalties. The low 128 bits (and thus
	// the result in X0) are preserved.
	VZEROUPPER
	MOVSS X0, ret+48(FP)
	RET
|