mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-04 12:59:23 +08:00
8a4c0d4a3f
issue #28657: follow https://pkg.go.dev/cmd/go#hdr-Build_constraints to compile Go assembly for different CPU architectures. Signed-off-by: chasingegg <chao.gao@zilliz.com>
56 lines
1.2 KiB
ArmAsm
56 lines
1.2 KiB
ArmAsm
// Code generated by command: go run ip.go -out ip_amd64.s -stubs ip_stub_amd64.go. DO NOT EDIT.
|
|
|
|
#include "textflag.h"
|
|
|
|
// func IP(x []float32, y []float32) float32
// Requires: AVX, FMA3
//
// IP returns the inner product sum(x[i] * y[i]) of two float32 slices.
// Only the first min(len(x), len(y)) elements are read, so a shorter y can
// never cause a read past the end of its backing array.
//
// NOTE: this file is marked as generated ("go run ip.go"); mirror any change
// made here in the generator so it is not lost on regeneration.
//
// Register roles:
//   AX    = cursor into x
//   CX    = cursor into y
//   DX    = number of elements still to process
//   BX    = len(y), used only to clamp DX
//   Y0-Y3 = four independent 8-lane partial sums (block loop)
//   Y4-Y7 = x values loaded for the current block
//   X4    = scalar partial sum (tail loop), X5 = x value loaded for the tail
TEXT ·IP(SB), NOSPLIT, $0-52
	MOVQ x_base+0(FP), AX
	MOVQ y_base+24(FP), CX
	MOVQ x_len+8(FP), DX
	MOVQ y_len+32(FP), BX

	// DX = min(len(x), len(y)): if len(y) < len(x), use len(y).
	CMPQ    BX, DX
	CMOVQLT BX, DX

	// Clear the four vector accumulators.
	VXORPS Y0, Y0, Y0
	VXORPS Y1, Y1, Y1
	VXORPS Y2, Y2, Y2
	VXORPS Y3, Y3, Y3

blockloop:
	// Process 32 floats (4 x 8 lanes, 128 bytes) per iteration while at
	// least 32 elements remain.
	CMPQ DX, $0x00000020
	JL   tail
	VMOVUPS (AX), Y4
	VMOVUPS 32(AX), Y5
	VMOVUPS 64(AX), Y6
	VMOVUPS 96(AX), Y7

	// Yn += x_block * y_block, with y read directly from memory.
	VFMADD231PS (CX), Y4, Y0
	VFMADD231PS 32(CX), Y5, Y1
	VFMADD231PS 64(CX), Y6, Y2
	VFMADD231PS 96(CX), Y7, Y3
	ADDQ $0x00000080, AX
	ADDQ $0x00000080, CX
	SUBQ $0x00000020, DX
	JMP  blockloop

tail:
	// Fewer than 32 elements remain; accumulate them one at a time in X4.
	// The VEX-encoded XOR also clears the upper lanes left over in Y4.
	VXORPS X4, X4, X4

tailloop:
	TESTQ DX, DX
	JZ    reduce
	VMOVSS      (AX), X5
	VFMADD231SS (CX), X5, X4
	ADDQ $0x00000004, AX
	ADDQ $0x00000004, CX
	DECQ DX
	JMP  tailloop

reduce:
	// Fold the four 8-lane accumulators into Y0.
	VADDPS Y0, Y1, Y0
	VADDPS Y2, Y3, Y2
	VADDPS Y0, Y2, Y0

	// Add the upper 128 bits of Y0 onto the lower 128 bits.
	VEXTRACTF128 $0x01, Y0, X1
	VADDPS       X0, X1, X0

	// Merge the scalar tail sum (only lane 0 of X4 is non-zero).
	VADDPS X0, X4, X0

	// Two horizontal adds leave the sum of all four lanes in lane 0.
	VHADDPS X0, X0, X0
	VHADDPS X0, X0, X0

	// Store with the VEX-encoded form and clear the upper YMM halves before
	// returning, so SSE code in the caller does not pay an AVX/SSE
	// transition penalty.
	VMOVSS X0, ret+48(FP)
	VZEROUPPER
	RET
|