816 lines
29 KiB
C
816 lines
29 KiB
C
/*
|
|
|
|
Copyright (c) 2009, 2010, 2011, 2012, 2013 STMicroelectronics
|
|
Written by Christophe Lyon
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
#ifndef _STM_ARM_NEON_REF_H_
|
|
#define _STM_ARM_NEON_REF_H_
|
|
|
|
#if defined(__cplusplus)
|
|
#include <cstdio>
|
|
#include <cinttypes>
|
|
#include <cstring>
|
|
#else
|
|
#include <stdio.h>
|
|
#if defined(_MSC_VER)
|
|
#include "msinttypes.h"
|
|
#include <float.h> /* for isnan() ... */
|
|
static int32_t _ptrNan[]={0x7fc00000L};
|
|
#define NAN (*(float*)_ptrNan)
|
|
static int32_t _ptrInf[]={0x7f800000L};
|
|
#define INFINITY (*(float*)_ptrInf)
|
|
#define HUGE_VALF INFINITY
|
|
#else
|
|
#include <inttypes.h>
|
|
#endif
|
|
#include <string.h>
|
|
#endif
|
|
|
|
#define xSTR(X) #X
|
|
#define STR(X) xSTR(X)
|
|
|
|
#define xNAME1(V,T) V ## _ ## T
|
|
#define xNAME(V,T) xNAME1(V,T)
|
|
|
|
#define VAR(V,T,W) xNAME(V,T##W)
|
|
#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)
|
|
|
|
#define VECT_NAME(T, W, N) T##W##x##N
|
|
#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
|
|
#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
|
|
#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)
|
|
|
|
#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
|
|
#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
|
|
|
|
/* This one is used for padding between input buffers. */
|
|
#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42;
|
|
|
|
/* Array declarations. */
|
|
#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
|
|
#define ARRAY4(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[4]
|
|
|
|
/* Arrays of vectors. */
|
|
#define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L))
|
|
#define VECT_ARRAY(V, T, W, N, L) T##W##_t VECT_ARRAY_VAR(V,T,W,N,L)[N*L]
|
|
|
|
static int result_idx = 0;
|
|
#define DUMP(MSG,T,W,N,FMT) \
|
|
fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
|
|
STR(VECT_VAR(result, T, W, N))); \
|
|
for(i=0; i<N ; i++) \
|
|
{ \
|
|
fprintf(ref_file, "%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \
|
|
} \
|
|
fprintf(ref_file, " }\n"); \
|
|
DUMP4GCC(MSG,T,W,N,FMT);
|
|
|
|
/* Use casts for remove sign bits */
|
|
#define DUMP_POLY(MSG,T,W,N,FMT) \
|
|
fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
|
|
STR(VECT_VAR(result, T, W, N))); \
|
|
for(i=0; i<N ; i++) \
|
|
{ \
|
|
fprintf(ref_file, "%" FMT ", ", \
|
|
(uint##W##_t)VECT_VAR(result, T, W, N)[i]); \
|
|
} \
|
|
fprintf(ref_file, " }\n"); \
|
|
DUMP4GCC(MSG,T,W,N,FMT);
|
|
|
|
#define DUMP_FP(MSG,T,W,N,FMT) \
|
|
fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
|
|
STR(VECT_VAR(result, T, W, N))); \
|
|
for(i=0; i<N ; i++) \
|
|
{ \
|
|
union fp_operand { \
|
|
uint##W##_t i; \
|
|
float##W##_t f; \
|
|
} tmp; \
|
|
tmp.f = VECT_VAR(result, T, W, N)[i]; \
|
|
fprintf(ref_file, "%" FMT ", ", tmp.i); \
|
|
} \
|
|
fprintf(ref_file, " }\n"); \
|
|
DUMP4GCC_FP(MSG,T,W,N,FMT);
|
|
|
|
#define DUMP4GCC(MSG,T,W,N,FMT) \
|
|
fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
|
|
STR(T), W, N); \
|
|
for(i=0; i<(N-1) ; i++) \
|
|
{ \
|
|
if (W < 32) { \
|
|
uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i]; \
|
|
fprintf(gcc_tests_file, "0x%" FMT ", ", tmp); \
|
|
} else { \
|
|
fprintf(gcc_tests_file, "0x%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \
|
|
} \
|
|
} \
|
|
if (W < 32) { \
|
|
uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i]; \
|
|
fprintf(gcc_tests_file, "0x%" FMT, tmp); \
|
|
} else { \
|
|
fprintf(gcc_tests_file, "0x%" FMT, VECT_VAR(result, T, W, N)[i]); \
|
|
} \
|
|
fprintf(gcc_tests_file, " };\n");
|
|
|
|
#define DUMP4GCC_FP(MSG,T,W,N,FMT) \
|
|
{ \
|
|
union fp_operand { \
|
|
uint##W##_t i; \
|
|
float##W##_t f; \
|
|
} tmp; \
|
|
fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
|
|
"hfloat", W, N); \
|
|
for(i=0; i<(N-1) ; i++) \
|
|
{ \
|
|
tmp.f = VECT_VAR(result, T, W, N)[i]; \
|
|
fprintf(gcc_tests_file, "0x%" FMT ", ", tmp.i); \
|
|
} \
|
|
tmp.f = VECT_VAR(result, T, W, N)[i]; \
|
|
fprintf(gcc_tests_file, "0x%" FMT, tmp.i); \
|
|
fprintf(gcc_tests_file, " };\n"); \
|
|
}
|
|
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
#define float16_t __fp16
|
|
|
|
#define DUMP_FP16(MSG,T,W,N,FMT) \
|
|
fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
|
|
STR(VECT_VAR(result, T, W, N))); \
|
|
for(i=0; i<N ; i++) \
|
|
{ \
|
|
uint##W##_t tmp; \
|
|
tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \
|
|
fprintf(ref_file, "%" FMT ", ", tmp); \
|
|
} \
|
|
fprintf(ref_file, " }\n"); \
|
|
DUMP4GCC_FP16(MSG,T,W,N,FMT);
|
|
|
|
#define DUMP4GCC_FP16(MSG,T,W,N,FMT) \
|
|
{ \
|
|
uint##W##_t tmp; \
|
|
fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
|
|
"hfloat", W, N); \
|
|
for(i=0; i<(N-1) ; i++) \
|
|
{ \
|
|
tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \
|
|
fprintf(gcc_tests_file, "0x%" FMT ", ", tmp); \
|
|
} \
|
|
tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \
|
|
fprintf(gcc_tests_file, "0x%" FMT, tmp); \
|
|
fprintf(gcc_tests_file, " };\n"); \
|
|
}
|
|
#endif
|
|
|
|
#define CLEAN_PATTERN_8 0x33
|
|
#define CLEAN_PATTERN_16 0x3333
|
|
#define CLEAN_PATTERN_32 0x33333333
|
|
#define CLEAN_PATTERN_64 0x3333333333333333
|
|
|
|
#define CLEAN(VAR,T,W,N) \
|
|
memset(VECT_VAR(VAR, T, W, N), \
|
|
CLEAN_PATTERN_8, \
|
|
sizeof(VECT_VAR(VAR, T, W, N)));
|
|
|
|
#define CHECK_INIT(VAR,Q,T1,T2,W,N) \
|
|
{ \
|
|
ARRAY(check_result, T1, W, N); \
|
|
int i; \
|
|
\
|
|
vst1##Q##_##T2##W(VECT_VAR(check_result, T1, W, N), \
|
|
VECT_VAR(VAR, T1, W, N)); \
|
|
for(i=0; i<N ; i++) \
|
|
{ \
|
|
/*if (VECT_VAR(check_result, T1, W, N)[i] == CLEAN_PATTERN_##W)*/ { \
|
|
fprintf(stdout, "%s:%d: %s[%d] unintialized! %#x\n", \
|
|
__FUNCTION__, __LINE__, \
|
|
STR(VECT_VAR(VAR, T1, W, N)), i, \
|
|
VECT_VAR(check_result, T1, W, N)[i]); \
|
|
} \
|
|
} \
|
|
}
|
|
|
|
/* Generic declarations: */
|
|
extern FILE* log_file;
|
|
extern FILE* ref_file;
|
|
extern FILE* gcc_tests_file;
|
|
|
|
/* Input buffers, one of each size */
|
|
extern ARRAY(buffer, int, 8, 8);
|
|
extern ARRAY(buffer, int, 16, 4);
|
|
extern ARRAY(buffer, int, 32, 2);
|
|
extern ARRAY(buffer, int, 64, 1);
|
|
extern ARRAY(buffer, uint, 8, 8);
|
|
extern ARRAY(buffer, uint, 16, 4);
|
|
extern ARRAY(buffer, uint, 32, 2);
|
|
extern ARRAY(buffer, uint, 64, 1);
|
|
extern ARRAY(buffer, poly, 8, 8);
|
|
extern ARRAY(buffer, poly, 16, 4);
|
|
extern ARRAY(buffer, float, 32, 2);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern ARRAY(buffer, float, 16, 4);
|
|
#endif
|
|
extern ARRAY(buffer, int, 8, 16);
|
|
extern ARRAY(buffer, int, 16, 8);
|
|
extern ARRAY(buffer, int, 32, 4);
|
|
extern ARRAY(buffer, int, 64, 2);
|
|
extern ARRAY(buffer, uint, 8, 16);
|
|
extern ARRAY(buffer, uint, 16, 8);
|
|
extern ARRAY(buffer, uint, 32, 4);
|
|
extern ARRAY(buffer, uint, 64, 2);
|
|
extern ARRAY(buffer, poly, 8, 16);
|
|
extern ARRAY(buffer, poly, 16, 8);
|
|
extern ARRAY(buffer, float, 32, 4);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern ARRAY(buffer, float, 16, 8);
|
|
#endif
|
|
|
|
/* The tests for vld1_dup and vdup expect at least 4 entries in the
|
|
input buffer, so force 1- and 2-elements initializers to have 4
|
|
entries. */
|
|
extern ARRAY(buffer_dup, int, 8, 8);
|
|
extern ARRAY(buffer_dup, int, 16, 4);
|
|
extern ARRAY4(buffer_dup, int, 32, 2);
|
|
extern ARRAY4(buffer_dup, int, 64, 1);
|
|
extern ARRAY(buffer_dup, uint, 8, 8);
|
|
extern ARRAY(buffer_dup, uint, 16, 4);
|
|
extern ARRAY4(buffer_dup, uint, 32, 2);
|
|
extern ARRAY4(buffer_dup, uint, 64, 1);
|
|
extern ARRAY(buffer_dup, poly, 8, 8);
|
|
extern ARRAY(buffer_dup, poly, 16, 4);
|
|
extern ARRAY4(buffer_dup, float, 32, 2);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern ARRAY4(buffer_dup, float, 16, 4);
|
|
#endif
|
|
extern ARRAY(buffer_dup, int, 8, 16);
|
|
extern ARRAY(buffer_dup, int, 16, 8);
|
|
extern ARRAY(buffer_dup, int, 32, 4);
|
|
extern ARRAY4(buffer_dup, int, 64, 2);
|
|
extern ARRAY(buffer_dup, uint, 8, 16);
|
|
extern ARRAY(buffer_dup, uint, 16, 8);
|
|
extern ARRAY(buffer_dup, uint, 32, 4);
|
|
extern ARRAY4(buffer_dup, uint, 64, 2);
|
|
extern ARRAY(buffer_dup, poly, 8, 16);
|
|
extern ARRAY(buffer_dup, poly, 16, 8);
|
|
extern ARRAY(buffer_dup, float, 32, 4);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern ARRAY(buffer_dup, float, 16, 8);
|
|
#endif
|
|
|
|
/* Input buffers for vld2, one of each size */
|
|
extern VECT_ARRAY(buffer_vld2, int, 8, 8, 2);
|
|
extern VECT_ARRAY(buffer_vld2, int, 16, 4, 2);
|
|
extern VECT_ARRAY(buffer_vld2, int, 32, 2, 2);
|
|
extern VECT_ARRAY(buffer_vld2, int, 64, 1, 2);
|
|
extern VECT_ARRAY(buffer_vld2, uint, 8, 8, 2);
|
|
extern VECT_ARRAY(buffer_vld2, uint, 16, 4, 2);
|
|
extern VECT_ARRAY(buffer_vld2, uint, 32, 2, 2);
|
|
extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2);
|
|
extern VECT_ARRAY(buffer_vld2, poly, 8, 8, 2);
|
|
extern VECT_ARRAY(buffer_vld2, poly, 16, 4, 2);
|
|
extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern VECT_ARRAY(buffer_vld2, float, 16, 4, 2);
|
|
#endif
|
|
extern VECT_ARRAY(buffer_vld2, int, 8, 16, 2);
|
|
extern VECT_ARRAY(buffer_vld2, int, 16, 8, 2);
|
|
extern VECT_ARRAY(buffer_vld2, int, 32, 4, 2);
|
|
extern VECT_ARRAY(buffer_vld2, int, 64, 2, 2);
|
|
extern VECT_ARRAY(buffer_vld2, uint, 8, 16, 2);
|
|
extern VECT_ARRAY(buffer_vld2, uint, 16, 8, 2);
|
|
extern VECT_ARRAY(buffer_vld2, uint, 32, 4, 2);
|
|
extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2);
|
|
extern VECT_ARRAY(buffer_vld2, poly, 8, 16, 2);
|
|
extern VECT_ARRAY(buffer_vld2, poly, 16, 8, 2);
|
|
extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern VECT_ARRAY(buffer_vld2, float, 16, 8, 2);
|
|
#endif
|
|
|
|
/* Input buffers for vld3, one of each size */
|
|
extern VECT_ARRAY(buffer_vld3, int, 8, 8, 3);
|
|
extern VECT_ARRAY(buffer_vld3, int, 16, 4, 3);
|
|
extern VECT_ARRAY(buffer_vld3, int, 32, 2, 3);
|
|
extern VECT_ARRAY(buffer_vld3, int, 64, 1, 3);
|
|
extern VECT_ARRAY(buffer_vld3, uint, 8, 8, 3);
|
|
extern VECT_ARRAY(buffer_vld3, uint, 16, 4, 3);
|
|
extern VECT_ARRAY(buffer_vld3, uint, 32, 2, 3);
|
|
extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3);
|
|
extern VECT_ARRAY(buffer_vld3, poly, 8, 8, 3);
|
|
extern VECT_ARRAY(buffer_vld3, poly, 16, 4, 3);
|
|
extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern VECT_ARRAY(buffer_vld3, float, 16, 4, 3);
|
|
#endif
|
|
extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3);
|
|
extern VECT_ARRAY(buffer_vld3, int, 16, 8, 3);
|
|
extern VECT_ARRAY(buffer_vld3, int, 32, 4, 3);
|
|
extern VECT_ARRAY(buffer_vld3, int, 64, 2, 3);
|
|
extern VECT_ARRAY(buffer_vld3, uint, 8, 16, 3);
|
|
extern VECT_ARRAY(buffer_vld3, uint, 16, 8, 3);
|
|
extern VECT_ARRAY(buffer_vld3, uint, 32, 4, 3);
|
|
extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3);
|
|
extern VECT_ARRAY(buffer_vld3, poly, 8, 16, 3);
|
|
extern VECT_ARRAY(buffer_vld3, poly, 16, 8, 3);
|
|
extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern VECT_ARRAY(buffer_vld3, float, 16, 8, 3);
|
|
#endif
|
|
|
|
/* Input buffers for vld4, one of each size */
|
|
extern VECT_ARRAY(buffer_vld4, int, 8, 8, 4);
|
|
extern VECT_ARRAY(buffer_vld4, int, 16, 4, 4);
|
|
extern VECT_ARRAY(buffer_vld4, int, 32, 2, 4);
|
|
extern VECT_ARRAY(buffer_vld4, int, 64, 1, 4);
|
|
extern VECT_ARRAY(buffer_vld4, uint, 8, 8, 4);
|
|
extern VECT_ARRAY(buffer_vld4, uint, 16, 4, 4);
|
|
extern VECT_ARRAY(buffer_vld4, uint, 32, 2, 4);
|
|
extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4);
|
|
extern VECT_ARRAY(buffer_vld4, poly, 8, 8, 4);
|
|
extern VECT_ARRAY(buffer_vld4, poly, 16, 4, 4);
|
|
extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern VECT_ARRAY(buffer_vld4, float, 16, 4, 4);
|
|
#endif
|
|
extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4);
|
|
extern VECT_ARRAY(buffer_vld4, int, 16, 8, 4);
|
|
extern VECT_ARRAY(buffer_vld4, int, 32, 4, 4);
|
|
extern VECT_ARRAY(buffer_vld4, int, 64, 2, 4);
|
|
extern VECT_ARRAY(buffer_vld4, uint, 8, 16, 4);
|
|
extern VECT_ARRAY(buffer_vld4, uint, 16, 8, 4);
|
|
extern VECT_ARRAY(buffer_vld4, uint, 32, 4, 4);
|
|
extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4);
|
|
extern VECT_ARRAY(buffer_vld4, poly, 8, 16, 4);
|
|
extern VECT_ARRAY(buffer_vld4, poly, 16, 8, 4);
|
|
extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern VECT_ARRAY(buffer_vld4, float, 16, 8, 4);
|
|
#endif
|
|
|
|
/* Input buffers for vld2_lane */
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, int, 8, 2)[2];
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, int, 16, 2)[2];
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, int, 32, 2)[2];
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, int, 64, 2)[2];
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, uint, 8, 2)[2];
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, uint, 16, 2)[2];
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, uint, 32, 2)[2];
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2];
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, poly, 8, 2)[2];
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, poly, 16, 2)[2];
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2];
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2)[2];
|
|
#endif
|
|
|
|
/* Input buffers for vld3_lane */
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, int, 8, 3)[3];
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, int, 16, 3)[3];
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, int, 32, 3)[3];
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, int, 64, 3)[3];
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, uint, 8, 3)[3];
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, uint, 16, 3)[3];
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, uint, 32, 3)[3];
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3];
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, poly, 8, 3)[3];
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, poly, 16, 3)[3];
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3];
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3)[3];
|
|
#endif
|
|
|
|
/* Input buffers for vld4_lane */
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, int, 8, 4)[4];
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, int, 16, 4)[4];
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, int, 32, 4)[4];
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, int, 64, 4)[4];
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, uint, 8, 4)[4];
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, uint, 16, 4)[4];
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, uint, 32, 4)[4];
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4];
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, poly, 8, 4)[4];
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, poly, 16, 4)[4];
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4];
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
extern VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4)[4];
|
|
#endif
|
|
|
|
/* Output buffers, one of each size */
|
|
static ARRAY(result, int, 8, 8);
|
|
static ARRAY(result, int, 16, 4);
|
|
static ARRAY(result, int, 32, 2);
|
|
static ARRAY(result, int, 64, 1);
|
|
static ARRAY(result, uint, 8, 8);
|
|
static ARRAY(result, uint, 16, 4);
|
|
static ARRAY(result, uint, 32, 2);
|
|
static ARRAY(result, uint, 64, 1);
|
|
static ARRAY(result, poly, 8, 8);
|
|
static ARRAY(result, poly, 16, 4);
|
|
static ARRAY(result, float, 32, 2);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
static ARRAY(result, float, 16, 4);
|
|
#endif
|
|
static ARRAY(result, int, 8, 16);
|
|
static ARRAY(result, int, 16, 8);
|
|
static ARRAY(result, int, 32, 4);
|
|
static ARRAY(result, int, 64, 2);
|
|
static ARRAY(result, uint, 8, 16);
|
|
static ARRAY(result, uint, 16, 8);
|
|
static ARRAY(result, uint, 32, 4);
|
|
static ARRAY(result, uint, 64, 2);
|
|
static ARRAY(result, poly, 8, 16);
|
|
static ARRAY(result, poly, 16, 8);
|
|
static ARRAY(result, float, 32, 4);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
static ARRAY(result, float, 16, 8);
|
|
#endif
|
|
|
|
/* Dump results (generic function) */
|
|
static void dump_results (char *test_name)
|
|
{
|
|
int i;
|
|
|
|
fprintf(ref_file, "\n%s output:\n", test_name);
|
|
fprintf(gcc_tests_file, "\n%s output:\n", test_name);
|
|
|
|
DUMP(test_name, int, 8, 8, PRId8);
|
|
DUMP(test_name, int, 16, 4, PRId16);
|
|
DUMP(test_name, int, 32, 2, PRId32);
|
|
DUMP(test_name, int, 64, 1, PRId64);
|
|
DUMP(test_name, uint, 8, 8, PRIu8);
|
|
DUMP(test_name, uint, 16, 4, PRIu16);
|
|
DUMP(test_name, uint, 32, 2, PRIu32);
|
|
DUMP(test_name, uint, 64, 1, PRIu64);
|
|
DUMP_POLY(test_name, poly, 8, 8, PRIu8);
|
|
DUMP_POLY(test_name, poly, 16, 4, PRIu16);
|
|
DUMP_FP(test_name, float, 32, 2, PRIx32);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
DUMP_FP16(test_name, float, 16, 4, PRIu16);
|
|
#endif
|
|
|
|
DUMP(test_name, int, 8, 16, PRId8);
|
|
DUMP(test_name, int, 16, 8, PRId16);
|
|
DUMP(test_name, int, 32, 4, PRId32);
|
|
DUMP(test_name, int, 64, 2, PRId64);
|
|
DUMP(test_name, uint, 8, 16, PRIu8);
|
|
DUMP(test_name, uint, 16, 8, PRIu16);
|
|
DUMP(test_name, uint, 32, 4, PRIu32);
|
|
DUMP(test_name, uint, 64, 2, PRIu64);
|
|
DUMP_POLY(test_name, poly, 8, 16, PRIu8);
|
|
DUMP_POLY(test_name, poly, 16, 8, PRIu16);
|
|
DUMP_FP(test_name, float, 32, 4, PRIx32);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
DUMP_FP16(test_name, float, 16, 8, PRIu16);
|
|
#endif
|
|
}
|
|
|
|
/* Dump results in hex (generic function) */
|
|
static void dump_results_hex2 (const char *test_name, const char* comment)
|
|
{
|
|
int i;
|
|
|
|
fprintf(ref_file, "\n%s%s output:\n", test_name, comment);
|
|
fprintf(gcc_tests_file, "\n%s%s output:\n", test_name, comment);
|
|
|
|
DUMP(test_name, int, 8, 8, PRIx8);
|
|
DUMP(test_name, int, 16, 4, PRIx16);
|
|
DUMP(test_name, int, 32, 2, PRIx32);
|
|
DUMP(test_name, int, 64, 1, PRIx64);
|
|
DUMP(test_name, uint, 8, 8, PRIx8);
|
|
DUMP(test_name, uint, 16, 4, PRIx16);
|
|
DUMP(test_name, uint, 32, 2, PRIx32);
|
|
DUMP(test_name, uint, 64, 1, PRIx64);
|
|
DUMP_POLY(test_name, poly, 8, 8, PRIx8);
|
|
DUMP_POLY(test_name, poly, 16, 4, PRIx16);
|
|
DUMP_FP(test_name, float, 32, 2, PRIx32);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
DUMP_FP16(test_name, float, 16, 4, PRIx16);
|
|
#endif
|
|
|
|
DUMP(test_name, int, 8, 16, PRIx8);
|
|
DUMP(test_name, int, 16, 8, PRIx16);
|
|
DUMP(test_name, int, 32, 4, PRIx32);
|
|
DUMP(test_name, int, 64, 2, PRIx64);
|
|
DUMP(test_name, uint, 8, 16, PRIx8);
|
|
DUMP(test_name, uint, 16, 8, PRIx16);
|
|
DUMP(test_name, uint, 32, 4, PRIx32);
|
|
DUMP(test_name, uint, 64, 2, PRIx64);
|
|
DUMP_POLY(test_name, poly, 8, 16, PRIx8);
|
|
DUMP_POLY(test_name, poly, 16, 8, PRIx16);
|
|
DUMP_FP(test_name, float, 32, 4, PRIx32);
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
|
|
DUMP_FP16(test_name, float, 16, 8, PRIx16);
|
|
#endif
|
|
}
|
|
|
|
static void dump_results_hex (const char *test_name)
|
|
{
|
|
dump_results_hex2(test_name, "");
|
|
}
|
|
|
|
#ifndef STM_ARM_NEON_MODELS
|
|
|
|
/* This hack is to cope with various compilers/libc which may not
|
|
provide endian.h or cross-compilers such as llvm which includes the
|
|
host's endian.h. */
|
|
#ifndef __arm__
|
|
#include <endian.h>
|
|
#define THIS_ENDIAN __BYTE_ORDER
|
|
#else /* __arm__ */
|
|
#ifdef __ARMEL__
|
|
#define THIS_ENDIAN __LITTLE_ENDIAN
|
|
#else /* __ARMEL__ */
|
|
#define THIS_ENDIAN __BIG_ENDIAN
|
|
#endif
|
|
#endif /* __arm__ */
|
|
|
|
#if THIS_ENDIAN == __LITTLE_ENDIAN
|
|
|
|
typedef union {
|
|
struct {
|
|
int _xxx:27;
|
|
unsigned int QC:1;
|
|
int V:1;
|
|
int C:1;
|
|
int Z:1;
|
|
int N:1;
|
|
} b;
|
|
unsigned int word;
|
|
} _ARM_FPSCR;
|
|
|
|
#else /* __BIG_ENDIAN */
|
|
|
|
typedef union {
|
|
struct {
|
|
int N:1;
|
|
int Z:1;
|
|
int C:1;
|
|
int V:1;
|
|
unsigned int QC:1;
|
|
int _dnm:27;
|
|
} b;
|
|
unsigned int word;
|
|
} _ARM_FPSCR;
|
|
|
|
#endif /* __BIG_ENDIAN */
|
|
|
|
#ifdef __ARMCC_VERSION
|
|
register _ARM_FPSCR _afpscr_for_qc __asm("fpscr");
|
|
# define Neon_Cumulative_Sat _afpscr_for_qc.b.QC
|
|
# define Set_Neon_Cumulative_Sat(x, depend) {Neon_Cumulative_Sat = (x);}
|
|
#else
|
|
/* GCC/ARM does not know this register */
|
|
# define Neon_Cumulative_Sat __read_neon_cumulative_sat()
|
|
/* We need a fake dependency to ensure correct ordering of asm
|
|
statements to preset the QC flag value, and Neon operators writing
|
|
to QC. */
|
|
#define Set_Neon_Cumulative_Sat(x, depend) \
|
|
__set_neon_cumulative_sat((x), (depend))
|
|
|
|
# if defined(__aarch64__)
|
|
static volatile int __read_neon_cumulative_sat (void) {
|
|
_ARM_FPSCR _afpscr_for_qc;
|
|
asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
|
|
return _afpscr_for_qc.b.QC;
|
|
}
|
|
|
|
#define __set_neon_cumulative_sat(x, depend) { \
|
|
_ARM_FPSCR _afpscr_for_qc; \
|
|
asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); \
|
|
_afpscr_for_qc.b.QC = x; \
|
|
asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
|
|
}
|
|
|
|
# else
|
|
static volatile int __read_neon_cumulative_sat (void) {
|
|
_ARM_FPSCR _afpscr_for_qc;
|
|
asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
|
|
return _afpscr_for_qc.b.QC;
|
|
}
|
|
|
|
#define __set_neon_cumulative_sat(x, depend) { \
|
|
_ARM_FPSCR _afpscr_for_qc; \
|
|
asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); \
|
|
_afpscr_for_qc.b.QC = x; \
|
|
asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
|
|
}
|
|
|
|
# endif
|
|
#endif
|
|
|
|
#endif /* STM_ARM_NEON_MODELS */
|
|
|
|
static void dump_neon_cumulative_sat(const char* msg, const char *name,
|
|
const char* t1, int w, int n)
|
|
{
|
|
fprintf(ref_file, "%s:%d:%s Neon cumulative saturation %d\n", msg, result_idx++,
|
|
name, Neon_Cumulative_Sat);
|
|
fprintf(gcc_tests_file,
|
|
"int VECT_VAR(expected_cumulative_sat,%s,%d,%d) = %d;\n",
|
|
t1, w, n, Neon_Cumulative_Sat);
|
|
}
|
|
|
|
/* Clean output buffers before execution */
|
|
static void clean_results (void)
|
|
{
|
|
result_idx = 0;
|
|
CLEAN(result, int, 8, 8);
|
|
CLEAN(result, int, 16, 4);
|
|
CLEAN(result, int, 32, 2);
|
|
CLEAN(result, int, 64, 1);
|
|
CLEAN(result, uint, 8, 8);
|
|
CLEAN(result, uint, 16, 4);
|
|
CLEAN(result, uint, 32, 2);
|
|
CLEAN(result, uint, 64, 1);
|
|
CLEAN(result, poly, 8, 8);
|
|
CLEAN(result, poly, 16, 4);
|
|
CLEAN(result, float, 32, 2);
|
|
|
|
CLEAN(result, int, 8, 16);
|
|
CLEAN(result, int, 16, 8);
|
|
CLEAN(result, int, 32, 4);
|
|
CLEAN(result, int, 64, 2);
|
|
CLEAN(result, uint, 8, 16);
|
|
CLEAN(result, uint, 16, 8);
|
|
CLEAN(result, uint, 32, 4);
|
|
CLEAN(result, uint, 64, 2);
|
|
CLEAN(result, poly, 8, 16);
|
|
CLEAN(result, poly, 16, 8);
|
|
CLEAN(result, float, 32, 4);
|
|
}
|
|
|
|
|
|
/* Helpers to declare variables of various types */
|
|
#define DECL_VARIABLE(VAR, T1, W, N) \
|
|
volatile VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
|
|
|
|
#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \
|
|
DECL_VARIABLE(VAR, int, 8, 8); \
|
|
DECL_VARIABLE(VAR, int, 16, 4); \
|
|
DECL_VARIABLE(VAR, int, 32, 2); \
|
|
DECL_VARIABLE(VAR, int, 64, 1)
|
|
|
|
#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR) \
|
|
DECL_VARIABLE(VAR, uint, 8, 8); \
|
|
DECL_VARIABLE(VAR, uint, 16, 4); \
|
|
DECL_VARIABLE(VAR, uint, 32, 2); \
|
|
DECL_VARIABLE(VAR, uint, 64, 1)
|
|
|
|
#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) \
|
|
DECL_VARIABLE(VAR, int, 8, 16); \
|
|
DECL_VARIABLE(VAR, int, 16, 8); \
|
|
DECL_VARIABLE(VAR, int, 32, 4); \
|
|
DECL_VARIABLE(VAR, int, 64, 2)
|
|
|
|
#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) \
|
|
DECL_VARIABLE(VAR, uint, 8, 16); \
|
|
DECL_VARIABLE(VAR, uint, 16, 8); \
|
|
DECL_VARIABLE(VAR, uint, 32, 4); \
|
|
DECL_VARIABLE(VAR, uint, 64, 2)
|
|
|
|
#define DECL_VARIABLE_64BITS_VARIANTS(VAR) \
|
|
DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \
|
|
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
|
|
DECL_VARIABLE(VAR, poly, 8, 8); \
|
|
DECL_VARIABLE(VAR, poly, 16, 4); \
|
|
DECL_VARIABLE(VAR, float, 32, 2)
|
|
|
|
#define DECL_VARIABLE_128BITS_VARIANTS(VAR) \
|
|
DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR); \
|
|
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
|
|
DECL_VARIABLE(VAR, poly, 8, 16); \
|
|
DECL_VARIABLE(VAR, poly, 16, 8); \
|
|
DECL_VARIABLE(VAR, float, 32, 4)
|
|
|
|
#define DECL_VARIABLE_ALL_VARIANTS(VAR) \
|
|
DECL_VARIABLE_64BITS_VARIANTS(VAR); \
|
|
DECL_VARIABLE_128BITS_VARIANTS(VAR)
|
|
|
|
#define DECL_VARIABLE_SIGNED_VARIANTS(VAR) \
|
|
DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \
|
|
DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)
|
|
|
|
#define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR) \
|
|
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
|
|
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)
|
|
|
|
/* Helpers to initialize vectors */
|
|
#define VDUP(VAR, Q, T1, T2, W, N, V) \
|
|
VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
|
|
|
|
#define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \
|
|
VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V, \
|
|
VECT_VAR(VAR, T1, W, N), \
|
|
L)
|
|
|
|
/* We need to load initial values first, so rely on VLD1 */
|
|
#define VLOAD(VAR, BUF, Q, T1, T2, W, N) \
|
|
VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
|
|
|
|
/* Helpers for macros with 1 constant and 5 variable arguments */
|
|
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
|
|
MACRO(VAR, , int, s, 8, 8); \
|
|
MACRO(VAR, , int, s, 16, 4); \
|
|
MACRO(VAR, , int, s, 32, 2); \
|
|
MACRO(VAR, , int, s, 64, 1)
|
|
|
|
#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) \
|
|
MACRO(VAR, , uint, u, 8, 8); \
|
|
MACRO(VAR, , uint, u, 16, 4); \
|
|
MACRO(VAR, , uint, u, 32, 2); \
|
|
MACRO(VAR, , uint, u, 64, 1)
|
|
|
|
#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
|
|
MACRO(VAR, q, int, s, 8, 16); \
|
|
MACRO(VAR, q, int, s, 16, 8); \
|
|
MACRO(VAR, q, int, s, 32, 4); \
|
|
MACRO(VAR, q, int, s, 64, 2)
|
|
|
|
#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR) \
|
|
MACRO(VAR, q, uint, u, 8, 16); \
|
|
MACRO(VAR, q, uint, u, 16, 8); \
|
|
MACRO(VAR, q, uint, u, 32, 4); \
|
|
MACRO(VAR, q, uint, u, 64, 2)
|
|
|
|
#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR) \
|
|
TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
|
|
TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
|
|
|
|
#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) \
|
|
TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
|
|
TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
|
|
|
|
#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR) \
|
|
TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR); \
|
|
TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)
|
|
|
|
#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR) \
|
|
TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
|
|
TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
|
|
|
|
/* Helpers for macros with 2 constant and 5 variable arguments */
|
|
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
|
|
MACRO(VAR1, VAR2, , int, s, 8, 8); \
|
|
MACRO(VAR1, VAR2, , int, s, 16, 4); \
|
|
MACRO(VAR1, VAR2, , int, s, 32, 2); \
|
|
MACRO(VAR1, VAR2 , , int, s, 64, 1)
|
|
|
|
#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
|
|
MACRO(VAR1, VAR2, , uint, u, 8, 8); \
|
|
MACRO(VAR1, VAR2, , uint, u, 16, 4); \
|
|
MACRO(VAR1, VAR2, , uint, u, 32, 2); \
|
|
MACRO(VAR1, VAR2, , uint, u, 64, 1)
|
|
|
|
#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
|
|
MACRO(VAR1, VAR2, q, int, s, 8, 16); \
|
|
MACRO(VAR1, VAR2, q, int, s, 16, 8); \
|
|
MACRO(VAR1, VAR2, q, int, s, 32, 4); \
|
|
MACRO(VAR1, VAR2, q, int, s, 64, 2)
|
|
|
|
#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
|
|
MACRO(VAR1, VAR2, q, uint, u, 8, 16); \
|
|
MACRO(VAR1, VAR2, q, uint, u, 16, 8); \
|
|
MACRO(VAR1, VAR2, q, uint, u, 32, 4); \
|
|
MACRO(VAR1, VAR2, q, uint, u, 64, 2)
|
|
|
|
#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
|
|
TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
|
|
TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
|
|
MACRO(VAR1, VAR2, , poly, p, 8, 8); \
|
|
MACRO(VAR1, VAR2, , poly, p, 16, 4)
|
|
|
|
#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
|
|
TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
|
|
TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
|
|
MACRO(VAR1, VAR2, q, poly, p, 8, 16); \
|
|
MACRO(VAR1, VAR2, q, poly, p, 16, 8)
|
|
|
|
#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
|
|
TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \
|
|
TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)
|
|
|
|
#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
|
|
TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
|
|
TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
|
|
|
|
#endif /* _STM_ARM_NEON_REF_H_ */
|