forked from jiuyuan/InfiniTensor
[fix] fix rope op test failing
This commit is contained in:
parent
d000f9750c
commit
b0d030d0de
|
@ -5,7 +5,6 @@
|
|||
#define BLOCKSIZE_2 WARP_SIZE*4
|
||||
#define MAX_PARTITION 1024
|
||||
|
||||
// ASSUME SEQ_LEN OF Q IS 1
|
||||
template <class T>
|
||||
__global__ void _attention_kvcache_kernel_128_1(T* input_k_cache,
|
||||
T* input_v_cache,
|
||||
|
|
|
@ -22,7 +22,6 @@ class RoPECuda : public CudaKernelWithoutConfig {
|
|||
IT_ASSERT(inputShape[0] == pos->getDims()[0] &&
|
||||
inputShape[1] == pos->getDims()[1]);
|
||||
int position_idx_dtype = op->getInputs()[0]->getDTypeIndex();
|
||||
IT_ASSERT(position_idx_dtype == 7);
|
||||
int dim_model = inputShape[2];
|
||||
int dim_head = 128; // TODO: get dim_head from the framework
|
||||
int pos_stride = inputShape[1];
|
||||
|
|
|
@ -26,11 +26,12 @@ TEST(RoPE, Cuda) {
|
|||
cudaRuntime->run(gCuda);
|
||||
|
||||
auto oCpu = gCpu->cloneTensor(op->getOutputs()[0]);
|
||||
oCpu->printData();
|
||||
EXPECT_TRUE(oCpu->equalData(vector<float>{
|
||||
1.381773, 1.381773, 1.381773, 1.381773, 1.381773, 1.381773, 1.381773,
|
||||
1.381773, 1.381773, 1.381773, 1.381773, 1.381773, 1.381773, 1.381773,
|
||||
1.381773, 1.381773, 1.381773, 1.381773, 1.381773, 1.381773, 1.381773,
|
||||
1.381773, 1.381773, 1.381773, 1.381773, 1.381773, 1.381773, 1.381773,
|
||||
1.381773, 1.381773, 1.381773, 1.381773}));
|
||||
0.540302, 0.647906, 0.731761, 0.796458, 0.846009, 0.883756, 0.912396,
|
||||
0.934062, 0.950415, 0.962739, 0.972014, 0.978989, 0.98423, 0.988167,
|
||||
0.991122, 0.99334, 0.995004, 0.996253, 0.99719, 0.997892, 0.998419,
|
||||
0.998815, 0.999111, 0.999333, 0.9995, 0.999625, 0.999719, 0.999789,
|
||||
0.999842, 0.999881, 0.999911, 0.999933}));
|
||||
}
|
||||
} // namespace infini
|
||||
|
|
Loading…
Reference in New Issue