// Forked from jiuyuan/InfiniTensor.
#pragma once

#include <cstdio>

/// Plain aggregate handed to `infini::attention_kvcache_kernel` as its
/// `compMeta` argument.
///
/// Declared at global scope (outside `namespace infini`). It holds two
/// fixed-size arrays of four `int`s each and nothing else, so it can be
/// brace-initialized and copied trivially.
///
/// NOTE(review): the member names suggest per-dimension extents and strides
/// of a 4-D tensor, but which tensor and in which axis order is not visible
/// from this header — confirm against the kernel implementation.
struct AttentionKVCacheMetadata {
    int dimSize[4]; ///< presumably the extent of each of four dimensions
    int stride[4];  ///< presumably the element stride of each dimension
};

namespace infini {
|
||
|
void attention_kvcache_kernel(float *input_k_cache, float *input_v_cache,
|
||
|
float *input_q, float *input_k, float *input_v,
|
||
|
int *position_id, float *output_matmul,
|
||
|
const AttentionKVCacheMetadata &compMeta);
|
||
|
|
||
|
} // namespace infini
|