2023-11-14 23:44:22 +08:00
|
|
|
#pragma once
|
2024-01-25 14:20:43 +08:00
|
|
|
#include "core/common.h"
|
2023-11-14 23:44:22 +08:00
|
|
|
#include <cstdio>
|
|
|
|
|
|
|
|
/**
 * Plain aggregate of two fixed-length int arrays; attention_kvcache_kernel
 * receives it by const reference as its `compMeta` argument.
 *
 * NOTE(review): judging only by the field names, these look like the
 * per-axis extents and strides of a 4-D tensor. The kernel implementation is
 * not visible from this header, so confirm which tensor they describe and
 * whether strides are counted in elements or bytes.
 */
struct AttentionKVCacheMetadata {
    int dimSize[4]; // presumably the extent of each of the four axes -- TODO confirm
    int stride[4];  // presumably the stride of each of the four axes -- TODO confirm
};
|
|
|
|
|
|
|
|
namespace infini {
|
|
|
|
void attention_kvcache_kernel(float *input_k_cache, float *input_v_cache,
|
|
|
|
float *input_q, float *input_k, float *input_v,
|
|
|
|
int *position_id, float *output_matmul,
|
2024-01-25 14:20:43 +08:00
|
|
|
const AttentionKVCacheMetadata &compMeta,
|
|
|
|
float *output_O_temp, float *output_sum_temp);
|
2023-11-14 23:44:22 +08:00
|
|
|
|
|
|
|
} // namespace infini
|