JYCache V1

This commit is contained in:
Feng Ren 2024-08-01 16:35:05 +08:00
parent 4151ae3932
commit 580e5068d9
177 changed files with 48139 additions and 0 deletions

24
CMakeLists.txt Normal file
View File

@ -0,0 +1,24 @@
# Top-level build script for the hybridcache (JYCache) project.
#
# NOTE: cmake_minimum_required() must be the first command — it establishes
# policy defaults. Calling it after project() (as the original did) is a bug.
cmake_minimum_required(VERSION 3.7)
project(hybridcache)

# CMP0079 NEW: allow target_link_libraries() to reference targets created
# in other directories (the subprojects below link across directories).
cmake_policy(SET CMP0079 NEW)

# C++17 is mandatory for this codebase; fail early if unsupported.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Executables are built as non-PIE.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-PIE")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fno-PIE")

# Third-party find-modules live under thirdparties/.
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/thirdparties)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/thirdparties/CmakeFiles)
include(ThirdPartyConfig)

# Project-wide compile options, inherited by every subdirectory.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG -O3 -g -D__const__=__unused__ -pipe -W -Wno-deprecated -Wno-sign-compare -Wno-unused-parameter -fPIC")

# Headers are looked up in both the source tree and the build tree
# (generated headers, e.g. protobuf output, land in the binary dirs).
include_directories(AFTER ${CMAKE_SOURCE_DIR}/local_cache ${CMAKE_SOURCE_DIR}/global_cache)
include_directories(AFTER ${CMAKE_BINARY_DIR}/local_cache ${CMAKE_BINARY_DIR}/global_cache)

# subdirectory
add_subdirectory(local_cache)
add_subdirectory(global_cache)
add_subdirectory(s3fs)
add_subdirectory(intercept)
add_subdirectory(test)

339
COPYING Normal file
View File

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

151
README.md
View File

@ -1,2 +1,153 @@
# JYCache
**九源缓存存储系统(简称JYCache)** 是一款面向个人使用、大模型训练推理等多种场景,适配大容量对象存储等多种底层存储形态的高性能、易扩展的分布式缓存存储系统。通过层次化架构、接入层优化、I/O优化等多种组合优化JYCache 不仅支持文件顺序/随机读写,其读写性能也领先国际主流产品 Alluxio。JYCache 现支持在 X86Intel、AMD、海光等及 ARM鲲鹏、飞腾等平台下运行。
缓存存储系统面向个人使用及集群使用等场景,可为用户提供以下两种运行模式:
1. **单机对象加速**:将 S3 对象存储通过 POSIX 接口挂载到本地像本地磁盘一样进行读写访问。S3 上的一个完整对象对应本地的一个文件,通过对象名构造目录树结构。进一步地,热点数据可缓存于本地的 DRAM/SSD通过减少与 S3 的数据交互操作,可提升文件系统性能。
2. **分布式对象加速**:将 S3 对象存储通过 POSIX 接口挂载到本地像本地磁盘一样进行读写访问。S3 上的一个完整对象对应本地的一个文件,通过对象名构造目录树结构。热点数据呈现两级缓存结构,除各个客户端内部 DRAM/SSD 缓存外,还提供一层共享的 DRAM/SSD 缓存,进一步提高缓存命中率,提升并发读等场景下的 IO 性能。
## 主要特性
- **兼容 POSIX 接口**。通过 FUSE 或动态库劫持技术,应用程序无需重新编译即可立即实现缓存存储加速。
- **高可用缓存写**。数据写入缓存层即可视为持久化,通过多副本、纠删码机制实现缓存层内数据高可用,降低下层存储系统压力,提高 I/O 性能。
- **支持用户态零拷贝 I/O**。动态库劫持技术Syscall intercept实现全用户态 I/O降低上下文切换和拷贝实现极限性能。
- **层次化缓存存储**。本地缓存与计算任务同机部署,使用高速共享缓存可为用户进程提供高达 45GB/s 的缓存带宽;为进一步提高分布式系统缓存效率,可额外部署全局缓存服务,通过与多个本地缓存相关联,进一步提高缓存命中率。
- **易于扩展和集成**。本地缓存与全局缓存采用模块化设计,可依据业务需要实现多样的组合。
- **兼容多种平台**。支持在 X86Intel、AMD、海光等及 ARM鲲鹏、飞腾等平台下运行。
## 系统架构
![](doc/image/JYCache_architecture.PNG)
在单机对象加速部署模式下,对象存储可通过 FUSE基于S3FS(V1.94)实现)或系统调用劫持等方式挂载到本地,用户可像本地磁盘一样进行读写访问。对象存储系统上的一个完整对象对应本地的一个文件,通过对象名构造目录树结构。热点数据可缓存于本地的 DRAM/SSD通过减少与对象存储系统的数据交互操作可提升文件系统性能。
在分布式对象加速模式下,热点数据呈现两级缓存结构,除各个客户端内部 DRAM/SSD 缓存外,还提供一层共享的 DRAM/SSD 缓存,进一步提高缓存命中率,提升并发读等场景下的 IO 性能。
缓存存储系统的两个核心部件是客户端缓存模块及全局缓存模块。客户端缓存模块内部包含写缓存、读缓存。客户端缓存模块按需向全局缓存服务器发出 RPC 通信请求实现数据的传递。全局缓存服务器包含写缓存和读缓存其中写缓存提供多副本等高可用模式。当用户发出下刷fsync请求时写数据会落入此处可容忍少量全局缓存服务器故障时不丢失写入的数据。无论是读缓存还是写缓存都会按需调用数据源访问组件访问对象存储等底层存储资源从而轻松适配其他类型的底层存储。
此外在intercept模式的缓存系统中我们采用了client-server+中间件架构利用系统调用拦截技术捕获POSIX请求将posix请求封装后发送至服务器处理处理完成后返回至客户端。通过绕过FUSE内核模块和采用零拷贝中间件最大限度地减少了数据拷贝和系统开销不仅确保了与常见posix接口的兼容还显著提升了系统性能尤其在读写密集的场景中避免了数据的重复拷贝性能优势明显。
## 系统性能
顺序读性能使用 FIO 测试工具,带宽数据如下表所示:
| BS | 优化前 | JYCache(FUSE) | JYCache(intercept) |
| ------------ | ------------ | ------------ | ------------ |
| 4K | 761MiB/s | 933MiB/s | 3576MiB/s |
| 16K | 706MiB/s | 3643MiB/s | 11.6GiB/s |
| 128K | 2268MiB/s | 22.6GiB/s | 38GiB/s |
顺序写性能使用 FIO 测试工具,带宽数据如下表所示:
| BS | 优化前 | JYCache(FUSE) | JYCache(intercept) |
| ------------ | ------------ | ------------ | ------------ |
| 4K | 624MiB/s | 1226MiB/s | 2571MiB/s |
| 16K | 2153MiB/s | 5705MiB/s | 9711MiB/s |
| 128K | 7498MiB/s | 23.5GiB/s | 31.2GiB/s |
## 系统构建
**环境要求**
- GCC 9.3.0
- GLIBC 2.31
- CMake 3.7
- C++ 17
- FUSE >= 2.6
**从源码构建**
直接在根目录下运行build.sh脚本
```bash
sh build.sh
```
*在build.sh脚本中会自动下载第三方依赖。*
**系统安装**
编译完成后在根目录下运行install.sh脚本
```bash
sh install.sh
```
## 快速使用
执行install.sh脚本后会在当前目录下构建JYCache运行环境其目录为JYCache_Env。下述使用方法均以JYCache_Env为根目录。
**一、JYCache普通模式不启用全局缓存**
修改conf/newcache.conf配置文件中的`UseGlobalCache=0`
```bash
# 1.启动minio
cd ./minio && sh start.sh && cd ..
# 2.启动s3fs
sh start_s3fs.sh
```
启动完成后,在挂载目录 ./mnt 下的文件操作均为JYCache控制。
*注:需要在此模式下,在挂载目录 ./mnt 创建文件夹testdir此为intercept模式所需。*
**关闭服务**
```bash
sh stop_s3fs.sh
cd ./minio && sh stop.sh && cd ..
```
**二、JYCache普通模式启用全局缓存**
修改conf/newcache.conf配置文件中的`UseGlobalCache=1`
```bash
# 1.启动minio
cd ./minio && sh start.sh && cd ..
# 2.启动etcd
sh start_etcd.sh
# 3.启动全局缓存
sh start_global.sh
# 4.启动s3fs
sh start_s3fs.sh
```
启动完成后,在挂载目录 ./mnt 下的文件操作均为JYCache控制
**关闭服务**
```bash
sh stop_s3fs.sh
sh stop_global.sh
sh stop_etcd.sh
cd ./minio && sh stop.sh && cd ..
```
**三、JYCache intercept模式**
此模式也支持全局缓存,方法与二同。下述以不开全局缓存为例:
```bash
# 1.启动minio
cd ./minio && sh start.sh && cd ..
# 2.启动intercept_server
sh start_intercept_server.sh
```
启动完成后在JYCache_Env根目录下执行
```bash
LD_LIBRARY_PATH=./libs/:$LD_LIBRARY_PATH LD_PRELOAD=./libintercept_client.so ${cmd}
```
其中`${cmd}`为用户实际文件操作的命令。例如:
```bash
LD_LIBRARY_PATH=./libs/:$LD_LIBRARY_PATH LD_PRELOAD=./libintercept_client.so ll /testdir/
```
需要在testdir目录下进行文件操作才为JYCache intercept模式控制。
*且使用intercept模式前需要先通过普通模式在挂载目录下创建文件夹testdir。*
**关闭服务**
```bash
sh stop_intercept_server.sh
cd ./minio && sh stop.sh && cd ..
```
## 常见问题
[常见问题](doc/frequently_asked_questions.md)
## 许可
本项目使用了以下遵循GPLv2许可的代码
- S3FS (https://github.com/s3fs-fuse/s3fs-fuse)
This software is licensed under the GNU GPL version 2.

13
build.sh Executable file
View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Build script: downloads the prebuilt third-party dependency bundle on the
# first run, verifies its MD5 checksum, then configures and builds with CMake.
#
# Abort on the first failing command (e.g. a failed wget would previously
# fall through to the checksum test with a partial/missing tarball).
set -e

DEP_TARBALL="JYCache_Dendepency_x64.tgz"
DEP_MD5="48f67dd9b7bcb1b2bdd6be9f2283b714"

if [ ! -d "./thirdparties" ]; then
    wget "https://madstorage.s3.cn-north-1.jdcloud-oss.com/${DEP_TARBALL}"
    md5=$(md5sum "${DEP_TARBALL}" | awk '{print $1}')
    if [ "$md5" != "${DEP_MD5}" ]; then
        echo 'JYCache_Dendepency version inconsistency!'
        exit 1
    fi
    tar -zxvf "${DEP_TARBALL}"
fi

mkdir -p build && cd build
cmake .. && cmake --build . -j 16

View File

@ -0,0 +1,40 @@
# 缓存系统设计
### 设计背景
在用户和数据服务器之间构建一套缓存系统该缓存系统可以让用户以本地文件的形式透明且高效地访问数据服务器中的数据。其中数据服务器的类型有对象存储、自建全局缓存等。以数据服务器为对象存储为例用户可以通过fuse以本地文件形式访问存储在远端的对象且远端的对象索引是用户可懂的。
![](image/system_purpose.png)
### 系统定位
该缓存系统支持多种数据源包括S3对象存储、自建全局缓存等故称为HybridCache。同时借助S3FS对fuse的支持以及其在元数据管理方面的能力实现fuse模式下的文件管理操作。HybridCache的定位如下图所示
![](image/system_positioning.png)
### 系统架构
HybridCache架构如下图所示
![](image/HybridCache_architecture.PNG)
1.写缓存模块
写缓存模块的定位是本地写缓存写缓存中的key是文件的path不理解远端数据源对象存储和全局缓存等从write->flush的过程由上层去做。
2.读缓存模块
读缓存模块的定位是文件以远端数据源为对象存储为例的只读缓存读缓存中的key是对象的key。读缓存需要用到本地缓存以及远端缓存对象存储和全局缓存等
3.数据源访问组件
数据源访问组件负责和远端数据源进行交互涉及数据的上传下载等。以Adaptor的形式支持多种数据源包括对象存储和全局缓存等。
4.缓存管理组件
内存管理组件管理本地缓存,写缓存模块和读缓存模块中实际的本地缓存就是用的该组件。
在本地缓存中我们直接将文件切分为固定大小的pagepage大小可配置下文以64KB为例并使用CacheLib来维护这些page。page在CacheLib中以KV形式进行存储其存储结构如下
- key为 cacheKey_pageid。读写模块各自维护自己的本地缓存cacheKey在写缓存模块中就是文件的path在读缓存模块中就是S3上对象的key。pageid即为页号通过offset/64KB计算得来。
- value的数据结构如下
![](image/page_structure.jpg)
通过 cacheKey+offset+size 即可直接操作指定文件中的特定page。page并发操作的安全性是通过CacheLib自身的机制以及page内的lock和新旧版号位来保证。
5.HybridCache访问组件
HybridCache访问组件定位在胶水层要根据上层调用方的特性定制化实现其内需要理解到上层调用方的逻辑。

View File

@ -0,0 +1,38 @@
# ReadCache
ReadCacheConfig.CacheConfig.CacheName # 读缓存名称
ReadCacheConfig.CacheConfig.MaxCacheSize # 读缓存内存容量限制
ReadCacheConfig.CacheConfig.PageBodySize # 读缓存page大小
ReadCacheConfig.CacheConfig.PageMetaSize # 读缓存page元数据大小
ReadCacheConfig.CacheConfig.EnableCAS # 读缓存是否启用CAS
ReadCacheConfig.CacheConfig.CacheLibConfig.EnableNvmCache # 读缓存是否开启nvm缓存
ReadCacheConfig.CacheConfig.CacheLibConfig.RaidPath # nvm缓存文件目录
ReadCacheConfig.CacheConfig.CacheLibConfig.RaidFileNum # nvm缓存文件数量限制
ReadCacheConfig.CacheConfig.CacheLibConfig.RaidFileSize # nvm单个缓存文件大小限制
ReadCacheConfig.CacheConfig.CacheLibConfig.DataChecksum # nvm缓存是否进行数据校验
ReadCacheConfig.DownloadNormalFlowLimit # 读缓存内存未命中从远端下载时的平峰流控
ReadCacheConfig.DownloadBurstFlowLimit # 读缓存内存未命中从远端下载时的顶峰流控
# WriteCache
WriteCacheConfig.CacheConfig.CacheName # 写缓存名称
WriteCacheConfig.CacheConfig.MaxCacheSize # 写缓存内存容量限制
WriteCacheConfig.CacheConfig.PageBodySize # 写缓存page大小
WriteCacheConfig.CacheConfig.PageMetaSize # 写缓存page元数据大小
WriteCacheConfig.CacheConfig.EnableCAS # 写缓存是否启用CAS
WriteCacheConfig.CacheSafeRatio # 写缓存安全容量阈值(百分比), 缓存达到阈值时阻塞待异步flush释放空间
# GlobalCache
UseGlobalCache # 全局缓存开关
GlobalCacheConfig.EnableWriteCache # 全局缓存是否启用写缓存
GlobalCacheConfig.EtcdAddress # etcd地址例如 http://127.0.0.1:2379
GlobalCacheConfig.GlobalServers # 全局缓存服务端地址,例如 127.0.0.1:8000
GlobalCacheConfig.GflagFile # 全局缓存gflag文件形式输入
ThreadNum=48 # 线程数
BackFlushCacheRatio # 写缓存异步flush阈值(百分比)
UploadNormalFlowLimit # 上传平峰流控
UploadBurstFlowLimit # 上传顶峰流控
LogPath # 日志文件路径
LogLevel # 日志级别INFO=0, WARNING=1, ERROR=2, FATAL=3
EnableLog # 是否启用日志打印
FlushToRead # 文件flush完成后是否写入读缓存
CleanCacheByOpen # 文件open时是否清理读缓存

View File

@ -0,0 +1,7 @@
**1. 如何切换挂载目录?**
在start_s3fs.sh中
```bash
LD_LIBRARY_PATH=./libs/:$LD_LIBRARY_PATH nohup ./s3fs -o passwd_file=./conf/passwd -o use_path_request_style -o endpoint=us-east-1 -o url=http://127.0.0.1:9000 -o bucket=test ./mnt -o dbglevel=err -o use_cache=./diskcache -o del_cache -o newcache_conf=./conf/newcache.conf -f >> ./log/s3fs.log 2>&1 &
```
更换其中的 `./mnt` 即可

Binary file not shown.

After

Width:  |  Height:  |  Size: 241 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 196 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 77 KiB

View File

@ -0,0 +1,46 @@
# Build script for the global cache library (madfs_global) and its
# server / garbage-collector executables.
include(FindThreads)
include(FindProtobuf)

# Generate C++ sources from the gcache RPC protobuf definition; the
# generated files land in the current binary dir, hence the include below.
protobuf_generate_cpp(PROTO_SRC PROTO_HEADER gcache.proto)
include_directories(${CMAKE_CURRENT_BINARY_DIR} /usr/local/include/jerasure)

# Optional erasure-coding support (requires the Jerasure library).
# Declared before first use instead of between target definitions.
option(ENABLE_EC "Enable Erasure Coding" OFF)

add_library(madfs_global
    Common.h
    Common.cpp
    FileSystemDataAdaptor.h
    EtcdClient.h
    Placement.h
    GlobalCacheClient.h
    GlobalCacheClient.cpp
    S3DataAdaptor.h
    S3DataAdaptor.cpp
    GlobalDataAdaptor.h
    GlobalDataAdaptor.cpp
    ReadCacheClient.h
    ReadCacheClient.cpp
    ReplicationWriteCacheClient.h
    ReplicationWriteCacheClient.cpp
    GlobalCacheServer.h
    GlobalCacheServer.cpp
    ReadCache.h
    ReadCache.cpp
    WriteCache.h
    WriteCache.cpp
    WriteCacheClient.h
    ErasureCodingWriteCacheClient.h
    ErasureCodingWriteCacheClient.cpp
    ${PROTO_SRC}
    ${PROTO_HEADER}
)

target_link_libraries(madfs_global PUBLIC hybridcache_local aio)

if(ENABLE_EC)
    # Target-scoped replacement for the former directory-wide
    # add_definitions(-DCONFIG_JERASURE); PUBLIC so the executables
    # linking madfs_global still see the definition.
    target_compile_definitions(madfs_global PUBLIC CONFIG_JERASURE)
    target_link_libraries(madfs_global PUBLIC Jerasure)
endif()

add_executable(madfs_global_server GlobalCacheServerMain.cpp)
target_link_libraries(madfs_global_server PUBLIC madfs_global)

add_executable(madfs_gc GarbageCollectorMain.cpp)
target_link_libraries(madfs_gc PUBLIC madfs_global)

122
global_cache/Common.cpp Normal file
View File

@ -0,0 +1,122 @@
#include "Common.h"

// Command-line flags for the global cache (gflags). Grouped by subsystem.

// ---- RPC / threading ----
DEFINE_int32(rpc_timeout, 30000, "RPC timeout in milliseconds");
DEFINE_int32(rpc_threads, 16, "Maximum number of threads in brpc");
DEFINE_int32(folly_threads, 48, "Maximum number of threads in folly's executor");
DEFINE_int32(rpc_connections, 10, "RPC connections");
DEFINE_bool(use_rdma, true, "Use remote memory direct access");

// ---- Global read cache ----
DEFINE_int64(read_chunk_size, 256 * 1024, "Granularity of global read cache");
DEFINE_int32(read_replication_factor, 1, "Replication factor of global read cache");
DEFINE_string(read_cache_dir, "/mnt/nvme0/renfeng/readcache", "Read cache directory");

// ---- Global write cache ----
DEFINE_string(write_cache_dir, "/mnt/nvme0/renfeng/writecache", "Write cache directory");
DEFINE_string(write_cache_type, "nocache", "Policy of global write cache: nocache, replication, reed-solomon");
DEFINE_int32(write_replication_factor, 1, "Replication factor of global write cache, available if --write_cache_type=replication");
DEFINE_int32(write_data_blocks, 3, "Data blocks of global write cache, available if --write_cache_type=reed-solomon");
DEFINE_int32(write_parity_blocks, 2, "Parity blocks of global write cache, available if --write_cache_type=reed-solomon");

// ---- S3 data source ----
DEFINE_string(s3_address, "<undefined>", "S3 - server address (URL)");
DEFINE_string(s3_access_key, "<undefined>", "S3 - AccessKey");
DEFINE_string(s3_secret_access_key, "<undefined>", "S3 - SecretAccessKey");
DEFINE_string(s3_bucket, "madfs", "S3 - bucket name");
DEFINE_int32(s3_bg_threads, 4, "S3 - number of background threads");

// ---- Read cache tuning ----
DEFINE_uint64(read_normal_flow_limit, 1024, "Read cache normal flow limit");
DEFINE_uint64(read_burst_flow_limit, 10 * 1024, "Read cache burst flow limit");
DEFINE_uint64(read_capacity_mb, 4096, "Read cache capacity in MB");
DEFINE_uint64(read_page_body_size, 64 * 1024, "Read cache page body size");
DEFINE_uint64(read_page_meta_size, 1024, "Read cache page meta size");
DEFINE_bool(read_cas, true, "Read cache enable CAS");
DEFINE_bool(read_nvm_cache, false, "Read cache enable NVM cache");

// ---- Meta cache ----
DEFINE_bool(use_meta_cache, true, "Enable meta cache");
DEFINE_uint64(meta_cache_max_size, 1024 * 1024, "Max size of meta cache");
// Fixed: description was a copy-paste of read_burst_flow_limit's help text.
DEFINE_uint64(meta_cache_clear_size, 512 * 1024, "Clear size of meta cache");

// ---- Misc ----
DEFINE_uint64(write_chunk_size, 16 * 1024 * 1024, "Granularity of global write cache");
DEFINE_uint64(max_inflight_payload_size, 256 * 1024 * 1024, "Max inflight payload size in bytes");
DEFINE_string(etcd_prefix, "/madfs/", "Etcd directory prefix");
DEFINE_bool(verbose, false, "Print debug logging");
namespace brpc {
// Defined inside brpc; declared here so it can be referenced/tuned
// without pulling in brpc's internal flag headers.
DECLARE_int64(socket_max_unwritten_bytes);
};

// Process-wide configuration snapshot, populated once (see g_cfg_once).
static GlobalConfig g_cfg;
std::once_flag g_cfg_once;

// Clamp-and-assign: copies `flag` into `conf`; if the flag value lies
// outside [min_val, max_val] it is first reset to max_val with a warning.
// NOTE(review): a value *below* min_val is also reset to max_val rather
// than min_val — presumably intentional, but worth confirming.
// (Removed the unused `flag##_min` / `flag##_max` statics — dead code.)
#define SAFE_ASSIGN(conf, flag, min_val, max_val) { \
    if (flag < (min_val) || flag > (max_val)) { \
        LOG(WARNING) << "Invalid " #flag ", reset to " << (max_val); \
        flag = (max_val); \
    } \
    conf = flag; \
}
// Populates the process-wide GlobalConfig from command-line flags. Invoked
// exactly once via GetGlobalConfig(); terminates the process when
// --write_cache_type has an unsupported value.
void InitGlobalConfig() {
    // Bounded numeric settings are clamped into a sane range before use.
    SAFE_ASSIGN(g_cfg.rpc_timeout, FLAGS_rpc_timeout, 0, 60000);
    SAFE_ASSIGN(g_cfg.rpc_threads, FLAGS_rpc_threads, 0, 256);
    SAFE_ASSIGN(g_cfg.rpc_connections, FLAGS_rpc_connections, 0, 64);
    SAFE_ASSIGN(g_cfg.folly_threads, FLAGS_folly_threads, 0, 256);
    g_cfg.use_rdma = FLAGS_use_rdma;
    g_cfg.write_chunk_size = FLAGS_write_chunk_size;
    // Default per-key cache policy (these two fields were assigned twice in
    // the original; the duplicates have been removed).
    g_cfg.default_policy.read_chunk_size = FLAGS_read_chunk_size;
    g_cfg.default_policy.read_replication_factor = FLAGS_read_replication_factor;
    g_cfg.use_meta_cache = FLAGS_use_meta_cache;
    g_cfg.meta_cache_max_size = size_t(FLAGS_meta_cache_max_size);
    g_cfg.meta_cache_clear_size = size_t(FLAGS_meta_cache_clear_size);
    g_cfg.read_cache_dir = FLAGS_read_cache_dir;
    g_cfg.write_cache_dir = FLAGS_write_cache_dir;
    g_cfg.etcd_prefix = FLAGS_etcd_prefix;
    g_cfg.max_inflight_payload_size = FLAGS_max_inflight_payload_size;
    // Redundancy scheme of the global write cache: nocache / replication /
    // reed-solomon. Anything else is a fatal configuration error.
    if (FLAGS_write_cache_type == "nocache") {
        g_cfg.default_policy.write_cache_type = NOCACHE;
    } else if (FLAGS_write_cache_type == "replication") {
        g_cfg.default_policy.write_cache_type = REPLICATION;
        g_cfg.default_policy.write_replication_factor = FLAGS_write_replication_factor;
    } else if (FLAGS_write_cache_type == "reed-solomon") {
        g_cfg.default_policy.write_cache_type = REED_SOLOMON;
        g_cfg.default_policy.write_data_blocks = FLAGS_write_data_blocks;
        g_cfg.default_policy.write_parity_blocks = FLAGS_write_parity_blocks;
    } else {
        LOG(ERROR) << "The program will be terminated because of unsupported write cache type: " << FLAGS_write_cache_type;
        exit(EXIT_FAILURE);
    }
    g_cfg.s3_config.address = FLAGS_s3_address;
    g_cfg.s3_config.access_key = FLAGS_s3_access_key;
    g_cfg.s3_config.secret_access_key = FLAGS_s3_secret_access_key;
    g_cfg.s3_config.bucket = FLAGS_s3_bucket;
    g_cfg.s3_config.bg_threads = FLAGS_s3_bg_threads;
    // Local read cache (HybridCache) configuration.
    HybridCache::ReadCacheConfig &read_cache = g_cfg.read_cache;
    read_cache.DownloadNormalFlowLimit = FLAGS_read_normal_flow_limit;
    read_cache.DownloadBurstFlowLimit = FLAGS_read_burst_flow_limit;
    read_cache.CacheCfg.CacheName = "Read";
    read_cache.CacheCfg.MaxCacheSize = FLAGS_read_capacity_mb * 1024 * 1024; // MB -> bytes (stray ';;' removed)
    read_cache.CacheCfg.PageBodySize = FLAGS_read_page_body_size;
    read_cache.CacheCfg.PageMetaSize = FLAGS_read_page_meta_size;
    read_cache.CacheCfg.EnableCAS = FLAGS_read_cas;
    read_cache.CacheCfg.CacheLibCfg.EnableNvmCache = FLAGS_read_nvm_cache;
    // Let brpc buffer up to twice the inflight payload budget per socket.
    brpc::FLAGS_socket_max_unwritten_bytes = FLAGS_max_inflight_payload_size * 2;
}
// Returns the process-wide configuration, lazily initializing it exactly
// once in a thread-safe manner.
GlobalConfig &GetGlobalConfig() {
    std::call_once(g_cfg_once, &InitGlobalConfig);
    return g_cfg;
}

130
global_cache/Common.h Normal file
View File

@ -0,0 +1,130 @@
#ifndef MADFS_COMMON_H
#define MADFS_COMMON_H
#include <string>
#include <butil/iobuf.h>
#include <folly/futures/Future.h>
#include <folly/futures/Promise.h>
#include <folly/executors/ThreadedExecutor.h>
#include <boost/filesystem.hpp>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include "config.h"
using folly::Future;
using folly::Promise;
// ANSI terminal escape sequences used for colored console banners.
#define RED "\033[1;31m"
#define GREEN "\033[1;32m"
#define YELLOW "\033[1;33m"
#define WHITE "\033[0m"  // reset to the default color
// Defined by the gflags setup of the global cache; enables verbose logging.
DECLARE_bool(verbose);
// Status codes shared across the global cache: OK is zero, errors negative.
const static int OK = 0;
const static int RPC_FAILED = -2;             // brpc call failed / server unregistered
const static int NOT_FOUND = -3;              // key or record does not exist
const static int CACHE_ENTRY_NOT_FOUND = -3;  // deprecated alias of NOT_FOUND
const static int INVALID_ARGUMENT = -4;
const static int S3_INTERNAL_ERROR = -5;
const static int FOLLY_ERROR = -6;            // a folly future completed without a value
const static int NO_ENOUGH_REPLICAS = -7;
const static int METADATA_ERROR = -8;         // etcd metadata operation failed
const static int IO_ERROR = -9;
const static int END_OF_FILE = -10;
const static int NO_ENOUGH_DISKSPACE = -11;
const static int UNSUPPORTED_TYPE = -12;
const static int UNSUPPORTED_OPERATION = -13;
const static int UNIMPLEMENTED = -128;
// Result of a read-style RPC: `buf` holds the payload only when status == OK.
struct GetOutput {
int status;
butil::IOBuf buf;
};
// Result of a write-style RPC: `internal_key` names the stored entry when
// status == OK.
struct PutOutput {
int status;
std::string internal_key;
};
// Result of a timestamp query RPC.
struct QueryTsOutput {
int status;
uint64_t timestamp;
};
// Redundancy scheme of the global write cache (selected by
// --write_cache_type).
enum WriteCacheType {
NOCACHE, REPLICATION, REED_SOLOMON
};
// Connection settings for the S3 backing store.
struct S3Config {
std::string address;            // server address (URL)
std::string access_key;
std::string secret_access_key;
std::string bucket;
int bg_threads;                 // number of background worker threads
};
// Per-key cache policy: read-side replication plus the write-side redundancy
// scheme and its parameters.
struct CachePolicy {
size_t read_chunk_size;
size_t read_replication_factor;
WriteCacheType write_cache_type;
size_t write_replication_factor; // used if write_cache_type == REPLICATION
size_t write_data_blocks;        // used if write_cache_type == REED_SOLOMON
size_t write_parity_blocks; // if write_cache_type == REED_SOLOMON
};
// Process-wide configuration, populated once from gflags by
// InitGlobalConfig() and accessed through GetGlobalConfig().
struct GlobalConfig {
int rpc_timeout;                  // milliseconds (used for brpc timeout_ms)
int rpc_threads;
int rpc_connections;
int folly_threads;
bool use_rdma;                    // enable RDMA transport in brpc channels
bool use_meta_cache;
size_t meta_cache_max_size;
size_t meta_cache_clear_size;
size_t write_chunk_size;          // granularity of the global write cache
size_t max_inflight_payload_size; // bytes
CachePolicy default_policy;
S3Config s3_config;
HybridCache::ReadCacheConfig read_cache;
HybridCache::WriteCacheConfig write_cache;
std::string read_cache_dir;
std::string write_cache_dir;
std::string etcd_prefix;          // key prefix for metadata stored in etcd
};
GlobalConfig &GetGlobalConfig();
// Joins two path components with a single '/' separator. An empty left
// component yields the right component unchanged; a left component already
// ending in '/' is not given a second separator.
static inline std::string PathJoin(const std::string &left, const std::string &right) {
    if (left.empty()) {
        return right;
    }
    const bool has_separator = (left.back() == '/');
    return has_separator ? left + right : left + "/" + right;
}
static inline int CreateParentDirectories(const std::string &path) {
auto pos = path.rfind('/');
if (pos == path.npos) {
return 0;
}
auto parent = path.substr(0, pos);
boost::filesystem::create_directories(parent);
return 0;
}
#endif // MADFS_COMMON_H

View File

@ -0,0 +1,333 @@
#include "ErasureCodingWriteCacheClient.h"
#include "GlobalDataAdaptor.h"
// #define CONFIG_JERASURE
#ifdef CONFIG_JERASURE
#include <jerasure.h>
#include <jerasure/reed_sol.h>
// Rounds `a` up to the next multiple of `b` (callers pass a >= 0, b > 0).
static int _roundup(int a, int b) {
    const int remainder = a % b;
    return remainder ? a + b - remainder : a;
}
// Writes `size` bytes of `buffer` to the global write cache with
// Reed-Solomon erasure coding: every write_chunk_size-sized chunk is split
// into k data blocks plus m parity blocks (k = write_data_blocks,
// m = write_parity_blocks from the key's cache policy), and each block is
// uploaded to one server of the replica set derived from `key`. The returned
// PutResult carries the placement metadata JSON to be persisted by the
// caller.
// NOTE(review): the `off` parameter is not used on this path — confirm that
// callers always pass 0.
folly::Future<PutResult> ErasureCodingWriteCacheClient::Put(const std::string &key,
size_t size,
const ByteBuffer &buffer,
const std::map <std::string, std::string> &headers,
size_t off) {
std::vector <folly::Future<PutOutput>> future_list;
Json::Value root;
Json::Value json_replica(Json::arrayValue), json_headers;
// Choose k + m servers deterministically from the key hash.
const std::vector<int> replicas = GetReplica(key);
for (auto server_id: replicas) {
json_replica.append(server_id);
}
auto &policy = parent_->GetCachePolicy(key);
const int k = policy.write_data_blocks;
const int m = policy.write_parity_blocks;
const int w = 32; // Galois-field word size required by jerasure
auto matrix = reed_sol_vandermonde_coding_matrix(k, m, w);
std::vector<char *> data_buf_list;
auto rpc_client = parent_->GetRpcClient();
auto write_chunk_size = GetGlobalConfig().write_chunk_size;
for (uint64_t offset = 0; offset < size; offset += write_chunk_size) {
// Each of the k + m blocks is unit_size bytes; jerasure requires
// unit_size to be a multiple of w.
const auto unit_size = _roundup((write_chunk_size + k - 1) / k, w);
const auto region_size = std::min(write_chunk_size, size - offset);
char *data_buf = new char[(k + m) * unit_size];
data_buf_list.push_back(data_buf);
// Copy the chunk payload and zero-pad the rest of the data blocks.
memcpy(data_buf, &buffer.data[offset], region_size);
memset(data_buf + region_size, 0, k * unit_size - region_size);
char *data_ptrs[k] = { nullptr }, *coding_ptrs[m] = { nullptr };
for (int i = 0; i < k + m; ++i) {
if (i < k) {
data_ptrs[i] = &data_buf[i * unit_size];
} else {
coding_ptrs[i - k] = &data_buf[i * unit_size];
}
}
// Compute the m parity blocks in place.
jerasure_matrix_encode(k, m, w, matrix, data_ptrs, coding_ptrs, unit_size);
auto cur_data_buf = data_buf;
// Ship one block (data or parity) to each server of the replica set.
for (auto server_id: replicas) {
ByteBuffer region_buffer(cur_data_buf, unit_size);
cur_data_buf += unit_size;
std::string partial_key = key
+ "-" + std::to_string(offset / write_chunk_size)
+ "-" + std::to_string(write_chunk_size);
future_list.emplace_back(rpc_client->PutEntryFromWriteCache(server_id, partial_key, region_buffer, unit_size));
}
}
for (auto iter = headers.begin(); iter != headers.end(); ++iter) {
json_headers[iter->first] = iter->second;
}
root["type"] = "reed-solomon";
root["size"] = size;
root["replica"] = json_replica;
root["headers"] = json_headers;
// Wait for all block uploads, then release the encoding buffers. Any failed
// upload fails the whole Put.
return folly::collectAll(future_list).via(parent_->executor_.get()).thenValue(
[this, root, data_buf_list, matrix](std::vector <folly::Try<PutOutput>> output) -> PutResult {
free(matrix); // jerasure matrices are malloc()-allocated
for (auto &entry : data_buf_list) {
delete []entry;
}
Json::Value res_root;
Json::Value json_path(Json::arrayValue);
for (auto &entry: output) {
if (!entry.hasValue())
return PutResult { FOLLY_ERROR, res_root };
if (entry.value().status != OK)
return PutResult { entry.value().status, res_root };
json_path.append(entry.value().internal_key);
}
res_root = root;
res_root["path"] = json_path;
return PutResult { OK, res_root };
});
}
// Fast-path read of [start, start + size) of `key`: only the data blocks
// covering the requested range are fetched (one RPC per intersecting block),
// without parity decoding. Placement is taken from the metadata JSON `root`
// produced by Put(). Use GetDecode() when blocks may be missing.
folly::Future<int> ErasureCodingWriteCacheClient::Get(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
Json::Value &root) {
std::vector<int> replicas;
for (auto &entry : root["replica"]) {
replicas.push_back(entry.asInt());
}
std::vector<std::string> internal_keys;
for (auto &entry : root["path"]) {
internal_keys.push_back(entry.asString());
}
std::vector <folly::Future<int>> future_list;
std::vector<GetChunkRequestV2> requests;
auto write_chunk_size = GetGlobalConfig().write_chunk_size;
// Split the logical range into per-chunk slices.
GenerateGetChunkRequestsV2(key, start, size, buffer, requests, write_chunk_size);
if (requests.empty())
return folly::makeFuture(OK);
for (auto &entry: requests) {
auto &policy = parent_->GetCachePolicy(key);
const int k = policy.write_data_blocks;
const int m = policy.write_parity_blocks;
const int w = 32; // Galois-field word size; blocks are padded to a multiple of w
const auto unit_size = _roundup((write_chunk_size + k - 1) / k, w);
// Indices of the data blocks that overlap this slice within its chunk.
const auto start_replica_id = entry.chunk_start / unit_size;
const auto end_replica_id = (entry.chunk_start + entry.chunk_len + unit_size - 1) / unit_size;
size_t dest_buf_pos = 0;
for (auto replica_id = start_replica_id; replica_id < end_replica_id; ++replica_id) {
// Sub-range of this block that intersects the request.
auto start_off = (replica_id == start_replica_id) ? entry.chunk_start % unit_size : 0;
auto end_off = (replica_id + 1 == end_replica_id) ? (entry.chunk_start + entry.chunk_len) - replica_id * unit_size : unit_size;
int server_id = replicas[replica_id];
std::string internal_key = internal_keys[entry.chunk_id * replicas.size() + replica_id];
auto cur_dest_buf_pos = dest_buf_pos;
dest_buf_pos += (end_off - start_off);
future_list.emplace_back(parent_->GetRpcClient()->GetEntryFromWriteCache(server_id, internal_key, start_off, end_off - start_off)
.then([this, server_id, entry, start_off, end_off, cur_dest_buf_pos](folly::Try<GetOutput> &&output) -> folly::Future<int> {
if (!output.hasValue()) {
return folly::makeFuture(FOLLY_ERROR);
}
auto &value = output.value();
if (value.status == OK) {
// Copy the fetched bytes into the caller-visible window.
value.buf.copy_to(entry.buffer.data + cur_dest_buf_pos, end_off - start_off);
return folly::makeFuture(OK);
} else {
return folly::makeFuture(value.status);
}
}));
}
}
// Any failed block read fails the whole Get (first error wins).
return folly::collectAll(future_list).via(parent_->executor_.get()).thenValue(
[=](std::vector <folly::Try<int>> output) -> int {
for (auto &entry: output)
if (entry.value_or(FOLLY_ERROR) != OK) {
LOG(ERROR) << "Failed to get data from write cache, key: " << key
<< ", start: " << start
<< ", size: " << size
<< ", buf: " << (void *) buffer.data << " " << buffer.len
<< ", error code: " << entry.hasValue() << " " << entry.value_or(FOLLY_ERROR);
return entry.value_or(FOLLY_ERROR);
}
return OK;
});
}
// Degraded-path read: for every chunk touched by [start, start + size), all
// k + m blocks are fetched synchronously, missing blocks are reconstructed
// with Reed-Solomon decoding, and the requested byte range is copied into
// `buffer`. Placement comes from the metadata JSON `root` written by Put().
folly::Future<int> ErasureCodingWriteCacheClient::GetDecode(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
Json::Value &root) {
std::vector<int> replicas;
for (auto &entry : root["replica"]) {
replicas.push_back(entry.asInt());
}
std::vector<std::string> internal_keys;
for (auto &entry : root["path"]) {
internal_keys.push_back(entry.asString());
}
std::vector<GetChunkRequestV2> requests;
auto write_chunk_size = GetGlobalConfig().write_chunk_size;
GenerateGetChunkRequestsV2(key, start, size, buffer, requests, write_chunk_size);
if (requests.empty())
return folly::makeFuture(OK);
// NOTE(review): `future_list` is declared but never used on this path; the
// per-block fetch below is intentionally synchronous.
std::vector <folly::Future<GetOutput>> future_list;
for (auto &entry: requests) {
auto &policy = parent_->GetCachePolicy(key);
const int k = policy.write_data_blocks;
const int m = policy.write_parity_blocks;
const int w = 32; // Galois-field word size; unit_size is padded to a multiple of w
auto matrix = reed_sol_vandermonde_coding_matrix(k, m, w);
const auto unit_size = _roundup((write_chunk_size + k - 1) / k, w);
const auto start_replica_id = entry.chunk_start / unit_size;
const auto end_replica_id = (entry.chunk_start + entry.chunk_len + unit_size - 1) / unit_size;
// jerasure expects a -1 terminated list of missing block indices.
int erasures[k + m + 1] = { 0 };
int erasures_idx = 0;
char *data_buf = new char[(k + m) * unit_size];
char *data_ptrs[k] = { nullptr }, *coding_ptrs[m] = { nullptr };
for (int i = 0; i < k + m; ++i) {
if (i < k) {
data_ptrs[i] = &data_buf[i * unit_size];
} else {
coding_ptrs[i - k] = &data_buf[i * unit_size];
}
}
// rarely occurred, can be synchronized
for (auto replica_id = 0; replica_id < k + m; ++replica_id) {
int server_id = replicas[replica_id];
std::string internal_key = internal_keys[entry.chunk_id * replicas.size() + replica_id];
auto output = parent_->GetRpcClient()->GetEntryFromWriteCache(server_id, internal_key, 0, unit_size).get();
if (output.status == OK) {
if (replica_id < k) {
output.buf.copy_to(data_ptrs[replica_id], unit_size);
} else {
output.buf.copy_to(coding_ptrs[replica_id - k], unit_size);
}
} else {
// Block unavailable: record it for reconstruction.
erasures[erasures_idx++] = replica_id;
}
}
erasures[erasures_idx] = -1;
int rc = jerasure_matrix_decode(k, m, w, matrix, 1, erasures, data_ptrs, coding_ptrs, unit_size);
if (rc == -1) {
// LOG(FATAL) aborts the process, so the return below (and the fact that
// matrix/data_buf are not released here) is effectively unreachable.
LOG(FATAL) << "Unable to decode RS matrix";
return IO_ERROR;
}
// Copy the requested sub-range of the reconstructed data blocks out.
auto cur_pos = 0;
for (auto replica_id = start_replica_id; replica_id < end_replica_id; ++replica_id) {
auto start_pos = (replica_id == start_replica_id) ? entry.chunk_start % unit_size : 0;
auto end_pos = (replica_id + 1 == end_replica_id) ? (entry.chunk_start + entry.chunk_len) - replica_id * unit_size : unit_size;
memcpy(entry.buffer.data + cur_pos, data_ptrs[replica_id] + start_pos, end_pos - start_pos);
cur_pos += end_pos - start_pos;
}
delete []data_buf;
free(matrix);
}
return OK;
}
// Deterministically maps `key` to k + m servers by hashing the key and
// taking consecutive indices modulo the server count.
// NOTE(review): when k + m exceeds the number of available servers, the same
// server is selected more than once (see the commented-out capped loop) —
// confirm whether duplicate placements are acceptable.
std::vector<int> ErasureCodingWriteCacheClient::GetReplica(const std::string &key) {
const int num_available = parent_->server_list_.size();
auto &policy = parent_->GetCachePolicy(key);
const int num_choose = policy.write_data_blocks + policy.write_parity_blocks;
uint64_t seed = std::hash < std::string > {}(key);
std::vector<int> output;
// for (int i = 0; i < std::min(num_available, num_choose); ++i)
for (int i = 0; i < num_choose; ++i)
output.push_back((seed + i) % num_available);
return output;
}
void ErasureCodingWriteCacheClient::GenerateGetChunkRequestsV2(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
std::vector<GetChunkRequestV2> &requests,
size_t chunk_size) {
const size_t end = start + size;
const size_t begin_chunk_id = start / chunk_size;
const size_t end_chunk_id = (end + chunk_size - 1) / chunk_size;
if (buffer.len < size) {
LOG(WARNING) << "Buffer capacity may be not enough, expect " << size << ", actual " << buffer.len;
}
size_t buffer_offset = 0;
for (size_t chunk_id = begin_chunk_id; chunk_id < end_chunk_id; ++chunk_id) {
size_t chunk_start = std::max(chunk_id * chunk_size, start);
size_t chunk_stop = std::min((chunk_id + 1) * chunk_size, end);
if (chunk_stop <= chunk_start)
return;
GetChunkRequestV2 item;
item.user_key = key;
item.chunk_id = chunk_id;
item.chunk_start = chunk_start % chunk_size;
item.chunk_len = chunk_stop - chunk_start;
item.chunk_granularity = chunk_size;
item.buffer.data = buffer.data + buffer_offset;
item.buffer.len = item.chunk_len;
buffer_offset += item.chunk_len;
requests.emplace_back(item);
}
LOG_ASSERT(buffer_offset == size);
}
#else
// Stub used when the build lacks jerasure (CONFIG_JERASURE undefined):
// erasure-coded writes are reported as unsupported.
folly::Future<PutResult> ErasureCodingWriteCacheClient::Put(const std::string &key,
size_t size,
const ByteBuffer &buffer,
const std::map <std::string, std::string> &headers,
size_t off) {
PutResult res;
res.status = UNSUPPORTED_OPERATION;
// res.root stays default-constructed; the value converts to a ready Future.
return res;
}
// Stub for builds without jerasure: reading erasure-coded data is
// unsupported (the int converts implicitly to a ready Future).
folly::Future<int> ErasureCodingWriteCacheClient::Get(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
Json::Value &root) {
return UNSUPPORTED_OPERATION;
}
// Stub for builds without jerasure: degraded (decoding) reads are
// unsupported.
folly::Future<int> ErasureCodingWriteCacheClient::GetDecode(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
Json::Value &root) {
return UNSUPPORTED_OPERATION;
}
// Stub for builds without jerasure: no replica placement is computed.
std::vector<int> ErasureCodingWriteCacheClient::GetReplica(const std::string &key) {
return std::vector<int>{};
}
// Stub for builds without jerasure: produces no chunk requests.
void ErasureCodingWriteCacheClient::GenerateGetChunkRequestsV2(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
std::vector<GetChunkRequestV2> &requests,
size_t chunk_size) {
}
#endif

View File

@ -0,0 +1,61 @@
#ifndef MADFS_EC_WRITE_CACHE_CLIENT_H
#define MADFS_EC_WRITE_CACHE_CLIENT_H
#include "WriteCacheClient.h"
using HybridCache::ByteBuffer;
class GlobalDataAdaptor;
using PutResult = WriteCacheClient::PutResult;
// Write-cache client that persists data with Reed-Solomon erasure coding
// (k data + m parity blocks per chunk, parameters from CachePolicy). The
// implementation lives in the .cpp; without CONFIG_JERASURE every method is
// a stub returning UNSUPPORTED_OPERATION.
class ErasureCodingWriteCacheClient : public WriteCacheClient {
friend class GetChunkContext;
public:
ErasureCodingWriteCacheClient(GlobalDataAdaptor *parent) : parent_(parent) {}
~ErasureCodingWriteCacheClient() {}
// Encodes and uploads `size` bytes of `buffer` under `key`; the resulting
// PutResult carries the placement metadata JSON.
virtual folly::Future<PutResult> Put(const std::string &key,
size_t size,
const ByteBuffer &buffer,
const std::map <std::string, std::string> &headers,
size_t off = 0);
// Fast-path read touching only the data blocks covering the range.
virtual folly::Future<int> Get(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
Json::Value &root);
// Degraded read that reconstructs missing blocks from parity.
virtual folly::Future<int> GetDecode(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
Json::Value &root);
public:
// Deterministic server selection for `key` (k + m entries).
std::vector<int> GetReplica(const std::string &key);
// One per-chunk slice of a logical read request.
struct GetChunkRequestV2 {
std::string user_key;
size_t chunk_id;
size_t chunk_start;       // offset of the slice within its chunk
size_t chunk_len;
size_t chunk_granularity; // chunk size used when splitting
ByteBuffer buffer;        // window of the caller's buffer for this slice
};
// Splits [start, start + size) into per-chunk requests of `chunk_size`.
static void GenerateGetChunkRequestsV2(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
std::vector<GetChunkRequestV2> &requests,
size_t chunk_size);
private:
GlobalDataAdaptor *parent_; // owning adaptor providing RPC client, executor, policy
};
#endif // MADFS_EC_WRITE_CACHE_CLIENT_H

101
global_cache/EtcdClient.h Normal file
View File

@ -0,0 +1,101 @@
#ifndef ETCD_CLIENT_H
#define ETCD_CLIENT_H
#include <etcd/SyncClient.hpp>
#include <json/json.h>
#include <mutex>
#include "WriteCacheClient.h"
// Thin synchronous etcd client that stores JSON documents below
// GetGlobalConfig().etcd_prefix. All operations serialize on an internal
// mutex and return already-completed futures; status values are OK,
// NOT_FOUND or METADATA_ERROR.
// Fixed: DeleteJson and ListJson each contained an unreachable duplicate
// `return folly::makeFuture(METADATA_ERROR);` after an if/else in which both
// branches return; the dead statements have been removed.
class EtcdClient {
public:
    EtcdClient(const std::string &etcd_url) : client_(etcd_url) {};
    ~EtcdClient() {}

    // Result of GetJson: `root` is meaningful only when status == OK.
    struct GetResult {
        int status;
        Json::Value root;
    };

    // Fetches and parses the JSON document stored at `key`.
    folly::Future<GetResult> GetJson(const std::string &key) {
        std::lock_guard<std::mutex> lock(mutex_);
        Json::Reader reader;
        Json::Value root;
        auto resp = client_.get(PathJoin(GetGlobalConfig().etcd_prefix, key));
        if (!resp.is_ok()) {
            // Error code 100 corresponds to the not-found case (see the log
            // below); anything else is a genuine metadata failure.
            if (resp.error_code() != 100) {
                LOG(ERROR) << "Error from etcd client: " << resp.error_code()
                           << ", message: " << resp.error_message();
                return folly::makeFuture(GetResult{ METADATA_ERROR, root });
            } else {
                LOG(WARNING) << "Record not found in the etcd storage: key " << key;
                return folly::makeFuture(GetResult{ NOT_FOUND, root });
            }
        }
        if (!reader.parse(resp.value().as_string(), root)) {
            LOG(ERROR) << "Error from etcd client: failed to parse record: " << resp.value().as_string();
            return folly::makeFuture(GetResult{ METADATA_ERROR, root });
        }
        LOG(INFO) << "Record get: " << key;
        return folly::makeFuture(GetResult{ OK, root });
    }

    // Serializes `root` and stores it at `key`, overwriting any prior value.
    folly::Future<int> PutJson(const std::string &key, const Json::Value &root) {
        std::lock_guard<std::mutex> lock(mutex_);
        Json::FastWriter writer;
        const std::string json_file = writer.write(root);
        auto resp = client_.put(PathJoin(GetGlobalConfig().etcd_prefix, key), json_file);
        if (!resp.is_ok()) {
            LOG(ERROR) << "Error from etcd client: " << resp.error_code()
                       << ", message: " << resp.error_message();
            return folly::makeFuture(METADATA_ERROR);
        }
        LOG(INFO) << "Record put: " << key;
        return folly::makeFuture(OK);
    }

    // Removes the document stored at `key`.
    folly::Future<int> DeleteJson(const std::string &key) {
        std::lock_guard<std::mutex> lock(mutex_);
        auto resp = client_.rm(PathJoin(GetGlobalConfig().etcd_prefix, key));
        if (!resp.is_ok()) {
            if (resp.error_code() != 100) {
                LOG(ERROR) << "Error from etcd client: " << resp.error_code()
                           << ", message: " << resp.error_message();
                return folly::makeFuture(METADATA_ERROR);
            } else {
                LOG(WARNING) << "Record not found in the etcd storage: key " << key;
                return folly::makeFuture(NOT_FOUND);
            }
        }
        return folly::makeFuture(OK);
    }

    // Lists keys below `key_prefix` into `key_list`, with the global etcd
    // prefix stripped from each entry.
    folly::Future<int> ListJson(const std::string &key_prefix, std::vector<std::string> &key_list) {
        std::lock_guard<std::mutex> lock(mutex_);
        const std::string etcd_prefix = GetGlobalConfig().etcd_prefix;
        auto resp = client_.keys(PathJoin(etcd_prefix, key_prefix));
        if (!resp.is_ok()) {
            if (resp.error_code() != 100) {
                LOG(ERROR) << "Error from etcd client: " << resp.error_code()
                           << ", message: " << resp.error_message();
                return folly::makeFuture(METADATA_ERROR);
            } else {
                LOG(WARNING) << "Record not found in the etcd storage: key " << key_prefix;
                return folly::makeFuture(NOT_FOUND);
            }
        }
        for (auto &entry : resp.keys()) {
            key_list.push_back(entry.substr(etcd_prefix.length()));
        }
        return folly::makeFuture(OK);
    }

private:
    std::mutex mutex_;               // serializes all etcd operations
    etcd::SyncClient client_;
};
#endif // ETCD_CLIENT_H

View File

@ -0,0 +1,323 @@
#ifndef MADFS_FILE_SYSTEM_DATA_ADAPTOR_H
#define MADFS_FILE_SYSTEM_DATA_ADAPTOR_H
#include <sys/types.h>
#include <sys/stat.h>
#include <butil/time.h>
#include <unistd.h>
#include <fcntl.h>
#include <string>
#include "Common.h"
#include "data_adaptor.h"
#include <folly/File.h>
#include <folly/futures/Future.h>
#include <folly/futures/Promise.h>
#include <folly/experimental/io/SimpleAsyncIO.h>
#include <sys/statvfs.h>
using HybridCache::ByteBuffer;
using HybridCache::DataAdaptor;
// Reads exactly `n` bytes from `fd` at `offset` into `buf`, retrying short
// reads. Returns the number of bytes read (< n only at end-of-file) or -1 on
// error. Fixed: arithmetic on void* is a GNU extension (now cast to char*),
// and interrupted reads (EINTR) are retried like EAGAIN.
static inline ssize_t fully_pread(int fd, void* buf, size_t n, size_t offset) {
    size_t total_read = 0;
    while (total_read < n) {
        ssize_t bytes_read = pread(fd, static_cast<char*>(buf) + total_read, n - total_read, offset);
        if (bytes_read < 0) {
            if (errno == EAGAIN || errno == EINTR) continue;
            return -1;
        }
        if (bytes_read == 0) {
            break; // end of file
        }
        total_read += bytes_read;
        offset += bytes_read;
    }
    return static_cast<ssize_t>(total_read);
}
// Writes exactly `n` bytes from `buf` to `fd` at `offset`, retrying short
// writes. Returns the number of bytes written or -1 on error. Fixed:
// arithmetic on void* is a GNU extension (now cast to char*), and
// interrupted writes (EINTR) are retried like EAGAIN.
static inline ssize_t fully_pwrite(int fd, void* buf, size_t n, size_t offset) {
    size_t total_written = 0;
    while (total_written < n) {
        ssize_t bytes_written = pwrite(fd, static_cast<char*>(buf) + total_written, n - total_written, offset);
        if (bytes_written < 0) {
            if (errno == EAGAIN || errno == EINTR) continue;
            return -1;
        }
        if (bytes_written == 0) {
            break; // device refused further data; report the partial count
        }
        total_written += bytes_written;
        offset += bytes_written;
    }
    return static_cast<ssize_t>(total_written);
}
// DataAdaptor that stores each key as a regular file below `prefix_` on a
// local file system. An optional `base_adaptor_` acts as the next tier:
// DownLoad falls back to it on a local miss, UpLoad/Delete forward to it
// after the local operation succeeds, and Head delegates to it entirely
// when present.
// Fixes relative to the original: the full-object temporary buffer leaked
// on the DownLoad error path; the ASYNC_IO upload callback returned
// folly::Future<int> from a void lambda, fell through after a failed write,
// and used fsync_required_ without capturing `this`; the write-failure log
// said "read" instead of "write".
class FileSystemDataAdaptor : public DataAdaptor {
    const std::string prefix_;                                // root directory prepended to every key
    std::shared_ptr<DataAdaptor> base_adaptor_;               // optional backing tier (may be nullptr)
    bool use_optimized_path_;                                 // spread keys over hashed sub-directories
    std::shared_ptr<folly::CPUThreadPoolExecutor> executor_;  // completion executor for ASYNC_IO builds
    bool fsync_required_;                                     // fsync() uploads before reporting success
public:
    FileSystemDataAdaptor(const std::string &prefix = "",
                          std::shared_ptr<DataAdaptor> base_adaptor = nullptr,
                          bool use_optimized_path = false,
                          std::shared_ptr<folly::CPUThreadPoolExecutor> executor = nullptr,
                          bool fsync_required = true)
            : prefix_(prefix),
              base_adaptor_(base_adaptor),
              use_optimized_path_(use_optimized_path),
              executor_(executor),
              fsync_required_(fsync_required) {}

    ~FileSystemDataAdaptor() {}

    // Reads `size` bytes of `key` starting at byte `start` into `buffer`.
    // On a local miss the whole object is fetched from `base_adaptor_` (if
    // any) and the requested window is copied out.
    virtual folly::Future<int> DownLoad(const std::string &key,
                                        size_t start,
                                        size_t size,
                                        ByteBuffer &buffer) {
        LOG_IF(INFO, FLAGS_verbose) << "Download key: " << key << ", start: " << start << ", size: " << size;
        if (!buffer.data || buffer.len < size) {
            LOG(ERROR) << "Buffer capacity is not enough, expected " << size
                       << ", actual " << buffer.len;
            return folly::makeFuture(INVALID_ARGUMENT);
        }
        auto path = BuildPath(prefix_, key);
        if (access(path.c_str(), F_OK)) {
            if (base_adaptor_) {
#if 1
                // Fetch the full object once from the backing tier, then copy
                // the requested range into the caller's buffer.
                size_t full_size;
                std::map<std::string, std::string> headers;
                if (base_adaptor_->Head(key, full_size, headers).get()) {
                    LOG(ERROR) << "Fail to retrive metadata of key: " << key;
                    return folly::makeFuture(IO_ERROR);
                }
                ByteBuffer tmp_buffer(new char[full_size], full_size);
                return base_adaptor_->DownLoad(key, 0, full_size, tmp_buffer).thenValue([buffer, tmp_buffer, start, size, key](int rc) -> int {
                    if (rc) {
                        LOG(ERROR) << "Fail to retrive data of key: " << key;
                        delete []tmp_buffer.data;  // fixed: this buffer leaked on the error path
                        return IO_ERROR;
                    }
                    memcpy(buffer.data, tmp_buffer.data + start, size);
                    delete []tmp_buffer.data;
                    return OK;
                });
#else
                return base_adaptor_->DownLoad(key, start, size, buffer);
#endif
            } else if (errno == ENOENT) {
                LOG_IF(ERROR, FLAGS_verbose) << "File not found: " << path;
                return folly::makeFuture(NOT_FOUND);
            } else {
                PLOG(ERROR) << "Fail inaccessible: " << path;
                return folly::makeFuture(IO_ERROR);
            }
        }
        butil::Timer t;
        t.start();
        const bool kUseDirectIO = false; // ((uint64_t) buffer.data & 4095) == 0 && (size & 4095) == 0;
        int flags = O_RDONLY;
        flags |= kUseDirectIO ? O_DIRECT : 0;
        int fd = open(path.c_str(), flags);
        if (fd < 0) {
            PLOG(ERROR) << "Fail to open file: " << path;
            return folly::makeFuture(IO_ERROR);
        }
#ifdef ASYNC_IO
        if (kUseDirectIO) {
            // Asynchronous read; the completion callback runs on executor_.
            thread_local folly::SimpleAsyncIO aio(folly::SimpleAsyncIO::Config().setCompletionExecutor(executor_.get()));
            auto promise = std::make_shared<folly::Promise<int>>();
            aio.pread(fd, buffer.data, size, start, [key, size, promise, fd](int rc) {
                if (rc != size) {
                    PLOG(ERROR) << "Fail to read file: " << key
                                << ", expected read " << size
                                << ", actual read " << rc;
                    close(fd);
                    promise->setValue(IO_ERROR);
                } else {
                    close(fd);
                    promise->setValue(OK);
                }
            });
            return promise->getFuture();
        }
#endif
        ssize_t nbytes = fully_pread(fd, buffer.data, size, start);
        if (nbytes != size) {
            PLOG(ERROR) << "Fail to read file: " << key
                        << ", expected read " << size
                        << ", actual read " << nbytes;
            close(fd);
            return folly::makeFuture(IO_ERROR);
        }
        t.stop();
        close(fd);
        return folly::makeFuture(OK);
    }

    // Writes `size` bytes of `buffer` to the local file for `key`, truncates
    // the file to exactly `size`, optionally fsync()s it, and finally
    // forwards the upload to `base_adaptor_` when one is configured.
    virtual folly::Future<int> UpLoad(const std::string &key,
                                      size_t size,
                                      const ByteBuffer &buffer,
                                      const std::map <std::string, std::string> &headers) {
        butil::Timer t;
        t.start();
        LOG_IF(INFO, FLAGS_verbose) << "Upload key: " << key << ", size: " << size;
        if (!buffer.data || buffer.len < size) {
            LOG(ERROR) << "Buffer capacity is not enough, expected " << size
                       << ", actual " << buffer.len;
            return folly::makeFuture(INVALID_ARGUMENT);
        }
        auto path = BuildPath(prefix_, key);
        if (CreateParentDirectories(path)) {
            return folly::makeFuture(IO_ERROR);
        }
        t.stop();
        const bool kUseDirectIO = false; // ((uint64_t) buffer.data & 4095) == 0 && (size & 4095) == 0;
        int flags = O_WRONLY | O_CREAT;
        flags |= kUseDirectIO ? O_DIRECT : 0;
        int fd = open(path.c_str(), flags, 0644);
        if (fd < 0) {
            PLOG(ERROR) << "Fail to open file: " << path;
            return folly::makeFuture(IO_ERROR);
        }
#ifdef ASYNC_IO
        if (kUseDirectIO) {
            thread_local folly::SimpleAsyncIO aio(folly::SimpleAsyncIO::Config().setCompletionExecutor(executor_.get()));
            auto promise = std::make_shared<folly::Promise<int>>();
            // fixed: the callback is void, so failures must go through the
            // promise; `this` is captured for fsync_required_.
            aio.pwrite(fd, buffer.data, size, 0, [this, key, size, promise, fd](int rc) {
                if (rc != size) {
                    PLOG(ERROR) << "Fail to write file: " << key
                                << ", expected " << size
                                << ", actual " << rc;
                    close(fd);
                    promise->setValue(IO_ERROR);
                    return;
                }
                if (ftruncate64(fd, size) < 0) {
                    PLOG(ERROR) << "Fail to truncate file: " << key;
                    close(fd);
                    promise->setValue(IO_ERROR);
                    return;
                }
                if (fsync_required_ && fsync(fd) < 0) {
                    PLOG(ERROR) << "Fail to sync file: " << key;
                    close(fd);
                    promise->setValue(IO_ERROR);
                    return;
                }
                close(fd);
                promise->setValue(OK);
            });
            return promise->getFuture();
        }
#endif
        ssize_t nbytes = fully_pwrite(fd, buffer.data, size, 0);
        if (nbytes != size) {
            PLOG(ERROR) << "Fail to write file: " << key
                        << ", expected write " << size
                        << ", actual write " << nbytes;
            close(fd);
            return folly::makeFuture(IO_ERROR);
        }
        t.stop();
        // Drop any stale tail left by a previous, larger version of the file.
        if (ftruncate64(fd, size) < 0) {
            PLOG(ERROR) << "Fail to truncate file: " << key;
            close(fd);
            return folly::makeFuture(IO_ERROR);
        }
        t.stop();
        if (fsync_required_ && fsync(fd) < 0) {
            PLOG(ERROR) << "Fail to sync file: " << key;
            close(fd);
            return folly::makeFuture(IO_ERROR);
        }
        close(fd);
        if (base_adaptor_) {
            return base_adaptor_->UpLoad(key, size, buffer, headers);
        }
        t.stop();
        return folly::makeFuture(OK);
    }

    // Removes the local file for `key`; forwards the delete to
    // `base_adaptor_` when present.
    virtual folly::Future<int> Delete(const std::string &key) {
        LOG_IF(INFO, FLAGS_verbose) << "Delete key: " << key;
        auto path = BuildPath(prefix_, key);
        if (remove(path.c_str())) {
            if (errno == ENOENT) {
                LOG_IF(ERROR, FLAGS_verbose) << "File not found: " << path;
                return folly::makeFuture(NOT_FOUND);
            } else {
                PLOG(ERROR) << "Failed to remove file: " << path;
                return folly::makeFuture(IO_ERROR);
            }
        }
        if (base_adaptor_) {
            return base_adaptor_->Delete(key);
        }
        return folly::makeFuture(OK);
    }

    // Queries object size (and headers, via the backing tier). When a base
    // adaptor exists it is authoritative and the local file is ignored.
    virtual folly::Future<int> Head(const std::string &key,
                                    size_t &size,
                                    std::map <std::string, std::string> &headers) {
        LOG_IF(INFO, FLAGS_verbose) << "Head key: " << key;
        if (base_adaptor_) {
            return base_adaptor_->Head(key, size, headers);
        }
        auto path = BuildPath(prefix_, key);
        struct stat st;
        if (access(path.c_str(), F_OK)) {
            if (errno == ENOENT) {
                LOG_IF(ERROR, FLAGS_verbose) << "File not found: " << path;
                return folly::makeFuture(NOT_FOUND);
            } else {
                PLOG(ERROR) << "Failed to access file: " << path;
                return folly::makeFuture(IO_ERROR);
            }
        }
        if (stat(path.c_str(), &st)) {
            PLOG(ERROR) << "Fail to state file: " << path;
            return folly::makeFuture(IO_ERROR);
        }
        size = st.st_size;
        return folly::makeFuture(OK);
    }

    // Maps a key to its on-disk path. With use_optimized_path_, keys are
    // spread across <hash%256>/<hash%65536>/ sub-directories to limit
    // per-directory fan-out.
    std::string BuildPath(const std::string &prefix, const std::string &key) {
        if (use_optimized_path_) {
            std::size_t h1 = std::hash<std::string>{}(key);
            std::string suffix = std::to_string(h1 % 256) + '/' + std::to_string(h1 % 65536) + '/' + key;
            return PathJoin(prefix, suffix);
        } else {
            return PathJoin(prefix, key);
        }
    }
};
#endif // MADFS_FILE_SYSTEM_DATA_ADAPTOR_H

View File

@ -0,0 +1,50 @@
#include <vector>
#include <thread>
#include <atomic>
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "S3DataAdaptor.h"
#include "FileSystemDataAdaptor.h"
#include "GlobalDataAdaptor.h"
#include "ReadCacheClient.h"
#include "GlobalCacheServer.h"
#include "S3DataAdaptor.h"
DEFINE_string(data_server, "0.0.0.0:8000", "IP address of global data servers");
DEFINE_string(etcd_server, "http://127.0.0.1:2379", "Location of etcd server");
DEFINE_string(prefix, "", "Key prefix for garbage collection");
DEFINE_bool(use_s3, false, "Use S3 storage");
// Splits a comma-separated string into its fields. Interior empty fields
// ("x,,y") are preserved; an empty input yields an empty vector.
std::vector<std::string> SplitString(const std::string &input) {
    std::vector<std::string> fields;
    std::stringstream stream(input);
    for (std::string field; std::getline(stream, field, ','); ) {
        fields.push_back(field);
    }
    return fields;
}
int main(int argc, char *argv[]) {
std::cerr << YELLOW << "MADFS GC TOOL" << WHITE << std::endl;
gflags::ParseCommandLineFlags(&argc, &argv, true);
auto etcd_client = std::make_shared<EtcdClient>(FLAGS_etcd_server);
std::shared_ptr<DataAdaptor> base_adaptor;
if (FLAGS_use_s3) {
base_adaptor = std::make_shared<S3DataAdaptor>();
} else {
base_adaptor = std::make_shared<FileSystemDataAdaptor>();
}
auto global_adaptor = std::make_shared<GlobalDataAdaptor>(base_adaptor, SplitString(FLAGS_data_server), etcd_client);
if (global_adaptor->PerformGarbageCollection(FLAGS_prefix)) {
std::cerr << RED << "Garbage collection failed!" << WHITE << std::endl;
exit(EXIT_FAILURE);
} else {
std::cerr << GREEN << "Garbage collection successfully" << WHITE << std::endl;
exit(EXIT_SUCCESS);
}
}

View File

@ -0,0 +1,368 @@
#include <gflags/gflags.h>
#include <butil/logging.h>
#include <butil/time.h>
#include <map>
#include "gcache.pb.h"
#include "GlobalCacheClient.h"
// Constructs a client for connection group `group`; channels are created
// later via RegisterServer().
GlobalCacheClient::GlobalCacheClient(const std::string &group) : group_(group), inflight_payload_size_(0) {}
// Tears down every brpc channel this client owns.
GlobalCacheClient::~GlobalCacheClient() {
    for (auto it = server_map_.begin(); it != server_map_.end(); ++it) {
        delete it->second;
    }
    server_map_.clear();
}
// Creates a brpc channel to `hostname` (with fixed-backoff retries), performs
// a synchronous Register RPC, and stores the channel under `server_id`.
// Returns OK on success, RPC_FAILED otherwise.
// NOTE(review): re-registering an id overwrites the map entry without
// deleting the previous channel object — it is leaked, possibly deliberately
// since in-flight RPCs may still reference it; confirm intended.
int GlobalCacheClient::RegisterServer(int server_id, const char *hostname) {
std::lock_guard <std::mutex> lock(mutex_);
if (server_map_.count(server_id)) {
LOG(WARNING) << "Server has been registered, previous regitration will be override"
<< ", group: " << group_
<< ", server_id: " << server_id
<< ", hostname: " << hostname;
}
brpc::ChannelOptions options;
options.use_rdma = GetGlobalConfig().use_rdma;
options.timeout_ms = GetGlobalConfig().rpc_timeout;
options.connection_group = group_;
int32_t fixed_backoff_time_ms = 100; // fixed retry backoff interval (milliseconds)
int32_t no_backoff_remaining_rpc_time_ms = 150; // skip backoff when remaining RPC time drops below this threshold (milliseconds)
bool retry_backoff_in_pthread = false;
static brpc::RpcRetryPolicyWithFixedBackoff g_retry_policy_with_fixed_backoff(
fixed_backoff_time_ms, no_backoff_remaining_rpc_time_ms, retry_backoff_in_pthread);
options.retry_policy = &g_retry_policy_with_fixed_backoff;
options.max_retry = 5;
auto channel = new brpc::Channel();
if (channel->Init(hostname, &options)) {
PLOG(ERROR) << "Unable to initialize channel object"
<< ", group: " << group_
<< ", server_id: " << server_id
<< ", hostname: " << hostname;
delete channel;
return RPC_FAILED;
}
// Sending sync register RPC
gcache::GlobalCacheService_Stub stub(channel);
brpc::Controller cntl;
gcache::RegisterRequest request;
gcache::RegisterResponse response;
stub.Register(&cntl, &request, &response, nullptr);
if (cntl.Failed() || response.status_code() != OK) {
LOG(ERROR) << "Failed to register server, reason: " << cntl.ErrorText()
<< ", group: " << group_
<< ", server_id: " << server_id
<< ", hostname: " << hostname;
delete channel;
return RPC_FAILED;
}
LOG_IF(INFO, FLAGS_verbose) << "Register server successfully"
<< ", group: " << group_
<< ", server_id: " << server_id
<< ", hostname: " << hostname;
server_map_[server_id] = channel;
return OK;
}
// Looks up the channel registered for `server_id`; returns nullptr (with a
// rate-limited error log) when the server was never registered.
brpc::Channel *GlobalCacheClient::GetChannelByServerId(int server_id) {
    std::lock_guard <std::mutex> lock(mutex_);
    auto iter = server_map_.find(server_id);
    if (iter == server_map_.end()) {
        LOG_EVERY_SECOND(ERROR) << "Server not registered. server_id: " << server_id;
        return nullptr;
    }
    return iter->second;
}
// Issues an asynchronous GetEntry RPC against `server_id`, reading bytes
// [start, start + length) of `key` from either the read cache or the write
// cache. The returned future resolves with RPC_FAILED on transport errors
// or a truncated attachment, otherwise with the server-reported status and
// the payload in `buf`.
Future<GetOutput> GlobalCacheClient::GetEntry(int server_id,
                                              const std::string &key,
                                              uint64_t start,
                                              uint64_t length,
                                              bool is_read_cache) {
    // while (inflight_payload_size_.load() >= GetGlobalConfig().max_inflight_payload_size) {
    //     LOG_EVERY_SECOND(INFO) << "Overcroweded " << inflight_payload_size_.load();
    //     sched_yield();
    // }
    auto channel = GetChannelByServerId(server_id);
    if (!channel) {
        GetOutput output;
        output.status = RPC_FAILED;
        return folly::makeFuture(output);
    }
    // Fix: account for the in-flight payload only after the channel lookup
    // succeeds. The original incremented the counter first and returned early
    // on a null channel without the matching fetch_sub, leaking it forever.
    inflight_payload_size_.fetch_add(length);
    gcache::GlobalCacheService_Stub stub(channel);
    gcache::GetEntryRequest request;
    request.set_key(key);
    request.set_start(start);
    request.set_length(length);
    // Self-deleting completion closure: owns the controller, response and
    // promise for the async call; Run() executes on brpc's completion thread,
    // fulfills the promise, releases the in-flight bytes, then frees itself.
    struct OnRPCDone : public google::protobuf::Closure {
        virtual void Run() {
            GetOutput output;
            if (cntl.Failed()) {
                LOG(WARNING) << "RPC error: " << cntl.ErrorText()
                             << ", server id: " << server_id
                             << ", key: " << key
                             << ", start: " << start
                             << ", length: " << length;
                output.status = RPC_FAILED;
            } else {
                output.status = response.status_code();
                output.buf = cntl.response_attachment();
                // A short attachment on an OK status means the payload was
                // truncated in transit; surface it as an RPC failure so the
                // caller can fall back.
                if (output.status == OK && output.buf.length() != length) {
                    LOG(WARNING) << "Received truncated attachment, expected " << length
                                 << " bytes, actual " << output.buf.length() << " bytes"
                                 << ", server id: " << server_id
                                 << ", key: " << key
                                 << ", start: " << start
                                 << ", length: " << length;
                    output.status = RPC_FAILED;
                }
            }
            promise.setValue(output);
            parent->inflight_payload_size_.fetch_sub(length);
            t.stop();
            LOG_EVERY_N(INFO, 1000) << t.u_elapsed();
            delete this;
        }
        brpc::Controller cntl;
        gcache::GetEntryResponse response;
        Promise<GetOutput> promise;
        int server_id;
        std::string key;
        uint64_t start;
        uint64_t length;
        GlobalCacheClient *parent;
        butil::Timer t;
    };
    auto done = new OnRPCDone();
    done->t.start();
    done->parent = this;
    done->server_id = server_id;
    done->key = key;
    done->start = start;
    done->length = length;
    auto future = done->promise.getFuture();
    if (is_read_cache)
        stub.GetEntryFromReadCache(&done->cntl, &request, &done->response, done);
    else
        stub.GetEntryFromWriteCache(&done->cntl, &request, &done->response, done);
    return std::move(future);
}
// Issues an asynchronous PutEntry RPC that stores `length` bytes of `buf`
// under `key` in the read or write cache of `server_id`. The future carries
// the server status and (for the write cache) the server-assigned internal
// key.
Future<PutOutput> GlobalCacheClient::PutEntry(int server_id,
                                              const std::string &key,
                                              const ByteBuffer &buf,
                                              uint64_t length,
                                              bool is_read_cache) {
    // while (inflight_payload_size_.load() >= GetGlobalConfig().max_inflight_payload_size) {
    //     LOG_EVERY_SECOND(INFO) << "Overcroweded " << inflight_payload_size_.load();
    //     sched_yield();
    // }
    auto channel = GetChannelByServerId(server_id);
    if (!channel) {
        PutOutput output;
        output.status = RPC_FAILED;
        return folly::makeFuture(output);
    }
    // Fix: increment the in-flight counter only once the RPC will actually
    // be issued; the original leaked the counter on the early-return path
    // because no matching fetch_sub ever ran.
    inflight_payload_size_.fetch_add(length);
    gcache::GlobalCacheService_Stub stub(channel);
    gcache::PutEntryRequest request;
    request.set_key(key);
    request.set_length(length);
    // Self-deleting completion closure (see GetEntry for the pattern).
    struct OnRPCDone : public google::protobuf::Closure {
        virtual void Run() {
            PutOutput output;
            if (cntl.Failed()) {
                LOG(WARNING) << "RPC error: " << cntl.ErrorText()
                             << ", server id: " << server_id
                             << ", key: " << key
                             << ", length: " << length;
                output.status = RPC_FAILED;
            } else {
                output.status = response.status_code();
                output.internal_key = response.internal_key();
            }
            promise.setValue(output);
            parent->inflight_payload_size_.fetch_sub(length);
            delete this;
        }
        brpc::Controller cntl;
        gcache::PutEntryResponse response;
        Promise<PutOutput> promise;
        int server_id;
        std::string key;
        uint64_t length;
        GlobalCacheClient *parent;
    };
    auto done = new OnRPCDone();
    done->parent = this;
    done->server_id = server_id;
    done->key = key;
    done->length = length;
    // The payload travels as the request attachment, not as a proto field.
    done->cntl.request_attachment().append(buf.data, length);
    auto future = done->promise.getFuture();
    if (is_read_cache)
        stub.PutEntryFromReadCache(&done->cntl, &request, &done->response, done);
    else
        stub.PutEntryFromWriteCache(&done->cntl, &request, &done->response, done);
    return std::move(future);
}
// Asynchronously deletes `key` from the read cache of `server_id`.
// `chunk_size` and `max_chunk_id` tell the server which chunk range to drop.
// The future resolves with the server status code, or RPC_FAILED when no
// channel exists or the transport fails.
Future<int> GlobalCacheClient::DeleteEntryFromReadCache(int server_id,
                                                        const std::string &key,
                                                        uint64_t chunk_size,
                                                        uint64_t max_chunk_id) {
    auto channel = GetChannelByServerId(server_id);
    if (!channel) {
        LOG(ERROR) << "Cannot find channel for server " << server_id;
        return folly::makeFuture(RPC_FAILED);
    }
    gcache::GlobalCacheService_Stub stub(channel);
    gcache::DeleteEntryRequest request;
    request.set_key(key);
    request.set_chunk_size(chunk_size);
    request.set_max_chunk_id(max_chunk_id);
    // Self-deleting completion closure: fulfills `promise` on the RPC
    // completion thread, then frees itself.
    struct OnRPCDone : public google::protobuf::Closure {
        virtual void Run() {
            int status;
            if (cntl.Failed()) {
                LOG(WARNING) << "RPC error: " << cntl.ErrorText()
                             << ", server id: " << server_id
                             << ", key: " << key;
                status = RPC_FAILED;
            } else {
                status = response.status_code();
            }
            promise.setValue(status);
            delete this;
        }
        brpc::Controller cntl;
        gcache::DeleteEntryResponse response;
        Promise<int> promise;
        int server_id;
        std::string key;
    };
    auto done = new OnRPCDone();
    done->server_id = server_id;
    done->key = key;
    auto future = done->promise.getFuture();
    stub.DeleteEntryFromReadCache(&done->cntl, &request, &done->response, done);
    return std::move(future);
}
// Asynchronously queries the write-cache timestamp of `server_id`.
// The future carries the server status and, on success, the timestamp.
Future<QueryTsOutput> GlobalCacheClient::QueryTsFromWriteCache(int server_id) {
    auto channel = GetChannelByServerId(server_id);
    if (!channel) {
        QueryTsOutput output;
        output.status = RPC_FAILED;
        return folly::makeFuture(output);
    }
    gcache::GlobalCacheService_Stub stub(channel);
    gcache::QueryTsRequest request;
    // Self-deleting completion closure (same pattern as GetEntry/PutEntry).
    struct OnRPCDone : public google::protobuf::Closure {
        virtual void Run() {
            QueryTsOutput output;
            if (cntl.Failed()) {
                LOG(WARNING) << "RPC error: " << cntl.ErrorText()
                             << ", server id: " << server_id;
                output.status = RPC_FAILED;
            } else {
                output.status = response.status_code();
                output.timestamp = response.timestamp();
            }
            promise.setValue(output);
            delete this;
        }
        brpc::Controller cntl;
        gcache::QueryTsResponse response;
        Promise<QueryTsOutput> promise;
        int server_id;
    };
    auto done = new OnRPCDone();
    done->server_id = server_id;
    auto future = done->promise.getFuture();
    stub.QueryTsFromWriteCache(&done->cntl, &request, &done->response, done);
    return std::move(future);
}
// Asynchronously deletes write-cache entries on `server_id` whose keys start
// with `key_prefix` and whose timestamp is at most `max_ts`, sparing any key
// listed in `except_keys`. The future resolves with the server status code,
// or RPC_FAILED on channel/transport failure.
Future<int> GlobalCacheClient::DeleteEntryFromWriteCache(int server_id,
                                                         const std::string &key_prefix,
                                                         uint64_t max_ts,
                                                         std::vector<std::string> &except_keys) {
    auto channel = GetChannelByServerId(server_id);
    if (!channel) {
        LOG(ERROR) << "Cannot find channel for server " << server_id;
        return folly::makeFuture(RPC_FAILED);
    }
    gcache::GlobalCacheService_Stub stub(channel);
    gcache::DeleteEntryRequestForWriteCache request;
    request.set_key_prefix(key_prefix);
    request.set_max_ts(max_ts);
    for (auto &entry : except_keys)
        request.add_except_keys(entry);
    // Self-deleting completion closure. Note: `key` holds the prefix here;
    // the warning log prints it under the "key:" label.
    struct OnRPCDone : public google::protobuf::Closure {
        virtual void Run() {
            int status;
            if (cntl.Failed()) {
                LOG(WARNING) << "RPC error: " << cntl.ErrorText()
                             << ", server id: " << server_id
                             << ", key: " << key;
                status = RPC_FAILED;
            } else {
                status = response.status_code();
            }
            promise.setValue(status);
            delete this;
        }
        brpc::Controller cntl;
        gcache::DeleteEntryResponse response;
        Promise<int> promise;
        int server_id;
        std::string key;
    };
    auto done = new OnRPCDone();
    done->server_id = server_id;
    done->key = key_prefix;
    auto future = done->promise.getFuture();
    stub.DeleteEntryFromWriteCache(&done->cntl, &request, &done->response, done);
    return std::move(future);
}

View File

@ -0,0 +1,62 @@
#ifndef MADFS_GLOBAL_CACHE_CLIENT_H
#define MADFS_GLOBAL_CACHE_CLIENT_H
#include <brpc/channel.h>
#include <atomic>
#include <string>
#include <mutex>
#include <map>
#include "Common.h"
#include "common.h"
using HybridCache::ByteBuffer;
// Client-side stub for the MADFS global cache service. One instance keeps a
// brpc channel per registered server and exposes asynchronous read-/write-
// cache operations that return folly futures.
// Thread-safety: the server map is guarded by mutex_; the in-flight payload
// byte counter is atomic.
class GlobalCacheClient {
public:
    // `group` names the brpc connection group shared by this client's channels.
    GlobalCacheClient(const std::string &group = "");
    ~GlobalCacheClient();
    // Connects to `hostname`, probes it with a synchronous Register RPC, and
    // stores the channel under `server_id`. Returns OK or RPC_FAILED.
    int RegisterServer(int server_id, const char *hostname);
    // Reads [start, start + length) of `key` from the read cache.
    Future<GetOutput> GetEntryFromReadCache(int server_id, const std::string &key, uint64_t start, uint64_t length) {
        return GetEntry(server_id, key, start, length, true);
    }
    // Stores `length` bytes of `buf` under `key` in the read cache.
    Future<PutOutput> PutEntryFromReadCache(int server_id, const std::string &key, const ByteBuffer &buf, uint64_t length) {
        return PutEntry(server_id, key, buf, length, true);
    }
    // Deletes chunks of `key` (up to `max_chunk_id`) from the read cache.
    Future<int> DeleteEntryFromReadCache(int server_id, const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id);
    // Reads [start, start + length) of `key` from the write cache.
    Future<GetOutput> GetEntryFromWriteCache(int server_id, const std::string &key, uint64_t start, uint64_t length){
        return GetEntry(server_id, key, start, length, false);
    }
    // Stores `length` bytes of `buf` under `key` in the write cache.
    Future<PutOutput> PutEntryFromWriteCache(int server_id, const std::string &key, const ByteBuffer &buf, uint64_t length){
        return PutEntry(server_id, key, buf, length, false);
    }
    // Queries the write-cache timestamp of `server_id`.
    Future<QueryTsOutput> QueryTsFromWriteCache(int server_id);
    // Prefix-deletes write-cache entries up to `max_ts`, sparing `except_keys`.
    Future<int> DeleteEntryFromWriteCache(int server_id,
                                          const std::string &key_prefix,
                                          uint64_t max_ts,
                                          std::vector<std::string> &except_keys);
private:
    // Returns the channel for `server_id`, or nullptr if unregistered.
    brpc::Channel *GetChannelByServerId(int server_id);
    // Shared implementations behind the read-/write-cache wrappers above.
    Future<GetOutput> GetEntry(int server_id, const std::string &key, uint64_t start, uint64_t length, bool is_read_cache);
    Future<PutOutput> PutEntry(int server_id, const std::string &key, const ByteBuffer &buf, uint64_t length, bool is_read_cache);
private:
    std::mutex mutex_;                            // guards server_map_
    const std::string group_;                     // brpc connection group name
    std::map<int, brpc::Channel *> server_map_;   // server_id -> owned channel
    std::atomic<uint64_t> inflight_payload_size_; // bytes of in-flight RPC payload
};
#endif // MADFS_GLOBAL_CACHE_CLIENT_H

View File

@ -0,0 +1,107 @@
#include "GlobalCacheServer.h"
namespace gcache {
// Builds the service: a ReadCache constructed with `base_adaptor` (may be
// nullptr — presumably used for miss handling; confirm in ReadCache) and a
// WriteCache, both sharing the same executor.
GlobalCacheServiceImpl::GlobalCacheServiceImpl(std::shared_ptr<folly::CPUThreadPoolExecutor> executor,
                                               std::shared_ptr<DataAdaptor> base_adaptor)
        : executor_(executor) {
    read_cache_ = std::make_shared<ReadCache>(executor_, base_adaptor);
    write_cache_ = std::make_shared<WriteCache>(executor_);
}
// Asynchronous ranged read from the read cache. The payload is appended to
// the RPC response attachment and `done` is run from the cache's
// continuation, completing the brpc call.
void GlobalCacheServiceImpl::GetEntryFromReadCache(google::protobuf::RpcController *cntl_base,
                                                   const GetEntryRequest *request,
                                                   GetEntryResponse *response,
                                                   google::protobuf::Closure *done) {
    brpc::Controller *cntl = static_cast<brpc::Controller *>(cntl_base);
    read_cache_->Get(request->key(), request->start(), request->length())
            .thenValue([this, cntl, request, done, response](GetOutput output) {
                response->set_status_code(output.status);
                // Timer around the attachment copy, kept for ad-hoc profiling.
                butil::Timer t;
                t.start();
                cntl->response_attachment().append(output.buf);
                t.stop();
                // LOG_EVERY_N(INFO, 1000) << t.u_elapsed();
                done->Run();
            });
}
// Synchronously inserts the request attachment into the read cache under the
// requested key and reports the resulting status code.
void GlobalCacheServiceImpl::PutEntryFromReadCache(google::protobuf::RpcController *cntl_base,
                                                   const PutEntryRequest *request,
                                                   PutEntryResponse *response,
                                                   google::protobuf::Closure *done) {
    auto *controller = static_cast<brpc::Controller *>(cntl_base);
    const auto status = read_cache_->Put(request->key(), request->length(), controller->request_attachment());
    response->set_status_code(status);
    done->Run();
}
// Deletes `key` from the read cache. When the request carries an explicit
// chunk range (chunk_size + max_chunk_id) only that range is dropped;
// otherwise the whole key is deleted.
void GlobalCacheServiceImpl::DeleteEntryFromReadCache(google::protobuf::RpcController *cntl_base,
                                                      const DeleteEntryRequest *request,
                                                      DeleteEntryResponse *response,
                                                      google::protobuf::Closure *done) {
    // Fix: removed the unused `cntl` local — deletes carry no attachment,
    // so the controller is never needed here.
    (void) cntl_base;
    if (request->has_chunk_size() && request->has_max_chunk_id()) {
        response->set_status_code(read_cache_->Delete(request->key(),
                                                      request->chunk_size(),
                                                      request->max_chunk_id()));
    } else {
        response->set_status_code(read_cache_->Delete(request->key()));
    }
    done->Run();
}
// Synchronously serves a ranged read from the write cache; the payload (if
// any) travels back as the RPC response attachment.
void GlobalCacheServiceImpl::GetEntryFromWriteCache(google::protobuf::RpcController *cntl_base,
                                                    const GetEntryRequest *request,
                                                    GetEntryResponse *response,
                                                    google::protobuf::Closure *done) {
    auto *controller = static_cast<brpc::Controller *>(cntl_base);
    auto result = write_cache_->Get(request->key(), request->start(), request->length());
    response->set_status_code(result.status);
    controller->response_attachment().append(result.buf);
    done->Run();
}
// Synchronously stores the request attachment in the write cache and returns
// both the status and the internal key assigned by the cache.
void GlobalCacheServiceImpl::PutEntryFromWriteCache(google::protobuf::RpcController *cntl_base,
                                                    const PutEntryRequest *request,
                                                    PutEntryResponse *response,
                                                    google::protobuf::Closure *done) {
    auto *controller = static_cast<brpc::Controller *>(cntl_base);
    auto result = write_cache_->Put(request->key(), request->length(), controller->request_attachment());
    response->set_status_code(result.status);
    response->set_internal_key(result.internal_key);
    done->Run();
}
// Prefix-deletes write-cache entries up to `max_ts`; keys listed in
// `except_keys` survive the pass.
void GlobalCacheServiceImpl::DeleteEntryFromWriteCache(google::protobuf::RpcController *cntl_base,
                                                       const DeleteEntryRequestForWriteCache *request,
                                                       DeleteEntryResponse *response,
                                                       google::protobuf::Closure *done) {
    // Fix: removed the unused `cntl` local; the controller is not needed.
    (void) cntl_base;
    std::unordered_set<std::string> except_keys;
    for (auto &entry : request->except_keys()) {
        except_keys.insert(entry);
    }
    auto output = write_cache_->Delete(request->key_prefix(), request->max_ts(), except_keys);
    response->set_status_code(output);
    done->Run();
}
// Reports the write cache's current timestamp via QueryTS(); always OK.
void GlobalCacheServiceImpl::QueryTsFromWriteCache(google::protobuf::RpcController *cntl_base,
                                                   const QueryTsRequest *request,
                                                   QueryTsResponse *response,
                                                   google::protobuf::Closure *done) {
    // Fix: removed the unused `cntl` local; neither the controller nor the
    // (field-less) request is consulted.
    (void) cntl_base;
    (void) request;
    response->set_timestamp(write_cache_->QueryTS());
    response->set_status_code(OK);
    done->Run();
}
// Connectivity-probe RPC used by clients at registration time; it carries no
// payload and always answers OK.
void GlobalCacheServiceImpl::Register(google::protobuf::RpcController *cntl_base,
                                      const RegisterRequest *request,
                                      RegisterResponse *response,
                                      google::protobuf::Closure *done) {
    // Fix: removed the unused `cntl` local; nothing in the controller or the
    // request is read.
    (void) cntl_base;
    (void) request;
    response->set_status_code(OK);
    done->Run();
}
}

View File

@ -0,0 +1,74 @@
#ifndef MADFS_GLOBAL_CACHE_SERVER_H
#define MADFS_GLOBAL_CACHE_SERVER_H
#include <gflags/gflags.h>
#include <butil/logging.h>
#include <brpc/server.h>
#include <folly/futures/Future.h>
#include <folly/futures/Promise.h>
#include <folly/executors/CPUThreadPoolExecutor.h>
#include "butil/time.h"
#include "bvar/bvar.h"
#include "gcache.pb.h"
#include "ReadCache.h"
#include "WriteCache.h"
#include "data_adaptor.h"
namespace gcache {
// brpc service implementation of the MADFS global cache protocol: one
// ReadCache (optionally backed by `base_adaptor`) and one WriteCache, both
// driven by a shared folly CPU thread pool. Each handler fills `response`
// and runs `done` to complete the RPC; GetEntryFromReadCache completes
// asynchronously from a cache continuation, the rest complete inline.
class GlobalCacheServiceImpl : public GlobalCacheService {
public:
    GlobalCacheServiceImpl(std::shared_ptr<folly::CPUThreadPoolExecutor> executor,
                           std::shared_ptr<DataAdaptor> base_adaptor);
    virtual ~GlobalCacheServiceImpl() {}
    // Ranged read from the read cache; payload returned as response attachment.
    virtual void GetEntryFromReadCache(google::protobuf::RpcController *cntl_base,
                                       const GetEntryRequest *request,
                                       GetEntryResponse *response,
                                       google::protobuf::Closure *done);
    // Stores the request attachment in the read cache.
    virtual void PutEntryFromReadCache(google::protobuf::RpcController *cntl_base,
                                       const PutEntryRequest *request,
                                       PutEntryResponse *response,
                                       google::protobuf::Closure *done);
    // Deletes a key (optionally a bounded chunk range) from the read cache.
    virtual void DeleteEntryFromReadCache(google::protobuf::RpcController *cntl_base,
                                          const DeleteEntryRequest *request,
                                          DeleteEntryResponse *response,
                                          google::protobuf::Closure *done);
    // Ranged read from the write cache; payload returned as response attachment.
    virtual void GetEntryFromWriteCache(google::protobuf::RpcController *cntl_base,
                                        const GetEntryRequest *request,
                                        GetEntryResponse *response,
                                        google::protobuf::Closure *done);
    // Stores the request attachment in the write cache; returns internal key.
    virtual void PutEntryFromWriteCache(google::protobuf::RpcController *cntl_base,
                                        const PutEntryRequest *request,
                                        PutEntryResponse *response,
                                        google::protobuf::Closure *done);
    // Prefix-delete in the write cache up to max_ts, sparing except_keys.
    virtual void DeleteEntryFromWriteCache(google::protobuf::RpcController *cntl_base,
                                           const DeleteEntryRequestForWriteCache *request,
                                           DeleteEntryResponse *response,
                                           google::protobuf::Closure *done);
    // Reports the write cache's current timestamp.
    virtual void QueryTsFromWriteCache(google::protobuf::RpcController *cntl_base,
                                       const QueryTsRequest *request,
                                       QueryTsResponse *response,
                                       google::protobuf::Closure *done);
    // Connectivity probe; always answers OK.
    virtual void Register(google::protobuf::RpcController *cntl_base,
                          const RegisterRequest *request,
                          RegisterResponse *response,
                          google::protobuf::Closure *done);
private:
    std::shared_ptr<folly::CPUThreadPoolExecutor> executor_;  // shared by both caches
    std::shared_ptr<ReadCache> read_cache_;
    std::shared_ptr<WriteCache> write_cache_;
};
}
#endif // MADFS_GLOBAL_CACHE_SERVER_H

View File

@ -0,0 +1,41 @@
#include "GlobalCacheServer.h"
#include "S3DataAdaptor.h"
#include <folly/Singleton.h>
DEFINE_int32(port, 8000, "TCP Port of global cache server");
DEFINE_bool(fetch_s3_if_miss, false, "Allow fetch data from S3 if cache miss");
// Global cache server boot sequence: parse flags, finish folly singleton
// registration, build the service (optionally with an S3 backend when
// --fetch_s3_if_miss is set), register it with brpc, and serve on
// --port until asked to quit.
int main(int argc, char *argv[]) {
    LOG(INFO) << "MADFS Global Cache Server";
    gflags::ParseCommandLineFlags(&argc, &argv, true);
    folly::SingletonVault::singleton()->registrationComplete();

    std::shared_ptr<S3DataAdaptor> s3_adaptor;   // stays null unless enabled
    if (FLAGS_fetch_s3_if_miss) {
        s3_adaptor = std::make_shared<S3DataAdaptor>();
    }
    auto executor = std::make_shared<folly::CPUThreadPoolExecutor>(GetGlobalConfig().folly_threads);
    auto gcache_service = std::make_shared<gcache::GlobalCacheServiceImpl>(executor, s3_adaptor);

    brpc::Server server;
    if (server.AddService(gcache_service.get(), brpc::SERVER_DOESNT_OWN_SERVICE)) {
        PLOG(ERROR) << "Failed to register global cache service";
        return -1;
    }
    brpc::ServerOptions options;
    options.num_threads = GetGlobalConfig().rpc_threads;
    options.use_rdma = GetGlobalConfig().use_rdma;
    butil::EndPoint point = butil::EndPoint(butil::IP_ANY, FLAGS_port);
    if (server.Start(point, &options) != 0) {
        PLOG(ERROR) << "Failed to start global cache server";
        return -1;
    }
    server.RunUntilAskedToQuit();
    return 0;
}

View File

@ -0,0 +1,674 @@
#include "GlobalDataAdaptor.h"
#include "ReadCacheClient.h"
#include "ReplicationWriteCacheClient.h"
#include "ErasureCodingWriteCacheClient.h"
using HybridCache::ByteBuffer;
#define CONFIG_GC_ON_EXCEEDING_DISKSPACE
DEFINE_uint32(bg_execution_period, 10, "Background execution period in seconds");
// Wires the full adaptor stack: a read-cache client, both write-cache
// flavours (replication and Reed-Solomon), and `rpc_connections`
// GlobalCacheClient instances each registered against every server in
// `server_list`. Servers that fail to register are queued as background
// tasks so the worker thread keeps retrying. A null `executor` is replaced
// by a fresh CPU thread pool sized from the global config.
GlobalDataAdaptor::GlobalDataAdaptor(std::shared_ptr<DataAdaptor> base_adaptor,
                                     const std::vector<std::string> &server_list,
                                     std::shared_ptr<EtcdClient> etcd_client,
                                     std::shared_ptr<folly::CPUThreadPoolExecutor> executor)
        : base_adaptor_(base_adaptor),
          executor_(executor),
          server_list_(server_list),
          etcd_client_(etcd_client),
          meta_cache_(GetGlobalConfig().meta_cache_max_size, GetGlobalConfig().meta_cache_clear_size) {
    if (!executor_) {
        executor_ = std::make_shared<folly::CPUThreadPoolExecutor>(GetGlobalConfig().folly_threads);
    }
    read_cache_ = std::make_shared<ReadCacheClient>(this);
    write_caches_[WC_TYPE_REPLICATION] = std::make_shared<ReplicationWriteCacheClient>(this);
    write_caches_[WC_TYPE_REEDSOLOMON] = std::make_shared<ErasureCodingWriteCacheClient>(this);
    for (int conn_id = 0; conn_id < GetGlobalConfig().rpc_connections; conn_id++) {
        auto client = std::make_shared<GlobalCacheClient>(std::to_string(conn_id));
        int server_id = 0;
        for (auto &entry: server_list_) {
            if (client->RegisterServer(server_id, entry.c_str())) {
                // TODO: retry the connection periodically (original comment).
                LOG(WARNING) << "Failed to connect with server id: " << server_id
                             << ", address: " << entry;
                // Fix: scoped lock_guard instead of manual lock()/unlock() —
                // the original left bg_mutex_ locked if push_back threw.
                {
                    std::lock_guard<std::mutex> guard(bg_mutex_);
                    bg_tasks_.push_back([client, server_id, entry]() -> int {
                        return client->RegisterServer(server_id, entry.c_str());
                    });
                }
            }
            server_id++;
        }
        rpc_client_.push_back(client);
    }
    srand48(time(nullptr));   // seed for GetRpcClient()'s lrand48()
    bg_running_ = true;
    bg_thread_ = std::thread(std::bind(&GlobalDataAdaptor::BackgroundWorker, this));
}
// Stops the background worker and joins its thread before any other member
// is destroyed, so the worker never touches freed state.
GlobalDataAdaptor::~GlobalDataAdaptor() {
    bg_running_ = false;
    bg_cv_.notify_all();
    bg_thread_.join();
}
// Background retry loop: every FLAGS_bg_execution_period seconds (or earlier
// when bg_cv_ is notified) it runs each queued task; tasks returning
// non-zero are kept for the next pass, tasks returning 0 are dropped.
// NOTE(review): bg_mutex_ is held while the tasks themselves run, so
// producers (the constructor) block for the duration of a pass.
void GlobalDataAdaptor::BackgroundWorker() {
    while (bg_running_) {
        std::unique_lock<std::mutex> lock(bg_mutex_);
        std::vector<std::function<int()>> bg_tasks_next;
        for (auto &entry : bg_tasks_) {
            if (entry()) {
                bg_tasks_next.push_back(entry);
            }
        }
        bg_tasks_ = bg_tasks_next;
        // Releases the lock while waiting; re-checks bg_running_ on wakeup.
        bg_cv_.wait_for(lock, std::chrono::seconds(FLAGS_bg_execution_period));
    }
}
// Keeps download parameters alive across async continuations.
// NOTE(review): `buffer` is held by reference, not owned — the caller must
// keep the ByteBuffer alive until the download future completes.
struct DownloadArgs {
    DownloadArgs(const std::string &key, size_t start, size_t size, ByteBuffer &buffer)
            : key(key), start(start), size(size), buffer(buffer) {}
    std::string key;
    size_t start;
    size_t size;
    ByteBuffer &buffer;   // not owned
};
// Reads [start, start + size) of `key` into `buffer`, preferring the global
// cache and falling back to the base adaptor when the cache path reports
// RPC_FAILED. A continuation exception maps to FOLLY_ERROR (no fallback).
folly::Future<int> GlobalDataAdaptor::DownLoad(const std::string &key,
                                               size_t start,
                                               size_t size,
                                               ByteBuffer &buffer) {
    return DownLoadFromGlobalCache(key, start, size, buffer).then(
            [this, key, start, size, &buffer](folly::Try<int> &&output) -> folly::Future<int> {
                // Only a transport-level RPC_FAILED triggers the base-store
                // fallback; any other status passes through unchanged.
                if (output.value_or(FOLLY_ERROR) == RPC_FAILED) {
                    return base_adaptor_->DownLoad(key, start, size, buffer);
                }
                return output.value_or(FOLLY_ERROR);
            });
}
// Cache-aware download path. Resolution order:
//   1. meta-cache hit: validate existence and range, then read from the
//      write cache (per the cached JSON layout) or the read cache;
//   2. meta-cache miss: consult etcd — a JSON entry means the object lives
//      in the write cache (replication or reed-solomon layout); NOT_FOUND
//      falls back to the read cache, optionally Head()-ing the base store
//      first to populate the meta cache.
// Early returns: NOT_FOUND for known-deleted keys, END_OF_FILE for
// out-of-range requests, UNSUPPORTED_TYPE for unknown layout types.
folly::Future<int> GlobalDataAdaptor::DownLoadFromGlobalCache(const std::string &key,
                                                              size_t start,
                                                              size_t size,
                                                              ByteBuffer &buffer) {
    auto &policy = GetCachePolicy(key);
    auto meta_cache_entry = GetMetaCacheEntry(key);
    if (meta_cache_entry->present) {
        if (!meta_cache_entry->existed) {
            LOG(ERROR) << "Request for potential deleted file: " << key;
            return folly::makeFuture(NOT_FOUND);
        }
        if (start + size > meta_cache_entry->size) {
            LOG(ERROR) << "Request out of file range, key: " << key
                       << ", start: " << start
                       << ", size: " << size
                       << ", file length: " << meta_cache_entry->size;
            return folly::makeFuture(END_OF_FILE);
        }
    }
    if (policy.write_cache_type != NOCACHE) {
        // args keeps key/start/size (and the buffer reference) alive for the
        // etcd continuation below.
        auto args = std::make_shared<DownloadArgs>(key, start, size, buffer);
        if (meta_cache_entry->present) {
            if (meta_cache_entry->write_cached) {
                // Layout JSON cached locally: dispatch directly on its type.
                auto &root = meta_cache_entry->root;
                if (root["type"] == "replication") {
                    return write_caches_[WC_TYPE_REPLICATION]->Get(args->key, args->start, args->size, args->buffer, root);
                } else if (root["type"] == "reed-solomon") {
                    return write_caches_[WC_TYPE_REEDSOLOMON]->Get(args->key, args->start, args->size, args->buffer, root);
                }
                LOG(ERROR) << "Failed to download data, reason: unsuppported type, key: " << args->key
                           << ", start: " << args->start
                           << ", size: " << args->size
                           << ", type: " << root["type"];
                return folly::makeFuture(UNSUPPORTED_TYPE);
            } else {
                return read_cache_->Get(key, start, size, buffer);
            }
        } else {
            return etcd_client_->GetJson(key).then(
                    [this, args, meta_cache_entry](folly::Try<EtcdClient::GetResult> &&output) -> folly::Future<int> {
                        if (!output.hasValue()) { // executed when GetJson threw an exception
                            LOG(ERROR) << "Failed to download data, reason: internal error, key: " << args->key
                                       << ", start: " << args->start
                                       << ", size: " << args->size;
                            return folly::makeFuture(FOLLY_ERROR);
                        }
                        auto &status = output.value().status;
                        if (status == NOT_FOUND) {
                            // No write-cache record: the object (if any) is in
                            // the base store / read cache.
                            if (GetGlobalConfig().use_meta_cache) {
                                // Head() the base store first so the meta cache
                                // learns existence and size.
                                return base_adaptor_->Head(args->key, meta_cache_entry->size, meta_cache_entry->headers).then(
                                        [this, meta_cache_entry, args](folly::Try<int> &&output) -> folly::Future<int> {
                                            int res = output.value_or(FOLLY_ERROR);
                                            if (res == OK || res == NOT_FOUND) {
                                                meta_cache_entry->present = true;
                                                meta_cache_entry->existed = (res == OK);
                                                meta_cache_entry->write_cached = false;
                                            }
                                            if (res == OK) {
                                                return read_cache_->Get(args->key, args->start, args->size, args->buffer);
                                            }
                                            return res;
                                        });
                            } else {
                                return read_cache_->Get(args->key, args->start, args->size, args->buffer);
                            }
                        } else if (status != OK) {
                            return folly::makeFuture(status);
                        }
                        // Write-cache record found: optionally populate the
                        // meta cache, then dispatch on the layout type.
                        auto &root = output.value().root;
                        if (GetGlobalConfig().use_meta_cache) {
                            meta_cache_entry->present = true;
                            meta_cache_entry->existed = true;
                            meta_cache_entry->write_cached = true;
                            meta_cache_entry->size = root["size"].asInt64();
                            for (auto iter = root["headers"].begin(); iter != root["headers"].end(); iter++) {
                                meta_cache_entry->headers[iter.key().asString()] = (*iter).asString();
                            }
                            meta_cache_entry->root = root;
                        }
                        if (root["type"] == "replication") {
                            return write_caches_[WC_TYPE_REPLICATION]->Get(args->key, args->start, args->size, args->buffer, root);
                        } else if (root["type"] == "reed-solomon") {
                            return write_caches_[WC_TYPE_REEDSOLOMON]->Get(args->key, args->start, args->size, args->buffer, root);
                        }
                        LOG(ERROR) << "Failed to download data, reason: unsuppported type, key: " << args->key
                                   << ", start: " << args->start
                                   << ", size: " << args->size
                                   << ", type: " << root["type"];
                        return folly::makeFuture(UNSUPPORTED_TYPE);
                    });
        }
    } else {
        return read_cache_->Get(key, start, size, buffer);
    }
}
// Uploads `size` bytes of `buffer` under `key`. With the compile-time switch
// CONFIG_GC_ON_EXCEEDING_DISKSPACE enabled, a NO_ENOUGH_DISKSPACE result
// triggers one synchronous GC pass followed by a single retry.
// NOTE(review): `buffer` and `headers` are captured by reference and the
// retry blocks on .get() inside the continuation — the caller must keep both
// alive until completion, and the blocking retry can tie up an executor
// thread. Confirm with callers.
folly::Future<int> GlobalDataAdaptor::UpLoad(const std::string &key,
                                             size_t size,
                                             const ByteBuffer &buffer,
                                             const std::map <std::string, std::string> &headers) {
#ifdef CONFIG_GC_ON_EXCEEDING_DISKSPACE
    return DoUpLoad(key, size, buffer, headers).thenValue([this, key, size, &buffer, &headers](int &&res) -> int {
        if (res != NO_ENOUGH_DISKSPACE) {
            return res;
        }
        LOG(INFO) << "Disk limit exceeded - perform GC immediately";
        res = PerformGarbageCollection();
        if (res) {
            LOG(WARNING) << "GC failed";
            return res;
        }
        LOG(INFO) << "Disk limit exceeded - GC completed, now retry";
        return DoUpLoad(key, size, buffer, headers).get();
    });
#else
    return DoUpLoad(key, size, buffer, headers);
#endif
}
// Single upload attempt. Invalidate stale read-cache chunks first, then
// either write through the configured write cache (persisting the layout
// JSON in etcd) or upload straight to the base store (NOCACHE policy).
// The meta cache is refreshed on success.
folly::Future<int> GlobalDataAdaptor::DoUpLoad(const std::string &key,
                                               size_t size,
                                               const ByteBuffer &buffer,
                                               const std::map <std::string, std::string> &headers) {
    // Fix: the profiling timer was a raw `new butil::Timer` deleted only on
    // the write-cache OK path — every error path and the NOCACHE /
    // UNSUPPORTED_TYPE branches leaked it. A shared_ptr captured by the
    // continuation frees it on all paths.
    auto t = std::make_shared<butil::Timer>();
    t->start();
    auto &policy = GetCachePolicy(key);
    auto meta_cache_entry = GetMetaCacheEntry(key);
    meta_cache_entry->present = false;
    meta_cache_entry->existed = true;
    meta_cache_entry->size = size;
    meta_cache_entry->headers = headers;
    // Drop any read-cache chunks of the previous object version.
    auto pre_op = read_cache_->Invalidate(key, size);
    if (policy.write_cache_type == REPLICATION || policy.write_cache_type == REED_SOLOMON) {
        auto write_cache = policy.write_cache_type == REPLICATION
                ? write_caches_[WC_TYPE_REPLICATION]
                : write_caches_[WC_TYPE_REEDSOLOMON];
        return std::move(pre_op)
                .then(std::bind(&WriteCacheClient::Put, write_cache.get(), key, size, buffer, headers, 0))
                .then([this, key, meta_cache_entry, t] (folly::Try<WriteCacheClient::PutResult> output) -> folly::Future<int> {
                    int status = output.hasValue() ? output.value().status : FOLLY_ERROR;
                    if (status == OK) {
                        // Persist the layout JSON so other nodes can locate
                        // the write-cached object.
                        status = etcd_client_->PutJson(key, output.value().root).get();
                        if (status == OK && GetGlobalConfig().use_meta_cache) {
                            meta_cache_entry->root = output.value().root;
                            meta_cache_entry->write_cached = true;
                            meta_cache_entry->present = true;
                        }
                        t->stop();
                        LOG(INFO) << "JSON: " << t->u_elapsed();
                    }
                    return folly::makeFuture(status);
                });
    } else if (policy.write_cache_type == NOCACHE) {
        return std::move(pre_op)
                .then(std::bind(&DataAdaptor::UpLoad, base_adaptor_.get(), key, size, buffer, headers))
                .thenValue([meta_cache_entry](int &&res) -> int {
                    if (res == OK && GetGlobalConfig().use_meta_cache) {
                        meta_cache_entry->write_cached = false;
                        meta_cache_entry->present = true;
                    }
                    return res;
                });
    } else {
        LOG(ERROR) << "Failed to upload data, reason: unsuppported type, key: " << key
                   << ", size: " << size
                   << ", type: " << policy.write_cache_type;
        return folly::makeFuture(UNSUPPORTED_TYPE);
    }
}
// Removes `key` everywhere it may live: base store, read cache and — when
// the object was written through the write cache — its etcd layout JSON.
folly::Future<int> GlobalDataAdaptor::Delete(const std::string &key) {
    auto &policy = GetCachePolicy(key);
    if (policy.write_cache_type == NOCACHE) {
        InvalidateMetaCacheEntry(key);
        return base_adaptor_->Delete(key);
    } else {
        auto meta_cache_entry = GetMetaCacheEntry(key);
        auto size = meta_cache_entry->size;
        bool present = meta_cache_entry->present;
        // Fix: honour the cached write_cached flag. The original initialized
        // has_write_cache to false and only updated it on the meta-cache-miss
        // path, so for cached write-through objects the etcd JSON was never
        // deleted, leaving stale layout metadata behind.
        bool has_write_cache = present && meta_cache_entry->write_cached;
        if (!present) {
            auto result = etcd_client_->GetJson(key).get();
            if (result.status == OK) {
                size = result.root["size"].asInt64();
                has_write_cache = true;
            } else if (result.status == NOT_FOUND) { // object lives only in S3
                std::map<std::string, std::string> headers;
                int ret = base_adaptor_->Head(key, size, headers).get();
                if (ret) return ret;
            } else {
                return folly::makeFuture(result.status);
            }
        }
        InvalidateMetaCacheEntry(key);
        if (has_write_cache) {
            return base_adaptor_->Delete(key)
                    .then(std::bind(&ReadCacheClient::Invalidate, read_cache_.get(), key, size))
                    .then(std::bind(&EtcdClient::DeleteJson, etcd_client_.get(), key));
        } else {
            return base_adaptor_->Delete(key)
                    .then(std::bind(&ReadCacheClient::Invalidate, read_cache_.get(), key, size));
        }
    }
}
// State of one DeepFlush operation: the key, the headers recovered from the
// etcd JSON, and an owned staging buffer (freed in the destructor) used to
// hold the full object between download and re-upload.
struct DeepFlushArgs {
    DeepFlushArgs(const std::string &key) : key(key) {}
    // buffer.data is allocated with new[] in DeepFlush; release it here.
    ~DeepFlushArgs() { if (buffer.data) delete []buffer.data; }
    std::string key;
    std::map <std::string, std::string> headers;
    ByteBuffer buffer;   // owned staging area for the object body
};
// Flushes a write-cached object back to the base store in three phases:
// (1) fetch its etcd layout JSON, (2) download the full body into a staging
// buffer, (3) upload it to the base adaptor and drop the etcd JSON. Objects
// under a non-write-cache policy are a no-op (phase 4 log only).
folly::Future<int> GlobalDataAdaptor::DeepFlush(const std::string &key) {
    // Fix: the phase timer was a raw `new butil::Timer` that NO path ever
    // deleted — a guaranteed leak on every call. A shared_ptr captured by
    // the continuations frees it once the chain completes.
    auto t = std::make_shared<butil::Timer>();
    t->start();
    auto &policy = GetCachePolicy(key);
    if (policy.write_cache_type == REPLICATION || policy.write_cache_type == REED_SOLOMON) {
        auto args = std::make_shared<DeepFlushArgs>(key);
        return etcd_client_->GetJson(key).then([this, t, args](folly::Try<EtcdClient::GetResult> &&output) -> folly::Future<int> {
            if (!output.hasValue()) {
                return folly::makeFuture(FOLLY_ERROR);
            }
            if (output.value().status != OK) {
                return folly::makeFuture(output.value().status);
            }
            // Allocate the staging buffer and recover the object headers
            // from the layout JSON.
            auto &root = output.value().root;
            args->buffer.len = root["size"].asInt64();
            args->buffer.data = new char[args->buffer.len];
            for (auto iter = root["headers"].begin(); iter != root["headers"].end(); iter++) {
                args->headers[iter.key().asString()] = (*iter).asString();
            }
            t->stop();
            LOG(INFO) << "DeepFlush phase 1: " << t->u_elapsed();
            return DownLoad(args->key, 0, args->buffer.len, args->buffer);
        }).then([this, t, args](folly::Try<int> &&output) -> folly::Future<int> {
            int res = output.value_or(FOLLY_ERROR);
            t->stop();
            LOG(INFO) << "DeepFlush phase 2: " << t->u_elapsed();
            if (res != OK) {
                return folly::makeFuture(res);
            } else {
                return base_adaptor_->UpLoad(args->key, args->buffer.len, args->buffer, args->headers);
            }
        }).then([this, t, key, args](folly::Try<int> &&output) -> folly::Future<int> {
            t->stop();
            LOG(INFO) << "DeepFlush phase 3: " << t->u_elapsed();
            int res = output.value_or(FOLLY_ERROR);
            if (res != OK) {
                return folly::makeFuture(res);
            } else {
                // Body is now durable in the base store; drop the layout
                // JSON and the local meta-cache entry.
                InvalidateMetaCacheEntry(key);
                return etcd_client_->DeleteJson(key);
            }
        });
    } else {
        t->stop();
        LOG(INFO) << "DeepFlush phase 4: " << t->u_elapsed();
        return folly::makeFuture(OK);
    }
}
// Keeps Head() parameters alive across async continuations.
// NOTE(review): `size` and `headers` are references to caller-owned output
// locations — the caller must keep them alive until the future completes.
struct HeadArgs {
    HeadArgs(const std::string &key, size_t &size, std::map <std::string, std::string> &headers)
            : key(key), size(size), headers(headers) {}
    std::string key;
    size_t &size;                                   // not owned
    std::map <std::string, std::string> &headers;   // not owned
};
// Resolves `key`'s size and headers. Order: meta cache → etcd layout JSON
// (write-cached objects) → base store Head(). The meta cache is refreshed
// with whatever source answered, when use_meta_cache is enabled.
// NOTE(review): the NOCACHE branch captures `size`/`headers` by reference in
// an async continuation — callers must keep them alive until completion.
folly::Future<int> GlobalDataAdaptor::Head(const std::string &key,
                                           size_t &size,
                                           std::map <std::string, std::string> &headers) {
    auto &policy = GetCachePolicy(key);
    auto meta_cache_entry = GetMetaCacheEntry(key);
    if (meta_cache_entry->present) {
        if (!meta_cache_entry->existed) {
            LOG(ERROR) << "Request for potential deleted file: " << key;
            return folly::makeFuture(NOT_FOUND);
        }
        size = meta_cache_entry->size;
        headers = meta_cache_entry->headers;
        return folly::makeFuture(OK);
    }
    if (policy.write_cache_type == REPLICATION || policy.write_cache_type == REED_SOLOMON) {
        auto args = std::make_shared<HeadArgs>(key, size, headers);
        return etcd_client_->GetJson(key).then([this, args, meta_cache_entry](folly::Try<EtcdClient::GetResult> &&output) -> folly::Future<int> {
            if (!output.hasValue()) {
                return folly::makeFuture(FOLLY_ERROR);
            }
            if (output.value().status != OK) {
                return folly::makeFuture(output.value().status);
            }
            // Found a write-cache layout record: size/headers come from it.
            auto &root = output.value().root;
            args->size = root["size"].asInt64();
            for (auto iter = root["headers"].begin(); iter != root["headers"].end(); iter++) {
                args->headers[iter.key().asString()] = (*iter).asString();
            }
            if (GetGlobalConfig().use_meta_cache) {
                meta_cache_entry->present = true;
                meta_cache_entry->existed = true;
                meta_cache_entry->write_cached = true;
                meta_cache_entry->size = args->size;
                meta_cache_entry->headers = args->headers;
                meta_cache_entry->root = output.value().root;
            }
            return folly::makeFuture(OK);
        }).then([this, args, meta_cache_entry](folly::Try<int> &&output) -> folly::Future<int> {
            int res = output.value_or(FOLLY_ERROR);
            if (res != NOT_FOUND) {
                return folly::makeFuture(res);
            } else {
                // No etcd record: fall back to the base store's Head().
                return base_adaptor_->Head(args->key, args->size, args->headers).thenValue([args, meta_cache_entry](int &&res) -> int {
                    if (GetGlobalConfig().use_meta_cache && (res == OK || res == NOT_FOUND)) {
                        meta_cache_entry->present = true;
                        meta_cache_entry->existed = (res == OK);
                        meta_cache_entry->write_cached = false;
                        meta_cache_entry->size = args->size;
                        meta_cache_entry->headers = args->headers;
                    }
                    return res;
                });
            }
        });
    } else {
        return base_adaptor_->Head(key, size, headers).thenValue([meta_cache_entry, &size, &headers](int &&res) -> int {
            if (GetGlobalConfig().use_meta_cache && (res == OK || res == NOT_FOUND)) {
                meta_cache_entry->present = true;
                meta_cache_entry->existed = (res == OK);
                meta_cache_entry->write_cached = false;
                meta_cache_entry->size = size;
                meta_cache_entry->headers = headers;
            }
            return res;
        });
    }
}
// Drop every cached metadata entry. Subsequent Head() calls will re-fetch
// from etcd or the base adaptor.
void GlobalDataAdaptor::InvalidateMetaCache() {
    std::lock_guard<std::mutex> guard(meta_cache_mutex_);
    meta_cache_.clear();
}
// Drop the cached metadata entry for a single key, if one exists.
void GlobalDataAdaptor::InvalidateMetaCacheEntry(const std::string &key) {
    std::lock_guard<std::mutex> guard(meta_cache_mutex_);
    meta_cache_.erase(key);
}
// Fetch the meta-cache entry for `key`, creating (and inserting) a fresh
// non-present entry when the key has not been seen before.
std::shared_ptr<GlobalDataAdaptor::MetaCacheEntry> GlobalDataAdaptor::GetMetaCacheEntry(const std::string &key) {
    std::lock_guard<std::mutex> guard(meta_cache_mutex_);
    auto found = meta_cache_.find(key);
    if (found != meta_cache_.end()) {
        return found->second;
    }
    auto fresh_entry = std::make_shared<MetaCacheEntry>(key);
    meta_cache_.insert(key, fresh_entry);
    return fresh_entry;
}
// Placeholder: per-key cache policies are not implemented; every key
// currently resolves to the global default policy (see GetCachePolicy).
void GlobalDataAdaptor::SetCachePolicy(const std::string &key, CachePolicy &policy) {
    // ...
}
// Return the cache policy for `key`. Per-key policies are not supported yet,
// so the `key` argument is ignored and the global default policy is returned.
const CachePolicy &GlobalDataAdaptor::GetCachePolicy(const std::string &key) const {
    return GetGlobalConfig().default_policy;
}
// Pick one of the RPC clients at random (cheap load spreading across
// connections via lrand48).
std::shared_ptr<GlobalCacheClient> GlobalDataAdaptor::GetRpcClient() const {
    const size_t index = lrand48() % rpc_client_.size();
    return rpc_client_[index];
}
// Garbage-collect the global write cache under `prefix`:
//   stage 1: snapshot a timestamp from every cache server (servers that fail
//            are skipped for the deletion stage);
//   stage 2: list the write-cache metadata keys recorded in etcd;
//   stage 3: DeepFlush every listed key to the backing store;
//   stage 4: re-list the metadata that survived the flush;
//   stage 5: on each reachable server, delete entries older than the stage-1
//            timestamp, except chunks still referenced by surviving metadata.
// Runs synchronously (blocks on futures). Returns 0 on success, RPC_FAILED
// when no server is reachable, or an etcd error code.
// NOTE(review): the timer is stop()ed repeatedly without a restart, so each
// "stage" log presumably shows cumulative time since t.start() — confirm
// against butil::Timer semantics.
int GlobalDataAdaptor::PerformGarbageCollection(const std::string &prefix) {
    LOG(INFO) << "==================GC START===================";
    butil::Timer t;
    t.start();
    std::vector<uint64_t> write_cache_ts;
    std::set<int> skipped_server_id_list;
    for (int server_id = 0; server_id < server_list_.size(); ++server_id) {
        auto res = GetRpcClient()->QueryTsFromWriteCache(server_id).get();
        if (res.status != OK) {
            std::cerr << RED << "Skip recycling write cache data in server " << server_id << WHITE << std::endl;
            skipped_server_id_list.insert(server_id);
        }
        // Pushed even for skipped servers to keep the vector indexed by
        // server_id; skipped slots are never read in stage 5.
        write_cache_ts.push_back(res.timestamp);
        LOG(INFO) << "TS for server " << server_id << ": " << res.timestamp;
    }
    t.stop();
    LOG(INFO) << "Flush stage 1: " << t.u_elapsed();
    if (server_list_.size() == skipped_server_id_list.size()) {
        std::cerr << RED << "All servers are not available." << WHITE << std::endl;
        return RPC_FAILED;
    }
    std::vector<std::string> key_list;
    int rc = etcd_client_->ListJson(prefix, key_list).get();
    if (rc) {
        std::cerr << RED << "Failed to list metadata in write cache. "
                  << "Check the availability of etcd server." << WHITE << std::endl;
        return rc;
    }
    for (auto &key : key_list) {
        LOG(INFO) << "Found entry: " << key;
    }
    t.stop();
    LOG(INFO) << "Flush stage 2: " << t.u_elapsed();
    // Stage 3: flush every key; failures are logged but do not abort GC.
    std::vector<folly::Future<int>> future_list;
    for (auto &key : key_list) {
        future_list.emplace_back(DeepFlush(key));
    }
    auto output = folly::collectAll(future_list).get();
    for (auto &entry: output)
        if (entry.value_or(FOLLY_ERROR) != OK) {
            LOG(ERROR) << "Cannot flush data to S3 storage";
        }
    t.stop();
    LOG(INFO) << "Flush stage 3: " << t.u_elapsed();
    // Recheck the JSON metadata from etcd server
    rc = etcd_client_->ListJson(prefix, key_list).get();
    if (rc != 0 && rc != NOT_FOUND) {
        return rc;
    }
    t.stop();
    LOG(INFO) << "Flush stage 4: " << t.u_elapsed();
    // Stage 5 preparation: for every surviving key, record which internal
    // chunk keys each replica server must keep. path[i] belongs to
    // replica[i % replicas.size()] — this matches the chunk-major,
    // replica-minor ordering produced by the write-cache Put path.
    std::unordered_map<int, std::vector<std::string>> preserve_chunk_keys_map;
    for (auto &key : key_list) {
        auto resp = etcd_client_->GetJson(key).get();
        if (resp.status) {
            continue;
        }
        std::vector<int> replicas;
        for (auto &entry : resp.root["replica"]) {
            replicas.push_back(entry.asInt());
        }
        std::vector<std::string> internal_keys;
        for (auto &entry : resp.root["path"]) {
            internal_keys.push_back(entry.asString());
        }
        assert(!replicas.empty() && !internal_keys.empty());
        for (int i = 0; i < internal_keys.size(); ++i) {
            preserve_chunk_keys_map[replicas[i % replicas.size()]].push_back(internal_keys[i]);
        }
    }
    for (int server_id = 0; server_id < server_list_.size(); ++server_id) {
        if (skipped_server_id_list.count(server_id)) {
            continue;
        }
        std::vector<std::string> except_keys;
        if (preserve_chunk_keys_map.count(server_id)) {
            except_keys = preserve_chunk_keys_map[server_id];
        }
        // Delete everything older than the stage-1 timestamp except the
        // chunks still referenced by live metadata.
        int rc = GetRpcClient()->DeleteEntryFromWriteCache(server_id,
                                                           prefix,
                                                           write_cache_ts[server_id],
                                                           except_keys).get();
        if (rc) {
            LOG(WARNING) << "Cannot delete unused entries from write cache. Server id: " << server_id;
        }
    }
    t.stop();
    LOG(INFO) << "Flush stage 5: " << t.u_elapsed();
    LOG(INFO) << "==================GC END===================";
    return 0;
}
// Upload one part of a multipart upload into the global write cache,
// filling `root` with the resulting chunk metadata.
// When CONFIG_GC_ON_EXCEEDING_DISKSPACE is defined, a NO_ENOUGH_DISKSPACE
// failure triggers a synchronous garbage collection followed by one retry.
// NOTE(review): the retry path calls .get() inside a continuation, which
// blocks an executor thread — confirm the executor cannot be exhausted.
// NOTE(review): `buffer`, `headers` and `root` are captured by reference;
// the caller must keep them alive until the returned future completes.
folly::Future<int> GlobalDataAdaptor::UpLoadPart(const std::string &key,
                                                 size_t off,
                                                 size_t size,
                                                 const ByteBuffer &buffer,
                                                 const std::map<std::string, std::string> &headers,
                                                 Json::Value& root) {
#ifdef CONFIG_GC_ON_EXCEEDING_DISKSPACE
    return DoUpLoadPart(key, off, size, buffer, headers, root)
            .thenValue([this, key, off, size, &buffer, &headers, &root](int &&res) -> int {
                if (res != NO_ENOUGH_DISKSPACE) {
                    return res;
                }
                LOG(INFO) << "Disk limit exceeded - perform GC immediately";
                res = PerformGarbageCollection();
                if (res) {
                    LOG(WARNING) << "GC failed";
                    return res;
                }
                LOG(INFO) << "Disk limit exceeded - GC completed, now retry";
                return DoUpLoadPart(key, off, size, buffer, headers, root).get();
            });
#else
    return DoUpLoadPart(key, off, size, buffer, headers, root);
#endif
}
// Write one part into the global write cache after invalidating any
// overlapping read-cache chunks. On success, `root` receives the JSON
// metadata describing where the part's chunks were stored.
// Returns the write-cache status, FOLLY_ERROR if the future chain failed,
// or UNSUPPORTED_TYPE for policies without a global write cache.
// NOTE(review): `root` is captured by reference; the caller must keep it
// alive until the returned future completes.
folly::Future<int> GlobalDataAdaptor::DoUpLoadPart(const std::string &key,
                                                   size_t off,
                                                   size_t size,
                                                   const ByteBuffer &buffer,
                                                   const std::map<std::string, std::string> &headers,
                                                   Json::Value& root) {
    butil::Timer *t = new butil::Timer();
    t->start();
    auto &policy = GetCachePolicy(key);
    // Stale read-cache chunks covering [0, off + size) must go before the
    // new data is written.
    auto pre_op = read_cache_->Invalidate(key, off + size);
    if (policy.write_cache_type == REPLICATION || policy.write_cache_type == REED_SOLOMON) {
        auto write_cache = policy.write_cache_type == REPLICATION
                           ? write_caches_[WC_TYPE_REPLICATION]
                           : write_caches_[WC_TYPE_REEDSOLOMON];
        return std::move(pre_op)
                .then(std::bind(&WriteCacheClient::Put, write_cache.get(), key, size, buffer, headers, off))
                .then([this, t, &root] (folly::Try<WriteCacheClient::PutResult> output) -> folly::Future<int> {
                    // Stop and free the timer unconditionally: the original
                    // code deleted it only on OK and leaked it on all error
                    // paths.
                    t->stop();
                    delete t;
                    int status = output.hasValue() ? output.value().status : FOLLY_ERROR;
                    if (status == OK) {
                        root = std::move(output.value().root);
                    }
                    return folly::makeFuture(status);
                });
    } else {
        delete t;  // bug fix: timer was leaked on this branch
        LOG(ERROR) << "Failed to upload data, reason: unsuppported type, key: " << key
                   << ", size: " << size
                   << ", type: " << policy.write_cache_type;
        return folly::makeFuture(UNSUPPORTED_TYPE);
    }
}
// Finalize a multipart upload: merge the per-part chunk paths (in part
// order) into a single JSON document and persist it to etcd. With no parts,
// there is nothing to record and OK is returned immediately.
folly::Future<int> GlobalDataAdaptor::Completed(const std::string &key,
                                                const std::vector<Json::Value> &roots,
                                                size_t size) {
    if (roots.empty()) {
        return folly::makeFuture(OK);
    }
    // The metadata for this key is about to change; drop the cached copy.
    GetMetaCacheEntry(key)->present = false;
    Json::Value merged_path(Json::arrayValue);
    for (const auto &part : roots) {
        for (const auto &partial_key : part["path"]) {
            merged_path.append(partial_key.asString());
        }
    }
    // The first part's document carries the shared fields (type, replica,
    // headers); only path and size need updating.
    Json::Value merged_root = roots[0];
    merged_root["path"] = merged_path;
    merged_root["size"] = size;
    return etcd_client_->PutJson(key, merged_root);
}

View File

@ -0,0 +1,143 @@
#ifndef MADFS_GLOBAL_DATA_ADAPTOR_H
#define MADFS_GLOBAL_DATA_ADAPTOR_H
#include <string>
#include <folly/executors/CPUThreadPoolExecutor.h>
#include <folly/container/EvictingCacheMap.h>
#include "data_adaptor.h"
#include "EtcdClient.h"
#include "ReadCacheClient.h"
#include "WriteCacheClient.h"
#include "GlobalCacheClient.h"
#define NUM_WC_TYPES 2
#define WC_TYPE_REPLICATION 0
#define WC_TYPE_REEDSOLOMON 1
using HybridCache::ByteBuffer;
using HybridCache::DataAdaptor;
// Data adaptor that layers a distributed read cache and a replicated /
// erasure-coded write cache (backed by remote cache servers and etcd
// metadata) on top of a base DataAdaptor.
class GlobalDataAdaptor : public DataAdaptor {
    friend class ReadCacheClient;
    friend class ReplicationWriteCacheClient;
    friend class ErasureCodingWriteCacheClient;

public:
    GlobalDataAdaptor(std::shared_ptr<DataAdaptor> base_adaptor,
                      const std::vector<std::string> &server_list,
                      std::shared_ptr<EtcdClient> etcd_client = nullptr,
                      std::shared_ptr<folly::CPUThreadPoolExecutor> executor = nullptr);

    ~GlobalDataAdaptor();

    // Download data from the data server.
    virtual folly::Future<int> DownLoad(const std::string &key,
                                        size_t start,
                                        size_t size,
                                        ByteBuffer &buffer);

    folly::Future<int> DownLoadFromGlobalCache(const std::string &key,
                                               size_t start,
                                               size_t size,
                                               ByteBuffer &buffer);

    // Upload data to the data server.
    virtual folly::Future<int> UpLoad(const std::string &key,
                                      size_t size,
                                      const ByteBuffer &buffer,
                                      const std::map <std::string, std::string> &headers);

    virtual folly::Future<int> DoUpLoad(const std::string &key,
                                        size_t size,
                                        const ByteBuffer &buffer,
                                        const std::map <std::string, std::string> &headers);

    // Upload one part of a multipart upload; `root` receives its metadata.
    virtual folly::Future<int> UpLoadPart(const std::string &key,
                                          size_t off,
                                          size_t size,
                                          const ByteBuffer &buffer,
                                          const std::map<std::string, std::string> &headers,
                                          Json::Value& root);

    virtual folly::Future<int> DoUpLoadPart(const std::string &key,
                                            size_t off,
                                            size_t size,
                                            const ByteBuffer &buffer,
                                            const std::map<std::string, std::string> &headers,
                                            Json::Value& root);

    // Finalize a multipart upload by merging the per-part metadata.
    virtual folly::Future<int> Completed(const std::string &key,
                                         const std::vector<Json::Value> &roots,
                                         size_t size);

    // Delete data on the data server.
    virtual folly::Future<int> Delete(const std::string &key);

    // Flush cached data through to S3 (used by the global cache).
    virtual folly::Future<int> DeepFlush(const std::string &key);

    // Fetch the metadata (size, headers) of an object.
    virtual folly::Future<int> Head(const std::string &key,
                                    size_t &size,
                                    std::map <std::string, std::string> &headers);

    int PerformGarbageCollection(const std::string &prefix = "");

    void SetCachePolicy(const std::string &key, CachePolicy &policy);

public:
    // Cached result of a metadata lookup for one key.
    struct MetaCacheEntry {
        MetaCacheEntry(const std::string &key) : key(key), present(false) {}
        const std::string key;
        bool present;       // the entry is only valid when set to true
        bool existed;       // the key currently exists
        bool write_cached;  // the key's data resides in the global write cache
        size_t size;
        std::map<std::string, std::string> headers;
        Json::Value root;
    };

    void InvalidateMetaCache();

    void InvalidateMetaCacheEntry(const std::string &key);

    std::shared_ptr<MetaCacheEntry> GetMetaCacheEntry(const std::string &key);

    const CachePolicy &GetCachePolicy(const std::string &key) const;

    std::shared_ptr<GlobalCacheClient> GetRpcClient() const;

    // Map a server id to its hostname; "<invalid>" for out-of-range ids.
    const std::string GetServerHostname(int server_id) const {
        if (server_id >= 0 && server_id < server_list_.size())
            return server_list_[server_id];
        return "<invalid>";
    };

    void BackgroundWorker();

private:
    std::shared_ptr<folly::CPUThreadPoolExecutor> executor_;
    std::shared_ptr<ReadCacheClient> read_cache_;
    std::shared_ptr<WriteCacheClient> write_caches_[NUM_WC_TYPES];
    std::shared_ptr<DataAdaptor> base_adaptor_;
    std::vector<std::shared_ptr<GlobalCacheClient>> rpc_client_;
    std::shared_ptr<EtcdClient> etcd_client_;
    std::vector<std::string> server_list_;
    std::mutex meta_cache_mutex_;
    // Guarded by meta_cache_mutex_.
    folly::EvictingCacheMap<std::string, std::shared_ptr<MetaCacheEntry>> meta_cache_;
    std::atomic<bool> bg_running_;
    std::thread bg_thread_;
    std::mutex bg_mutex_;
    std::condition_variable bg_cv_;
    std::vector<std::function<int()>> bg_tasks_;
};
#endif // MADFS_GLOBAL_DATA_ADAPTOR_H

15
global_cache/Placement.h Normal file
View File

@ -0,0 +1,15 @@
#ifndef MADFS_PLACEMENT_H
#define MADFS_PLACEMENT_H
#include <vector>
#include "Common.h"
// Deterministically choose up to `num_choose` distinct server ids out of
// `num_available`: consecutive ids (mod num_available) starting at a
// position derived from hash(key).
inline static std::vector<int> Placement(const std::string &key, int num_available, int num_choose) {
    const uint64_t base = std::hash<std::string>{}(key);
    const int count = std::min(num_available, num_choose);
    std::vector<int> chosen;
    chosen.reserve(count);
    for (int slot = 0; slot < count; ++slot)
        chosen.push_back(static_cast<int>((base + slot) % num_available));
    return chosen;
}
#endif // MADFS_PLACEMENT_H

215
global_cache/ReadCache.cpp Normal file
View File

@ -0,0 +1,215 @@
#include <unistd.h>
#include <butil/iobuf.h>
#include <bvar/bvar.h>
#include <butil/time.h>
#define BRPC_WITH_RDMA 1
#include <brpc/rdma/block_pool.h>
#include "ReadCache.h"
#include "FileSystemDataAdaptor.h"
bvar::LatencyRecorder g_latency_readcache4cachelib_get("readcache4cachelib_get");
// Read-cache backend built on the cachelib-based HybridCache::ReadCache.
class ReadCache4Cachelib : public ReadCacheImpl {
public:
    explicit ReadCache4Cachelib(std::shared_ptr<folly::CPUThreadPoolExecutor> executor,
                                std::shared_ptr<DataAdaptor> base_adaptor = nullptr);

    ~ReadCache4Cachelib() {}

    // Read `length` bytes of `key` starting at `start`.
    virtual Future<GetOutput> Get(const std::string &key, uint64_t start, uint64_t length);

    // Store the contents of `buf` under `key`.
    virtual int Put(const std::string &key, uint64_t length, const butil::IOBuf &buf);

    virtual int Delete(const std::string &key);

    // Delete all chunk entries `key-<id>-<chunk_size>` for id in [0, max_chunk_id).
    virtual int Delete(const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id);

private:
    std::shared_ptr<folly::CPUThreadPoolExecutor> executor_;
    std::shared_ptr<DataAdaptor> base_adaptor_;
    std::shared_ptr<HybridCache::ReadCache> impl_;  // the cachelib-backed engine
};
// Build the cachelib-backed engine from the global read_cache config.
// Side effect: disables HybridCache logging process-wide.
ReadCache4Cachelib::ReadCache4Cachelib(std::shared_ptr<folly::CPUThreadPoolExecutor> executor,
                                       std::shared_ptr<DataAdaptor> base_adaptor)
        : executor_(executor), base_adaptor_(base_adaptor) {
    HybridCache::EnableLogging = false;
    impl_ = std::make_shared<HybridCache::ReadCache>(GetGlobalConfig().read_cache,
                                                     base_adaptor_,
                                                     executor);
}
// Read [start, start+length) of `key` from the cachelib engine into a newly
// allocated buffer and hand it to the returned GetOutput's IOBuf.
// BRPC_WITH_RDMA is #defined to 1 at the top of this file, so the RDMA
// branch is the one compiled: the buffer comes from the RDMA block pool and
// is released by DeallocBlock (attached as IOBuf user data on success).
// NOTE(review): the heap-allocated timer is freed in the continuation; if
// the future completes with an exception rather than a value, it would leak
// — confirm impl_->Get never throws into the chain.
Future<GetOutput> ReadCache4Cachelib::Get(const std::string &key, uint64_t start, uint64_t length) {
    butil::Timer *t = new butil::Timer();
    t->start();
#ifndef BRPC_WITH_RDMA
    auto wrap = HybridCache::ByteBuffer(new char[length], length);
#else
    auto wrap = HybridCache::ByteBuffer((char *) brpc::rdma::AllocBlock(length), length);
#endif
    return impl_->Get(key, start, length, wrap).thenValue([wrap, key, start, length, t](int res) -> GetOutput {
        t->stop();
        g_latency_readcache4cachelib_get << t->u_elapsed();
        delete t;
        GetOutput output;
        output.status = res;
#ifndef BRPC_WITH_RDMA
        if (res == OK) {
            output.buf.append(wrap.data, wrap.len);
        }
        delete []wrap.data;
#else
        if (res == OK) {
            // Zero-copy: the IOBuf takes ownership and frees via DeallocBlock.
            output.buf.append_user_data(wrap.data, wrap.len, brpc::rdma::DeallocBlock);
        } else {
            brpc::rdma::DeallocBlock(wrap.data);
        }
#endif
        LOG_IF(INFO, FLAGS_verbose) << "Get key: " << key
                                    << ", start: " << start
                                    << ", length: " << length
                                    << ", status: " << res;
        return output;
    });
}
// Copy `buf` into a contiguous scratch buffer and store it under `key`
// (offset 0) in the cachelib engine. Synchronous; returns the engine status.
int ReadCache4Cachelib::Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) {
    const auto payload_len = buf.length();
    auto scratch = malloc(payload_len);
    // fetch() may return the IOBuf's own contiguous block instead of scratch.
    auto contiguous = buf.fetch(scratch, payload_len);
    auto wrapped = HybridCache::ByteBuffer((char *) contiguous, payload_len);
    const int status = impl_->Put(key, 0, length, wrapped);
    free(scratch);
    LOG_IF(INFO, FLAGS_verbose) << "Put key: " << key
                                << ", length: " << length
                                << ", status: " << status;
    return status;
}
// Remove a single entry from the cachelib engine.
int ReadCache4Cachelib::Delete(const std::string &key) {
    LOG_IF(INFO, FLAGS_verbose) << "Delete key: " << key;
    const int status = impl_->Delete(key);
    return status;
}
// Remove every chunk entry `key-<id>-<chunk_size>` for id in
// [0, max_chunk_id), stopping at the first failure.
int ReadCache4Cachelib::Delete(const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id) {
    LOG_IF(INFO, FLAGS_verbose) << "Delete key: " << key;
    uint64_t chunk_id = 0;
    while (chunk_id < max_chunk_id) {
        const std::string chunk_key = key + "-" + std::to_string(chunk_id) + "-" + std::to_string(chunk_size);
        const int status = impl_->Delete(chunk_key);
        if (status != 0) {
            return status;
        }
        ++chunk_id;
    }
    return OK;
}
bvar::LatencyRecorder g_latency_readcache4disk_get("readcache4disk_get");
// ----------------------------------------------------------------------------
// Read-cache backend that stores cached data as files on local disk via a
// FileSystemDataAdaptor rooted at the configured read_cache_dir.
class ReadCache4Disk : public ReadCacheImpl {
public:
    explicit ReadCache4Disk(std::shared_ptr<folly::CPUThreadPoolExecutor> executor,
                            std::shared_ptr<DataAdaptor> base_adaptor = nullptr);

    ~ReadCache4Disk() {}

    // Read `length` bytes of `key` starting at `start`.
    virtual Future<GetOutput> Get(const std::string &key, uint64_t start, uint64_t length);

    // Store the contents of `buf` under `key`.
    virtual int Put(const std::string &key, uint64_t length, const butil::IOBuf &buf);

    virtual int Delete(const std::string &key);

    // Delete all chunk entries `key-<id>-<chunk_size>` for id in [0, max_chunk_id).
    virtual int Delete(const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id);

private:
    std::shared_ptr<folly::CPUThreadPoolExecutor> executor_;
    std::shared_ptr<DataAdaptor> base_adaptor_;
    std::shared_ptr<DataAdaptor> cache_fs_adaptor_;  // filesystem-backed store
};
// Wire a filesystem adaptor rooted at the configured read_cache_dir, with
// `base_adaptor` as its upstream.
ReadCache4Disk::ReadCache4Disk(std::shared_ptr<folly::CPUThreadPoolExecutor> executor,
                               std::shared_ptr<DataAdaptor> base_adaptor)
        : executor_(executor), base_adaptor_(base_adaptor) {
    cache_fs_adaptor_ = std::make_shared<FileSystemDataAdaptor>(GetGlobalConfig().read_cache_dir, base_adaptor_, true, executor_);
}
// Read [start, start+length) of `key` from the on-disk cache into a newly
// allocated buffer and hand it to the returned GetOutput's IOBuf.
// BRPC_WITH_RDMA is #defined to 1 at the top of this file, so the RDMA
// branch is the one compiled: the buffer comes from the RDMA block pool and
// is released by DeallocBlock (attached as IOBuf user data on success).
// NOTE(review): the heap-allocated timer is freed in the continuation; if
// the future completes with an exception rather than a value, it would leak.
Future<GetOutput> ReadCache4Disk::Get(const std::string &key, uint64_t start, uint64_t length) {
    butil::Timer *t = new butil::Timer();
    t->start();
#ifndef BRPC_WITH_RDMA
    auto wrap = HybridCache::ByteBuffer(new char[length], length);
#else
    auto wrap = HybridCache::ByteBuffer((char *) brpc::rdma::AllocBlock(length), length);
#endif
    return cache_fs_adaptor_->DownLoad(key, start, length, wrap).thenValue([wrap, key, start, length, t](int res) -> GetOutput {
        GetOutput output;
        output.status = res;
#ifndef BRPC_WITH_RDMA
        if (res == OK) {
            output.buf.append(wrap.data, wrap.len);
        }
        delete []wrap.data;
#else
        if (res == OK) {
            // Zero-copy: the IOBuf takes ownership and frees via DeallocBlock.
            output.buf.append_user_data(wrap.data, wrap.len, brpc::rdma::DeallocBlock);
        } else {
            brpc::rdma::DeallocBlock(wrap.data);
        }
#endif
        t->stop();
        g_latency_readcache4disk_get << t->u_elapsed();
        delete t;
        LOG_IF(INFO, FLAGS_verbose) << "Get key: " << key
                                    << ", start: " << start
                                    << ", length: " << length
                                    << ", status: " << res;
        return output;
    });
}
// Copy `buf` into a contiguous scratch buffer and write it to the on-disk
// cache under `key`. Blocks on the upload future; returns its status.
int ReadCache4Disk::Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) {
    const auto payload_len = buf.length();
    auto scratch = malloc(payload_len);
    // fetch() may return the IOBuf's own contiguous block instead of scratch.
    auto contiguous = buf.fetch(scratch, payload_len);
    auto wrapped = HybridCache::ByteBuffer((char *) contiguous, payload_len);
    std::map<std::string, std::string> empty_headers;
    const int status = cache_fs_adaptor_->UpLoad(key, length, wrapped, empty_headers).get();
    free(scratch);
    LOG_IF(INFO, FLAGS_verbose) << "Put key: " << key
                                << ", length: " << length
                                << ", status: " << status;
    return status;
}
// Remove a single cached file; blocks on the deletion future.
int ReadCache4Disk::Delete(const std::string &key) {
    LOG_IF(INFO, FLAGS_verbose) << "Delete key: " << key;
    const int status = cache_fs_adaptor_->Delete(key).get();
    return status;
}
// Remove every chunk file `key-<id>-<chunk_size>` for id in
// [0, max_chunk_id), stopping at the first failure.
int ReadCache4Disk::Delete(const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id) {
    LOG_IF(INFO, FLAGS_verbose) << "Delete key: " << key;
    uint64_t chunk_id = 0;
    while (chunk_id < max_chunk_id) {
        const std::string chunk_key = key + "-" + std::to_string(chunk_id) + "-" + std::to_string(chunk_size);
        const int status = cache_fs_adaptor_->Delete(chunk_key).get();
        if (status != 0) {
            return status;
        }
        ++chunk_id;
    }
    return OK;
}
DEFINE_string(read_cache_engine, "cachelib", "Read cache engine: cachelib | disk");
// Construct the read cache with the engine selected by the
// --read_cache_engine flag ("cachelib" or "disk"); any other value is fatal.
ReadCache::ReadCache(std::shared_ptr<folly::CPUThreadPoolExecutor> executor,
                     std::shared_ptr<DataAdaptor> base_adaptor) {
    const std::string &engine = FLAGS_read_cache_engine;
    if (engine == "cachelib") {
        impl_ = new ReadCache4Cachelib(executor, base_adaptor);
    } else if (engine == "disk") {
        impl_ = new ReadCache4Disk(executor, base_adaptor);
    } else {
        LOG(FATAL) << "unsupported read cache engine";
        exit(EXIT_FAILURE);
    }
}

53
global_cache/ReadCache.h Normal file
View File

@ -0,0 +1,53 @@
#ifndef MADFS_READ_CACHE_H
#define MADFS_READ_CACHE_H
#include <map>
#include <string>
#include <folly/executors/CPUThreadPoolExecutor.h>
#include <butil/iobuf.h>
#include "Common.h"
#include "data_adaptor.h"
#include "read_cache.h"
using HybridCache::DataAdaptor;
// Abstract interface of a read-cache backend (cachelib- or disk-based).
// Implementations are owned by ReadCache and destroyed through this
// interface (`delete impl_` in ReadCache's destructor).
class ReadCacheImpl {
public:
    // Bug fix: ReadCache deletes backends via a ReadCacheImpl*, which is
    // undefined behavior without a virtual destructor.
    virtual ~ReadCacheImpl() = default;

    // Read `length` bytes of `key` starting at `start`.
    virtual Future<GetOutput> Get(const std::string &key, uint64_t start, uint64_t length) = 0;

    // Store the contents of `buf` under `key`.
    virtual int Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) = 0;

    // Remove a single cached entry.
    virtual int Delete(const std::string &key) = 0;

    // Remove all chunk entries `key-<id>-<chunk_size>` for id in [0, max_chunk_id).
    virtual int Delete(const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id) = 0;
};
// Facade over the selected read-cache backend. Owns `impl_` as a raw
// pointer allocated in the constructor and freed in the destructor.
class ReadCache {
public:
    explicit ReadCache(std::shared_ptr<folly::CPUThreadPoolExecutor> executor,
                       std::shared_ptr<DataAdaptor> base_adaptor = nullptr);

    ~ReadCache() {
        delete impl_;
    }

    // Read `length` bytes of `key` starting at `start`.
    Future<GetOutput> Get(const std::string &key, uint64_t start, uint64_t length) {
        return impl_->Get(key, start, length);
    }

    // Store the contents of `buf` under `key`.
    int Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) {
        return impl_->Put(key, length, buf);
    }

    // Remove a single cached entry.
    int Delete(const std::string &key) {
        return impl_->Delete(key);
    }

    // Remove all chunk entries `key-<id>-<chunk_size>` for id in [0, max_chunk_id).
    int Delete(const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id) {
        return impl_->Delete(key, chunk_size, max_chunk_id);
    }

private:
    // Owning raw pointer to the engine chosen by --read_cache_engine.
    ReadCacheImpl *impl_;
};
#endif // MADFS_READ_CACHE_H

View File

@ -0,0 +1,245 @@
#include "ReadCacheClient.h"
#include "GlobalDataAdaptor.h"
#define AWS_BUFFER_PADDING 64
// `parent` is borrowed, not owned; it must outlive this client.
ReadCacheClient::ReadCacheClient(GlobalDataAdaptor *parent)
        : parent_(parent) {}
ReadCacheClient::~ReadCacheClient() {}
// Read [start, start+size) of `key` through the distributed read cache.
// The range is split into per-chunk requests; each chunk is fetched from a
// randomly chosen primary replica, failing over to the remaining replicas
// only on RPC_FAILED. Chunk futures are batched: once the in-flight payload
// reaches max_inflight_payload_size, the batch is drained synchronously
// before issuing more (simple backpressure).
// Returns OK, or the first non-OK chunk status encountered.
folly::Future<int> ReadCacheClient::Get(const std::string &key, size_t start, size_t size, ByteBuffer &buffer) {
    butil::Timer t;
    t.start();
    LOG_IF(INFO, FLAGS_verbose) << "Get key=" << key << ", start=" << start << ", size=" << size;
    std::vector<folly::Future<int>> future_list;
    std::vector<GetChunkRequestV2> requests;
    auto &policy = parent_->GetCachePolicy(key);
    const int num_choose = policy.read_replication_factor;
    GenerateGetChunkRequestsV2(key, start, size, buffer, requests, policy.read_chunk_size);
    if (requests.empty())
        return folly::makeFuture(OK);
    // Fetch one chunk: try a random replica first, then the others, but only
    // when the failure is RPC-level (any cache/data status is returned as-is).
    auto DoGetChunkAsync = [this, num_choose](GetChunkRequestV2 &entry) -> folly::Future<int> {
        auto replicas = GetReplica(entry.internal_key, num_choose);
        int primary_server_id = replicas[lrand48() % replicas.size()];
        return GetChunkAsync(primary_server_id, entry).thenValue([this, replicas, entry, primary_server_id](int res) -> int {
            if (res != RPC_FAILED) {
                return res;
            }
            LOG_EVERY_SECOND(WARNING) << "Unable to connect primary replicas. server_id " << primary_server_id
                                      << ", hostname: " << parent_->GetServerHostname(primary_server_id);
            // Failover: try the remaining replicas sequentially (blocking).
            for (auto &server_id : replicas) {
                if (server_id == primary_server_id) {
                    continue;
                }
                res = GetChunkAsync(server_id, entry).get();
                if (res != RPC_FAILED) {
                    return res;
                }
                LOG_EVERY_SECOND(WARNING) << "Unable to connect secondary replicas. server_id " << server_id
                                          << ", hostname: " << parent_->GetServerHostname(server_id);
            }
            LOG_EVERY_SECOND(ERROR) << "Unable to connect all target replicas";
            return RPC_FAILED;
        });
    };
    // Fast path: a single chunk needs no batching machinery.
    if (requests.size() == 1) {
        return DoGetChunkAsync(requests[0]);
    }
    size_t aggregated_size = 0;
    for (auto &entry: requests) {
        aggregated_size += entry.chunk_len;
        future_list.emplace_back(DoGetChunkAsync(entry));
        // Backpressure: drain the batch synchronously once the in-flight
        // payload crosses the configured threshold.
        if (aggregated_size >= GetGlobalConfig().max_inflight_payload_size) {
            auto output = folly::collectAll(future_list).get();
            for (auto &entry: output)
                if (entry.value_or(FOLLY_ERROR) != OK) {
                    LOG(ERROR) << "Failed to get data from read cache, key: " << key
                               << ", start: " << start
                               << ", size: " << size
                               << ", buf: " << (void *) buffer.data << " " << buffer.len
                               << ", error code: " << entry.value_or(FOLLY_ERROR);
                    return entry.value_or(FOLLY_ERROR);
                }
            future_list.clear();
        }
    }
    if (future_list.empty()) return OK;
    return folly::collectAll(future_list).via(parent_->executor_.get()).thenValue(
            [=](std::vector <folly::Try<int>> output) -> int {
                for (auto &entry: output)
                    if (entry.value_or(FOLLY_ERROR) != OK) {
                        LOG(ERROR) << "Failed to get data from read cache, key: " << key
                                   << ", start: " << start
                                   << ", size: " << size
                                   << ", buf: " << (void *) buffer.data << " " << buffer.len
                                   << ", error code: " << entry.value_or(FOLLY_ERROR);
                        return entry.value_or(FOLLY_ERROR);
                    }
                return OK;
            });
}
// Fetch one chunk from the read cache on `server_id`. On a cache miss
// (CACHE_ENTRY_NOT_FOUND) the chunk is pulled from the global cache /
// backing store and re-inserted via GetChunkFromGlobalCache. Any other
// non-OK status is propagated unchanged.
folly::Future<int> ReadCacheClient::GetChunkAsync(int server_id, GetChunkRequestV2 request) {
    LOG_IF(INFO, FLAGS_verbose) << "GetChunkAsync server_id=" << server_id
                                << ", internal_key=" << request.internal_key
                                << ", chunk_id=" << request.chunk_id
                                << ", chunk_start=" << request.chunk_start
                                << ", chunk_len=" << request.chunk_len
                                << ", buffer=" << (void *) request.buffer.data;
    return parent_->GetRpcClient()->GetEntryFromReadCache(server_id, request.internal_key, request.chunk_start, request.chunk_len)
            .then([this, server_id, request](folly::Try<GetOutput> &&output) -> folly::Future<int> {
                if (!output.hasValue()) {
                    return folly::makeFuture(FOLLY_ERROR);
                }
                auto &value = output.value();
                if (value.status == OK) {
                    // Cache hit: copy the returned bytes into the caller's slice.
                    value.buf.copy_to(request.buffer.data, request.buffer.len);
                    return folly::makeFuture(OK);
                } else if (value.status == CACHE_ENTRY_NOT_FOUND) {
                    return GetChunkFromGlobalCache(server_id, request);
                } else {
                    return folly::makeFuture(value.status);
                }
            });
}
// Cache-miss path for one chunk: Head() the object to learn its size,
// download the whole aligned chunk from the base adaptor, copy the
// requested slice into the caller's buffer, then populate the read cache
// on every replica server for future hits.
// Returns OK, END_OF_FILE when the requested range exceeds the object
// size, INVALID_ARGUMENT on an inverted range, or the first failure from
// Head/DownLoad/PutEntryFromReadCache.
folly::Future<int> ReadCacheClient::GetChunkFromGlobalCache(int server_id, GetChunkRequestV2 request) {
    // Shared state across the continuation chain; frees the download buffer
    // when the last continuation drops its reference.
    struct Args {
        size_t size;
        std::map<std::string, std::string> headers;
        ByteBuffer data_buf;
        ~Args() {
            if (data_buf.data) {
                delete []data_buf.data;
            }
        }
    };
    auto args = std::make_shared<Args>();
    // auto f = parent_->base_adaptor_->Head(request.user_key, args->size, args->headers)
    auto f = parent_->Head(request.user_key, args->size, args->headers)
            .then([this, args, request] (folly::Try<int> &&output) -> folly::Future<int> {
                if (output.value_or(FOLLY_ERROR) != OK) {
                    return folly::makeFuture(output.value_or(FOLLY_ERROR));
                }
                // Align the download to the chunk boundary; the end is
                // clamped to the object size.
                const size_t align_chunk_start = request.chunk_id * request.chunk_granularity;
                const size_t align_chunk_stop = std::min(align_chunk_start + request.chunk_granularity, args->size);
                if (align_chunk_start + request.chunk_start + request.chunk_len > args->size) {
                    LOG(WARNING) << "Requested data range exceeds object size, key: " << request.user_key
                                 << " request offset: " << align_chunk_start + request.chunk_start + request.chunk_len
                                 << ", size: " << args->size;
                    return folly::makeFuture(END_OF_FILE);
                } else if (align_chunk_start == align_chunk_stop) {
                    // Empty chunk: nothing to download or copy.
                    return folly::makeFuture(OK);
                } else if (align_chunk_start > align_chunk_stop) {
                    LOG(WARNING) << "Unexpected request range, key: " << request.user_key
                                 << " start offset: " << align_chunk_start
                                 << ", end offset: " << align_chunk_stop;
                    return folly::makeFuture(INVALID_ARGUMENT);
                }
                // AWS_BUFFER_PADDING extra bytes are allocated for the
                // downloader; the padding is subtracted again before the
                // buffer is written to the cache.
                args->data_buf.len = align_chunk_stop - align_chunk_start + AWS_BUFFER_PADDING;
                args->data_buf.data = new char[args->data_buf.len];
                return parent_->base_adaptor_->DownLoad(request.user_key,
                                                        align_chunk_start,
                                                        align_chunk_stop - align_chunk_start,
                                                        args->data_buf);
            }).then([this, args, request] (folly::Try<int> &&output) -> folly::Future<int> {
                if (output.value_or(FOLLY_ERROR) != OK) {
                    return folly::makeFuture(output.value_or(FOLLY_ERROR));
                }
                // Serve the caller first, then warm every replica's cache.
                memcpy(request.buffer.data, args->data_buf.data + request.chunk_start, request.chunk_len);
                args->data_buf.len -= AWS_BUFFER_PADDING;
                auto &policy = parent_->GetCachePolicy(request.user_key);
                auto replicas = GetReplica(request.internal_key, policy.read_replication_factor);
                std::vector <folly::Future<PutOutput>> future_list;
                for (auto server_id: replicas)
                    future_list.emplace_back(parent_->GetRpcClient()->PutEntryFromReadCache(server_id,
                                                                                            request.internal_key,
                                                                                            args->data_buf,
                                                                                            args->data_buf.len));
                return folly::collectAll(std::move(future_list)).via(parent_->executor_.get()).thenValue(
                        [](std::vector <folly::Try<PutOutput>> &&output) -> int {
                            for (auto &entry: output) {
                                if (!entry.hasValue())
                                    return FOLLY_ERROR;
                                if (entry.value().status != OK)
                                    return entry.value().status;
                            }
                            return OK;
                        });
            });
    return f;
}
// Invalidate every read-cache chunk of `key` covering [0, size) on every
// known cache server. Resolves to OK, or the first failure status.
folly::Future<int> ReadCacheClient::Invalidate(const std::string &key, size_t size) {
    auto &policy = parent_->GetCachePolicy(key);
    const size_t chunk_size = policy.read_chunk_size;
    const size_t end_chunk_id = (size + chunk_size - 1) / chunk_size;
    std::vector<folly::Future<int>> pending;
    for (int server_id = 0; server_id < parent_->server_list_.size(); server_id++) {
        pending.emplace_back(parent_->GetRpcClient()->DeleteEntryFromReadCache(server_id, key, chunk_size, end_chunk_id));
    }
    return folly::collectAll(pending).via(parent_->executor_.get()).thenValue(
            [](std::vector<folly::Try<int>> results) -> int {
                for (auto &item : results) {
                    const int rc = item.value_or(FOLLY_ERROR);
                    if (rc != OK)
                        return rc;
                }
                return OK;
            });
}
void ReadCacheClient::GenerateGetChunkRequestsV2(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
std::vector<GetChunkRequestV2> &requests,
size_t chunk_size) {
const size_t end = start + size;
const size_t begin_chunk_id = start / chunk_size;
const size_t end_chunk_id = (end + chunk_size - 1) / chunk_size;
if (buffer.len < size) {
LOG(WARNING) << "Buffer capacity may be not enough, expect " << size << ", actual " << buffer.len;
}
size_t buffer_offset = 0;
for (size_t chunk_id = begin_chunk_id; chunk_id < end_chunk_id; ++chunk_id) {
size_t chunk_start = std::max(chunk_id * chunk_size, start);
size_t chunk_stop = std::min((chunk_id + 1) * chunk_size, end);
if (chunk_stop <= chunk_start)
return;
GetChunkRequestV2 item;
item.user_key = key;
item.internal_key = key + "-" + std::to_string(chunk_id) + "-" + std::to_string(chunk_size);
item.chunk_id = chunk_id;
item.chunk_start = chunk_start % chunk_size;
item.chunk_len = chunk_stop - chunk_start;
item.chunk_granularity = chunk_size;
item.buffer.data = buffer.data + buffer_offset;
item.buffer.len = item.chunk_len;
buffer_offset += item.chunk_len;
requests.emplace_back(item);
}
LOG_ASSERT(buffer_offset == size);
}
std::vector<int> ReadCacheClient::GetReplica(const std::string &key, int num_choose) {
const int num_available = parent_->server_list_.size();
uint64_t seed = std::hash < std::string > {}(key);
std::vector<int> output;
for (int i = 0; i < std::min(num_available, num_choose); ++i)
output.push_back((seed + i) % num_available);
return output;
}

View File

@ -0,0 +1,60 @@
#ifndef MADFS_READ_CACHE_CLIENT_H
#define MADFS_READ_CACHE_CLIENT_H
#include <folly/futures/Future.h>
#include <folly/futures/Promise.h>
#include <folly/executors/CPUThreadPoolExecutor.h>
#include "Common.h"
#include "Placement.h"
#include "data_adaptor.h"
using HybridCache::ByteBuffer;
class GlobalDataAdaptor;
// Client-side view of the distributed read cache: splits byte ranges into
// chunk requests, selects replica servers, and falls back to the global
// cache / backing store on misses. Borrows (does not own) its parent
// GlobalDataAdaptor.
class ReadCacheClient {
    friend class GetChunkContext;

public:
    ReadCacheClient(GlobalDataAdaptor *parent);

    ~ReadCacheClient();

    // Read [start, start+size) of `key` into `buffer`.
    virtual folly::Future<int> Get(const std::string &key,
                                   size_t start,
                                   size_t size,
                                   ByteBuffer &buffer);

    // Invalidate cached chunks of `key` covering [0, size) on all servers.
    virtual folly::Future<int> Invalidate(const std::string &key, size_t size);

    // for testing only
public:
    // One per-chunk sub-request of a Get(): the internal cache key plus the
    // slice of the caller's buffer it fills.
    struct GetChunkRequestV2 {
        std::string user_key;
        std::string internal_key;     // "<user_key>-<chunk_id>-<chunk_size>"
        size_t chunk_id;
        size_t chunk_start;           // offset within the chunk
        size_t chunk_len;
        size_t chunk_granularity;     // chunk size used for alignment
        ByteBuffer buffer;            // caller-owned destination slice
    };

    // Split [start, start+size) into per-chunk requests (see .cpp).
    static void GenerateGetChunkRequestsV2(const std::string &key,
                                           size_t start,
                                           size_t size,
                                           ByteBuffer &buffer,
                                           std::vector<GetChunkRequestV2> &requests,
                                           size_t chunk_size);

    folly::Future<int> GetChunkAsync(int server_id, GetChunkRequestV2 context);

    folly::Future<int> GetChunkFromGlobalCache(int server_id, GetChunkRequestV2 context);

    std::vector<int> GetReplica(const std::string &key, int num_choose);

private:
    GlobalDataAdaptor *parent_;  // borrowed; must outlive this client
};
#endif // MADFS_READ_CACHE_CLIENT_H

View File

@ -0,0 +1,248 @@
#include "ReplicationWriteCacheClient.h"
#include "GlobalDataAdaptor.h"
// Replicate [off, off+size) of `buffer` into the global write cache: one
// PutEntryFromWriteCache RPC per (chunk, replica) pair, issued chunk-major /
// replica-minor. That ordering is what lets readers map path[i] back to
// replica[i % replicas.size()]. Resolves to the merged JSON metadata
// (type/size/replica/headers/path) on success, or the first failure status
// with an empty root.
folly::Future<PutResult> ReplicationWriteCacheClient::Put(const std::string &key,
                                                          size_t size,
                                                          const ByteBuffer &buffer,
                                                          const std::map <std::string, std::string> &headers,
                                                          size_t off) {
    std::vector <folly::Future<PutOutput>> future_list;
    butil::Timer *t = new butil::Timer();
    t->start();
    // Record the chosen replica servers in the metadata.
    Json::Value json_replica(Json::arrayValue);
    const std::vector<int> replicas = GetReplica(key);
    for (auto server_id: replicas) {
        json_replica.append(server_id);
    }
    Json::Value json_headers;
    for (auto iter = headers.begin(); iter != headers.end(); ++iter) {
        json_headers[iter->first] = iter->second;
    }
    auto write_chunk_size = GetGlobalConfig().write_chunk_size;
    for (uint64_t offset = 0; offset < size; offset += write_chunk_size) {
        for (auto server_id: replicas) {
            auto region_size = std::min(size - offset, write_chunk_size);
            ByteBuffer region_buffer(buffer.data + offset, region_size);
            // Chunk ids are global (based on off + offset) so multipart
            // uploads produce consistent internal keys.
            std::string partial_key = key
                    + "-" + std::to_string((off + offset) / write_chunk_size)
                    + "-" + std::to_string(write_chunk_size);
            auto PutRPC = folly::via(parent_->executor_.get(), [this, server_id, partial_key, region_buffer, region_size]() -> PutOutput {
                return parent_->GetRpcClient()->PutEntryFromWriteCache(server_id, partial_key, region_buffer, region_size).get();
            });
            future_list.emplace_back(std::move(PutRPC));
        }
    }
    t->stop();
    LOG(INFO) << "Phase 1: " << t->u_elapsed();
    Json::Value root;
    root["type"] = "replication";
    root["size"] = size;
    root["replica"] = json_replica;
    root["headers"] = json_headers;
    return folly::collectAll(future_list).via(parent_->executor_.get()).thenValue([root, t](std::vector<folly::Try<PutOutput>> &&output) -> folly::Future<PutResult> {
        Json::Value dummy_root;
        Json::Value json_path(Json::arrayValue);
        for (auto &entry: output) {
            if (!entry.hasValue()) {
                delete t;  // bug fix: the timer was leaked on this path
                return PutResult { FOLLY_ERROR, dummy_root };
            }
            if (entry.value().status != OK) {
                LOG(INFO) << "Found error";
                delete t;  // bug fix: the timer was leaked on this path
                return PutResult { entry.value().status, dummy_root };
            }
            json_path.append(entry.value().internal_key);
        }
        Json::Value new_root = root;
        new_root["path"] = json_path;
        t->stop();
        LOG(INFO) << "Duration: " << t->u_elapsed();
        delete t;
        return PutResult { OK, new_root };
    });
}
folly::Future<int> ReplicationWriteCacheClient::Get(const std::string &key,
                                                    size_t start,
                                                    size_t size,
                                                    ByteBuffer &buffer,
                                                    Json::Value &root) {
    // Read [start, start + size) of `key` from the replicated write cache.
    // `root` is the JSON placement record produced by Put(): "replica" lists
    // server ids, "path" lists internal keys laid out as
    // [chunk_id * num_replicas + replica_index].
    std::vector<int> replicas;
    for (auto &entry : root["replica"]) {
        replicas.push_back(entry.asInt());
    }
    std::vector<std::string> internal_keys;
    for (auto &entry : root["path"]) {
        internal_keys.push_back(entry.asString());
    }
    std::vector<folly::Future<int>> future_list;
    std::vector<GetChunkRequestV2> requests;
    auto write_chunk_size = GetGlobalConfig().write_chunk_size;
    GenerateGetChunkRequestsV2(key, start, size, buffer, requests, write_chunk_size);
    if (requests.empty())
        return folly::makeFuture(OK);
    // Guard: a corrupted/empty placement record would otherwise cause a
    // modulo-by-zero below.
    if (replicas.empty()) {
        LOG(ERROR) << "No replica recorded in metadata, key: " << key;
        return folly::makeFuture(INVALID_ARGUMENT);
    }
    size_t aggregated_size = 0;
    for (auto &entry : requests) {
        // Pick a random replica as primary; fall back to the others only when
        // the primary is unreachable (RPC_FAILED).
        size_t primary_replica_id = lrand48() % replicas.size();
        int primary_server_id = replicas[primary_replica_id];
        std::string internal_key = internal_keys[entry.chunk_id * replicas.size() + primary_replica_id];
        future_list.emplace_back(GetChunkAsync(primary_server_id, entry, internal_key)
            .thenValue([this, replicas, entry, primary_server_id, internal_keys](int res) -> int {
                if (res != RPC_FAILED) {
                    return res;
                }
                LOG_EVERY_SECOND(WARNING) << "Unable to connect primary replicas. server_id " << primary_server_id
                                          << ", hostname: " << parent_->GetServerHostname(primary_server_id);
                // Bug fix: "path" is indexed by replica *position*, not by the
                // server id. The original used `... + server_id`, which reads
                // the wrong (or an out-of-range) internal key whenever server
                // ids differ from replica indices.
                for (size_t replica_id = 0; replica_id < replicas.size(); ++replica_id) {
                    int server_id = replicas[replica_id];
                    if (server_id == primary_server_id) {
                        continue;
                    }
                    auto internal_key = internal_keys[entry.chunk_id * replicas.size() + replica_id];
                    res = GetChunkAsync(server_id, entry, internal_key).get();
                    if (res != RPC_FAILED) {
                        return res;
                    }
                    LOG_EVERY_SECOND(WARNING) << "Unable to connect secondary replicas. server_id " << server_id
                                              << ", hostname: " << parent_->GetServerHostname(server_id);
                }
                LOG_EVERY_SECOND(ERROR) << "Unable to connect all target replicas";
                return RPC_FAILED;
            }));
        aggregated_size += entry.chunk_len;
        // Throttle: once the in-flight payload exceeds the configured limit,
        // synchronously drain the outstanding chunk reads.
        if (aggregated_size >= GetGlobalConfig().max_inflight_payload_size) {
            auto output = folly::collectAll(future_list).get();
            for (auto &result : output) {
                if (result.value_or(FOLLY_ERROR) != OK) {
                    LOG(ERROR) << "Failed to get data from write cache, key: " << key
                               << ", start: " << start
                               << ", size: " << size
                               << ", buf: " << (void *) buffer.data << " " << buffer.len
                               << ", error code: " << result.hasValue() << " " << result.value_or(FOLLY_ERROR);
                    return result.value_or(FOLLY_ERROR);
                }
            }
            aggregated_size = 0;
            future_list.clear();
        }
    }
    return folly::collectAll(future_list).via(parent_->executor_.get()).thenValue(
        [=](std::vector<folly::Try<int>> output) -> int {
            for (auto &result : output) {
                if (result.value_or(FOLLY_ERROR) != OK) {
                    LOG(ERROR) << "Failed to get data from write cache, key: " << key
                               << ", start: " << start
                               << ", size: " << size
                               << ", buf: " << (void *) buffer.data << " " << buffer.len
                               << ", error code: " << result.hasValue() << " " << result.value_or(FOLLY_ERROR);
                    return result.value_or(FOLLY_ERROR);
                }
            }
            return OK;
        });
}
folly::Future<int> ReplicationWriteCacheClient::GetChunkAsync(int server_id, GetChunkRequestV2 request, std::string &internal_key) {
    // Fetch a single chunk from the write cache on `server_id` and copy the
    // payload into the caller-provided slice described by `request.buffer`.
    LOG_IF(INFO, FLAGS_verbose) << "GetChunkAsync server_id=" << server_id
                                << ", internal_key=" << internal_key
                                << ", chunk_id=" << request.chunk_id
                                << ", chunk_start=" << request.chunk_start
                                << ", chunk_len=" << request.chunk_len
                                << ", buffer=" << (void *) request.buffer.data;
    return parent_->GetRpcClient()
            ->GetEntryFromWriteCache(server_id, internal_key, request.chunk_start, request.chunk_len)
            .then([this, server_id, request](folly::Try<GetOutput> &&output) -> folly::Future<int> {
                // A missing value means the RPC future carried an exception.
                if (!output.hasValue())
                    return folly::makeFuture(FOLLY_ERROR);
                GetOutput &result = output.value();
                if (result.status != OK)
                    return folly::makeFuture(result.status);
                result.buf.copy_to(request.buffer.data, request.buffer.len);
                return folly::makeFuture(OK);
            })
            .via(parent_->executor_.get());
}
std::vector<int> ReplicationWriteCacheClient::GetReplica(const std::string &key) {
    // Deterministically map `key` onto `write_replication_factor` consecutive
    // servers, starting at a position derived from the key's hash. The same
    // key therefore always resolves to the same replica set.
    const int num_available = parent_->server_list_.size();
    auto &policy = parent_->GetCachePolicy(key);
    const uint64_t seed = std::hash<std::string>{}(key);
    std::vector<int> replicas;
    replicas.reserve(policy.write_replication_factor);
    for (int index = 0; index < policy.write_replication_factor; ++index) {
        replicas.push_back((seed + index) % num_available);
    }
    return replicas;
}
void ReplicationWriteCacheClient::GenerateGetChunkRequestsV2(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
std::vector<GetChunkRequestV2> &requests,
size_t chunk_size) {
const size_t end = start + size;
const size_t begin_chunk_id = start / chunk_size;
const size_t end_chunk_id = (end + chunk_size - 1) / chunk_size;
if (buffer.len < size) {
LOG(WARNING) << "Buffer capacity may be not enough, expect " << size << ", actual " << buffer.len;
}
size_t buffer_offset = 0;
for (size_t chunk_id = begin_chunk_id; chunk_id < end_chunk_id; ++chunk_id) {
size_t chunk_start = std::max(chunk_id * chunk_size, start);
size_t chunk_stop = std::min((chunk_id + 1) * chunk_size, end);
if (chunk_stop <= chunk_start)
return;
GetChunkRequestV2 item;
item.user_key = key;
item.chunk_id = chunk_id;
item.chunk_start = chunk_start % chunk_size;
item.chunk_len = chunk_stop - chunk_start;
item.chunk_granularity = chunk_size;
item.buffer.data = buffer.data + buffer_offset;
item.buffer.len = item.chunk_len;
buffer_offset += item.chunk_len;
requests.emplace_back(item);
}
LOG_ASSERT(buffer_offset == size);
}

View File

@ -0,0 +1,57 @@
#ifndef MADFS_REPLICATION_WRITE_CACHE_CLIENT_H
#define MADFS_REPLICATION_WRITE_CACHE_CLIENT_H
#include "WriteCacheClient.h"
using HybridCache::ByteBuffer;
class GlobalDataAdaptor;
using PutResult = WriteCacheClient::PutResult;
// Write-cache client that stores every chunk on several cache servers
// (replication) and records the placement in the JSON document returned by
// Put(); Get() reads that document back to locate the chunks.
class ReplicationWriteCacheClient : public WriteCacheClient {
friend class GetChunkContext;
public:
ReplicationWriteCacheClient(GlobalDataAdaptor *parent) : parent_(parent) {}
~ReplicationWriteCacheClient() {}
// Replicate `buffer` (logical offset `off`) under `key`; resolves to the
// status plus the JSON placement record.
virtual folly::Future<PutResult> Put(const std::string &key,
size_t size,
const ByteBuffer &buffer,
const std::map <std::string, std::string> &headers,
size_t off = 0);
// Read [start, start + size) of `key` into `buffer`; `root` is the
// placement record produced by Put().
virtual folly::Future<int> Get(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
Json::Value &root);
public:
// Deterministic replica selection for `key` (hash-based, consecutive servers).
std::vector<int> GetReplica(const std::string &key);
// One per-chunk read request; `buffer` is a slice of the caller's buffer.
struct GetChunkRequestV2 {
std::string user_key;
size_t chunk_id;
size_t chunk_start;
size_t chunk_len;
size_t chunk_granularity;
ByteBuffer buffer;
};
// Split a byte range into per-chunk requests (see the .cpp for semantics).
static void GenerateGetChunkRequestsV2(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer,
std::vector<GetChunkRequestV2> &requests,
size_t chunk_size);
// Fetch a single chunk from `server_id` into the request's buffer slice.
folly::Future<int> GetChunkAsync(int server_id, GetChunkRequestV2 context, std::string &internal_key);
private:
GlobalDataAdaptor *parent_;
};
#endif // MADFS_REPLICATION_WRITE_CACHE_CLIENT_H

View File

@ -0,0 +1,188 @@
#include "S3DataAdaptor.h"
#include <aws/s3/model/PutObjectRequest.h>
#include <aws/s3/model/HeadObjectRequest.h>
#include <aws/s3/model/GetObjectRequest.h>
#include <aws/s3/model/DeleteObjectRequest.h>
#include <aws/core/utils/memory/stl/AWSString.h>
#include <aws/core/utils/stream/PreallocatedStreamBuf.h>
#define STRINGIFY_HELPER(val) #val
#define STRINGIFY(val) STRINGIFY_HELPER(val)
// Allocation tag handed to the AWS SDK memory system: "<file>:<line>".
#define AWS_ALLOCATE_TAG __FILE__ ":" STRINGIFY(__LINE__)
// Guards so Aws::InitAPI / Aws::ShutdownAPI each run at most once per process.
std::once_flag S3INIT_FLAG;
std::once_flag S3SHUTDOWN_FLAG;
Aws::SDKOptions AWS_SDK_OPTIONS;
// https://github.com/aws/aws-sdk-cpp/issues/1430
// IOStream backed by a caller-owned, preallocated buffer, so the SDK reads or
// writes directly into it without an intermediate copy. The stream does NOT
// own the buffer, only the stream-buf wrapper allocated below.
class PreallocatedIOStream : public Aws::IOStream {
public:
PreallocatedIOStream(char *buf, size_t size)
: Aws::IOStream(new Aws::Utils::Stream::PreallocatedStreamBuf(
reinterpret_cast<unsigned char *>(buf), size)) {}
// Const overload for upload paths where the SDK only reads from the stream.
PreallocatedIOStream(const char *buf, size_t size)
: PreallocatedIOStream(const_cast<char *>(buf), size) {}
~PreallocatedIOStream() {
// corresponding new in constructor
delete rdbuf();
}
};
// Build an HTTP Range header value for `len` bytes starting at `offset`.
// Range is inclusive on both ends ("bytes=first-last"), so the last byte is
// offset + len - 1; the original used `offset + len`, requesting one byte too
// many. NOTE(review): callers are assumed to pass len > 0 — a zero-length
// range is meaningless for a ranged GET.
Aws::String GetObjectRequestRange(uint64_t offset, uint64_t len) {
    auto range =
        "bytes=" + std::to_string(offset) + "-" + std::to_string(offset + len - 1);
    return {range.data(), range.size()};
}
S3DataAdaptor::S3DataAdaptor() {
// Initialize the AWS SDK once per process (shared across all adaptors).
auto initSDK = [&]() {
Aws::InitAPI(AWS_SDK_OPTIONS);
};
std::call_once(S3INIT_FLAG, initSDK);
auto &s3_config = GetGlobalConfig().s3_config;
// Stop the SDK from probing the EC2 instance metadata service.
setenv("AWS_EC2_METADATA_DISABLED", "true", 1);
clientCfg_ = Aws::New<Aws::Client::ClientConfiguration>(AWS_ALLOCATE_TAG, true);
// Plain HTTP against the configured endpoint; TLS verification disabled.
clientCfg_->scheme = Aws::Http::Scheme::HTTP;
clientCfg_->verifySSL = false;
clientCfg_->maxConnections = 10;
clientCfg_->endpointOverride = s3_config.address;
// Background thread pool used by the *Async request handlers.
clientCfg_->executor = Aws::MakeShared<Aws::Utils::Threading::PooledThreadExecutor>("S3Adapter.S3Client", s3_config.bg_threads);
s3Client_ = Aws::New<Aws::S3::S3Client>(AWS_ALLOCATE_TAG,
Aws::Auth::AWSCredentials(s3_config.access_key, s3_config.secret_access_key),
*clientCfg_,
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
false);
}
S3DataAdaptor::~S3DataAdaptor() {
// Release SDK-allocated objects before shutting the SDK down.
if (clientCfg_ != nullptr) {
Aws::Delete<Aws::Client::ClientConfiguration>(clientCfg_);
clientCfg_ = nullptr;
}
if (s3Client_ != nullptr) {
Aws::Delete<Aws::S3::S3Client>(s3Client_);
s3Client_ = nullptr;
}
// NOTE(review): ShutdownAPI runs only once per process, so an adaptor
// created after the first destruction would use a shut-down SDK — confirm
// a single S3DataAdaptor lifecycle per process is intended.
auto shutdownSDK = [&]() {
Aws::ShutdownAPI(AWS_SDK_OPTIONS);
};
std::call_once(S3SHUTDOWN_FLAG, shutdownSDK);
}
folly::Future<int> S3DataAdaptor::DownLoad(const std::string &key,
                                           size_t start,
                                           size_t size,
                                           ByteBuffer &buffer) {
    // Asynchronously fetch [start, start + size) of object `key` into
    // `buffer`. The SDK writes into `buffer` from a background thread, so the
    // caller must keep it alive until the returned future completes.
    Aws::S3::Model::GetObjectRequest request;
    request.SetBucket(GetGlobalConfig().s3_config.bucket);
    request.SetKey(Aws::String{key.c_str(), key.size()});
    request.SetRange(GetObjectRequestRange(start, size));
    // Response body is streamed straight into the caller's buffer (no copy).
    request.SetResponseStreamFactory(
        [&buffer]() { return Aws::New<PreallocatedIOStream>(AWS_ALLOCATE_TAG, buffer.data, buffer.len); });
    auto promise = std::make_shared<folly::Promise<int>>();
    // Fix: the original handler captured `&buffer` and `size` without ever
    // using them; only the promise is needed here.
    Aws::S3::GetObjectResponseReceivedHandler handler =
        [promise](
                const Aws::S3::S3Client */*client*/,
                const Aws::S3::Model::GetObjectRequest &/*request*/,
                const Aws::S3::Model::GetObjectOutcome &response,
                const std::shared_ptr<const Aws::Client::AsyncCallerContext> &awsCtx) {
            if (response.IsSuccess()) {
                promise->setValue(OK);
            } else if (response.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) {
                promise->setValue(NOT_FOUND);
            } else {
                LOG(ERROR) << "GetObjectAsync error: "
                           << response.GetError().GetExceptionName()
                           << "message: " << response.GetError().GetMessage();
                promise->setValue(S3_INTERNAL_ERROR);
            }
        };
    s3Client_->GetObjectAsync(request, handler, nullptr);
    return promise->getFuture();
}
folly::Future<int> S3DataAdaptor::UpLoad(const std::string &key,
size_t size,
const ByteBuffer &buffer,
const std::map <std::string, std::string> &headers) {
// Asynchronously store `buffer` under `key`, attaching `headers` as S3 user
// metadata. The request body reads from `buffer` without copying, so the
// caller must keep it alive until the returned future completes.
// NOTE(review): the `size` parameter is unused here — the body length comes
// from buffer.len; confirm callers always pass size == buffer.len.
Aws::S3::Model::PutObjectRequest request;
request.SetBucket(GetGlobalConfig().s3_config.bucket);
request.SetKey(key);
request.SetMetadata(headers);
// Zero-copy body backed by the caller's buffer.
request.SetBody(Aws::MakeShared<PreallocatedIOStream>(AWS_ALLOCATE_TAG, buffer.data, buffer.len));
auto promise = std::make_shared < folly::Promise < int >> ();
Aws::S3::PutObjectResponseReceivedHandler handler =
[promise](
const Aws::S3::S3Client */*client*/,
const Aws::S3::Model::PutObjectRequest &/*request*/,
const Aws::S3::Model::PutObjectOutcome &response,
const std::shared_ptr<const Aws::Client::AsyncCallerContext> &awsCtx) {
LOG_IF(ERROR, !response.IsSuccess())
<< "PutObjectAsync error: "
<< response.GetError().GetExceptionName()
<< "message: " << response.GetError().GetMessage();
promise->setValue(response.IsSuccess() ? OK : S3_INTERNAL_ERROR);
};
s3Client_->PutObjectAsync(request, handler, nullptr);
return promise->getFuture();
}
folly::Future<int> S3DataAdaptor::Delete(const std::string &key) {
    // Asynchronously remove object `key` from the configured bucket.
    // Resolves to OK, NOT_FOUND, or S3_INTERNAL_ERROR.
    Aws::S3::Model::DeleteObjectRequest request;
    request.SetBucket(GetGlobalConfig().s3_config.bucket);
    request.SetKey(key);
    auto promise = std::make_shared<folly::Promise<int>>();
    Aws::S3::DeleteObjectResponseReceivedHandler handler =
        [promise](
                const Aws::S3::S3Client */*client*/,
                const Aws::S3::Model::DeleteObjectRequest &/*request*/,
                const Aws::S3::Model::DeleteObjectOutcome &response,
                const std::shared_ptr<const Aws::Client::AsyncCallerContext> &awsCtx) {
            int status = S3_INTERNAL_ERROR;
            if (response.IsSuccess()) {
                status = OK;
            } else if (response.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) {
                status = NOT_FOUND;
            } else {
                LOG(ERROR) << "DeleteObjectAsync error: "
                           << response.GetError().GetExceptionName()
                           << "message: " << response.GetError().GetMessage();
            }
            promise->setValue(status);
        };
    s3Client_->DeleteObjectAsync(request, handler, nullptr);
    return promise->getFuture();
}
folly::Future<int> S3DataAdaptor::Head(const std::string &key,
size_t &size,
std::map <std::string, std::string> &headers) {
// Fetch object metadata: content length into `size`, user metadata into
// `headers`. Both references are written from the SDK callback thread, so
// the caller must keep them alive until the returned future completes.
Aws::S3::Model::HeadObjectRequest request;
request.SetBucket(GetGlobalConfig().s3_config.bucket);
request.SetKey(key);
auto promise = std::make_shared < folly::Promise < int >> ();
Aws::S3::HeadObjectResponseReceivedHandler handler =
[promise, &size, &headers](
const Aws::S3::S3Client */*client*/,
const Aws::S3::Model::HeadObjectRequest &/*request*/,
const Aws::S3::Model::HeadObjectOutcome &response,
const std::shared_ptr<const Aws::Client::AsyncCallerContext> &awsCtx) {
if (response.IsSuccess()) {
headers = response.GetResult().GetMetadata();
size = response.GetResult().GetContentLength();
promise->setValue(OK);
} else if (response.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) {
promise->setValue(NOT_FOUND);
} else {
LOG(ERROR) << "HeadObjectAsync error: "
<< response.GetError().GetExceptionName()
<< "message: " << response.GetError().GetMessage();
promise->setValue(S3_INTERNAL_ERROR);
}
};
s3Client_->HeadObjectAsync(request, handler, nullptr);
return promise->getFuture();
}

View File

@ -0,0 +1,47 @@
#ifndef MADFS_S3_DATA_ADAPTOR_H
#define MADFS_S3_DATA_ADAPTOR_H
#include <aws/core/Aws.h>
#include <aws/s3/S3Client.h>
#include <aws/core/client/ClientConfiguration.h>
#include <aws/core/auth/AWSCredentialsProvider.h>
#include "data_adaptor.h"
#include "Common.h"
using HybridCache::ByteBuffer;
using HybridCache::DataAdaptor;
// DataAdaptor backed by an S3-compatible object store, using the AWS SDK's
// asynchronous client API. Configuration comes from GetGlobalConfig().s3_config.
class S3DataAdaptor : public DataAdaptor {
public:
S3DataAdaptor();
~S3DataAdaptor();
// Download data from the data server
virtual folly::Future<int> DownLoad(const std::string &key,
size_t start,
size_t size,
ByteBuffer &buffer);
// Upload data to the data server
virtual folly::Future<int> UpLoad(const std::string &key,
size_t size,
const ByteBuffer &buffer,
const std::map <std::string, std::string> &headers);
// Delete data from the data server
virtual folly::Future<int> Delete(const std::string &key);
// Fetch the object's metadata (content length and user headers)
virtual folly::Future<int> Head(const std::string &key,
size_t &size,
std::map <std::string, std::string> &headers);
private:
// Owned via Aws::New/Aws::Delete (see constructor/destructor).
Aws::Client::ClientConfiguration *clientCfg_;
Aws::S3::S3Client *s3Client_;
};
#endif // MADFS_S3_DATA_ADAPTOR_H

404
global_cache/WriteCache.cpp Normal file
View File

@ -0,0 +1,404 @@
#include <unistd.h>
#include "WriteCache.h"
#include "FileSystemDataAdaptor.h"
#include <dirent.h>
#include "write_cache.h"
//#define BRPC_WITH_RDMA 1
//#include <brpc/rdma/block_pool.h>
// Write-cache engine that persists entries in a RocksDB instance located
// under GetGlobalConfig().write_cache_dir.
class WriteCache4RocksDB : public WriteCacheImpl {
public:
explicit WriteCache4RocksDB(std::shared_ptr<folly::CPUThreadPoolExecutor> executor);
~WriteCache4RocksDB();
virtual GetOutput Get(const std::string &internal_key, uint64_t start, uint64_t length);
virtual PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf);
virtual int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set<std::string> &except_keys);
private:
// Filesystem path of the RocksDB directory (".write_cache.db").
std::string rocksdb_path_;
// DB handle opened in the constructor, closed in the destructor.
rocksdb::DB *db_;
};
WriteCache4RocksDB::WriteCache4RocksDB(std::shared_ptr<folly::CPUThreadPoolExecutor> executor)
: WriteCacheImpl(executor) {
rocksdb::Options options;
options.create_if_missing = true;
// The DB lives in a hidden directory under the configured write-cache dir.
rocksdb_path_ = PathJoin(GetGlobalConfig().write_cache_dir, ".write_cache.db");
// Startup failures are unrecoverable here, hence abort().
if (CreateParentDirectories(rocksdb_path_)) {
LOG(WARNING) << "Failed to create directory: " << rocksdb_path_;
abort();
}
auto status = rocksdb::DB::Open(options, rocksdb_path_, &db_);
if (!status.ok()) {
LOG(WARNING) << "Failed to open RocksDB: " << status.ToString();
abort();
}
}
WriteCache4RocksDB::~WriteCache4RocksDB() {
    // Close() flushes and releases DB resources; deleting the handle frees
    // the rocksdb::DB object itself (the original leaked it).
    if (db_) {
        db_->Close();
        delete db_;
        db_ = nullptr;
    }
}
// Read `length` bytes starting at `start` from the value stored under
// `internal_key`. Returns CACHE_ENTRY_NOT_FOUND / IO_ERROR / INVALID_ARGUMENT
// on failure; on success the bytes are appended to output.buf.
GetOutput WriteCache4RocksDB::Get(const std::string &internal_key, uint64_t start, uint64_t length) {
rocksdb::ReadOptions options;
std::string value;
auto status = db_->Get(options, internal_key, &value);
GetOutput output;
if (status.IsNotFound()) {
output.status = CACHE_ENTRY_NOT_FOUND;
return output;
} else if (!status.ok()) {
LOG(WARNING) << "Failed to get key " << internal_key << " from RocksDB: " << status.ToString();
output.status = IO_ERROR;
return output;
}
// Zero-length reads and out-of-bounds ranges are rejected.
if (length == 0 || start + length > value.size()) {
output.status = INVALID_ARGUMENT;
return output;
}
output.status = OK;
output.buf.append(&value[start], length);
LOG_IF(INFO, FLAGS_verbose) << "GetWriteCache internal_key: " << internal_key << ", size: " << length;
return output;
}
PutOutput WriteCache4RocksDB::Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) {
    // Store the payload under a unique internal key: "<key>-<object id>".
    // The monotonically increasing id doubles as the entry's timestamp.
    const auto internal_key = key + "-" + std::to_string(next_object_id_.fetch_add(1));
    rocksdb::WriteOptions options;
    const auto status = db_->Put(options, internal_key, buf.to_string());
    if (!status.ok()) {
        LOG(WARNING) << "Failed to put key " << internal_key << " from RocksDB: " << status.ToString();
        return {IO_ERROR, "<undefined>"};
    }
    LOG_IF(INFO, FLAGS_verbose) << "PutWriteCache key: " << key << ", internal_key: " << internal_key << ", size: " << length;
    return {OK, internal_key};
}
// True iff `key` begins with `key_prefix` (an empty prefix matches anything).
static bool HasPrefix(const std::string &key, const std::string &key_prefix) {
    return key.compare(0, key_prefix.size(), key_prefix) == 0;
}
// Parse the trailing "-<number>" suffix of a cache key as its timestamp.
// Returns UINT64_MAX when the key has no '-' or the suffix is not numeric
// (UINT64_MAX means "never delete" for the GC comparisons below).
static uint64_t ParseTS(const std::string &key) {
    size_t pos = key.rfind('-');
    if (pos == std::string::npos) {
        return UINT64_MAX;
    }
    std::istringstream stream(key.substr(pos + 1));
    uint64_t number = 0;
    stream >> number;
    // Bug fix: the original checked std::cin.fail() instead of this stream's
    // state, so a non-numeric suffix returned an uninitialized value.
    if (stream.fail()) {
        return UINT64_MAX;
    }
    return number;
}
// Delete all entries that: match the prefix, < ts, and not in except_keys
int WriteCache4RocksDB::Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set<std::string> &except_keys) {
LOG(INFO) << "Request key_prefix = " << key_prefix << ", ts = " << ts;
rocksdb::ReadOptions read_options;
rocksdb::WriteOptions write_options;
// Seek to the prefix; RocksDB iterates keys in lexicographic order, so the
// scan stops at the first key that no longer carries the prefix.
auto iter = db_->NewIterator(read_options);
iter->Seek(key_prefix);
for (; iter->Valid(); iter->Next()) {
std::string key = iter->key().ToString();
LOG(INFO) << "Processing key " << key;
if (!HasPrefix(key, key_prefix)) {
break;
}
// Keep entries at/after `ts` and explicitly protected keys.
if (ParseTS(key) >= ts || except_keys.count(key)) {
continue;
}
auto status = db_->Delete(write_options, key);
if (!status.ok() && !status.IsNotFound()) {
LOG(WARNING) << "Failed to delete key " << key << " from RocksDB: " << status.ToString();
// NOTE(review): iterators from NewIterator() are conventionally released
// with `delete`; confirm Reset() actually frees this one, otherwise it
// leaks on both exit paths.
iter->Reset();
return IO_ERROR;
}
LOG(INFO) << "Deleted key " << key;
}
iter->Reset();
return OK;
}
// ----------------------------------------------------------------------------
// Write-cache engine that stores each entry as a file on the local filesystem
// (via FileSystemDataAdaptor) under GetGlobalConfig().write_cache_dir.
class WriteCache4Disk : public WriteCacheImpl {
public:
explicit WriteCache4Disk(std::shared_ptr<folly::CPUThreadPoolExecutor> executor);
~WriteCache4Disk();
virtual GetOutput Get(const std::string &internal_key, uint64_t start, uint64_t length);
virtual PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf);
virtual int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set<std::string> &except_keys);
private:
// Maps internal keys to files below the write-cache directory.
std::shared_ptr<DataAdaptor> cache_fs_adaptor_;
};
WriteCache4Disk::WriteCache4Disk(std::shared_ptr<folly::CPUThreadPoolExecutor> executor)
: WriteCacheImpl(executor) {
cache_fs_adaptor_ = std::make_shared<FileSystemDataAdaptor>(GetGlobalConfig().write_cache_dir, nullptr, false, nullptr, false);
}
WriteCache4Disk::~WriteCache4Disk() {}
// Read `length` bytes at `start` of `internal_key` from its backing file and
// return them in output.buf; output.status carries the adaptor's result code.
GetOutput WriteCache4Disk::Get(const std::string &internal_key, uint64_t start, uint64_t length) {
butil::Timer t;
t.start();
#ifndef BRPC_WITH_RDMA
auto wrap = HybridCache::ByteBuffer(new char[length], length);
#else
// RDMA build: read into a registered block so the IOBuf can hand the memory
// to brpc without copying.
auto wrap = HybridCache::ByteBuffer((char *) brpc::rdma::AllocBlock(length), length);
#endif
int res = cache_fs_adaptor_->DownLoad(internal_key, start, length, wrap).get();
GetOutput output;
output.status = res;
#ifndef BRPC_WITH_RDMA
if (res == OK) {
// Copy into the IOBuf; the temporary heap buffer is freed just below.
output.buf.append(wrap.data, wrap.len);
}
delete []wrap.data;
#else
if (res == OK) {
// Zero-copy: the IOBuf takes ownership and frees via DeallocBlock.
output.buf.append_user_data(wrap.data, wrap.len, brpc::rdma::DeallocBlock);
} else {
brpc::rdma::DeallocBlock(wrap.data);
}
#endif
t.stop();
LOG_IF(INFO, FLAGS_verbose) << "Get key: " << internal_key
<< ", start: " << start
<< ", length: " << length
<< ", status: " << res
<< ", latency: " << t.u_elapsed();
return output;
}
// Bytes available to unprivileged users on the filesystem containing `path`;
// returns 0 when statvfs fails (e.g. the path does not exist).
uint64_t ReportAvailableDiskSpace(std::string &path) {
    struct statvfs fs_info;
    if (statvfs(path.c_str(), &fs_info) != 0) {
        PLOG(ERROR) << "Failed to statvfs";
        return 0;
    }
    return static_cast<uint64_t>(fs_info.f_bavail) * fs_info.f_bsize;
}
// Refuse writes when the cache filesystem has less than 512 MiB available.
const static size_t kMinDiskFreeSpace = 1024 * 1024 * 512;
// Persist the payload as a file named "<key>-<object id>" under the cache dir.
PutOutput WriteCache4Disk::Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) {
butil::Timer t;
t.start();
auto oid = next_object_id_.fetch_add(1);
auto internal_key = key + "-" + std::to_string(oid);
if (ReportAvailableDiskSpace(GetGlobalConfig().write_cache_dir) < std::max(length, kMinDiskFreeSpace)) {
// LOG(WARNING) << "No enough space to persist data, please perform one GC immediately";
return {NO_ENOUGH_DISKSPACE, "<undefined>"};
}
t.stop();
// LOG_IF(INFO, FLAGS_verbose) << "duration: " << t.u_elapsed();
auto data_len = buf.length();
// Per-thread staging buffer sized to one write chunk, 4 KiB-aligned;
// allocated once per thread and intentionally never freed.
// NOTE(review): the posix_memalign result is unchecked — on failure
// aux_buffer stays null and buf.fetch() below would misbehave; confirm
// allocation failure is acceptable to ignore here.
thread_local void *aux_buffer = nullptr;
if (!aux_buffer)
posix_memalign(&aux_buffer, 4096, GetGlobalConfig().write_chunk_size);
auto data = buf.fetch(aux_buffer, data_len);
auto wrap = HybridCache::ByteBuffer((char *) data, data_len);
std::map<std::string, std::string> headers;
t.stop();
// LOG_IF(INFO, FLAGS_verbose) << "duration: " << t.u_elapsed();
// NOTE(review): `length` is uploaded while `wrap` holds data_len bytes from
// the IOBuf — confirm callers guarantee length == buf.length().
int res = cache_fs_adaptor_->UpLoad(internal_key, length, wrap, headers).get();
// free(aux_buffer);
if (res) {
LOG(WARNING) << "Failed to put key " << internal_key << " to disk";
return {IO_ERROR, "<undefined>"};
}
t.stop();
LOG_IF(INFO, FLAGS_verbose) << "PutWriteCache key: " << key << ", internal_key: " << internal_key << ", size: " << length << ", duration: " << t.u_elapsed();
return {OK, internal_key};
}
void listFilesRecursively(const std::string &directoryPath,
std::vector<std::string> &to_remove,
const std::string &key_prefix,
uint64_t ts,
const std::unordered_set<std::string> &except_keys) {
DIR* dir = opendir(directoryPath.c_str());
if (dir == nullptr) {
std::cerr << "Error opening directory: " << directoryPath << std::endl;
return;
}
struct dirent* entry;
while ((entry = readdir(dir)) != nullptr) {
// Skip "." and ".." entries
if (std::string(entry->d_name) == "." || std::string(entry->d_name) == "..") {
continue;
}
std::string fullPath = PathJoin(directoryPath, entry->d_name);
std::string rootPath = GetGlobalConfig().write_cache_dir;
struct stat statbuf;
if (stat(fullPath.c_str(), &statbuf) == 0) {
if (S_ISDIR(statbuf.st_mode)) {
// It's a directory, recurse into it
listFilesRecursively(fullPath, to_remove, key_prefix, ts, except_keys);
} else if (S_ISREG(statbuf.st_mode)) {
std::string key = fullPath.substr(rootPath.length());
if (!key.empty() && key[0] == '/') {
key = key.substr(1);
}
if (!HasPrefix(key, key_prefix) || except_keys.count(key) || ParseTS(key) >= ts) {
continue;
}
to_remove.push_back(fullPath);
// LOG(INFO) << "Deleted key " << key << ", location " << fullPath;
}
}
}
closedir(dir);
}
// Delete all entries that: match the prefix, < ts, and not in except_keys
int WriteCache4Disk::Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set<std::string> &except_keys) {
    LOG(INFO) << "Request key_prefix = " << key_prefix << ", ts = " << ts;
    // Phase 1: collect matching files; phase 2: unlink them one by one.
    std::vector<std::string> victims;
    listFilesRecursively(GetGlobalConfig().write_cache_dir,
                         victims,
                         key_prefix,
                         ts,
                         except_keys);
    for (const auto &path : victims) {
        if (remove(path.c_str()) != 0) {
            LOG(WARNING) << "Failed to remove file: " << path;
            return IO_ERROR;
        }
    }
    return OK;
}
// Fake engine for testing/benchmarking: Get() fabricates 'x'-filled payloads
// and all mutations succeed without touching any storage.
class WriteCache4Fake : public WriteCacheImpl {
public:
    explicit WriteCache4Fake(std::shared_ptr<folly::CPUThreadPoolExecutor> executor) : WriteCacheImpl(executor) {}
    virtual ~WriteCache4Fake() {}
    virtual GetOutput Get(const std::string &internal_key, uint64_t start, uint64_t length) {
        LOG_IF(INFO, FLAGS_verbose) << "Get internal_key " << internal_key << " start " << start << " length " << length;
        GetOutput output;
        output.status = OK;
        // Synthesize `length` bytes of dummy data.
        output.buf.resize(length, 'x');
        return output;
    }
    virtual PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) {
        LOG_IF(INFO, FLAGS_verbose) << "Put key " << key << " length " << length;
        PutOutput output;
        output.status = OK;
        // Echo the user key back as the internal key.
        output.internal_key = key;
        return output;
    }
    virtual int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set<std::string> &except_keys) {
        return OK;
    }
};
// Engine backed by the local HybridCache::WriteCache (cachelib-based).
// NOTE(review): only Get() is wired through to the underlying cache; Put()
// and Delete() are no-op stubs — confirm this engine is intentionally
// read-only/incomplete (it is also not selectable via --write_cache_engine).
class WriteCache4Cachelib : public WriteCacheImpl {
public:
explicit WriteCache4Cachelib(std::shared_ptr<folly::CPUThreadPoolExecutor> executor) : WriteCacheImpl(executor) {
HybridCache::EnableLogging = false;
impl_ = std::make_shared<HybridCache::WriteCache>(GetGlobalConfig().write_cache);
}
virtual ~WriteCache4Cachelib() {}
// Read `length` bytes at `start` of `internal_key` from the local cache.
virtual GetOutput Get(const std::string &internal_key, uint64_t start, uint64_t length) {
butil::Timer t;
t.start();
std::vector<std::pair<size_t, size_t>> dataBoundary;
#ifndef BRPC_WITH_RDMA
auto wrap = HybridCache::ByteBuffer(new char[length], length);
#else
// RDMA build: read into a registered block for zero-copy hand-off to brpc.
auto wrap = HybridCache::ByteBuffer((char *) brpc::rdma::AllocBlock(length), length);
#endif
int res = impl_->Get(internal_key, start, length, wrap, dataBoundary);
GetOutput output;
output.status = res;
#ifndef BRPC_WITH_RDMA
if (res == OK) {
output.buf.append(wrap.data, wrap.len);
}
delete []wrap.data;
#else
if (res == OK) {
output.buf.append_user_data(wrap.data, wrap.len, brpc::rdma::DeallocBlock);
} else {
brpc::rdma::DeallocBlock(wrap.data);
}
#endif
t.stop();
LOG_IF(INFO, FLAGS_verbose) << "Get key: " << internal_key
<< ", start: " << start
<< ", length: " << length
<< ", status: " << res
<< ", latency: " << t.u_elapsed();
return output;
}
// Stub: acknowledges the write without storing anything.
virtual PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) {
LOG_IF(INFO, FLAGS_verbose) << "Put key " << key << " length " << length;
PutOutput ret;
ret.status = OK;
ret.internal_key = key;
return ret;
}
// Stub: reports success without deleting anything.
virtual int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set<std::string> &except_keys) {
return OK;
}
private:
std::shared_ptr<HybridCache::WriteCache> impl_;
};
// Fix: the help text omitted the supported "fake" engine (see the branches below).
DEFINE_string(write_cache_engine, "disk", "Write cache engine: rocksdb | disk | fake");
// Construct the write cache with the engine selected by --write_cache_engine.
// An unknown engine name is a fatal configuration error.
WriteCache::WriteCache(std::shared_ptr<folly::CPUThreadPoolExecutor> executor) {
    if (FLAGS_write_cache_engine == "rocksdb")
        impl_ = new WriteCache4RocksDB(executor);
    else if (FLAGS_write_cache_engine == "disk")
        impl_ = new WriteCache4Disk(executor);
    else if (FLAGS_write_cache_engine == "fake")
        impl_ = new WriteCache4Fake(executor);
    else {
        // NOTE(review): WriteCache4Cachelib exists above but is not reachable
        // from any flag value — confirm whether "cachelib" should be wired in.
        LOG(WARNING) << "unsupported write cache engine";
        exit(EXIT_FAILURE);
    }
}

53
global_cache/WriteCache.h Normal file
View File

@ -0,0 +1,53 @@
#ifndef MADFS_WRITE_CACHE_H
#define MADFS_WRITE_CACHE_H
#include <map>
#include <string>
#include <atomic>
#include <folly/executors/CPUThreadPoolExecutor.h>
#include <folly/synchronization/RWSpinLock.h>
#include <butil/iobuf.h>
#include <rocksdb/db.h>
#include "Common.h"
// Interface for pluggable write-cache engines (RocksDB / disk / fake / ...).
class WriteCacheImpl {
public:
    WriteCacheImpl(std::shared_ptr<folly::CPUThreadPoolExecutor> executor) : executor_(executor), next_object_id_(0) {}
    // Fix: WriteCache deletes engines through this base pointer (see
    // ~WriteCache), which is undefined behavior without a virtual destructor.
    virtual ~WriteCacheImpl() = default;
    virtual GetOutput Get(const std::string &internal_key, uint64_t start, uint64_t length) = 0;
    virtual PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) = 0;
    // Current logical timestamp: the next object id to be assigned.
    virtual uint64_t QueryTS() { return next_object_id_.load(); }
    virtual int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set<std::string> &except_keys) = 0;
    std::shared_ptr<folly::CPUThreadPoolExecutor> executor_;
    // Monotonic counter used to build unique internal keys ("<key>-<id>").
    std::atomic<uint64_t> next_object_id_;
};
// Facade that owns one WriteCacheImpl engine, selected at construction time
// by the --write_cache_engine flag (see WriteCache.cpp).
class WriteCache {
public:
explicit WriteCache(std::shared_ptr<folly::CPUThreadPoolExecutor> executor);
~WriteCache() {
// NOTE(review): impl_ is a raw owning pointer; copying a WriteCache would
// double-delete it — confirm instances are never copied.
delete impl_;
}
// All operations forward directly to the selected engine.
GetOutput Get(const std::string &internal_key, uint64_t start, uint64_t length) {
return impl_->Get(internal_key, start, length);
}
PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) {
return impl_->Put(key, length, buf);
}
uint64_t QueryTS() { return impl_->QueryTS(); }
int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set<std::string> &except_keys) {
return impl_->Delete(key_prefix, ts, except_keys);
}
private:
WriteCacheImpl *impl_;
};
#endif // MADFS_WRITE_CACHE_H

View File

@ -0,0 +1,42 @@
#ifndef MADFS_WRITE_CACHE_CLIENT_H
#define MADFS_WRITE_CACHE_CLIENT_H
#include <folly/futures/Future.h>
#include <folly/futures/Promise.h>
#include <folly/executors/CPUThreadPoolExecutor.h>
#include "Common.h"
#include "Placement.h"
#include "data_adaptor.h"
#include "EtcdClient.h"
using HybridCache::ByteBuffer;
class GlobalDataAdaptor;
// Abstract client-side interface to the distributed write cache; implemented
// by e.g. ReplicationWriteCacheClient and used polymorphically.
class WriteCacheClient {
public:
    // Result of a Put: a status code plus the JSON placement record that a
    // later Get() needs to locate the data.
    struct PutResult {
        int status;
        Json::Value root;
    };
public:
    WriteCacheClient() {}
    // Fix: implementations are used through this interface, so the destructor
    // must be virtual to make polymorphic deletion well-defined.
    virtual ~WriteCacheClient() {}
    virtual folly::Future<PutResult> Put(const std::string &key,
                                         size_t size,
                                         const ByteBuffer &buffer,
                                         const std::map <std::string, std::string> &headers,
                                         size_t off = 0) = 0;
    virtual folly::Future<int> Get(const std::string &key,
                                   size_t start,
                                   size_t size,
                                   ByteBuffer &buffer,
                                   Json::Value &root) = 0;
};
#endif // MADFS_WRITE_CACHE_CLIENT_H

72
global_cache/gcache.proto Normal file
View File

@ -0,0 +1,72 @@
syntax="proto2";
package gcache;
option cc_generic_services = true;
// Ranged read of a cache entry: `length` bytes starting at `start`.
message GetEntryRequest {
required string key = 1;
required uint64 start = 2;
required uint64 length = 3;
};
message GetEntryResponse {
required int32 status_code = 1;
optional bytes data = 2;
};
message PutEntryRequest {
required string key = 1;
required uint64 length = 2;
optional bytes data = 3;
};
message PutEntryResponse {
required int32 status_code = 1;
optional string internal_key = 2; // for write cache
};
// Read-cache deletion: removes chunks derived from the key prefix.
message DeleteEntryRequest {
required string key = 1; // actually 'prefix'
optional uint64 chunk_size = 2;
optional uint64 max_chunk_id = 3;
};
// Write-cache GC: delete entries matching key_prefix with timestamp < max_ts,
// except those explicitly listed in except_keys.
message DeleteEntryRequestForWriteCache {
required string key_prefix = 1;
required uint64 max_ts = 2;
repeated string except_keys = 3;
};
message DeleteEntryResponse {
required int32 status_code = 1;
};
message RegisterRequest {
// nothing
};
message QueryTsRequest {
// nothing
};
// Current write-cache timestamp (monotonic object-id counter).
message QueryTsResponse {
required int32 status_code = 1;
required uint64 timestamp = 2;
};
message RegisterResponse {
required int32 status_code = 1;
};
// RPC surface of a global cache server: read-cache and write-cache entry
// operations plus registration and timestamp queries.
service GlobalCacheService {
rpc GetEntryFromReadCache(GetEntryRequest) returns (GetEntryResponse);
rpc PutEntryFromReadCache(PutEntryRequest) returns (PutEntryResponse);
rpc DeleteEntryFromReadCache(DeleteEntryRequest) returns (DeleteEntryResponse);
rpc GetEntryFromWriteCache(GetEntryRequest) returns (GetEntryResponse);
rpc PutEntryFromWriteCache(PutEntryRequest) returns (PutEntryResponse);
rpc DeleteEntryFromWriteCache(DeleteEntryRequestForWriteCache) returns (DeleteEntryResponse);
rpc QueryTsFromWriteCache(QueryTsRequest) returns (QueryTsResponse);
rpc Register(RegisterRequest) returns (RegisterResponse);
};

16
install.sh Executable file
View File

@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Installs freshly built JYCache binaries into the prebuilt runtime
# environment (JYCache_Env). On first run the environment tarball is
# downloaded and its MD5 checksum verified before extraction.
#
# Fixes over the previous version: fail fast on any error (the old script
# kept copying after a failed wget/tar), quote paths, and use conventional
# awk quoting.
set -euo pipefail

ENV_TGZ="JYCache_Env_x64.tgz"
ENV_MD5="cd27e0db8b1fc33b88bf1c467ed012b8"

if [ ! -d "./JYCache_Env" ]; then
    wget "https://madstorage.s3.cn-north-1.jdcloud-oss.com/${ENV_TGZ}"
    md5=$(md5sum "${ENV_TGZ}" | awk '{print $1}')
    if [ "$md5" != "${ENV_MD5}" ]; then
        echo 'JYCache_Env version inconsistency!'
        exit 1
    fi
    tar -zxvf "${ENV_TGZ}"
fi

# Copy the built artifacts into the environment.
cp ./build/intercept/intercept_server JYCache_Env/
cp ./build/intercept/libintercept_client.so JYCache_Env/
cp ./build/global_cache/madfs_gc JYCache_Env/
cp ./build/global_cache/madfs_global_server JYCache_Env/
cp ./build/bin/s3fs JYCache_Env/

34
intercept/CMakeLists.txt Normal file
View File

@ -0,0 +1,34 @@
# Builds the intercept server executable and the LD_PRELOAD client library.
# NOTE(review): directory-scoped link_libraries() leaks -lrt into every
# target declared below and into all add_subdirectory() children; prefer
# per-target target_link_libraries() once the subdirectory targets are
# audited (removing it here would change what those targets link).
link_libraries(-lrt)
# Locate the prebuilt iceoryx libraries shipped under thirdparties/.
find_library(ICEORYX_POSH_LIB iceoryx_posh PATHS ../thirdparties/iceoryx/lib)
find_library(ICEORYX_HOOFS_LIB iceoryx_hoofs PATHS ../thirdparties/iceoryx/lib)
find_library(ICEORYX_PLATFORM_LIB iceoryx_platform PATHS ../thirdparties/iceoryx/lib)
# NOTE(review): -mavx2 is forced for this whole directory; the resulting
# binaries SIGILL on CPUs without AVX2 — consider a hardware check or option.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2")
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../thirdparties/spdlog/include)
# Component libraries (each builds a server and a *_client -fPIC variant).
add_subdirectory(common)
add_subdirectory(internal)
add_subdirectory(discovery)
add_subdirectory(filesystem)
add_subdirectory(registry)
add_subdirectory(middleware)
add_subdirectory(posix)
add_executable(intercept_server server.cpp)
target_link_libraries(intercept_server PUBLIC intercept_discovery intercept_internal common_lib intercept_filesystem intercept_middleware intercept_registry hybridcache_local madfs_global s3fs_lib ${THIRD_PARTY_LIBRARIES} ${ICEORYX_POSH_LIB} ${ICEORYX_HOOFS_LIB} ${ICEORYX_PLATFORM_LIB} -pthread -lcurl -lxml2 -lcrypto -ldl -laio -lrt -lacl)
# Shared library preloaded into client applications; built with -DCLIENT_BUILD
# so the common code picks the client configuration file.
add_library(intercept_client SHARED client.cpp)
target_link_libraries(intercept_client PUBLIC
intercept_posix_interface_client
-lsyscall_intercept
-pthread
${ICEORYX_POSH_LIB}
${ICEORYX_HOOFS_LIB}
${ICEORYX_PLATFORM_LIB}
-lrt
-L${CMAKE_CURRENT_SOURCE_DIR}/../thirdparties/intercept/lib
)
target_compile_options(intercept_client PUBLIC -DCLIENT_BUILD -mavx2)

138
intercept/client.cpp Normal file
View File

@ -0,0 +1,138 @@
// int main2(int argc, char *argv[]) {
// InitSyscall();
// GlobalInit();
// long args[6];
// const char* pathname = "/curvefs/test_mount/testfile";
// args[0] = (long)(pathname);
// args[1] = O_CREAT | O_WRONLY | O_TRUNC;
// args[2] = S_IRUSR | S_IWUSR;
// long result = 0;
// PosixOpOpen(args, &result);
// PosixOpAccess(args, &result);
// return 0;
// }
#include "registry/client_server_registry.h"
// ! NOTE (translated): temporarily annotated — this include must NOT be commented out when the client is actually in use.
#include "posix/posix_helper.h"
using intercept::middleware::ReqResMiddlewareWrapper;
// Smoke-test driver for the intercept client: registers with iceoryx,
// creates a file through the request/response middleware, writes a greeting,
// then re-opens and reads part of it back.
int main() {
    constexpr char APP_NAME[] = "iox-intercept-client";
    iox::runtime::PoshRuntime::initRuntime(APP_NAME);
    intercept::internal::ServiceMetaInfo info;
    info.service = SERVICE_FLAG;
    info.instance = INTERCEPT_INSTANCE_FLAG;
    intercept::registry::ClientServerRegistry registry(ICEORYX, info);
    auto dummyserver = registry.CreateDummyServer();
    sleep(2);  // give the dummy server time to come up before querying it
    info = dummyserver->GetServiceMetaInfo();
    info.service = SERVICE_FLAG;
    info.instance = INTERCEPT_INSTANCE_FLAG;
    std::shared_ptr<ReqResMiddlewareWrapper> wrapper = registry.CreateClient(info);
    // Create the file and write a short greeting through the middleware.
    intercept::internal::OpenOpReqRes req("/testdir/hellofile1", O_CREAT|O_RDWR, S_IRUSR | S_IWUSR);
    wrapper->OnRequest(req);
    const auto& openRes = static_cast<intercept::internal::OpenResponseData&> (req.GetResponse());
    char* writebuf = (char*)malloc(sizeof(char) * 1024);
    char str[] = "hello world";
    memcpy(writebuf, str, sizeof(str));
    intercept::internal::WriteOpReqRes writeReq(openRes.fd, writebuf, strlen(writebuf));
    wrapper->OnRequest(writeReq);
    free((void*)writebuf);  // fix: was leaked; released after the request, mirroring buf2 below
    // Re-open the file and read the first 8 bytes back.
    intercept::internal::OpenOpReqRes req2("/testdir/hellofile1", O_RDWR, S_IRUSR | S_IWUSR);
    wrapper->OnRequest(req2);
    const auto& openRes2 = static_cast<intercept::internal::OpenResponseData&> (req2.GetResponse());
    char* buf2 = (char*)malloc(sizeof(char) * 1024);
    intercept::internal::ReadOpReqRes readReq2(openRes2.fd, buf2, 8);
    wrapper->OnRequest(readReq2);
    free((void*)buf2);
    dummyserver->StopServer();
    std::cout << "stop dummyserver in main" << std::endl;
    //sleep(5);
    return 0;
}
// Interactive demo loop exercising plain POSIX file I/O (useful for manually
// testing the intercept layer). Prompts for a filename, then either writes
// stdin content to it (until a line reading "EOF") or dumps it to stdout.
// Type "exit" as the filename to quit.
int mainposix() {
    // Fix: the original read into a fixed char[256] with `cin >>`, which can
    // overflow on long input; std::string grows safely.
    std::string filename;
    // Command loop.
    while (true) {
        std::cout << "Enter filename (or 'exit' to quit): ";
        std::cin >> filename;
        if (filename == "exit") {
            std::cout << "Exiting program..." << std::endl;
            break;
        }
        std::cout << "Enter 'write' to write to file, 'read' to read from file: ";
        std::string operation;
        std::cin >> operation;
        if (operation == "write") {
            // Open (create/truncate) the file for writing.
            int fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
            if (fd == -1) {
                std::cerr << "Error: Failed to open file for writing." << std::endl;
                continue;
            }
            std::string content;
            std::cout << "Enter content to write to file (end with 'EOF'): " << std::endl;
            std::cin.ignore(); // discard the newline left behind by `>>`
            while (true) {
                std::string line;
                std::getline(std::cin, line);
                if (line == "EOF") {
                    break;
                }
                content += line + "\n";
            }
            ssize_t bytes_written = write(fd, content.c_str(), content.size());
            std::cout << "the write byte: " << bytes_written << std::endl;
            close(fd);
        } else if (operation == "read") {
            // Open the file and stream its contents to stdout.
            int fd = open(filename.c_str(), O_RDONLY);
            if (fd == -1) {
                std::cerr << "Error: Failed to open file for reading." << std::endl;
                continue;
            }
            char buffer[4096];
            ssize_t bytesRead;
            std::cout << "Content read from file:" << std::endl;
            while ((bytesRead = read(fd, buffer, sizeof(buffer))) > 0) {
                std::cout.write(buffer, bytesRead);
            }
            std::cout << std::endl;
            // Report size and permission bits for the file just read.
            struct stat fileStat;
            if (fstat(fd, &fileStat) == 0) {
                std::cout << "File size: " << fileStat.st_size << " bytes" << std::endl;
                std::cout << "File permissions: " << (fileStat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) << std::endl;
            } else {
                std::cerr << "Error: Failed to get file status." << std::endl;
            }
            close(fd);
        } else {
            std::cerr << "Error: Invalid operation. Please enter 'write' or 'read'." << std::endl;
        }
    }
    return 0;
}

View File

@ -0,0 +1,8 @@
# Builds the shared "common" utilities twice: once for the server and once
# as common_lib_client (position-independent, -DCLIENT_BUILD) for the
# LD_PRELOAD client library.
# NOTE(review): file(GLOB) misses newly added sources until reconfigure;
# prefer an explicit source list.
file(GLOB COMMON_SOURCES *.cpp)
add_library(common_lib ${COMMON_SOURCES})
target_include_directories(common_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
add_library(common_lib_client ${COMMON_SOURCES})
target_compile_options(common_lib_client PUBLIC -fPIC -DCLIENT_BUILD)
target_include_directories(common_lib_client PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

175
intercept/common/common.cpp Normal file
View File

@ -0,0 +1,175 @@
#include <random>
#include "common.h"
#include "spdlog/sinks/basic_file_sink.h"
#include "spdlog/sinks/stdout_color_sinks.h"
namespace intercept{
namespace common {
// Intentionally empty placeholder function.
void dummy() {
}
// Constructor starts the timer
Timer::Timer() : m_startTimePoint(std::chrono::high_resolution_clock::now()) {}
// Starts the timer and records a label to include in the log line on Stop().
Timer::Timer(const std::string& message) : m_message(message),
m_startTimePoint(std::chrono::high_resolution_clock::now()) {}
// Destructor prints the elapsed time if the timer hasn't been stopped manually
Timer::~Timer() {
if (!m_stopped) {
Stop();
}
// std::cout << m_message << " Elapsed time: " << m_elapsedTime << " ms" << std::endl;
}
// Stops the timer and records the elapsed time in MICROSECONDS (the old
// comment said milliseconds). Logs via spdlog only when the elapsed time is
// non-zero; calling Stop() more than once is a no-op.
void Timer::Stop() {
if (!m_stopped) {
auto endTimePoint = std::chrono::high_resolution_clock::now();
auto start = std::chrono::time_point_cast<std::chrono::microseconds>(m_startTimePoint).time_since_epoch().count();
auto end = std::chrono::time_point_cast<std::chrono::microseconds>(endTimePoint).time_since_epoch().count();
m_elapsedTime = end - start;
m_stopped = true;
if (m_elapsedTime > 0) {
// std::cout << m_message << ", Elapsed time: " << m_elapsedTime << " us" << std::endl;
spdlog::warn("{}, Elapsed time: {} us ", m_message, m_elapsedTime);
}
}
}
// Returns the last measured elapsed time in microseconds (0 until Stop()).
long long Timer::ElapsedMicroseconds() const {
return m_elapsedTime;
}
// Restarts the measurement from "now" and re-arms the destructor auto-stop.
void Timer::Restart() {
m_startTimePoint = std::chrono::high_resolution_clock::now();
m_stopped = false;
}
// Spawns `numThreads` workers; each loops pulling tasks off the shared
// queue until `stop` is set and the queue has drained.
ThreadPool::ThreadPool(size_t numThreads) : stop(false) {
for (size_t i = 0; i < numThreads; ++i) {
workers.emplace_back([this] {
for (;;) {
std::function<void()> task;
{
// Sleep until there is work or the pool is shutting down.
std::unique_lock<std::mutex> lock(this->queueMutex);
this->condition.wait(lock, [this] { return this->stop || !this->tasks.empty(); });
if (this->stop && this->tasks.empty())
return;
task = std::move(this->tasks.front());
this->tasks.pop();
}
// Run outside the lock so other workers can dequeue concurrently.
task();
}
});
}
}
// Signals shutdown, wakes all workers and joins them. Tasks already queued
// are still executed before the workers exit (see the wait predicate above).
ThreadPool::~ThreadPool() {
{
std::unique_lock<std::mutex> lock(queueMutex);
stop = true;
}
condition.notify_all();
for (std::thread &worker : workers)
worker.join();
}
// Queues `task` for execution by a worker thread and wakes one waiter.
// Throws std::runtime_error when called after shutdown has begun.
void ThreadPool::enqueue(std::function<void()> task) {
    {
        std::unique_lock<std::mutex> guard(queueMutex);
        if (stop) {
            throw std::runtime_error("enqueue on stopped ThreadPool");
        }
        tasks.push(task);
    }
    condition.notify_one();
}
// Generates a 5-character random lowercase suffix (e.g. for temporary names).
//
// Fix: the previous implementation constructed a fresh engine seeded from
// the wall clock on EVERY call, so calls landing in the same clock tick
// produced identical "random" suffixes. A single lazily-initialized,
// thread-local engine is seeded once instead.
std::string generateRandomSuffix() {
    static thread_local std::mt19937 generator(
        std::random_device{}() ^
        static_cast<unsigned>(std::chrono::system_clock::now().time_since_epoch().count()));
    std::uniform_int_distribution<int> distribution(0, 25);  // maps to 'a'..'z'
    std::string suffix;
    suffix.reserve(5);
    for (size_t i = 0; i < 5; ++i) {
        suffix += static_cast<char>('a' + distribution(generator));
    }
    return suffix;
}
// Run flag for the config-reload thread below.
// NOTE(review): nothing in this translation unit ever sets it to false, so
// the detached thread started by InitLog() runs for the process lifetime.
std::atomic<bool> running(true);
// Periodically reloads CONFIG_FILE and re-applies its "loglevel" value to
// spdlog. Runs on the detached thread started by InitLog().
void UpdateLogLevelPeriodically(int intervalSeconds) {
auto& config = Configure::getInstance();
while (running) {
std::this_thread::sleep_for(std::chrono::seconds(intervalSeconds));
// std::cout << "reload the config: " << CONFIG_FILE << std::endl;
config.loadConfig(CONFIG_FILE); // Assuming this reloads the configuration
std::string loglevel = config.getConfig("loglevel");
if (loglevel == "debug") {
spdlog::set_level(spdlog::level::debug);
} else if (loglevel == "warning") {
spdlog::set_level(spdlog::level::warn);
} else if (loglevel == "info") {
spdlog::set_level(spdlog::level::info);
} else if (loglevel == "error") {
spdlog::set_level(spdlog::level::err);
} else {
// Unknown/empty level: keep the current level and complain on stderr.
std::cerr << "Invalid log level specified in the config file" << std::endl;
}
}
}
// Initializes the process-wide spdlog logger from Configure:
//   logprinttype = "console" -> colored stdout logger, otherwise file logger
//   logpath   -> base log file path (suffixed with ".<pid>")
//   loglevel  -> debug | warning | info | error
// Also starts a detached thread that re-applies loglevel every 5 seconds.
void InitLog() {
const auto& config = Configure::getInstance();
std::string pid = std::to_string((long)getpid());
std::string logpath = config.getConfig("logpath") + "." + pid;
std::string loglevel = config.getConfig("loglevel");
try
{
std::shared_ptr<spdlog::logger> logger;
std::string printtype = config.getConfig("logprinttype");
if (printtype == "console") {
logger = spdlog::stdout_color_mt("console");
} else {
logger = spdlog::basic_logger_mt("basic_logger", logpath);
}
spdlog::set_default_logger(logger);
if (loglevel == "debug") {
spdlog::set_level(spdlog::level::debug);
}
else if (loglevel == "warning") {
spdlog::set_level(spdlog::level::warn);
}
else if (loglevel == "info") {
spdlog::set_level(spdlog::level::info);
}
else if (loglevel == "error") {
spdlog::set_level(spdlog::level::err);
}
else {
std::cerr << "Invalid log level specified in the config file" << std::endl;
}
//spdlog::set_pattern("[%H:%M:%S %z] [%n] [%^---%L---%$] [thread %t] %v");
spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%n] [%l] [pid %P tid %t] %v");
spdlog::flush_every(std::chrono::seconds(5));
// Start the periodic log-level updater thread (the old comment claimed
// 60 seconds; the actual interval passed here is 5 seconds).
std::thread updateThread(UpdateLogLevelPeriodically, 5);
updateThread.detach(); // Detach the thread so it runs independently
}
catch (const spdlog::spdlog_ex &ex) {
std::cout << "Log init failed: " << ex.what() << std::endl;
}
}
} // namespace common
} // namespace intercept

143
intercept/common/common.h Normal file
View File

@ -0,0 +1,143 @@
#pragma once
#include <stdlib.h>
#include <stdint.h>
#include <chrono>
#include <iostream>
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include <map>
#include <vector>
#include <thread>
#include <queue>
#include <mutex>
#include <condition_variable>
#include <functional>
#include "spdlog/spdlog.h"
#include "spdlog/fmt/fmt.h"
namespace intercept {
namespace common {
// Runtime configuration file path: the server binary and the intercept
// client (built with -DCLIENT_BUILD) read different files.
#ifndef CLIENT_BUILD
const std::string CONFIG_FILE = "conf/server.conf";
#else
const std::string CONFIG_FILE = "conf/client.conf";
#endif
// Inode number type used across the intercept layer.
using Ino = uint64_t;
// Cursor for directory iteration.
// NOTE(review): this is layout-cast to the filesystem C API's dir_stream_t
// (see CurveFileSystem::Opendir) — keep the field order/sizes in sync.
struct DirStream {
Ino ino;
uint64_t fh;
uint64_t offset;
};
// RAII stopwatch: measures wall time between construction (or Restart) and
// Stop(); the destructor stops automatically if Stop() was never called.
class Timer {
public:
// Constructor starts the timer
Timer();
Timer(const std::string& message);
// Destructor prints the elapsed time if the timer hasn't been stopped manually
~Timer();
// Stops the timer and records the elapsed time (in microseconds)
void Stop();
// Method to get the elapsed time in microseconds
long long ElapsedMicroseconds() const;
// Method to restart the timer
void Restart();
private:
std::chrono::time_point<std::chrono::high_resolution_clock> m_startTimePoint; // start of current measurement
long long m_elapsedTime = 0; // last measured duration, microseconds
bool m_stopped = false; // guards against double Stop()
std::string m_message; // label prepended to the log line
};
// Process-wide key/value configuration store (Meyers singleton).
// File format: one `key = value` per line; lines starting with '#' are
// comments; blank lines are ignored; whitespace around keys and values is
// stripped.
class Configure {
public:
    // Returns the single shared instance.
    static Configure& getInstance() {
        static Configure instance;
        return instance;
    }
    // Parses `filePath` and merges its key/value pairs into the store.
    // Returns false (after logging to stderr) when the file cannot be opened.
    bool loadConfig(const std::string& filePath) {
        std::ifstream file(filePath);
        if (!file.is_open()) {
            std::cerr << "Failed to open config file: " << filePath << std::endl;
            return false;
        }
        // Strips leading/trailing whitespace in place.
        auto trim = [](std::string& s) {
            s.erase(s.find_last_not_of(" \t\n\r\f\v") + 1);
            s.erase(0, s.find_first_not_of(" \t\n\r\f\v"));
        };
        std::string line;
        while (std::getline(file, line)) {
            // Skip blank lines and '#' comments.
            if (line.empty() || line[0] == '#') {
                continue;
            }
            std::istringstream parser(line);
            std::string key;
            std::string value;
            // Lines without '=' (or with nothing after it) are ignored.
            if (std::getline(parser, key, '=') && std::getline(parser, value)) {
                trim(key);
                trim(value);
                configMap[key] = value;
            }
        }
        file.close();
        return true;
    }
    // Returns the value stored under `key`, or "" when absent.
    std::string getConfig(const std::string& key) const {
        auto it = configMap.find(key);
        return it == configMap.end() ? "" : it->second;
    }
private:
    std::map<std::string, std::string> configMap;  // parsed key -> value

    Configure() {}                                   // singleton: no public construction
    Configure(const Configure&) = delete;            // non-copyable
    Configure& operator=(const Configure&) = delete; // non-assignable
};
// Fixed-size worker pool executing queued std::function tasks
// (implementation in common.cpp).
class ThreadPool {
public:
ThreadPool(size_t numThreads = 30);
~ThreadPool();
// Queues a task; throws std::runtime_error after shutdown has begun.
void enqueue(std::function<void()> task);
private:
std::vector<std::thread> workers; // worker threads
std::queue<std::function<void()>> tasks; // pending tasks
std::mutex queueMutex; // guards `tasks` and `stop`
std::condition_variable condition; // signals new work / shutdown
bool stop; // set by the destructor under queueMutex
};
// Returns a 5-character random lowercase suffix (see common.cpp).
std::string generateRandomSuffix();
// Initializes spdlog from the configuration file (see common.cpp).
void InitLog();
} // namespace common
} // namespace intercept

View File

@ -0,0 +1,22 @@
# discovery/CMakeLists.txt
# Builds two flavors of the discovery library: a server-side one and a
# -fPIC client one linked into the LD_PRELOAD intercept library.
# NOTE(review): file(GLOB) misses newly added sources until reconfigure;
# prefer listing sources explicitly.
file(GLOB DISCOVERY_SOURCES *.cpp)
# Prebuilt iceoryx libraries from thirdparties/.
find_library(ICEORYX_POSH_LIB NAMES iceoryx_posh PATHS ../../thirdparties/iceoryx/lib)
find_library(ICEORYX_HOOFS_LIB NAMES iceoryx_hoofs PATHS ../../thirdparties/iceoryx/lib)
find_library(ICEORYX_PLATFORM_LIB NAMES iceoryx_platform PATHS ../../thirdparties/iceoryx/lib)
add_library(intercept_discovery ${DISCOVERY_SOURCES})
target_include_directories(intercept_discovery PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/include
)
# NOTE(review): the server variant links only iceoryx_posh while the client
# variant below also links hoofs/platform — confirm the asymmetry is intended.
target_link_libraries(intercept_discovery PUBLIC intercept_internal ${ICEORYX_POSH_LIB})
add_library(intercept_discovery_client ${DISCOVERY_SOURCES})
target_include_directories(intercept_discovery_client PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/include
)
target_compile_options(intercept_discovery_client PUBLIC -fPIC )
target_link_libraries(intercept_discovery_client PUBLIC -lrt intercept_internal_client ${ICEORYX_POSH_LIB} ${ICEORYX_HOOFS_LIB} ${ICEORYX_PLATFORM_LIB} )

View File

@ -0,0 +1,66 @@
#pragma once
#include <iostream>
#include <vector>
#include <set>
#include <thread>
#include "internal/metainfo.h"
namespace intercept {
namespace discovery {
using intercept::internal::ServiceMetaInfo;
// Discovery : use to discover the existing servers
// and the servers to be deleted. Abstract base class; concrete backends
// (e.g. IceoryxDiscovery) override the pure virtuals below.
class Discovery {
public:
    // Constructor
    Discovery() {
        // Initialization code
    }
    // Fix: an abstract base used through base pointers needs a virtual
    // destructor, otherwise deleting a derived object via Discovery* is
    // undefined behavior.
    virtual ~Discovery() = default;
    // Initialize the discovery
    virtual void Init() = 0;
    // Start the discovery loop
    virtual void Start() = 0;
    // Stop the discovery loop
    virtual void Stop() = 0;
    // Get the existing servers (default: none)
    virtual std::vector<ServiceMetaInfo> GetServers() const {
        return std::vector<ServiceMetaInfo>();
    }
    // Get the servers to be deleted (default: none)
    virtual std::set<ServiceMetaInfo> GetServersToDelete() const {
        return std::set<ServiceMetaInfo>();
    }
    // Find services matching `info`.
    virtual std::vector<ServiceMetaInfo> FindServices(const ServiceMetaInfo& info) = 0;
    // Create a new server described by serverInfo (default: no-op)
    virtual void CreateServer(const ServiceMetaInfo& serverInfo) {
    }
    // Delete the server described by serverInfo (default: no-op)
    virtual void DeleteServer(const ServiceMetaInfo& serverInfo) {
    }
protected:
    std::vector<ServiceMetaInfo> existingServers;  // last observed snapshot
    std::set<ServiceMetaInfo> serversToDelete;     // pending removals
    // Fix: was uninitialized; give the loop flag a defined initial value.
    bool DISCOVERY_RUNNING = false;
};
}
}

View File

@ -0,0 +1,125 @@
#include <vector>
#include <string>
#include "iceoryx_discovery.h"
#include "iox/signal_watcher.hpp"
#include "iceoryx_posh/runtime/posh_runtime.hpp"
#include "iceoryx_posh/runtime/service_discovery.hpp"
namespace intercept {
namespace discovery {
// TODO: Add your own discovery service implementation here
#define DISCOVERY_SERVICE_NAME "IceoryxDiscoveryService"
#define DISCOVERY_SERVICE_VERSION "1.0.0"
#define DISCOVERY_SERVICE_DESCRIPTION "IceoryxDiscoveryServiceDescription"
#define DISCOVERY_SERVICE_PROVIDER "IceoryxDiscoveryServiceProvider"
// constexpr char APP_NAME[] = "iox-discovery-service";
IceoryxDiscovery::IceoryxDiscovery() {
}
IceoryxDiscovery::~IceoryxDiscovery() {
// TODO: Clean up your discovery service implementation here
}
void IceoryxDiscovery::Init() {
// TODO: Initialize your discovery service implementation here
}
// Discovery main loop: snapshot the servers, create new ones, delete removed
// ones, until termination is requested.
// NOTE(review): GetServers/GetNewServers/GetRemovedServers below are stubs
// returning empty collections, so this loop currently does no useful work —
// and it busy-spins without any sleep between iterations.
void IceoryxDiscovery::Start() {
while (!iox::hasTerminationRequested()) {
const auto& servers = GetServers();
const auto& newservers = GetNewServers(existingServers, servers);
for (auto& server : newservers) {
// Handle newly appeared servers.
CreateServer(server);
}
const auto& removedServers = GetRemovedServers(existingServers, servers);
for (auto& server : removedServers) {
// Handle servers that disappeared since the last snapshot.
DeleteServer(server);
}
existingServers = servers;
}
}
void IceoryxDiscovery::Stop() {
// TODO: Stop your discovery service implementation here
}
// Stub: returns no servers.
std::vector<ServiceMetaInfo> IceoryxDiscovery::GetServers() const {
return {};
}
// Stub: returns an empty "new servers" diff.
std::vector<ServiceMetaInfo> IceoryxDiscovery::GetNewServers(const std::vector<ServiceMetaInfo>& existingServers, const std::vector<ServiceMetaInfo>& newServers) {
std::vector<ServiceMetaInfo> newServersList;
return newServersList;
}
// Stub: returns an empty "removed servers" diff.
std::set<ServiceMetaInfo> IceoryxDiscovery::GetRemovedServers(const std::vector<ServiceMetaInfo>& existingServers, const std::vector<ServiceMetaInfo>& newServers) {
std::set<ServiceMetaInfo> removedServersList;
return removedServersList;
}
std::vector<ServiceMetaInfo> IceoryxDiscovery::FindServices(const ServiceMetaInfo& info) {
iox::capro::IdString_t serviceStr(iox::TruncateToCapacity, info.service.c_str());
iox::capro::IdString_t instanceStr(iox::TruncateToCapacity, info.instance.c_str());
iox::capro::IdString_t eventStr(iox::TruncateToCapacity, info.instance.c_str());
iox::optional<iox::capro::IdString_t> service = serviceStr;
iox::optional<iox::capro::IdString_t> instance = instanceStr;
iox::optional<iox::capro::IdString_t> event = eventStr;
if (info.service == "") {
//service = iox::capro::Wildcard;
service = iox::optional<iox::capro::IdString_t>(iox::capro::Wildcard);
}
if (info.instance == "") {
//instance = iox::capro::Wildcard;
instance = iox::optional<iox::capro::IdString_t>(iox::capro::Wildcard);
}
if (info.event == "") {
//event = iox::capro::Wildcard;
event = iox::optional<iox::capro::IdString_t>(iox::capro::Wildcard);
}
std::vector<iox::capro::ServiceDescription> results;
serviceDiscovery_.findService(service, instance, event,
[&results](const iox::capro::ServiceDescription& serviceDescription) {
results.push_back(serviceDescription);
},
iox::popo::MessagingPattern::REQ_RES
);
std::vector<ServiceMetaInfo> metainfos;
for (const iox::capro::ServiceDescription& result : results) {
ServiceMetaInfo metaInfo;
metaInfo.service = result.getServiceIDString().c_str();
metaInfo.instance = result.getInstanceIDString().c_str();
metaInfo.event = result.getEventIDString().c_str();
metainfos.push_back(metaInfo);
// std::cout << "Found service: " << metaInfo.service
// << " instance: " << metaInfo.instance << " event: " << metaInfo.event << std::endl;
}
return metainfos;
}
// Stub: server creation is not implemented yet.
void IceoryxDiscovery::CreateServer(const ServiceMetaInfo& server) {
}
// Stub: server deletion is not implemented yet.
void IceoryxDiscovery::DeleteServer(const ServiceMetaInfo& server) {
}
} // namespace discovery
} // namespace intercept

View File

@ -0,0 +1,41 @@
#pragma once
#include "discovery.h"
#include "iceoryx_posh/runtime/service_discovery.hpp"
namespace intercept {
namespace discovery {
// Discovery implementation backed by iceoryx's ServiceDiscovery.
// Snapshot diffing (GetNewServers/GetRemovedServers) is stubbed out;
// FindServices performs a live REQ/RES lookup.
// NOTE(review): the methods matching Discovery's virtuals would benefit from
// explicit `override` specifiers so the compiler verifies the signatures.
class IceoryxDiscovery : public Discovery
{
public:
IceoryxDiscovery();
virtual ~IceoryxDiscovery();
virtual void Init();
virtual void Start();
virtual void Stop();
// Currently returns an empty list (stub).
virtual std::vector<ServiceMetaInfo> GetServers() const;
// Diff helpers over server snapshots (stubs; return empty collections).
virtual std::vector<ServiceMetaInfo> GetNewServers(const std::vector<ServiceMetaInfo>& oldservers,
const std::vector<ServiceMetaInfo>& newservers);
virtual std::set<ServiceMetaInfo> GetRemovedServers(
const std::vector<ServiceMetaInfo>& oldservers, const std::vector<ServiceMetaInfo>& newservers);
// Live lookup of matching REQ/RES services; empty fields act as wildcards.
virtual std::vector<ServiceMetaInfo> FindServices(const ServiceMetaInfo& info);
virtual void CreateServer(const ServiceMetaInfo& serverInfo);
virtual void DeleteServer(const ServiceMetaInfo& serverInfo);
private:
iox::runtime::ServiceDiscovery serviceDiscovery_; // iceoryx discovery handle
};
} // namespace discovery
} // namespace intercept

View File

@ -0,0 +1,28 @@
# Server-side filesystem library plus a lightweight INTERFACE variant for the
# client build (which must not drag in the heavy backends).
find_library(ICEORYX_POSH_LIB iceoryx_posh PATHS ../thirdparties/iceoryx/lib)
find_library(ICEORYX_HOOFS_LIB iceoryx_hoofs PATHS ../thirdparties/iceoryx/lib)
find_library(ICEORYX_PLATFORM_LIB iceoryx_platform PATHS ../thirdparties/iceoryx/lib)
# NOTE(review): file(GLOB) misses newly added sources until reconfigure;
# prefer an explicit source list.
file(GLOB FILESYSTEM_SOURCES *.cpp)
file(GLOB FILESYSTEM_HEADERS *.h)
add_library(intercept_filesystem ${FILESYSTEM_SOURCES})
target_include_directories(intercept_filesystem PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
# NOTE(review): raw -l flags bypass CMake dependency handling; prefer imported
# targets (e.g. CURL::libcurl, Threads::Threads) where available.
target_link_libraries(intercept_filesystem PUBLIC
${ICEORYX_POSH_LIB} ${ICEORYX_HOOFS_LIB} ${ICEORYX_PLATFORM_LIB}
hybridcache_local madfs_global s3fs_lib ${THIRD_PARTY_LIBRARIES} common_lib
-pthread
-lcurl
-lxml2
-lcrypto
-ldl
-laio
-lrt
)
# Header-only view for the client build.
add_library(intercept_filesystem_client INTERFACE)
target_include_directories(intercept_filesystem_client INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(intercept_filesystem_client INTERFACE
common_lib_client
-lrt
)
target_compile_options(intercept_filesystem_client INTERFACE -DCLIENT_BUILD)

View File

@ -0,0 +1,57 @@
#ifndef ABSTRACT_FILESYSTEM_H
#define ABSTRACT_FILESYSTEM_H
#include <string>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdexcept>
#include "common/common.h"
namespace intercept {
namespace filesystem {
using intercept::common::DirStream;
// POSIX-like filesystem abstraction implemented by concrete backends
// (CurveFileSystem, DummyFileSystem, ...). Signatures mirror the POSIX
// calls of the same name.
class AbstractFileSystem {
public:
    virtual ~AbstractFileSystem() {}
    virtual void Init() = 0;
    virtual void Shutdown() = 0;
    virtual int Open(const char* path, int flags, int mode) = 0;
    virtual ssize_t Read(int fd, void* buf, size_t count) = 0;
    virtual ssize_t Write(int fd, const void* buf, size_t count) = 0;
    virtual int Close(int fd) = 0;
    virtual off_t Lseek(int fd, off_t offset, int whence) = 0;
    virtual int Stat(const char* path, struct stat* st) = 0;
    virtual int Fstat(int fd, struct stat* st) = 0;
    virtual int Fsync(int fd) = 0;
    virtual int Truncate(const char* path, off_t length) = 0;
    virtual int Ftruncate(int fd, off_t length) = 0;
    virtual int Unlink(const char* path) = 0;
    virtual int Mkdir(const char* path, mode_t mode) = 0;
    virtual int Opendir(const char* path, DirStream* dirstream) = 0;
    virtual int Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes) = 0;
    virtual int Closedir(DirStream* dirstream) = 0;
    virtual int Rmdir(const char* path) = 0;
    virtual int Chmod(const char* path, mode_t mode) = 0;
    virtual int Chown(const char* path, uid_t owner, gid_t group) = 0;
    virtual int Rename(const char* oldpath, const char* newpath) = 0;
    virtual int Link(const char* oldpath, const char* newpath) = 0;
    virtual int Symlink(const char* oldpath, const char* newpath) = 0;
    virtual int Readlink(const char* path, char* buf, size_t bufsize) = 0;
    virtual int Utime(const char* path, const struct utimbuf* times) = 0;
    // Optional batched variants. Fix: the previous defaults had empty bodies
    // in a non-void function — undefined behavior whenever called. Default
    // to an explicit "not supported" error value instead; backends that
    // support batching override these.
    virtual ssize_t MultiRead(int fd, void* buf, size_t count) { return -1; }
    virtual ssize_t MultiWrite(int fd, const void* buf, size_t count) { return -1; }
protected:
    virtual std::string NormalizePath(const std::string& path) = 0;
};
} // namespace filesystem
} // namespace intercept
#endif // ABSTRACT_FILESYSTEM_H

View File

@ -0,0 +1,166 @@
#include <iostream>
#include <cstring>
#include "curve_filesystem.h"
#include "libcurvefs_external.h"
namespace intercept {
namespace filesystem {
#define POSIX_SET_ATTR_SIZE (1 << 3)
CurveFileSystem::CurveFileSystem() {}
// Releases the curvefs instance created in Init().
// NOTE(review): runs even if Init() was never called, in which case
// instance_ is uninitialized — consider guarding.
CurveFileSystem::~CurveFileSystem() {
curvefs_release(instance_);
}
// Creates a curvefs client instance, loads its config from the working
// directory and mounts filesystem "s3cy1" at "/".
void CurveFileSystem::Init() {
instance_ = curvefs_create();
curvefs_load_config(instance_, "./curve_posix_client.conf");
//curvefs_mount(instance_, "s3cy1", "/tmp/curvefs");
curvefs_mount(instance_, "s3cy1", "/");
std::cout << "finish curvefs create" << std::endl;
}
void CurveFileSystem::Shutdown() {
}
// Opens `path`; if the create-open reports EEXIST, retries without O_CREAT
// so an existing file is opened instead of failing.
int CurveFileSystem::Open(const char* path, int flags, int mode) {
std::cout << "open, the path: " << path << std::endl;
int ret = curvefs_open(instance_, path, flags, mode);
// NOTE(review, translated from original): EEXIST is 17 — if the returned
// fd happens to be 17 this branch misfires. Verify curvefs_open's
// error-return convention (negative errno vs. positive values).
if (ret == EEXIST) { // do not create on the retry
ret = curvefs_open(instance_, path, flags & ~O_CREAT, mode);
}
//std::cout << "the path: " << path << " , the stat: " << tmp.st_size << " , the time: " << tmp.st_mtime << std::endl;
return ret;
}
// Reads up to `count` bytes from `fd` into `buf`.
ssize_t CurveFileSystem::Read(int fd, void* buf, size_t count) {
int ret = curvefs_read(instance_, fd, (char*)buf, count);
//int ret = count;
//std::cout << "read, the fd: " << fd << " the buf: " << (char*)buf << ", the count: " << count << ", the ret: " << ret << std::endl;
return ret;
}
// Writes `count` bytes from `buf` to `fd` (const is cast away for the C API).
ssize_t CurveFileSystem::Write(int fd, const void* buf, size_t count) {
int ret = curvefs_write(instance_, fd, (char*)buf, count);
//int ret = count;
//std::cout << "write, the fd: " << fd << " the buf: " << (char*)buf << ", the count: " << count << ", the ret: " << ret << std::endl;
return ret;
}
int CurveFileSystem::Close(int fd) {
int ret = curvefs_close(instance_, fd);
std::cout << "curve close, the fd: " << fd << std::endl;
return ret;
}
off_t CurveFileSystem::Lseek(int fd, off_t offset, int whence) {
int ret = curvefs_lseek(instance_, fd, offset, whence);
std::cout << "curve lseek, the fd: " << fd << ", the offset: " << offset << ", the whence: " << whence << ", the ret: " << ret << std::endl;
return ret;
}
// Stats the file at `path` (delegates to curvefs_lstat).
int CurveFileSystem::Stat(const char* path, struct stat* st) {
    return curvefs_lstat(instance_, path, st);
}
// Stats an open file descriptor.
int CurveFileSystem::Fstat(int fd, struct stat* st) {
    return curvefs_fstat(instance_, fd, st);
}
// Flushes pending writes for `fd`.
int CurveFileSystem::Fsync(int fd) {
    return curvefs_fsync(instance_, fd);
}
// Not implemented; throws std::runtime_error (the literal "未实现" means
// "not implemented").
int CurveFileSystem::Ftruncate(int fd, off_t length) {
throw std::runtime_error("未实现");
}
int CurveFileSystem::Unlink(const char* path) {
int ret = curvefs_unlink(instance_, path);
std::cout << "unlink, the path: " << path << ", the ret: " << ret << std::endl;
return ret;
}
int CurveFileSystem::Mkdir(const char* path, mode_t mode) {
int ret = curvefs_mkdir(instance_, path, mode);
std::cout << "mkdir, the path: " << path << ", the mode: " << mode << ", the ret: " << ret << std::endl;
return ret;
}
// DirStream is layout-cast to the C API's dir_stream_t here — the two
// structs must stay layout-compatible.
int CurveFileSystem::Opendir(const char* path, DirStream* dirstream) {
int ret = curvefs_opendir(instance_, path, (dir_stream_t*)dirstream);
std::cout << "opendir, the path: " << path << ", the dirstream ino: " << dirstream->ino << ", the ret: " << ret << std::endl;
return ret;
}
// NOTE(review): the log prints the `realbytes` POINTER, not the count
// written through it.
int CurveFileSystem::Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes) {
int ret = curvefs_getdents(instance_, (dir_stream_t*)dirstream, contents, maxread, realbytes);
std::cout << "getdents, the dirstream ino: " << dirstream->ino << ", the maxread: " << maxread << ", the realbytes: " << realbytes << ", the ret: " << ret << std::endl;
return ret;
}
int CurveFileSystem::Closedir(DirStream* dirstream) {
int ret = curvefs_closedir(instance_, (dir_stream_t*)dirstream);;
std::cout << "closedir, the fd: " << dirstream->fh << " ino:" << dirstream->ino << std::endl;
return ret;
}
int CurveFileSystem::Rmdir(const char* path) {
int ret = curvefs_rmdir(instance_, path);
std::cout << "rmdir, the path: " << path << ", the ret: " << ret << std::endl;
return ret;
}
int CurveFileSystem::Rename(const char* oldpath, const char* newpath) {
int ret = curvefs_rename(instance_, oldpath, newpath);
std::cout << "rename, the oldpath: " << oldpath << ", the newpath: " << newpath << ", the ret: " << ret << std::endl;
return ret;
}
// The remaining operations are not implemented and throw ("未实现" =
// "not implemented").
int CurveFileSystem::Link(const char* oldpath, const char* newpath) {
throw std::runtime_error("未实现");
}
int CurveFileSystem::Symlink(const char* oldpath, const char* newpath) {
throw std::runtime_error("未实现");
}
int CurveFileSystem::Readlink(const char* path, char* buf, size_t bufsize) {
throw std::runtime_error("未实现");
}
int CurveFileSystem::Chmod(const char* path, mode_t mode) {
throw std::runtime_error("未实现");
}
int CurveFileSystem::Chown(const char* path, uid_t uid, gid_t gid) {
throw std::runtime_error("未实现");
}
// Truncates the file at `path` to `length` bytes via curvefs_setattr,
// sending only the size attribute (POSIX_SET_ATTR_SIZE).
int CurveFileSystem::Truncate(const char* path, off_t length) {
    // Fix: `attr` was left uninitialized apart from st_size, passing stack
    // garbage to the C API; zero-initialize the whole struct.
    struct stat attr = {};
    attr.st_size = length;
    int set = POSIX_SET_ATTR_SIZE ;
    int ret = curvefs_setattr(instance_, path, &attr, set);
    return ret;
}
// Not implemented; throws ("未实现" = "not implemented").
int CurveFileSystem::Utime(const char* path, const struct utimbuf* ubuf) {
throw std::runtime_error("未实现");
}
// Not implemented; throws ("未实现" = "not implemented").
std::string CurveFileSystem::NormalizePath(const std::string& path) {
throw std::runtime_error("未实现");
}
} // namespace filesystem
} // namespace intercept

View File

@ -0,0 +1,47 @@
#ifndef CURVE_FILESYSTEM_H
#define CURVE_FILESYSTEM_H
#include "abstract_filesystem.h"
namespace intercept {
namespace filesystem {
// AbstractFileSystem backend backed by libcurvefs (see curve_filesystem.cpp).
// Operations not supported by the backend (Ftruncate, Link, Symlink,
// Readlink, Chmod, Chown, Utime) throw std::runtime_error.
class CurveFileSystem : public AbstractFileSystem {
public:
    CurveFileSystem();
    ~CurveFileSystem() override;
    void Init() override;
    void Shutdown() override;
    int Open(const char* path, int flags, int mode) override;
    ssize_t Read(int fd, void* buf, size_t count) override;
    ssize_t Write(int fd, const void* buf, size_t count) override;
    int Close(int fd) override;
    off_t Lseek(int fd, off_t offset, int whence) override;
    int Stat(const char* path, struct stat* st) override;
    int Fstat(int fd, struct stat* st) override;
    int Fsync(int fd) override;
    int Ftruncate(int fd, off_t length) override;
    int Unlink(const char* path) override;
    int Mkdir(const char* path, mode_t mode) override;
    // Fix: these three were missing `override`, so the compiler could not
    // verify they match AbstractFileSystem's virtual signatures.
    int Opendir(const char* path, DirStream* dirstream) override;
    int Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes) override;
    int Closedir(DirStream* dirstream) override;
    int Rmdir(const char* path) override;
    int Rename(const char* from, const char* to) override;
    int Link(const char* from, const char* to) override;
    int Symlink(const char* from, const char* to) override;
    int Readlink(const char* path, char* buf, size_t bufsize) override;
    int Chmod(const char* path, mode_t mode) override;
    int Chown(const char* path, uid_t uid, gid_t gid) override;
    int Truncate(const char* path, off_t length) override;
    int Utime(const char* path, const struct utimbuf* times) override;
protected:
    std::string NormalizePath(const std::string& path) override;
    uintptr_t instance_;  // opaque curvefs client handle, set in Init()
};
} // namespace filesystem
} // namespace intercept
#endif // CURVE_FILESYSTEM_H

View File

@ -0,0 +1,186 @@
#include <iostream>
#include <cstring>
#include "dummy_filesystem.h"
namespace intercept {
namespace filesystem{
// Size of the static scratch buffer backing all Dummy reads (~10 GB).
std::size_t g_size = 10240000000;
// Shared across every DummyFileSystem instance (see NOTE in the destructor).
char* DummyFileSystem::memory_ = nullptr;
DummyFileSystem::DummyFileSystem()
{
// Lazily allocate the shared buffer the first time any instance is built.
if (memory_ == nullptr) {
memory_ = new char[g_size];
//memset(memory_, 'j', g_size);
std::cout << "Memory allocated for shared_memory" << std::endl;
}
std::cout << "DummyFileSystem created" << std::endl;
}
DummyFileSystem::~DummyFileSystem()
{
std::cout << "DummyFileSystem destroyed, copy num: " << copynum_ << std::endl;
// NOTE(review): memory_ is static, yet the first instance destroyed frees it
// for everyone; a second live instance would then read freed memory. This
// appears to assume a single instance per process -- confirm.
if (memory_ != nullptr) {
delete[] memory_;
memory_ = nullptr;
std::cout << "Memory deallocated for shared_memory" << std::endl;
}
}
// No backend to initialize; logs only.
void DummyFileSystem::Init() {
std::cout << "DummyFileSystem Init" << std::endl;
}
// No backend to tear down; logs only.
void DummyFileSystem::Shutdown() {
std::cout << "DummyFileSystem Shutdown" << std::endl;
}
int DummyFileSystem::Open(const char* path, int flags, int mode) {
fd_.fetch_add(1);
std::cout << "DummyFileSystem Open: " << path << " ret: " << fd_.load() << std::endl;
return fd_.load();
}
// Scratch pattern buffer (only used by the commented-out fallback copies below).
char buffer[] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
// Fake read: serves `count` bytes of arbitrary data straight out of the static
// scratch buffer, wrapping the internal cursor so it never runs off the end.
// NOTE(review): offset_ is advanced *before* the copy, and the wrap arithmetic
// keeps a ~10MB guard band at the tail; assumes count < 10000000 -- confirm.
ssize_t DummyFileSystem::Read(int fd, void* buf, size_t count) {
// std::cout << "DummyFileSystem Read: " << fd << std::endl;
offset_ += count;
if (offset_ > g_size - count) {
// std::cout << "begin offset_: " << offset_ << " g_size: "<< g_size << ", count: " << count << std::endl;
offset_ = offset_ % (g_size - 10000000);
// std::cout << "after offset_: " << offset_ << std::endl;
}
if (offset_ < (g_size - 10000000)) {
memcpy((char*)buf, memory_ + offset_, count);
// memcpy((char*)buf, buffer, count);
} else {
memcpy((char*)buf, memory_ + 128, count);
// memcpy((char*)buf, buffer, count);
}
copynum_++;  // bookkeeping: number of copies served (reported at Close/dtor)
return count;
}
// Fake write: the data is discarded; reports full success.
ssize_t DummyFileSystem::Write(int fd, const void* buf, size_t count) {
std::cout << "DummyFileSystem Write: " << fd << ", count: " << count << std::endl;
//memcpy(memory_ + offset_, buf, count);
return count;
}
// No real fd to release; logs the copy counter for diagnostics.
int DummyFileSystem::Close(int fd) {
std::cout << "DummyFileSystem Close: " << fd << " ,copynum_ :" << copynum_ << std::endl;
return 0;
}
// Moves the internal cursor; `whence` is ignored.
// NOTE(review): always returns 0 rather than the resulting offset -- callers
// relying on lseek() semantics will see position 0; confirm this is intended.
off_t DummyFileSystem::Lseek(int fd, off_t offset, int whence) {
std::cout << "DummyFileSystem Lseek: " << fd << std::endl;
if (offset_ > g_size - 10000000) {
offset_ = offset_ % (g_size-10000000);
} else {
offset_ = offset;
}
return 0;
}
// Fake stat: only fills a fixed inode number; other fields are untouched.
int DummyFileSystem::Stat(const char* path, struct stat* buf) {
buf->st_ino = 111111;
std::cout << "DummyFileSystem Stat: " << path << std::endl;
return 0;
}
// NOTE(review): leaves *buf completely untouched; callers see stale bytes.
int DummyFileSystem::Fstat(int fd, struct stat* buf) {
std::cout << "DummyFileSystem Fstat: " << fd << std::endl;
return 0;
}
// No-op; nothing to flush.
int DummyFileSystem::Fsync(int fd) {
std::cout << "DummyFileSystem Fsync: " << fd << std::endl;
return 0;
}
// No-op truncate.
int DummyFileSystem::Ftruncate(int fd, off_t length) {
std::cout << "DummyFileSystem Ftruncate: " << fd << std::endl;
return 0;
}
// No-op unlink.
int DummyFileSystem::Unlink(const char* path) {
std::cout << "DummyFileSystem Unlink: " << path << std::endl;
return 0;
}
// No-op mkdir.
int DummyFileSystem::Mkdir(const char* path, mode_t mode) {
std::cout << "DummyFileSystem Mkdir: " << path << std::endl;
return 0;
}
// Directory and rename/link operations below are all logging no-ops that
// report success without touching any state.
int DummyFileSystem::Opendir(const char* path, DirStream* dirstream) {
std::cout << "DummyFileSystem Opendir: " << path << std::endl;
return 0;
}
int DummyFileSystem::Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes) {
std::cout << "DummyFileSystem getdentes: " << std::endl;
return 0;
}
int DummyFileSystem::Closedir(DirStream* dirstream) {
std::cout << "DummyFileSystem Closedir: " << std::endl;
return 0;
}
int DummyFileSystem::Rmdir(const char* path) {
std::cout << "DummyFileSystem Rmdir: " << path << std::endl;
return 0;
}
int DummyFileSystem::Rename(const char* oldpath, const char* newpath) {
std::cout << "DummyFileSystem Rename: " << oldpath << " to " << newpath << std::endl;
return 0;
}
int DummyFileSystem::Link(const char* oldpath, const char* newpath) {
std::cout << "DummyFileSystem Link: " << oldpath << " to " << newpath << std::endl;
return 0;
}
int DummyFileSystem::Symlink(const char* oldpath, const char* newpath) {
std::cout << "DummyFileSystem Symlink: " << oldpath << std::endl;
return 0;
}
// Unimplemented calls abort with runtime_error ("未实现" = "not implemented").
int DummyFileSystem::Readlink(const char* path, char* buf, size_t bufsize) {
throw std::runtime_error("未实现");
}
int DummyFileSystem::Chmod(const char* path, mode_t mode) {
throw std::runtime_error("未实现");
}
int DummyFileSystem::Chown(const char* path, uid_t uid, gid_t gid) {
throw std::runtime_error("未实现");
}
// No-op truncate-by-path; reports success.
int DummyFileSystem::Truncate(const char* path, off_t length) {
return 0;
}
int DummyFileSystem::Utime(const char* path, const struct utimbuf* ubuf) {
throw std::runtime_error("未实现");
}
std::string DummyFileSystem::NormalizePath(const std::string& path) {
throw std::runtime_error("未实现");
}
} // namespace filesystem
} // namespace intercept

View File

@ -0,0 +1,50 @@
#ifndef DUMMY_FILESYSTEM_H
#define DUMMY_FILESYSTEM_H
#include <atomic>
#include "abstract_filesystem.h"
namespace intercept {
namespace filesystem {
// In-memory fake filesystem used to exercise the intercept pipeline:
// reads are served from one big static scratch buffer, writes are
// discarded, and most metadata calls are success-reporting no-ops
// (see dummy_filesystem.cpp).
class DummyFileSystem : public AbstractFileSystem {
public:
DummyFileSystem();
~DummyFileSystem() override;
void Init() override;
void Shutdown() override;
int Open(const char* path, int flags, int mode) override;
ssize_t Read(int fd, void* buf, size_t count) override;
ssize_t Write(int fd, const void* buf, size_t count) override;
int Close(int fd) override;
off_t Lseek(int fd, off_t offset, int whence) override;
int Stat(const char* path, struct stat* st) override;
int Fstat(int fd, struct stat* st) override;
int Fsync(int fd) override;
int Ftruncate(int fd, off_t length) override;
int Unlink(const char* path) override;
int Mkdir(const char* path, mode_t mode) override;
// NOTE(review): unlike the calls above, the three directory-stream methods
// lack `override`; confirm they match virtuals on AbstractFileSystem.
int Opendir(const char* path, DirStream* dirstream);
int Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes);
int Closedir(DirStream* dirstream);
int Rmdir(const char* path) override;
int Rename(const char* from, const char* to) override;
int Link(const char* from, const char* to) override;
int Symlink(const char* from, const char* to) override;
int Readlink(const char* path, char* buf, size_t bufsize) override;
int Chmod(const char* path, mode_t mode) override;
int Chown(const char* path, uid_t uid, gid_t gid) override;
int Truncate(const char* path, off_t length) override;
int Utime(const char* path, const struct utimbuf* times) override;
protected:
std::string NormalizePath(const std::string& path) override;
uintptr_t instance_;  // appears unused by this backend; kept for parity
std::atomic<int> fd_ = 0;  // pseudo-fd counter
off_t offset_ = 0;  // cursor into the shared scratch buffer
long copynum_ = 0;  // number of memcpy operations served
static char* memory_;  // shared scratch buffer (allocated in the .cpp)
};
} // namespace filesystem
} // namespace intercept
#endif // DUMMY_FILESYSTEM_H

View File

@ -0,0 +1,121 @@
/*
* Copyright (c) 2023 NetEase Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Project: Curve
* Created Date: 2023-07-12
* Author: Jingli Chen (Wine93)
*/
#include <stdint.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/statvfs.h>
#include "libcurvefs_external.h"
// Stub implementation of the libcurvefs C API: every entry point is a
// no-op that reports success (0). Lets the project build and run without
// a real CurveFS backend.
uintptr_t curvefs_create() {return 0;}
void curvefs_load_config(uintptr_t instance_ptr,
const char* config_file) {}
void curvefs_release(uintptr_t instance_ptr) {}
// NOTE: instance_ptr is the pointer of curvefs_mount_t instance.
void curvefs_conf_set(uintptr_t instance_ptr,
const char* key,
const char* value) {}
int curvefs_mount(uintptr_t instance_ptr,
const char* fsname,
const char* mountpoint) {return 0;}
// NOTE(review): "umonut" is a typo for "umount", but it is the exported
// symbol name (see the header); renaming requires changing both sides.
int curvefs_umonut(uintptr_t instance_ptr,
const char* fsname,
const char* mountpoint) {return 0;}
// directory
int curvefs_mkdir(uintptr_t instance_ptr, const char* path, uint16_t mode) {return 0;}
int curvefs_mkdirs(uintptr_t instance_ptr, const char* path, uint16_t mode) {return 0;}
int curvefs_rmdir(uintptr_t instance_ptr, const char* path) {return 0;}
int curvefs_opendir(uintptr_t instance_ptr,
const char* path,
dir_stream_t* dir_stream) {return 0;}
ssize_t curvefs_readdir(uintptr_t instance_ptr,
dir_stream_t* dir_stream,
dirent_t* dirent) {return 0;}
int curvefs_getdents(uintptr_t instance_ptr,
dir_stream_t* dir_stream,
char* data, size_t maxread, ssize_t* realbytes) {return 0;}
int curvefs_closedir(uintptr_t instance_ptr, dir_stream_t* dir_stream) {return 0;}
// file
int curvefs_open(uintptr_t instance_ptr,
const char* path,
uint32_t flags,
uint16_t mode) {return 0;}
int curvefs_lseek(uintptr_t instance_ptr,
int fd,
uint64_t offset,
int whence){return 0;}
ssize_t curvefs_read(uintptr_t instance_ptr,
int fd,
char* buffer,
size_t count) {return 0;}
ssize_t curvefs_write(uintptr_t instance_ptr,
int fd,
char* buffer,
size_t count) {return 0;}
int curvefs_fsync(uintptr_t instance_ptr, int fd) {return 0;}
int curvefs_close(uintptr_t instance_ptr, int fd) {return 0;}
int curvefs_unlink(uintptr_t instance_ptr, const char* path) {return 0;}
// others
int curvefs_statfs(uintptr_t instance_ptr, struct statvfs* statvfs) {return 0;}
int curvefs_lstat(uintptr_t instance_ptr, const char* path, struct stat* stat) {return 0;}
int curvefs_fstat(uintptr_t instance_ptr, int fd, struct stat* stat) {return 0;}
int curvefs_setattr(uintptr_t instance_ptr,
const char* path,
struct stat* stat,
int to_set) {return 0;}
int curvefs_chmod(uintptr_t instance_ptr, const char* path, uint16_t mode) {return 0;}
int curvefs_chown(uintptr_t instance_ptr,
const char* path,
uint32_t uid,
uint32_t gid) {return 0;}
int curvefs_rename(uintptr_t instance_ptr,
const char* oldpath,
const char* newpath) {return 0;}

View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2023 NetEase Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Project: Curve
* Created Date: 2023-07-12
* Author: Jingli Chen (Wine93)
*/
#ifndef CURVEFS_SDK_LIBCURVEFS_LIBCURVEFS_H_
#define CURVEFS_SDK_LIBCURVEFS_LIBCURVEFS_H_
#include <stdint.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/statvfs.h>
// Must be synchronized with DirStream if changed
// Cursor state for an open directory stream.
typedef struct {
uint64_t ino;
uint64_t fh;
uint64_t offset;
} dir_stream_t;
// One directory entry: attributes plus a fixed-size name buffer.
typedef struct {
struct stat stat;
char name[256];
} dirent_t;
#ifdef __cplusplus
extern "C" {
#endif
// Instance lifecycle: create a handle, feed it configuration, release it.
uintptr_t curvefs_create();
void curvefs_load_config(uintptr_t instance_ptr,
const char* config_file);
void curvefs_release(uintptr_t instance_ptr);
// NOTE: instance_ptr is the pointer of curvefs_mount_t instance.
void curvefs_conf_set(uintptr_t instance_ptr,
const char* key,
const char* value);
int curvefs_mount(uintptr_t instance_ptr,
const char* fsname,
const char* mountpoint);
// NOTE(review): "umonut" is a typo for "umount"; kept because it is the
// exported symbol name used by the stub implementation.
int curvefs_umonut(uintptr_t instance_ptr,
const char* fsname,
const char* mountpoint);
// directory
int curvefs_mkdir(uintptr_t instance_ptr, const char* path, uint16_t mode);
int curvefs_mkdirs(uintptr_t instance_ptr, const char* path, uint16_t mode);
int curvefs_rmdir(uintptr_t instance_ptr, const char* path);
int curvefs_opendir(uintptr_t instance_ptr,
const char* path,
dir_stream_t* dir_stream);
ssize_t curvefs_readdir(uintptr_t instance_ptr,
dir_stream_t* dir_stream,
dirent_t* dirent);
int curvefs_getdents(uintptr_t instance_ptr,
dir_stream_t* dir_stream,
char* data, size_t maxread, ssize_t* realbytes);
int curvefs_closedir(uintptr_t instance_ptr, dir_stream_t* dir_stream);
// file
int curvefs_open(uintptr_t instance_ptr,
const char* path,
uint32_t flags,
uint16_t mode);
int curvefs_lseek(uintptr_t instance_ptr,
int fd,
uint64_t offset,
int whence);
ssize_t curvefs_read(uintptr_t instance_ptr,
int fd,
char* buffer,
size_t count);
ssize_t curvefs_write(uintptr_t instance_ptr,
int fd,
char* buffer,
size_t count);
int curvefs_fsync(uintptr_t instance_ptr, int fd);
int curvefs_close(uintptr_t instance_ptr, int fd);
int curvefs_unlink(uintptr_t instance_ptr, const char* path);
// others
int curvefs_statfs(uintptr_t instance_ptr, struct statvfs* statvfs);
int curvefs_lstat(uintptr_t instance_ptr, const char* path, struct stat* stat);
int curvefs_fstat(uintptr_t instance_ptr, int fd, struct stat* stat);
int curvefs_setattr(uintptr_t instance_ptr,
const char* path,
struct stat* stat,
int to_set);
int curvefs_chmod(uintptr_t instance_ptr, const char* path, uint16_t mode);
int curvefs_chown(uintptr_t instance_ptr,
const char* path,
uint32_t uid,
uint32_t gid);
int curvefs_rename(uintptr_t instance_ptr,
const char* oldpath,
const char* newpath);
#ifdef __cplusplus
}
#endif
#endif  // CURVEFS_SDK_LIBCURVEFS_LIBCURVEFS_H_

View File

@ -0,0 +1,222 @@
#include <iostream>
#include <cstring>
#include "spdlog/spdlog.h"
#include "s3fs_filesystem.h"
#include "s3fs_lib.h"
namespace intercept {
namespace filesystem {
S3fsFileSystem::S3fsFileSystem() {
}
// Tear down the global s3fs library state when the adapter is destroyed.
S3fsFileSystem::~S3fsFileSystem() {
s3fs_global_uninit();
}
// Initialize the global s3fs library state.
void S3fsFileSystem::Init() {
s3fs_global_init();
}
// NOTE(review): Shutdown only logs; actual teardown happens in the dtor.
void S3fsFileSystem::Shutdown() {
std::cout << "S3fsFileSystem::Shutdown" << std::endl;
}
// Open through the s3fs posix shim; returns the shim's fd / error code.
int S3fsFileSystem::Open(const char* path, int flags, int mode) {
// std::cout << "S3fsFileSystem::Open: " << path << std::endl;
spdlog::info("S3fsFileSystem::Open:{}", path);
return posix_s3fs_open(path, flags, mode);
}
// Parallel read: split [0, count) into one contiguous slice per worker
// thread, read each slice at its own absolute offset via
// posix_s3fs_multiread, then advance the fd's shared offset by the total.
//
// Fixes vs. original:
//  - clamp the configured thread count to >= 1 (0 would divide by zero);
//  - only accumulate positive return values, so a failed slice (-1) cannot
//    corrupt the total and the subsequent lseek;
//  - the atomic accumulator makes the former mutex unnecessary;
//  - loop index is int to match numThreads (was a signed/unsigned compare).
ssize_t S3fsFileSystem::MultiRead(int fd, void* buf, size_t count) {
    intercept::common::Timer timer("server S3fsFileSystem::MultiRead");
    int numThreads = intercept::common::Configure::getInstance().getConfig("opThreadnum") == "" ?
        1 : atoi(intercept::common::Configure::getInstance().getConfig("opThreadnum").c_str());
    if (numThreads < 1) {
        numThreads = 1;
    }
    size_t partSize = count / numThreads;   // slice per thread
    size_t remaining = count % numThreads;  // tail handled by the last thread
    std::vector<std::thread> threads;
    char* charBuf = static_cast<char*>(buf);
    std::atomic<ssize_t> totalBytesRead(0);
    for (int i = 0; i < numThreads; ++i) {
        size_t offset = static_cast<size_t>(i) * partSize;
        size_t size = (i == numThreads - 1) ? (partSize + remaining) : partSize;
        threads.emplace_back([=, &totalBytesRead]() {
            ssize_t bytesRead = posix_s3fs_multiread(fd, charBuf + offset, size, offset);
            spdlog::debug("S3fsFileSystem::MultiRead, fd: {}, offset: {}, size: {}, bytesRead: {}", fd, offset, size, bytesRead);
            if (bytesRead > 0) {  // don't fold error codes into the total
                totalBytesRead += bytesRead;
            }
        });
    }
    for (auto& th : threads) {
        th.join();
    }
    // Advance the shared file offset past everything actually read.
    posix_s3fs_lseek(fd, totalBytesRead.load(), SEEK_CUR);
    spdlog::info("S3fsFileSystem::MultiRead, read bytes: {}", totalBytesRead.load());
    return totalBytesRead.load();
}
// Plain single-threaded read through the s3fs posix shim.
ssize_t S3fsFileSystem::Read(int fd, void* buf, size_t count) {
// std::cout << "S3fsFileSystem::Read: " << fd << std::endl;
spdlog::debug("S3fsFileSystem::Read, fd: {}, count: {}", fd, count);
return posix_s3fs_read(fd, buf, count);
}
// Parallel write: split [0, count) into one contiguous slice per worker
// thread, write each slice at its own absolute offset via
// posix_s3fs_multiwrite, then advance the fd's shared offset by the total.
//
// Fixes vs. original (mirrors MultiRead):
//  - clamp the configured thread count to >= 1 (0 would divide by zero);
//  - only accumulate positive return values, so a failed slice (-1) cannot
//    corrupt the total and the subsequent lseek;
//  - the atomic accumulator makes the former mutex unnecessary;
//  - loop index is int to match numThreads (was a signed/unsigned compare).
ssize_t S3fsFileSystem::MultiWrite(int fd, const void* buf, size_t count) {
    intercept::common::Timer timer("server S3fsFileSystem::MultiWrite");
    int numThreads = intercept::common::Configure::getInstance().getConfig("opThreadnum") == "" ?
        1 : atoi(intercept::common::Configure::getInstance().getConfig("opThreadnum").c_str());
    if (numThreads < 1) {
        numThreads = 1;
    }
    size_t partSize = count / numThreads;   // slice per thread
    size_t remaining = count % numThreads;  // tail handled by the last thread
    std::vector<std::thread> threads;
    const char* charBuf = static_cast<const char*>(buf);
    std::atomic<ssize_t> totalBytesWrite(0);
    for (int i = 0; i < numThreads; ++i) {
        size_t offset = static_cast<size_t>(i) * partSize;
        size_t size = (i == numThreads - 1) ? (partSize + remaining) : partSize;
        threads.emplace_back([=, &totalBytesWrite]() {
            ssize_t bytesWrite = posix_s3fs_multiwrite(fd, charBuf + offset, size, offset);
            spdlog::debug("finish S3fsFileSystem::Multiwrite, fd: {}, offset: {}, size: {}, bytesRead: {}", fd, offset, size, bytesWrite);
            if (bytesWrite > 0) {  // don't fold error codes into the total
                totalBytesWrite += bytesWrite;
            }
        });
    }
    for (auto& th : threads) {
        th.join();
    }
    // Advance the shared file offset past everything actually written.
    posix_s3fs_lseek(fd, totalBytesWrite.load(), SEEK_CUR);
    spdlog::debug("S3fsFileSystem::Multiwrite, multiwrite bytes: {}", totalBytesWrite.load());
    return totalBytesWrite.load();
}
// Single-threaded write through the s3fs posix shim.
ssize_t S3fsFileSystem::Write(int fd, const void* buf, size_t count) {
// std::cout << "S3fsFileSystem::Write: " << fd << std::endl;
spdlog::debug("S3fsFileSystem::Write, fd: {}, count: {}", fd, count);
return posix_s3fs_write(fd, buf, count);
}
// Release the shim-managed fd.
int S3fsFileSystem::Close(int fd) {
//std::cout << "S3fsFileSystem::Close: " << fd << std::endl;
spdlog::info("S3fsFileSystem::Close, fd: {}", fd);
return posix_s3fs_close(fd);
}
// Reposition the shim-managed file offset.
off_t S3fsFileSystem::Lseek(int fd, off_t offset, int whence) {
//std::cout << "S3fsFileSystem::Lseek: " << fd << std::endl;
spdlog::debug("S3fsFileSystem::Lseek, fd: {}, offset: {}, whence: {}", fd, offset, whence);
return posix_s3fs_lseek(fd, offset, whence);
}
// Path-based attribute lookup, delegated to the s3fs posix shim; the
// shim's status code is returned unchanged.
int S3fsFileSystem::Stat(const char* path, struct stat* statbuf) {
    spdlog::info("S3fsFileSystem::Stat, path: {}", path);
    const int status = posix_s3fs_stat(path, statbuf);
    return status;
}
// Fd-based attribute lookup, delegated to the s3fs posix shim.
//
// Fix: the log message previously said "S3fsFileSystem::Stat", making
// fd-based calls indistinguishable from path-based ones in the logs.
int S3fsFileSystem::Fstat(int fd, struct stat* statbuf) {
    spdlog::info("S3fsFileSystem::Fstat, fd: {}", fd);
    return posix_s3fs_fstat(fd, statbuf);
}
// NOTE(review): Fsync and Ftruncate are stubs that report success without
// touching the backend (the log text says "no implement").
int S3fsFileSystem::Fsync(int fd) {
// std::cout << "S3fsFileSystem::Fsync: " << fd << std::endl;
spdlog::info("S3fsFileSystem::Fsync, fd: {} no implement....", fd);
return 0;
}
int S3fsFileSystem::Ftruncate(int fd, off_t length) {
// std::cout << "S3fsFileSystem::Ftruncate: " << fd << " " << length << std::endl;
spdlog::info("S3fsFileSystem::Ftruncate, fd: {} length: {} no implement...", fd, length);
return 0;
}
// Delete through the s3fs posix shim.
int S3fsFileSystem::Unlink(const char* path) {
// std::cout << "S3fsFileSystem::Unlink: " << path << std::endl;
spdlog::info("S3fsFileSystem::Unlink, path: {}", path);
return posix_s3fs_unlink(path);
}
// Create a directory through the s3fs posix shim.
int S3fsFileSystem::Mkdir(const char* path, mode_t mode) {
// std::cout << "S3fsFileSystem::Mkdir: " << path << " " << mode << std::endl;
spdlog::info("S3fsFileSystem::Mkdir, path: {} mode: {}", path, mode);
return posix_s3fs_mkdir(path, mode);
}
// Open a directory stream via the s3fs posix shim.
//
// Fix: the original computed and logged the shim's status but then
// unconditionally returned 0, hiding open failures from callers.
// Propagate the backend status instead.
int S3fsFileSystem::Opendir(const char* path, DirStream* dirstream) {
    int ret = posix_s3fs_opendir(path, (S3DirStream*)dirstream);
    spdlog::info("S3fsFileSystem::Opendir path: {} ret {}", path, ret);
    return ret;
}
// Read directory entries into `contents`; the shim reports the byte count
// through *realbytes.
int S3fsFileSystem::Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes) {
//std::cout << "S3fsFileSystem::Getdents: " << dirstream << " " << maxread << " " << realbytes << std::endl;
int ret = posix_s3fs_getdents((S3DirStream*)dirstream, contents, maxread, realbytes);
spdlog::info("S3fsFileSystem::Getdents, maxread: {}, realbytes: {}", maxread, *realbytes);
return ret;
}
// Close a directory stream via the shim, propagating its status.
int S3fsFileSystem::Closedir(DirStream* dirstream) {
// std::cout << "S3fsFileSystem::Closedir: " << dirstream << std::endl;
int ret = posix_s3fs_closedir((S3DirStream*)dirstream);
spdlog::info("S3fsFileSystem::Closedir, ret: {}", ret);
return ret;
}
// NOTE(review): Rmdir and Rename are success-reporting no-ops here.
int S3fsFileSystem::Rmdir(const char* path) {
std::cout << "S3fsFileSystem::Rmdir: " << path << std::endl;
return 0;
}
int S3fsFileSystem::Rename(const char* from, const char* to) {
std::cout << "S3fsFileSystem::Rename: " << from << " to " << to << std::endl;
return 0;
}
// Unimplemented operations abort with runtime_error ("未实现" = "not implemented").
int S3fsFileSystem::Link(const char* oldpath, const char* newpath) {
throw std::runtime_error("未实现");
}
int S3fsFileSystem::Symlink(const char* oldpath, const char* newpath) {
throw std::runtime_error("未实现");
}
int S3fsFileSystem::Readlink(const char* path, char* buf, size_t bufsize) {
throw std::runtime_error("未实现");
}
int S3fsFileSystem::Chmod(const char* path, mode_t mode) {
throw std::runtime_error("未实现");
}
int S3fsFileSystem::Chown(const char* path, uid_t uid, gid_t gid) {
throw std::runtime_error("未实现");
}
// No-op truncate-by-path; reports success.
int S3fsFileSystem::Truncate(const char* path, off_t length) {
std::cout << "S3fsFileSystem::Truncate" << std::endl;
return 0;
}
int S3fsFileSystem::Utime(const char* path, const struct utimbuf* ubuf) {
throw std::runtime_error("未实现");
}
std::string S3fsFileSystem::NormalizePath(const std::string& path) {
throw std::runtime_error("未实现");
}
} // namespace filesystem
} // namespace intercept

View File

@ -0,0 +1,49 @@
#ifndef S3FS_FILESYSTEM_H
#define S3FS_FILESYSTEM_H
#include "abstract_filesystem.h"
namespace intercept {
namespace filesystem {
// AbstractFileSystem adapter over the s3fs posix shim, with optional
// multi-threaded slice reads/writes (MultiRead/MultiWrite, see the .cpp).
class S3fsFileSystem : public AbstractFileSystem {
public:
S3fsFileSystem();
~S3fsFileSystem() override;
void Init() override;
void Shutdown() override;
int Open(const char* path, int flags, int mode) override;
ssize_t Read(int fd, void* buf, size_t count) override;
ssize_t Write(int fd, const void* buf, size_t count) override;
int Close(int fd) override;
off_t Lseek(int fd, off_t offset, int whence) override;
int Stat(const char* path, struct stat* st) override;
int Fstat(int fd, struct stat* st) override;
int Fsync(int fd) override;
int Ftruncate(int fd, off_t length) override;
int Unlink(const char* path) override;
int Mkdir(const char* path, mode_t mode) override;
// NOTE(review): the directory-stream methods lack `override`, unlike their
// siblings; confirm they match virtuals on AbstractFileSystem.
int Opendir(const char* path, DirStream* dirstream);
int Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes);
int Closedir(DirStream* dirstream);
int Rmdir(const char* path) override;
int Rename(const char* from, const char* to) override;
int Link(const char* from, const char* to) override;
int Symlink(const char* from, const char* to) override;
int Readlink(const char* path, char* buf, size_t bufsize) override;
int Chmod(const char* path, mode_t mode) override;
int Chown(const char* path, uid_t uid, gid_t gid) override;
int Truncate(const char* path, off_t length) override;
int Utime(const char* path, const struct utimbuf* times) override;
// Parallel variants: split [0, count) into per-thread slices.
ssize_t MultiRead(int fd, void* buf, size_t count) override;
ssize_t MultiWrite(int fd, const void* buf, size_t count) override;
protected:
std::string NormalizePath(const std::string& path) override;
};
} // namespace filesystem
} // namespace intercept
#endif

View File

@ -0,0 +1,63 @@
#ifndef S3FS_S3FS_LIB_H_
#define S3FS_S3FS_LIB_H_
// Optional malloc_trim support: S3FS_MALLOCTRIM(pad) expands to a real
// malloc_trim call only when both S3FS_MALLOC_TRIM and HAVE_MALLOC_TRIM
// are defined; otherwise it expands to nothing.
#ifdef S3FS_MALLOC_TRIM
#ifdef HAVE_MALLOC_TRIM
#include <malloc.h>
#define S3FS_MALLOCTRIM(pad) malloc_trim(pad)
#else // HAVE_MALLOC_TRIM
#define S3FS_MALLOCTRIM(pad)
#endif // HAVE_MALLOC_TRIM
#else // S3FS_MALLOC_TRIM
#define S3FS_MALLOCTRIM(pad)
#endif // S3FS_MALLOC_TRIM
//-------------------------------------------------------------------
// posix interface functions
//-------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
// Opaque directory-stream handle defined by the s3fs implementation.
struct S3DirStream;
// Global library lifecycle.
void s3fs_global_init();
void s3fs_global_uninit();
// File operations. The multi* variants take an explicit file offset so
// several threads can operate on disjoint slices of a single fd (see
// S3fsFileSystem::MultiRead/MultiWrite).
int posix_s3fs_create(const char* _path, int flags, mode_t mode);
int posix_s3fs_open(const char* _path, int flags, mode_t mode);
int posix_s3fs_multiread(int fd, void* buf, size_t size, off_t file_offset);
int posix_s3fs_read(int fd, void* buf, size_t size);
int posix_s3fs_multiwrite(int fd, const void* buf, size_t size, off_t file_offset);
int posix_s3fs_write(int fd, const void* buf, size_t size);
off_t posix_s3fs_lseek(int fd, off_t offset, int whence);
int posix_s3fs_close(int fd);
// Metadata and directory operations.
int posix_s3fs_stat(const char* _path, struct stat* stbuf);
int posix_s3fs_fstat(int fd, struct stat* stbuf) ;
int posix_s3fs_mkdir(const char* _path, mode_t mode);
int posix_s3fs_opendir(const char* _path, S3DirStream* dirstream);
int posix_s3fs_getdents(S3DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes);
int posix_s3fs_closedir(S3DirStream* dirstream);
int posix_s3fs_unlink(const char* _path);
#ifdef __cplusplus
}
#endif
#endif // S3FS_S3FS_LIB_H_

View File

@ -0,0 +1,12 @@
# internal/CMakeLists.txt
#
# Builds the intercept "internal" layer twice from the same sources:
#   - intercept_internal:        server-side library, linked against common_lib
#   - intercept_internal_client: client-side copy compiled with -fPIC -mavx2
#                                and linked against common_lib_client
# NOTE(review): file(GLOB) misses newly added .cpp files until the next
# reconfigure; an explicit source list (or GLOB with CONFIGURE_DEPENDS)
# would be more robust.
file(GLOB INTERNAL_SOURCES *.cpp)
add_library(intercept_internal ${INTERNAL_SOURCES})
target_include_directories(intercept_internal PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(intercept_internal PUBLIC common_lib)
add_library(intercept_internal_client ${INTERNAL_SOURCES})
target_include_directories(intercept_internal_client PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_options(intercept_internal_client PUBLIC -fPIC -mavx2)
target_link_libraries(intercept_internal_client PUBLIC common_lib_client)

View File

@ -0,0 +1,112 @@
// Copyright (c) 2022 by Apex.AI Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// SPDX-License-Identifier: Apache-2.0
#ifndef IOX_EXAMPLES_REQUEST_AND_RESPONSE_TYPES_HPP
#define IOX_EXAMPLES_REQUEST_AND_RESPONSE_TYPES_HPP
#include <cstdint>
#include <cstddef>
#include <cstdlib>
#include <stdio.h>
#include <string>
// Service/instance identifiers used to match intercept clients to servers.
#define SERVICE_FLAG "interceptservice"
#define DUMMY_INSTANCE_FLAG "dummyserver"
#define INTERCEPT_INSTANCE_FLAG "interceptserver"
#define ICEORYX "ICEORYX"
namespace intercept {
namespace internal {
//! [request]
struct AddRequest
{
uint64_t augend{0};
uint64_t addend{0};
};
//! [request]
//! [response]
struct AddResponse
{
uint64_t sum{0};
};
//! [response]
// Presumably identifies a client endpoint by process and thread id --
// confirm against the connection-setup code.
struct UserRequest
{
uint64_t pid{0};
uint64_t threadid{0};
};
struct UserResponse
{
uint64_t pid{0};
uint64_t threadid{0};
};
// Small message header: operation type, target fd and payload size.
struct Metainfo {
int type = 0;
int fd = 0;
size_t count = 0;
};
// Names a (service, instance, event) triple plus the server flavor.
struct ServiceMetaInfo {
std::string service = "";
std::string instance = "";
std::string event = "";
std::string serverType = ""; // server type: normal | dummy
};
} // namespace internal
} // namespace intercept
#define MAX_LENGTH 2000000
// 生成随机字符,不包括 '\0'
// char randomChar() {
// const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
// return charset[rand() % (sizeof(charset) - 1)];
// }
// // 生成随机字符串
// char* generateRandomString(size_t length) {
// if (length > MAX_LENGTH) {
// fprintf(stderr, "String length is too long.\n");
// }
// char *str = (char*)malloc((length + 1) * sizeof(char)); // +1 为字符串的终止符 '\0' 预留空间
// if (str == NULL) {
// perror("malloc");
// }
// for (size_t i = 0; i < length; ++i) {
// str[i] = randomChar();
// }
// str[length] = '\0'; // 确保字符串以空字符结尾
// return str;
// }
#endif // IOX_EXAMPLES_REQUEST_AND_RESPONSE_TYPES_HPP

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,650 @@
#pragma once
#include <sys/stat.h>
#include <dirent.h>
#include <fcntl.h>
#include <string.h>
#include <cstdint>
#include <string>
#include <vector>
#include <optional>
#include "common/common.h"
namespace intercept {
namespace internal {
using intercept::common::DirStream;
// Operation-type enums
// Whether the target of an operation is a file or a directory.
enum class FileType {
FILE = 0,
DIR = 1,
};
// Every POSIX call the interceptor can forward to the server.
enum class PosixOpType {
OPEN = 0,
WRITE,
READ,
ACCESS,
CLOSE,
FSYNC,
TRUNCATE,
FTRUNCATE,
FUTIMES,
LSEEK,
MKDIR,
MKNOD,
OPENDIR,
READDIR,
GETDENTS,
CLOSEDIR,
RENAME,
STAT,
FSTAT,
UNLINK,
UTIMES,
TERMINAL, // operation issued when the program exits
// ... further operation types
};
// Human-readable name for an operation type (for logging).
std::string TypeToStr(PosixOpType opType);
// Base struct for request payloads
struct PosixOpRequest {
PosixOpType opType;
//virtual ~PosixOpRequest() = default; // a virtual dtor would make the type polymorphic
};
// Base struct for response payloads
struct PosixOpResponse{
PosixOpType opType;
//virtual ~PosixOpResponse() = default; // a virtual dtor would make the type polymorphic
};
// Request/response carrier: one subclass per POSIX op. Knows how to copy
// its request into a transport buffer and how to adopt the server's
// response bytes.
class PosixOpReqRes {
public:
PosixOpReqRes() = default;
PosixOpReqRes(PosixOpType opType);
PosixOpReqRes(const long* args, long* result);
virtual ~PosixOpReqRes() = default; // virtual dtor: class is used polymorphically
void SetOpType(PosixOpType type);
PosixOpType GetOpType() const;
// virtual void Init() = 0;
// virtual void Shutdown() = 0;
// get/set request data
// virtual const PosixOpRequest& GetRequestData() const = 0;
// virtual void SetRequestData(const PosixOpRequest& requestData) = 0;
// virtual void SetRequestData(const long* args, long* result) = 0;
// Copy the request payload into the transport buffer.
virtual void CopyRequestDataToBuf(void* buf) = 0;
// Wire payload sizes; the *AlignSize variants include alignment padding.
virtual int GetRequestSize() = 0;
virtual int GetRequestAlignSize() = 0;
virtual int GetResponseSize() = 0;
virtual int GetResponseAlignSize() = 0;
// Access / adopt the response payload.
virtual PosixOpResponse& GetResponse() = 0;
virtual void SetResponse(void* response) = 0;
protected:
PosixOpType opType_;
};
// ---------------------------------open------------------------------------------------
// Wire payload for open(): fixed-size path plus flags/mode.
// NOTE(review): paths longer than 199 chars will not fit in `path`.
struct OpenRequestData : PosixOpRequest {
char path[200];
int flags;
mode_t mode;
};
struct OpenResponseData : PosixOpResponse {
int fd;
};
class OpenOpReqRes : public PosixOpReqRes {
public:
OpenOpReqRes(const char* path, int flags, mode_t mode);
OpenOpReqRes(const long *args, long *result);
~OpenOpReqRes() override;
// Copy the request payload into the transport buffer.
virtual void CopyRequestDataToBuf(void* buf);
// Wire payload sizes.
int GetRequestSize() override;
int GetRequestAlignSize() override;
int GetResponseSize() override;
int GetResponseAlignSize() override;
// Access / adopt the response payload.
PosixOpResponse& GetResponse() override;
void SetResponse(void* request) override;
private:
OpenRequestData requestData_;
OpenResponseData responseData_;
};
// --------------------------------------read----------------------------------------
struct ReadRequestData : PosixOpRequest {
int fd;
size_t count;
// void* buf;
};
struct ReadResponseData : PosixOpResponse {
int ret; // return code
ssize_t length; // number of bytes actually read
void* buf; // caller-side destination pointer, preserved for the upstream
char content[0]; // payload returned by the server (flexible-array idiom; zero-size array is a compiler extension)
};
class ReadOpReqRes : public PosixOpReqRes {
public:
ReadOpReqRes(int fd, void* buf, size_t count);
ReadOpReqRes(const long *args, long *result);
virtual ~ReadOpReqRes() override;
// Copy the request payload into the transport buffer.
virtual void CopyRequestDataToBuf(void* buf);
virtual int GetRequestSize() override;
virtual int GetRequestAlignSize() override;
virtual int GetResponseSize() override;
virtual int GetResponseAlignSize() override;
virtual PosixOpResponse& GetResponse() override;
void SetResponse(void* response) override;
// Variant that adopts the response using multiple threads.
void SetResponseMultithreads(void* response);
private:
ReadRequestData requestData_;
ReadResponseData responseData_;
// intercept::common::ThreadPool threadPool_;
};
// ---------------------------------write-------------------------------------------
struct WriteRequestData : PosixOpRequest {
int fd;
size_t count; // requested length
void* buf;
char content[0]; // holds the payload during transport (flexible-array idiom)
};
struct WriteResponseData : PosixOpResponse {
int ret; // return code
ssize_t length; // number of bytes actually written
};
class WriteOpReqRes : public PosixOpReqRes {
public:
WriteOpReqRes()
: PosixOpReqRes(PosixOpType::WRITE) {}
WriteOpReqRes(int fd, void* buf, size_t count);
WriteOpReqRes(const long *args, long *result);
~WriteOpReqRes() override;
// Copy the request payload into the transport buffer.
virtual void CopyRequestDataToBuf(void* buf);
// Parallel copy helper used for large payloads.
void CopyRequestDataToBufMultithread(void* dest, const void* src, size_t count, int numThreads);
virtual int GetRequestSize() override;
virtual int GetRequestAlignSize() override;
virtual int GetResponseSize() override;
virtual int GetResponseAlignSize() override;
virtual PosixOpResponse& GetResponse() override;
void SetResponse(void* response) override;
private:
WriteRequestData requestData_;
WriteResponseData responseData_;
};
//-------------------------------------close---------------------------------------
struct CloseRequestData : PosixOpRequest {
    int fd;   // file descriptor to close
};
struct CloseResponseData : PosixOpResponse {
    int ret;  // return value
};
// Request/response wrapper for the POSIX close() operation.
class CloseOpReqRes : public PosixOpReqRes {
public:
    CloseOpReqRes()
        : PosixOpReqRes(PosixOpType::CLOSE) {}
    CloseOpReqRes(int fd);
    CloseOpReqRes(const long *args, long *result);
    ~CloseOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    void SetResponse(void* response) override;
private:
    CloseRequestData requestData_;
    CloseResponseData responseData_;
};
// ----------------------------------------fsync-------------------------------
struct FsyncRequestData : PosixOpRequest {
    int fd;   // file descriptor to sync
};
struct FsyncResponseData : PosixOpResponse {
    int ret;  // return value
};
// Request/response wrapper for the POSIX fsync() operation.
class FsyncOpReqRes : public PosixOpReqRes {
public:
    // BUG FIX: the default constructor previously tagged this operation as
    // PosixOpType::CLOSE, which would make the server dispatch an fsync
    // request to the close handler (see the opType switch in OnResponse).
    FsyncOpReqRes()
        : PosixOpReqRes(PosixOpType::FSYNC) {}
    FsyncOpReqRes(int fd);
    FsyncOpReqRes(const long *args, long *result);
    ~FsyncOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    void SetResponse(void* response) override;
private:
    FsyncRequestData requestData_;
    FsyncResponseData responseData_;
};
// -----------------------------------stat----------------------------------------
struct StatRequestData : PosixOpRequest {
    char path[200];        // NUL-terminated path to stat
};
struct StatResponseData : PosixOpResponse {
    int ret;               // return value
    void* st;              // caller-side struct stat* kept for the client
    struct stat fileStat;  // stat data filled in by the server
};
// Request/response wrapper for the POSIX stat() operation.
class StatOpReqRes : public PosixOpReqRes {
public:
    StatOpReqRes()
        : PosixOpReqRes(PosixOpType::STAT) {}
    StatOpReqRes(const char *path, struct stat *st);
    StatOpReqRes(const long *args, long *result);
    ~StatOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    void SetResponse(void* response) override;
private:
    StatRequestData requestData_;
    StatResponseData responseData_;
};
// ----------------------------------fstat------------------------------------------
struct FstatRequestData : PosixOpRequest {
    int fd;                // file descriptor to stat
};
struct FstatResponseData : PosixOpResponse {
    int ret;               // return value
    void* st;              // caller-side struct stat* kept for the client
    struct stat fileStat;  // stat data filled in by the server
};
// Request/response wrapper for the POSIX fstat() operation.
class FstatOpReqRes : public PosixOpReqRes {
public:
    FstatOpReqRes()
        : PosixOpReqRes(PosixOpType::FSTAT) {}
    FstatOpReqRes(int fd, struct stat *st);
    FstatOpReqRes(const long *args, long *result);
    ~FstatOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    void SetResponse(void* response) override;
private:
    FstatRequestData requestData_;
    FstatResponseData responseData_;
};
// -----------------------------------lseek------------------------------------------
struct LseekRequestData : PosixOpRequest {
    int fd;           // file descriptor to seek on
    // NOTE(review): lseek(2) takes a signed off_t; an unsigned field cannot
    // express negative offsets (e.g. SEEK_CUR/SEEK_END backwards) — confirm
    // whether negative seeks are ever needed here.
    uint64_t offset;
    int whence;       // SEEK_SET / SEEK_CUR / SEEK_END
};
struct LseekResponseData : PosixOpResponse {
    off_t ret;        // return value (resulting offset, or -1 on error)
};
// Request/response wrapper for the POSIX lseek() operation.
class LseekOpReqRes : public PosixOpReqRes {
public:
    LseekOpReqRes()
        : PosixOpReqRes(PosixOpType::LSEEK) {}
    LseekOpReqRes(int fd, uint64_t offset, int whence);
    LseekOpReqRes(const long *args, long *result);
    ~LseekOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    void SetResponse(void* response) override;
private:
    LseekRequestData requestData_;
    LseekResponseData responseData_;
};
// ----------------------------------mkdir-----------------------------------------------
struct MkdirRequestData : PosixOpRequest {
    char path[200];  // NUL-terminated path of the directory to create
    mode_t mode;     // permission bits for the new directory
};
struct MkdirResponseData : PosixOpResponse {
    int ret;         // return value
};
// Request/response wrapper for the POSIX mkdir() operation.
class MkdirOpReqRes : public PosixOpReqRes {
public:
    MkdirOpReqRes()
        : PosixOpReqRes(PosixOpType::MKDIR) {}
    MkdirOpReqRes(const char *path, mode_t mode);
    MkdirOpReqRes(const long *args, long *result);
    ~MkdirOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    void SetResponse(void* response) override;
private:
    MkdirRequestData requestData_;
    MkdirResponseData responseData_;
};
// ----------------------------------opendir------------------------------------
struct OpendirRequestData : PosixOpRequest {
    char path[200];       // NUL-terminated path of the directory to open
};
struct OpendirResponseData : PosixOpResponse {
    int ret;              // return value
    DIR* dir;             // caller-side DIR* kept for the client
    DirStream dirStream;  // directory-stream state produced by the server
};
// Request/response wrapper for the POSIX opendir() operation.
class OpendirOpReqRes : public PosixOpReqRes {
public:
    OpendirOpReqRes()
        : PosixOpReqRes(PosixOpType::OPENDIR) {}
    OpendirOpReqRes(const char *path);
    OpendirOpReqRes(const long *args, long *result);
    ~OpendirOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    void SetResponse(void* response) override;
private:
    OpendirRequestData requestData_;
    OpendirResponseData responseData_;
};
// ----------------------------------getdents------------------------
struct GetdentsRequestData : PosixOpRequest {
    DirStream dirinfo;  // directory-stream state to continue reading from
    size_t maxread;     // maximum number of bytes to return
};
struct GetdentsResponseData : PosixOpResponse {
    int ret;            // return value
    DirStream dirinfo;  // updated directory-stream state
    ssize_t realbytes;  // number of bytes actually returned in contents
    char* data;         // caller-side destination pointer kept for the client
    char contents[0];   // directory entries produced by the server (flexible array member)
};
// Request/response wrapper for the getdents (read directory entries) operation.
class GetdentsOpReqRes : public PosixOpReqRes {
public:
    GetdentsOpReqRes()
        : PosixOpReqRes(PosixOpType::GETDENTS) {}
    GetdentsOpReqRes(DirStream dirinfo, char* data, size_t maxread);
    GetdentsOpReqRes(const long *args, long *result);
    ~GetdentsOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    void SetResponse(void* response) override;
private:
    GetdentsRequestData requestData_;
    GetdentsResponseData responseData_;
};
// ----------------------------------closedir------------------------------------
struct ClosedirRequestData : PosixOpRequest {
    DirStream dirstream;  // directory-stream state to close
};
struct ClosedirResponseData : PosixOpResponse {
    int ret;              // return value
};
// Request/response wrapper for the POSIX closedir() operation.
class ClosedirOpReqRes : public PosixOpReqRes {
public:
    ClosedirOpReqRes()
        : PosixOpReqRes(PosixOpType::CLOSEDIR) {}
    ClosedirOpReqRes(const DirStream& dirstream);
    ClosedirOpReqRes(const long *args, long *result);
    ~ClosedirOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    virtual void SetResponse(void* response) override;
private:
    ClosedirRequestData requestData_;
    ClosedirResponseData responseData_;
};
// -------------------------unlink-----------------------------------------
struct UnlinkRequestData : PosixOpRequest {
    char path[200];  // NUL-terminated path of the file to unlink
};
struct UnlinkResponseData : PosixOpResponse {
    int ret;         // return value
};
// Request/response wrapper for the POSIX unlink() operation.
class UnlinkOpReqRes : public PosixOpReqRes {
public:
    UnlinkOpReqRes()
        : PosixOpReqRes(PosixOpType::UNLINK) {}
    UnlinkOpReqRes(const char *path);
    UnlinkOpReqRes(const long *args, long *result);
    ~UnlinkOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    void SetResponse(void* response) override;
private:
    UnlinkRequestData requestData_;
    UnlinkResponseData responseData_;
};
// -------------------------rename-----------------------------------------
struct RenameRequestData : PosixOpRequest {
    char oldpath[200];  // NUL-terminated source path
    char newpath[200];  // NUL-terminated destination path
};
struct RenameResponseData : PosixOpResponse {
    int ret;            // return value
};
// Request/response wrapper for the POSIX rename() operation.
class RenameOpReqRes : public PosixOpReqRes {
public:
    RenameOpReqRes()
        : PosixOpReqRes(PosixOpType::RENAME) {}
    RenameOpReqRes(const char *oldpath, const char *newpath);
    RenameOpReqRes(const long *args, long *result);
    ~RenameOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    void SetResponse(void* response) override;
private:
    RenameRequestData requestData_;
    RenameResponseData responseData_;
};
// ----------------------truncate-----------------------------------------
class TruncateRequestData : public PosixOpRequest {
public:
    char path[200];  // NUL-terminated path of the file to truncate
    off_t length;    // new file length
};
class TruncateResponseData : public PosixOpResponse {
public:
    int ret;         // return value
};
// Request/response wrapper for the POSIX truncate() operation.
class TruncateOpReqRes : public PosixOpReqRes {
public:
    TruncateOpReqRes()
        : PosixOpReqRes(PosixOpType::TRUNCATE) {}
    TruncateOpReqRes(const char *path, off_t length);
    TruncateOpReqRes(const long *args, long *result);
    ~TruncateOpReqRes() override;
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    void SetResponse(void* response) override;
private:
    TruncateRequestData requestData_;
    TruncateResponseData responseData_;
};
// ----------------------TERMINAL-------------------
// Sentinel operation a client sends on shutdown so the server can unblock;
// it carries no request payload.
class TerminalRequestData : public PosixOpRequest {
public:
};
class TerminalResponseData : public PosixOpResponse {
public:
    int ret;  // return value
};
// Request/response wrapper for the TERMINAL (client shutdown) message.
class TerminalOpReqRes : public PosixOpReqRes {
public:
    TerminalOpReqRes();
    ~TerminalOpReqRes() override {};
    // Serialize requestData_ into the loaned shared-memory buffer.
    virtual void CopyRequestDataToBuf(void* buf);
    // Byte size / alignment used when loaning request and response chunks.
    virtual int GetRequestSize() override;
    virtual int GetRequestAlignSize() override;
    virtual int GetResponseSize() override;
    virtual int GetResponseAlignSize() override;
    virtual PosixOpResponse& GetResponse() override;
    // FIX: removed the stray second semicolon after this declaration.
    void SetResponse(void* response) override;
private:
    TerminalRequestData requestData_;
    TerminalResponseData responseData_;
};
} // namespace internal
} // namespace intercept

View File

@ -0,0 +1,45 @@
# src/middleware/CMakeLists.txt
#
# Builds two variants of the middleware library:
#   - intercept_middleware        : server-side build
#   - intercept_middleware_client : client-side build (-DCLIENT_BUILD, PIC)
#
# NOTE(review): file(GLOB) misses newly added sources until the next
# reconfigure; an explicit source list would be preferable, but the file set
# is not visible from here, so the glob is kept.
set(ICEORYX_LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/lib)
set(ICEORYX_INC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/include)

# Prefer shared libraries, fall back to static, when locating iceoryx.
set(CMAKE_FIND_LIBRARY_SUFFIXES ".so" ".a")

# find_library caches its result, so each library is looked up exactly once
# (the previous duplicate lookups were no-ops).
find_library(ICEORYX_POSH_LIB NAMES iceoryx_posh PATHS ${ICEORYX_LIB_DIR})
find_library(ICEORYX_HOOFS_LIB NAMES iceoryx_hoofs PATHS ${ICEORYX_LIB_DIR})
# BUG FIX: this previously searched for NAMES "iceoryx_hoofs", so the
# platform library was never actually found or linked.
find_library(ICEORYX_PLATFORM_LIB NAMES iceoryx_platform PATHS ${ICEORYX_LIB_DIR})

file(GLOB MIDDLEWARE_SOURCES *.cpp)
file(GLOB MIDDLEWARE_HEADERS *.h)

# Server-side middleware library.
add_library(intercept_middleware ${MIDDLEWARE_SOURCES})
target_include_directories(intercept_middleware PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${ICEORYX_INC_DIR}
)
target_link_libraries(intercept_middleware PUBLIC
    intercept_internal
    intercept_filesystem
    ${ICEORYX_HOOFS_LIB}
    ${ICEORYX_POSH_LIB}
)

file(GLOB CLIENT_MIDDLEWARE_SOURCES *.cpp)
file(GLOB CLIENT_MIDDLEWARE_HEADERS *.h)

# Client-side middleware library, built from the same sources with
# CLIENT_BUILD defined.
add_library(intercept_middleware_client ${CLIENT_MIDDLEWARE_SOURCES})
target_include_directories(intercept_middleware_client PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${ICEORYX_INC_DIR}
)
target_link_libraries(intercept_middleware_client PUBLIC
    rt  # librt, was the raw flag "-lrt"
    intercept_internal_client
    intercept_filesystem_client
    ${ICEORYX_POSH_LIB}
    ${ICEORYX_HOOFS_LIB}
    ${ICEORYX_PLATFORM_LIB}
)
target_compile_options(intercept_middleware_client PUBLIC -DCLIENT_BUILD -fPIC)

View File

@ -0,0 +1,645 @@
#include "filesystem/abstract_filesystem.h"
#ifndef CLIENT_BUILD
#include "filesystem/curve_filesystem.h"
#endif
#include "iox/signal_watcher.hpp"
#include "iceoryx_wrapper.h"
#include "iceoryx_posh/mepoo/chunk_header.hpp"
namespace intercept {
namespace middleware {
using intercept::internal::PosixOpReqRes;
using intercept::internal::PosixOpRequest;
using intercept::internal::PosixOpResponse;
using intercept::internal::PosixOpType;
using intercept::internal::OpenRequestData;
using intercept::internal::OpenResponseData;
using intercept::internal::ReadRequestData;
using intercept::internal::ReadResponseData;
using intercept::internal::WriteRequestData;
using intercept::internal::WriteResponseData;
using intercept::internal::CloseRequestData;
using intercept::internal::CloseResponseData;
using intercept::internal::StatRequestData;
using intercept::internal::StatResponseData;
using intercept::internal::FstatRequestData;
using intercept::internal::FstatResponseData;
using intercept::internal::FsyncRequestData;
using intercept::internal::FsyncResponseData;
using intercept::internal::LseekRequestData;
using intercept::internal::LseekResponseData;
using intercept::internal::MkdirRequestData;
using intercept::internal::MkdirResponseData;
using intercept::internal::OpendirRequestData;
using intercept::internal::OpendirResponseData;
using intercept::internal::GetdentsRequestData;
using intercept::internal::GetdentsResponseData;
using intercept::internal::ClosedirRequestData;
using intercept::internal::ClosedirResponseData;
using intercept::internal::UnlinkRequestData;
using intercept::internal::UnlinkResponseData;
using intercept::internal::RenameRequestData;
using intercept::internal::RenameResponseData;
using intercept::internal::TruncateRequestData;
using intercept::internal::TruncateResponseData;
using intercept::internal::TerminalRequestData;
using intercept::internal::TerminalResponseData;
// Filesystem backend shared by all wrapper instances; set elsewhere before use.
std::shared_ptr<intercept::filesystem::AbstractFileSystem> ReqResMiddlewareWrapper::fileSystem_ = nullptr;
// Construct a wrapper bound to the given (service, instance, event) triple.
IceoryxWrapper::IceoryxWrapper(const ServiceMetaInfo& info) :
    ReqResMiddlewareWrapper(info){
}
// Tear down the endpoint this wrapper owns (see Shutdown()).
IceoryxWrapper::~IceoryxWrapper() {
    Shutdown();
}
// Intentionally empty: initialization happens in InitClient()/InitServer().
void IceoryxWrapper::Init() {
}
// Create the untyped iceoryx client endpoint addressed by the configured
// (service, instance, event) triple.
void IceoryxWrapper::InitClient() {
    const iox::capro::IdString_t svc(iox::TruncateToCapacity,
                                     info_.service.c_str(), info_.service.length());
    const iox::capro::IdString_t inst(iox::TruncateToCapacity,
                                      info_.instance.c_str(), info_.instance.length());
    const iox::capro::IdString_t evt(iox::TruncateToCapacity,
                                     info_.event.c_str(), info_.event.length());
    client_.reset(new iox::popo::UntypedClient({svc, inst, evt}));
    spdlog::info("client init, service: {}, instance: {}, event: {}",
                 info_.service, info_.instance, info_.event);
}
// Create the untyped iceoryx server endpoint, after running the base-class
// server initialization.
void IceoryxWrapper::InitServer() {
    ReqResMiddlewareWrapper::InitServer();
    // Build the capro service description from the configured meta info.
    iox::capro::IdString_t service(iox::TruncateToCapacity,
                                   info_.service.c_str(), info_.service.length());
    iox::capro::IdString_t instance(iox::TruncateToCapacity,
                                    info_.instance.c_str(), info_.instance.length());
    iox::capro::IdString_t event(iox::TruncateToCapacity,
                                 info_.event.c_str(), info_.event.length());
    server_.reset(new iox::popo::UntypedServer({service, instance, event}));
    // std::cout << "server init, service: " << info_.service << ", instance: " << info_.instance << ", event: " << info_.event << std::endl;
    spdlog::info("IceoryxWrapper::InitServer, server: {}, instance: {}, event: {} ", info_.service, info_.instance, info_.event);
}
// Like InitServer(), but skips the base-class initialization — creates only
// the raw endpoint (used for the DUMMYSERVER service type).
void IceoryxWrapper::InitDummyServer() {
    iox::capro::IdString_t service(iox::TruncateToCapacity,
                                   info_.service.c_str(), info_.service.length());
    iox::capro::IdString_t instance(iox::TruncateToCapacity,
                                    info_.instance.c_str(), info_.instance.length());
    iox::capro::IdString_t event(iox::TruncateToCapacity,
                                 info_.event.c_str(), info_.event.length());
    server_.reset(new iox::popo::UntypedServer({service, instance, event}));
    // std::cout << "server init, service: " << info_.service << ", instance: " << info_.instance << ", event: " << info_.event << std::endl;
    spdlog::info("IceoryxWrapper::InitDummyServer, server: {}, instance: {}, event: {} ", info_.service, info_.instance, info_.event);
}
// Stop whichever endpoint role this wrapper is playing. Called from the
// destructor, so it must be safe to run exactly once per instance.
void IceoryxWrapper::Shutdown() {
    spdlog::info("shutdown IceoryxWrapper");
    switch (servicetype_) {
    case ServiceType::SERVER:
        spdlog::info("stop the server....");
        // StopServer();
        break;
    case ServiceType::CLIENT:
        StopClient();
        spdlog::info("stop the client....");
        break;
    case ServiceType::DUMMYSERVER:
        spdlog::info("stop the dummyserver, do nothing");
        break;
    default:
        spdlog::info("unknown service type : {}", (int)servicetype_);
        break;
    }
}
// Run the server's request loop; blocks inside OnResponse() until the loop
// exits. Requires InitServer()/InitDummyServer() to have been called first.
void IceoryxWrapper::StartServer() {
    // Idiomatic smart-pointer null check (was: server_.get() == nullptr).
    if (!server_) {
        std::cerr << "server is nullptr" << std::endl;
        return;
    }
    // FIX: corrected log typo "bgein" -> "begin".
    spdlog::info("enter IceoryxWrapper::StartServer, begin OnResponse");
    running_ = true;
    OnResponse();
    spdlog::info("enter IceoryxWrapper::StartServer, end OnResponse");
}
// Not currently called anywhere.
void IceoryxWrapper::StartClient() {
    // Creating the client endpoint is all that is needed to "start" it.
    InitClient();
}
// Stop the request loop by signalling ourselves: SIGINT is presumably
// observed via iox::hasTerminationRequested() in OnResponse() (signal_watcher
// is included above) — TODO confirm the watcher registers SIGINT.
void IceoryxWrapper::StopServer() {
    kill(getpid(), SIGINT);
    running_ = false;
}
// Send a TERMINAL request so the server side can unwind, then return; the
// synchronous OnRequest() call also waits for the acknowledging response.
void IceoryxWrapper::StopClient() {
    intercept::internal::TerminalOpReqRes terminal;
    spdlog::info("wait stop client, service: {}, instance: {}, event: {} client count: {}",
                 info_.service, info_.instance, info_.event, client_.use_count());
    OnRequest(terminal);
}
// Client side: serialize reqRes into a loaned shared-memory chunk, send it,
// then block (busy-poll) until the response carrying the matching sequence
// id arrives and copy it back into reqRes.
void IceoryxWrapper::OnRequest(PosixOpReqRes& reqRes) {
    int reqsize = reqRes.GetRequestSize();
    int alignsize = reqRes.GetRequestAlignSize();
    int64_t expectedResponseSequenceId = requestSequenceId_;
    {
        // intercept::common::Timer timer("client request");
        client_->loan(reqsize, alignsize)
            .and_then([&](auto& requestPayload) {
                // Tag the chunk with a fresh sequence id so the response can
                // be matched below.
                auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
                requestHeader->setSequenceId(requestSequenceId_);
                expectedResponseSequenceId = requestSequenceId_;
                requestSequenceId_ += 1;
                char* request = static_cast<char*>(requestPayload);
                const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload);
                spdlog::info("to loan chunk in client, head info, chunksize: {}", chunkHeader->chunkSize());
                reqRes.CopyRequestDataToBuf((void*)request);
                client_->send(request).or_else(
                    [&](auto& error) { std::cout << "Could not send Request! Error: " << error << std::endl; });
            })
            .or_else([](auto& error) { std::cout << "Could not allocate Request! Error: " << error << std::endl; });
    }
    //! [take response]
    {
        // intercept::common::Timer timer("client response");
        bool hasReceivedResponse{false};
        do {
            client_->take().and_then([&](const auto& responsePayload) {
                auto responseHeader = iox::popo::ResponseHeader::fromPayload(responsePayload);
                if (responseHeader->getSequenceId() == expectedResponseSequenceId)
                {
                    const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(responsePayload);
                    spdlog::info("to release chunk in client, head info, type: {} typestr: {} , chunksize: {}", int(reqRes.GetOpType()), TypeToStr(reqRes.GetOpType()), chunkHeader->chunkSize());
                    reqRes.SetResponse((void*)responsePayload);
                    // BUG FIX: the chunk header was previously dereferenced
                    // and logged *after* releaseResponse() — a use-after-release
                    // on shared memory. All payload access now happens before
                    // the chunk is returned to the pool.
                    client_->releaseResponse(responsePayload);
                }
                else
                {
                    spdlog::error("Got Response with outdated sequence ID! Expected = {}; Actual = {} ! -> skip",
                                  expectedResponseSequenceId, responseHeader->getSequenceId());
                    // BUG FIX: the mismatched response was previously never
                    // released, leaking its shared-memory chunk.
                    client_->releaseResponse(responsePayload);
                }
                hasReceivedResponse = true;
            });
        } while (!hasReceivedResponse);
    }
}
// Server side: poll for requests, dispatch each one to its handler by op
// type, and exit when termination is requested, running_ drops, or the
// idle timeout elapses.
void IceoryxWrapper::OnResponse() {
    auto lastRequestTime = std::chrono::steady_clock::now(); // timestamp of the last handled request
    // Idle timeout in seconds; config key "waitRequestMaxSeconds", default 5.
    int intervalSeconds = intercept::common::Configure::getInstance().getConfig("waitRequestMaxSeconds") == "" ? 5 : std::stoi(intercept::common::Configure::getInstance().getConfig("waitRequestMaxSeconds"));
    int trynumber = 0;
    int getnum = 0;
    int missnum = 0; // currently unused
    std::chrono::steady_clock::duration totalDuration = std::chrono::steady_clock::duration::zero(); // total time spent handling requests
    while (!iox::hasTerminationRequested() && running_) {
        trynumber++;
        if(trynumber > 2000000) {
            // ! NOTE: this idle check may cut some connections off too early,
            // leaving the client unable to get a response.
            auto now = std::chrono::steady_clock::now(); // current time
            if (now - lastRequestTime > std::chrono::seconds(intervalSeconds)) { // no request handled within the timeout window
                spdlog::info("No request handled in the last {} seconds. Exiting loop.", intervalSeconds);
                break;
            }
        }
        server_->take().and_then([&](auto& requestPayload) {
            auto begintime = std::chrono::steady_clock::now();
            // Every request starts with the common PosixOpRequest header;
            // opType selects the concrete handler.
            auto request = static_cast<const PosixOpRequest*>(requestPayload);
            // std::cout << "request type: " << (int)request->opType << std::endl;
            switch (request->opType) {
                case PosixOpType::OPEN:
                    HandleOpenRequest(requestPayload);
                    break;
                case PosixOpType::READ:
                    HandleReadRequest(requestPayload);
                    break;
                case PosixOpType::WRITE:
                    HandleWriteRequest(requestPayload);
                    break;
                case PosixOpType::CLOSE:
                    HandleCloseRequest(requestPayload);
                    break;
                case PosixOpType::STAT:
                    HandleStatRequest(requestPayload);
                    break;
                case PosixOpType::FSTAT:
                    HandleFstatRequest(requestPayload);
                    break;
                case PosixOpType::FSYNC:
                    HandleFsyncRequest(requestPayload);
                    break;
                case PosixOpType::LSEEK:
                    HandleLseekRequest(requestPayload);
                    break;
                case PosixOpType::MKDIR:
                    HandleMkdirRequest(requestPayload);
                    break;
                case PosixOpType::UNLINK:
                    HandleUnlinkRequest(requestPayload);
                    break;
                case PosixOpType::OPENDIR:
                    HandleOpendirRequest(requestPayload);
                    break;
                case PosixOpType::GETDENTS:
                    HandleGetdentsRequest(requestPayload);
                    break;
                case PosixOpType::CLOSEDIR:
                    HandleClosedirRequest(requestPayload);
                    break;
                case PosixOpType::RENAME:
                    HandleRenameRequest(requestPayload);
                    break;
                case PosixOpType::TRUNCATE:
                    HandleTruncateRequest(requestPayload);
                    break;
                case PosixOpType::TERMINAL:
                    HandleTerminalRequest(requestPayload);
                    break;
                default:
                    spdlog::error("Unsupported request type: {}", (int)request->opType);
                    break;
            }
            // A request was handled: refresh the idle timestamp and stats.
            lastRequestTime = std::chrono::steady_clock::now();
            trynumber = 0; // reset the idle-spin counter
            getnum++;
            totalDuration += (lastRequestTime - begintime);
        }
        );
        // TODO: without a sleep here data is sometimes not received — to be investigated
        // sleep(1);
    }
    std::cout << "exit Server OnResponse... " << info_.service << " " << info_.instance << " " << info_.event << std::endl;
    // if (getnum > 0) {
    //     std::cout << "total request time: " << totalDuration.count() << " , average time : " << totalDuration.count()/ getnum << std::endl;
    // }
}
// Serve an open(): loan a response chunk, delegate to the filesystem's
// Open(), and send the resulting fd back to the client.
void IceoryxWrapper::HandleOpenRequest(const auto& requestPayload) {
    auto request = static_cast<const OpenRequestData*>(requestPayload);
    spdlog::info("Open file request, path: {}, flags: {}, mode: {}", request->path, request->flags, request->mode);
    // The request header links the loaned response to this request.
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(OpenResponseData), alignof(OpenResponseData))
        .and_then([&](auto& responsePayload) {
            const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload);
            spdlog::info("to loan chunk in server open , head info, chunksize: {}", chunkHeader->chunkSize());
            auto response = static_cast<OpenResponseData*>(responsePayload);
            response->opType = request->opType;
            response->fd = fileSystem_->Open(request->path, request->flags, request->mode);
            server_->send(responsePayload).or_else(
                [&](auto& error) { std::cout << "Could not send Response! Error: " << error << std::endl; });
            spdlog::info("open response info, the type: {}, the fd: {}", intercept::internal::TypeToStr(response->opType), response->fd );
        })
        .or_else(
            [&](auto& error) { std::cout << "Could not allocate Open Response! Error: " << error << std::endl; });
    const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload);
    spdlog::info("to release chunk in server open , head info, chunksize: {}", chunkHeader->chunkSize());
    server_->releaseRequest(request);
}
// Serve a read(): loan a response chunk sized for the header plus the
// requested byte count, read directly into the chunk, and send it back.
void IceoryxWrapper::HandleReadRequest(const auto& requestPayload) {
    auto request = static_cast<const ReadRequestData*>(requestPayload);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(ReadResponseData) + request->count, alignof(ReadResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<ReadResponseData*>(responsePayload);
            response->opType = request->opType;
            // Payload area begins right after the fixed-size response struct
            // (the content[] flexible array member).
            char* buf = (char*) response + sizeof(ReadResponseData);
            // Large reads go through the multi-threaded path when the
            // "multiop" config flag is set and count reaches "blocksize".
            if (intercept::common::Configure::getInstance().getConfig("multiop") == "true"
                && request->count >= atol(intercept::common::Configure::getInstance().getConfig("blocksize").c_str())) {
                response->length = fileSystem_->MultiRead(request->fd, buf, request->count);
            } else {
                response->length = fileSystem_->Read(request->fd, buf, request->count);
            }
            server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Read! Error: " << error << std::endl;});
            spdlog::debug("read response, fd: {}, count: {}, read response info, the type: {}, the length: {}",
                request->fd, request->count, intercept::internal::TypeToStr(response->opType), response->length);
        }).or_else(
            [&](auto& error) { std::cout << "Could not allocate Read Response! Error: " << error << std::endl; });
    // FIX: removed two dead "chunkHeader" locals whose only uses were
    // commented-out log statements.
    server_->releaseRequest(request);
}
// Serve a write(): the payload travels inline in the request chunk
// (request->content); only a fixed-size response is loaned back.
void IceoryxWrapper::HandleWriteRequest(const auto& requestPayload) {
    spdlog::debug("handle one write request");
    auto request = static_cast<const WriteRequestData*>(requestPayload);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(WriteResponseData), alignof(WriteResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<WriteResponseData*>(responsePayload);
            response->opType = request->opType;
            // Large writes go through the multi-threaded path when the
            // "multiop" config flag is set and count reaches "blocksize".
            if (intercept::common::Configure::getInstance().getConfig("multiop") == "true"
                && request->count >= atol(intercept::common::Configure::getInstance().getConfig("blocksize").c_str())) {
                response->length = fileSystem_->MultiWrite(request->fd, request->content, request->count);
            } else {
                response->length = fileSystem_->Write(request->fd, request->content, request->count);
            }
            server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Write! Error: " << error << std::endl;});
            spdlog::debug("write response, fd: {}, count: {}, write response info, the type: {}, the length: {}",
                request->fd, request->count, intercept::internal::TypeToStr(response->opType), response->length);
        }).or_else(
            [&](auto& error) { std::cout << "Could not allocate Write Response! Error: " << error << std::endl; });
    server_->releaseRequest(request);
}
// Serve a close(): loan a fixed-size response, delegate to the filesystem's
// Close(), and send the result back.
void IceoryxWrapper::HandleCloseRequest(const auto& requestPayload) {
    auto request = static_cast<const CloseRequestData*>(requestPayload);
    spdlog::info("close request, fd: {}", request->fd);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(CloseResponseData), alignof(CloseResponseData))
        .and_then([&](auto& responsePayload) {
            const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload);
            spdlog::info("to loan chunk in server close , head info, chunksize: {}", chunkHeader->chunkSize());
            auto response = static_cast<CloseResponseData*>(responsePayload);
            response->opType = request->opType;
            response->ret = fileSystem_->Close(request->fd);
            spdlog::info("finish close, fd: {}", request->fd);
            server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Close! Error: " << error << std::endl;});
            spdlog::info("close response info, the type: {}, the ret: {}", intercept::internal::TypeToStr(response->opType), response->ret);
        }).or_else(
            // FIX: error message previously said "Write Response" (copy-paste).
            [&](auto& error) { std::cout << "Could not allocate Close Response! Error: " << error << std::endl; });
    const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload);
    spdlog::info("to release chunk in server close , head info, chunksize: {}", chunkHeader->chunkSize());
    server_->releaseRequest(request);
}
// Serve an fsync(): loan a fixed-size response, delegate to the filesystem's
// Fsync(), and send the result back.
void IceoryxWrapper::HandleFsyncRequest(const auto& requestPayload) {
    auto request = static_cast<const FsyncRequestData*>(requestPayload);
    // FIX: corrected log typo "reqeust" -> "request".
    spdlog::info("fsync request, fd: {}", request->fd);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(FsyncResponseData), alignof(FsyncResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<FsyncResponseData*>(responsePayload);
            response->opType = request->opType;
            response->ret = fileSystem_->Fsync(request->fd);
            // FIX: error messages previously said "Stat"/"Write" (copy-paste).
            server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Fsync! Error: " << error << std::endl;});
            spdlog::info("fsync response info, ret: {}", response->ret);
        }).or_else(
            [&](auto& error) { std::cout << "Could not allocate Fsync Response! Error: " << error << std::endl; });
    server_->releaseRequest(request);
}
// Serve a stat(): the struct stat is filled directly inside the loaned
// response chunk and shipped back to the client.
void IceoryxWrapper::HandleStatRequest(const auto& requestPayload) {
    auto request = static_cast<const StatRequestData*>(requestPayload);
    spdlog::info("stat request, pathname: {}", request->path);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(StatResponseData), alignof(StatResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<StatResponseData*>(responsePayload);
            response->opType = request->opType;
            response->ret = fileSystem_->Stat(request->path, &(response->fileStat));
            server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Stat! Error: " << error << std::endl;});
            spdlog::info("stat response info, the ino: {}, size: {}, the ret: {}",
                (int)response->fileStat.st_ino, response->fileStat.st_size, response->ret);
        }).or_else(
            // FIX: error message previously said "Write Response" (copy-paste).
            [&](auto& error) { std::cout << "Could not allocate Stat Response! Error: " << error << std::endl; });
    server_->releaseRequest(request);
}
// Serve an fstat(): the struct stat is filled directly inside the loaned
// response chunk and shipped back to the client.
void IceoryxWrapper::HandleFstatRequest(const auto& requestPayload) {
    auto request = static_cast<const FstatRequestData*>(requestPayload);
    spdlog::info("fstat request, fd: {}", request->fd);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(FstatResponseData), alignof(FstatResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<FstatResponseData*>(responsePayload);
            response->opType = request->opType;
            response->ret = fileSystem_->Fstat(request->fd, &(response->fileStat));
            // FIX: error messages previously said "Stat"/"Write" (copy-paste).
            server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Fstat! Error: " << error << std::endl;});
            spdlog::info("fstat response info, the ino: {}, size: {}, the ret: {}",
                (int)response->fileStat.st_ino, response->fileStat.st_size, response->ret);
        }).or_else(
            [&](auto& error) { std::cout << "Could not allocate Fstat Response! Error: " << error << std::endl; });
    server_->releaseRequest(request);
}
// Lseek handler: repositions the backend file offset and returns the new
// offset (or error) to the client.
void IceoryxWrapper::HandleLseekRequest(const auto& requestPayload) {
    auto request = static_cast<const LseekRequestData*>(requestPayload);
    spdlog::debug("lseek request, fd: {}, offset: {}", request->fd, request->offset);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(LseekResponseData), alignof(LseekResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<LseekResponseData*>(responsePayload);
            response->opType = request->opType;
            response->ret = fileSystem_->Lseek(request->fd, request->offset, request->whence);
            // Fix: error texts below said "Stat"/"Write" (copy-paste).
            server_->send(responsePayload).or_else([&](auto& error) {
                std::cout << "Could not send Lseek Response! Error: " << error << std::endl; });
            spdlog::debug("lseek response, ret: {}", response->ret);
        }).or_else([&](auto& error) {
            std::cout << "Could not allocate Lseek Response! Error: " << error << std::endl; });
    server_->releaseRequest(request);
}
// Mkdir handler: creates the directory on the backend and returns the
// result code to the client.
void IceoryxWrapper::HandleMkdirRequest(const auto& requestPayload) {
    auto request = static_cast<const MkdirRequestData*>(requestPayload);
    spdlog::info("mkdir request, pathname: {}, mode: {}", request->path, request->mode);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(MkdirResponseData), alignof(MkdirResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<MkdirResponseData*>(responsePayload);
            response->opType = request->opType;
            response->ret = fileSystem_->Mkdir(request->path, request->mode);
            // Fix: error texts said "Stat"/"Write"; log text said "resposne".
            server_->send(responsePayload).or_else([&](auto& error) {
                std::cout << "Could not send Mkdir Response! Error: " << error << std::endl; });
            spdlog::info("mkdir response, ret: {}", response->ret);
        }).or_else([&](auto& error) {
            std::cout << "Could not allocate Mkdir Response! Error: " << error << std::endl; });
    server_->releaseRequest(request);
}
// Getdents handler: reads up to `maxread` directory entries from the backend
// directory stream and returns them inline in the response chunk, together
// with the advanced stream cursor.
void IceoryxWrapper::HandleGetdentsRequest(const auto& requestPayload) {
    auto request = static_cast<const GetdentsRequestData*>(requestPayload);
    // Cap the entry count: loaning a chunk large enough for very big requests
    // can fail.  Fix: the old code overwrote the requested size with 200
    // unconditionally; now smaller client requests are honoured.
    int maxread = request->maxread;
    if (maxread > 200) {
        maxread = 200;
    }
    spdlog::info("getdents request, fd: {}, the info: {}", request->dirinfo.fh, request->dirinfo.ino);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(GetdentsResponseData) + maxread * sizeof(dirent64), alignof(GetdentsResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<GetdentsResponseData*>(responsePayload);
            response->opType = request->opType;
            // Getdents advances the cursor inside dirinfo, so it needs a
            // mutable view of the (const) request's stream state.
            auto req = const_cast<GetdentsRequestData*>(request);
            response->ret = fileSystem_->Getdents(&req->dirinfo, response->contents, maxread, &response->realbytes);
            response->dirinfo = req->dirinfo;
            server_->send(responsePayload).or_else([&](auto& error) {
                std::cout << "Could not send Getdents Response! Error: " << error << std::endl; });
            spdlog::info("getdents response, ret: {}, the realbytes: {}, the offset: {}",
                         response->ret, response->realbytes, response->dirinfo.offset);
        }).or_else([&](auto& error) {
            std::cout << "Could not allocate Getdents Response! Error: " << error << std::endl; });
    server_->releaseRequest(request);
}
// Opendir handler: opens a directory stream on the backend filesystem and
// ships the resulting DirStream back to the requesting client.
void IceoryxWrapper::HandleOpendirRequest(const auto& requestPayload) {
    auto req = static_cast<const OpendirRequestData*>(requestPayload);
    spdlog::info("opendir request, path: {}", req->path);
    auto header = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(header, sizeof(OpendirResponseData), alignof(OpendirResponseData))
        .and_then([&](auto& responsePayload) {
            auto rsp = static_cast<OpendirResponseData*>(responsePayload);
            rsp->opType = req->opType;
            rsp->ret = fileSystem_->Opendir(req->path, &rsp->dirStream);
            server_->send(responsePayload).or_else([&](auto& error) {
                std::cout << "Could not send Response! Error: " << error << std::endl;
            });
            spdlog::info("opendir response, the type: {}, the fd: {}", TypeToStr(rsp->opType), rsp->dirStream.fh);
        })
        .or_else([&](auto& error) {
            std::cout << "Could not allocate Open Response! Error: " << error << std::endl;
        });
    // Request chunk is released regardless of loan/send outcome.
    server_->releaseRequest(req);
}
// Closedir handler: closes the backend directory stream named in the request
// and returns the result code to the client.
void IceoryxWrapper::HandleClosedirRequest(const auto& requestPayload) {
    auto request = static_cast<const ClosedirRequestData*>(requestPayload);
    // Fix: log text said "requset".
    spdlog::info("closedir request, fd: {}", request->dirstream.fh);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(ClosedirResponseData), alignof(ClosedirResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<ClosedirResponseData*>(responsePayload);
            response->opType = request->opType;
            // Closedir takes a mutable stream, hence the const_cast on the
            // (logically consumed) request payload.
            response->ret = fileSystem_->Closedir(const_cast<intercept::common::DirStream*>(&request->dirstream));
            server_->send(responsePayload).or_else([&](auto& error) {
                std::cout << "Could not send Response for Closedir! Error: " << error << std::endl; });
            spdlog::info("closedir response, the type: {}, the ret: {}", TypeToStr(response->opType), response->ret);
        }).or_else([&](auto& error) {
            // Fix: message previously said "Write Response" (copy-paste).
            std::cout << "Could not allocate Closedir Response! Error: " << error << std::endl; });
    server_->releaseRequest(request);
}
// Unlink handler: removes the file on the backend and reports the result.
void IceoryxWrapper::HandleUnlinkRequest(const auto& requestPayload) {
    auto request = static_cast<const UnlinkRequestData*>(requestPayload);
    // Fix: log text said "reqeust".
    spdlog::info("unlink request, pathname: {}", request->path);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(UnlinkResponseData), alignof(UnlinkResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<UnlinkResponseData*>(responsePayload);
            response->opType = request->opType;
            response->ret = fileSystem_->Unlink(request->path);
            server_->send(responsePayload).or_else([&](auto& error) {
                std::cout << "Could not send Unlink Response! Error: " << error << std::endl; });
            // Fix: format string was missing the {} placeholder, so the
            // return code was never logged.
            spdlog::info("unlink response, ret: {}", response->ret);
        }).or_else([&](auto& error) {
            std::cout << "Could not allocate Unlink Response! Error: " << error << std::endl; });
    server_->releaseRequest(request);
}
// Rename handler: renames oldpath to newpath on the backend filesystem.
void IceoryxWrapper::HandleRenameRequest(const auto& requestPayload) {
    auto request = static_cast<const RenameRequestData*>(requestPayload);
    spdlog::info("rename request, oldpath: {}, newpath: {}", request->oldpath, request->newpath);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(RenameResponseData), alignof(RenameResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<RenameResponseData*>(responsePayload);
            response->opType = request->opType;
            response->ret = fileSystem_->Rename(request->oldpath, request->newpath);
            // Fix: error texts below said "Stat"/"Write" (copy-paste).
            server_->send(responsePayload).or_else([&](auto& error) {
                std::cout << "Could not send Rename Response! Error: " << error << std::endl; });
            spdlog::info("rename response, ret: {}", response->ret);
        }).or_else([&](auto& error) {
            std::cout << "Could not allocate Rename Response! Error: " << error << std::endl; });
    server_->releaseRequest(request);
}
// Truncate handler: truncates the file at `path` to `length` bytes.
void IceoryxWrapper::HandleTruncateRequest(const auto& requestPayload) {
    auto request = static_cast<const TruncateRequestData*>(requestPayload);
    spdlog::info("truncate request, path: {}, length: {}", request->path, request->length);
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(TruncateResponseData), alignof(TruncateResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<TruncateResponseData*>(responsePayload);
            response->opType = request->opType;
            response->ret = fileSystem_->Truncate(request->path, request->length);
            // Fix: error texts below said "Stat"/"Write" (copy-paste).
            server_->send(responsePayload).or_else([&](auto& error) {
                std::cout << "Could not send Truncate Response! Error: " << error << std::endl; });
            spdlog::info("truncate response, ret: {}", response->ret);
        }).or_else([&](auto& error) {
            std::cout << "Could not allocate Truncate Response! Error: " << error << std::endl; });
    server_->releaseRequest(request);
}
// Terminal handler: acknowledges the shutdown request, clears running_ to
// stop the serving loop, and replies before the server is torn down.
void IceoryxWrapper::HandleTerminalRequest(const auto& requestPayload) {
    auto request = static_cast<const TerminalRequestData*>(requestPayload);
    spdlog::info("terminal request.");
    auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload);
    server_->loan(requestHeader, sizeof(TerminalResponseData), alignof(TerminalResponseData))
        .and_then([&](auto& responsePayload) {
            auto response = static_cast<TerminalResponseData*>(responsePayload);
            response->opType = request->opType;
            response->ret = 0;
            running_ = false; // terminate the serving loop
            server_->send(responsePayload).or_else([&](auto& error) {
                std::cout << "Could not send Response for Terminal! Error: " << error << std::endl; });
            const iox::mepoo::ChunkHeader* chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(responsePayload);
            spdlog::info("terminal response, ret: {}, pid: {}, tid: {}, loan chunk chunksize: {}",
                         response->ret, (unsigned int)getpid(), (unsigned int)pthread_self(), chunkHeader->chunkSize());
            // Fix: sleep() takes whole seconds, so sleep(0.1) truncated to
            // sleep(0) and never paused; usleep gives the intended 100 ms.
            usleep(100000);
        }).or_else([&](auto& error) {
            std::cout << "Could not allocate Terminal Response! Error: " << error << std::endl; });
    const iox::mepoo::ChunkHeader* chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload);
    spdlog::info("to release chunk in server terminal , head info, chunksize: {}", chunkHeader->chunkSize());
    server_->releaseRequest(request);
}
} // namespace middleware
} // namespace intercept
// Historical smoke test for the middleware write path.  The original body
// was fully commented out, so this is a no-op that always reports success.
int test() {
    return 0;
}

View File

@ -0,0 +1,76 @@
#pragma once
#include "req_res_middleware_wrapper.h"
#include "iceoryx_posh/popo/untyped_server.hpp"
#include "iceoryx_posh/popo/untyped_client.hpp"
namespace intercept {
namespace filesystem {
class AbstractFileSystem; // Forward declaration
}
}
namespace intercept {
namespace middleware {
// iceoryx-backed request/response middleware.
// As a server it owns an UntypedServer whose request payloads are dispatched
// to the Handle*Request methods; as a client it owns an UntypedClient used by
// OnRequest() to ship PosixOpReqRes objects to the server.
class IceoryxWrapper : public ReqResMiddlewareWrapper {
public:
explicit IceoryxWrapper(const ServiceMetaInfo& info);
~IceoryxWrapper() override;
virtual void Init() override;
virtual void InitClient() override;
virtual void InitServer() override;
virtual void InitDummyServer() override;
virtual void StartServer();
virtual void StartClient();
virtual void StopServer() override;
virtual void StopClient() override;
virtual void OnRequest(PosixOpReqRes& reqRes) override;
virtual void OnResponse() override;
virtual void Shutdown() override;
virtual ServiceMetaInfo GetServiceMetaInfo() override {return info_;}
private:
// One handler per intercepted POSIX operation.  Each loans a response
// chunk, fills it from fileSystem_, sends it and releases the request.
void HandleOpenRequest(const auto& requestPayload);
void HandleReadRequest(const auto& requestPayload);
void HandleWriteRequest(const auto& requestPayload);
void HandleCloseRequest(const auto& requestPayload);
void HandleLseekRequest(const auto& requestPayload);
void HandleFsyncRequest(const auto& requestPayload);
void HandleStatRequest(const auto& requestPayload);
void HandleFstatRequest(const auto& requestPayload);
void HandleMkdirRequest(const auto& requestPayload);
void HandleOpendirRequest(const auto& requestPayload);
void HandleGetdentsRequest(const auto& requestPayload);
void HandleClosedirRequest(const auto& requestPayload);
void HandleUnlinkRequest(const auto& requestPayload);
void HandleRenameRequest(const auto& requestPayload);
void HandleTruncateRequest(const auto& requestPayload);
void HandleTerminalRequest(const auto& requestPayload);
private:
std::shared_ptr<iox::popo::UntypedServer> server_;
std::shared_ptr<iox::popo::UntypedClient> client_;
// Presumably a sequence id for outgoing client requests — TODO confirm
// against the .cpp (not visible here).
int64_t requestSequenceId_ = 0;
// Serving-loop flag; cleared by HandleTerminalRequest.
bool running_ = false;
};
} // namespace middleware
} // namespace intercept

View File

@ -0,0 +1,49 @@
#include <iostream>
#include "middleware/req_res_middleware_wrapper.h"
#ifndef CLIENT_BUILD
#include "filesystem/curve_filesystem.h"
#include "filesystem/s3fs_filesystem.h"
#include "filesystem/dummy_filesystem.h"
#endif
#include "filesystem/abstract_filesystem.h"
namespace intercept {
namespace middleware {
using intercept::common::Configure;
// Base implementation is a no-op; concrete wrappers override Init().
void ReqResMiddlewareWrapper::Init() {
}
void ReqResMiddlewareWrapper::InitServer() {
if (info_.serverType == "dummy") {
spdlog::info("dont create fileSystem in ReqResMiddlewareWrapper::InitServer");
return;
}
if (!fileSystem_) {
#ifndef CLIENT_BUILD
if (Configure::getInstance().getConfig("backendFilesystem") == "s3fs") {
fileSystem_.reset(new intercept::filesystem::S3fsFileSystem);
} else if (Configure::getInstance().getConfig("backendFilesystem") == "curvefs") {
fileSystem_.reset(new intercept::filesystem::CurveFileSystem);
} else if (Configure::getInstance().getConfig("backendFilesystem") == "dummyfs") {
fileSystem_.reset(new intercept::filesystem::DummyFileSystem);
} else {
spdlog::error("dont create fileSystem in ReqResMiddlewareWrapper::InitServer");
return;
}
fileSystem_->Init();
spdlog::info("Initserver, filesystem: {}", Configure::getInstance().getConfig("backendFilesystem"));
#endif
} else {
spdlog::info("ReqResMiddlewareWrapper::InitServer, have inited, donot need to init again");
}
}
// Base implementation is a no-op; client setup lives in subclass overrides.
void ReqResMiddlewareWrapper::InitClient() {
}
} // namespace middleware
} // namespace intercept

View File

@ -0,0 +1,80 @@
#pragma once
#include <memory>
#include "internal/posix_op_req_res.h"
#include "internal/metainfo.h"
namespace intercept {
namespace filesystem {
class AbstractFileSystem; // Forward declaration
}
}
namespace intercept
{
namespace middleware
{
using intercept::internal::ServiceMetaInfo;
using intercept::internal::PosixOpReqRes;
// Role a wrapper instance plays in the IPC topology.
enum class ServiceType {
CLIENT = 0,
SERVER = 1,
DUMMYSERVER = 2,
};
// Abstract, transport-independent request/response middleware.
// Subclasses (e.g. IceoryxWrapper) implement the Start*/Stop*/OnRequest
// plumbing; this base owns backend-filesystem creation in InitServer().
class ReqResMiddlewareWrapper {
public:
ReqResMiddlewareWrapper() {
spdlog::info("construct ReqResMiddlewareWrapper");
}
ReqResMiddlewareWrapper(ServiceMetaInfo info) : info_(info) {
spdlog::info("construct ReqResMiddlewareWrapper");
}
virtual ~ReqResMiddlewareWrapper() {
spdlog::info("deconstruct ReqResMiddlewareWrapper");
}
virtual void Init();
virtual void InitClient();
virtual void InitServer();
virtual void SetServiceType(ServiceType type) {
servicetype_ = type;
}
virtual void InitDummyServer() {}
virtual void StartServer() = 0;
virtual void StartClient() = 0;
// External request entry point: blocks until the response is filled in.
// (NOTE(review): blocking behaviour presumed from usage in posix_op —
// confirm against the concrete wrapper.)
virtual void OnRequest(PosixOpReqRes& reqRes) = 0;
// External response entry point.
virtual void OnResponse() = 0;
virtual void Shutdown() = 0;
virtual ServiceMetaInfo GetServiceMetaInfo() = 0;
protected:
// Shared by all wrapper instances in the process (defined in the .cpp).
static std::shared_ptr<intercept::filesystem::AbstractFileSystem> fileSystem_;
ServiceMetaInfo info_;
// NOTE(review): never default-initialised; reading it before
// SetServiceType() is indeterminate — consider an in-class initialiser.
ServiceType servicetype_;
};
} // namespace middleware
} // namespace intercept

View File

@ -0,0 +1,13 @@
# src/posix/CMakeLists.txt
# Client-side build of the POSIX interception layer.
# NOTE: file(GLOB) misses newly added sources until the next re-configure;
# an explicit source list would be more robust.
file(GLOB POSIX_SOURCES *.cpp)
file(GLOB POSIX_HEADERS *.h)

add_library(intercept_posix_interface_client ${POSIX_SOURCES})
target_include_directories(intercept_posix_interface_client PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
)
target_link_libraries(intercept_posix_interface_client PUBLIC
    intercept_registry_client
)
# CLIENT_BUILD is a preprocessor definition, so declare it as one instead of
# smuggling a raw -D flag through compile options.  -fPIC stays PUBLIC so
# consumers that link this static library into a shared object inherit it.
target_compile_definitions(intercept_posix_interface_client PUBLIC CLIENT_BUILD)
target_compile_options(intercept_posix_interface_client PUBLIC -fPIC)

View File

@ -0,0 +1,102 @@
/*
* Copyright 2016-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* * Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LIBSYSCALL_INTERCEPT_HOOK_POINT_H
#define LIBSYSCALL_INTERCEPT_HOOK_POINT_H
/*
* The inteface for using the intercepting library.
* This callback function should be implemented by
* the code using the library.
*
* The syscall_number, and the six args describe the syscall
* currently being intercepted.
* A non-zero return value means libsyscall_intercept
* should execute the original syscall, use its result. A zero return value
* means libsyscall_intercept should not execute the syscall, and
* use the integer stored to *result as the result of the syscall
* to be returned in RAX to libc.
*/
#ifdef __cplusplus
extern "C" {
#endif
extern int (*intercept_hook_point)(long syscall_number,
long arg0, long arg1,
long arg2, long arg3,
long arg4, long arg5,
long *result);
extern void (*intercept_hook_point_clone_child)(void);
extern void (*intercept_hook_point_clone_parent)(long pid);
/*
* syscall_no_intercept - syscall without interception
*
* Call syscall_no_intercept to make syscalls
* from the interceptor library, once glibc is already patched.
* Don't use the syscall function from glibc, that
* would just result in an infinite recursion.
*/
long syscall_no_intercept(long syscall_number, ...);
/*
* syscall_error_code - examines a return value from
* syscall_no_intercept, and returns an error code if said
* return value indicates an error.
*/
/* Linux encodes syscall errors as small negative return values in
 * [-4095, -1]; anything else is a success value and maps to 0 here. */
static inline int
syscall_error_code(long result)
{
if (result < 0 && result >= -0x1000)
return (int)-result;
return 0;
}
/*
* The syscall intercepting library checks for the
* INTERCEPT_HOOK_CMDLINE_FILTER environment variable, with which one can
* control in which processes interception should actually happen.
* If the library is loaded in this process, but syscall interception
* is not allowed, the syscall_hook_in_process_allowed function returns zero,
* otherwise, it returns one. The user of the library can use it to notice
* such situations, where the code is loaded, but no syscall will be hooked.
*/
int syscall_hook_in_process_allowed(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2021 NetEase Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Project: curve
* Created Date: Thur May 27 2021
* Author: xuchaojie
*/
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <string>
#include <unordered_map>
#include "posix_op.h"
#include "syscall_client.h"
// Present only so this translation unit participates in linking; it performs
// no work and always reports success.
int help(int argc, char *argv[]) {
    (void)argc;
    (void)argv;
    return 0;
}

View File

@ -0,0 +1,657 @@
#include <cstdlib>
#include <cstring>
#include <stdio.h>
#include <syscall.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <iostream>
#include <fstream>
#include <unordered_map>
#include <cstring>
#include <memory>
#include <atomic>
#include <unistd.h>
#include <random>
#include "common/common.h"
#include "posix_op.h"
#include "middleware/iceoryx_wrapper.h"
#include "registry/client_server_registry.h"
using intercept::internal::FileType;
// Bookkeeping for one intercepted open file or directory.
struct PosixInfo {
std::string fileName;
FileType fileType;
uint64_t fd;
intercept::internal::DirStream dirinfo;
};
// Key: the fd handed back to the caller; value: the file's bookkeeping.
std::unordered_map<int, PosixInfo> g_fdtofile(10000);
// Synthetic fds start at BEGIN_COUNTER so they never collide with real
// kernel fds handed out by the host process.
constexpr uint32_t BEGIN_COUNTER = 10000;
std::atomic<uint32_t> g_fdCounter(BEGIN_COUNTER);
std::chrono::steady_clock::duration totalDuration = std::chrono::steady_clock::duration::zero(); // total elapsed time
int readnum = 0;
unsigned long g_processid = -1;
// Per-thread middleware channel; created lazily by ThreadInit().
thread_local std::shared_ptr<intercept::middleware::ReqResMiddlewareWrapper> g_wrapper;
thread_local bool g_initflag = false;
std::mutex global_mutex;
// Logs thread attach/detach; the destructor fires on thread exit.
thread_local struct ThreadCleanup {
ThreadCleanup() {
std::cout << "Thread cleanup object created\n";
}
~ThreadCleanup() {
std::cout << "Thread cleanup object destroyed\n";
}
} cleanup;
// Dispatch table indexed by syscall number; unregistered slots have a
// null name and are ignored by GetSyscallDesc().
struct syscall_desc table[1000] = {
{0, 0, {argNone, argNone, argNone, argNone, argNone, argNone}}};
#define FUNC_NAME(name) PosixOp##name
#define REGISTER_CALL(sysname, funcname, ...) \
table[SYS_##sysname] = syscall_desc { \
#sysname, (FUNC_NAME(funcname)), { __VA_ARGS__, } \
}
// ---------------------------init and unint----------------------------------
// Lazy per-thread initialisation: runs GlobalInit() once per process, then
// registers a dummy server and creates this thread's middleware client
// (g_wrapper).  Idempotent via the thread-local g_initflag.
int ThreadInit() {
// std::lock_guard<std::mutex> lock(global_mutex);
if (g_initflag == true) {
return 0;
}
std::stringstream ss;
auto myid = std::this_thread::get_id();
ss << myid;
std::string threadid = ss.str();
pthread_t tid = pthread_self();
pid_t pid = getpid();
// NOTE(review): this process-level check is not synchronised (the mutex is
// commented out above); two threads racing here could both run GlobalInit.
if (g_processid == -1) {
// process-level initialisation
g_processid = (unsigned long)pid;
GlobalInit();
}
spdlog::warn("thread init, processid: {}, threadid: {}, flag id: {}",
(unsigned long) pid, (unsigned long)tid, g_initflag);
// sleep(10);
intercept::internal::ServiceMetaInfo info;
info.service = SERVICE_FLAG;
info.instance = INTERCEPT_INSTANCE_FLAG;
intercept::registry::ClientServerRegistry registry(ICEORYX, info);
auto dummyserver = registry.CreateDummyServer();
// NOTE(review): fixed 5s wait for the dummy server to appear — fragile;
// a readiness handshake would be more reliable.
std::cout << "wait dummy server for client...." << std::endl;
sleep(5);
info = dummyserver->GetServiceMetaInfo();
info.service = SERVICE_FLAG;
info.instance = INTERCEPT_INSTANCE_FLAG;
g_wrapper = registry.CreateClient(info);
g_initflag = true;
return 0;
}
// Process-wide one-time initialisation: loads the config file, sets up
// logging and registers this process with the iceoryx runtime under a
// randomised application name (so several intercepted processes coexist).
// Returns 0 on success, -1 when the configuration file cannot be loaded.
int GlobalInit() {
    if (!intercept::common::Configure::getInstance().loadConfig(intercept::common::CONFIG_FILE)) {
        std::cout << "Config file not loaded:" << intercept::common::CONFIG_FILE << std::endl;
        // Fix: the original returned 0 (success) on this failure path too,
        // making the error invisible to callers.
        return -1;
    }
    std::cout << "Config file loaded : " << intercept::common::CONFIG_FILE << std::endl;
    intercept::common::InitLog();
    constexpr char BASE_APP_NAME[] = "iox-intercept-client";
    std::string appNameWithRandom = BASE_APP_NAME + intercept::common::generateRandomSuffix();
    iox::string<iox::MAX_RUNTIME_NAME_LENGTH> appname(iox::TruncateToCapacity, appNameWithRandom.c_str(), appNameWithRandom.length());
    spdlog::info("create app name: {}", appNameWithRandom);
    iox::runtime::PoshRuntime::initRuntime(appname);
    return 0;
}
// Placeholder teardown hook; nothing to release yet.
void UnInitPosixClient() {
}
// Shared-library constructor: runs when the interceptor is loaded into the
// target process.  Heavy init is deferred to the first intercepted call
// (ThreadInit); only a load notice is printed here.
static __attribute__((constructor)) void Init(void) {
printf("Library loaded: PID %d TID: %lu\n", getpid(), (unsigned long)pthread_self());
//GlobalInit();
}
// Shared-library destructor: logs process/thread ids on unload.
static __attribute__((destructor)) void Clean(void) {
// std::cout << "readnum: " << readnum << " , total time : " << totalDuration.count() << " , average time : " << totalDuration.count() / readnum << std::endl;
pthread_t tid = pthread_self();
pid_t pid = getpid();
std::cout << "exit and kill, pid:" << (unsigned long)pid
<< " threadid:" << (unsigned long) tid << std::endl;
//kill(getpid(), SIGINT);
//sleep(5);
}
// ---------------------------posix func----------------------------------------------
// 判断字符串是否以指定挂载点开头
// True when `str` begins with the intercept mount point ("/testdir").
bool StartsWithMountPath(const char *str) {
    // Built once; the original constructed a std::string on every call,
    // which is wasted work on this hot path.
    static const std::string mountpath = "/testdir";
    return strncmp(str, mountpath.c_str(), mountpath.length()) == 0;
}
// Placeholder path-translation hook; currently always yields an empty string.
std::string GetPath(const char* path) {
    (void)path;
    return std::string();
}
// 获取相对路径
// Returns the final path component: everything after the last '/' or '\\'.
std::string GetRelativeFilePath(const std::string& fullPath) {
    const size_t sep = fullPath.find_last_of("/\\");
    // npos + 1 wraps to 0, so a path without separators is returned whole.
    return fullPath.substr(sep + 1);
}
// 判断路径是否有效
// Decides whether a syscall's path/fd argument targets the intercepted
// filesystem (and therefore should be handled by us instead of the kernel).
// arg0/arg1 are the raw first two syscall arguments; their meaning depends
// on `type` (fd, C-string path, or dirfd + path).
bool IsValidPath(arg_type type, long arg0, long arg1) {
    switch (type) {
    case argFd: {
        const int fd = (int)arg0;
        // Fix: the original wrote `(x && g_fdtofile.count(fd)) > 0`,
        // comparing the bool result of && against 0; the parentheses now
        // group the count() comparison as intended.
        return fd >= BEGIN_COUNTER &&
               !g_fdtofile.empty() && g_fdtofile.count(fd) > 0;
    }
    case argCstr:
        // Path argument: intercept only paths under the mount point.
        return StartsWithMountPath(reinterpret_cast<const char *>(arg0));
    case argAtfd:
        // *at() form: either the path is under the mount point or the dirfd
        // is one of ours (same parenthesis fix as above).
        return StartsWithMountPath(reinterpret_cast<const char *>(arg1)) ||
               (!g_fdtofile.empty() && g_fdtofile.count((int)arg0) > 0);
    case arg_:
        // No path/fd to validate — always intercept.
        return true;
    default:
        return false;
    }
}
// 判断系统调用是否需要拦截
// A syscall is intercepted when its first declared path/fd argument targets
// our mount.  Note: only the first argument slot is validated here.
bool ShouldInterceptSyscall(const struct syscall_desc *desc, const long *args) {
return IsValidPath(desc->args[0], args[0], args[1]);
}
// Returns the registered descriptor for `syscallNumber`, or nullptr when the
// syscall is out of range, unregistered (null name slot), or should not be
// intercepted for these arguments.
const struct syscall_desc *GetSyscallDesc(long syscallNumber,
                                          const long args[6]) {
    if (syscallNumber < 0 ||
        static_cast<size_t>(syscallNumber) >= sizeof(table) / sizeof(table[0]) ||
        table[syscallNumber].name == NULL ||
        ShouldInterceptSyscall(&table[syscallNumber], args) == false) {
        return nullptr;
    }
    // (Removed dead commented-out sprintf debug code.)
    return table + syscallNumber;
}
// Atomically hands out the next synthetic fd (monotonic, starts at BEGIN_COUNTER).
uint32_t GetNextFileDescriptor() { return g_fdCounter.fetch_add(1); }
// Populates the syscall dispatch table.  aarch64 lacks several legacy
// syscalls (open, stat, mkdir, unlink, rmdir, rename, ...), so only the
// *at/new variants are registered there.
void InitSyscall() {
#ifdef __aarch64__
//REGISTER_CALL(access, Access, argCstr, argMode);
REGISTER_CALL(faccessat, Faccessat, argAtfd, argCstr, argMode);
//REGISTER_CALL(open, Open, argCstr, argOpenFlags, argMode);
REGISTER_CALL(close, Close, argFd);
REGISTER_CALL(openat, Openat, argAtfd, argCstr, argOpenFlags, argMode);
//REGISTER_CALL(creat, Creat, argCstr, argMode);
REGISTER_CALL(write, Write, argFd);
REGISTER_CALL(read, Read, argFd);
REGISTER_CALL(fsync, Fsync, argFd);
REGISTER_CALL(lseek, Lseek, argFd);
//REGISTER_CALL(stat, Stat, argCstr);
// for fstatat
REGISTER_CALL(newfstatat, Newfstatat, argAtfd, argCstr);
REGISTER_CALL(fstat, Fstat, argFd);
REGISTER_CALL(statx, Statx, argAtfd, argCstr);
//REGISTER_CALL(lstat, Lstat, argCstr);
//REGISTER_CALL(mkdir, MkDir, argCstr, argMode);
REGISTER_CALL(mkdirat, MkDirat, argAtfd, argCstr, argMode);
REGISTER_CALL(getdents64, Getdents64, argFd, argCstr, arg_);
//REGISTER_CALL(unlink, Unlink, argCstr);
REGISTER_CALL(unlinkat, Unlinkat, argAtfd, argCstr, argMode);
//REGISTER_CALL(rmdir, Rmdir, argCstr);
REGISTER_CALL(chdir, Chdir, argCstr);
REGISTER_CALL(utimensat, Utimensat, argAtfd, argCstr);
REGISTER_CALL(statfs, Statfs, argCstr);
REGISTER_CALL(fstatfs, Fstatfs, argFd);
REGISTER_CALL(truncate, Truncate, argCstr);
REGISTER_CALL(ftruncate, Ftruncate, argFd);
REGISTER_CALL(renameat, Renameat, argAtfd, argCstr);
#else
// x86-64: both the legacy and the *at variants are available.
REGISTER_CALL(access, Access, argCstr, argMode);
REGISTER_CALL(faccessat, Faccessat, argAtfd, argCstr, argMode);
REGISTER_CALL(open, Open, argCstr, argOpenFlags, argMode);
REGISTER_CALL(close, Close, argFd);
REGISTER_CALL(openat, Openat, argAtfd, argCstr, argOpenFlags, argMode);
REGISTER_CALL(creat, Creat, argCstr, argMode);
REGISTER_CALL(write, Write, argFd);
REGISTER_CALL(read, Read, argFd);
REGISTER_CALL(fsync, Fsync, argFd);
REGISTER_CALL(lseek, Lseek, argFd);
REGISTER_CALL(stat, Stat, argCstr);
// for fstatat
REGISTER_CALL(newfstatat, Newfstatat, argAtfd, argCstr);
REGISTER_CALL(fstat, Fstat, argFd);
REGISTER_CALL(lstat, Lstat, argCstr);
REGISTER_CALL(mkdir, MkDir, argCstr, argMode);
REGISTER_CALL(getdents64, Getdents64, argFd, argCstr, arg_);
REGISTER_CALL(unlink, Unlink, argCstr);
REGISTER_CALL(unlinkat, Unlinkat, argAtfd, argCstr, argMode);
REGISTER_CALL(rmdir, Rmdir, argCstr);
REGISTER_CALL(chdir, Chdir, argCstr);
REGISTER_CALL(utimensat, Utimensat, argAtfd, argCstr);
REGISTER_CALL(statfs, Statfs, argCstr);
REGISTER_CALL(fstatfs, Fstatfs, argFd);
REGISTER_CALL(truncate, Truncate, argCstr);
REGISTER_CALL(ftruncate, Ftruncate, argFd);
REGISTER_CALL(rename, Rename, argCstr, argCstr);
#endif
}
// access(2) stub: always reports success for intercepted paths.
int PosixOpAccess(const long *args, long *result) {
return 0;
}
// faccessat(2): skip the dirfd in args[0] and reuse the access stub.
int PosixOpFaccessat(const long *args, long *result) {
return PosixOpAccess(args + 1, result);
}
// open(2) replacement.  Directories (O_DIRECTORY) go through the opendir
// path, regular files through the open path.  The backend fd is offset by
// BEGIN_COUNTER before being returned upstream, and the mapping is recorded
// in g_fdtofile under that upstream fd.
int PosixOpOpen(const long *args, long *result) {
ThreadInit();
const char* path = (const char*)args[0];
int flags = args[1];
mode_t mode = args[2];
if (flags & O_DIRECTORY) {
intercept::internal::OpendirOpReqRes req(path);
g_wrapper->OnRequest(req);
const auto& openRes = static_cast<intercept::internal::OpendirResponseData&> (req.GetResponse());
// fd returned to the caller (offset so it cannot collide with real fds)
*result = openRes.dirStream.fh + BEGIN_COUNTER;
// record the opened fd
PosixInfo info;
info.fd = *result;
info.dirinfo = openRes.dirStream;
info.fileType = FileType::DIR;
g_fdtofile[*result] = info;
std::cout << "the opendir result fd is: " << *result << std::endl;
} else {
intercept::internal::OpenOpReqRes req(path, flags, mode);
g_wrapper->OnRequest(req);
const auto& openRes = static_cast<intercept::internal::OpenResponseData&> (req.GetResponse());
// fd returned to the caller
*result = openRes.fd + BEGIN_COUNTER;
// record the opened fd
PosixInfo info;
info.fd = *result;
info.fileType = FileType::FILE;
info.fileName = path;
g_fdtofile[*result] = info;
spdlog::info("the open result fd: {}, path: {}", *result, path);
}
return 0;
}
// openat(2): intercepted paths are absolute (they must start with the mount
// point), so the dirfd in args[0] can be skipped and this degenerates to open.
int PosixOpOpenat(const long *args, long *result) {
return PosixOpOpen(args + 1, result); // args[0] is dir fd, jump
}
// creat(2) stub: reports success without doing anything.
int PosixOpCreat(const long *args, long *result) {
return 0;
}
// read(2) replacement: translate the upstream fd back to the backend fd and
// fetch `count` bytes through the middleware into the caller's buffer.
// *result receives the byte count; returns 0 (syscall handled).
int PosixOpRead(const long *args, long *result) {
    ThreadInit();
    int fd = args[0] - BEGIN_COUNTER;
    char* buf = (char*)args[1];
    int count = args[2];
    // Fix: g_fdtofile is keyed by the upstream fd (args[0], see PosixOpOpen),
    // but the original indexed it with the backend fd via operator[], which
    // always missed AND inserted a spurious empty entry into the map.
    std::string filename;
    auto it = g_fdtofile.find((int)args[0]);
    if (it != g_fdtofile.end()) {
        filename = it->second.fileName;
    }
    std::string timeinfo = "client read, count: " + std::to_string(count) + " filename: " + filename;
    intercept::common::Timer timer(timeinfo);
    intercept::internal::ReadOpReqRes readReq(fd, buf, count);
    g_wrapper->OnRequest(readReq);
    const auto& readRes = static_cast<intercept::internal::ReadResponseData&> (readReq.GetResponse());
    *result = readRes.length;
    spdlog::debug("read fd: {}, length: {}", fd, readRes.length);
    return 0;
}
// write(2) replacement: forward `count` bytes from the caller's buffer to
// the backend through the middleware; *result gets the number written.
int PosixOpWrite(const long *args, long *result) {
    spdlog::debug("get write request...");
    ThreadInit();
    int backendFd = args[0] - BEGIN_COUNTER;
    char* data = (char*)args[1];
    int nbytes = args[2];
    std::string timeinfo = "client write, count: " + std::to_string(nbytes);
    intercept::common::Timer timer(timeinfo);
    intercept::internal::WriteOpReqRes writeReq(backendFd, data, nbytes);
    g_wrapper->OnRequest(writeReq);
    const auto& writeRes = static_cast<intercept::internal::WriteResponseData&>(writeReq.GetResponse());
    *result = writeRes.length;
    spdlog::debug("write fd: {}, length: {}", backendFd, writeRes.length);
    return 0;
}
int PosixOpFsync(const long *args, long *result) {
    // fsync(fd): flush pending data for the fd through the middleware.
    // *result carries the middleware's return code.
    ThreadInit();
    int fd = args[0] - BEGIN_COUNTER;  // translate back to the internal fd
    spdlog::info("begin fsync, fd: {}", fd);
    intercept::internal::FsyncOpReqRes fsyncReq(fd);
    g_wrapper->OnRequest(fsyncReq);
    const auto& fsyncRes = static_cast<intercept::internal::FsyncResponseData&> (fsyncReq.GetResponse());
    *result = fsyncRes.ret;
    // Fixed typo in the log message ("fysnc" -> "fsync").
    spdlog::info("the fsync result is: {}", *result);
    return 0;
}
int PosixOpLseek(const long *args, long *result) {
    // lseek(fd, offset, whence): reposition the file offset via the middleware.
    ThreadInit();
    const int fd = static_cast<int>(args[0]) - BEGIN_COUNTER;
    const long seekOffset = args[1];
    const int seekWhence = static_cast<int>(args[2]);
    intercept::internal::LseekOpReqRes lseekReq(fd, seekOffset, seekWhence);
    g_wrapper->OnRequest(lseekReq);
    const auto& lseekRes = static_cast<intercept::internal::LseekResponseData&>(lseekReq.GetResponse());
    // New absolute offset (or error code) handed back to the caller.
    *result = lseekRes.ret;
    spdlog::debug("lseek, fd: {}, offset: {}, whence: {}, result: {}", fd, seekOffset, seekWhence, *result);
    return 0;
}
int PosixOpStat(const long *args, long *result) {
ThreadInit();
spdlog::debug("it is opstat...");
const char* filename = (const char*) args[0];
struct stat* statbuf = (struct stat*) args[1];
intercept::internal::StatOpReqRes statReq(filename, statbuf);
g_wrapper->OnRequest(statReq);
const auto& statRes = static_cast<intercept::internal::StatResponseData&> (statReq.GetResponse());
// 向上游返回的fd
*result = statRes.ret;
spdlog::debug("the stat result fd: {}", *result);
return 0;
}
int PosixOpNewfstatat(const long *args, long *result) {
    // newfstatat(dirfd, pathname, statbuf, flags).
    std::cout << "newfstatat" << std::endl;
    // TODO: resolve args[1] relative to the directory fd in args[0].
    if (strlen((char*)args[1]) == 0) {
        // Empty path: behave like fstat(dirfd, statbuf) (AT_EMPTY_PATH style).
        long newargs[2];
        newargs[0] = args[0];  // fd
        newargs[1] = args[2];  // struct stat*
        return PosixOpFstat(newargs, result);
    }
    // Non-empty path: treated as absolute and forwarded to the stat handler.
    // (Removed an unused `ret` local and shrank the half-initialized array.)
    return PosixOpStat(args + 1, result);
}
// lstat(pathname, statbuf): symbolic links are not treated specially here;
// the call is forwarded verbatim to the stat handler, so it follows links.
int PosixOpLstat(const long *args, long *result) {
    std::cout << "call PosixOpLstat" << std::endl;
    return PosixOpStat(args, result);
}
int PosixOpFstat(const long *args, long *result) {
ThreadInit();
spdlog::debug("it is opfstat...");
int fd = args[0] - BEGIN_COUNTER;
struct stat* statbuf = (struct stat*) args[1];
intercept::internal::FstatOpReqRes statReq(fd, statbuf);
g_wrapper->OnRequest(statReq);
const auto& statRes = static_cast<intercept::internal::FstatResponseData&> (statReq.GetResponse());
// 向上游返回的fd
*result = statRes.ret;
spdlog::debug("the fstat result fd: {}, the stat ino: {}, size: {}",
fd, statbuf->st_ino, statbuf->st_size);
return 0;
}
// fstat64: not implemented — logs and reports success without filling the
// stat buffer. NOTE(review): *result is left untouched; callers may read an
// uninitialized value.
int PosixOpFstat64(const long *args, long *result) {
    std::cout << "it is opfstat64" << std::endl;
    return 0;
}
int PosixOpStatx(const long *args, long *result) {
    // statx(dirfd, pathname, flags, mask, statxbuf): served by running a plain
    // stat() through the middleware and copying the fields across. dirfd,
    // flags and mask (args[0], args[2], args[3]) are currently ignored.
    ThreadInit();
    std::cout << "it is opstatx" << std::endl;
    const char* filename = (const char*) args[1];
    struct statx* fileStat = (struct statx*) args[4];
    struct stat tmpStat;
    intercept::internal::StatOpReqRes statReq(filename, &tmpStat);
    g_wrapper->OnRequest(statReq);
    const auto& statRes = static_cast<intercept::internal::StatResponseData&> (statReq.GetResponse());
    *result = statRes.ret;
    if (statRes.ret != 0 ) {
        std::cout << "get stat failed.." << std::endl;
        // Bug fix: the original kept going and copied fields out of the
        // uninitialized tmpStat into the caller's statx buffer on failure.
        return 0;
    }
    // inode number
    fileStat->stx_ino = (uint64_t)tmpStat.st_ino;
    // total size, in bytes
    fileStat->stx_size = (uint64_t)tmpStat.st_size;
    // protection
    fileStat->stx_mode = (uint32_t)tmpStat.st_mode;
    // number of hard links
    fileStat->stx_nlink = (uint32_t)tmpStat.st_nlink;
    // user ID of owner
    fileStat->stx_uid = (uint32_t)tmpStat.st_uid;
    // group ID of owner
    fileStat->stx_gid = (uint32_t)tmpStat.st_gid;
    // last access time
    fileStat->stx_atime.tv_sec = tmpStat.st_atim.tv_sec;
    fileStat->stx_atime.tv_nsec = tmpStat.st_atim.tv_nsec;
    // last modification time
    fileStat->stx_mtime.tv_sec = tmpStat.st_mtim.tv_sec;
    fileStat->stx_mtime.tv_nsec = tmpStat.st_mtim.tv_nsec;
    // last status change time
    fileStat->stx_ctime.tv_sec = tmpStat.st_ctim.tv_sec;
    fileStat->stx_ctime.tv_nsec = tmpStat.st_ctim.tv_nsec;
    // No extended attributes are reported.
    fileStat->stx_attributes = 0;
    fileStat->stx_attributes_mask = 0;
    // Advertise which fields were actually filled in; statx() callers are
    // expected to consult stx_mask (the original never set it).
    fileStat->stx_mask = STATX_BASIC_STATS;
    return 0;
}
int PosixOpClose(const long *args, long *result) {
    // close(fd): route to the file or directory close path depending on what
    // open recorded for this fd, then drop the bookkeeping entry.
    auto it = g_fdtofile.find((int)args[0]);
    if (it == g_fdtofile.end()) {
        // Bug fix: the original only logged here and then used operator[],
        // which inserted a default PosixInfo and fell into the "unknown file
        // type" branch. Bail out without polluting the map instead.
        std::cout << "fd not found: " << args[0] << std::endl;
        return 0;
    }
    const auto& info = it->second;
    if (info.fileType == FileType::FILE) {
        int fd = args[0] - BEGIN_COUNTER;
        intercept::internal::CloseOpReqRes req(fd);
        spdlog::info("begin close, fd: {}", fd);
        g_wrapper->OnRequest(req);
        const auto& closeRes = static_cast<intercept::internal::CloseResponseData&> (req.GetResponse());
        // Return code handed back to the caller.
        *result = closeRes.ret;
        spdlog::info("the close result, fd: {}", fd);
    } else if (info.fileType == FileType::DIR) {
        int fd = args[0] - BEGIN_COUNTER;
        intercept::internal::ClosedirOpReqRes req(info.dirinfo);
        g_wrapper->OnRequest(req);
        const auto& closeRes = static_cast<intercept::internal::CloseResponseData&> (req.GetResponse());
        *result = closeRes.ret;
        std::cout << "the closedir result fd is: " << fd << std::endl;
    } else {
        std::cout << "unknown file type for close" << std::endl;
    }
    g_fdtofile.erase((int)args[0]);
    return 0;
}
int PosixOpMkDir(const long *args, long *result) {
    // mkdir(path, mode): create a directory through the middleware.
    ThreadInit();
    const char* dirPath = reinterpret_cast<const char*>(args[0]);
    const mode_t dirMode = static_cast<mode_t>(args[1]);
    intercept::internal::MkdirOpReqRes req(dirPath, dirMode);
    g_wrapper->OnRequest(req);
    const auto& mkdirRes = static_cast<intercept::internal::MkdirResponseData&>(req.GetResponse());
    // Return code handed back to the caller.
    *result = mkdirRes.ret;
    std::cout << "the mkdir result fd is: " << *result << std::endl;
    return 0;
}
// mkdirat(dirfd, pathname, mode): the directory fd is ignored; the pathname
// is treated as absolute and forwarded to the plain mkdir handler.
int PosixOpMkDirat(const long *args, long *result) {
    return PosixOpMkDir(args + 1, result);
}
// opendir(name): not implemented — logs and reports success.
// NOTE(review): *result is never set, so callers receive an undefined DIR*.
int PosixOpOpenDir(const long *args, long *result) {
    std::cout << "open dir....." << std::endl;
    return 0;
}
int PosixOpGetdents64(const long *args, long *result) {
    // getdents64(fd, dirp, count): stream directory entries via the middleware
    // and advance the per-fd directory cursor.
    const int fd = static_cast<int>(args[0]) - BEGIN_COUNTER;
    char* entryBuf = reinterpret_cast<char*>(args[1]);
    const size_t maxread = static_cast<size_t>(args[2]);
    auto it = g_fdtofile.find(args[0]);
    if (it == g_fdtofile.end()) {
        std::cout << "fd not found" << std::endl;
        *result = 0;
        return 0;
    }
    std::cout << "getdents request, fd: " << fd << " maxread: " << maxread << std::endl;
    PosixInfo& posixinfo = it->second;
    intercept::internal::GetdentsOpReqRes req(posixinfo.dirinfo, entryBuf, maxread);
    g_wrapper->OnRequest(req);
    const auto& getdentsRes = static_cast<intercept::internal::GetdentsResponseData&>(req.GetResponse());
    // Remember how far we got so the next call resumes from this offset.
    posixinfo.dirinfo.offset = getdentsRes.dirinfo.offset;
    *result = getdentsRes.realbytes;
    std::cout << "the getdents result bytes:" << getdentsRes.realbytes << ", offset is: " << getdentsRes.dirinfo.offset << std::endl;
    return 0;
}
// rmdir(pathname): directory removal is delegated to the unlink handler.
// NOTE(review): PosixOpUnlink's return value is discarded — this always
// returns 0 while *result carries the middleware return code.
int PosixOpRmdir(const long *args, long *result) {
    std::cout << "rmdir, call thePosixOpUnlink " << std::endl;
    PosixOpUnlink(args, result);
    return 0;
}
// chdir(path): not implemented — pretends success without tracking the cwd.
// NOTE(review): *result is never set.
int PosixOpChdir(const long *args, long *result) {
    return 0;
}
int PosixOpUnlink(const long *args, long *result) {
    // unlink(pathname): remove a file through the middleware.
    const char* target = reinterpret_cast<const char*>(args[0]);
    intercept::internal::UnlinkOpReqRes req(target);
    g_wrapper->OnRequest(req);
    const auto& unlinkRes = static_cast<intercept::internal::UnlinkResponseData&>(req.GetResponse());
    // Return code handed back to the caller.
    *result = unlinkRes.ret;
    std::cout << "the unlink path: " << target << " ,result fd is: " << *result << std::endl;
    return 0;
}
int PosixOpUnlinkat(const long *args, long *result) {
    // unlinkat(dirfd, pathname, flags): dirfd is ignored and the pathname is
    // treated as absolute. AT_REMOVEDIR routes to the rmdir handler.
    const int flags = static_cast<int>(args[2]);
    if ((flags & AT_REMOVEDIR) != 0) {
        std::cout << "unlinkat remove dir..." << std::endl;
        PosixOpRmdir(args + 1, result);
        return 0;
    }
    // Plain file removal; resolving relative to dirfd is not supported yet.
    const int ret = PosixOpUnlink(args + 1, result);
    std::cout << "unlinkat... ret: " << ret << std::endl;
    return ret;
}
int PosixOpUtimensat(const long* args, long *result) {
    // utimensat(dirfd, pathname, times, flags): not implemented — timestamps
    // are silently left unchanged. Removed the unused `dirfd` local the stub
    // declared (it triggered -Wunused-variable and served no purpose).
    (void)args;
    (void)result;  // NOTE(review): *result is never set — confirm callers tolerate this
    return 0;
}
// exit_group(status): nothing for the interceptor to clean up; let the
// process terminate normally.
int PosixOpExitgroup(const long* args, long *result) {
    return 0;
}
// statfs(path, buf): not implemented — reports success without filling the
// buffer. NOTE(review): *result is never set.
int PosixOpStatfs(const long* args, long *result) {
    return 0;
}
// fstatfs(fd, buf): not implemented — reports success without filling the
// buffer. NOTE(review): *result is never set.
int PosixOpFstatfs(const long* args, long *result) {
    return 0;
}
int PosixOpTruncate(const long* args, long *result) {
    // truncate(path, length): resize a file through the middleware.
    const char* target = reinterpret_cast<const char*>(args[0]);
    const off_t newLength = static_cast<off_t>(args[1]);
    intercept::internal::TruncateOpReqRes req(target, newLength);
    g_wrapper->OnRequest(req);
    const auto& truncateRes = static_cast<intercept::internal::TruncateResponseData&>(req.GetResponse());
    // Return code handed back to the caller.
    *result = truncateRes.ret;
    std::cout << "the truncate path: " << target << " ,result fd is: " << *result << std::endl;
    return 0;
}
// ftruncate(fd, length): not implemented — reports success without resizing.
// NOTE(review): *result is never set.
int PosixOpFtruncate(const long* args, long *result) {
    return 0;
}
// rename(oldpath, newpath): not implemented — reports success without
// renaming (only renameat is wired up). NOTE(review): *result is never set.
int PosixOpRename(const long *args, long *result) {
    return 0;
}
int PosixOpRenameat(const long *args, long *result) {
    // renameat(olddirfd, oldpath, newdirfd, newpath): both directory fds are
    // ignored; the paths are assumed to be absolute.
    const char* oldpath = reinterpret_cast<const char*>(args[1]);
    const char* newpath = reinterpret_cast<const char*>(args[3]);
    intercept::internal::RenameOpReqRes req(oldpath, newpath);
    g_wrapper->OnRequest(req);
    const auto& renameRes = static_cast<intercept::internal::RenameResponseData&>(req.GetResponse());
    // Return code handed back to the caller.
    *result = renameRes.ret;
    std::cout << "the rename path: " << oldpath << " ,result fd is: " << *result << std::endl;
    return 0;
}

493
intercept/posix/posix_op.h Normal file
View File

@ -0,0 +1,493 @@
#ifndef CURVEFS_SRC_CLIENT_CURVE_POSIX_OP_H_
#define CURVEFS_SRC_CLIENT_CURVE_POSIX_OP_H_
#include <string>
#include <unordered_map>
// #include "curvefs/src/client/filesystem/meta.h"
// using ::curvefs::client::filesystem::PosixFile;
// extern std::unordered_map<int, PosixFile*> g_fdtofile;
typedef int (*syscallFunction_t)(const long *args, long *result);
enum arg_type {
argNone,
argFd,
argAtfd,
argCstr,
argOpenFlags,
argMode,
arg_ /* no special formatting implemented yet, print as hex number */
};
struct syscall_desc {
const char *name;
syscallFunction_t syscallFunction;
enum arg_type args[6];
};
extern struct syscall_desc table[1000];
bool ShouldInterceptSyscall(const struct syscall_desc* desc, const long* args);
void InitSyscall();
const struct syscall_desc* GetSyscallDesc(long syscallNumber, const long args[6]);
bool StartsWithMountPath(const char* str);
int GlobalInit();
void UnInitPosixClient();
#ifdef __cplusplus
extern "C" {
#endif
/**
* The access() function is used to check the permissions of a file or directory.
*
* @param args[0] const char* path The path name of the file or directory to be checked.
* @param args[1] int: mode The mode specifies the desired permissions to be verified, and can be a combination of the following constants using bitwise OR:
* - R_OK: Check if the file or directory is readable.
* - W_OK: Check if the file or directory is writable.
* - X_OK: Check if the file or directory is executable.
* - F_OK: Check if the file or directory exists.
* @return If the file or directory has the specified permissions (or exists), it returns 0. Otherwise, it returns -1 (with an errno error code set).
*/
int PosixOpAccess(const long *args, long *result);
/**
* The faccessat() function is used to check the permissions of a file or directory relative to a specified directory file descriptor.
*
* @param args[0] int: dirfd The file descriptor of the directory from which the path is relative.
* @param args[1] const char* pathname The relative path name of the file or directory to be checked.
* @param args[2] int The mode specifies the desired permissions to be verified, and can be a combination of the following constants using bitwise OR:
* - R_OK: Check if the file or directory is readable.
* - W_OK: Check if the file or directory is writable.
* - X_OK: Check if the file or directory is executable.
* - F_OK: Check if the file or directory exists.
* @param args[3] int Flags for controlling how the function operates, such as AT_SYMLINK_NOFOLLOW to not follow symbolic links.
* @return If the file or directory has the specified permissions (or exists), it returns 0. Otherwise, it returns -1 (with an errno error code set).
*/
int PosixOpFaccessat(const long *args, long *result);
/**
* Open a file
*
* Opens the file specified by 'path' with the given 'flags'.
* The 'flags' parameter provides information about the access mode
* (read, write, read-write) and other options for opening the file.
*
* args[0]: path The path of the file to be opened
* args[1]: flags The flags controlling the file open operation
* args[2]: mode The mode for accessing file, only be used for creating new file
* result: The file descriptor on success, or -1 on failure with errno set
*/
int PosixOpOpen(const long *args, long *result);
int PosixOpOpenat(const long *args, long *result);
/**
* Creates a new file or truncates an existing file.
*
* args[0] pathname The path to the file to be created.
* args[1] mode The permissions to be set for the newly created file.
*
* result: On success, the file descriptor for the newly created file is returned.
* On error, -1 is returned, and errno is set appropriately.
*/
int PosixOpCreat(const long *args, long *result);
/**
* Read data from a file
*
* Reads up to 'count' bytes from the file associated with the file
* descriptor 'fd' into the buffer pointed to by 'buf',
* The actual number of bytes read is returned.
*
* args[0]: int fd: The file descriptor of the file to read from
* args[1]: void* buf: The buffer to store the read data
* args[2]: size_t count: The maximum number of bytes to read
* result: The number of bytes read on success, or -1 on failure with errno set
*/
int PosixOpRead(const long *args, long *result);
/**
* Read data from a file
*
* Reads up to 'count' bytes from the file associated with the file
* descriptor 'fd' into the buffer pointed to by 'buf', starting at
* the specified 'offset'. The actual number of bytes read is returned.
*
* args[0] int fd: The file descriptor of the file to read from
* args[1] void* buf: The buffer to store the read data
* args[2] size_t count: The maximum number of bytes to read
* args[3] off_t offset: The offset within the file to start reading from
* result: The number of bytes read on success, or -1 on failure with errno set
*/
int PosixOpPread(const long *args, long *result);
/**
* Write data to a file
*
* Writes up to 'count' bytes from the buffer pointed to by 'buf'
* to the file associated with the file descriptor 'fd'.
* The actual number of bytes written is returned.
*
* args[0] int fd: The file descriptor of the file to write to
* args[1] const void* buf: The buffer containing the data to be written
* args[2] size_t count: The number of bytes to write
* result: The number of bytes written on success, or -1 on failure with errno set
*/
int PosixOpWrite(const long *args, long *result);
/**
* Write data to a file
*
* Writes up to 'count' bytes from the buffer pointed to by 'buf'
* to the file associated with the file descriptor 'fd', starting at
* the specified 'offset'. The actual number of bytes written is returned.
*
* args[0] int fd: The file descriptor of the file to write to
* args[1] const void* buf: The buffer containing the data to be written
* args[2] size_t count: The number of bytes to write
* args[3] off_t offset: The offset within the file to start writing to
* result: The number of bytes written on success, or -1 on failure with errno set
*/
int PosixOpPwrite(const long *args, long *result);
/**
* Sets the current read/write position of a file descriptor.
*
* args[0] int fd: The file descriptor representing the file.
* args[1] off_t offset: The offset relative to the 'whence' position.
* args[2] int whence: The reference position for calculating the offset:
* - SEEK_SET: Calculates from the beginning of the file.
* - SEEK_CUR: Calculates from the current position.
* - SEEK_END: Calculates from the end of the file.
*
* result The new offset of the file, or -1 if an error occurs.
*/
int PosixOpLseek(const long *args, long *result);
/**
* Close a file
*
* args[0] int fd: The file descriptor of the file to close
* result: 0 on success, or -1 on failure with errno set
*/
int PosixOpClose(const long *args, long *result);
/**
* Create a directory.
*
* args[0] const char* name: Name of the directory to create
* args[1] mode_t mode: Mode with which to create the new directory
* result: 0 on success, -1 on failure
*/
int PosixOpMkDir(const long *args, long *result);
/**
* mkdirat - create a new directory relative to a directory file descriptor
* @dirfd: the file descriptor of the base directory
* @pathname: the pathname of the new directory to be created
* @mode: the permissions to be set for the new directory
*
* Returns: 0 on success, or -1 on failure
*/
int PosixOpMkDirat(const long *args, long *result);
/**
* Open a directory
*
* @args[0] const char* name: dirname The path to the directory you want to open.
*
* @result: If successful, returns a pointer to a DIR structure that can be
* used for subsequent directory operations. If there's an error,
* it returns NULL, and you can use the errno variable to check the
* specific error.
*/
int PosixOpOpenDir(const long *args, long *result);
/**
* Read directory entries from a directory file descriptor.
*
* @args[0]: fd File descriptor of the directory to read.
* @args[1]: dirp Pointer to a buffer where the directory entries will be stored.
* @args[2]: count The size of the buffer `dirp` in bytes.
*
* @result: realbytes, On success, returns the number of bytes read into the buffer `dirp`.
* On error, returns -1 and sets the appropriate errno.
*/
//ssize_t PosixOpGetdents64(int fd, struct linux_dirent64 *dirp, size_t count);
int PosixOpGetdents64(const long *args, long *result);
/**
* Deletes a directory, which must be empty.
*
*
* args[0] const char* name: Name of the directory to remove
* result: 0 on success, -1 on failure
*/
int PosixOpRmdir(const long *args, long *result);
/**
A function to change the current working directory of the calling process.
@param args - A pointer to a null-terminated string specifying the path to the new working directory
@param result - A pointer to an integer where the result of the operation will be stored.
On successful completion, 0 will be returned.
In case of failure, a non-zero value is returned.
@return - On successful completion, the function should return 0.
If the function encounters an error, it will return -1 and set errno accordingly.
*/
int PosixOpChdir(const long *args, long *result);
/**
* Rename a file
*
* Renames the file specified by 'oldpath' to 'newpath'.
* If 'newpath' already exists, it should be replaced atomically.
* If the target's inode's lookup count is non-zero, the file system
* is expected to postpone any removal of the inode until the lookup
 * count reaches zero.
*
* args[0] const char* oldpath: The path of the file to be renamed
* args[1] const char* newpath: The new path of the file
* result: 0 on success, or -1 on failure with errno set
*/
int PosixOpRename(const long *args, long *result);
/*
* Renameat renames a file, moving it between directories if required.
*
* args[0] int olddirfd: The file descriptor of the directory containing the file to be renamed
* args[1] const char* oldpath: The path of the file to be renamed
* args[2] int newdirfd: The file descriptor of the directory containing the new path of the file
* args[3] const char* newpath: The new path of the file
* result: 0 on success, or -1 on failure with errno set
*
*/
int PosixOpRenameat(const long *args, long *result);
/**
* Get pathname attributes.
*
* args[0] const char* pathname: The path name
* args[1] struct stat* attr: Pointer to struct stat to store the file attributes
* result: 0 on success, -1 on failure
*/
int PosixOpStat(const long *args, long *result);
/**
* Get file attributes.
*
* args[0] int fd: file descriptor
* args[1] struct stat* attr: Pointer to struct stat to store the file attributes
* result: 0 on success, -1 on failure
*/
int PosixOpFstat(const long *args, long *result);
/**
* Get file status relative to a directory file descriptor
* args[0] int dirfd
* args[1] pathname
* args[2] struct stat* buf
* args[3] flags :can either be 0, or include one or more of the following flags ORed:
* AT_EMPTY_PATH AT_NO_AUTOMOUNT AT_SYMLINK_NOFOLLOW
*/
int PosixOpNewfstatat(const long *args, long *result);
/**
* Get file status information for a symbolic link or file.
*
* args[0] const char* pathname The path to the symbolic link or file.
* args[1] struct stat* statbuf A pointer to a struct stat object where the file status
* information will be stored.
*
* result: On success, 0 is returned. On error, -1 is returned, and errno is
* set appropriately. If the symbolic link is encountered and the
* 'pathname' argument refers to a symbolic link, then the 'statbuf'
* parameter will contain information about the link itself rather
* than the file it refers to.
*/
int PosixOpLstat(const long *args, long *result);
/*
Obtain file status information.
Parameters:
- args[0] dirfd: A file descriptor referring to the directory in which the file resides.
Use AT_FDCWD to refer to the current working directory.
- args[1] pathname: The path to the file whose status information is to be retrieved.
- args[2] flags: Flags controlling the behavior of the call.
- args[3] mask: Mask specifying which fields in the returned 'statx' structure should be populated.
- args[4] statxbuf: Pointer to the 'statx' structure where the retrieved status information is stored.
Return Value:
- On success, returns 0. The 'statxbuf' structure contains the requested file status information.
- On failure, returns -1 and sets errno to indicate the error.
*/
int PosixOpStatx(const long *args, long *result);
/**
* Creates a symbolic link.
*
* args[0] const char* target: The target file or directory that the symbolic link should point to.
 * args[1] const char* linkpath: The path and name of the symbolic link to be created.
*
* result: On success, 0 is returned. On error, -1 is returned, and errno is
* set appropriately.
*/
int PosixOpSymlink(const long *args, long *result);
/**
* Create a hard link
*
* Creates a hard link between the file specified by 'oldpath'
* and the 'newpath'.
*
* args[0] const char* oldpath: The path of the existing file
* args[1] const char* newpath: The path of the new link to be created
* result: 0 on success, or -1 on failure with errno set
*/
void PosixOpLink(const long *args, long *result);
/**
* Deletes a file by removing its directory entry.
*
* args[0] const char* pathname: The path to the file to be deleted.
*
* result: On success, 0 is returned. On error, -1 is returned, and errno is
* set appropriately.
*/
int PosixOpUnlink(const long *args, long *result);
/*
* Deletes a specified file or directory at a given path
*
* args[0] dirfd: A file descriptor representing the directory in which to perform the unlinkat operation.
* Typically, you can use AT_FDCWD to indicate the current working directory.
* This parameter specifies the base directory for the operation.
* args[1] pathname: The path to the file to be removed. It can be either a relative or absolute path,
* depending on the setting of dirfd.
* args[2] flags: An integer value used to control the behavior of the unlinkat operation.
* You can use flags to influence the operation. Common flags include 0 (default behavior)
* and AT_REMOVEDIR (to remove a directory instead of a file).
* result: On success, returns 0, indicating the successful removal of the file or directory.
* On failure, returns -1 and sets the global variable errno to indicate the type of error.
*/
int PosixOpUnlinkat(const long *args, long *result);
/**
* Synchronize the file data and metadata to disk.
*
* arg[0] int fd The file descriptor associated with the file.
*
* result: On success, the function should return 0. On error, it should
* return a negative value,
*/
int PosixOpFsync(const long* args, long *result);
/*
* int utimensat(int dirfd, const char *pathname, const struct timespec *times, int flags);
*
* args[0] dirfd:The file descriptor of the directory containing the file or directory to be modified.
* If dirfd is AT_FDCWD, then the current working directory is used.
*
* args[1] pathname: The path to the file or directory to be modified.
*
* args[2] times: A pointer to a structure containing the new access and modification times for the file or directory.
* If times is NULL, then the current time is used for both times.
*
* args[3] flags: A bitwise OR of flags that modify the behavior of the call.
* See the `man utimensat` page for a list of supported flags.
*
* result: 0 on success; -1 on error, with errno set to the error number.
*/
int PosixOpUtimensat(const long* args, long *result);
/**
* Terminate all threads in a process and exit.
*
* This system call terminates all threads in the calling process and
* causes the process to exit. The exit status of the process is
* specified by the parameter "status".
*
* args[0] status The exit status of the process.
*/
int PosixOpExitgroup(const long* args, long *result);
/**
* statfs() - Get filesystem statistics
*
* @param args[0] path The path to the filesystem to query.
* @param args[1] buf A pointer to a statfs structure to store the results.
*
* @return 0 on success, or a negative error code on failure.
*
*/
int PosixOpStatfs(const long* args, long *result);
/**
* fstatfs() - Get filesystem statistics for a file descriptor
*
* @param args[0] fd The file descriptor of the filesystem to query.
* @param args[1] buf A pointer to a statfs structure to store the results.
*
* @return 0 on success, or a negative error code on failure.
*/
int PosixOpFstatfs(const long* args, long *result);
/**
* @brief Truncate a file to the specified length.
*
* This function truncates the file specified by the given path to the specified
* length. If the file is larger than the specified length, it is truncated to
* the specified size; if it is smaller, it is extended and filled with zeros.
*
* @param args[0] path: The path to the file to be truncated.
* @param args[1] length:The desired length to which the file should be truncated.
*
* @return On success, returns 0. On failure, returns -1, and sets errno to indicate
* the error type.
*/
int PosixOpTruncate(const long* args, long *result);
/**
* @brief Truncate a file opened with the specified file descriptor to the specified length.
*
* This function truncates the file associated with the given file descriptor to the
* specified length. If the file is larger than the specified length, it is truncated;
* if it is smaller, it is extended and filled with zeros.
*
* @param args[0] :fd The file descriptor of the file to be truncated.
* @param args[1]: length The desired length to which the file should be truncated.
*
* @return On success, returns 0. On failure, returns -1, and sets errno to indicate
* the error type.
*/
int PosixOpFtruncate(const long* args, long *result);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // CURVEFS_SRC_CLIENT_CURVE_POSIX_OP_H_

View File

@ -0,0 +1,44 @@
#ifndef CURVEFS_SRC_CLIENT_SYSCALL_CLIENT_
#define CURVEFS_SRC_CLIENT_SYSCALL_CLIENT_
#include <stddef.h>
#include <string.h>
#include <syscall.h>
#include <stdio.h>
#include "posix/libsyscall_intercept_hook_point.h"
//#include "syscall_interception.h"
#include "posix/posix_op.h"
#include <iostream>
#include <string>
#include <unordered_map>
// 拦截函数
// Entry point invoked by libsyscall_intercept for every syscall. Returning 0
// means the syscall was handled here (with *result as its return value);
// returning 1 lets the kernel execute it normally.
static int hook(long syscallNumber,
                long arg0, long arg1,
                long arg2, long arg3,
                long arg4, long arg5,
                long* result) {
    long args[6] = {arg0, arg1, arg2, arg3, arg4, arg5};
    const struct syscall_desc* desc = GetSyscallDesc(syscallNumber, args);
    if (desc == nullptr) {
        return 1;  // not one of ours: fall through to the kernel
    }
    return desc->syscallFunction(args, result);
}
// 初始化函数
// Library constructor: runs when this interposer .so is loaded, builds the
// syscall dispatch table and registers `hook` with libsyscall_intercept.
static __attribute__((constructor)) void start(void) {
    InitSyscall();
    intercept_hook_point = &hook;
}
#endif

View File

@ -0,0 +1,40 @@
# src/registry/CMakeLists.txt
#
# Builds the registry library in two flavours: the server-side
# `intercept_registry` and the client-side `intercept_registry_client`
# (compiled with CLIENT_BUILD and linked against the *_client middleware).

# All three bundled iceoryx libraries live in the same directory. Anchor the
# search path at this file's location — the original mixed ../ and ../../
# depths, so hoofs/platform were searched in the wrong place.
set(ICEORYX_LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/lib)
find_library(ICEORYX_POSH_LIB NAMES iceoryx_posh PATHS ${ICEORYX_LIB_DIR})
find_library(ICEORYX_HOOFS_LIB NAMES iceoryx_hoofs PATHS ${ICEORYX_LIB_DIR})
find_library(ICEORYX_PLATFORM_LIB NAMES iceoryx_platform PATHS ${ICEORYX_LIB_DIR})

# NOTE: file(GLOB) misses newly added sources until the next reconfigure;
# switch to an explicit source list once the file set stabilises.
file(GLOB REGISTRY_SOURCES *.cpp)
file(GLOB REGISTRY_HEADERS *.h)

add_library(intercept_registry ${REGISTRY_SOURCES})
target_include_directories(intercept_registry PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/include
)
target_link_libraries(intercept_registry PUBLIC
    intercept_middleware
    intercept_discovery
    ${ICEORYX_HOOFS_LIB}
    ${ICEORYX_PLATFORM_LIB}
    ${ICEORYX_POSH_LIB}
)

# Client flavour: same sources (the duplicated second GLOB is gone), client
# middleware/discovery, and librt for POSIX shared memory.
add_library(intercept_registry_client ${REGISTRY_SOURCES})
target_include_directories(intercept_registry_client PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/include
)
target_link_libraries(intercept_registry_client PUBLIC
    intercept_middleware_client
    intercept_discovery_client
    ${ICEORYX_POSH_LIB}
    ${ICEORYX_HOOFS_LIB}
    ${ICEORYX_PLATFORM_LIB}
    rt
)
# Use a compile definition rather than a raw -D compile option.
target_compile_definitions(intercept_registry_client PUBLIC CLIENT_BUILD)

View File

@ -0,0 +1,169 @@
#include <string>
#include <ctime>
#include <cstdlib>
#include "middleware/iceoryx_wrapper.h"
#include "client_server_registry.h"
namespace intercept {
namespace registry {
using intercept::discovery::IceoryxDiscovery;
using intercept::middleware::IceoryxWrapper;
// Produce `length` random alphanumeric characters (used to build unique
// per-client event names).
// Bug fix: the original called srand(time(0)) on EVERY invocation, so two
// calls within the same second returned identical "random" strings and could
// collide on event names. Seed exactly once per process instead.
std::string generateRandomString(int length) {
    static const bool seeded = [] {
        srand(static_cast<unsigned>(time(nullptr)));
        return true;
    }();
    (void)seeded;
    static const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    const size_t setSize = sizeof(charset) - 1;  // exclude the terminating NUL
    std::string result;
    if (length > 0) {
        result.reserve(static_cast<size_t>(length));
    }
    for (int i = 0; i < length; i++) {
        result += charset[rand() % setSize];
    }
    return result;
}
// Construct a registry bound to one middleware implementation.
// middlewareType: only ICEORYX is recognised by the factory methods below.
// info: base service metadata used when discovering client dummy endpoints.
ClientServerRegistry::ClientServerRegistry(const std::string& middlewareType, const ServiceMetaInfo& info) {
    // Create the ServiceDiscovery matching the middleware type.
    // NOTE(review): IceoryxDiscovery is created unconditionally even for other
    // middleware types — confirm this is intended.
    discovery_ = std::make_shared<IceoryxDiscovery>();
    serviceInfo_ = info;
    middlewareType_ = middlewareType;
    spdlog::info("ClientServerRegistry init");
}
// Destructor. Worker threads in threads_ are not joined here; MonitorServers()
// never returns, so cleanup relies on process teardown — TODO confirm intended.
ClientServerRegistry::~ClientServerRegistry() {
    // Fixed typo in the log message ("destory" -> "destroy").
    spdlog::info("ClientServerRegistry destroy");
}
// 在用户侧创建dummpserver
std::shared_ptr<ReqResMiddlewareWrapper> ClientServerRegistry::CreateDummyServer() {
std::string dummpyserver = "dummy_server";
ServiceMetaInfo info;
info.service = SERVICE_FLAG;
info.instance = DUMMY_INSTANCE_FLAG;
pid_t pid = getpid();
auto myid = std::this_thread::get_id();
std::stringstream ss;
ss << myid;
std::string threadid = ss.str();
info.event = generateRandomString(10) + std::to_string((long)pid) + threadid;
info.serverType = "dummy";
spdlog::info("ClientServerRegistry try to create dummy server, the service: {}, instance: {}, event: {}",
info.service, info.instance, info.event);
std::shared_ptr<ReqResMiddlewareWrapper> wrapper;
if (middlewareType_ == ICEORYX) {
wrapper = std::make_shared<IceoryxWrapper>(info);
wrapper->SetServiceType(intercept::middleware::ServiceType::DUMMYSERVER);
}
wrapper->InitDummyServer();
spdlog::info("ClientServerRegistry finish creating dummy server, server: {}, instance: {}, event: {}",
info.service, info.instance, info.event);
return wrapper;
}
// Not implemented: dummy servers are torn down when the client process exits.
// Removed the unused local string the original stub declared.
void ClientServerRegistry::DestroyDummyServer() {
}
std::shared_ptr<ReqResMiddlewareWrapper>
ClientServerRegistry::CreateClient(const ServiceMetaInfo& info) {
// 1. 获取客户端创建client的请求
// 2. 创建对应的client
// 3. 返回对应的client
if (middlewareType_ == ICEORYX) {
spdlog::info("ClientServerRegistry begin creating client, service: {}, instance: {}, event: {}",
info.service, info.instance, info.event);
std::shared_ptr<IceoryxWrapper> wrapper = std::make_shared<IceoryxWrapper>(info);
wrapper->SetServiceType(intercept::middleware::ServiceType::CLIENT);
wrapper->InitClient();
return wrapper;
}
return nullptr;
}
std::shared_ptr<ReqResMiddlewareWrapper>
ClientServerRegistry::CreateServer(const ServiceMetaInfo& info) {
// 1. 获取客户端创建server的请求
// 2. 创建对应的server
// 3. 返回对应的server
if (middlewareType_ == ICEORYX) {
std::shared_ptr<IceoryxWrapper> wrapper = std::make_shared<IceoryxWrapper>(info);
wrapper->SetServiceType(intercept::middleware::ServiceType::SERVER);
// wrapper->InitServer();
return wrapper;
}
return nullptr;
}
// 作用于服务端
void ClientServerRegistry::CreateServers() {
// 1. 获取客户端创建server的请求
std::vector<ServiceMetaInfo> results = discovery_->FindServices(serviceInfo_);
std::vector<ServiceMetaInfo> neededServers;
// 通过dummy请求获取创建server的需求
for (auto& result : results) {
if (result.instance == DUMMY_INSTANCE_FLAG &&
serverMap_.find(result.event) == serverMap_.end()){
// 根据dummy 创建一个serveiceinfo
ServiceMetaInfo info;
info.service = result.service;
info.instance = INTERCEPT_INSTANCE_FLAG;
info.event = result.event;
neededServers.push_back(info);
spdlog::info("ClientServerRegistry create server, service: {}, instance: {}, event: {}",
info.service, info.instance, info.event);
}
}
// 2. 创建对应的server
for (const auto& result : neededServers) {
// 启动一个线程创建ReqResMiddlewareWrapper 并调用它的StartServer函数
// 2.1 是否已经创建对应server
// 2.2 如果没有创建, 创建server并添加到serverMap_中
// 2.3 如果已经创建,跳过
if (middlewareType_ == ICEORYX) {
std::thread t([this, result]() {
// 创建server
auto wrapper = std::make_shared<IceoryxWrapper>(result);
wrapper->SetServiceType(intercept::middleware::ServiceType::SERVER);
this->serverMap_[result.event] = wrapper;
// 启动server
wrapper->InitServer();
wrapper->StartServer();
// 添加到serverMap_中
});
threads_.push_back(std::move(t));
}
sleep(0.1);
}
}
// Placeholder: tear down servers whose clients have gone away.
void ClientServerRegistry::DestroyServers() {
    // 1. Collect server-destruction requests from clients.
    // 2. Destroy the corresponding servers.
    // NOTE(review): not implemented yet -- this function is an empty stub.
}
// Daemon-side monitor loop: periodically create servers requested through
// dummy announcements and tear down stale ones.  This function never returns.
void ClientServerRegistry::MonitorServers() {
    spdlog::info("ClientServerRegistry monitor servers");
    while (1) {
        // create:
        CreateServers();
        // destroy: (currently a no-op placeholder)
        DestroyServers();
        // Pace the polling loop -- discovery scans are not free.
        sleep(1);
    }
    // BUG FIX: the original version joined threads_ here, but that code was
    // unreachable (the loop above never exits).  The dead join loop has been
    // removed; thread cleanup must happen in the destructor instead.
}
} // namespace internal
} // namespace intercecpt

View File

@ -0,0 +1,78 @@
#pragma once
#include "middleware/req_res_middleware_wrapper.h"
#include "discovery/iceoryx_discovery.h"
#include "discovery/discovery.h"
#define CREATE_FLAG "create"
#define DESTROY_FLAG "destroy"
#define SERVER_FLAG "server"
namespace intercept {
namespace registry {
using intercept::middleware::ReqResMiddlewareWrapper;
using intercept::discovery::Discovery;
using intercept::internal::OpenOpReqRes;
using intercept::internal::ServiceMetaInfo;
// Registry that wires intercept clients and servers together over a
// pluggable middleware (currently iceoryx).  Clients announce their needs
// via short-lived "dummy" servers; the daemon side monitors those
// announcements and creates matching real servers.
class ClientServerRegistry {
public:
    // ...
    ClientServerRegistry(const std::string& middlewareType, const ServiceMetaInfo& info);
    ~ClientServerRegistry();
    // Create a temporary dummy server, used to signal the daemon that a
    // data-exchange server is needed for this client.
    std::shared_ptr<ReqResMiddlewareWrapper> CreateDummyServer();
    void DestroyDummyServer();
    // Returns an already-initialized middleware wrapper for the client side.
    std::shared_ptr<ReqResMiddlewareWrapper> CreateClient(const ServiceMetaInfo& info);
    std::shared_ptr<ReqResMiddlewareWrapper> CreateServer(const ServiceMetaInfo& info);
    // Daemon-side loop that keeps the set of servers up to date.
    void MonitorServers();
private:
    // Driven by the information clients publish via dummy announcements.
    void CreateServers();   // create servers
    void DestroyServers();  // destroy servers
private:
    // ...
    std::string middlewareType_;
    ServiceMetaInfo serviceInfo_;  // a service is identified by service + instance
    std::shared_ptr<Discovery> discovery_;
    std::vector<std::shared_ptr<ReqResMiddlewareWrapper>> clientWrapper_;
    std::vector<std::shared_ptr<ReqResMiddlewareWrapper>> serverWrapper_;
    std::set<std::string> dummyevent_;
    // Map from event name to its running server wrapper.
    std::unordered_map<std::string, std::shared_ptr<ReqResMiddlewareWrapper>> serverMap_;
    // Threads created for the servers spawned by CreateServers().
    std::vector<std::thread> threads_;
};
///
// int client() {
// ServiceMetaInfo info = {"Service", "Instance", "Event"};
// ClientServerRegistry registry("ICE", info);
// registry.CreateDummyServer();
// auto client = registry.CreateClient(ServiceMetaInfo{"Service", "Instance", "Event"});
// OpenOpReqRes reqres("test", 1, 1);
// client->OnRequest(reqres);
// // 全局使用这一个client去操作请求
// registry.DestroyDummyServer();
// return 0;
// }
}
}

32
intercept/server.cpp Normal file
View File

@ -0,0 +1,32 @@
#include <iostream>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <atomic>
#include "registry/client_server_registry.h"
using namespace intercept::internal;
using namespace intercept::registry;
// Synchronization primitives for coordinating a discovery thread.
// NOTE(review): none of these are referenced in the visible code of this
// translation unit -- confirm they are used elsewhere before removing.
std::mutex mtx;
std::condition_variable cv;
std::atomic<bool> discovery_thread_running{false};
// Daemon entry point: load configuration, initialize logging and the iceoryx
// runtime, then monitor/create intercept servers forever.
int main() {
    constexpr char APP_NAME[] = "iox-intercept-server";
    if (intercept::common::Configure::getInstance().loadConfig(intercept::common::CONFIG_FILE)) {
        std::cout << "Config file loaded" << std::endl;
    } else {
        std::cout << "Config file not loaded: server.conf" << std::endl;
        // BUG FIX: failing to load the configuration is an error; the
        // original returned 0 (success), hiding the failure from callers
        // and process supervisors.
        return 1;
    }
    intercept::common::InitLog();
    iox::runtime::PoshRuntime::initRuntime(APP_NAME);
    ServiceMetaInfo info = {SERVICE_FLAG, "", ""};
    std::string type = ICEORYX;
    ClientServerRegistry registry(type, info);
    spdlog::info("begin to monitor servers");
    registry.MonitorServers();  // never returns
    return 0;
}

View File

@ -0,0 +1,5 @@
# Build the hybridcache_local static library from every .cpp in this directory.
# CONFIGURE_DEPENDS re-globs at build time so new files are picked up, though
# an explicit source list would be more robust.
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
file(GLOB_RECURSE LOCAL_CACHE_SOURCES CONFIGURE_DEPENDS "*.cpp")
add_library(hybridcache_local STATIC ${LOCAL_CACHE_SOURCES})
# Link libaio by library name rather than the raw "-laio" flag so CMake can
# apply platform-specific naming and de-duplication.
target_link_libraries(hybridcache_local PUBLIC ${THIRD_PARTY_LIBRARIES} aio)

52
local_cache/accessor.h Normal file
View File

@ -0,0 +1,52 @@
/*
* Project: HybridCache
* Created Date: 24-3-25
* Author: lshb
*/
#ifndef HYBRIDCACHE_ACCESSOR_H_
#define HYBRIDCACHE_ACCESSOR_H_
#include "read_cache.h"
#include "write_cache.h"
namespace HybridCache {
// Abstract facade over the write cache, read cache, and backing data source.
// Concrete accessors implement the routing between the three tiers.
class HybridCacheAccessor {
 public:
    HybridCacheAccessor(const HybridCacheConfig& cfg) : cfg_(cfg) {}
    ~HybridCacheAccessor() {}

    // Put in write cache.
    // If the write cache is full, block waiting for asynchronous flush to release the write cache space
    virtual int Put(const std::string &key, size_t start, size_t len, const char* buf) = 0;

    // 1.Read from write cache. 2.Read from read cache.
    virtual int Get(const std::string &key, size_t start, size_t len, char* buf) = 0;

    // Get4ReadHandle();

    // File flush. Need to handle flush/write concurrency.
    virtual int Flush(const std::string &key) = 0;

    // Flush to the final data source, such as global cache to s3.
    virtual int DeepFlush(const std::string &key) = 0;

    virtual int Delete(const std::string &key) = 0;

    // Invalidated the local read cache.
    // Delete read cache when open the file. That is a configuration item.
    virtual int Invalidate(const std::string &key) = 0;

    // Background asynchronous flush all files and releases write cache space.
    virtual int FsSync() = 0;

 protected:
    HybridCacheConfig cfg_;                    // full hybrid-cache configuration
    std::shared_ptr<WriteCache> writeCache_;   // staging tier for writes
    std::shared_ptr<ReadCache> readCache_;     // read-through cache tier
    std::shared_ptr<DataAdaptor> dataAdaptor_; // bridge to the backing store
};
} // namespace HybridCache
#endif // HYBRIDCACHE_ACCESSOR_H_

18
local_cache/common.cpp Normal file
View File

@ -0,0 +1,18 @@
#include "common.h"
namespace HybridCache {
bool EnableLogging = true;
// Split `str` on `delim`, appending each non-empty token to `items`.
// Empty tokens (from consecutive or leading delimiters) are dropped, and
// `items` is appended to, not cleared.
void split(const std::string& str, const char delim,
           std::vector<std::string>& items) {
    std::istringstream stream(str);
    for (std::string token; std::getline(stream, token, delim);) {
        if (token.empty()) {
            continue;
        }
        items.push_back(std::move(token));
    }
}
} // namespace HybridCache

40
local_cache/common.h Normal file
View File

@ -0,0 +1,40 @@
/*
* Project: HybridCache
* Created Date: 24-2-21
* Author: lshb
*/
#ifndef HYBRIDCACHE_COMMON_H_
#define HYBRIDCACHE_COMMON_H_
#include <cstdint>
#include <sstream>
#include <string>
#include <vector>
#include "folly/executors/CPUThreadPoolExecutor.h"
namespace HybridCache {
// Thread pool type used for asynchronous cache work.
typedef folly::CPUThreadPoolExecutor ThreadPool;

// Separator character embedded in cache keys (ASCII 26, SUB).
static const char PAGE_SEPARATOR = 26;

// Bits per byte; used by the page bitmap arithmetic.
static const uint32_t BYTE_LEN = 8;

// ConcurrentSkipList height
static const int SKIP_LIST_HEIGHT = 2;

// Global logging switch (defined in common.cpp).
extern bool EnableLogging;

// Non-owning view of a caller-supplied buffer: raw pointer + length.
struct ByteBuffer {
    char* data;
    size_t len;
    ByteBuffer(char* buf = nullptr, size_t bufLen = 0) : data(buf), len(bufLen) {}
};

// Split `str` on `delim`, appending non-empty tokens to `items`.
void split(const std::string& str, const char delim,
           std::vector<std::string>& items);
} // namespace HybridCache
#endif // HYBRIDCACHE_COMMON_H_

187
local_cache/config.cpp Normal file
View File

@ -0,0 +1,187 @@
#include <algorithm>
#include <fstream>
#include <iostream>
#include <sstream>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "common.h"
#include "config.h"
namespace HybridCache {
// Split a comma-separated string into tokens.  Unlike HybridCache::split,
// empty tokens between consecutive commas are preserved.
std::vector<std::string> SplitString(const std::string &input) {
    std::vector<std::string> tokens;
    std::stringstream source(input);
    for (std::string piece; std::getline(source, piece, ',');) {
        tokens.push_back(piece);
    }
    return tokens;
}
// Load the hybrid-cache configuration from `file` into `cfg`.
// Every lookup uses GetValueFatalIfFail, which LOG(FATAL)s (aborting the
// process) on a missing key, so all keys below are effectively mandatory
// (conditional sections only when their feature flag is enabled).
// Returns false if the file cannot be opened, otherwise the result of
// CheckConfig().
bool GetHybridCacheConfig(const std::string& file, HybridCacheConfig& cfg) {
    Configuration conf;
    if (!conf.LoadConfig(file)) return false;

    // ReadCache
    conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheName",
                             cfg.ReadCacheCfg.CacheCfg.CacheName);
    conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.MaxCacheSize",
                             cfg.ReadCacheCfg.CacheCfg.MaxCacheSize);
    conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.PageBodySize",
                             cfg.ReadCacheCfg.CacheCfg.PageBodySize);
    conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.PageMetaSize",
                             cfg.ReadCacheCfg.CacheCfg.PageMetaSize);
    conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.EnableCAS",
                             cfg.ReadCacheCfg.CacheCfg.EnableCAS);
    conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheLibConfig.EnableNvmCache",
                             cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.EnableNvmCache);
    // NVM (flash) settings are only read when the NVM tier is enabled.
    if (cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.EnableNvmCache) {
        conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheLibConfig.RaidPath",
                                 cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.RaidPath);
        conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheLibConfig.RaidFileNum",
                                 cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.RaidFileNum);
        conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheLibConfig.RaidFileSize",
                                 cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.RaidFileSize);
        conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheLibConfig.DataChecksum",
                                 cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.DataChecksum);
    }
    conf.GetValueFatalIfFail("ReadCacheConfig.DownloadNormalFlowLimit",
                             cfg.ReadCacheCfg.DownloadNormalFlowLimit);
    conf.GetValueFatalIfFail("ReadCacheConfig.DownloadBurstFlowLimit",
                             cfg.ReadCacheCfg.DownloadBurstFlowLimit);

    // WriteCache
    conf.GetValueFatalIfFail("WriteCacheConfig.CacheConfig.CacheName",
                             cfg.WriteCacheCfg.CacheCfg.CacheName);
    conf.GetValueFatalIfFail("WriteCacheConfig.CacheConfig.MaxCacheSize",
                             cfg.WriteCacheCfg.CacheCfg.MaxCacheSize);
    conf.GetValueFatalIfFail("WriteCacheConfig.CacheConfig.PageBodySize",
                             cfg.WriteCacheCfg.CacheCfg.PageBodySize);
    conf.GetValueFatalIfFail("WriteCacheConfig.CacheConfig.PageMetaSize",
                             cfg.WriteCacheCfg.CacheCfg.PageMetaSize);
    conf.GetValueFatalIfFail("WriteCacheConfig.CacheConfig.EnableCAS",
                             cfg.WriteCacheCfg.CacheCfg.EnableCAS);
    conf.GetValueFatalIfFail("WriteCacheConfig.CacheSafeRatio",
                             cfg.WriteCacheCfg.CacheSafeRatio);

    // GlobalCache (optional tier, gated by UseGlobalCache)
    conf.GetValueFatalIfFail("UseGlobalCache", cfg.UseGlobalCache);
    if (cfg.UseGlobalCache) {
        conf.GetValueFatalIfFail("GlobalCacheConfig.EnableWriteCache",
                                 cfg.GlobalCacheCfg.EnableWriteCache);
        conf.GetValueFatalIfFail("GlobalCacheConfig.EtcdAddress",
                                 cfg.GlobalCacheCfg.EtcdAddress);
        // Server list is stored comma-separated in the file.
        std::string servers;
        conf.GetValueFatalIfFail("GlobalCacheConfig.GlobalServers",
                                 servers);
        cfg.GlobalCacheCfg.GlobalServers = std::move(SplitString(servers));
        conf.GetValueFatalIfFail("GlobalCacheConfig.GflagFile",
                                 cfg.GlobalCacheCfg.GflagFile);
    }

    // Global knobs
    conf.GetValueFatalIfFail("ThreadNum", cfg.ThreadNum);
    conf.GetValueFatalIfFail("BackFlushCacheRatio", cfg.BackFlushCacheRatio);
    conf.GetValueFatalIfFail("UploadNormalFlowLimit", cfg.UploadNormalFlowLimit);
    conf.GetValueFatalIfFail("UploadBurstFlowLimit", cfg.UploadBurstFlowLimit);
    conf.GetValueFatalIfFail("LogPath", cfg.LogPath);
    conf.GetValueFatalIfFail("LogLevel", cfg.LogLevel);
    conf.GetValueFatalIfFail("EnableLog", cfg.EnableLog);
    conf.GetValueFatalIfFail("FlushToRead", cfg.FlushToRead);
    conf.GetValueFatalIfFail("CleanCacheByOpen", cfg.CleanCacheByOpen);

    conf.PrintConfig();
    return CheckConfig(cfg);
}
// Validate cross-field constraints that LoadConfig cannot express.
// NOTE: LOG(FATAL) aborts the process under glog, so the `return false`
// paths matter only if fatal logging is suppressed.
bool CheckConfig(const HybridCacheConfig& cfg) {
    const bool writeNvmEnabled = cfg.WriteCacheCfg.CacheCfg.CacheLibCfg.EnableNvmCache;
    if (writeNvmEnabled) {
        LOG(FATAL) << "Config error. Write Cache not support nvm cache!";
        return false;
    }
    // Page bitmaps are byte-granular, so body sizes must align to BYTE_LEN.
    const bool readAligned = cfg.ReadCacheCfg.CacheCfg.PageBodySize % BYTE_LEN == 0;
    const bool writeAligned = cfg.WriteCacheCfg.CacheCfg.PageBodySize % BYTE_LEN == 0;
    if (!readAligned || !writeAligned) {
        LOG(FATAL) << "Config error. Page body size must be a multiple of " << BYTE_LEN;
        return false;
    }
    return true;
}
// Load gflags definitions from `file` (one flag per line, e.g. "--x=1") and
// feed them through google::ParseCommandLineFlags.
// Returns false if the file cannot be opened.
bool ParseFlagFromFile(const std::string& file) {
    std::ifstream config_file(file);
    if (!config_file.is_open()) {
        LOG(ERROR) << "Unable to open gflag file '" << file << "' failed: "
                   << strerror(errno);
        return false;
    }
    std::string line;
    std::vector<std::string> args;
    args.push_back("hybridcache");  // argv[0] placeholder expected by gflags
    while (std::getline(config_file, line)) {
        args.push_back(line);
    }
    // BUG FIX: the original used a variable-length array
    // (char* dummy_argv[args.size()]), a non-standard C++ extension;
    // std::vector is the portable equivalent.
    std::vector<char*> argv;
    argv.reserve(args.size());
    for (auto& arg : args) {
        argv.push_back(const_cast<char*>(arg.c_str()));
    }
    int argc = static_cast<int>(argv.size());
    char** argv_ptr = argv.data();
    // remove_flags=true: gflags may rearrange/consume entries of argv_ptr.
    google::ParseCommandLineFlags(&argc, &argv_ptr, true);
    config_file.close();
    return true;
}
// Parse a "key=value"-per-line config file into config_.
// All whitespace is stripped first; '#' starts a comment.
// Returns false if the file cannot be opened.
bool Configuration::LoadConfig(const std::string& file) {
    confFile_ = file;
    std::ifstream cFile(confFile_);
    if (!cFile.is_open()) {
        LOG(ERROR) << "Open config file '" << confFile_ << "' failed: "
                   << strerror(errno);
        return false;
    }
    std::string line;
    while (getline(cFile, line)) {
        // FIXME: may not remove middle spaces
        // BUG FIX: cast to unsigned char -- calling isspace() with a negative
        // char value (non-ASCII bytes on signed-char platforms) is UB.
        line.erase(std::remove_if(line.begin(), line.end(),
                                  [](unsigned char c) { return std::isspace(c) != 0; }),
                   line.end());
        // BUG FIX: test empty() before indexing line[0].
        if (line.empty() || line[0] == '#')
            continue;
        // BUG FIX: find() returns size_t; the original stored it in an int,
        // so a line without '=' (npos) produced a bogus key/value pair.
        size_t delimiterPos = line.find("=");
        if (delimiterPos == std::string::npos)
            continue;  // skip malformed lines with no '='
        std::string key = line.substr(0, delimiterPos);
        size_t commentPos = line.find("#");
        std::string value = line.substr(delimiterPos + 1,
                                        commentPos - delimiterPos - 1);
        config_[key] = value;
    }
    return true;
}
// Dump every key/value pair to the INFO log with the key column padded
// to 60 characters.
void Configuration::PrintConfig() {
    LOG(INFO) << std::string(30, '=') << "BEGIN" << std::string(30, '=');
    for (auto &item : config_) {
        // BUG FIX: guard the padding width -- (60 - size) underflows size_t
        // for keys longer than 60 chars and would try to build a huge string.
        size_t pad = item.first.size() < 60 ? 60 - item.first.size() : 1;
        LOG(INFO) << item.first << std::string(pad, ' ')
                  << ": " << item.second;
    }
    LOG(INFO) << std::string(31, '=') << "END" << std::string(31, '=');
}
// Extract config_[key] into `value` via stringstream (so T can be any
// stream-extractable type: string, integers, bool as 0/1, ...).
// LOG(FATAL) -- which aborts the process under glog -- if the key is absent.
template <class T>
void Configuration::GetValueFatalIfFail(const std::string& key, T& value) {
    if (config_.find(key) != config_.end()) {
        std::stringstream sstream(config_[key]);
        // NOTE: operator>> stops at whitespace, but LoadConfig strips all
        // whitespace from values beforehand.
        sstream >> value;
        return;
    }
    LOG(FATAL) << "Get " << key << " from " << confFile_ << " fail";
}
} // namespace HybridCache

93
local_cache/config.h Normal file
View File

@ -0,0 +1,93 @@
/*
* Project: HybridCache
* Created Date: 24-2-21
* Author: lshb
*/
#ifndef HYBRIDCACHE_CONFIG_H_
#define HYBRIDCACHE_CONFIG_H_
#include <cstdint>
#include <map>
#include <string>
#include <vector>
namespace HybridCache {
// Settings for cachelib's NVM (flash) tier.
struct CacheLibConfig {
    bool EnableNvmCache = false;
    std::string RaidPath;      // directory prefix for the raid files
    uint64_t RaidFileNum;      // number of raid files
    size_t RaidFileSize;       // size of each raid file in bytes
    bool DataChecksum = false; // enable block-cache data checksumming
};

// Per-cache (read or write) memory layout settings.
struct CacheConfig {
    std::string CacheName;
    size_t MaxCacheSize;   // DRAM pool size in bytes
    uint32_t PageBodySize; // payload bytes per page
    uint32_t PageMetaSize; // metadata bytes per page
    bool EnableCAS;        // enable lock/version CAS protocol on pages
    CacheLibConfig CacheLibCfg;
};

struct ReadCacheConfig {
    CacheConfig CacheCfg;
    uint64_t DownloadNormalFlowLimit; // steady-state download rate limit
    uint64_t DownloadBurstFlowLimit;  // burst download rate limit
};

struct WriteCacheConfig {
    CacheConfig CacheCfg;
    uint32_t CacheSafeRatio; // cache safety concern threshold (percent)
};

struct GlobalCacheConfig {
    bool EnableWriteCache;
    std::string EtcdAddress;
    std::vector<std::string> GlobalServers; // comma-separated list in the file
    std::string GflagFile;                  // extra gflags loaded at startup
};

// Top-level configuration aggregating all tiers plus global knobs.
struct HybridCacheConfig {
    ReadCacheConfig ReadCacheCfg;
    WriteCacheConfig WriteCacheCfg;
    GlobalCacheConfig GlobalCacheCfg;
    uint32_t ThreadNum;
    uint32_t BackFlushCacheRatio;
    uint64_t UploadNormalFlowLimit;
    uint64_t UploadBurstFlowLimit;
    std::string LogPath;
    uint32_t LogLevel;
    bool EnableLog = true;
    bool UseGlobalCache = false;
    bool FlushToRead = false;      // write to read cache after flush
    bool CleanCacheByOpen = false; // clean read cache when open file
};
bool GetHybridCacheConfig(const std::string& file, HybridCacheConfig& cfg);
bool CheckConfig(const HybridCacheConfig& cfg);
bool ParseFlagFromFile(const std::string& file);
// Minimal key=value config-file reader backing GetHybridCacheConfig.
class Configuration {
 public:
    // Parse `file` into the internal map; returns false if it cannot be opened.
    bool LoadConfig(const std::string& file);
    // Dump all parsed key/value pairs to the INFO log.
    void PrintConfig();

    /*
     * @brief GetValueFatalIfFail Get the value of the specified config item
     * log it if get error
     *
     * @param[in] key config name
     * @param[out] value config value
     *
     * @return
     */
    template <class T>
    void GetValueFatalIfFail(const std::string& key, T& value);

 private:
    std::string confFile_;                       // path of the loaded file
    std::map<std::string, std::string> config_;  // parsed key -> value
};
} // namespace HybridCache
#endif // HYBRIDCACHE_CONFIG_H_

View File

@ -0,0 +1,89 @@
/*
* Project: HybridCache
* Created Date: 24-2-26
* Author: lshb
*/
#ifndef HYBRIDCACHE_DATA_ADAPTOR_H_
#define HYBRIDCACHE_DATA_ADAPTOR_H_
#include <thread>
#include "folly/futures/Future.h"
#include "glog/logging.h"
#include "common.h"
#include "errorcode.h"
namespace HybridCache {
// Asynchronous bridge between the cache tiers and the backing data source
// (e.g. S3 or a global cache).  All operations return folly Futures that
// resolve to an ErrCode-style int.
class DataAdaptor {
 public:
    // Read `size` bytes of `key` starting at `start` into `buffer`.
    virtual folly::Future<int> DownLoad(const std::string &key,
                                        size_t start,
                                        size_t size,
                                        ByteBuffer &buffer) = 0;

    // Write `size` bytes of `buffer` as the full object `key`, with headers.
    virtual folly::Future<int> UpLoad(const std::string &key,
                                      size_t size,
                                      const ByteBuffer &buffer,
                                      const std::map<std::string, std::string>& headers) = 0;

    virtual folly::Future<int> Delete(const std::string &key) = 0;

    // for global cache
    // Default implementation succeeds immediately (no deep-flush stage).
    virtual folly::Future<int> DeepFlush(const std::string &key) {
        return folly::makeFuture<int>(0);
    }

    // Fetch object size and metadata headers for `key`.
    virtual folly::Future<int> Head(const std::string &key,
                                    size_t& size,
                                    std::map<std::string,
                                    std::string>& headers) = 0;

    // Executor used by implementations for asynchronous work.
    void SetExecutor(std::shared_ptr<ThreadPool> executor) {
        executor_ = executor;
    }

 protected:
    std::shared_ptr<ThreadPool> executor_;
};
// Test stub: every operation reports REMOTE_FILE_NOT_FOUND, simulating a
// backing store that has no data.  DownLoad additionally sleeps 3 s on the
// executor to mimic network latency.
class DataAdaptor4Test : public DataAdaptor {
 public:
    folly::Future<int> DownLoad(const std::string &key,
                                size_t start,
                                size_t size,
                                ByteBuffer &buffer) {
        assert(executor_);
        // Captures by value so the lambda outlives the caller's locals.
        return folly::via(executor_.get(), [key, start, size, buffer]() -> int {
            LOG(INFO) << "[DataAdaptor]DownLoad start, key:" << key
                      << ", start:" << start << ", size:" << size;
            std::this_thread::sleep_for(std::chrono::seconds(3));
            LOG(INFO) << "[DataAdaptor]DownLoad error, key:" << key
                      << ", start:" << start << ", size:" << size;
            return REMOTE_FILE_NOT_FOUND;
        });
    }

    folly::Future<int> UpLoad(const std::string &key,
                              size_t size,
                              const ByteBuffer &buffer,
                              const std::map<std::string, std::string>& headers) {
        return folly::makeFuture<int>(REMOTE_FILE_NOT_FOUND);
    }

    folly::Future<int> Delete(const std::string &key) {
        return folly::makeFuture<int>(REMOTE_FILE_NOT_FOUND);
    }

    folly::Future<int> Head(const std::string &key,
                            size_t& size,
                            std::map<std::string,
                            std::string>& headers) {
        return folly::makeFuture<int>(REMOTE_FILE_NOT_FOUND);
    }
};
} // namespace HybridCache
#endif // HYBRIDCACHE_DATA_ADAPTOR_H_

21
local_cache/errorcode.h Normal file
View File

@ -0,0 +1,21 @@
/*
* Project: HybridCache
* Created Date: 24-3-18
* Author: lshb
*/
#ifndef HYBRIDCACHE_ERRORCODE_H_
#define HYBRIDCACHE_ERRORCODE_H_
namespace HybridCache {
// Error codes shared across the local-cache components.
enum ErrCode {
    SUCCESS = 0,                 // operation completed
    PAGE_NOT_FOUND = -1,         // requested page is not present in the cache
    PAGE_DEL_FAIL = -2,          // cache refused to remove the page
    ADAPTOR_NOT_FOUND = -3,      // presumably: no data adaptor configured -- not used in visible code
    REMOTE_FILE_NOT_FOUND = -4,  // backing store does not have the key
};
} // namespace HybridCache
#endif // HYBRIDCACHE_ERRORCODE_H_

440
local_cache/page_cache.cpp Normal file
View File

@ -0,0 +1,440 @@
#include "glog/logging.h"
#include "common.h"
#include "errorcode.h"
#include "page_cache.h"
namespace HybridCache {
// --- CAS-based page concurrency protocol ---------------------------------
// Each page's meta area holds a 1-byte spinlock plus last/new version bytes
// (see MetaPos).  Writers take the lock, bump NEWVER, mutate, then publish
// by storing LASTVER = NEWVER; readers retry while LASTVER != NEWVER.
// All helpers are no-ops when EnableCAS is off in the config.

// Try to acquire the page lock (0 -> 1).  Returns true on success, or
// unconditionally true when CAS is disabled.
bool PageCache::Lock(char* pageMemory) {
    if (!cfg_.EnableCAS) return true;
    uint8_t* lock = reinterpret_cast<uint8_t*>(pageMemory + int(MetaPos::LOCK));
    uint8_t lockExpected = 0;
    return __atomic_compare_exchange_n(lock, &lockExpected, 1, true,
            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

// Release the page lock.
void PageCache::UnLock(char* pageMemory) {
    if (!cfg_.EnableCAS) return;
    uint8_t* lock = reinterpret_cast<uint8_t*>(pageMemory + int(MetaPos::LOCK));
    __atomic_store_n(lock, 0, __ATOMIC_SEQ_CST);
}

// Atomically increment and return the page's NEWVER (pre-publication version).
uint8_t PageCache::AddNewVer(char* pageMemory) {
    if (!cfg_.EnableCAS) return 0;
    uint8_t* newVer = reinterpret_cast<uint8_t*>(pageMemory + int(MetaPos::NEWVER));
    return __atomic_add_fetch(newVer, 1, __ATOMIC_SEQ_CST);
}

// Publish a completed write by setting LASTVER to the given version.
void PageCache::SetLastVer(char* pageMemory, uint8_t newVer) {
    if (!cfg_.EnableCAS) return;
    uint8_t* lastVer = reinterpret_cast<uint8_t*>(pageMemory + int(MetaPos::LASTVER));
    __atomic_store_n(lastVer, newVer, __ATOMIC_SEQ_CST);
}

// Read the published version (LASTVER).
uint8_t PageCache::GetLastVer(const char* pageMemory) {
    if (!cfg_.EnableCAS) return 0;
    const uint8_t* lastVer = reinterpret_cast<const uint8_t*>(pageMemory + int(MetaPos::LASTVER));
    return __atomic_load_n(lastVer, __ATOMIC_SEQ_CST);
}

// Read the in-progress version (NEWVER).
uint8_t PageCache::GetNewVer(const char* pageMemory) {
    if (!cfg_.EnableCAS) return 0;
    const uint8_t* newVer = reinterpret_cast<const uint8_t*>(pageMemory + int(MetaPos::NEWVER));
    return __atomic_load_n(newVer, __ATOMIC_SEQ_CST);
}
// Set or clear the page-level "fully valid" shortcut flag in the meta area.
void PageCache::SetFastBitmap(char* pageMemory, bool valid) {
    uint8_t* flag = reinterpret_cast<uint8_t*>(pageMemory + int(MetaPos::FAST_BITMAP));
    *flag = valid ? 1 : 0;
}

// True when the whole page body was marked valid via the shortcut flag.
bool PageCache::GetFastBitmap(const char* pageMemory) {
    const uint8_t* flag = reinterpret_cast<const uint8_t*>(pageMemory + int(MetaPos::FAST_BITMAP));
    return *flag == 1;
}
// Mark bits [pos, pos+len) of the page's validity bitmap as valid/invalid.
// The bitmap lives right after the meta area (offset cfg_.PageMetaSize).
// Works in three phases: the partial head byte, whole middle bytes via
// memset, then the partial tail byte.
void PageCache::SetBitMap(char* pageMemory, int pos, int len, bool valid) {
    // A full-page valid write also sets the fast "fully valid" flag;
    // any invalidation clears it.
    if (len == cfg_.PageBodySize && valid)
        SetFastBitmap(pageMemory, valid);
    if (!valid)
        SetFastBitmap(pageMemory, valid);
    char* x = pageMemory + cfg_.PageMetaSize;  // start of the bitmap
    uint32_t startByte = pos / BYTE_LEN;
    // head byte
    if (pos % BYTE_LEN > 0) {
        int headByteSetLen = BYTE_LEN - pos % BYTE_LEN;
        headByteSetLen = headByteSetLen > len ? len : headByteSetLen;
        len -= headByteSetLen;
        while (headByteSetLen) {
            if (valid)
                SetBit(x+startByte, pos%BYTE_LEN+(--headByteSetLen));
            else
                ClearBit(x+startByte, pos%BYTE_LEN+(--headByteSetLen));
        }
        ++startByte;
    }
    // mid bytes
    int midLen = len / BYTE_LEN;
    if (midLen > 0) {
        if (valid)
            memset(x+startByte, UINT8_MAX, midLen);
        else
            memset(x+startByte, 0, midLen);
        len -= BYTE_LEN * midLen;
        startByte += midLen;
    }
    // tail byte
    while (len > 0) {
        if (valid)
            SetBit(x+startByte, --len);
        else
            ClearBit(x+startByte, --len);
    }
}
// Build and configure the cachelib LruAllocator backing this page cache,
// optionally with an NVM (flash) tier, and create the allocation pool.
// Always returns SUCCESS (cachelib validate()/constructor throw on error).
int PageCacheImpl::Init() {
    // Extra DRAM beyond the pool so cachelib's own overhead does not eat
    // into the configured MaxCacheSize.
    const uint64_t REDUNDANT_SIZE = 1024 * 1024 * 1024;
    // Hash-table sizing: 2^25 buckets, 2^15 locks.
    const unsigned bucketsPower = 25;
    const unsigned locksPower = 15;

    Cache::Config config;
    config
        .setCacheSize(cfg_.MaxCacheSize + REDUNDANT_SIZE)
        .setCacheName(cfg_.CacheName)
        .setAccessConfig({bucketsPower, locksPower})
        .validate();
    if (cfg_.CacheLibCfg.EnableNvmCache) {
        Cache::NvmCacheConfig nvmConfig;
        // Raid files are named <RaidPath>0 .. <RaidPath>(RaidFileNum-1).
        std::vector<std::string> raidPaths;
        for (int i=0; i<cfg_.CacheLibCfg.RaidFileNum; ++i) {
            raidPaths.push_back(cfg_.CacheLibCfg.RaidPath + std::to_string(i));
        }
        nvmConfig.navyConfig.setRaidFiles(raidPaths,
                cfg_.CacheLibCfg.RaidFileSize, false);
        nvmConfig.navyConfig.blockCache()
                 .setDataChecksum(cfg_.CacheLibCfg.DataChecksum);
        nvmConfig.navyConfig.setReaderAndWriterThreads(1, 1, 0, 0);
        config.enableNvmCache(nvmConfig).validate();
    }
    cache_ = std::make_unique<Cache>(config);
    pool_ = cache_->addPool(cfg_.CacheName + "_pool", cfg_.MaxCacheSize);

    LOG(WARNING) << "[PageCache]Init, name:" << config.getCacheName()
                 << ", size:" << config.getCacheSize()
                 << ", dir:" << config.getCacheDir();
    return SUCCESS;
}
// Release the underlying cachelib instance (if any) and log the shutdown.
int PageCacheImpl::Close() {
    cache_.reset();  // reset() on an already-empty pointer is a no-op
    LOG(WARNING) << "[PageCache]Close, name:" << cfg_.CacheName;
    return SUCCESS;
}
// Write `length` bytes of `buf` into page `key` at body offset `pagePos`,
// creating the page if needed.  Follows the CAS protocol: spin until the
// page lock is held, bump NEWVER, copy data + update bitmap, publish
// LASTVER, unlock.  Always returns SUCCESS.
int PageCacheImpl::Write(const std::string &key,
                         uint32_t pagePos,
                         uint32_t length,
                         const char *buf) {
    assert(cfg_.PageBodySize >= pagePos + length);
    assert(cache_);

    Cache::WriteHandle writeHandle = nullptr;
    char* pageValue = nullptr;
    // Spin: re-fetch (or create) the page until we win its lock.
    while (true) {
        writeHandle = std::move(FindOrCreateWriteHandle(key));
        pageValue = reinterpret_cast<char*>(writeHandle->getMemory());
        if (Lock(pageValue)) break;
    }

    // Body bytes live after the meta area and the bitmap.
    uint64_t realOffset = cfg_.PageMetaSize + bitmapSize_ + pagePos;
    uint8_t newVer = AddNewVer(pageValue);
    std::memcpy(pageValue + realOffset, buf, length);
    SetBitMap(pageValue, pagePos, length, true);
    SetLastVer(pageValue, newVer);
    UnLock(pageValue);
    return SUCCESS;
}
// Read up to `length` bytes of page `key` starting at body offset `pagePos`
// into `buf`.  Only bitmap-valid ranges are copied; `dataBoundary` receives
// the (offset-in-buf, length) of each contiguous valid segment.
// Uses optimistic concurrency: version is sampled before and after the scan,
// and the whole scan is retried if a writer published in between.
// Returns PAGE_NOT_FOUND when the page is absent, else SUCCESS.
int PageCacheImpl::Read(const std::string &key,
                        uint32_t pagePos,
                        uint32_t length,
                        char *buf,
                        std::vector<std::pair<size_t, size_t>>& dataBoundary) {
    assert(cfg_.PageBodySize >= pagePos + length);
    assert(cache_);

    int res = SUCCESS;
    while (true) {
        auto readHandle = cache_->find(key);
        if (!readHandle) {
            res = PAGE_NOT_FOUND;
            break;
        }
        // Busy-wait for async (NVM) fetches to materialize the item.
        while (!readHandle.isReady());

        const char* pageValue = reinterpret_cast<const char*>(
                readHandle->getMemory());
        // Snapshot versions; a mismatch means a write is in flight.
        uint8_t lastVer = GetLastVer(pageValue);
        uint8_t newVer = GetNewVer(pageValue);
        if (lastVer != newVer) continue;

        dataBoundary.clear();
        uint32_t cur = pagePos;
        // Fast path: the whole page is valid, copy in one shot.
        if (GetFastBitmap(pageValue)) {
            uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_ + pagePos;
            std::memcpy(buf, pageValue + pageOff, length);
            dataBoundary.push_back(std::make_pair(0, length));
            cur += length;
        }

        // Scan the bitmap, accumulating runs of equal validity and copying
        // each valid run when it ends.
        bool continuousDataValid = false;  // continuous Data valid or invalid
        uint32_t continuousLen = 0;
        while (cur < pagePos+length) {
            const char *byte = pageValue + cfg_.PageMetaSize + cur / BYTE_LEN;
            // fast to judge full byte of bitmap
            // (actually 64 bits at a time when aligned)
            uint16_t batLen = 0;
            bool batByteValid = false, isBatFuncValid = false;
            batLen = 64;
            if (cur % batLen == 0 && (pagePos+length-cur) >= batLen) {
                uint64_t byteValue = *reinterpret_cast<const uint64_t*>(byte);
                if (byteValue == UINT64_MAX) {
                    batByteValid = true;
                    isBatFuncValid = true;
                } else if (byteValue == 0) {
                    isBatFuncValid = true;
                }
            }
            if (isBatFuncValid && (continuousLen == 0 ||
                    continuousDataValid == batByteValid)) {
                continuousDataValid = batByteValid;
                continuousLen += batLen;
                cur += batLen;
                continue;
            }

            bool curByteValid = GetBit(byte, cur % BYTE_LEN);
            if (continuousLen == 0 || continuousDataValid == curByteValid) {
                continuousDataValid = curByteValid;
                ++continuousLen;
                ++cur;
                continue;
            }

            // Validity flipped: emit the finished valid run (if any).
            if (continuousDataValid) {
                uint32_t bufOff = cur - continuousLen - pagePos;
                uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_ +
                                   cur - continuousLen;
                std::memcpy(buf + bufOff, pageValue + pageOff, continuousLen);
                dataBoundary.push_back(std::make_pair(bufOff, continuousLen));
            }
            continuousDataValid = curByteValid;
            continuousLen = 1;
            ++cur;
        }
        // Flush the trailing run.
        if (continuousDataValid) {
            uint32_t bufOff = cur - continuousLen - pagePos;
            uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_ +
                               cur - continuousLen;
            std::memcpy(buf + bufOff, pageValue + pageOff, continuousLen);
            dataBoundary.push_back(std::make_pair(bufOff, continuousLen));
        }

        // Re-check the version: retry if a writer raced with the scan.
        newVer = GetNewVer(pageValue);
        if (lastVer == newVer) break;
    }
    return res;
}
// Zero-copy variant of Read over the entire page body: instead of copying,
// `dataSegments` receives (ByteBuffer pointing into the cached page, pageOff)
// for each contiguous valid run.  Same optimistic version-retry scheme as
// Read.  The caller must keep the page alive while using the buffers (see
// header comment).  Returns PAGE_NOT_FOUND when the page is absent.
int PageCacheImpl::GetAllCache(const std::string &key,
                               std::vector<std::pair<ByteBuffer, size_t>>& dataSegments) {
    assert(cache_);
    uint32_t pageSize = cfg_.PageBodySize;

    int res = SUCCESS;
    while (true) {
        auto readHandle = cache_->find(key);
        if (!readHandle) {
            res = PAGE_NOT_FOUND;
            break;
        }
        // Busy-wait for async (NVM) fetches to materialize the item.
        while (!readHandle.isReady());

        const char* pageValue = reinterpret_cast<const char*>(
                readHandle->getMemory());
        uint8_t lastVer = GetLastVer(pageValue);
        uint8_t newVer = GetNewVer(pageValue);
        if (lastVer != newVer) continue;

        dataSegments.clear();
        uint32_t cur = 0;
        // Fast path: whole page valid -> one segment covering the body.
        if (GetFastBitmap(pageValue)) {
            uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_;
            dataSegments.push_back(std::make_pair(
                ByteBuffer(const_cast<char*>(pageValue + pageOff), pageSize), 0));
            cur += pageSize;
        }

        bool continuousDataValid = false;  // continuous Data valid or invalid
        uint32_t continuousLen = 0;
        while (cur < pageSize) {
            const char *byte = pageValue + cfg_.PageMetaSize + cur / BYTE_LEN;
            // fast to judge full byte of bitmap
            // (actually 64 bits at a time when aligned)
            uint16_t batLen = 0;
            bool batByteValid = false, isBatFuncValid = false;
            batLen = 64;
            if (cur % batLen == 0 && (pageSize-cur) >= batLen) {
                uint64_t byteValue = *reinterpret_cast<const uint64_t*>(byte);
                if (byteValue == UINT64_MAX) {
                    batByteValid = true;
                    isBatFuncValid = true;
                } else if (byteValue == 0) {
                    isBatFuncValid = true;
                }
            }
            if (isBatFuncValid && (continuousLen == 0 ||
                    continuousDataValid == batByteValid)) {
                continuousDataValid = batByteValid;
                continuousLen += batLen;
                cur += batLen;
                continue;
            }

            bool curByteValid = GetBit(byte, cur % BYTE_LEN);
            if (continuousLen == 0 || continuousDataValid == curByteValid) {
                continuousDataValid = curByteValid;
                ++continuousLen;
                ++cur;
                continue;
            }

            // Validity flipped: emit the finished valid run (if any).
            if (continuousDataValid) {
                uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_ +
                                   cur - continuousLen;
                dataSegments.push_back(std::make_pair(
                    ByteBuffer(const_cast<char*>(pageValue + pageOff), continuousLen),
                    cur - continuousLen));
            }
            continuousDataValid = curByteValid;
            continuousLen = 1;
            ++cur;
        }
        // Flush the trailing run.
        if (continuousDataValid) {
            uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_ +
                               cur - continuousLen;
            dataSegments.push_back(std::make_pair(
                ByteBuffer(const_cast<char*>(pageValue + pageOff), continuousLen),
                cur - continuousLen));
        }

        // Re-check the version: retry if a writer raced with the scan.
        newVer = GetNewVer(pageValue);
        if (lastVer == newVer) break;
    }
    return res;
}
// Invalidate bits [pagePos, pagePos+length) of page `key`.  If that leaves
// the page's bitmap entirely empty, the whole page is removed from the cache.
// Returns PAGE_NOT_FOUND if the page is absent, PAGE_DEL_FAIL if the empty
// page could not be removed, otherwise SUCCESS.
int PageCacheImpl::DeletePart(const std::string &key,
                              uint32_t pagePos,
                              uint32_t length) {
    assert(cfg_.PageBodySize >= pagePos + length);
    assert(cache_);
    int res = SUCCESS;

    Cache::WriteHandle writeHandle = nullptr;
    char* pageValue = nullptr;
    // Spin until the page lock is held (or the page is found absent).
    while (true) {
        writeHandle = cache_->findToWrite(key);
        if (!writeHandle) {
            res = PAGE_NOT_FOUND;
            break;
        }
        pageValue = reinterpret_cast<char*>(writeHandle->getMemory());
        if (Lock(pageValue)) break;
    }

    if (SUCCESS == res) {
        uint8_t newVer = AddNewVer(pageValue);
        SetBitMap(pageValue, pagePos, length, false);

        // Scan the bitmap to see whether any valid bits remain.
        bool isEmpty = true;
        uint32_t pos = 0;
        while (pos < bitmapSize_) {
            if (*(pageValue + cfg_.PageMetaSize + pos) != 0) {
                isEmpty = false;
                break;
            }
            ++pos;
        }

        bool isDel = false;
        if (isEmpty) {
            if (cache_->remove(writeHandle) == Cache::RemoveRes::kSuccess) {
                pageNum_.fetch_sub(1);
                pagesList_.erase(key);
                isDel = true;
            } else {
                res = PAGE_DEL_FAIL;
            }
        }
        // Only publish/unlock when the page still exists; a removed page's
        // memory must not be touched again.
        if (!isDel) {
            SetLastVer(pageValue, newVer);
            UnLock(pageValue);
        }
    }
    return res;
}
// Remove the whole page for `key`, updating page accounting on success.
// Returns PAGE_NOT_FOUND when the page was not present.
int PageCacheImpl::Delete(const std::string &key) {
    assert(cache_);
    const bool removed = cache_->remove(key) == Cache::RemoveRes::kSuccess;
    if (removed) {
        pageNum_.fetch_sub(1);
        pagesList_.erase(key);
        return SUCCESS;
    }
    return PAGE_NOT_FOUND;
}
// Return a write handle for page `key`, allocating and registering a new
// zeroed page if it does not exist.  Only the meta + bitmap prefix is
// zero-initialized; the body is left as allocated.
Cache::WriteHandle PageCacheImpl::FindOrCreateWriteHandle(const std::string &key) {
    auto writeHandle = cache_->findToWrite(key);
    if (!writeHandle) {
        writeHandle = cache_->allocate(pool_, key, GetRealPageSize());
        assert(writeHandle);
        assert(writeHandle->getMemory());
        // need init
        memset(writeHandle->getMemory(), 0, cfg_.PageMetaSize + bitmapSize_);

        if (cfg_.CacheLibCfg.EnableNvmCache) {
            // insertOrReplace will insert or replace existing item for the key,
            // and return the handle of the replaced old item
            // Note: write cache nonsupport NVM, because it will be replaced
            if (!cache_->insertOrReplace(writeHandle)) {
                // No previous item was replaced -> this is a brand-new page.
                pageNum_.fetch_add(1);
                pagesList_.insert(key);
            }
        } else {
            if (cache_->insert(writeHandle)) {
                pageNum_.fetch_add(1);
                pagesList_.insert(key);
            } else {
                // Lost an insert race: someone else created the page first;
                // fall back to their copy.
                writeHandle = cache_->findToWrite(key);
            }
        }
    }
    return writeHandle;
}
} // namespace HybridCache

161
local_cache/page_cache.h Normal file
View File

@ -0,0 +1,161 @@
/*
* Project: HybridCache
* Created Date: 24-2-21
* Author: lshb
*/
#ifndef HYBRIDCACHE_PAGE_CACHE_H_
#define HYBRIDCACHE_PAGE_CACHE_H_
#include <string>
#include <set>
#include "folly/ConcurrentSkipList.h"
#include "cachelib/allocator/CacheAllocator.h"
#include "common.h"
#include "config.h"
namespace HybridCache {
typedef folly::ConcurrentSkipList<std::string> StringSkipList;
using facebook::cachelib::PoolId;
using Cache = facebook::cachelib::LruAllocator;
// Byte offsets of the fields inside a page's metadata prefix.
enum class MetaPos {
    LOCK = 0,    // 1-byte spinlock for the CAS write protocol
    LASTVER,     // last published version
    NEWVER,      // in-progress version (incremented before each write)
    FAST_BITMAP  // shortcut flag: whole page body valid
};
// Abstract page store: fixed-size pages, each carrying a small metadata
// prefix (MetaPos fields), a validity bitmap, and the page body.  Provides
// the CAS/versioning and bitmap primitives shared by implementations.
class PageCache {
 public:
    PageCache(const CacheConfig& cfg): cfg_(cfg) {}
    virtual ~PageCache() {}

    virtual int Init() = 0;
    virtual int Close() = 0;

    virtual int Write(const std::string &key,  // page key
                      uint32_t pagePos,
                      uint32_t length,
                      const char *buf  // user buf
                      ) = 0;
    virtual int Read(const std::string &key,
                     uint32_t pagePos,
                     uint32_t length,
                     char *buf,  // user buf
                     std::vector<std::pair<size_t, size_t>>& dataBoundary  // valid data segment boundar
                     ) = 0;
    // upper layer need to guarantee that the page will not be delete
    virtual int GetAllCache(const std::string &key,
                            std::vector<std::pair<ByteBuffer, size_t>>& dataSegments  // <ByteBuffer(buf+len), pageOff>
                            ) = 0;

    // delete part data from page
    // if the whole page is empty then delete that page
    virtual int DeletePart(const std::string &key,
                           uint32_t pagePos,
                           uint32_t length
                           ) = 0;
    virtual int Delete(const std::string &key) = 0;

    virtual size_t GetCacheSize() = 0;
    virtual size_t GetCacheMaxSize() = 0;

    // Accessor over the skip-list of keys currently held in the cache.
    const folly::ConcurrentSkipList<std::string>::Accessor& GetPageList() {
        return this->pagesList_;
    }

 protected:
    // CAS operate
    bool Lock(char* pageMemory);
    void UnLock(char* pageMemory);
    uint8_t AddNewVer(char* pageMemory);
    void SetLastVer(char* pageMemory, uint8_t newVer);
    uint8_t GetLastVer(const char* pageMemory);
    uint8_t GetNewVer(const char* pageMemory);

    // bitmap operate
    void SetFastBitmap(char* pageMemory, bool valid);
    bool GetFastBitmap(const char* pageMemory);
    void SetBitMap(char* pageMemory, int pos, int len, bool valid);
    // Single-bit helpers; n is a bit index within one byte (0..7).
    void SetBit(char *x, int n) { *x |= (1 << n); }
    void ClearBit(char *x, int n) { *x &= ~ (1 << n); }
    bool GetBit(const char *x, int n) { return *x & (1 << n); }

 protected:
    StringSkipList::Accessor pagesList_ = StringSkipList::create(SKIP_LIST_HEIGHT);
    CacheConfig cfg_;
};
// cachelib-backed PageCache. Each cache item is laid out as
// [PageMetaSize metadata][bitmap][PageBodySize body]; see GetRealPageSize().
class PageCacheImpl : public PageCache {
public:
PageCacheImpl(const CacheConfig& cfg): PageCache(cfg) {
// Bitmap bytes needed to cover the page body (PageBodySize / BYTE_LEN).
bitmapSize_ = cfg_.PageBodySize / BYTE_LEN;
}
~PageCacheImpl() {}
int Init();
int Close();
int Write(const std::string &key,
uint32_t pagePos,
uint32_t length,
const char *buf
);
int Read(const std::string &key,
uint32_t pagePos,
uint32_t length,
char *buf,
std::vector<std::pair<size_t, size_t>>& dataBoundary
);
int GetAllCache(const std::string &key,
std::vector<std::pair<ByteBuffer, size_t>>& dataSegments
);
int DeletePart(const std::string &key,
uint32_t pagePos,
uint32_t length
);
int Delete(const std::string &key);
// Current footprint: page count times full per-page size.
size_t GetCacheSize() {
return GetPageNum() * GetRealPageSize();
}
// Capacity limit; includes the NVM tier (RAID files) when it is enabled.
size_t GetCacheMaxSize() {
if (!cfg_.CacheLibCfg.EnableNvmCache)
return cfg_.MaxCacheSize;
size_t nvmMaxSize = cfg_.CacheLibCfg.RaidFileNum *
cfg_.CacheLibCfg.RaidFileSize;
return cfg_.MaxCacheSize + nvmMaxSize;
}
private:
uint64_t GetPageNum() {
return pageNum_.load();
}
// Full on-cache size of one page: metadata header + bitmap + body.
uint32_t GetRealPageSize() {
return cfg_.PageMetaSize + bitmapSize_ + cfg_.PageBodySize;
}
// Returns a writable handle to the page, creating it if absent.
Cache::WriteHandle FindOrCreateWriteHandle(const std::string &key);
private:
std::shared_ptr<Cache> cache_; // cachelib LruAllocator instance
PoolId pool_; // allocation pool inside cache_
std::atomic<uint64_t> pageNum_{0}; // number of pages currently cached
uint32_t bitmapSize_; // bitmap bytes per page
};
} // namespace HybridCache
#endif // HYBRIDCACHE_PAGE_CACHE_H_

257
local_cache/read_cache.cpp Normal file
View File

@ -0,0 +1,257 @@
#include "errorcode.h"
#include "read_cache.h"
namespace HybridCache {
// Construct and eagerly initialise the read cache (page cache + token bucket).
// NOTE(review): Init()'s return code is discarded here; failures are only logged.
ReadCache::ReadCache(const ReadCacheConfig& cfg,
std::shared_ptr<DataAdaptor> dataAdaptor,
std::shared_ptr<ThreadPool> executor) :
cfg_(cfg), dataAdaptor_(dataAdaptor), executor_(executor) {
Init();
}
// Read [start, start+len) of `key` into `buffer`.
// Phase 1 copies whatever pages are cached locally; phase 2 schedules one
// rate-limited download per missing gap via dataAdaptor_ and re-inserts the
// downloaded data with Put(); the returned future resolves to SUCCESS or the
// first error seen.
//
// Fixes vs. previous revision:
//  * a failed download is now rejected even when EnableLogging is false
//    (previously the error was swallowed and the bad buffer was cached),
//  * removed dead commented-out code and a copy-elision-defeating std::move.
folly::Future<int> ReadCache::Get(const std::string &key, size_t start,
                                  size_t len, ByteBuffer &buffer) {
    std::chrono::steady_clock::time_point startTime;
    if (EnableLogging) startTime = std::chrono::steady_clock::now();

    int res = SUCCESS;
    uint32_t pageSize = cfg_.CacheCfg.PageBodySize;
    size_t index = start / pageSize;      // first page touched
    uint32_t pagePos = start % pageSize;  // offset inside that page
    size_t readLen = 0;
    size_t realReadLen = 0;               // bytes actually served from cache
    size_t bufOffset = 0;
    size_t remainLen = len;
    uint64_t readPageCnt = 0;
    // <offset in buffer, length> extents satisfied from the local page cache.
    std::vector<std::pair<size_t, size_t>> dataBoundary;

    // Phase 1: copy cached data page by page.
    while (remainLen > 0) {
        readLen = pagePos + remainLen > pageSize ? pageSize - pagePos : remainLen;
        std::string pageKey = GetPageKey(key, index);
        std::vector<std::pair<size_t, size_t>> stepDataBoundary;
        int tmpRes = pageCache_->Read(pageKey, pagePos, readLen,
                                      (buffer.data + bufOffset), stepDataBoundary);
        if (SUCCESS == tmpRes) {
            ++readPageCnt;
        } else if (PAGE_NOT_FOUND != tmpRes) {  // missing pages are expected
            res = tmpRes;
            break;
        }
        // Rebase per-page extents onto the whole user buffer.
        for (auto& it : stepDataBoundary) {
            dataBoundary.push_back(std::make_pair(it.first + bufOffset, it.second));
            realReadLen += it.second;
        }
        remainLen -= readLen;
        ++index;
        bufOffset += readLen;
        pagePos = (pagePos + readLen) % pageSize;
    }

    remainLen = len - realReadLen;
    if (remainLen > 0 && !dataAdaptor_) {
        res = ADAPTOR_NOT_FOUND;  // misses exist but there is no backing store
    }

    // Phase 2: one asynchronous download per gap between cached extents.
    readLen = 0;
    size_t stepStart = 0;
    size_t fileStartOff = 0;
    std::vector<folly::Future<int>> fs;
    auto it = dataBoundary.begin();
    while (remainLen > 0 && SUCCESS == res) {
        ByteBuffer stepBuffer(buffer.data + stepStart);
        fileStartOff = start + stepStart;
        if (it != dataBoundary.end()) {
            readLen = it->first - stepStart;
            if (!readLen) {  // no gap before this cached extent; skip past it
                stepStart = it->first + it->second;
                ++it;
                continue;
            }
            stepStart = it->first + it->second;
            ++it;
        } else {
            readLen = remainLen;  // trailing gap after the last cached extent
        }
        stepBuffer.len = readLen;
        remainLen -= readLen;
        auto download = folly::via(executor_.get(), [this, readLen]() {
            // Download flow control: spin until the token bucket admits us.
            while(!this->tokenBucket_->consume(readLen));
            return SUCCESS;
        }).thenValue([this, key, fileStartOff, readLen, stepBuffer](int i) {
            ByteBuffer tmpBuffer(stepBuffer.data, readLen);
            return this->dataAdaptor_->DownLoad(key, fileStartOff, readLen, tmpBuffer).get();
        }).thenValue([this, key, fileStartOff, readLen, stepBuffer](int downRes) {
            if (SUCCESS != downRes) {
                // Propagate the failure unconditionally; logging is optional.
                if (EnableLogging) {
                    LOG(ERROR) << "[ReadCache]DownLoad failed, file:" << key
                               << ", start:" << fileStartOff << ", len:" << readLen
                               << ", res:" << downRes;
                }
                return downRes;
            }
            // Cache the downloaded range for subsequent reads.
            return this->Put(key, fileStartOff, readLen, stepBuffer);
        });
        fs.emplace_back(std::move(download));
    }

    // Phase 3: wait for all downloads; first failure wins.
    if (!fs.empty()) {
        return collectAll(fs).via(executor_.get())
              .thenValue([key, start, len, readPageCnt, startTime](
                    std::vector<folly::Try<int>, std::allocator<folly::Try<int>>>&& tups) {
            int finalRes = SUCCESS;
            for (const auto& t : tups) {
                if (SUCCESS != t.value()) finalRes = t.value();
            }
            if (EnableLogging) {
                double totalTime = std::chrono::duration<double, std::milli>(
                        std::chrono::steady_clock::now() - startTime).count();
                LOG(INFO) << "[ReadCache]Get, key:" << key << ", start:" << start
                          << ", len:" << len << ", res:" << finalRes
                          << ", readPageCnt:" << readPageCnt
                          << ", time:" << totalTime << "ms";
            }
            return finalRes;
        });
    }

    if (EnableLogging) {
        double totalTime = std::chrono::duration<double, std::milli>(
                std::chrono::steady_clock::now() - startTime).count();
        LOG(INFO) << "[ReadCache]Get, key:" << key << ", start:" << start
                  << ", len:" << len << ", res:" << res
                  << ", readPageCnt:" << readPageCnt
                  << ", time:" << totalTime << "ms";
    }
    return folly::makeFuture(res);
}
// Insert [start, start+len) of `buffer` into the local page cache under `key`,
// splitting the range on PageBodySize boundaries. Stops at the first failing
// page write and returns its error code (SUCCESS otherwise).
int ReadCache::Put(const std::string &key, size_t start, size_t len,
                   const ByteBuffer &buffer) {
    std::chrono::steady_clock::time_point startTime;
    if (EnableLogging) startTime = std::chrono::steady_clock::now();

    int res = SUCCESS;
    uint32_t pageSize = cfg_.CacheCfg.PageBodySize;
    uint64_t index = start / pageSize;    // first page touched
    uint64_t pagePos = start % pageSize;  // offset inside that page
    uint64_t writeLen = 0;
    uint64_t writeOffset = 0;             // bytes of `buffer` consumed so far
    uint64_t writePageCnt = 0;
    size_t remainLen = len;
    while (remainLen > 0) {
        writeLen = pagePos + remainLen > pageSize ? pageSize - pagePos : remainLen;
        // No std::move: GetPageKey returns a prvalue, moving it would only
        // inhibit copy elision.
        std::string pageKey = GetPageKey(key, index);
        res = pageCache_->Write(pageKey, pagePos, writeLen,
                                (buffer.data + writeOffset));
        if (SUCCESS != res) break;
        ++writePageCnt;
        remainLen -= writeLen;
        ++index;
        writeOffset += writeLen;
        pagePos = (pagePos + writeLen) % pageSize;
    }
    if (EnableLogging) {
        double totalTime = std::chrono::duration<double, std::milli>(
                std::chrono::steady_clock::now() - startTime).count();
        LOG(INFO) << "[ReadCache]Put, key:" << key << ", start:" << start
                  << ", len:" << len << ", res:" << res
                  << ", writePageCnt:" << writePageCnt
                  << ", time:" << totalTime << "ms";
    }
    return res;
}
// Remove every cached page belonging to `key`. Page keys are stored as
// "<key><PAGE_SEPARATOR><index>", so we scan the ordered page list starting
// at this key's first page until the key prefix no longer matches.
int ReadCache::Delete(const std::string &key) {
    std::chrono::steady_clock::time_point startTime;
    if (EnableLogging) startTime = std::chrono::steady_clock::now();

    int res = SUCCESS;
    size_t delPageNum = 0;
    // No std::move: GetPageKey returns a prvalue (copy elision applies).
    std::string firstPage = GetPageKey(key, 0);
    auto pageKey = pageCache_->GetPageList().lower_bound(firstPage);
    while (pageKey != pageCache_->GetPageList().end()) {
        std::vector<std::string> tokens;
        split(*pageKey, PAGE_SEPARATOR, tokens);
        if (key != tokens[0]) break;  // walked past this key's pages
        int tmpRes = pageCache_->Delete(*pageKey);
        if (SUCCESS == tmpRes) {
            ++delPageNum;
        } else if (PAGE_NOT_FOUND != tmpRes) {  // already-gone pages are fine
            res = tmpRes;
            break;
        }
        ++pageKey;
    }
    if (EnableLogging) {
        double totalTime = std::chrono::duration<double, std::milli>(
                std::chrono::steady_clock::now() - startTime).count();
        LOG(INFO) << "[ReadCache]Delete, key:" << key << ", res:" << res
                  << ", delPageCnt:" << delPageNum
                  << ", time:" << totalTime << "ms";
    }
    return res;
}
// Collect the distinct file keys that currently have at least one cached
// page. Page keys look like "<file><PAGE_SEPARATOR><index>"; the file part
// is the first split token. Always returns SUCCESS.
int ReadCache::GetAllKeys(std::set<std::string>& keys) {
    std::chrono::steady_clock::time_point beginTime;
    if (EnableLogging) beginTime = std::chrono::steady_clock::now();
    for (auto iter = pageCache_->GetPageList().begin();
         iter != pageCache_->GetPageList().end(); ++iter) {
        std::vector<std::string> parts;
        split(*iter, PAGE_SEPARATOR, parts);
        keys.insert(parts[0]);  // duplicates collapse in the set
    }
    if (EnableLogging) {
        double elapsedMs = std::chrono::duration<double, std::milli>(
                std::chrono::steady_clock::now() - beginTime).count();
        LOG(INFO) << "[ReadCache]Get all keys, keyCnt:" << keys.size()
                  << ", time:" << elapsedMs << "ms";
    }
    return SUCCESS;
}
// Shut down the underlying page cache; called from the destructor as well.
void ReadCache::Close() {
pageCache_->Close();
LOG(WARNING) << "[ReadCache]Close";
}
// Create the page cache and the download token bucket from cfg_.
// Returns the page cache's init result.
int ReadCache::Init() {
pageCache_ = std::make_shared<PageCacheImpl>(cfg_.CacheCfg);
// Token bucket limits download bandwidth: sustained rate + burst capacity.
tokenBucket_ = std::make_shared<folly::TokenBucket>(
cfg_.DownloadNormalFlowLimit, cfg_.DownloadBurstFlowLimit);
int res = pageCache_->Init();
LOG(WARNING) << "[ReadCache]Init, res:" << res;
return res;
}
// Compose the per-page cache key: "<key><PAGE_SEPARATOR><pageIndex>".
std::string ReadCache::GetPageKey(const std::string &key, size_t pageIndex) {
    return key + PAGE_SEPARATOR + std::to_string(pageIndex);
}
} // namespace HybridCache

57
local_cache/read_cache.h Normal file
View File

@ -0,0 +1,57 @@
/*
* Project: HybridCache
* Created Date: 24-2-29
* Author: lshb
*/
#ifndef HYBRIDCACHE_READ_CACHE_H_
#define HYBRIDCACHE_READ_CACHE_H_
#include "folly/TokenBucket.h"
#include "page_cache.h"
#include "data_adaptor.h"
namespace HybridCache {
// Read-through page cache: serves reads from the local page cache and
// transparently downloads missing ranges through a DataAdaptor, with the
// download bandwidth limited by a token bucket.
class ReadCache {
public:
ReadCache(const ReadCacheConfig& cfg,
std::shared_ptr<DataAdaptor> dataAdaptor,
std::shared_ptr<ThreadPool> executor);
ReadCache() = default;
~ReadCache() { Close(); }
// Read the local page cache first, and get it from the DataAdaptor if it misses
folly::Future<int> Get(const std::string &key,
size_t start,
size_t len,
ByteBuffer &buffer // user buf
);
// Insert a byte range of the file into the local page cache.
int Put(const std::string &key,
size_t start,
size_t len,
const ByteBuffer &buffer);
// Remove all cached pages belonging to `key`.
int Delete(const std::string &key);
// Collect the distinct file keys that currently have cached pages.
int GetAllKeys(std::set<std::string>& keys);
void Close();
private:
int Init();
// Compose the per-page cache key: "<key><PAGE_SEPARATOR><pageIndex>".
std::string GetPageKey(const std::string &key, size_t pageIndex);
private:
ReadCacheConfig cfg_;
std::shared_ptr<PageCache> pageCache_;
std::shared_ptr<DataAdaptor> dataAdaptor_; // backing store used on miss
std::shared_ptr<ThreadPool> executor_; // runs asynchronous downloads
std::shared_ptr<folly::TokenBucket> tokenBucket_; // download flow limit
};
} // namespace HybridCache
#endif // HYBRIDCACHE_READ_CACHE_H_

286
local_cache/write_cache.cpp Normal file
View File

@ -0,0 +1,286 @@
#include "glog/logging.h"
#include "errorcode.h"
#include "write_cache.h"
namespace HybridCache {
// Write [start, start+len) of `buffer` into the write cache under `key`,
// splitting the range on PageBodySize boundaries; records the key in keys_
// when at least one page was written. Stops at the first page failure and
// returns its error code.
int WriteCache::Put(const std::string &key, size_t start, size_t len,
                    const ByteBuffer &buffer) {
    std::chrono::steady_clock::time_point startTime;
    if (EnableLogging) startTime = std::chrono::steady_clock::now();

    int res = SUCCESS;
    uint32_t pageSize = cfg_.CacheCfg.PageBodySize;
    uint64_t index = start / pageSize;    // first page touched
    uint64_t pagePos = start % pageSize;  // offset inside that page
    uint64_t writeLen = 0;
    uint64_t writeOffset = 0;             // bytes of `buffer` consumed so far
    uint64_t writePageCnt = 0;
    size_t remainLen = len;
    while (remainLen > 0) {
        writeLen = pagePos + remainLen > pageSize ? pageSize - pagePos : remainLen;
        // No std::move: GetPageKey returns a prvalue, moving it would only
        // inhibit copy elision.
        std::string pageKey = GetPageKey(key, index);
        res = pageCache_->Write(pageKey, pagePos, writeLen,
                                (buffer.data + writeOffset));
        if (SUCCESS != res) break;
        ++writePageCnt;
        remainLen -= writeLen;
        ++index;
        writeOffset += writeLen;
        pagePos = (pagePos + writeLen) % pageSize;
    }
    // Record the key with a timestamp. NOTE(review): ConcurrentHashMap::insert
    // does not overwrite an existing entry, so the first-write time is kept.
    if (0 < writePageCnt)
        keys_.insert(key, time(nullptr));
    if (EnableLogging) {
        double totalTime = std::chrono::duration<double, std::milli>(
                std::chrono::steady_clock::now() - startTime).count();
        LOG(INFO) << "[WriteCache]Put, key:" << key << ", start:" << start
                  << ", len:" << len << ", res:" << res
                  << ", writePageCnt:" << writePageCnt
                  << ", time:" << totalTime << "ms";
    }
    return res;
}
// Read [start, start+len) from the write cache into `buffer`.
// Missing pages are not errors (they are simply skipped); `dataBoundary`
// receives the buffer-relative <offset, length> extents that were filled,
// with extents contiguous across page borders coalesced into one entry.
int WriteCache::Get(const std::string &key, size_t start, size_t len,
                    ByteBuffer &buffer,
                    std::vector<std::pair<size_t, size_t>>& dataBoundary) {
    std::chrono::steady_clock::time_point startTime;
    if (EnableLogging) startTime = std::chrono::steady_clock::now();

    int res = SUCCESS;
    uint32_t pageSize = cfg_.CacheCfg.PageBodySize;
    size_t index = start / pageSize;      // first page touched
    uint32_t pagePos = start % pageSize;  // offset inside that page
    size_t readLen = 0;
    size_t bufOffset = 0;
    size_t remainLen = len;
    uint64_t readPageCnt = 0;
    while (remainLen > 0) {
        readLen = pagePos + remainLen > pageSize ? pageSize - pagePos : remainLen;
        // No std::move: GetPageKey returns a prvalue (copy elision applies).
        std::string pageKey = GetPageKey(key, index);
        std::vector<std::pair<size_t, size_t>> stepDataBoundary;
        int tmpRes = pageCache_->Read(pageKey, pagePos, readLen,
                                      (buffer.data + bufOffset), stepDataBoundary);
        if (SUCCESS == tmpRes) {
            ++readPageCnt;
        } else if (PAGE_NOT_FOUND != tmpRes) {
            res = tmpRes;
            break;
        }
        for (auto& it : stepDataBoundary) {
            size_t realStart = it.first + bufOffset;  // rebase onto user buffer
            auto last = dataBoundary.rbegin();
            if (last != dataBoundary.rend() && (last->first + last->second) == realStart) {
                // Contiguous with the previous extent: extend it in place.
                last->second += it.second;
            } else {
                dataBoundary.push_back(std::make_pair(realStart, it.second));
            }
        }
        remainLen -= readLen;
        ++index;
        bufOffset += readLen;
        pagePos = (pagePos + readLen) % pageSize;
    }
    if (EnableLogging) {
        double totalTime = std::chrono::duration<double, std::milli>(
                std::chrono::steady_clock::now() - startTime).count();
        LOG(INFO) << "[WriteCache]Get, key:" << key << ", start:" << start
                  << ", len:" << len << ", res:" << res
                  << ", boundaryVecSize:" << dataBoundary.size()
                  << ", readPageCnt:" << readPageCnt
                  << ", time:" << totalTime << "ms";
    }
    return res;
}
// Return buffers covering every cached byte of `key`, each tagged with its
// offset within the whole file. Acquires the per-key lock and deliberately
// KEEPS it held on return (success or error) so the buffers stay valid until
// the caller invokes UnLock() or Delete(ALREADY_LOCKED).
int WriteCache::GetAllCacheWithLock(const std::string &key,
std::vector<std::pair<ByteBuffer, size_t>>& dataSegments) {
std::chrono::steady_clock::time_point startTime;
if (EnableLogging) startTime = std::chrono::steady_clock::now();
int res = SUCCESS;
Lock(key);
std::string firstPage = std::move(GetPageKey(key, 0));
auto pageKey = pageCache_->GetPageList().lower_bound(firstPage);
while (pageKey != pageCache_->GetPageList().end()) {
std::vector<std::string> tokens;
split(*pageKey, PAGE_SEPARATOR, tokens);
if (key != tokens[0]) break;
// tokens[1] is the page index; convert it to the page's file offset.
size_t pageIdx = 0;
std::stringstream sstream(tokens[1]);
sstream >> pageIdx;
size_t wholeValueOff = pageIdx * cfg_.CacheCfg.PageBodySize;
std::vector<std::pair<ByteBuffer, size_t>> stepDataSegments;
res = pageCache_->GetAllCache(*pageKey, stepDataSegments);
if (SUCCESS != res) break;
// Rebase each segment's page-relative offset onto the whole file.
for (auto& it : stepDataSegments) {
dataSegments.push_back(std::make_pair(it.first,
it.second + wholeValueOff));
}
++pageKey;
}
if (EnableLogging) {
double totalTime = std::chrono::duration<double, std::milli>(
std::chrono::steady_clock::now() - startTime).count();
LOG(INFO) << "[WriteCache]Get all cache with lock, key:" << key
<< ", res:" << res << ", dataVecSize:" << dataSegments.size()
<< ", time:" << totalTime << "ms";
}
return res;
}
// Delete all cached pages of `key` and drop it from the key table.
// `type` == ALREADY_LOCKED means the caller (e.g. after GetAllCacheWithLock)
// already holds the per-key lock; in either case the lock is released before
// returning.
int WriteCache::Delete(const std::string &key, LockType type) {
    std::chrono::steady_clock::time_point startTime;
    if (EnableLogging) startTime = std::chrono::steady_clock::now();

    int res = SUCCESS;
    if (LockType::ALREADY_LOCKED != type) {
        Lock(key);
    }
    keys_.erase(key);
    size_t delPageNum = 0;
    // No std::move: GetPageKey returns a prvalue (copy elision applies).
    std::string firstPage = GetPageKey(key, 0);
    auto pageKey = pageCache_->GetPageList().lower_bound(firstPage);
    while (pageKey != pageCache_->GetPageList().end()) {
        std::vector<std::string> tokens;
        split(*pageKey, PAGE_SEPARATOR, tokens);
        if (key != tokens[0]) break;  // walked past this key's pages
        int tmpRes = pageCache_->Delete(*pageKey);
        if (SUCCESS == tmpRes) {
            ++delPageNum;
        } else if (PAGE_NOT_FOUND != tmpRes) {  // already-gone pages are fine
            res = tmpRes;
            break;
        }
        ++pageKey;
    }
    UnLock(key);
    if (EnableLogging) {
        double totalTime = std::chrono::duration<double, std::milli>(
                std::chrono::steady_clock::now() - startTime).count();
        LOG(INFO) << "[WriteCache]Delete, key:" << key << ", res:" << res
                  << ", delPageCnt:" << delPageNum
                  << ", time:" << totalTime << "ms";
    }
    return res;
}
// Shrink the cached file `key` to `len` bytes: clear the tail of the page
// containing the new end (if `len` is not page-aligned), then delete every
// whole page at or beyond the new end under the per-key lock.
int WriteCache::Truncate(const std::string &key, size_t len) {
std::chrono::steady_clock::time_point startTime;
if (EnableLogging) startTime = std::chrono::steady_clock::now();
int res = SUCCESS;
uint32_t pageSize = cfg_.CacheCfg.PageBodySize;
uint64_t index = len / pageSize;
uint64_t pagePos = len % pageSize;
if (0 != pagePos) {
// New end falls inside a page: wipe that page's tail only.
uint32_t TruncateLen = pageSize - pagePos;
std::string TruncatePage = std::move(GetPageKey(key, index));
int tmpRes = pageCache_->DeletePart(TruncatePage, pagePos, TruncateLen);
if (SUCCESS != tmpRes && PAGE_NOT_FOUND != tmpRes) {
res = tmpRes;
}
++index; // whole-page deletion starts at the next page
}
size_t delPageNum = 0;
if (SUCCESS == res) {
Lock(key);
std::string firstPage = std::move(GetPageKey(key, index));
auto pageKey = pageCache_->GetPageList().lower_bound(firstPage);
while (pageKey != pageCache_->GetPageList().end()) {
std::vector<std::string> tokens;
split(*pageKey, PAGE_SEPARATOR, tokens);
if (key != tokens[0]) break;
int tmpRes = pageCache_->Delete(*pageKey);
if (SUCCESS == tmpRes) {
++delPageNum;
} else if (PAGE_NOT_FOUND != tmpRes) {
res = tmpRes;
break;
}
++pageKey;
}
UnLock(key);
}
if (EnableLogging) {
double totalTime = std::chrono::duration<double, std::milli>(
std::chrono::steady_clock::now() - startTime).count();
LOG(INFO) << "[WriteCache]Truncate, key:" << key << ", len:" << len
<< ", res:" << res << ", delPageCnt:" << delPageNum
<< ", time:" << totalTime << "ms";
}
return res;
}
// Release the per-key lock taken by Lock(): removing the key from the
// keyLocks_ skip list lets a spinning Lock() caller proceed.
void WriteCache::UnLock(const std::string &key) {
keyLocks_.erase(key);
if (EnableLogging) {
LOG(INFO) << "[WriteCache]UnLock, key:" << key;
}
}
// Copy the <key, create_time> table into `keys`. Always returns SUCCESS.
int WriteCache::GetAllKeys(std::map<std::string, time_t>& keys) {
    std::chrono::steady_clock::time_point beginTime;
    if (EnableLogging) beginTime = std::chrono::steady_clock::now();
    for (auto iter = keys_.begin(); iter != keys_.end(); ++iter) {
        keys[iter->first] = iter->second;
    }
    if (EnableLogging) {
        double elapsedMs = std::chrono::duration<double, std::milli>(
                std::chrono::steady_clock::now() - beginTime).count();
        LOG(INFO) << "[WriteCache]Get all keys, keyCnt:" << keys.size()
                  << ", time:" << elapsedMs << "ms";
    }
    return SUCCESS;
}
// Shut down the page cache and forget all tracked keys; called by the dtor.
void WriteCache::Close() {
pageCache_->Close();
keys_.clear();
LOG(WARNING) << "[WriteCache]Close";
}
// Current cache footprint in bytes (delegates to the page cache).
size_t WriteCache::GetCacheSize() {
return pageCache_->GetCacheSize();
}
// Configured cache capacity in bytes (delegates to the page cache).
size_t WriteCache::GetCacheMaxSize() {
return pageCache_->GetCacheMaxSize();
}
// Create and initialise the backing page cache; returns its init result.
int WriteCache::Init() {
pageCache_ = std::make_shared<PageCacheImpl>(cfg_.CacheCfg);
int res = pageCache_->Init();
LOG(WARNING) << "[WriteCache]Init, res:" << res;
return res;
}
// Acquire the per-key lock: spin until the key can be inserted into the
// keyLocks_ skip list (presence of the key means "locked").
// NOTE(review): pure busy-wait with no backoff or yield.
void WriteCache::Lock(const std::string &key) {
while(!keyLocks_.add(key));
}
// Compose the per-page cache key: "<key><PAGE_SEPARATOR><pageIndex>".
std::string WriteCache::GetPageKey(const std::string &key, size_t pageIndex) {
    return key + PAGE_SEPARATOR + std::to_string(pageIndex);
}
} // namespace HybridCache

74
local_cache/write_cache.h Normal file
View File

@ -0,0 +1,74 @@
/*
* Project: HybridCache
* Created Date: 24-3-18
* Author: lshb
*/
#ifndef HYBRIDCACHE_WRITE_CACHE_H_
#define HYBRIDCACHE_WRITE_CACHE_H_
#include "folly/concurrency/ConcurrentHashMap.h"
#include "page_cache.h"
namespace HybridCache {
// Write-back page cache: buffers written ranges locally, tracks dirty keys
// with their first-write time, and serialises per-key operations through a
// skip-list based spin lock.
class WriteCache {
public:
WriteCache(const WriteCacheConfig& cfg) : cfg_(cfg) { Init(); }
WriteCache() = default;
~WriteCache() { Close(); }
// Lock state passed to Delete(): ALREADY_LOCKED means the caller holds
// the per-key lock (e.g. after GetAllCacheWithLock).
enum class LockType {
NONE = 0,
ALREADY_LOCKED = -1,
};
// Write a byte range of the file into the cache.
int Put(const std::string &key,
size_t start,
size_t len,
const ByteBuffer &buffer
);
// Read a byte range; dataBoundary reports which parts were present.
int Get(const std::string &key,
size_t start,
size_t len,
ByteBuffer &buffer,
std::vector<std::pair<size_t, size_t>>& dataBoundary // valid data segment boundary
);
// lock to ensure the availability of the returned buf
// After being locked, it can be read and written, but cannot be deleted
int GetAllCacheWithLock(const std::string &key,
std::vector<std::pair<ByteBuffer, size_t>>& dataSegments // ByteBuffer + off of key value(file)
);
int Delete(const std::string &key, LockType type = LockType::NONE);
// Drop cached data at or beyond byte offset `len`.
int Truncate(const std::string &key, size_t len);
void UnLock(const std::string &key);
int GetAllKeys(std::map<std::string, time_t>& keys);
void Close();
size_t GetCacheSize();
size_t GetCacheMaxSize();
private:
int Init();
// Per-key spin lock (see keyLocks_ below).
void Lock(const std::string &key);
// Compose the per-page cache key: "<key><PAGE_SEPARATOR><pageIndex>".
std::string GetPageKey(const std::string &key, size_t pageIndex);
private:
WriteCacheConfig cfg_;
std::shared_ptr<PageCache> pageCache_;
folly::ConcurrentHashMap<std::string, time_t> keys_; // <key, create_time>
StringSkipList::Accessor keyLocks_ = StringSkipList::create(SKIP_LIST_HEIGHT); // presence of key indicates it is locked
};
} // namespace HybridCache
#endif // HYBRIDCACHE_WRITE_CACHE_H_

16
s3fs/CMakeLists.txt Normal file
View File

@ -0,0 +1,16 @@
# Build rules for s3fs: a FUSE executable plus a static library variant
# (s3fs_lib, without the FUSE entry point in s3fs.cpp) linked against the
# hybrid cache libraries.
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
# NOTE(review): appending to CMAKE_*_FLAGS is directory-scoped and leaks to
# every target below; prefer target_compile_options on the two targets.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -O3 -D_FILE_OFFSET_BITS=64 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O3 -D_FILE_OFFSET_BITS=64 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3")
# Executable sources: every .cpp except the library-only entry file.
# NOTE(review): GLOB hides file additions from diffs; an explicit source list
# is preferable once the file set stabilises.
file(GLOB_RECURSE ALL_SOURCES CONFIGURE_DEPENDS "*.cpp")
list(REMOVE_ITEM ALL_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/s3fs_lib.cpp")
add_executable(s3fs ${ALL_SOURCES})
target_include_directories(s3fs PRIVATE /usr/include/fuse /usr/include/libxml2)
# NOTE(review): raw -l flags; prefer imported targets (e.g. CURL::libcurl,
# Threads::Threads) where find_package provides them.
target_link_libraries(s3fs PUBLIC hybridcache_local madfs_global -lfuse -pthread -lcurl -lxml2 -lcrypto -ldl)
# Library sources: same tree minus the FUSE main() in s3fs.cpp.
file(GLOB_RECURSE LIB_SOURCES CONFIGURE_DEPENDS "*.cpp")
list(REMOVE_ITEM LIB_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/s3fs.cpp")
add_library(s3fs_lib STATIC ${LIB_SOURCES})
target_include_directories(s3fs_lib PRIVATE /usr/include/fuse /usr/include/libxml2)
target_link_libraries(s3fs_lib PUBLIC hybridcache_local madfs_global -pthread -lcurl -lxml2 -lcrypto -ldl)

248
s3fs/addhead.cpp Normal file
View File

@ -0,0 +1,248 @@
/*
* s3fs - FUSE-based file system backed by Amazon S3
*
* Copyright(C) 2007 Randy Rizun <rrizun@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <fstream>
#include <strings.h>
#include <vector>
#include "s3fs.h"
#include "addhead.h"
#include "curl_util.h"
#include "s3fs_logger.h"
//-------------------------------------------------------------------
// Symbols
//-------------------------------------------------------------------
static constexpr char ADD_HEAD_REGEX[] = "reg:";
//-------------------------------------------------------------------
// Class AdditionalHeader
//-------------------------------------------------------------------
AdditionalHeader AdditionalHeader::singleton;
//-------------------------------------------------------------------
// Class AdditionalHeader method
//-------------------------------------------------------------------
// Singleton-guard constructor: only the static `singleton` instance may be
// constructed; constructing any other instance aborts the process.
AdditionalHeader::AdditionalHeader()
{
if(this == AdditionalHeader::get()){
is_enable = false;
}else{
abort();
}
}
// Singleton-guard destructor: releases the loaded rule table; destroying any
// instance other than the singleton aborts the process.
AdditionalHeader::~AdditionalHeader()
{
if(this == AdditionalHeader::get()){
Unload();
}else{
abort();
}
}
// Parse an additional-header rule file. Each non-empty, non-'#' line is
// "<suffix | reg:regex> <Header-Name> <header value...>". Any previously
// loaded table is dropped first. Returns false on open failure or on a line
// with a key but no header name; lines with an uncompilable regex are skipped.
bool AdditionalHeader::Load(const char* file)
{
if(!file){
S3FS_PRN_WARN("file is nullptr.");
return false;
}
Unload();
std::ifstream AH(file);
if(!AH.good()){
S3FS_PRN_WARN("Could not open file(%s).", file);
return false;
}
// read file
std::string line;
while(getline(AH, line)){
if(line.empty()){
continue;
}
if('#' == line[0]){
continue;
}
// load a line
std::istringstream ss(line);
std::string key; // suffix(key)
std::string head; // additional HTTP header
std::string value; // header value
// A leading blank means "no key field" -> key stays empty.
if(0 == isblank(line[0])){
ss >> key;
}
if(ss){
ss >> head;
// Remainder of the line (after one separator char) is the value.
if(ss && static_cast<size_t>(ss.tellg()) < line.size()){
value = line.substr(static_cast<int>(ss.tellg()) + 1);
}
}
// check it
if(head.empty()){
if(key.empty()){
continue;
}
S3FS_PRN_ERR("file format error: %s key(suffix) is no HTTP header value.", key.c_str());
Unload();
return false;
}
if(0 == strncasecmp(key.c_str(), ADD_HEAD_REGEX, strlen(ADD_HEAD_REGEX))){
// regex
if(key.size() <= strlen(ADD_HEAD_REGEX)){
S3FS_PRN_ERR("file format error: %s key(suffix) does not have key std::string.", key.c_str());
continue;
}
// Strip the "reg:" prefix before compiling.
key.erase(0, strlen(ADD_HEAD_REGEX));
// compile
std::unique_ptr<regex_t> preg(new regex_t);
int result;
if(0 != (result = regcomp(preg.get(), key.c_str(), REG_EXTENDED | REG_NOSUB))){ // we do not need matching info
char errbuf[256];
regerror(result, preg.get(), errbuf, sizeof(errbuf));
S3FS_PRN_ERR("failed to compile regex from %s key by %s.", key.c_str(), errbuf);
continue;
}
addheadlist.emplace_back(std::move(preg), key, head, value);
}else{
// not regex, directly comparing
addheadlist.emplace_back(nullptr, key, head, value);
}
// set flag
is_enable = true;
}
return true;
}
// Drop every loaded rule and disable header injection.
void AdditionalHeader::Unload()
{
is_enable = false;
addheadlist.clear();
}
// Apply every matching rule to `meta`: regex rules match anywhere in `path`;
// non-regex rules match when `basestring` is a strict suffix of `path`
// (an empty basestring matches every path). Later rules overwrite earlier
// ones for the same header key. Returns false only when `path` is null.
bool AdditionalHeader::AddHeader(headers_t& meta, const char* path) const
{
if(!is_enable){
return true;
}
if(!path){
S3FS_PRN_WARN("path is nullptr.");
return false;
}
size_t pathlength = strlen(path);
// loop
//
// [NOTE]
// Because to allow duplicate key, and then scanning the entire table.
//
for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter){
const add_header *paddhead = &*iter;
if(paddhead->pregex){
// regex
regmatch_t match; // not use
if(0 == regexec(paddhead->pregex.get(), path, 1, &match, 0)){
// match -> adding header
meta[paddhead->headkey] = paddhead->headvalue;
}
}else{
// directly comparing
if(paddhead->basestring.length() < pathlength){
if(paddhead->basestring.empty() || paddhead->basestring == &path[pathlength - paddhead->basestring.length()]){
// match -> adding header
meta[paddhead->headkey] = paddhead->headvalue;
}
}
}
}
return true;
}
// curl variant: collect the matching headers into a temporary map, then
// merge them into the libcurl header list (sorted insert keeps the list
// canonical for request signing). Returns the (possibly unchanged) list.
struct curl_slist* AdditionalHeader::AddHeader(struct curl_slist* list, const char* path) const
{
headers_t meta;
if(!AddHeader(meta, path)){
return list;
}
for(headers_t::iterator iter = meta.begin(); iter != meta.end(); ++iter){
// Adding header
list = curl_slist_sort_insert(list, iter->first.c_str(), iter->second.c_str());
}
meta.clear();
S3FS_MALLOCTRIM(0);
return list;
}
// Debug aid: pretty-print the loaded rule table to the debug log.
// No-op unless debug-level logging is active. Always returns true.
bool AdditionalHeader::Dump() const
{
if(!S3fsLog::IsS3fsLogDbg()){
return true;
}
std::ostringstream ssdbg;
int cnt = 1;
ssdbg << "Additional Header list[" << addheadlist.size() << "] = {" << std::endl;
for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter, ++cnt){
const add_header *paddhead = &*iter;
ssdbg << " [" << cnt << "] = {" << std::endl;
if(paddhead->pregex){
ssdbg << " type\t\t--->\tregex" << std::endl;
}else{
ssdbg << " type\t\t--->\tsuffix matching" << std::endl;
}
ssdbg << " base std::string\t--->\t" << paddhead->basestring << std::endl;
ssdbg << " add header\t--->\t" << paddhead->headkey << ": " << paddhead->headvalue << std::endl;
ssdbg << " }" << std::endl;
}
ssdbg << "}" << std::endl;
// print all
S3FS_PRN_DBG("%s", ssdbg.str().c_str());
return true;
}
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: expandtab sw=4 ts=4 fdm=marker
* vim<600: expandtab sw=4 ts=4
*/

98
s3fs/addhead.h Normal file
View File

@ -0,0 +1,98 @@
/*
* s3fs - FUSE-based file system backed by Amazon S3
*
* Copyright(C) 2007 Randy Rizun <rrizun@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef S3FS_ADDHEAD_H_
#define S3FS_ADDHEAD_H_
#include <memory>
#include <regex.h>
#include <vector>
#include "metaheader.h"
//----------------------------------------------
// Structure / Typedef
//----------------------------------------------
// One rule from the additional-header file: when `pregex` is non-null the
// rule is a POSIX regex over the path, otherwise `basestring` is compared as
// a path suffix; on match, header "headkey: headvalue" is added.
struct add_header{
add_header(std::unique_ptr<regex_t> pregex, std::string basestring, std::string headkey, std::string headvalue)
: pregex(std::move(pregex))
, basestring(std::move(basestring))
, headkey(std::move(headkey))
, headvalue(std::move(headvalue))
{}
~add_header() {
if(pregex){
// Free the pattern buffers allocated by regcomp().
regfree(pregex.get());
}
}
add_header(const add_header&) = delete;
// Move is safe: the moved-from pregex becomes null, so its dtor skips regfree.
add_header(add_header&& val) = default;
add_header& operator=(const add_header&) = delete;
add_header& operator=(add_header&&) = delete;
std::unique_ptr<regex_t> pregex; // not nullptr means using regex, nullptr means comparing suffix directly.
std::string basestring;
std::string headkey;
std::string headvalue;
};
typedef std::vector<add_header> addheadlist_t;
//----------------------------------------------
// Class AdditionalHeader
//----------------------------------------------
// Process-wide singleton holding the additional-header rule table loaded
// from a config file; used to inject extra HTTP headers into S3 requests.
class AdditionalHeader
{
private:
static AdditionalHeader singleton;
bool is_enable; // true once at least one rule is loaded
addheadlist_t addheadlist; // rules, in file order (duplicates allowed)
protected:
AdditionalHeader();
~AdditionalHeader();
AdditionalHeader(const AdditionalHeader&) = delete;
AdditionalHeader(AdditionalHeader&&) = delete;
AdditionalHeader& operator=(const AdditionalHeader&) = delete;
AdditionalHeader& operator=(AdditionalHeader&&) = delete;
public:
// Reference singleton
static AdditionalHeader* get() { return &singleton; }
// Parse the rule file; replaces any previously loaded table.
bool Load(const char* file);
void Unload();
// Apply matching rules: to a header map, or into a libcurl header list.
bool AddHeader(headers_t& meta, const char* path) const;
struct curl_slist* AddHeader(struct curl_slist* list, const char* path) const;
// Log the rule table at debug level.
bool Dump() const;
};
#endif // S3FS_ADDHEAD_H_
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: expandtab sw=4 ts=4 fdm=marker
* vim<600: expandtab sw=4 ts=4
*/

78
s3fs/autolock.cpp Normal file
View File

@ -0,0 +1,78 @@
/*
* s3fs - FUSE-based file system backed by Amazon S3
*
* Copyright(C) 2007 Takeshi Nakatani <ggtakec.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <cstdlib>
#include <cerrno>
#include "autolock.h"
#include "s3fs_logger.h"
//-------------------------------------------------------------------
// Class AutoLock
//-------------------------------------------------------------------
// RAII lock guard over a pthread mutex.
//  NONE           -> block until the mutex is acquired (failure is fatal).
//  NO_WAIT        -> try once; EBUSY leaves the guard unacquired.
//  ALREADY_LOCKED -> adopt nothing; the caller keeps ownership and the
//                    destructor will not unlock.
AutoLock::AutoLock(pthread_mutex_t* pmutex, Type type) : auto_mutex(pmutex)
{
    switch(type){
        case ALREADY_LOCKED:
            is_lock_acquired = false;
            break;
        case NO_WAIT: {
            int rc = pthread_mutex_trylock(auto_mutex);
            if(0 == rc){
                is_lock_acquired = true;
            }else if(EBUSY == rc){
                is_lock_acquired = false;
            }else{
                // Any other error indicates a corrupted/invalid mutex.
                S3FS_PRN_CRIT("pthread_mutex_trylock returned: %d", rc);
                abort();
            }
            break;
        }
        default: {
            int rc = pthread_mutex_lock(auto_mutex);
            if(0 != rc){
                S3FS_PRN_CRIT("pthread_mutex_lock returned: %d", rc);
                abort();
            }
            is_lock_acquired = true;
            break;
        }
    }
}
// True when this guard owns the mutex and will unlock it on destruction.
bool AutoLock::isLockAcquired() const
{
return is_lock_acquired;
}
// Unlock only if this guard acquired the mutex; an unlock failure is fatal.
AutoLock::~AutoLock()
{
if (is_lock_acquired) {
int result = pthread_mutex_unlock(auto_mutex);
if(result != 0){
S3FS_PRN_CRIT("pthread_mutex_unlock returned: %d", result);
abort();
}
}
}
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: expandtab sw=4 ts=4 fdm=marker
* vim<600: expandtab sw=4 ts=4
*/

63
s3fs/autolock.h Normal file
View File

@ -0,0 +1,63 @@
/*
* s3fs - FUSE-based file system backed by Amazon S3
*
* Copyright(C) 2007 Randy Rizun <rrizun@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef S3FS_AUTOLOCK_H_
#define S3FS_AUTOLOCK_H_
#include <pthread.h>
//-------------------------------------------------------------------
// AutoLock Class
//-------------------------------------------------------------------
// RAII guard for a pthread mutex. The constructor (optionally) acquires the
// mutex and the destructor releases it, but only when this guard was the one
// that acquired it. Non-copyable and non-movable.
class AutoLock
{
    public:
        // How the constructor should treat the mutex:
        //   NONE           - block until the mutex is acquired (default)
        //   NO_WAIT        - trylock; check isLockAcquired() for the outcome
        //   ALREADY_LOCKED - caller already holds the mutex; do not unlock it
        enum Type {
            NO_WAIT = 1,
            ALREADY_LOCKED = 2,
            NONE = 0
        };
    private:
        pthread_mutex_t* const auto_mutex;   // guarded mutex (never owned)
        bool is_lock_acquired;               // true only if *we* locked it
    private:
        AutoLock(const AutoLock&) = delete;
        AutoLock(AutoLock&&) = delete;
        AutoLock& operator=(const AutoLock&) = delete;
        AutoLock& operator=(AutoLock&&) = delete;
    public:
        // Acquire (per "type") the given mutex; aborts on pthread errors.
        explicit AutoLock(pthread_mutex_t* pmutex, Type type = NONE);
        // Unlock the mutex if this guard acquired it.
        ~AutoLock();
        // Whether this guard acquired the lock (false for ALREADY_LOCKED
        // or a failed NO_WAIT attempt).
        bool isLockAcquired() const;
};
#endif // S3FS_AUTOLOCK_H_
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: expandtab sw=4 ts=4 fdm=marker
* vim<600: expandtab sw=4 ts=4
*/

933
s3fs/cache.cpp Normal file
View File

@ -0,0 +1,933 @@
/*
* s3fs - FUSE-based file system backed by Amazon S3
*
* Copyright(C) 2007 Randy Rizun <rrizun@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <algorithm>
#include <cerrno>
#include <cstdlib>
#include <vector>
#include "s3fs.h"
#include "s3fs_logger.h"
#include "s3fs_util.h"
#include "cache.h"
#include "autolock.h"
#include "string_util.h"
//-------------------------------------------------------------------
// Utility
//-------------------------------------------------------------------
// Stamp ts with the current coarse monotonic clock value; a clock_gettime
// failure is fatal (abort).
inline void SetStatCacheTime(struct timespec& ts)
{
    if(clock_gettime(static_cast<clockid_t>(CLOCK_MONOTONIC_COARSE), &ts) == -1){
        S3FS_PRN_CRIT("clock_gettime failed: %d", errno);
        abort();
    }
}
// Reset ts to 0.0 seconds, the "never stamped" sentinel value.
inline void InitStatCacheTime(struct timespec& ts)
{
    ts.tv_nsec = 0;
    ts.tv_sec  = 0;
}
// Three-way comparison of two timespec values.
//   returns -1 when ts1 < ts2, 0 when equal, 1 when ts1 > ts2
inline int CompareStatCacheTime(const struct timespec& ts1, const struct timespec& ts2)
{
    if(ts1.tv_sec != ts2.tv_sec){
        return (ts1.tv_sec < ts2.tv_sec) ? -1 : 1;
    }
    if(ts1.tv_nsec != ts2.tv_nsec){
        return (ts1.tv_nsec < ts2.tv_nsec) ? -1 : 1;
    }
    return 0;
}
// True when the stamp ts is strictly older than (now - expire seconds),
// i.e. the cached entry has outlived its expiration period.
inline bool IsExpireStatCacheTime(const struct timespec& ts, const time_t& expire)
{
    struct timespec threshold;
    SetStatCacheTime(threshold);
    threshold.tv_sec -= expire;     // oldest still-valid stamp
    return (CompareStatCacheTime(threshold, ts) > 0);
}
//
// For stats cache out
//
// List of stat-cache iterators collected as eviction candidates.
typedef std::vector<stat_cache_t::iterator> statiterlist_t;
// Comparator for eviction ordering: oldest cache_date first; when the
// timestamps are equal, the entry with the lower hit_count sorts first.
// Ties (equal time, equal-or-greater hit_count) compare as "not less".
struct sort_statiterlist{
    // ascending order
    bool operator()(const stat_cache_t::iterator& src1, const stat_cache_t::iterator& src2) const
    {
        int result = CompareStatCacheTime(src1->second.cache_date, src2->second.cache_date);
        if(0 == result){
            if(src1->second.hit_count < src2->second.hit_count){
                result = -1;
            }
        }
        return (result < 0);
    }
};
//
// For symbolic link cache out
//
// List of symlink-cache iterators collected as eviction candidates.
typedef std::vector<symlink_cache_t::iterator> symlinkiterlist_t;
// Comparator for symlink eviction ordering: oldest cache_date first, then
// lower hit_count on a timestamp tie (same policy as sort_statiterlist).
struct sort_symlinkiterlist{
    // ascending order
    bool operator()(const symlink_cache_t::iterator& src1, const symlink_cache_t::iterator& src2) const
    {
        int result = CompareStatCacheTime(src1->second.cache_date, src2->second.cache_date); // use the same as Stats
        if(0 == result){
            if(src1->second.hit_count < src2->second.hit_count){
                result = -1;
            }
        }
        return (result < 0);
    }
};
//-------------------------------------------------------------------
// Static
//-------------------------------------------------------------------
// Process-wide singleton instance (see getStatCacheData()).
StatCache StatCache::singleton;
// Guards stat_cache, symlink_cache and the notruncate file map.
pthread_mutex_t StatCache::stat_cache_lock;
//-------------------------------------------------------------------
// Constructor/Destructor
//-------------------------------------------------------------------
// Constructor. Defaults: expiration on with a 15-minute period, non-interval
// expiry, capacity 100000 entries, and negative ("no object") caching on.
// Only the static singleton may be constructed; the class-wide mutex is
// initialized here. Any other instantiation aborts.
StatCache::StatCache() : IsExpireTime(true), IsExpireIntervalType(false), ExpireTime(15 * 60), CacheSize(100000), IsCacheNoObject(true)
{
    if(this == StatCache::getStatCacheData()){
        stat_cache.clear();
        pthread_mutexattr_t attr;
        pthread_mutexattr_init(&attr);
#if S3FS_PTHREAD_ERRORCHECK
        // error-checking mutex in debug-style builds
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#endif
        int result;
        if(0 != (result = pthread_mutex_init(&StatCache::stat_cache_lock, &attr))){
            S3FS_PRN_CRIT("failed to init stat_cache_lock: %d", result);
            abort();
        }
    }else{
        // only the static singleton is allowed to exist
        abort();
    }
}
// Destructor (singleton teardown): clears the stat cache and destroys the
// class-wide mutex. Destruction of any non-singleton instance aborts.
StatCache::~StatCache()
{
    if(this == StatCache::getStatCacheData()){
        Clear();
        int result = pthread_mutex_destroy(&StatCache::stat_cache_lock);
        if(result != 0){
            S3FS_PRN_CRIT("failed to destroy stat_cache_lock: %d", result);
            abort();
        }
    }else{
        abort();
    }
}
//-------------------------------------------------------------------
// Methods
//-------------------------------------------------------------------
// Current maximum number of stat cache entries.
unsigned long StatCache::GetCacheSize() const
{
    return CacheSize;
}
// Install a new stat cache capacity; returns the previous capacity.
unsigned long StatCache::SetCacheSize(unsigned long size)
{
    const unsigned long previous = CacheSize;
    CacheSize = size;
    return previous;
}
// Expiration period in seconds, or -1 when expiration is disabled.
time_t StatCache::GetExpireTime() const
{
    if(!IsExpireTime){
        return static_cast<time_t>(-1);
    }
    return ExpireTime;
}
// Enable expiration with the given period (seconds). is_interval selects
// interval mode, where every cache hit refreshes the entry's timestamp.
// Returns the previously configured period.
time_t StatCache::SetExpireTime(time_t expire, bool is_interval)
{
    const time_t previous = ExpireTime;
    IsExpireTime         = true;
    IsExpireIntervalType = is_interval;
    ExpireTime           = expire;
    return previous;
}
// Disable expiration entirely. Returns the period that was in effect,
// or -1 if expiration was already disabled.
time_t StatCache::UnsetExpireTime()
{
    const time_t previous = IsExpireTime ? ExpireTime : static_cast<time_t>(-1);
    ExpireTime           = 0;
    IsExpireTime         = false;
    IsExpireIntervalType = false;
    return previous;
}
// Toggle caching of negative ("no such object") results;
// returns the previous setting.
bool StatCache::SetCacheNoObject(bool flag)
{
    const bool previous = IsCacheNoObject;
    IsCacheNoObject = flag;
    return previous;
}
// Drop every stat cache entry (takes stat_cache_lock). Note: the symlink
// cache and the notruncate map are not cleared here.
void StatCache::Clear()
{
    AutoLock lock(&StatCache::stat_cache_lock);
    stat_cache.clear();
    S3FS_MALLOCTRIM(0);
}
// Look up the stat cache entry for "key".
//
// pst       : if non-null, receives the cached struct stat
// meta      : if non-null, receives the cached headers
// overcheck : also try the directory form "key/" before "key" itself
// petag     : if non-null and non-empty, the cached ETag must match;
//             a mismatching entry counts as a miss and is deleted
// pisforce  : if non-null, receives the entry's forced-directory flag
//
// Returns true only on a usable positive hit. Negative ("no object")
// entries return false; expired (and not notruncate-pinned) or
// ETag-mismatched entries are removed as a side effect.
bool StatCache::GetStat(const std::string& key, struct stat* pst, headers_t* meta, bool overcheck, const char* petag, bool* pisforce)
{
    bool is_delete_cache = false;
    std::string strpath = key;
    AutoLock lock(&StatCache::stat_cache_lock);
    stat_cache_t::iterator iter = stat_cache.end();
    // prefer the "path/" (directory) entry when overcheck is requested
    if(overcheck && '/' != *strpath.rbegin()){
        strpath += "/";
        iter = stat_cache.find(strpath);
    }
    if(iter == stat_cache.end()){
        strpath = key;
        iter = stat_cache.find(strpath);
    }
    if(iter != stat_cache.end()){
        stat_cache_entry* ent = &iter->second;
        // usable while notruncate-pinned, expiry disabled, or not yet expired
        if(0 < ent->notruncate || !IsExpireTime || !IsExpireStatCacheTime(ent->cache_date, ExpireTime)){
            if(ent->noobjcache){
                if(!IsCacheNoObject){
                    // need to delete this cache.
                    DelStat(strpath, AutoLock::ALREADY_LOCKED);
                }else{
                    // noobjcache = true means no object.
                }
                return false;
            }
            // hit without checking etag
            std::string stretag;
            if(petag){
                // find & check ETag
                for(headers_t::iterator hiter = ent->meta.begin(); hiter != ent->meta.end(); ++hiter){
                    std::string tag = lower(hiter->first);
                    if(tag == "etag"){
                        stretag = hiter->second;
                        // empty petag means "any ETag is acceptable"
                        if('\0' != petag[0] && petag != stretag){
                            is_delete_cache = true;
                        }
                        break;
                    }
                }
            }
            if(is_delete_cache){
                // not hit by different ETag
                S3FS_PRN_DBG("stat cache not hit by ETag[path=%s][time=%lld.%09ld][hit count=%lu][ETag(%s)!=(%s)]",
                    strpath.c_str(), static_cast<long long>(ent->cache_date.tv_sec), ent->cache_date.tv_nsec, ent->hit_count, petag ? petag : "null", stretag.c_str());
            }else{
                // hit
                S3FS_PRN_DBG("stat cache hit [path=%s][time=%lld.%09ld][hit count=%lu]",
                    strpath.c_str(), static_cast<long long>(ent->cache_date.tv_sec), ent->cache_date.tv_nsec, ent->hit_count);
                if(pst!= nullptr){
                    *pst= ent->stbuf;
                }
                if(meta != nullptr){
                    *meta = ent->meta;
                }
                if(pisforce != nullptr){
                    (*pisforce) = ent->isforce;
                }
                ent->hit_count++;
                // interval mode: a hit refreshes the entry's timestamp
                if(IsExpireIntervalType){
                    SetStatCacheTime(ent->cache_date);
                }
                return true;
            }
        }else{
            // timeout
            is_delete_cache = true;
        }
    }
    if(is_delete_cache){
        DelStat(strpath, AutoLock::ALREADY_LOCKED);
    }
    return false;
}
// Check whether "key" is cached as a known non-existent object.
//
// overcheck: also try the directory form "key/" before "key" itself.
//
// Returns true only when no-object caching is enabled and a valid (pinned
// or unexpired) negative entry exists; a true hit refreshes the entry's
// timestamp. Expired entries are removed as a side effect.
bool StatCache::IsNoObjectCache(const std::string& key, bool overcheck)
{
    bool is_delete_cache = false;
    std::string strpath = key;
    if(!IsCacheNoObject){
        return false;
    }
    AutoLock lock(&StatCache::stat_cache_lock);
    stat_cache_t::iterator iter = stat_cache.end();
    // prefer the "path/" (directory) entry when overcheck is requested
    if(overcheck && '/' != *strpath.rbegin()){
        strpath += "/";
        iter = stat_cache.find(strpath);
    }
    if(iter == stat_cache.end()){
        strpath = key;
        iter = stat_cache.find(strpath);
    }
    if(iter != stat_cache.end()) {
        const stat_cache_entry* ent = &iter->second;
        // usable while notruncate-pinned, expiry disabled, or not yet expired
        if(0 < ent->notruncate || !IsExpireTime || !IsExpireStatCacheTime(iter->second.cache_date, ExpireTime)){
            if(iter->second.noobjcache){
                // noobjcache = true means no object.
                SetStatCacheTime((*iter).second.cache_date);
                return true;
            }
        }else{
            // timeout
            is_delete_cache = true;
        }
    }
    if(is_delete_cache){
        DelStat(strpath, AutoLock::ALREADY_LOCKED);
    }
    return false;
}
// Insert (or replace) the stat cache entry for "key" built from "meta".
//
// forcedir    : mark the entry as a forced directory
// no_truncate : pin the entry so expiry/truncation will not drop it, and
//               register the file in the notruncate map (for readdir)
//
// Only a whitelist of headers (content-type, content-length, etag,
// last-modified, x-amz-*) is copied into the cached meta.
bool StatCache::AddStat(const std::string& key, const headers_t& meta, bool forcedir, bool no_truncate)
{
    if(!no_truncate && CacheSize< 1){
        // caching disabled; report success without storing anything
        return true;
    }
    S3FS_PRN_INFO3("add stat cache entry[path=%s]", key.c_str());
    AutoLock lock(&StatCache::stat_cache_lock);
    if(stat_cache.end() != stat_cache.find(key)){
        // found cache
        DelStat(key.c_str(), AutoLock::ALREADY_LOCKED);
    }else{
        // check: need to truncate cache
        if(stat_cache.size() > CacheSize){
            // cppcheck-suppress unmatchedSuppression
            // cppcheck-suppress knownConditionTrueFalse
            if(!TruncateCache(AutoLock::ALREADY_LOCKED)){
                return false;
            }
        }
    }
    // make new
    stat_cache_entry ent;
    if(!convert_header_to_stat(key.c_str(), meta, &ent.stbuf, forcedir)){
        return false;
    }
    ent.hit_count = 0;
    ent.isforce = forcedir;
    ent.noobjcache = false;
    ent.notruncate = (no_truncate ? 1L : 0L);
    ent.meta.clear();
    SetStatCacheTime(ent.cache_date); // Set time.
    //copy only some keys
    for(headers_t::const_iterator iter = meta.begin(); iter != meta.end(); ++iter){
        std::string tag = lower(iter->first);
        std::string value = iter->second;
        if(tag == "content-type"){
            ent.meta[iter->first] = value;
        }else if(tag == "content-length"){
            ent.meta[iter->first] = value;
        }else if(tag == "etag"){
            ent.meta[iter->first] = value;
        }else if(tag == "last-modified"){
            ent.meta[iter->first] = value;
        }else if(is_prefix(tag.c_str(), "x-amz")){
            ent.meta[tag] = value; // key is lower case for "x-amz"
        }
    }
    const auto& value = stat_cache[key] = std::move(ent);
    // check symbolic link cache
    if(!S_ISLNK(value.stbuf.st_mode)){
        if(symlink_cache.end() != symlink_cache.find(key)){
            // if symbolic link cache has key, thus remove it.
            DelSymlink(key.c_str(), AutoLock::ALREADY_LOCKED);
        }
    }
    // If no_truncate flag is set, set file name to notruncate_file_cache
    //
    if(no_truncate){
        AddNotruncateCache(key);
    }
    return true;
}
// [NOTE]
// Updates only meta data if cached data exists.
// And when these are updated, it also updates the cache time.
//
// Since the file mode may change while the file is open, it is
// updated as well.
//
// Update the cached meta headers (same whitelist as AddStat), refresh the
// entry's timestamp, and re-derive only st_mode from the new headers.
// A missing entry is not an error (returns true without caching).
bool StatCache::UpdateMetaStats(const std::string& key, const headers_t& meta)
{
    if(CacheSize < 1){
        // caching disabled
        return true;
    }
    S3FS_PRN_INFO3("update stat cache entry[path=%s]", key.c_str());
    AutoLock lock(&StatCache::stat_cache_lock);
    stat_cache_t::iterator iter = stat_cache.find(key);
    if(stat_cache.end() == iter){
        // nothing cached for this key; nothing to update
        return true;
    }
    stat_cache_entry* ent = &iter->second;
    // update only meta keys
    for(headers_t::const_iterator metaiter = meta.begin(); metaiter != meta.end(); ++metaiter){
        std::string tag = lower(metaiter->first);
        std::string value = metaiter->second;
        if(tag == "content-type"){
            ent->meta[metaiter->first] = value;
        }else if(tag == "content-length"){
            ent->meta[metaiter->first] = value;
        }else if(tag == "etag"){
            ent->meta[metaiter->first] = value;
        }else if(tag == "last-modified"){
            ent->meta[metaiter->first] = value;
        }else if(is_prefix(tag.c_str(), "x-amz")){
            ent->meta[tag] = value; // key is lower case for "x-amz"
        }
    }
    // Update time.
    SetStatCacheTime(ent->cache_date);
    // Update only mode
    ent->stbuf.st_mode = get_mode(meta, key);
    return true;
}
// Record "key" as a known non-existent object (negative cache entry).
// No-op (reported as success) when negative caching or caching itself is
// disabled. Any symlink cache entry for the same key is removed.
bool StatCache::AddNoObjectCache(const std::string& key)
{
    if(!IsCacheNoObject){
        return true; // pretend successful
    }
    if(CacheSize < 1){
        // caching disabled
        return true;
    }
    S3FS_PRN_INFO3("add no object cache entry[path=%s]", key.c_str());
    AutoLock lock(&StatCache::stat_cache_lock);
    if(stat_cache.end() != stat_cache.find(key)){
        // found
        DelStat(key.c_str(), AutoLock::ALREADY_LOCKED);
    }else{
        // check: need to truncate cache
        if(stat_cache.size() > CacheSize){
            // cppcheck-suppress unmatchedSuppression
            // cppcheck-suppress knownConditionTrueFalse
            if(!TruncateCache(AutoLock::ALREADY_LOCKED)){
                return false;
            }
        }
    }
    // make new negative entry: zeroed stat, noobjcache flag set
    stat_cache_entry ent;
    memset(&ent.stbuf, 0, sizeof(struct stat));
    ent.hit_count = 0;
    ent.isforce = false;
    ent.noobjcache = true;
    ent.notruncate = 0L;
    ent.meta.clear();
    SetStatCacheTime(ent.cache_date); // Set time.
    stat_cache[key] = std::move(ent);
    // check symbolic link cache
    if(symlink_cache.end() != symlink_cache.find(key)){
        // if symbolic link cache has key, thus remove it.
        DelSymlink(key.c_str(), AutoLock::ALREADY_LOCKED);
    }
    return true;
}
// Increment (no_truncate=true) or decrement (false) the notruncate pin
// count of an existing entry. The 0->1 transition registers the file in
// the notruncate map; the 1->0 transition unregisters it. Keys with no
// cache entry are ignored.
void StatCache::ChangeNoTruncateFlag(const std::string& key, bool no_truncate)
{
    AutoLock lock(&StatCache::stat_cache_lock);
    stat_cache_t::iterator iter = stat_cache.find(key);
    if(stat_cache.end() != iter){
        stat_cache_entry* ent = &iter->second;
        if(no_truncate){
            if(0L == ent->notruncate){
                // need to add no truncate cache.
                AddNotruncateCache(key);
            }
            ++(ent->notruncate);
        }else{
            if(0L < ent->notruncate){
                --(ent->notruncate);
                if(0L == ent->notruncate){
                    // need to delete from no truncate cache.
                    DelNotruncateCache(key);
                }
            }
        }
    }
}
// Shrink the stat cache: first drop expired unpinned entries, then, if the
// cache is still at or over capacity, evict the oldest unpinned entries
// until one slot is free. Pinned (notruncate) entries are never evicted;
// each one encountered reduces the eviction budget instead.
//
// locktype: pass AutoLock::ALREADY_LOCKED when the caller already holds
// stat_cache_lock.
bool StatCache::TruncateCache(AutoLock::Type locktype)
{
    AutoLock lock(&StatCache::stat_cache_lock, locktype);
    if(stat_cache.empty()){
        return true;
    }
    // 1) erase over expire time
    if(IsExpireTime){
        for(stat_cache_t::iterator iter = stat_cache.begin(); iter != stat_cache.end(); ){
            const stat_cache_entry* entry = &iter->second;
            // notruncate-pinned entries survive even when expired
            if(0L == entry->notruncate && IsExpireStatCacheTime(entry->cache_date, ExpireTime)){
                iter = stat_cache.erase(iter);
            }else{
                ++iter;
            }
        }
    }
    // 2) check stat cache count
    if(stat_cache.size() < CacheSize){
        return true;
    }
    // 3) erase from the old cache in order
    size_t erase_count= stat_cache.size() - CacheSize + 1;
    statiterlist_t erase_iters;
    for(stat_cache_t::iterator iter = stat_cache.begin(); iter != stat_cache.end() && 0 < erase_count; ++iter){
        // check no truncate
        const stat_cache_entry* ent = &iter->second;
        if(0L < ent->notruncate){
            // skip for no truncate entry and keep extra counts for this entity.
            if(0 < erase_count){
                --erase_count; // decrement
            }
        }else{
            // iter is not have notruncate flag
            erase_iters.push_back(iter);
        }
        // keep only the erase_count oldest candidates collected so far
        if(erase_count < erase_iters.size()){
            std::sort(erase_iters.begin(), erase_iters.end(), sort_statiterlist());
            while(erase_count < erase_iters.size()){
                erase_iters.pop_back();
            }
        }
    }
    for(statiterlist_t::iterator iiter = erase_iters.begin(); iiter != erase_iters.end(); ++iiter){
        stat_cache_t::iterator siter = *iiter;
        S3FS_PRN_DBG("truncate stat cache[path=%s]", siter->first.c_str());
        stat_cache.erase(siter);
    }
    S3FS_MALLOCTRIM(0);
    return true;
}
// Delete the cache entry for "key" together with its counterpart spelled
// with/without a trailing '/' (file form vs. directory form), and remove
// both from the notruncate map. Returns false only for a null key.
//
// locktype: pass AutoLock::ALREADY_LOCKED when stat_cache_lock is held.
bool StatCache::DelStat(const char* key, AutoLock::Type locktype)
{
    if(!key){
        return false;
    }
    S3FS_PRN_INFO3("delete stat cache entry[path=%s]", key);
    AutoLock lock(&StatCache::stat_cache_lock, locktype);
    stat_cache_t::iterator iter;
    if(stat_cache.end() != (iter = stat_cache.find(key))){
        stat_cache.erase(iter);
        DelNotruncateCache(key);
    }
    // also remove the alternate file/directory spelling (skip for root "/")
    if(0 < strlen(key) && 0 != strcmp(key, "/")){
        std::string strpath = key;
        if('/' == *strpath.rbegin()){
            // If there is "path" cache, delete it.
            strpath.erase(strpath.length() - 1);
        }else{
            // If there is "path/" cache, delete it.
            strpath += "/";
        }
        if(stat_cache.end() != (iter = stat_cache.find(strpath))){
            stat_cache.erase(iter);
            DelNotruncateCache(strpath);
        }
    }
    S3FS_MALLOCTRIM(0);
    return true;
}
// Look up the cached symbolic link target for "key"; on a valid hit the
// target is stored into "value", the hit count is bumped, and (in interval
// mode) the timestamp is refreshed. Expired entries are removed and the
// lookup reports a miss.
bool StatCache::GetSymlink(const std::string& key, std::string& value)
{
    bool is_delete_cache = false;
    const std::string& strpath = key;
    AutoLock lock(&StatCache::stat_cache_lock);
    symlink_cache_t::iterator iter = symlink_cache.find(strpath);
    if(iter != symlink_cache.end()){
        symlink_cache_entry* ent = &iter->second;
        if(!IsExpireTime || !IsExpireStatCacheTime(ent->cache_date, ExpireTime)){ // use the same as Stats
            // found
            S3FS_PRN_DBG("symbolic link cache hit [path=%s][time=%lld.%09ld][hit count=%lu]",
                strpath.c_str(), static_cast<long long>(ent->cache_date.tv_sec), ent->cache_date.tv_nsec, ent->hit_count);
            value = ent->link;
            ent->hit_count++;
            // interval mode: a hit refreshes the entry's timestamp
            if(IsExpireIntervalType){
                SetStatCacheTime(ent->cache_date);
            }
            return true;
        }else{
            // timeout
            is_delete_cache = true;
        }
    }
    if(is_delete_cache){
        DelSymlink(strpath.c_str(), AutoLock::ALREADY_LOCKED);
    }
    return false;
}
// Insert (or replace) the symbolic link cache entry mapping "key" to its
// link target "value". No-op (reported as success) when caching is
// disabled; may truncate the symlink cache first when at capacity.
bool StatCache::AddSymlink(const std::string& key, const std::string& value)
{
    if(CacheSize< 1){
        // caching disabled
        return true;
    }
    S3FS_PRN_INFO3("add symbolic link cache entry[path=%s, value=%s]", key.c_str(), value.c_str());
    AutoLock lock(&StatCache::stat_cache_lock);
    if(symlink_cache.end() != symlink_cache.find(key)){
        // found
        DelSymlink(key.c_str(), AutoLock::ALREADY_LOCKED);
    }else{
        // check: need to truncate cache
        if(symlink_cache.size() > CacheSize){
            // cppcheck-suppress unmatchedSuppression
            // cppcheck-suppress knownConditionTrueFalse
            if(!TruncateSymlink(AutoLock::ALREADY_LOCKED)){
                return false;
            }
        }
    }
    // make new
    symlink_cache_entry ent;
    ent.link = value;
    ent.hit_count = 0;
    SetStatCacheTime(ent.cache_date); // Set time(use the same as Stats).
    symlink_cache[key] = std::move(ent);
    return true;
}
// Shrink the symbolic link cache: first drop expired entries, then, if the
// cache is still at or over capacity, evict the oldest entries until one
// slot is free (same eviction policy as TruncateCache).
//
// locktype: pass AutoLock::ALREADY_LOCKED when the caller already holds
// stat_cache_lock.
//
// [FIX] The previous implementation re-sorted the candidate list inside the
// collection loop on every iteration (accidental O(n^2 log n)). The
// candidates are now collected once and sorted once; the resulting set of
// evicted entries (the erase_count oldest per sort_symlinkiterlist) is
// unchanged.
bool StatCache::TruncateSymlink(AutoLock::Type locktype)
{
    AutoLock lock(&StatCache::stat_cache_lock, locktype);
    if(symlink_cache.empty()){
        return true;
    }
    // 1) erase over expire time
    if(IsExpireTime){
        for(symlink_cache_t::iterator iter = symlink_cache.begin(); iter != symlink_cache.end(); ){
            const symlink_cache_entry* entry = &iter->second;
            if(IsExpireStatCacheTime(entry->cache_date, ExpireTime)){ // use the same as Stats
                iter = symlink_cache.erase(iter);
            }else{
                ++iter;
            }
        }
    }
    // 2) check symlink cache count
    if(symlink_cache.size() < CacheSize){
        return true;
    }
    // 3) erase from the old cache in order
    size_t erase_count= symlink_cache.size() - CacheSize + 1;
    symlinkiterlist_t erase_iters;
    erase_iters.reserve(symlink_cache.size());
    for(symlink_cache_t::iterator iter = symlink_cache.begin(); iter != symlink_cache.end(); ++iter){
        erase_iters.push_back(iter);
    }
    // single sort, oldest first; keep only the erase_count oldest entries
    std::sort(erase_iters.begin(), erase_iters.end(), sort_symlinkiterlist());
    if(erase_count < erase_iters.size()){
        erase_iters.resize(erase_count);
    }
    for(symlinkiterlist_t::iterator iiter = erase_iters.begin(); iiter != erase_iters.end(); ++iiter){
        symlink_cache_t::iterator siter = *iiter;
        S3FS_PRN_DBG("truncate symbolic link cache[path=%s]", siter->first.c_str());
        symlink_cache.erase(siter);
    }
    S3FS_MALLOCTRIM(0);
    return true;
}
bool StatCache::DelSymlink(const char* key, AutoLock::Type locktype)
{
if(!key){
return false;
}
S3FS_PRN_INFO3("delete symbolic link cache entry[path=%s]", key);
AutoLock lock(&StatCache::stat_cache_lock, locktype);
symlink_cache_t::iterator iter;
if(symlink_cache.end() != (iter = symlink_cache.find(key))){
symlink_cache.erase(iter);
}
S3FS_MALLOCTRIM(0);
return true;
}
// [NOTE]
// Need to lock StatCache::stat_cache_lock before calling this method.
//
bool StatCache::AddNotruncateCache(const std::string& key)
{
if(key.empty() || '/' == *key.rbegin()){
return false;
}
std::string parentdir = mydirname(key);
std::string filename = mybasename(key);
if(parentdir.empty() || filename.empty()){
return false;
}
parentdir += '/'; // directory path must be '/' termination.
notruncate_dir_map_t::iterator iter = notruncate_file_cache.find(parentdir);
if(iter == notruncate_file_cache.end()){
// add new list
notruncate_filelist_t list;
list.push_back(filename);
notruncate_file_cache[parentdir] = list;
}else{
// add filename to existed list
notruncate_filelist_t& filelist = iter->second;
notruncate_filelist_t::const_iterator fiter = std::find(filelist.begin(), filelist.end(), filename);
if(fiter == filelist.end()){
filelist.push_back(filename);
}
}
return true;
}
// [NOTE]
// Need to lock StatCache::stat_cache_lock before calling this method.
//
bool StatCache::DelNotruncateCache(const std::string& key)
{
if(key.empty() || '/' == *key.rbegin()){
return false;
}
std::string parentdir = mydirname(key);
std::string filename = mybasename(key);
if(parentdir.empty() || filename.empty()){
return false;
}
parentdir += '/'; // directory path must be '/' termination.
notruncate_dir_map_t::iterator iter = notruncate_file_cache.find(parentdir);
if(iter != notruncate_file_cache.end()){
// found directory in map
notruncate_filelist_t& filelist = iter->second;
notruncate_filelist_t::iterator fiter = std::find(filelist.begin(), filelist.end(), filename);
if(fiter != filelist.end()){
// found filename in directory file list
filelist.erase(fiter);
if(filelist.empty()){
notruncate_file_cache.erase(parentdir);
}
}
}
return true;
}
// [Background]
// When s3fs creates a new file, the file does not exist until the file contents
// are uploaded.(because it doesn't create a 0 byte file)
// From the time this file is created(opened) until it is uploaded(flush), it
// will have a Stat cache with the No truncate flag added.
// This avoids file not existing errors in operations such as chmod and utimens
// that occur in the short period before file upload.
// Besides this, we also need to support readdir(list_bucket), this method is
// called to maintain the cache for readdir and return its value.
//
// [NOTE]
// Add the file names under parentdir to the list.
// However, if the same file name exists in the list, it will not be added.
// parentdir must be terminated with a '/'.
//
bool StatCache::GetNotruncateCache(const std::string& parentdir, notruncate_filelist_t& list)
{
if(parentdir.empty()){
return false;
}
std::string dirpath = parentdir;
if('/' != *dirpath.rbegin()){
dirpath += '/';
}
AutoLock lock(&StatCache::stat_cache_lock);
notruncate_dir_map_t::iterator iter = notruncate_file_cache.find(dirpath);
if(iter == notruncate_file_cache.end()){
// not found directory map
return true;
}
// found directory in map
const notruncate_filelist_t& filelist = iter->second;
for(notruncate_filelist_t::const_iterator fiter = filelist.begin(); fiter != filelist.end(); ++fiter){
if(list.end() == std::find(list.begin(), list.end(), *fiter)){
// found notuncate file that does not exist in the list, so add it.
list.push_back(*fiter);
}
}
return true;
}
//-------------------------------------------------------------------
// Functions
//-------------------------------------------------------------------
// Build a struct stat from S3 object meta headers.
//
// path     : object path (passed to the mode lookup helper)
// meta     : object response headers
// pst      : output stat buffer (fully overwritten)
// forcedir : treat the object as a directory regardless of meta
//
// Returns false on null path/pst or when mode conversion fails upstream.
//
// [FIX] st_size is now computed before st_blocks. Previously st_blocks was
// derived from pst->st_size while it was still 0 from the memset (size was
// only assigned at the end), so regular files always reported 0 blocks.
bool convert_header_to_stat(const char* path, const headers_t& meta, struct stat* pst, bool forcedir)
{
    if(!path || !pst){
        return false;
    }
    memset(pst, 0, sizeof(struct stat));
    pst->st_nlink = 1; // see fuse FAQ

    // mode
    pst->st_mode = get_mode(meta, path, true, forcedir);

    // size (must be set before st_blocks, which is derived from it)
    if(S_ISDIR(pst->st_mode)){
        pst->st_size = 4096;
    }else{
        pst->st_size = get_size(meta);
    }

    // blocks
    if(S_ISREG(pst->st_mode)){
        pst->st_blocks = get_blocks(pst->st_size);
    }
    pst->st_blksize = 4096;

    // mtime
    struct timespec mtime = get_mtime(meta);
    if(pst->st_mtime < 0){
        pst->st_mtime = 0L;
    }else{
        if(mtime.tv_sec < 0){
            mtime.tv_sec  = 0;
            mtime.tv_nsec = 0;
        }
        set_timespec_to_stat(*pst, stat_time_type::MTIME, mtime);
    }
    // ctime
    struct timespec ctime = get_ctime(meta);
    if(pst->st_ctime < 0){
        pst->st_ctime = 0L;
    }else{
        if(ctime.tv_sec < 0){
            ctime.tv_sec  = 0;
            ctime.tv_nsec = 0;
        }
        set_timespec_to_stat(*pst, stat_time_type::CTIME, ctime);
    }
    // atime
    struct timespec atime = get_atime(meta);
    if(pst->st_atime < 0){
        pst->st_atime = 0L;
    }else{
        if(atime.tv_sec < 0){
            atime.tv_sec  = 0;
            atime.tv_nsec = 0;
        }
        set_timespec_to_stat(*pst, stat_time_type::ATIME, atime);
    }

    // uid/gid
    pst->st_uid = get_uid(meta);
    pst->st_gid = get_gid(meta);
    return true;
}
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: expandtab sw=4 ts=4 fdm=marker
* vim<600: expandtab sw=4 ts=4
*/

Some files were not shown because too many files have changed in this diff Show More