diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..370b848 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,24 @@ +project(hybridcache) + +cmake_minimum_required(VERSION 3.7) +cmake_policy(SET CMP0079 NEW) +set(CMAKE_CXX_STANDARD 17) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-PIE") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fno-PIE") + +list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/thirdparties) +list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/thirdparties/CmakeFiles) +include(ThirdPartyConfig) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG -O3 -g -D__const__=__unused__ -pipe -W -Wno-deprecated -Wno-sign-compare -Wno-unused-parameter -fPIC") + +include_directories(AFTER ${CMAKE_SOURCE_DIR}/local_cache ${CMAKE_SOURCE_DIR}/global_cache) +include_directories(AFTER ${CMAKE_BINARY_DIR}/local_cache ${CMAKE_BINARY_DIR}/global_cache) + +# subdirectory +add_subdirectory(local_cache) +add_subdirectory(global_cache) +add_subdirectory(s3fs) +add_subdirectory(intercept) +add_subdirectory(test) diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..d511905 --- /dev/null +++ b/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. 
(Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. 
To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/README.md b/README.md index e53ce33..5532956 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,153 @@ # JYCache +**九源缓存存储系统(简称:JYCache)** 是一款面向个人使用、大模型训练推理等多种场景,适配大容量对象存储等多种底层存储形态,高性能、易扩展的分布式缓存存储系统。通过层次化架构、接入层优化、I/O优化等多种组合优化,JYCache 不仅支持文件顺序/随机读写,其读写性能也领先国际主流产品 Alluxio。JYCache 现支持在 X86(Intel、AMD、海光等)及 ARM(鲲鹏、飞腾等)平台下运行。 + +缓存存储系统面向个人使用及集群使用等场景,可为用户提供以下两种运行模式: +1. **单机对象加速**:将 S3 对象存储通过 POSIX 接口挂载到本地,像本地磁盘一样进行读写访问。S3 上的一个完整对象对应本地的一个文件,通过对象名构造目录树结构。进一步地,热点数据可缓存于本地的 DRAM/SSD,通过减少与 S3 的数据交互操作,可提升文件系统性能。 +2. **分布式对象加速**:将 S3 对象存储通过 POSIX 接口挂载到本地,像本地磁盘一样进行读写访问。S3 上的一个完整对象对应本地的一个文件,通过对象名构造目录树结构。热点数据呈现两级缓存结构,除各个客户端内部 DRAM/SSD 缓存外,还提供一层共享的 DRAM/SSD 缓存,进一步提高缓存命中率,提升并发读等场景下的 IO 性能。 + +## 主要特性 + + - **兼容 POSIX 接口**。通过 FUSE 或动态库劫持技术,应用程序无需重新编译即可立即实现缓存存储加速。 + - **高可用缓存写**。数据写入缓存层即可视为持久化,通过多副本、纠删码机制实现缓存层内数据高可用,降低下层存储系统压力,提高 I/O 性能。 + - **支持用户态零拷贝 I/O**。动态库劫持技术(Syscall intercept)实现全用户态 I/O,降低上下文切换和拷贝,实现极限性能。 + - **层次化缓存存储**。本地缓存与计算任务同机部署,使用高速共享缓存可为用户进程提供高达 45GB/s 的缓存带宽;为进一步提高分布式系统缓存效率,可额外部署全局缓存服务,通过与多个本地缓存相关联,进一步提高缓存命中率。 + - **易于扩展和集成**。本地缓存与全局缓存采用模块化设计,可依据业务需要实现多样的组合。 + - **兼容多种平台**。支持在 X86(Intel、AMD、海光等)及 ARM(鲲鹏、飞腾等)平台下运行。 + +## 系统架构 + +![](doc/image/JYCache_architecture.PNG) +在单机对象加速部署模式下,对象存储可通过 FUSE(基于S3FS(V1.94)实现) 或系统调用劫持等方式挂载到本地,用户可像本地磁盘一样进行读写访问。对象存储系统上的一个完整对象对应本地的一个文件,通过对象名构造目录树结构。热点数据可缓存于本地的 DRAM/SSD,通过减少与对象存储系统的数据交互操作,可提升文件系统性能。 + +在分布式对象加速模式下,热点数据呈现两级缓存结构,除各个客户端内部 DRAM/SSD 缓存外,还提供一层共享的 DRAM/SSD 缓存,进一步提高缓存命中率,提升并发读等场景下的 IO 性能。 + +缓存存储系统的两个核心部件是客户端缓存模块及全局缓存模块。客户端缓存模块内部包含写缓存、读缓存。客户端缓存模块按需向全局缓存服务器发出 RPC 通信请求,实现数据的传递。全局缓存服务器包含写缓存和读缓存,其中写缓存提供多副本等高可用模式。当用户发出下刷(fsync)请求时,写数据会落入此处,可容忍少量全局缓存服务器故障时不丢失写入的数据。无论是读缓存还是写缓存,都会按需调用数据源访问组件访问对象存储等底层存储资源,从而轻松适配其他类型的底层存储。 + +此外,在intercept模式的缓存系统中,我们采用了client-server+中间件架构,利用系统调用拦截技术捕获POSIX请求,将posix请求封装后发送至服务器处理,处理完成后返回至客户端。通过绕过FUSE内核模块和采用零拷贝中间件,最大限度地减少了数据拷贝和系统开销,不仅确保了与常见posix接口的兼容,还显著提升了系统性能,尤其在读写密集的场景中,避免了数据的重复拷贝,性能优势明显。 + +## 系统性能 + +顺序读性能使用 FIO 测试工具,带宽数据如下表所示: + +| BS | 优化前 | JYCache(FUSE) | JYCache(intercept) | +| 
------------ | ------------ | ------------ | ------------ | +| 4K | 761MiB/s | 933MiB/s | 3576MiB/s | +| 16K | 706MiB/s | 3643MiB/s | 11.6GiB/s | +| 128K | 2268MiB/s | 22.6GiB/s | 38GiB/s | + +顺序写性能使用 FIO 测试工具,带宽数据如下表所示: + +| BS | 优化前 | JYCache(FUSE) | JYCache(intercept) | +| ------------ | ------------ | ------------ | ------------ | +| 4K | 624MiB/s | 1226MiB/s | 2571MiB/s | +| 16K | 2153MiB/s | 5705MiB/s | 9711MiB/s | +| 128K | 7498MiB/s | 23.5GiB/s | 31.2GiB/s | + +## 系统构建 +**环境要求** + +- GCC 9.3.0 +- GLIBC 2.31 +- CMake 3.7 +- C++ 17 +- FUSE >= 2.6 + +**从源码构建** + +直接在根目录下运行build.sh脚本 +```bash +sh build.sh +``` +*在build.sh脚本中,会自动下载第三方依赖。* + +**系统安装** + +编译完成后,在根目录下运行install.sh脚本 +```bash +sh install.sh +``` + +## 快速使用 + +执行install.sh脚本后会在当前目录下构建JYCache运行环境,其目录为JYCache_Env。下述使用方法均以JYCache_Env为根目录。 + +**一、JYCache普通模式(不启用全局缓存)** + +修改conf/newcache.conf配置文件中的`UseGlobalCache=0` +```bash +# 1.启动minio +cd ./minio && sh start.sh && cd .. +# 2.启动s3fs +sh start_s3fs.sh +``` +启动完成后,在挂载目录 ./mnt 下的文件操作均为JYCache控制。 + +*注:需要在此模式下,在挂载目录 ./mnt 创建文件夹testdir,此为intercept模式所需。* + +**关闭服务** +```bash +sh stop_s3fs.sh +cd ./minio && sh stop.sh && cd .. +``` + +**二、JYCache普通模式(启用全局缓存)** + +修改conf/newcache.conf配置文件中的`UseGlobalCache=1` +```bash +# 1.启动minio +cd ./minio && sh start.sh && cd .. +# 2.启动etcd +sh start_etcd.sh +# 3.启动全局缓存 +sh start_global.sh +# 4.启动s3fs +sh start_s3fs.sh +``` +启动完成后,在挂载目录 ./mnt 下的文件操作均为JYCache控制 + +**关闭服务** +```bash +sh stop_s3fs.sh +sh stop_global.sh +sh stop_etcd.sh +cd ./minio && sh stop.sh && cd .. +``` + +**三、JYCache intercept模式** + +此模式也支持全局缓存,方法与二同。下述以不开全局缓存为例: +```bash +# 1.启动minio +cd ./minio && sh start.sh && cd .. 
+# 2.启动intercept_server +sh start_intercept_server.sh +``` +启动完成后,在JYCache_Env根目录下执行 +```bash +LD_LIBRARY_PATH=./libs/:$LD_LIBRARY_PATH LD_PRELOAD=./libintercept_client.so ${cmd} +``` +其中`${cmd}`为用户实际文件操作的命令。例如: +```bash +LD_LIBRARY_PATH=./libs/:$LD_LIBRARY_PATH LD_PRELOAD=./libintercept_client.so ll /testdir/ +``` +需要在testdir目录下进行文件操作,才为JYCache intercept模式控制。 +*且使用intercept模式前需要先通过普通模式在挂载目录下创建文件夹testdir。* + +**关闭服务** +```bash +sh stop_intercept_server.sh +cd ./minio && sh stop.sh && cd .. +``` + +## 常见问题 + +[常见问题](doc/frequently_asked_questions.md) + +## 许可 + +本项目使用了以下遵循GPLv2许可的代码: +- S3FS (https://github.com/s3fs-fuse/s3fs-fuse) + +This software is licensed under the GNU GPL version 2. + diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..2302716 --- /dev/null +++ b/build.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +if [ ! -d "./thirdparties" ]; then + wget https://madstorage.s3.cn-north-1.jdcloud-oss.com/JYCache_Dendepency_x64.tgz + md5=`md5sum JYCache_Dendepency_x64.tgz | awk {'print $1'}` + if [ "$md5" != "48f67dd9b7bcb1b2bdd6be9f2283b714" ]; then +   echo 'JYCache_Dendepency version inconsistency!' + exit 1 + fi + tar -zxvf JYCache_Dendepency_x64.tgz +fi + +mkdir -p build && cd build +cmake .. && cmake --build . 
-j 16 diff --git a/doc/cache_framework_design.md b/doc/cache_framework_design.md new file mode 100644 index 0000000..e994c3e --- /dev/null +++ b/doc/cache_framework_design.md @@ -0,0 +1,40 @@ +# 缓存系统设计 + +### 设计背景 + +在用户和数据服务器之间构建一套缓存系统,该缓存系统可以让用户以本地文件的形式透明且高效地访问数据服务器中的数据。其中,数据服务器的类型有对象存储、自建全局缓存等。以数据服务器为对象存储为例,用户可以通过fuse以本地文件形式访问存储在远端的对象,且远端的对象索引是用户可懂的。 +![](image/system_purpose.png) + +### 系统定位 +该缓存系统支持多种数据源,包括S3对象存储、自建全局缓存等,故称为HybridCache。同时借助S3FS对fuse的支持,以及其在元数据管理方面的能力,实现fuse模式下的文件管理操作。HybridCache的定位如下图所示: +![](image/system_positioning.png) + +### 系统架构 +HybridCache架构如下图所示: +![](image/HybridCache_architecture.PNG) + +1.写缓存模块 + +写缓存模块的定位是本地写缓存,写缓存中的key是文件的path,不理解远端数据源(对象存储和全局缓存等),从write->flush的过程由上层去做。 + +2.读缓存模块 + +读缓存模块的定位是文件(以远端数据源为对象存储为例)的只读缓存,读缓存中的key是对象的key。读缓存需要用到本地缓存,以及远端缓存(对象存储和全局缓存等)。 + +3.数据源访问组件 + +数据源访问组件负责和远端数据源进行交互,涉及数据的上传下载等。以Adaptor的形式支持多种数据源,包括对象存储和全局缓存等。 + +4.缓存管理组件 + +内存管理组件管理本地缓存,写缓存模块和读缓存模块中实际的本地缓存就是用的该组件。 +在本地缓存中,我们直接将文件切分为固定大小的page(page大小可配置,下文以64KB为例),并使用CacheLib来维护这些page。page在CacheLib中以KV形式进行存储,其存储结构如下: +- key为 cacheKey_pageid。读写模块各自维护自己的本地缓存,cacheKey在写缓存模块中就是文件的path,在读缓存模块中就是S3上对象的key。pageid即为页号,通过offset/64KB计算得来。 +- value的数据结构如下: +![](image/page_structure.jpg) + +通过 cacheKey+offset+size 即可接操作指定文件中的特定page。page并发操作的安全性是通过CacheLib自身的机制以及page内的lock和新旧版号位来保证。 + +5.HybridCache访问组件 + +HybridCache访问组件定位在胶水层,要根据上层调用方的特性定制化实现,其内需要理解到上层调用方的逻辑。 diff --git a/doc/conf_spec/newcache.conf_spec b/doc/conf_spec/newcache.conf_spec new file mode 100644 index 0000000..be7a951 --- /dev/null +++ b/doc/conf_spec/newcache.conf_spec @@ -0,0 +1,38 @@ +# ReadCache +ReadCacheConfig.CacheConfig.CacheName # 读缓存名称 +ReadCacheConfig.CacheConfig.MaxCacheSize # 读缓存内存容量限制 +ReadCacheConfig.CacheConfig.PageBodySize # 读缓存page大小 +ReadCacheConfig.CacheConfig.PageMetaSize # 读缓存page元数据大小 +ReadCacheConfig.CacheConfig.EnableCAS # 读缓存是否启用CAS +ReadCacheConfig.CacheConfig.CacheLibConfig.EnableNvmCache # 读缓存是否开启nvm缓存 +ReadCacheConfig.CacheConfig.CacheLibConfig.RaidPath # nvm缓存文件目录 
+ReadCacheConfig.CacheConfig.CacheLibConfig.RaidFileNum # nvm缓存文件数量限制 +ReadCacheConfig.CacheConfig.CacheLibConfig.RaidFileSize # nvm单个缓存文件大小限制 +ReadCacheConfig.CacheConfig.CacheLibConfig.DataChecksum # nvm缓存是否进行数据校验 +ReadCacheConfig.DownloadNormalFlowLimit # 读缓存内存未命中从远端下载时的平峰流控 +ReadCacheConfig.DownloadBurstFlowLimit # 读缓存内存未命中从远端下载时的顶峰流控 + +# WriteCache +WriteCacheConfig.CacheConfig.CacheName # 写缓存名称 +WriteCacheConfig.CacheConfig.MaxCacheSize # 写缓存内存容量限制 +WriteCacheConfig.CacheConfig.PageBodySize # 写缓存page大小 +WriteCacheConfig.CacheConfig.PageMetaSize # 写缓存page元数据大小 +WriteCacheConfig.CacheConfig.EnableCAS # 写缓存是否启用CAS +WriteCacheConfig.CacheSafeRatio # 写缓存安全容量阈值(百分比), 缓存达到阈值时阻塞待异步flush释放空间 + +# GlobalCache +UseGlobalCache # 全局缓存开关 +GlobalCacheConfig.EnableWriteCache # 全局缓存是否启用写缓存 +GlobalCacheConfig.EtcdAddress # etcd地址,例如 http://127.0.0.1:2379 +GlobalCacheConfig.GlobalServers # 全局缓存服务端地址,例如 127.0.0.1:8000 +GlobalCacheConfig.GflagFile # 全局缓存gflag文件形式输入 + +ThreadNum=48 # 线程数 +BackFlushCacheRatio # 写缓存异步flush阈值(百分比) +UploadNormalFlowLimit # 上传平峰流控 +UploadBurstFlowLimit # 上传顶峰流控 +LogPath # 日志文件路径 +LogLevel # 日志级别,INFO=0, WARNING=1, ERROR=2, FATAL=3 +EnableLog # 是否启用日志打印 +FlushToRead # 文件flush完成后是否写入读缓存 +CleanCacheByOpen # 文件open时是否清理读缓存 diff --git a/doc/frequently_asked_questions.md b/doc/frequently_asked_questions.md new file mode 100644 index 0000000..cc36869 --- /dev/null +++ b/doc/frequently_asked_questions.md @@ -0,0 +1,7 @@ +**1. 
如何切换挂载目录?** + +在start_s3fs.sh中 +```bash +LD_LIBRARY_PATH=./libs/:$LD_LIBRARY_PATH nohup ./s3fs -o passwd_file=./conf/passwd -o use_path_request_style -o endpoint=us-east-1 -o url=http://127.0.0.1:9000 -o bucket=test ./mnt -o dbglevel=err -o use_cache=./diskcache -o del_cache -o newcache_conf=./conf/newcache.conf -f >> ./log/s3fs.log 2>&1 & +``` +更换其中的 `./mnt` 即可 \ No newline at end of file diff --git a/doc/image/HybridCache_architecture.PNG b/doc/image/HybridCache_architecture.PNG new file mode 100644 index 0000000..7dd1657 Binary files /dev/null and b/doc/image/HybridCache_architecture.PNG differ diff --git a/doc/image/JYCache_architecture.PNG b/doc/image/JYCache_architecture.PNG new file mode 100644 index 0000000..c12f578 Binary files /dev/null and b/doc/image/JYCache_architecture.PNG differ diff --git a/doc/image/page_structure.jpg b/doc/image/page_structure.jpg new file mode 100644 index 0000000..0d2bf5d Binary files /dev/null and b/doc/image/page_structure.jpg differ diff --git a/doc/image/system_positioning.png b/doc/image/system_positioning.png new file mode 100644 index 0000000..08215b6 Binary files /dev/null and b/doc/image/system_positioning.png differ diff --git a/doc/image/system_purpose.png b/doc/image/system_purpose.png new file mode 100644 index 0000000..ef854bc Binary files /dev/null and b/doc/image/system_purpose.png differ diff --git a/global_cache/CMakeLists.txt b/global_cache/CMakeLists.txt new file mode 100644 index 0000000..c185c8d --- /dev/null +++ b/global_cache/CMakeLists.txt @@ -0,0 +1,46 @@ +include(FindThreads) +include(FindProtobuf) +protobuf_generate_cpp(PROTO_SRC PROTO_HEADER gcache.proto) +include_directories(${CMAKE_CURRENT_BINARY_DIR} /usr/local/include/jerasure) + +add_library(madfs_global + Common.h + Common.cpp + FileSystemDataAdaptor.h + EtcdClient.h + Placement.h + GlobalCacheClient.h + GlobalCacheClient.cpp + S3DataAdaptor.h + S3DataAdaptor.cpp + GlobalDataAdaptor.h + GlobalDataAdaptor.cpp + ReadCacheClient.h + 
ReadCacheClient.cpp + ReplicationWriteCacheClient.h + ReplicationWriteCacheClient.cpp + GlobalCacheServer.h + GlobalCacheServer.cpp + ReadCache.h + ReadCache.cpp + WriteCache.h + WriteCache.cpp + WriteCacheClient.h + ErasureCodingWriteCacheClient.h + ErasureCodingWriteCacheClient.cpp + ${PROTO_SRC} + ${PROTO_HEADER} +) + +option(ENABLE_EC "Enable Erasure Coding" OFF) +target_link_libraries(madfs_global PUBLIC hybridcache_local aio) +if(ENABLE_EC) + add_definitions(-DCONFIG_JERASURE) + target_link_libraries(madfs_global PUBLIC Jerasure) +endif() + +add_executable(madfs_global_server GlobalCacheServerMain.cpp) +target_link_libraries(madfs_global_server PUBLIC madfs_global) + +add_executable(madfs_gc GarbageCollectorMain.cpp) +target_link_libraries(madfs_gc PUBLIC madfs_global) diff --git a/global_cache/Common.cpp b/global_cache/Common.cpp new file mode 100644 index 0000000..03a6f20 --- /dev/null +++ b/global_cache/Common.cpp @@ -0,0 +1,122 @@ +#include "Common.h" + +DEFINE_int32(rpc_timeout, 30000, "RPC timeout in milliseconds"); +DEFINE_int32(rpc_threads, 16, "Maximum number of threads in brpc"); +DEFINE_int32(folly_threads, 48, "Maximum number of threads in folly's executor"); +DEFINE_int32(rpc_connections, 10, "RPC connections"); +DEFINE_bool(use_rdma, true, "Use remote memory direct access"); + +DEFINE_int64(read_chunk_size, 256 * 1024, "Granularity of global read cache"); +DEFINE_int32(read_replication_factor, 1, "Replication factor of global read cache"); + +DEFINE_string(read_cache_dir, "/mnt/nvme0/renfeng/readcache", "Read cache directory"); +DEFINE_string(write_cache_dir, "/mnt/nvme0/renfeng/writecache", "Write cache directory"); + +DEFINE_string(write_cache_type, "nocache", "Policy of global write cache: nocache, replication, reed-solomon"); +DEFINE_int32(write_replication_factor, 1, "Replication factor of global write cache, available if --write_cache_type=replication"); +DEFINE_int32(write_data_blocks, 3, "Data blocks of global write cache, available if 
--write_cache_type=reed-solomon"); +DEFINE_int32(write_parity_blocks, 2, "Parity blocks of global write cache, available if --write_cache_type=reed-solomon"); + +DEFINE_string(s3_address, "", "S3 - server address (URL)"); +DEFINE_string(s3_access_key, "", "S3 - AccessKey"); +DEFINE_string(s3_secret_access_key, "", "S3 - SecretAccessKey"); +DEFINE_string(s3_bucket, "madfs", "S3 - bucket name"); +DEFINE_int32(s3_bg_threads, 4, "S3 - number of background threads"); + +DEFINE_uint64(read_normal_flow_limit, 1024, "Read cache normal flow limit"); +DEFINE_uint64(read_burst_flow_limit, 10 * 1024, "Read cache burst flow limit"); +DEFINE_uint64(read_capacity_mb, 4096, "Read cache capacity in MB"); +DEFINE_uint64(read_page_body_size, 64 * 1024, "Read cache page body size"); +DEFINE_uint64(read_page_meta_size, 1024, "Read cache page meta size"); +DEFINE_bool(read_cas, true, "Read cache enable CAS"); +DEFINE_bool(read_nvm_cache, false, "Read cache enable NVM cache"); + +DEFINE_bool(use_meta_cache, true, "Enable meta cache"); +DEFINE_uint64(meta_cache_max_size, 1024 * 1024, "Max size of meta cache"); +DEFINE_uint64(meta_cache_clear_size, 512 * 1024, "Read cache burst flow limit"); + +DEFINE_uint64(write_chunk_size, 16 * 1024 * 1024, "Granularity of global write cache"); +DEFINE_uint64(max_inflight_payload_size, 256 * 1024 * 1024, "Max inflight payload size in bytes"); + +DEFINE_string(etcd_prefix, "/madfs/", "Etcd directory prefix"); + +DEFINE_bool(verbose, false, "Print debug logging"); + +namespace brpc { + DECLARE_int64(socket_max_unwritten_bytes); +}; + +static GlobalConfig g_cfg; +std::once_flag g_cfg_once; + +#define SAFE_ASSIGN(conf, flag, min_val, max_val) { \ + const static auto flag##_min = (min_val); \ + const static auto flag##_max = (max_val); \ + if (flag < (min_val) || flag > (max_val)) { \ + LOG(WARNING) << "Invalid " #flag ", reset to " << (max_val); \ + flag = (max_val); \ + } \ + conf = flag; \ +} + +void InitGlobalConfig() { + SAFE_ASSIGN(g_cfg.rpc_timeout, 
FLAGS_rpc_timeout, 0, 60000); + SAFE_ASSIGN(g_cfg.rpc_threads, FLAGS_rpc_threads, 0, 256); + SAFE_ASSIGN(g_cfg.rpc_connections, FLAGS_rpc_connections, 0, 64); + SAFE_ASSIGN(g_cfg.folly_threads, FLAGS_folly_threads, 0, 256); + g_cfg.use_rdma = FLAGS_use_rdma; + g_cfg.write_chunk_size = FLAGS_write_chunk_size; + + g_cfg.default_policy.read_chunk_size = FLAGS_read_chunk_size; + g_cfg.default_policy.read_replication_factor = FLAGS_read_replication_factor; + + g_cfg.default_policy.read_chunk_size = FLAGS_read_chunk_size; + g_cfg.default_policy.read_replication_factor = FLAGS_read_replication_factor; + + g_cfg.use_meta_cache = FLAGS_use_meta_cache; + g_cfg.meta_cache_max_size = size_t(FLAGS_meta_cache_max_size); + g_cfg.meta_cache_clear_size = size_t(FLAGS_meta_cache_clear_size); + + g_cfg.read_cache_dir = FLAGS_read_cache_dir; + g_cfg.write_cache_dir = FLAGS_write_cache_dir; + + g_cfg.etcd_prefix = FLAGS_etcd_prefix; + g_cfg.max_inflight_payload_size = FLAGS_max_inflight_payload_size; + + if (FLAGS_write_cache_type == "nocache") { + g_cfg.default_policy.write_cache_type = NOCACHE; + } else if (FLAGS_write_cache_type == "replication") { + g_cfg.default_policy.write_cache_type = REPLICATION; + g_cfg.default_policy.write_replication_factor = FLAGS_write_replication_factor; + } else if (FLAGS_write_cache_type == "reed-solomon") { + g_cfg.default_policy.write_cache_type = REED_SOLOMON; + g_cfg.default_policy.write_data_blocks = FLAGS_write_data_blocks; + g_cfg.default_policy.write_parity_blocks = FLAGS_write_parity_blocks; + } else { + LOG(ERROR) << "The program will be terminated because of unsupported write cache type: " << FLAGS_write_cache_type; + exit(EXIT_FAILURE); + } + + g_cfg.s3_config.address = FLAGS_s3_address; + g_cfg.s3_config.access_key = FLAGS_s3_access_key; + g_cfg.s3_config.secret_access_key = FLAGS_s3_secret_access_key; + g_cfg.s3_config.bucket = FLAGS_s3_bucket; + g_cfg.s3_config.bg_threads = FLAGS_s3_bg_threads; + + HybridCache::ReadCacheConfig 
&read_cache = g_cfg.read_cache; + read_cache.DownloadNormalFlowLimit = FLAGS_read_normal_flow_limit; + read_cache.DownloadBurstFlowLimit = FLAGS_read_burst_flow_limit; + read_cache.CacheCfg.CacheName = "Read"; + read_cache.CacheCfg.MaxCacheSize = FLAGS_read_capacity_mb * 1024 * 1024;; + read_cache.CacheCfg.PageBodySize = FLAGS_read_page_body_size; + read_cache.CacheCfg.PageMetaSize = FLAGS_read_page_meta_size; + read_cache.CacheCfg.EnableCAS = FLAGS_read_cas; + read_cache.CacheCfg.CacheLibCfg.EnableNvmCache = FLAGS_read_nvm_cache; + + brpc::FLAGS_socket_max_unwritten_bytes = FLAGS_max_inflight_payload_size * 2; +} + +GlobalConfig &GetGlobalConfig() { + std::call_once(g_cfg_once, InitGlobalConfig); + return g_cfg; +} diff --git a/global_cache/Common.h b/global_cache/Common.h new file mode 100644 index 0000000..246a10a --- /dev/null +++ b/global_cache/Common.h @@ -0,0 +1,130 @@ +#ifndef MADFS_COMMON_H +#define MADFS_COMMON_H + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "config.h" + +using folly::Future; +using folly::Promise; + +#define RED "\033[1;31m" +#define GREEN "\033[1;32m" +#define YELLOW "\033[1;33m" +#define WHITE "\033[0m" + +DECLARE_bool(verbose); + +const static int OK = 0; +const static int RPC_FAILED = -2; +const static int NOT_FOUND = -3; +const static int CACHE_ENTRY_NOT_FOUND = -3; // deprecated +const static int INVALID_ARGUMENT = -4; +const static int S3_INTERNAL_ERROR = -5; +const static int FOLLY_ERROR = -6; +const static int NO_ENOUGH_REPLICAS = -7; +const static int METADATA_ERROR = -8; +const static int IO_ERROR = -9; +const static int END_OF_FILE = -10; +const static int NO_ENOUGH_DISKSPACE = -11; +const static int UNSUPPORTED_TYPE = -12; +const static int UNSUPPORTED_OPERATION = -13; +const static int UNIMPLEMENTED = -128; + +struct GetOutput { + int status; + butil::IOBuf buf; +}; + +struct PutOutput { + int status; + std::string internal_key; +}; + +struct 
// Join two path components, inserting exactly one '/' between them.
// An empty left component yields `right` unchanged.
static inline std::string PathJoin(const std::string &left, const std::string &right) {
    if (left.empty())
        return right;
    if (left.back() == '/')
        return left + right;
    return left + "/" + right;
}
// Round `a` up to the next multiple of `b` (assumes a >= 0, b > 0).
static int _roundup(int a, int b) {
    const int remainder = a % b;
    return remainder == 0 ? a : a + (b - remainder);
}
region_buffer, unit_size)); + } + } + for (auto iter = headers.begin(); iter != headers.end(); ++iter) { + json_headers[iter->first] = iter->second; + } + + root["type"] = "reed-solomon"; + root["size"] = size; + root["replica"] = json_replica; + root["headers"] = json_headers; + + return folly::collectAll(future_list).via(parent_->executor_.get()).thenValue( + [this, root, data_buf_list, matrix](std::vector > output) -> PutResult { + free(matrix); + for (auto &entry : data_buf_list) { + delete []entry; + } + Json::Value res_root; + Json::Value json_path(Json::arrayValue); + for (auto &entry: output) { + if (!entry.hasValue()) + return PutResult { FOLLY_ERROR, res_root }; + if (entry.value().status != OK) + return PutResult { entry.value().status, res_root }; + json_path.append(entry.value().internal_key); + } + res_root = root; + res_root["path"] = json_path; + return PutResult { OK, res_root }; + }); +} + +folly::Future ErasureCodingWriteCacheClient::Get(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + Json::Value &root) { + std::vector replicas; + for (auto &entry : root["replica"]) { + replicas.push_back(entry.asInt()); + } + + std::vector internal_keys; + for (auto &entry : root["path"]) { + internal_keys.push_back(entry.asString()); + } + + std::vector > future_list; + std::vector requests; + auto write_chunk_size = GetGlobalConfig().write_chunk_size; + GenerateGetChunkRequestsV2(key, start, size, buffer, requests, write_chunk_size); + if (requests.empty()) + return folly::makeFuture(OK); + + for (auto &entry: requests) { + auto &policy = parent_->GetCachePolicy(key); + const int k = policy.write_data_blocks; + const int m = policy.write_parity_blocks; + const int w = 32; + const auto unit_size = _roundup((write_chunk_size + k - 1) / k, w); + const auto start_replica_id = entry.chunk_start / unit_size; + const auto end_replica_id = (entry.chunk_start + entry.chunk_len + unit_size - 1) / unit_size; + size_t dest_buf_pos = 0; + for 
(auto replica_id = start_replica_id; replica_id < end_replica_id; ++replica_id) { + auto start_off = (replica_id == start_replica_id) ? entry.chunk_start % unit_size : 0; + auto end_off = (replica_id + 1 == end_replica_id) ? (entry.chunk_start + entry.chunk_len) - replica_id * unit_size : unit_size; + int server_id = replicas[replica_id]; + std::string internal_key = internal_keys[entry.chunk_id * replicas.size() + replica_id]; + auto cur_dest_buf_pos = dest_buf_pos; + dest_buf_pos += (end_off - start_off); + future_list.emplace_back(parent_->GetRpcClient()->GetEntryFromWriteCache(server_id, internal_key, start_off, end_off - start_off) + .then([this, server_id, entry, start_off, end_off, cur_dest_buf_pos](folly::Try &&output) -> folly::Future { + if (!output.hasValue()) { + return folly::makeFuture(FOLLY_ERROR); + } + auto &value = output.value(); + if (value.status == OK) { + value.buf.copy_to(entry.buffer.data + cur_dest_buf_pos, end_off - start_off); + return folly::makeFuture(OK); + } else { + return folly::makeFuture(value.status); + } + })); + } + } + + return folly::collectAll(future_list).via(parent_->executor_.get()).thenValue( + [=](std::vector > output) -> int { + for (auto &entry: output) + if (entry.value_or(FOLLY_ERROR) != OK) { + LOG(ERROR) << "Failed to get data from write cache, key: " << key + << ", start: " << start + << ", size: " << size + << ", buf: " << (void *) buffer.data << " " << buffer.len + << ", error code: " << entry.hasValue() << " " << entry.value_or(FOLLY_ERROR); + return entry.value_or(FOLLY_ERROR); + } + return OK; + }); +} + + +folly::Future ErasureCodingWriteCacheClient::GetDecode(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + Json::Value &root) { + std::vector replicas; + for (auto &entry : root["replica"]) { + replicas.push_back(entry.asInt()); + } + + std::vector internal_keys; + for (auto &entry : root["path"]) { + internal_keys.push_back(entry.asString()); + } + + std::vector requests; + 
auto write_chunk_size = GetGlobalConfig().write_chunk_size; + GenerateGetChunkRequestsV2(key, start, size, buffer, requests, write_chunk_size); + if (requests.empty()) + return folly::makeFuture(OK); + + std::vector > future_list; + + for (auto &entry: requests) { + auto &policy = parent_->GetCachePolicy(key); + const int k = policy.write_data_blocks; + const int m = policy.write_parity_blocks; + const int w = 32; + auto matrix = reed_sol_vandermonde_coding_matrix(k, m, w); + const auto unit_size = _roundup((write_chunk_size + k - 1) / k, w); + const auto start_replica_id = entry.chunk_start / unit_size; + const auto end_replica_id = (entry.chunk_start + entry.chunk_len + unit_size - 1) / unit_size; + int erasures[k + m + 1] = { 0 }; + int erasures_idx = 0; + + char *data_buf = new char[(k + m) * unit_size]; + char *data_ptrs[k] = { nullptr }, *coding_ptrs[m] = { nullptr }; + for (int i = 0; i < k + m; ++i) { + if (i < k) { + data_ptrs[i] = &data_buf[i * unit_size]; + } else { + coding_ptrs[i - k] = &data_buf[i * unit_size]; + } + } + + // rarely occurred, can be synchronized + for (auto replica_id = 0; replica_id < k + m; ++replica_id) { + int server_id = replicas[replica_id]; + std::string internal_key = internal_keys[entry.chunk_id * replicas.size() + replica_id]; + auto output = parent_->GetRpcClient()->GetEntryFromWriteCache(server_id, internal_key, 0, unit_size).get(); + if (output.status == OK) { + if (replica_id < k) { + output.buf.copy_to(data_ptrs[replica_id], unit_size); + } else { + output.buf.copy_to(coding_ptrs[replica_id - k], unit_size); + } + } else { + erasures[erasures_idx++] = replica_id; + } + } + + erasures[erasures_idx] = -1; + + int rc = jerasure_matrix_decode(k, m, w, matrix, 1, erasures, data_ptrs, coding_ptrs, unit_size); + if (rc == -1) { + LOG(FATAL) << "Unable to decode RS matrix"; + return IO_ERROR; + } + + auto cur_pos = 0; + for (auto replica_id = start_replica_id; replica_id < end_replica_id; ++replica_id) { + auto start_pos = 
(replica_id == start_replica_id) ? entry.chunk_start % unit_size : 0; + auto end_pos = (replica_id + 1 == end_replica_id) ? (entry.chunk_start + entry.chunk_len) - replica_id * unit_size : unit_size; + memcpy(entry.buffer.data + cur_pos, data_ptrs[replica_id] + start_pos, end_pos - start_pos); + cur_pos += end_pos - start_pos; + } + + delete []data_buf; + free(matrix); + } + + return OK; +} + +std::vector ErasureCodingWriteCacheClient::GetReplica(const std::string &key) { + const int num_available = parent_->server_list_.size(); + auto &policy = parent_->GetCachePolicy(key); + const int num_choose = policy.write_data_blocks + policy.write_parity_blocks; + uint64_t seed = std::hash < std::string > {}(key); + std::vector output; + // for (int i = 0; i < std::min(num_available, num_choose); ++i) + for (int i = 0; i < num_choose; ++i) + output.push_back((seed + i) % num_available); + return output; +} + +void ErasureCodingWriteCacheClient::GenerateGetChunkRequestsV2(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + std::vector &requests, + size_t chunk_size) { + const size_t end = start + size; + + const size_t begin_chunk_id = start / chunk_size; + const size_t end_chunk_id = (end + chunk_size - 1) / chunk_size; + + if (buffer.len < size) { + LOG(WARNING) << "Buffer capacity may be not enough, expect " << size << ", actual " << buffer.len; + } + + size_t buffer_offset = 0; + for (size_t chunk_id = begin_chunk_id; chunk_id < end_chunk_id; ++chunk_id) { + size_t chunk_start = std::max(chunk_id * chunk_size, start); + size_t chunk_stop = std::min((chunk_id + 1) * chunk_size, end); + if (chunk_stop <= chunk_start) + return; + GetChunkRequestV2 item; + item.user_key = key; + item.chunk_id = chunk_id; + item.chunk_start = chunk_start % chunk_size; + item.chunk_len = chunk_stop - chunk_start; + item.chunk_granularity = chunk_size; + item.buffer.data = buffer.data + buffer_offset; + item.buffer.len = item.chunk_len; + buffer_offset += 
item.chunk_len; + requests.emplace_back(item); + } + LOG_ASSERT(buffer_offset == size); +} +#else +folly::Future ErasureCodingWriteCacheClient::Put(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map &headers, + size_t off) { + PutResult res; + res.status = UNSUPPORTED_OPERATION; + return res; +} + +folly::Future ErasureCodingWriteCacheClient::Get(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + Json::Value &root) { + return UNSUPPORTED_OPERATION; +} + + +folly::Future ErasureCodingWriteCacheClient::GetDecode(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + Json::Value &root) { + return UNSUPPORTED_OPERATION; +} + +std::vector ErasureCodingWriteCacheClient::GetReplica(const std::string &key) { + return std::vector{}; +} + +void ErasureCodingWriteCacheClient::GenerateGetChunkRequestsV2(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + std::vector &requests, + size_t chunk_size) { + +} +#endif \ No newline at end of file diff --git a/global_cache/ErasureCodingWriteCacheClient.h b/global_cache/ErasureCodingWriteCacheClient.h new file mode 100644 index 0000000..ba5263a --- /dev/null +++ b/global_cache/ErasureCodingWriteCacheClient.h @@ -0,0 +1,61 @@ +#ifndef MADFS_EC_WRITE_CACHE_CLIENT_H +#define MADFS_EC_WRITE_CACHE_CLIENT_H + +#include "WriteCacheClient.h" + +using HybridCache::ByteBuffer; + +class GlobalDataAdaptor; + +using PutResult = WriteCacheClient::PutResult; + +class ErasureCodingWriteCacheClient : public WriteCacheClient { + friend class GetChunkContext; + +public: + ErasureCodingWriteCacheClient(GlobalDataAdaptor *parent) : parent_(parent) {} + + ~ErasureCodingWriteCacheClient() {} + + virtual folly::Future Put(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map &headers, + size_t off = 0); + + virtual folly::Future Get(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + 
Json::Value &root); + + virtual folly::Future GetDecode(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + Json::Value &root); + +public: + std::vector GetReplica(const std::string &key); + + struct GetChunkRequestV2 { + std::string user_key; + size_t chunk_id; + size_t chunk_start; + size_t chunk_len; + size_t chunk_granularity; + ByteBuffer buffer; + }; + + static void GenerateGetChunkRequestsV2(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + std::vector &requests, + size_t chunk_size); + +private: + GlobalDataAdaptor *parent_; +}; + +#endif // MADFS_EC_WRITE_CACHE_CLIENT_H \ No newline at end of file diff --git a/global_cache/EtcdClient.h b/global_cache/EtcdClient.h new file mode 100644 index 0000000..321683d --- /dev/null +++ b/global_cache/EtcdClient.h @@ -0,0 +1,101 @@ +#ifndef ETCD_CLIENT_H +#define ETCD_CLIENT_H + +#include +#include +#include + +#include "WriteCacheClient.h" + +class EtcdClient { +public: + EtcdClient(const std::string &etcd_url) : client_(etcd_url) {}; + + ~EtcdClient() {} + + struct GetResult { + int status; + Json::Value root; + }; + + folly::Future GetJson(const std::string &key) { + std::lock_guard lock(mutex_); + Json::Reader reader; + Json::Value root; + auto resp = client_.get(PathJoin(GetGlobalConfig().etcd_prefix, key)); + if (!resp.is_ok()) { + if (resp.error_code() != 100) { + LOG(ERROR) << "Error from etcd client: " << resp.error_code() + << ", message: " << resp.error_message(); + return folly::makeFuture(GetResult{ METADATA_ERROR, root }); + } else { + LOG(WARNING) << "Record not found in the etcd storage: key " << key; + return folly::makeFuture(GetResult{ NOT_FOUND, root }); + } + } + if (!reader.parse(resp.value().as_string(), root)) { + LOG(ERROR) << "Error from etcd client: failed to parse record: " << resp.value().as_string(); + return folly::makeFuture(GetResult{ METADATA_ERROR, root }); + } + LOG(INFO) << "Record get: " << key; + return 
folly::makeFuture(GetResult{ OK, root }); + } + + folly::Future PutJson(const std::string &key, const Json::Value &root) { + std::lock_guard lock(mutex_); + Json::FastWriter writer; + const std::string json_file = writer.write(root); + auto resp = client_.put(PathJoin(GetGlobalConfig().etcd_prefix, key), json_file); + if (!resp.is_ok()) { + LOG(ERROR) << "Error from etcd client: " << resp.error_code() + << ", message: " << resp.error_message(); + return folly::makeFuture(METADATA_ERROR); + } + LOG(INFO) << "Record put: " << key; + return folly::makeFuture(OK); + } + + folly::Future DeleteJson(const std::string &key) { + std::lock_guard lock(mutex_); + auto resp = client_.rm(PathJoin(GetGlobalConfig().etcd_prefix, key)); + if (!resp.is_ok()) { + if (resp.error_code() != 100) { + LOG(ERROR) << "Error from etcd client: " << resp.error_code() + << ", message: " << resp.error_message(); + return folly::makeFuture(METADATA_ERROR); + } else { + LOG(WARNING) << "Record not found in the etcd storage: key " << key; + return folly::makeFuture(NOT_FOUND); + } + return folly::makeFuture(METADATA_ERROR); + } + return folly::makeFuture(OK); + } + + folly::Future ListJson(const std::string &key_prefix, std::vector &key_list) { + std::lock_guard lock(mutex_); + const std::string etcd_prefix = GetGlobalConfig().etcd_prefix; + auto resp = client_.keys(PathJoin(etcd_prefix, key_prefix)); + if (!resp.is_ok()) { + if (resp.error_code() != 100) { + LOG(ERROR) << "Error from etcd client: " << resp.error_code() + << ", message: " << resp.error_message(); + return folly::makeFuture(METADATA_ERROR); + } else { + LOG(WARNING) << "Record not found in the etcd storage: key " << key_prefix; + return folly::makeFuture(NOT_FOUND); + } + return folly::makeFuture(METADATA_ERROR); + } + for (auto &entry : resp.keys()) { + key_list.push_back(entry.substr(etcd_prefix.length())); + } + return folly::makeFuture(OK); + } + +private: + std::mutex mutex_; + etcd::SyncClient client_; +}; + +#endif // 
// Read exactly `n` bytes at `offset` into `buf`, retrying on EAGAIN and
// short reads. Returns the number of bytes read (< n only at end of file)
// or -1 on error.
// FIX: pointer arithmetic was done on void* (a non-portable GNU extension);
// also removed the signed/unsigned comparison in the loop condition.
static inline ssize_t fully_pread(int fd, void* buf, size_t n, size_t offset) {
    char *dst = static_cast<char *>(buf);
    size_t total_read = 0;
    while (total_read < n) {
        ssize_t bytes_read = pread(fd, dst + total_read, n - total_read, offset);
        if (bytes_read < 0) {
            if (errno == EAGAIN) continue;
            return -1;
        }
        if (bytes_read == 0) break;  // EOF
        total_read += static_cast<size_t>(bytes_read);
        offset += static_cast<size_t>(bytes_read);
    }
    return static_cast<ssize_t>(total_read);
}

// Write exactly `n` bytes from `buf` at `offset`, retrying on EAGAIN and
// short writes. Returns the number of bytes written or -1 on error.
static inline ssize_t fully_pwrite(int fd, void* buf, size_t n, size_t offset) {
    const char *src = static_cast<const char *>(buf);
    size_t total_written = 0;
    while (total_written < n) {
        ssize_t bytes_written = pwrite(fd, src + total_written, n - total_written, offset);
        if (bytes_written < 0) {
            if (errno == EAGAIN) continue;
            return -1;
        }
        if (bytes_written == 0) break;
        total_written += static_cast<size_t>(bytes_written);
        offset += static_cast<size_t>(bytes_written);
    }
    return static_cast<ssize_t>(total_written);
}
fsync_required_(fsync_required) {} + + ~FileSystemDataAdaptor() {} + + virtual folly::Future DownLoad(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer) { + LOG_IF(INFO, FLAGS_verbose) << "Download key: " << key << ", start: " << start << ", size: " << size; + + if (!buffer.data || buffer.len < size) { + LOG(ERROR) << "Buffer capacity is not enough, expected " << size + << ", actual " << buffer.len; + return folly::makeFuture(INVALID_ARGUMENT); + } + + auto path = BuildPath(prefix_, key); + if (access(path.c_str(), F_OK)) { + if (base_adaptor_) { + #if 1 + size_t full_size; + std::map headers; + if (base_adaptor_->Head(key, full_size, headers).get()) { + LOG(ERROR) << "Fail to retrive metadata of key: " << key; + return folly::makeFuture(IO_ERROR); + } + ByteBuffer tmp_buffer(new char[full_size], full_size); + return base_adaptor_->DownLoad(key, 0, full_size, tmp_buffer).thenValue([buffer, tmp_buffer, start, size, key](int rc) -> int { + if (rc) { + LOG(ERROR) << "Fail to retrive data of key: " << key; + return IO_ERROR; + } + memcpy(buffer.data, tmp_buffer.data + start, size); + delete []tmp_buffer.data; + return OK; + }); + #else + return base_adaptor_->DownLoad(key, start, size, buffer); + #endif + } else if (errno == ENOENT) { + LOG_IF(ERROR, FLAGS_verbose) << "File not found: " << path; + return folly::makeFuture(NOT_FOUND); + } else { + PLOG(ERROR) << "Fail inaccessible: " << path; + return folly::makeFuture(IO_ERROR); + } + } + + butil::Timer t; + t.start(); + + const bool kUseDirectIO = false; // ((uint64_t) buffer.data & 4095) == 0 && (size & 4095) == 0; + int flags = O_RDONLY; + flags |= kUseDirectIO ? 
O_DIRECT : 0; + int fd = open(path.c_str(), flags); + if (fd < 0) { + PLOG(ERROR) << "Fail to open file: " << path; + return folly::makeFuture(IO_ERROR); + } + +#ifdef ASYNC_IO + if (kUseDirectIO) { + thread_local folly::SimpleAsyncIO aio(folly::SimpleAsyncIO::Config().setCompletionExecutor(executor_.get())); + auto promise = std::make_shared>(); + aio.pread(fd, buffer.data, size, start, [key, size, promise, fd](int rc) { + if (rc != size) { + PLOG(ERROR) << "Fail to read file: " << key + << ", expected read " << size + << ", actual read " << rc; + close(fd); + promise->setValue(IO_ERROR); + } else { + close(fd); + promise->setValue(OK); + } + }); + return promise->getFuture(); + } +#endif + + ssize_t nbytes = fully_pread(fd, buffer.data, size, start); + if (nbytes != size) { + PLOG(ERROR) << "Fail to read file: " << key + << ", expected read " << size + << ", actual read " << nbytes; + close(fd); + return folly::makeFuture(IO_ERROR); + } + + t.stop(); + // LOG_EVERY_N(INFO, 1) << t.u_elapsed() << " " << size; + + close(fd); + return folly::makeFuture(OK); + } + + virtual folly::Future UpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map &headers) { + butil::Timer t; + t.start(); + LOG_IF(INFO, FLAGS_verbose) << "Upload key: " << key << ", size: " << size; + if (!buffer.data || buffer.len < size) { + LOG(ERROR) << "Buffer capacity is not enough, expected " << size + << ", actual " << buffer.len; + return folly::makeFuture(INVALID_ARGUMENT); + } + + auto path = BuildPath(prefix_, key); + if (CreateParentDirectories(path)) { + return folly::makeFuture(IO_ERROR); + } + + t.stop(); + //LOG(INFO) << "Upload P0: " << key << " " << t.u_elapsed() << " " << size; + const bool kUseDirectIO = false; // ((uint64_t) buffer.data & 4095) == 0 && (size & 4095) == 0; + int flags = O_WRONLY | O_CREAT; + flags |= kUseDirectIO ? 
O_DIRECT : 0; + int fd = open(path.c_str(), flags, 0644); + if (fd < 0) { + PLOG(ERROR) << "Fail to open file: " << path; + return folly::makeFuture(IO_ERROR); + } + +#ifdef ASYNC_IO + if (kUseDirectIO) { + thread_local folly::SimpleAsyncIO aio(folly::SimpleAsyncIO::Config().setCompletionExecutor(executor_.get())); + auto promise = std::make_shared>(); + aio.pwrite(fd, buffer.data, size, 0, [key, size, promise, fd](int rc) { + if (rc != size) { + PLOG(ERROR) << "Fail to write file: " << key + << ", expected " << size + << ", actual " << rc; + close(fd); + promise->setValue(IO_ERROR); + } + + if (ftruncate64(fd, size) < 0) { + PLOG(ERROR) << "Fail to truncate file: " << key; + close(fd); + return folly::makeFuture(IO_ERROR); + } + + if (fsync_required_ && fsync(fd) < 0) { + PLOG(ERROR) << "Fail to sync file: " << key; + close(fd); + return folly::makeFuture(IO_ERROR); + } + + close(fd); + promise->setValue(OK); + + }); + return promise->getFuture(); + } +#endif + + ssize_t nbytes = fully_pwrite(fd, buffer.data, size, 0); + if (nbytes != size) { + PLOG(ERROR) << "Fail to write file: " << key + << ", expected read " << size + << ", actual read " << nbytes; + close(fd); + return folly::makeFuture(IO_ERROR); + } + + t.stop(); + //LOG(INFO) << "Upload P2: " << key << " " << t.u_elapsed() << " " << size; + if (ftruncate64(fd, size) < 0) { + PLOG(ERROR) << "Fail to truncate file: " << key; + close(fd); + return folly::makeFuture(IO_ERROR); + } + + t.stop(); + //LOG(INFO) << "Upload P3: " << key << " " << t.u_elapsed() << " " << size; + if (fsync_required_ && fsync(fd) < 0) { + PLOG(ERROR) << "Fail to sync file: " << key; + close(fd); + return folly::makeFuture(IO_ERROR); + } + + close(fd); + + if (base_adaptor_) { + return base_adaptor_->UpLoad(key, size, buffer, headers); + } + t.stop(); + // LOG(INFO) << "Upload P4: " << key << " " << t.u_elapsed() << " " << size; + return folly::makeFuture(OK); + } + + virtual folly::Future Delete(const std::string &key) { + 
LOG_IF(INFO, FLAGS_verbose) << "Delete key: " << key; + auto path = BuildPath(prefix_, key); + if (remove(path.c_str())) { + if (errno == ENOENT) { + LOG_IF(ERROR, FLAGS_verbose) << "File not found: " << path; + return folly::makeFuture(NOT_FOUND); + } else { + PLOG(ERROR) << "Failed to remove file: " << path; + return folly::makeFuture(IO_ERROR); + } + } + if (base_adaptor_) { + return base_adaptor_->Delete(key); + } + return folly::makeFuture(OK); + } + + virtual folly::Future Head(const std::string &key, + size_t &size, + std::map &headers) { + LOG_IF(INFO, FLAGS_verbose) << "Head key: " << key; + if (base_adaptor_) { + return base_adaptor_->Head(key, size, headers); + } + auto path = BuildPath(prefix_, key); + struct stat st; + if (access(path.c_str(), F_OK)) { + if (errno == ENOENT) { + LOG_IF(ERROR, FLAGS_verbose) << "File not found: " << path; + return folly::makeFuture(NOT_FOUND); + } else { + PLOG(ERROR) << "Failed to access file: " << path; + return folly::makeFuture(IO_ERROR); + } + } + if (stat(path.c_str(), &st)) { + PLOG(ERROR) << "Fail to state file: " << path; + return folly::makeFuture(IO_ERROR); + } + size = st.st_size; + return folly::makeFuture(OK); + } + + std::string BuildPath(const std::string &prefix, const std::string &key) { + if (use_optimized_path_) { + std::size_t h1 = std::hash{}(key); + std::string suffix = std::to_string(h1 % 256) + '/' + std::to_string(h1 % 65536) + '/' + key; + return PathJoin(prefix, suffix); + } else { + return PathJoin(prefix, key); + } + } +}; + +#endif // MADFS_FILE_SYSTEM_DATA_ADAPTOR_H \ No newline at end of file diff --git a/global_cache/GarbageCollectorMain.cpp b/global_cache/GarbageCollectorMain.cpp new file mode 100644 index 0000000..fe52134 --- /dev/null +++ b/global_cache/GarbageCollectorMain.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include + +#include "S3DataAdaptor.h" +#include "FileSystemDataAdaptor.h" +#include "GlobalDataAdaptor.h" +#include "ReadCacheClient.h" + +#include 
// Split a comma-separated list into its fields. A trailing comma produces
// no empty tail element (std::getline semantics).
std::vector<std::string> SplitString(const std::string &input) {
    std::vector<std::string> tokens;
    std::stringstream stream(input);
    for (std::string token; std::getline(stream, token, ','); ) {
        tokens.push_back(token);
    }
    return tokens;
}
<< WHITE << std::endl; + exit(EXIT_FAILURE); + } else { + std::cerr << GREEN << "Garbage collection successfully" << WHITE << std::endl; + exit(EXIT_SUCCESS); + } +} diff --git a/global_cache/GlobalCacheClient.cpp b/global_cache/GlobalCacheClient.cpp new file mode 100644 index 0000000..972fa5a --- /dev/null +++ b/global_cache/GlobalCacheClient.cpp @@ -0,0 +1,368 @@ +#include +#include +#include +#include + +#include "gcache.pb.h" +#include "GlobalCacheClient.h" + +GlobalCacheClient::GlobalCacheClient(const std::string &group) : group_(group), inflight_payload_size_(0) {} + +GlobalCacheClient::~GlobalCacheClient() { + for (auto &entry: server_map_) { + delete entry.second; + } + server_map_.clear(); +} + +int GlobalCacheClient::RegisterServer(int server_id, const char *hostname) { + std::lock_guard lock(mutex_); + if (server_map_.count(server_id)) { + LOG(WARNING) << "Server has been registered, previous regitration will be override" + << ", group: " << group_ + << ", server_id: " << server_id + << ", hostname: " << hostname; + } + + brpc::ChannelOptions options; + options.use_rdma = GetGlobalConfig().use_rdma; + options.timeout_ms = GetGlobalConfig().rpc_timeout; + options.connection_group = group_; + + int32_t fixed_backoff_time_ms = 100; // 固定时间间隔(毫秒) + int32_t no_backoff_remaining_rpc_time_ms = 150; // 无需重试退避的剩余rpc时间阈值(毫秒) + bool retry_backoff_in_pthread = false; + static brpc::RpcRetryPolicyWithFixedBackoff g_retry_policy_with_fixed_backoff( + fixed_backoff_time_ms, no_backoff_remaining_rpc_time_ms, retry_backoff_in_pthread); + options.retry_policy = &g_retry_policy_with_fixed_backoff; + options.max_retry = 5; + + + auto channel = new brpc::Channel(); + if (channel->Init(hostname, &options)) { + PLOG(ERROR) << "Unable to initialize channel object" + << ", group: " << group_ + << ", server_id: " << server_id + << ", hostname: " << hostname; + delete channel; + return RPC_FAILED; + } + + // Sending sync register RPC + gcache::GlobalCacheService_Stub 
stub(channel); + brpc::Controller cntl; + gcache::RegisterRequest request; + gcache::RegisterResponse response; + stub.Register(&cntl, &request, &response, nullptr); + if (cntl.Failed() || response.status_code() != OK) { + LOG(ERROR) << "Failed to register server, reason: " << cntl.ErrorText() + << ", group: " << group_ + << ", server_id: " << server_id + << ", hostname: " << hostname; + delete channel; + return RPC_FAILED; + } + + LOG_IF(INFO, FLAGS_verbose) << "Register server successfully" + << ", group: " << group_ + << ", server_id: " << server_id + << ", hostname: " << hostname; + + server_map_[server_id] = channel; + return OK; +} + +brpc::Channel *GlobalCacheClient::GetChannelByServerId(int server_id) { + std::lock_guard lock(mutex_); + if (!server_map_.count(server_id)) { + LOG_EVERY_SECOND(ERROR) << "Server not registered. server_id: " << server_id; + return nullptr; + } + return server_map_[server_id]; +} + +Future GlobalCacheClient::GetEntry(int server_id, + const std::string &key, + uint64_t start, + uint64_t length, + bool is_read_cache) { + // while (inflight_payload_size_.load() >= GetGlobalConfig().max_inflight_payload_size) { + // LOG_EVERY_SECOND(INFO) << "Overcroweded " << inflight_payload_size_.load(); + // sched_yield(); + // } + inflight_payload_size_.fetch_add(length); + + auto channel = GetChannelByServerId(server_id); + if (!channel) { + GetOutput output; + output.status = RPC_FAILED; + return folly::makeFuture(output); + } + + gcache::GlobalCacheService_Stub stub(channel); + gcache::GetEntryRequest request; + request.set_key(key); + request.set_start(start); + request.set_length(length); + + struct OnRPCDone : public google::protobuf::Closure { + virtual void Run() { + GetOutput output; + if (cntl.Failed()) { + LOG(WARNING) << "RPC error: " << cntl.ErrorText() + << ", server id: " << server_id + << ", key: " << key + << ", start: " << start + << ", length: " << length; + output.status = RPC_FAILED; + } else { + output.status = 
response.status_code(); + output.buf = cntl.response_attachment(); + if (output.status == OK && output.buf.length() != length) { + LOG(WARNING) << "Received truncated attachment, expected " << length + << " bytes, actual " << output.buf.length() << " bytes" + << ", server id: " << server_id + << ", key: " << key + << ", start: " << start + << ", length: " << length; + output.status = RPC_FAILED; + } + } + promise.setValue(output); + parent->inflight_payload_size_.fetch_sub(length); + t.stop(); + LOG_EVERY_N(INFO, 1000) << t.u_elapsed(); + delete this; + } + + brpc::Controller cntl; + gcache::GetEntryResponse response; + Promise promise; + + int server_id; + std::string key; + uint64_t start; + uint64_t length; + GlobalCacheClient *parent; + butil::Timer t; + }; + + auto done = new OnRPCDone(); + done->t.start(); + done->parent = this; + done->server_id = server_id; + done->key = key; + done->start = start; + done->length = length; + + auto future = done->promise.getFuture(); + if (is_read_cache) + stub.GetEntryFromReadCache(&done->cntl, &request, &done->response, done); + else + stub.GetEntryFromWriteCache(&done->cntl, &request, &done->response, done); + return std::move(future); +} + +Future GlobalCacheClient::PutEntry(int server_id, + const std::string &key, + const ByteBuffer &buf, + uint64_t length, + bool is_read_cache) { + // while (inflight_payload_size_.load() >= GetGlobalConfig().max_inflight_payload_size) { + // LOG_EVERY_SECOND(INFO) << "Overcroweded " << inflight_payload_size_.load(); + // sched_yield(); + // } + inflight_payload_size_.fetch_add(length); + + auto channel = GetChannelByServerId(server_id); + if (!channel) { + PutOutput output; + output.status = RPC_FAILED; + return folly::makeFuture(output); + } + + gcache::GlobalCacheService_Stub stub(channel); + gcache::PutEntryRequest request; + request.set_key(key); + request.set_length(length); + + struct OnRPCDone : public google::protobuf::Closure { + virtual void Run() { + PutOutput output; + if 
(cntl.Failed()) { + LOG(WARNING) << "RPC error: " << cntl.ErrorText() + << ", server id: " << server_id + << ", key: " << key + << ", length: " << length; + output.status = RPC_FAILED; + } else { + output.status = response.status_code(); + output.internal_key = response.internal_key(); + } + promise.setValue(output); + parent->inflight_payload_size_.fetch_sub(length); + delete this; + } + + brpc::Controller cntl; + gcache::PutEntryResponse response; + Promise promise; + + int server_id; + std::string key; + uint64_t length; + GlobalCacheClient *parent; + }; + + auto done = new OnRPCDone(); + done->parent = this; + done->server_id = server_id; + done->key = key; + done->length = length; + + done->cntl.request_attachment().append(buf.data, length); + auto future = done->promise.getFuture(); + if (is_read_cache) + stub.PutEntryFromReadCache(&done->cntl, &request, &done->response, done); + else + stub.PutEntryFromWriteCache(&done->cntl, &request, &done->response, done); + return std::move(future); +} + +Future GlobalCacheClient::DeleteEntryFromReadCache(int server_id, + const std::string &key, + uint64_t chunk_size, + uint64_t max_chunk_id) { + auto channel = GetChannelByServerId(server_id); + if (!channel) { + LOG(ERROR) << "Cannot find channel for server " << server_id; + return folly::makeFuture(RPC_FAILED); + } + + gcache::GlobalCacheService_Stub stub(channel); + gcache::DeleteEntryRequest request; + request.set_key(key); + request.set_chunk_size(chunk_size); + request.set_max_chunk_id(max_chunk_id); + + struct OnRPCDone : public google::protobuf::Closure { + virtual void Run() { + int status; + if (cntl.Failed()) { + LOG(WARNING) << "RPC error: " << cntl.ErrorText() + << ", server id: " << server_id + << ", key: " << key; + status = RPC_FAILED; + } else { + status = response.status_code(); + } + promise.setValue(status); + delete this; + } + + brpc::Controller cntl; + gcache::DeleteEntryResponse response; + Promise promise; + + int server_id; + std::string key; + 
}; + + auto done = new OnRPCDone(); + done->server_id = server_id; + done->key = key; + + auto future = done->promise.getFuture(); + stub.DeleteEntryFromReadCache(&done->cntl, &request, &done->response, done); + return std::move(future); +} + +Future GlobalCacheClient::QueryTsFromWriteCache(int server_id) { + auto channel = GetChannelByServerId(server_id); + if (!channel) { + QueryTsOutput output; + output.status = RPC_FAILED; + return folly::makeFuture(output); + } + + gcache::GlobalCacheService_Stub stub(channel); + gcache::QueryTsRequest request; + + struct OnRPCDone : public google::protobuf::Closure { + virtual void Run() { + QueryTsOutput output; + if (cntl.Failed()) { + LOG(WARNING) << "RPC error: " << cntl.ErrorText() + << ", server id: " << server_id; + output.status = RPC_FAILED; + } else { + output.status = response.status_code(); + output.timestamp = response.timestamp(); + } + promise.setValue(output); + delete this; + } + + brpc::Controller cntl; + gcache::QueryTsResponse response; + Promise promise; + + int server_id; + }; + + auto done = new OnRPCDone(); + done->server_id = server_id; + + auto future = done->promise.getFuture(); + stub.QueryTsFromWriteCache(&done->cntl, &request, &done->response, done); + return std::move(future); +} + +Future GlobalCacheClient::DeleteEntryFromWriteCache(int server_id, + const std::string &key_prefix, + uint64_t max_ts, + std::vector &except_keys) { + auto channel = GetChannelByServerId(server_id); + if (!channel) { + LOG(ERROR) << "Cannot find channel for server " << server_id; + return folly::makeFuture(RPC_FAILED); + } + + gcache::GlobalCacheService_Stub stub(channel); + gcache::DeleteEntryRequestForWriteCache request; + request.set_key_prefix(key_prefix); + request.set_max_ts(max_ts); + for (auto &entry : except_keys) + request.add_except_keys(entry); + + struct OnRPCDone : public google::protobuf::Closure { + virtual void Run() { + int status; + if (cntl.Failed()) { + LOG(WARNING) << "RPC error: " << 
cntl.ErrorText() + << ", server id: " << server_id + << ", key: " << key; + status = RPC_FAILED; + } else { + status = response.status_code(); + } + promise.setValue(status); + delete this; + } + + brpc::Controller cntl; + gcache::DeleteEntryResponse response; + Promise promise; + + int server_id; + std::string key; + }; + + auto done = new OnRPCDone(); + done->server_id = server_id; + done->key = key_prefix; + + auto future = done->promise.getFuture(); + stub.DeleteEntryFromWriteCache(&done->cntl, &request, &done->response, done); + return std::move(future); +} \ No newline at end of file diff --git a/global_cache/GlobalCacheClient.h b/global_cache/GlobalCacheClient.h new file mode 100644 index 0000000..297bbee --- /dev/null +++ b/global_cache/GlobalCacheClient.h @@ -0,0 +1,62 @@ +#ifndef MADFS_GLOBAL_CACHE_CLIENT_H +#define MADFS_GLOBAL_CACHE_CLIENT_H + +#include +#include +#include +#include +#include + +#include "Common.h" +#include "common.h" + +using HybridCache::ByteBuffer; + +class GlobalCacheClient { +public: + GlobalCacheClient(const std::string &group = ""); + + ~GlobalCacheClient(); + + int RegisterServer(int server_id, const char *hostname); + + Future GetEntryFromReadCache(int server_id, const std::string &key, uint64_t start, uint64_t length) { + return GetEntry(server_id, key, start, length, true); + } + + Future PutEntryFromReadCache(int server_id, const std::string &key, const ByteBuffer &buf, uint64_t length) { + return PutEntry(server_id, key, buf, length, true); + } + + Future DeleteEntryFromReadCache(int server_id, const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id); + + Future GetEntryFromWriteCache(int server_id, const std::string &key, uint64_t start, uint64_t length){ + return GetEntry(server_id, key, start, length, false); + } + + Future PutEntryFromWriteCache(int server_id, const std::string &key, const ByteBuffer &buf, uint64_t length){ + return PutEntry(server_id, key, buf, length, false); + } + + Future 
QueryTsFromWriteCache(int server_id); + + Future DeleteEntryFromWriteCache(int server_id, + const std::string &key_prefix, + uint64_t max_ts, + std::vector &except_keys); + +private: + brpc::Channel *GetChannelByServerId(int server_id); + + Future GetEntry(int server_id, const std::string &key, uint64_t start, uint64_t length, bool is_read_cache); + + Future PutEntry(int server_id, const std::string &key, const ByteBuffer &buf, uint64_t length, bool is_read_cache); + +private: + std::mutex mutex_; + const std::string group_; + std::map server_map_; + std::atomic inflight_payload_size_; +}; + +#endif // MADFS_GLOBAL_CACHE_CLIENT_H \ No newline at end of file diff --git a/global_cache/GlobalCacheServer.cpp b/global_cache/GlobalCacheServer.cpp new file mode 100644 index 0000000..bb59b13 --- /dev/null +++ b/global_cache/GlobalCacheServer.cpp @@ -0,0 +1,107 @@ +#include "GlobalCacheServer.h" + +namespace gcache { + GlobalCacheServiceImpl::GlobalCacheServiceImpl(std::shared_ptr executor, + std::shared_ptr base_adaptor) + : executor_(executor) { + read_cache_ = std::make_shared(executor_, base_adaptor); + write_cache_ = std::make_shared(executor_); + } + + void GlobalCacheServiceImpl::GetEntryFromReadCache(google::protobuf::RpcController *cntl_base, + const GetEntryRequest *request, + GetEntryResponse *response, + google::protobuf::Closure *done) { + brpc::Controller *cntl = static_cast(cntl_base); + read_cache_->Get(request->key(), request->start(), request->length()) + .thenValue([this, cntl, request, done, response](GetOutput output) { + response->set_status_code(output.status); + butil::Timer t; + t.start(); + cntl->response_attachment().append(output.buf); + t.stop(); + // LOG_EVERY_N(INFO, 1000) << t.u_elapsed(); + done->Run(); + }); + } + + void GlobalCacheServiceImpl:: PutEntryFromReadCache(google::protobuf::RpcController *cntl_base, + const PutEntryRequest *request, + PutEntryResponse *response, + google::protobuf::Closure *done) { + brpc::Controller *cntl = 
static_cast(cntl_base); + auto output = read_cache_->Put(request->key(), request->length(), cntl->request_attachment()); + response->set_status_code(output); + done->Run(); + } + + void GlobalCacheServiceImpl::DeleteEntryFromReadCache(google::protobuf::RpcController *cntl_base, + const DeleteEntryRequest *request, + DeleteEntryResponse *response, + google::protobuf::Closure *done) { + brpc::Controller *cntl = static_cast(cntl_base); + if (request->has_chunk_size() && request->has_max_chunk_id()) { + response->set_status_code(read_cache_->Delete(request->key(), + request->chunk_size(), + request->max_chunk_id())); + } else { + response->set_status_code(read_cache_->Delete(request->key())); + } + done->Run(); + } + + void GlobalCacheServiceImpl::GetEntryFromWriteCache(google::protobuf::RpcController *cntl_base, + const GetEntryRequest *request, + GetEntryResponse *response, + google::protobuf::Closure *done) { + brpc::Controller *cntl = static_cast(cntl_base); + auto output = write_cache_->Get(request->key(), request->start(), request->length()); + response->set_status_code(output.status); + cntl->response_attachment().append(output.buf); + done->Run(); + } + + void GlobalCacheServiceImpl::PutEntryFromWriteCache(google::protobuf::RpcController *cntl_base, + const PutEntryRequest *request, + PutEntryResponse *response, + google::protobuf::Closure *done) { + brpc::Controller *cntl = static_cast(cntl_base); + auto output = write_cache_->Put(request->key(), request->length(), cntl->request_attachment()); + response->set_status_code(output.status); + response->set_internal_key(output.internal_key); + done->Run(); + } + + void GlobalCacheServiceImpl::DeleteEntryFromWriteCache(google::protobuf::RpcController *cntl_base, + const DeleteEntryRequestForWriteCache *request, + DeleteEntryResponse *response, + google::protobuf::Closure *done) { + brpc::Controller *cntl = static_cast(cntl_base); + std::unordered_set except_keys; + for (auto &entry : request->except_keys()) { + 
except_keys.insert(entry); + } + auto output = write_cache_->Delete(request->key_prefix(), request->max_ts(), except_keys); + response->set_status_code(output); + done->Run(); + } + + void GlobalCacheServiceImpl::QueryTsFromWriteCache(google::protobuf::RpcController *cntl_base, + const QueryTsRequest *request, + QueryTsResponse *response, + google::protobuf::Closure *done) { + brpc::Controller *cntl = static_cast(cntl_base); + response->set_timestamp(write_cache_->QueryTS()); + response->set_status_code(OK); + done->Run(); + } + + void GlobalCacheServiceImpl::Register(google::protobuf::RpcController *cntl_base, + const RegisterRequest *request, + RegisterResponse *response, + google::protobuf::Closure *done) { + brpc::Controller *cntl = static_cast(cntl_base); + response->set_status_code(OK); + done->Run(); + } +} \ No newline at end of file diff --git a/global_cache/GlobalCacheServer.h b/global_cache/GlobalCacheServer.h new file mode 100644 index 0000000..58a9c62 --- /dev/null +++ b/global_cache/GlobalCacheServer.h @@ -0,0 +1,74 @@ +#ifndef MADFS_GLOBAL_CACHE_SERVER_H +#define MADFS_GLOBAL_CACHE_SERVER_H + +#include +#include +#include +#include +#include +#include + +#include "butil/time.h" +#include "bvar/bvar.h" + +#include "gcache.pb.h" +#include "ReadCache.h" +#include "WriteCache.h" +#include "data_adaptor.h" + +namespace gcache { + class GlobalCacheServiceImpl : public GlobalCacheService { + public: + GlobalCacheServiceImpl(std::shared_ptr executor, + std::shared_ptr base_adaptor); + + virtual ~GlobalCacheServiceImpl() {} + + virtual void GetEntryFromReadCache(google::protobuf::RpcController *cntl_base, + const GetEntryRequest *request, + GetEntryResponse *response, + google::protobuf::Closure *done); + + virtual void PutEntryFromReadCache(google::protobuf::RpcController *cntl_base, + const PutEntryRequest *request, + PutEntryResponse *response, + google::protobuf::Closure *done); + + virtual void DeleteEntryFromReadCache(google::protobuf::RpcController 
*cntl_base, + const DeleteEntryRequest *request, + DeleteEntryResponse *response, + google::protobuf::Closure *done); + + virtual void GetEntryFromWriteCache(google::protobuf::RpcController *cntl_base, + const GetEntryRequest *request, + GetEntryResponse *response, + google::protobuf::Closure *done); + + virtual void PutEntryFromWriteCache(google::protobuf::RpcController *cntl_base, + const PutEntryRequest *request, + PutEntryResponse *response, + google::protobuf::Closure *done); + + virtual void DeleteEntryFromWriteCache(google::protobuf::RpcController *cntl_base, + const DeleteEntryRequestForWriteCache *request, + DeleteEntryResponse *response, + google::protobuf::Closure *done); + + virtual void QueryTsFromWriteCache(google::protobuf::RpcController *cntl_base, + const QueryTsRequest *request, + QueryTsResponse *response, + google::protobuf::Closure *done); + + virtual void Register(google::protobuf::RpcController *cntl_base, + const RegisterRequest *request, + RegisterResponse *response, + google::protobuf::Closure *done); + + private: + std::shared_ptr executor_; + std::shared_ptr read_cache_; + std::shared_ptr write_cache_; + }; +} + +#endif // MADFS_GLOBAL_CACHE_SERVER_H \ No newline at end of file diff --git a/global_cache/GlobalCacheServerMain.cpp b/global_cache/GlobalCacheServerMain.cpp new file mode 100644 index 0000000..5ca94fa --- /dev/null +++ b/global_cache/GlobalCacheServerMain.cpp @@ -0,0 +1,41 @@ +#include "GlobalCacheServer.h" +#include "S3DataAdaptor.h" + +#include + +DEFINE_int32(port, 8000, "TCP Port of global cache server"); +DEFINE_bool(fetch_s3_if_miss, false, "Allow fetch data from S3 if cache miss"); + +int main(int argc, char *argv[]) { + LOG(INFO) << "MADFS Global Cache Server"; + gflags::ParseCommandLineFlags(&argc, &argv, true); + brpc::Server server; + + folly::SingletonVault::singleton()->registrationComplete(); + + brpc::ServerOptions options; + options.num_threads = GetGlobalConfig().rpc_threads; + options.use_rdma = 
GetGlobalConfig().use_rdma; + + std::shared_ptr base_adaptor = nullptr; + if (FLAGS_fetch_s3_if_miss) { + base_adaptor = std::make_shared(); + } + + auto executor = std::make_shared(GetGlobalConfig().folly_threads); + auto gcache_service = std::make_shared(executor, base_adaptor); + + if (server.AddService(gcache_service.get(), brpc::SERVER_DOESNT_OWN_SERVICE)) { + PLOG(ERROR) << "Failed to register global cache service"; + return -1; + } + + butil::EndPoint point = butil::EndPoint(butil::IP_ANY, FLAGS_port); + if (server.Start(point, &options) != 0) { + PLOG(ERROR) << "Failed to start global cache server"; + return -1; + } + + server.RunUntilAskedToQuit(); + return 0; +} diff --git a/global_cache/GlobalDataAdaptor.cpp b/global_cache/GlobalDataAdaptor.cpp new file mode 100644 index 0000000..6d3c973 --- /dev/null +++ b/global_cache/GlobalDataAdaptor.cpp @@ -0,0 +1,674 @@ +#include "GlobalDataAdaptor.h" +#include "ReadCacheClient.h" +#include "ReplicationWriteCacheClient.h" +#include "ErasureCodingWriteCacheClient.h" + +using HybridCache::ByteBuffer; + +#define CONFIG_GC_ON_EXCEEDING_DISKSPACE + +DEFINE_uint32(bg_execution_period, 10, "Background execution period in seconds"); + +GlobalDataAdaptor::GlobalDataAdaptor(std::shared_ptr base_adaptor, + const std::vector &server_list, + std::shared_ptr etcd_client, + std::shared_ptr executor) + : base_adaptor_(base_adaptor), + executor_(executor), + server_list_(server_list), + etcd_client_(etcd_client), + meta_cache_(GetGlobalConfig().meta_cache_max_size, GetGlobalConfig().meta_cache_clear_size) { + if (!executor_) { + executor_ = std::make_shared(GetGlobalConfig().folly_threads); + } + + read_cache_ = std::make_shared(this); + write_caches_[WC_TYPE_REPLICATION] = std::make_shared(this); + write_caches_[WC_TYPE_REEDSOLOMON] = std::make_shared(this); + + for (int conn_id = 0; conn_id < GetGlobalConfig().rpc_connections; conn_id++) { + auto client = std::make_shared(std::to_string(conn_id)); + int server_id = 0; + for (auto 
&entry: server_list_) { + if (client->RegisterServer(server_id, entry.c_str())) { + // TODO 周期性尝试重连 + LOG(WARNING) << "Failed to connect with server id: " << server_id + << ", address: " << entry; + bg_mutex_.lock(); + bg_tasks_.push_back([client,server_id, entry]() -> int { + return client->RegisterServer(server_id, entry.c_str()); + }); + bg_mutex_.unlock(); + } + server_id++; + } + rpc_client_.push_back(client); + } + srand48(time(nullptr)); + bg_running_ = true; + bg_thread_ = std::thread(std::bind(&GlobalDataAdaptor::BackgroundWorker, this)); +} + +GlobalDataAdaptor::~GlobalDataAdaptor() { + bg_running_ = false; + bg_cv_.notify_all(); + bg_thread_.join(); +} + +void GlobalDataAdaptor::BackgroundWorker() { + while (bg_running_) { + std::unique_lock lock(bg_mutex_); + std::vector> bg_tasks_next; + for (auto &entry : bg_tasks_) { + if (entry()) { + bg_tasks_next.push_back(entry); + } + } + bg_tasks_ = bg_tasks_next; + bg_cv_.wait_for(lock, std::chrono::seconds(FLAGS_bg_execution_period)); + } +} + +struct DownloadArgs { + DownloadArgs(const std::string &key, size_t start, size_t size, ByteBuffer &buffer) + : key(key), start(start), size(size), buffer(buffer) {} + + std::string key; + size_t start; + size_t size; + ByteBuffer &buffer; +}; + +folly::Future GlobalDataAdaptor::DownLoad(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer) { + return DownLoadFromGlobalCache(key, start, size, buffer).then( + [this, key, start, size, &buffer](folly::Try &&output) -> folly::Future { + if (output.value_or(FOLLY_ERROR) == RPC_FAILED) { + return base_adaptor_->DownLoad(key, start, size, buffer); + } + return output.value_or(FOLLY_ERROR); + }); +} + +folly::Future GlobalDataAdaptor::DownLoadFromGlobalCache(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer) { + auto &policy = GetCachePolicy(key); + auto meta_cache_entry = GetMetaCacheEntry(key); + if (meta_cache_entry->present) { + if (!meta_cache_entry->existed) { + LOG(ERROR) 
<< "Request for potential deleted file: " << key; + return folly::makeFuture(NOT_FOUND); + } + if (start + size > meta_cache_entry->size) { + LOG(ERROR) << "Request out of file range, key: " << key + << ", start: " << start + << ", size: " << size + << ", file length: " << meta_cache_entry->size; + return folly::makeFuture(END_OF_FILE); + } + } + + if (policy.write_cache_type != NOCACHE) { + auto args = std::make_shared(key, start, size, buffer); + if (meta_cache_entry->present) { + if (meta_cache_entry->write_cached) { + auto &root = meta_cache_entry->root; + if (root["type"] == "replication") { + return write_caches_[WC_TYPE_REPLICATION]->Get(args->key, args->start, args->size, args->buffer, root); + } else if (root["type"] == "reed-solomon") { + return write_caches_[WC_TYPE_REEDSOLOMON]->Get(args->key, args->start, args->size, args->buffer, root); + } + LOG(ERROR) << "Failed to download data, reason: unsuppported type, key: " << args->key + << ", start: " << args->start + << ", size: " << args->size + << ", type: " << root["type"]; + return folly::makeFuture(UNSUPPORTED_TYPE); + } else { + return read_cache_->Get(key, start, size, buffer); + } + } else { + return etcd_client_->GetJson(key).then( + [this, args, meta_cache_entry](folly::Try &&output) -> folly::Future { + if (!output.hasValue()) { // 当 GetJson 函数抛出异常时执行这部分代码 + LOG(ERROR) << "Failed to download data, reason: internal error, key: " << args->key + << ", start: " << args->start + << ", size: " << args->size; + return folly::makeFuture(FOLLY_ERROR); + } + + auto &status = output.value().status; + if (status == NOT_FOUND) { + if (GetGlobalConfig().use_meta_cache) { + return base_adaptor_->Head(args->key, meta_cache_entry->size, meta_cache_entry->headers).then( + [this, meta_cache_entry, args](folly::Try &&output) -> folly::Future { + int res = output.value_or(FOLLY_ERROR); + if (res == OK || res == NOT_FOUND) { + meta_cache_entry->present = true; + meta_cache_entry->existed = (res == OK); + 
meta_cache_entry->write_cached = false; + } + if (res == OK) { + return read_cache_->Get(args->key, args->start, args->size, args->buffer); + } + return res; + }); + } else { + return read_cache_->Get(args->key, args->start, args->size, args->buffer); + } + } else if (status != OK) { + return folly::makeFuture(status); + } + + auto &root = output.value().root; + if (GetGlobalConfig().use_meta_cache) { + meta_cache_entry->present = true; + meta_cache_entry->existed = true; + meta_cache_entry->write_cached = true; + meta_cache_entry->size = root["size"].asInt64(); + for (auto iter = root["headers"].begin(); iter != root["headers"].end(); iter++) { + meta_cache_entry->headers[iter.key().asString()] = (*iter).asString(); + } + meta_cache_entry->root = root; + } + + if (root["type"] == "replication") { + return write_caches_[WC_TYPE_REPLICATION]->Get(args->key, args->start, args->size, args->buffer, root); + } else if (root["type"] == "reed-solomon") { + return write_caches_[WC_TYPE_REEDSOLOMON]->Get(args->key, args->start, args->size, args->buffer, root); + } + + LOG(ERROR) << "Failed to download data, reason: unsuppported type, key: " << args->key + << ", start: " << args->start + << ", size: " << args->size + << ", type: " << root["type"]; + + return folly::makeFuture(UNSUPPORTED_TYPE); + }); + } + } else { + return read_cache_->Get(key, start, size, buffer); + } +} + +folly::Future GlobalDataAdaptor::UpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map &headers) { +#ifdef CONFIG_GC_ON_EXCEEDING_DISKSPACE + return DoUpLoad(key, size, buffer, headers).thenValue([this, key, size, &buffer, &headers](int &&res) -> int { + if (res != NO_ENOUGH_DISKSPACE) { + return res; + } + LOG(INFO) << "Disk limit exceeded - perform GC immediately"; + res = PerformGarbageCollection(); + if (res) { + LOG(WARNING) << "GC failed"; + return res; + } + LOG(INFO) << "Disk limit exceeded - GC completed, now retry"; + return DoUpLoad(key, size, buffer, 
headers).get(); + }); +#else + return DoUpLoad(key, size, buffer, headers); +#endif +} + +folly::Future GlobalDataAdaptor::DoUpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map &headers) { + butil::Timer *t = new butil::Timer(); + t->start(); + auto &policy = GetCachePolicy(key); + auto meta_cache_entry = GetMetaCacheEntry(key); + meta_cache_entry->present = false; + meta_cache_entry->existed = true; + meta_cache_entry->size = size; + meta_cache_entry->headers = headers; + auto pre_op = read_cache_->Invalidate(key, size); + if (policy.write_cache_type == REPLICATION || policy.write_cache_type == REED_SOLOMON) { + auto write_cache = policy.write_cache_type == REPLICATION + ? write_caches_[WC_TYPE_REPLICATION] + : write_caches_[WC_TYPE_REEDSOLOMON]; + return std::move(pre_op) + .then(std::bind(&WriteCacheClient::Put, write_cache.get(), key, size, buffer, headers, 0)) + .then([this, key, meta_cache_entry, t] (folly::Try output) -> folly::Future { + int status = output.hasValue() ? 
output.value().status : FOLLY_ERROR; + if (status == OK) { + status = etcd_client_->PutJson(key, output.value().root).get(); + if (status == OK && GetGlobalConfig().use_meta_cache) { + meta_cache_entry->root = output.value().root; + meta_cache_entry->write_cached = true; + meta_cache_entry->present = true; + } + t->stop(); + LOG(INFO) << "JSON: " << t->u_elapsed(); + delete t; + } + return folly::makeFuture(status); + }); + } else if (policy.write_cache_type == NOCACHE) { + return std::move(pre_op) + .then(std::bind(&DataAdaptor::UpLoad, base_adaptor_.get(), key, size, buffer, headers)) + .thenValue([meta_cache_entry](int &&res) -> int { + if (res == OK && GetGlobalConfig().use_meta_cache) { + meta_cache_entry->write_cached = false; + meta_cache_entry->present = true; + } + return res; + }); + } else { + LOG(ERROR) << "Failed to upload data, reason: unsuppported type, key: " << key + << ", size: " << size + << ", type: " << policy.write_cache_type; + return folly::makeFuture(UNSUPPORTED_TYPE); + } +} + +folly::Future GlobalDataAdaptor::Delete(const std::string &key) { + auto &policy = GetCachePolicy(key); + if (policy.write_cache_type == NOCACHE) { + InvalidateMetaCacheEntry(key); + return base_adaptor_->Delete(key); + } else { + auto meta_cache_entry = GetMetaCacheEntry(key); + auto size = meta_cache_entry->size; + bool present = meta_cache_entry->present; + bool has_write_cache = false; + + if (!present) { + auto result = etcd_client_->GetJson(key).get(); + if (result.status == OK) { + size = result.root["size"].asInt64(); + has_write_cache = true; + } else if (result.status == NOT_FOUND) { // 只在 S3 里存储 + std::map headers; + int ret = base_adaptor_->Head(key, size, headers).get(); + if (ret) return ret; + } else { + return folly::makeFuture(result.status); + } + } + + InvalidateMetaCacheEntry(key); + + if (has_write_cache) { + return base_adaptor_->Delete(key) + .then(std::bind(&ReadCacheClient::Invalidate, read_cache_.get(), key, size)) + 
.then(std::bind(&EtcdClient::DeleteJson, etcd_client_.get(), key)); + } else { + return base_adaptor_->Delete(key) + .then(std::bind(&ReadCacheClient::Invalidate, read_cache_.get(), key, size)); + } + } +} + +struct DeepFlushArgs { + DeepFlushArgs(const std::string &key) : key(key) {} + ~DeepFlushArgs() { if (buffer.data) delete []buffer.data; } + + std::string key; + std::map headers; + ByteBuffer buffer; +}; + +folly::Future GlobalDataAdaptor::DeepFlush(const std::string &key) { + butil::Timer *t = new butil::Timer(); + t->start(); + auto &policy = GetCachePolicy(key); + if (policy.write_cache_type == REPLICATION || policy.write_cache_type == REED_SOLOMON) { + auto args = std::make_shared(key); + return etcd_client_->GetJson(key).then([this, t, args](folly::Try &&output) -> folly::Future { + if (!output.hasValue()) { + return folly::makeFuture(FOLLY_ERROR); + } + if (output.value().status != OK) { + return folly::makeFuture(output.value().status); + } + auto &root = output.value().root; + args->buffer.len = root["size"].asInt64(); + args->buffer.data = new char[args->buffer.len]; + for (auto iter = root["headers"].begin(); iter != root["headers"].end(); iter++) { + args->headers[iter.key().asString()] = (*iter).asString(); + } + t->stop(); + LOG(INFO) << "DeepFlush phase 1: " << t->u_elapsed(); + + return DownLoad(args->key, 0, args->buffer.len, args->buffer); + }).then([this, t, args](folly::Try &&output) -> folly::Future { + int res = output.value_or(FOLLY_ERROR); + t->stop(); + LOG(INFO) << "DeepFlush phase 2: " << t->u_elapsed(); + if (res != OK) { + return folly::makeFuture(res); + } else { + return base_adaptor_->UpLoad(args->key, args->buffer.len, args->buffer, args->headers); + } + }).then([this, t, key, args](folly::Try &&output) -> folly::Future { + t->stop(); + LOG(INFO) << "DeepFlush phase 3: " << t->u_elapsed(); + int res = output.value_or(FOLLY_ERROR); + if (res != OK) { + return folly::makeFuture(res); + } else { + InvalidateMetaCacheEntry(key); + 
return etcd_client_->DeleteJson(key); + } + }); + } else { + t->stop(); + LOG(INFO) << "DeepFlush phase 4: " << t->u_elapsed(); + return folly::makeFuture(OK); + } +} + +struct HeadArgs { + HeadArgs(const std::string &key, size_t &size, std::map &headers) + : key(key), size(size), headers(headers) {} + + std::string key; + size_t &size; + std::map &headers; +}; + +folly::Future GlobalDataAdaptor::Head(const std::string &key, + size_t &size, + std::map &headers) { + auto &policy = GetCachePolicy(key); + auto meta_cache_entry = GetMetaCacheEntry(key); + if (meta_cache_entry->present) { + if (!meta_cache_entry->existed) { + LOG(ERROR) << "Request for potential deleted file: " << key; + return folly::makeFuture(NOT_FOUND); + } + size = meta_cache_entry->size; + headers = meta_cache_entry->headers; + return folly::makeFuture(OK); + } + + if (policy.write_cache_type == REPLICATION || policy.write_cache_type == REED_SOLOMON) { + auto args = std::make_shared(key, size, headers); + return etcd_client_->GetJson(key).then([this, args, meta_cache_entry](folly::Try &&output) -> folly::Future { + if (!output.hasValue()) { + return folly::makeFuture(FOLLY_ERROR); + } + if (output.value().status != OK) { + return folly::makeFuture(output.value().status); + } + auto &root = output.value().root; + args->size = root["size"].asInt64(); + for (auto iter = root["headers"].begin(); iter != root["headers"].end(); iter++) { + args->headers[iter.key().asString()] = (*iter).asString(); + } + + if (GetGlobalConfig().use_meta_cache) { + meta_cache_entry->present = true; + meta_cache_entry->existed = true; + meta_cache_entry->write_cached = true; + meta_cache_entry->size = args->size; + meta_cache_entry->headers = args->headers; + meta_cache_entry->root = output.value().root; + } + + return folly::makeFuture(OK); + }).then([this, args, meta_cache_entry](folly::Try &&output) -> folly::Future { + int res = output.value_or(FOLLY_ERROR); + if (res != NOT_FOUND) { + return folly::makeFuture(res); + 
} else { + return base_adaptor_->Head(args->key, args->size, args->headers).thenValue([args, meta_cache_entry](int &&res) -> int { + if (GetGlobalConfig().use_meta_cache && (res == OK || res == NOT_FOUND)) { + meta_cache_entry->present = true; + meta_cache_entry->existed = (res == OK); + meta_cache_entry->write_cached = false; + meta_cache_entry->size = args->size; + meta_cache_entry->headers = args->headers; + } + return res; + }); + } + }); + } else { + return base_adaptor_->Head(key, size, headers).thenValue([meta_cache_entry, &size, &headers](int &&res) -> int { + if (GetGlobalConfig().use_meta_cache && (res == OK || res == NOT_FOUND)) { + meta_cache_entry->present = true; + meta_cache_entry->existed = (res == OK); + meta_cache_entry->write_cached = false; + meta_cache_entry->size = size; + meta_cache_entry->headers = headers; + } + return res; + }); + } +} + +void GlobalDataAdaptor::InvalidateMetaCache() { + std::lock_guard lock(meta_cache_mutex_); + meta_cache_.clear(); +} + +void GlobalDataAdaptor::InvalidateMetaCacheEntry(const std::string &key) { + std::lock_guard lock(meta_cache_mutex_); + meta_cache_.erase(key); +} + +std::shared_ptr GlobalDataAdaptor::GetMetaCacheEntry(const std::string &key) { + std::lock_guard lock(meta_cache_mutex_); + auto iter = meta_cache_.find(key); + if (iter == meta_cache_.end()) { + auto entry = std::make_shared(key); + meta_cache_.insert(key, entry); + return entry; + } else { + return iter->second; + } +} + + +void GlobalDataAdaptor::SetCachePolicy(const std::string &key, CachePolicy &policy) { + // ... 
+} + +const CachePolicy &GlobalDataAdaptor::GetCachePolicy(const std::string &key) const { + return GetGlobalConfig().default_policy; +} + +std::shared_ptr GlobalDataAdaptor::GetRpcClient() const { + return rpc_client_[lrand48() % rpc_client_.size()]; +} + +int GlobalDataAdaptor::PerformGarbageCollection(const std::string &prefix) { + LOG(INFO) << "==================GC START==================="; + butil::Timer t; + t.start(); + + std::vector write_cache_ts; + std::set skipped_server_id_list; + for (int server_id = 0; server_id < server_list_.size(); ++server_id) { + auto res = GetRpcClient()->QueryTsFromWriteCache(server_id).get(); + if (res.status != OK) { + std::cerr << RED << "Skip recycling write cache data in server " << server_id << WHITE << std::endl; + skipped_server_id_list.insert(server_id); + } + write_cache_ts.push_back(res.timestamp); + LOG(INFO) << "TS for server " << server_id << ": " << res.timestamp; + } + + t.stop(); + LOG(INFO) << "Flush stage 1: " << t.u_elapsed(); + + if (server_list_.size() == skipped_server_id_list.size()) { + std::cerr << RED << "All servers are not available." << WHITE << std::endl; + return RPC_FAILED; + } + + std::vector key_list; + int rc = etcd_client_->ListJson(prefix, key_list).get(); + if (rc) { + std::cerr << RED << "Failed to list metadata in write cache. " + << "Check the availability of etcd server." 
<< WHITE << std::endl; + return rc; + } + + for (auto &key : key_list) { + LOG(INFO) << "Found entry: " << key; + } + + t.stop(); + LOG(INFO) << "Flush stage 2: " << t.u_elapsed(); + + std::vector> future_list; + for (auto &key : key_list) { + future_list.emplace_back(DeepFlush(key)); + } + + auto output = folly::collectAll(future_list).get(); + for (auto &entry: output) + if (entry.value_or(FOLLY_ERROR) != OK) { + LOG(ERROR) << "Cannot flush data to S3 storage"; + } + + t.stop(); + LOG(INFO) << "Flush stage 3: " << t.u_elapsed(); + + // Recheck the JSON metadata from etcd server + rc = etcd_client_->ListJson(prefix, key_list).get(); + if (rc != 0 && rc != NOT_FOUND) { + return rc; + } + + t.stop(); + LOG(INFO) << "Flush stage 4: " << t.u_elapsed(); + + std::unordered_map> preserve_chunk_keys_map; + for (auto &key : key_list) { + auto resp = etcd_client_->GetJson(key).get(); + if (resp.status) { + continue; + } + + std::vector replicas; + for (auto &entry : resp.root["replica"]) { + replicas.push_back(entry.asInt()); + } + + std::vector internal_keys; + for (auto &entry : resp.root["path"]) { + internal_keys.push_back(entry.asString()); + } + + assert(!replicas.empty() && !internal_keys.empty()); + for (int i = 0; i < internal_keys.size(); ++i) { + preserve_chunk_keys_map[replicas[i % replicas.size()]].push_back(internal_keys[i]); + } + } + + for (int server_id = 0; server_id < server_list_.size(); ++server_id) { + if (skipped_server_id_list.count(server_id)) { + continue; + } + + std::vector except_keys; + if (preserve_chunk_keys_map.count(server_id)) { + except_keys = preserve_chunk_keys_map[server_id]; + } + + int rc = GetRpcClient()->DeleteEntryFromWriteCache(server_id, + prefix, + write_cache_ts[server_id], + except_keys).get(); + if (rc) { + LOG(WARNING) << "Cannot delete unused entries from write cache. 
Server id: " << server_id; + } + } + + t.stop(); + LOG(INFO) << "Flush stage 5: " << t.u_elapsed(); + + LOG(INFO) << "==================GC END==================="; + return 0; +} + +folly::Future GlobalDataAdaptor::UpLoadPart(const std::string &key, + size_t off, + size_t size, + const ByteBuffer &buffer, + const std::map &headers, + Json::Value& root) { +#ifdef CONFIG_GC_ON_EXCEEDING_DISKSPACE + return DoUpLoadPart(key, off, size, buffer, headers, root) + .thenValue([this, key, off, size, &buffer, &headers, &root](int &&res) -> int { + if (res != NO_ENOUGH_DISKSPACE) { + return res; + } + LOG(INFO) << "Disk limit exceeded - perform GC immediately"; + res = PerformGarbageCollection(); + if (res) { + LOG(WARNING) << "GC failed"; + return res; + } + LOG(INFO) << "Disk limit exceeded - GC completed, now retry"; + return DoUpLoadPart(key, off, size, buffer, headers, root).get(); + }); +#else + return DoUpLoadPart(key, off, size, buffer, headers, root); +#endif +} + +folly::Future GlobalDataAdaptor::DoUpLoadPart(const std::string &key, + size_t off, + size_t size, + const ByteBuffer &buffer, + const std::map &headers, + Json::Value& root) { + butil::Timer *t = new butil::Timer(); + t->start(); + auto &policy = GetCachePolicy(key); + auto pre_op = read_cache_->Invalidate(key, off + size); + if (policy.write_cache_type == REPLICATION || policy.write_cache_type == REED_SOLOMON) { + auto write_cache = policy.write_cache_type == REPLICATION + ? write_caches_[WC_TYPE_REPLICATION] + : write_caches_[WC_TYPE_REEDSOLOMON]; + return std::move(pre_op) + .then(std::bind(&WriteCacheClient::Put, write_cache.get(), key, size, buffer, headers, off)) + .then([this, t, &root] (folly::Try output) -> folly::Future { + int status = output.hasValue() ? 
output.value().status : FOLLY_ERROR; + if (status == OK) { + root = std::move(output.value().root); + t->stop(); + delete t; + } + return folly::makeFuture(status); + }); + } else { + LOG(ERROR) << "Failed to upload data, reason: unsuppported type, key: " << key + << ", size: " << size + << ", type: " << policy.write_cache_type; + return folly::makeFuture(UNSUPPORTED_TYPE); + } +} + +folly::Future GlobalDataAdaptor::Completed(const std::string &key, + const std::vector &roots, + size_t size) { + if (!roots.empty()) { + auto meta_cache_entry = GetMetaCacheEntry(key); + meta_cache_entry->present = false; + + Json::Value json_path(Json::arrayValue); + for (int i=0; iPutJson(key, new_root); + } + return folly::makeFuture(OK); +} diff --git a/global_cache/GlobalDataAdaptor.h b/global_cache/GlobalDataAdaptor.h new file mode 100644 index 0000000..183b7cb --- /dev/null +++ b/global_cache/GlobalDataAdaptor.h @@ -0,0 +1,143 @@ +#ifndef MADFS_GLOBAL_DATA_ADAPTOR_H +#define MADFS_GLOBAL_DATA_ADAPTOR_H + +#include +#include +#include + +#include "data_adaptor.h" +#include "EtcdClient.h" +#include "ReadCacheClient.h" +#include "WriteCacheClient.h" +#include "GlobalCacheClient.h" + +#define NUM_WC_TYPES 2 +#define WC_TYPE_REPLICATION 0 +#define WC_TYPE_REEDSOLOMON 1 + +using HybridCache::ByteBuffer; +using HybridCache::DataAdaptor; + +class GlobalDataAdaptor : public DataAdaptor { + friend class ReadCacheClient; + + friend class ReplicationWriteCacheClient; + friend class ErasureCodingWriteCacheClient; + +public: + GlobalDataAdaptor(std::shared_ptr base_adaptor, + const std::vector &server_list, + std::shared_ptr etcd_client = nullptr, + std::shared_ptr executor = nullptr); + + ~GlobalDataAdaptor(); + + // 从数据服务器加载数据 + virtual folly::Future DownLoad(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer); + + folly::Future DownLoadFromGlobalCache(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer); + + // 上传数据到数据服务器 + virtual 
folly::Future UpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map &headers); + + virtual folly::Future DoUpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map &headers); + + virtual folly::Future UpLoadPart(const std::string &key, + size_t off, + size_t size, + const ByteBuffer &buffer, + const std::map &headers, + Json::Value& root); + + virtual folly::Future DoUpLoadPart(const std::string &key, + size_t off, + size_t size, + const ByteBuffer &buffer, + const std::map &headers, + Json::Value& root); + + virtual folly::Future Completed(const std::string &key, + const std::vector &roots, + size_t size); + + // 删除数据服务器的数据 + virtual folly::Future Delete(const std::string &key); + + // 数据源flush到S3(全局缓存用) + virtual folly::Future DeepFlush(const std::string &key); + + // 获取数据的元数据 + virtual folly::Future Head(const std::string &key, + size_t &size, + std::map &headers); + + int PerformGarbageCollection(const std::string &prefix = ""); + + void SetCachePolicy(const std::string &key, CachePolicy &policy); + +public: + struct MetaCacheEntry { + MetaCacheEntry(const std::string &key) : key(key), present(false) {} + + const std::string key; + bool present; // 只有设为 true,这个缓存才有效 + bool existed; // key 目前是存在的 + bool write_cached; // key 的数据位于全局写缓存 + size_t size; + std::map headers; + Json::Value root; + }; + + void InvalidateMetaCache(); + + void InvalidateMetaCacheEntry(const std::string &key); + + std::shared_ptr GetMetaCacheEntry(const std::string &key); + + const CachePolicy &GetCachePolicy(const std::string &key) const; + + std::shared_ptr GetRpcClient() const; + + const std::string GetServerHostname(int server_id) const { + if (server_id >= 0 && server_id < server_list_.size()) + return server_list_[server_id]; + return ""; + }; + + void BackgroundWorker(); + +private: + std::shared_ptr executor_; + + std::shared_ptr read_cache_; + std::shared_ptr write_caches_[NUM_WC_TYPES]; + + std::shared_ptr 
base_adaptor_; + + std::vector> rpc_client_; + std::shared_ptr etcd_client_; + std::vector server_list_; + + std::mutex meta_cache_mutex_; + folly::EvictingCacheMap> meta_cache_; + + std::atomic bg_running_; + std::thread bg_thread_; + std::mutex bg_mutex_; + std::condition_variable bg_cv_; + std::vector> bg_tasks_; +}; + +#endif // MADFS_GLOBAL_DATA_ADAPTOR_H \ No newline at end of file diff --git a/global_cache/Placement.h b/global_cache/Placement.h new file mode 100644 index 0000000..b8940a5 --- /dev/null +++ b/global_cache/Placement.h @@ -0,0 +1,15 @@ +#ifndef MADFS_PLACEMENT_H +#define MADFS_PLACEMENT_H + +#include +#include "Common.h" + +inline static std::vector Placement(const std::string &key, int num_available, int num_choose) { + uint64_t seed = std::hash < std::string > {}(key); + std::vector output; + for (int i = 0; i < std::min(num_available, num_choose); ++i) + output.push_back((seed + i) % num_available); + return output; +} + +#endif // MADFS_PLACEMENT_H \ No newline at end of file diff --git a/global_cache/ReadCache.cpp b/global_cache/ReadCache.cpp new file mode 100644 index 0000000..4fc67ac --- /dev/null +++ b/global_cache/ReadCache.cpp @@ -0,0 +1,215 @@ +#include +#include +#include +#include + +#define BRPC_WITH_RDMA 1 +#include + +#include "ReadCache.h" +#include "FileSystemDataAdaptor.h" + +bvar::LatencyRecorder g_latency_readcache4cachelib_get("readcache4cachelib_get"); + +class ReadCache4Cachelib : public ReadCacheImpl { +public: + explicit ReadCache4Cachelib(std::shared_ptr executor, + std::shared_ptr base_adaptor = nullptr); + + ~ReadCache4Cachelib() {} + + virtual Future Get(const std::string &key, uint64_t start, uint64_t length); + + virtual int Put(const std::string &key, uint64_t length, const butil::IOBuf &buf); + + virtual int Delete(const std::string &key); + + virtual int Delete(const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id); + +private: + std::shared_ptr executor_; + std::shared_ptr base_adaptor_; + 
std::shared_ptr impl_; +}; + +ReadCache4Cachelib::ReadCache4Cachelib(std::shared_ptr executor, + std::shared_ptr base_adaptor) + : executor_(executor), base_adaptor_(base_adaptor) { + HybridCache::EnableLogging = false; + impl_ = std::make_shared(GetGlobalConfig().read_cache, + base_adaptor_, + executor); +} + +Future ReadCache4Cachelib::Get(const std::string &key, uint64_t start, uint64_t length) { + butil::Timer *t = new butil::Timer(); + t->start(); +#ifndef BRPC_WITH_RDMA + auto wrap = HybridCache::ByteBuffer(new char[length], length); +#else + auto wrap = HybridCache::ByteBuffer((char *) brpc::rdma::AllocBlock(length), length); +#endif + return impl_->Get(key, start, length, wrap).thenValue([wrap, key, start, length, t](int res) -> GetOutput { + t->stop(); + g_latency_readcache4cachelib_get << t->u_elapsed(); + delete t; + GetOutput output; + output.status = res; +#ifndef BRPC_WITH_RDMA + if (res == OK) { + output.buf.append(wrap.data, wrap.len); + } + delete []wrap.data; +#else + if (res == OK) { + output.buf.append_user_data(wrap.data, wrap.len, brpc::rdma::DeallocBlock); + } else { + brpc::rdma::DeallocBlock(wrap.data); + } +#endif + LOG_IF(INFO, FLAGS_verbose) << "Get key: " << key + << ", start: " << start + << ", length: " << length + << ", status: " << res; + return output; + }); +} + +int ReadCache4Cachelib::Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) { + auto data_len = buf.length(); + auto aux_buffer = malloc(data_len); + auto data = buf.fetch(aux_buffer, data_len); + auto wrap = HybridCache::ByteBuffer((char *) data, data_len); + int res = impl_->Put(key, 0, length, wrap); + free(aux_buffer); + LOG_IF(INFO, FLAGS_verbose) << "Put key: " << key + << ", length: " << length + << ", status: " << res; + return res; +} + +int ReadCache4Cachelib::Delete(const std::string &key) { + LOG_IF(INFO, FLAGS_verbose) << "Delete key: " << key; + return impl_->Delete(key); +} + +int ReadCache4Cachelib::Delete(const std::string &key, uint64_t 
chunk_size, uint64_t max_chunk_id) { + LOG_IF(INFO, FLAGS_verbose) << "Delete key: " << key; + for (uint64_t chunk_id = 0; chunk_id < max_chunk_id; chunk_id++) { + auto internal_key = key + "-" + std::to_string(chunk_id) + "-" + std::to_string(chunk_size); + int ret = impl_->Delete(internal_key); + if (ret) { + return ret; + } + } + return OK; +} + +bvar::LatencyRecorder g_latency_readcache4disk_get("readcache4disk_get"); + +// ---------------------------------------------------------------------------- +class ReadCache4Disk : public ReadCacheImpl { +public: + explicit ReadCache4Disk(std::shared_ptr executor, + std::shared_ptr base_adaptor = nullptr); + + ~ReadCache4Disk() {} + + virtual Future Get(const std::string &key, uint64_t start, uint64_t length); + + virtual int Put(const std::string &key, uint64_t length, const butil::IOBuf &buf); + + virtual int Delete(const std::string &key); + + virtual int Delete(const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id); + +private: + std::shared_ptr executor_; + std::shared_ptr base_adaptor_; + std::shared_ptr cache_fs_adaptor_; +}; + +ReadCache4Disk::ReadCache4Disk(std::shared_ptr executor, + std::shared_ptr base_adaptor) + : executor_(executor), base_adaptor_(base_adaptor) { + cache_fs_adaptor_ = std::make_shared(GetGlobalConfig().read_cache_dir, base_adaptor_, true, executor_); +} + +Future ReadCache4Disk::Get(const std::string &key, uint64_t start, uint64_t length) { + butil::Timer *t = new butil::Timer(); + t->start(); +#ifndef BRPC_WITH_RDMA + auto wrap = HybridCache::ByteBuffer(new char[length], length); +#else + auto wrap = HybridCache::ByteBuffer((char *) brpc::rdma::AllocBlock(length), length); +#endif + return cache_fs_adaptor_->DownLoad(key, start, length, wrap).thenValue([wrap, key, start, length, t](int res) -> GetOutput { + GetOutput output; + output.status = res; +#ifndef BRPC_WITH_RDMA + if (res == OK) { + output.buf.append(wrap.data, wrap.len); + } + delete []wrap.data; +#else + if (res == 
OK) { + output.buf.append_user_data(wrap.data, wrap.len, brpc::rdma::DeallocBlock); + } else { + brpc::rdma::DeallocBlock(wrap.data); + } +#endif + t->stop(); + g_latency_readcache4disk_get << t->u_elapsed(); + delete t; + LOG_IF(INFO, FLAGS_verbose) << "Get key: " << key + << ", start: " << start + << ", length: " << length + << ", status: " << res; + return output; + }); +} + +int ReadCache4Disk::Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) { + auto data_len = buf.length(); + auto aux_buffer = malloc(data_len); + auto data = buf.fetch(aux_buffer, data_len); + auto wrap = HybridCache::ByteBuffer((char *) data, data_len); + std::map headers; + int res = cache_fs_adaptor_->UpLoad(key, length, wrap, headers).get(); + free(aux_buffer); + LOG_IF(INFO, FLAGS_verbose) << "Put key: " << key + << ", length: " << length + << ", status: " << res; + return res; +} + +int ReadCache4Disk::Delete(const std::string &key) { + LOG_IF(INFO, FLAGS_verbose) << "Delete key: " << key; + return cache_fs_adaptor_->Delete(key).get(); +} + +int ReadCache4Disk::Delete(const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id) { + LOG_IF(INFO, FLAGS_verbose) << "Delete key: " << key; + for (uint64_t chunk_id = 0; chunk_id < max_chunk_id; chunk_id++) { + auto internal_key = key + "-" + std::to_string(chunk_id) + "-" + std::to_string(chunk_size); + int ret = cache_fs_adaptor_->Delete(internal_key).get(); + if (ret) { + return ret; + } + } + return OK; +} + +DEFINE_string(read_cache_engine, "cachelib", "Read cache engine: cachelib | disk"); + +ReadCache::ReadCache(std::shared_ptr executor, + std::shared_ptr base_adaptor) { + if (FLAGS_read_cache_engine == "cachelib") + impl_ = new ReadCache4Cachelib(executor, base_adaptor); + else if (FLAGS_read_cache_engine == "disk") + impl_ = new ReadCache4Disk(executor, base_adaptor); + else { + LOG(FATAL) << "unsupported read cache engine"; + exit(EXIT_FAILURE); + } +} \ No newline at end of file diff --git 
a/global_cache/ReadCache.h b/global_cache/ReadCache.h new file mode 100644 index 0000000..5f1a686 --- /dev/null +++ b/global_cache/ReadCache.h @@ -0,0 +1,53 @@ +#ifndef MADFS_READ_CACHE_H +#define MADFS_READ_CACHE_H + +#include +#include +#include + +#include + +#include "Common.h" +#include "data_adaptor.h" +#include "read_cache.h" + +using HybridCache::DataAdaptor; + +class ReadCacheImpl { +public: + virtual Future Get(const std::string &key, uint64_t start, uint64_t length) = 0; + virtual int Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) = 0; + virtual int Delete(const std::string &key) = 0; + virtual int Delete(const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id) = 0; +}; + +class ReadCache { +public: + explicit ReadCache(std::shared_ptr executor, + std::shared_ptr base_adaptor = nullptr); + + ~ReadCache() { + delete impl_; + } + + Future Get(const std::string &key, uint64_t start, uint64_t length) { + return impl_->Get(key, start, length); + } + + int Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) { + return impl_->Put(key, length, buf); + } + + int Delete(const std::string &key) { + return impl_->Delete(key); + } + + int Delete(const std::string &key, uint64_t chunk_size, uint64_t max_chunk_id) { + return impl_->Delete(key, chunk_size, max_chunk_id); + } + +private: + ReadCacheImpl *impl_; +}; + +#endif // MADFS_READ_CACHE_H \ No newline at end of file diff --git a/global_cache/ReadCacheClient.cpp b/global_cache/ReadCacheClient.cpp new file mode 100644 index 0000000..3ffc4f7 --- /dev/null +++ b/global_cache/ReadCacheClient.cpp @@ -0,0 +1,245 @@ +#include "ReadCacheClient.h" +#include "GlobalDataAdaptor.h" + +#define AWS_BUFFER_PADDING 64 + +ReadCacheClient::ReadCacheClient(GlobalDataAdaptor *parent) + : parent_(parent) {} + +ReadCacheClient::~ReadCacheClient() {} + +folly::Future ReadCacheClient::Get(const std::string &key, size_t start, size_t size, ByteBuffer &buffer) { + butil::Timer t; + 
t.start(); + LOG_IF(INFO, FLAGS_verbose) << "Get key=" << key << ", start=" << start << ", size=" << size; + std::vector> future_list; + std::vector requests; + auto &policy = parent_->GetCachePolicy(key); + const int num_choose = policy.read_replication_factor; + GenerateGetChunkRequestsV2(key, start, size, buffer, requests, policy.read_chunk_size); + if (requests.empty()) + return folly::makeFuture(OK); + + auto DoGetChunkAsync = [this, num_choose](GetChunkRequestV2 &entry) -> folly::Future { + auto replicas = GetReplica(entry.internal_key, num_choose); + int primary_server_id = replicas[lrand48() % replicas.size()]; + return GetChunkAsync(primary_server_id, entry).thenValue([this, replicas, entry, primary_server_id](int res) -> int { + if (res != RPC_FAILED) { + return res; + } + LOG_EVERY_SECOND(WARNING) << "Unable to connect primary replicas. server_id " << primary_server_id + << ", hostname: " << parent_->GetServerHostname(primary_server_id); + for (auto &server_id : replicas) { + if (server_id == primary_server_id) { + continue; + } + res = GetChunkAsync(server_id, entry).get(); + if (res != RPC_FAILED) { + return res; + } + LOG_EVERY_SECOND(WARNING) << "Unable to connect secondary replicas. 
server_id " << server_id + << ", hostname: " << parent_->GetServerHostname(server_id); + } + LOG_EVERY_SECOND(ERROR) << "Unable to connect all target replicas"; + return RPC_FAILED; + }); + }; + + if (requests.size() == 1) { + return DoGetChunkAsync(requests[0]); + } + + size_t aggregated_size = 0; + for (auto &entry: requests) { + aggregated_size += entry.chunk_len; + future_list.emplace_back(DoGetChunkAsync(entry)); + if (aggregated_size >= GetGlobalConfig().max_inflight_payload_size) { + auto output = folly::collectAll(future_list).get(); + for (auto &entry: output) + if (entry.value_or(FOLLY_ERROR) != OK) { + LOG(ERROR) << "Failed to get data from read cache, key: " << key + << ", start: " << start + << ", size: " << size + << ", buf: " << (void *) buffer.data << " " << buffer.len + << ", error code: " << entry.value_or(FOLLY_ERROR); + return entry.value_or(FOLLY_ERROR); + } + future_list.clear(); + } + } + + if (future_list.empty()) return OK; + + return folly::collectAll(future_list).via(parent_->executor_.get()).thenValue( + [=](std::vector > output) -> int { + for (auto &entry: output) + if (entry.value_or(FOLLY_ERROR) != OK) { + LOG(ERROR) << "Failed to get data from read cache, key: " << key + << ", start: " << start + << ", size: " << size + << ", buf: " << (void *) buffer.data << " " << buffer.len + << ", error code: " << entry.value_or(FOLLY_ERROR); + return entry.value_or(FOLLY_ERROR); + } + return OK; + }); +} + +folly::Future ReadCacheClient::GetChunkAsync(int server_id, GetChunkRequestV2 request) { + LOG_IF(INFO, FLAGS_verbose) << "GetChunkAsync server_id=" << server_id + << ", internal_key=" << request.internal_key + << ", chunk_id=" << request.chunk_id + << ", chunk_start=" << request.chunk_start + << ", chunk_len=" << request.chunk_len + << ", buffer=" << (void *) request.buffer.data; + return parent_->GetRpcClient()->GetEntryFromReadCache(server_id, request.internal_key, request.chunk_start, request.chunk_len) + .then([this, server_id, 
request](folly::Try &&output) -> folly::Future { + if (!output.hasValue()) { + return folly::makeFuture(FOLLY_ERROR); + } + auto &value = output.value(); + if (value.status == OK) { + value.buf.copy_to(request.buffer.data, request.buffer.len); + return folly::makeFuture(OK); + } else if (value.status == CACHE_ENTRY_NOT_FOUND) { + return GetChunkFromGlobalCache(server_id, request); + } else { + return folly::makeFuture(value.status); + } + }); +} + +folly::Future ReadCacheClient::GetChunkFromGlobalCache(int server_id, GetChunkRequestV2 request) { + struct Args { + size_t size; + std::map headers; + ByteBuffer data_buf; + + ~Args() { + if (data_buf.data) { + delete []data_buf.data; + } + } + }; + auto args = std::make_shared(); + // auto f = parent_->base_adaptor_->Head(request.user_key, args->size, args->headers) + auto f = parent_->Head(request.user_key, args->size, args->headers) + .then([this, args, request] (folly::Try &&output) -> folly::Future { + if (output.value_or(FOLLY_ERROR) != OK) { + return folly::makeFuture(output.value_or(FOLLY_ERROR)); + } + + const size_t align_chunk_start = request.chunk_id * request.chunk_granularity; + const size_t align_chunk_stop = std::min(align_chunk_start + request.chunk_granularity, args->size); + + if (align_chunk_start + request.chunk_start + request.chunk_len > args->size) { + LOG(WARNING) << "Requested data range exceeds object size, key: " << request.user_key + << " request offset: " << align_chunk_start + request.chunk_start + request.chunk_len + << ", size: " << args->size; + return folly::makeFuture(END_OF_FILE); + } else if (align_chunk_start == align_chunk_stop) { + return folly::makeFuture(OK); + } else if (align_chunk_start > align_chunk_stop) { + LOG(WARNING) << "Unexpected request range, key: " << request.user_key + << " start offset: " << align_chunk_start + << ", end offset: " << align_chunk_stop; + return folly::makeFuture(INVALID_ARGUMENT); + } + + args->data_buf.len = align_chunk_stop - align_chunk_start 
+ AWS_BUFFER_PADDING; + args->data_buf.data = new char[args->data_buf.len]; + return parent_->base_adaptor_->DownLoad(request.user_key, + align_chunk_start, + align_chunk_stop - align_chunk_start, + args->data_buf); + }).then([this, args, request] (folly::Try &&output) -> folly::Future { + if (output.value_or(FOLLY_ERROR) != OK) { + return folly::makeFuture(output.value_or(FOLLY_ERROR)); + } + + memcpy(request.buffer.data, args->data_buf.data + request.chunk_start, request.chunk_len); + args->data_buf.len -= AWS_BUFFER_PADDING; + auto &policy = parent_->GetCachePolicy(request.user_key); + auto replicas = GetReplica(request.internal_key, policy.read_replication_factor); + std::vector > future_list; + for (auto server_id: replicas) + future_list.emplace_back(parent_->GetRpcClient()->PutEntryFromReadCache(server_id, + request.internal_key, + args->data_buf, + args->data_buf.len)); + return folly::collectAll(std::move(future_list)).via(parent_->executor_.get()).thenValue( + [](std::vector > &&output) -> int { + for (auto &entry: output) { + if (!entry.hasValue()) + return FOLLY_ERROR; + if (entry.value().status != OK) + return entry.value().status; + } + return OK; + }); + }); + return f; +} + +folly::Future ReadCacheClient::Invalidate(const std::string &key, size_t size) { + // LOG(INFO) << "Invalidate key=" << key; + std::vector > future_list; + auto &policy = parent_->GetCachePolicy(key); + const size_t chunk_size = policy.read_chunk_size; + const size_t end_chunk_id = (size + chunk_size - 1) / chunk_size; + for (int server_id = 0; server_id < parent_->server_list_.size(); server_id++) { + future_list.emplace_back(parent_->GetRpcClient()->DeleteEntryFromReadCache(server_id, key, chunk_size, end_chunk_id)); + } + return folly::collectAll(future_list).via(parent_->executor_.get()).thenValue( + [](std::vector > output) -> int { + for (auto &entry: output) + if (entry.value_or(FOLLY_ERROR) != OK) + return entry.value_or(FOLLY_ERROR); + return OK; + }); +} + +void 
ReadCacheClient::GenerateGetChunkRequestsV2(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + std::vector &requests, + size_t chunk_size) { + const size_t end = start + size; + + const size_t begin_chunk_id = start / chunk_size; + const size_t end_chunk_id = (end + chunk_size - 1) / chunk_size; + + if (buffer.len < size) { + LOG(WARNING) << "Buffer capacity may be not enough, expect " << size << ", actual " << buffer.len; + } + + size_t buffer_offset = 0; + for (size_t chunk_id = begin_chunk_id; chunk_id < end_chunk_id; ++chunk_id) { + size_t chunk_start = std::max(chunk_id * chunk_size, start); + size_t chunk_stop = std::min((chunk_id + 1) * chunk_size, end); + if (chunk_stop <= chunk_start) + return; + GetChunkRequestV2 item; + item.user_key = key; + item.internal_key = key + "-" + std::to_string(chunk_id) + "-" + std::to_string(chunk_size); + item.chunk_id = chunk_id; + item.chunk_start = chunk_start % chunk_size; + item.chunk_len = chunk_stop - chunk_start; + item.chunk_granularity = chunk_size; + item.buffer.data = buffer.data + buffer_offset; + item.buffer.len = item.chunk_len; + buffer_offset += item.chunk_len; + requests.emplace_back(item); + } + LOG_ASSERT(buffer_offset == size); +} + +std::vector ReadCacheClient::GetReplica(const std::string &key, int num_choose) { + const int num_available = parent_->server_list_.size(); + uint64_t seed = std::hash < std::string > {}(key); + std::vector output; + for (int i = 0; i < std::min(num_available, num_choose); ++i) + output.push_back((seed + i) % num_available); + return output; +} \ No newline at end of file diff --git a/global_cache/ReadCacheClient.h b/global_cache/ReadCacheClient.h new file mode 100644 index 0000000..389437c --- /dev/null +++ b/global_cache/ReadCacheClient.h @@ -0,0 +1,60 @@ +#ifndef MADFS_READ_CACHE_CLIENT_H +#define MADFS_READ_CACHE_CLIENT_H + +#include +#include +#include + +#include "Common.h" +#include "Placement.h" +#include "data_adaptor.h" + +using 
HybridCache::ByteBuffer; + +class GlobalDataAdaptor; + +class ReadCacheClient { + friend class GetChunkContext; + +public: + ReadCacheClient(GlobalDataAdaptor *parent); + + ~ReadCacheClient(); + + virtual folly::Future Get(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer); + + virtual folly::Future Invalidate(const std::string &key, size_t size); + + // for testing only +public: + struct GetChunkRequestV2 { + std::string user_key; + std::string internal_key; + size_t chunk_id; + size_t chunk_start; + size_t chunk_len; + size_t chunk_granularity; + ByteBuffer buffer; + }; + + static void GenerateGetChunkRequestsV2(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + std::vector &requests, + size_t chunk_size); + + folly::Future GetChunkAsync(int server_id, GetChunkRequestV2 context); + + folly::Future GetChunkFromGlobalCache(int server_id, GetChunkRequestV2 context); + + std::vector GetReplica(const std::string &key, int num_choose); + +private: + GlobalDataAdaptor *parent_; +}; + +#endif // MADFS_READ_CACHE_CLIENT_H \ No newline at end of file diff --git a/global_cache/ReplicationWriteCacheClient.cpp b/global_cache/ReplicationWriteCacheClient.cpp new file mode 100644 index 0000000..a2cb1be --- /dev/null +++ b/global_cache/ReplicationWriteCacheClient.cpp @@ -0,0 +1,248 @@ +#include "ReplicationWriteCacheClient.h" +#include "GlobalDataAdaptor.h" + +folly::Future ReplicationWriteCacheClient::Put(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map &headers, + size_t off) { + std::vector > future_list; + Json::Value root, dummy_root; + Json::Value json_replica(Json::arrayValue), json_path(Json::arrayValue), json_headers; + + butil::Timer *t = new butil::Timer(); + t->start(); + + const std::vector replicas = GetReplica(key); + for (auto server_id: replicas) { + json_replica.append(server_id); + } + + auto rpc_client = parent_->GetRpcClient(); + auto write_chunk_size = 
GetGlobalConfig().write_chunk_size; + + for (auto iter = headers.begin(); iter != headers.end(); ++iter) { + json_headers[iter->first] = iter->second; + } + + size_t aggregated_size = 0; + for (uint64_t offset = 0; offset < size; offset += write_chunk_size) { + for (auto server_id: replicas) { + auto region_size = std::min(size - offset, write_chunk_size); + ByteBuffer region_buffer(buffer.data + offset, region_size); + std::string partial_key = key + + "-" + std::to_string((off + offset) / write_chunk_size) + + "-" + std::to_string(write_chunk_size); + auto PutRPC = folly::via(parent_->executor_.get(), [this, server_id, partial_key, region_buffer, region_size]() -> PutOutput { + return parent_->GetRpcClient()->PutEntryFromWriteCache(server_id, partial_key, region_buffer, region_size).get(); + }); + future_list.emplace_back(std::move(PutRPC)); + } + } + + t->stop(); + LOG(INFO) << "Phase 1: " << t->u_elapsed(); + + root["type"] = "replication"; + root["size"] = size; + root["replica"] = json_replica; + root["headers"] = json_headers; + + return folly::collectAll(future_list).via(parent_->executor_.get()).thenValue([root, t](std::vector> &&output) -> folly::Future { + Json::Value dummy_root; + Json::Value json_path(Json::arrayValue); + for (auto &entry: output) { + if (!entry.hasValue()) + return PutResult { FOLLY_ERROR, dummy_root }; + if (entry.value().status != OK) { + LOG(INFO) << "Found error"; + return PutResult { entry.value().status, dummy_root }; + } + json_path.append(entry.value().internal_key); + } + Json::Value new_root = root; + new_root["path"] = json_path; + t->stop(); + LOG(INFO) << "Duration: " << t->u_elapsed(); + delete t; + return PutResult { OK, new_root }; + }); +} + +folly::Future ReplicationWriteCacheClient::Get(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + Json::Value &root) { + std::vector replicas; + for (auto &entry : root["replica"]) { + replicas.push_back(entry.asInt()); + } + + std::vector 
internal_keys; + for (auto &entry : root["path"]) { + internal_keys.push_back(entry.asString()); + } + + std::vector > future_list; + std::vector requests; + auto write_chunk_size = GetGlobalConfig().write_chunk_size; + GenerateGetChunkRequestsV2(key, start, size, buffer, requests, write_chunk_size); + if (requests.empty()) + return folly::makeFuture(OK); + + size_t aggregated_size = 0; + for (auto &entry: requests) { + int primary_replica_id = lrand48() % replicas.size(); + int primary_server_id = replicas[primary_replica_id]; + std::string internal_key = internal_keys[entry.chunk_id * replicas.size() + primary_replica_id]; + future_list.emplace_back(GetChunkAsync(primary_server_id, entry, internal_key) + .thenValue([this, replicas, entry, primary_server_id, internal_keys](int res) -> int { + if (res != RPC_FAILED) { + return res; + } + LOG_EVERY_SECOND(WARNING) << "Unable to connect primary replicas. server_id " << primary_server_id + << ", hostname: " << parent_->GetServerHostname(primary_server_id); + for (auto &server_id : replicas) { + if (server_id == primary_server_id) { + continue; + } + auto internal_key = internal_keys[entry.chunk_id * replicas.size() + server_id]; + res = GetChunkAsync(server_id, entry, internal_key).get(); + if (res != RPC_FAILED) { + return res; + } + LOG_EVERY_SECOND(WARNING) << "Unable to connect secondary replicas. 
server_id " << server_id + << ", hostname: " << parent_->GetServerHostname(server_id); + } + LOG_EVERY_SECOND(ERROR) << "Unable to connect all target replicas"; + return RPC_FAILED; + })); + + aggregated_size += entry.chunk_len; + if (aggregated_size >= GetGlobalConfig().max_inflight_payload_size) { + auto output = folly::collectAll(future_list).get(); + for (auto &entry: output) + if (entry.value_or(FOLLY_ERROR) != OK) { + LOG(ERROR) << "Failed to get data from write cache, key: " << key + << ", start: " << start + << ", size: " << size + << ", buf: " << (void *) buffer.data << " " << buffer.len + << ", error code: " << entry.hasValue() << " " << entry.value_or(FOLLY_ERROR); + return entry.value_or(FOLLY_ERROR); + } + aggregated_size = 0; + future_list.clear(); + } + } + + return folly::collectAll(future_list).via(parent_->executor_.get()).thenValue( + [=](std::vector > output) -> int { + for (auto &entry: output) + if (entry.value_or(FOLLY_ERROR) != OK) { + LOG(ERROR) << "Failed to get data from write cache, key: " << key + << ", start: " << start + << ", size: " << size + << ", buf: " << (void *) buffer.data << " " << buffer.len + << ", error code: " << entry.hasValue() << " " << entry.value_or(FOLLY_ERROR); + return entry.value_or(FOLLY_ERROR); + } + return OK; + }); + + // return parent_->GetRpcClient()->GetEntryFromWriteCache(replica[primary_index], internal_keys[primary_index], start, size).thenValue( + // [this, &buffer, start, size, replica, internal_keys, primary_index](GetOutput &&output) -> int { + // if (output.status == OK) { + // output.buf.copy_to(buffer.data, size); + // } + // if (output.status == RPC_FAILED) { + // for (int index = 0; index < replica.size(); ++index) { + // if (index == primary_index) { + // continue; + // } + // auto res = parent_->GetRpcClient()->GetEntryFromWriteCache(replica[index], internal_keys[index], start, size).get(); + // if (res.status == OK) { + // output.buf.copy_to(buffer.data, size); + // } + // if (res.status != 
RPC_FAILED) { + // return res.status; + // } + // } + // LOG(ERROR) << "All target replicas are crashed"; + // return RPC_FAILED; + // } + // return output.status; + // } + // ); +} + +folly::Future ReplicationWriteCacheClient::GetChunkAsync(int server_id, GetChunkRequestV2 request, std::string &internal_key) { + LOG_IF(INFO, FLAGS_verbose) << "GetChunkAsync server_id=" << server_id + << ", internal_key=" << internal_key + << ", chunk_id=" << request.chunk_id + << ", chunk_start=" << request.chunk_start + << ", chunk_len=" << request.chunk_len + << ", buffer=" << (void *) request.buffer.data; + auto f = parent_->GetRpcClient()->GetEntryFromWriteCache(server_id, internal_key, request.chunk_start, request.chunk_len) + .then([this, server_id, request](folly::Try &&output) -> folly::Future { + if (!output.hasValue()) { + return folly::makeFuture(FOLLY_ERROR); + } + auto &value = output.value(); + if (value.status == OK) { + value.buf.copy_to(request.buffer.data, request.buffer.len); + return folly::makeFuture(OK); + } else { + return folly::makeFuture(value.status); + } + }).via(parent_->executor_.get()); + return f; + // memset(request.buffer.data, 'x', request.buffer.len); + // return folly::makeFuture(OK); +} + +std::vector ReplicationWriteCacheClient::GetReplica(const std::string &key) { + const int num_available = parent_->server_list_.size(); + auto &policy = parent_->GetCachePolicy(key); + const int num_choose = policy.write_replication_factor; + uint64_t seed = std::hash < std::string > {}(key); + std::vector output; + for (int i = 0; i < num_choose; ++i) + output.push_back((seed + i) % num_available); + return output; +} + +void ReplicationWriteCacheClient::GenerateGetChunkRequestsV2(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + std::vector &requests, + size_t chunk_size) { + const size_t end = start + size; + + const size_t begin_chunk_id = start / chunk_size; + const size_t end_chunk_id = (end + chunk_size - 1) / chunk_size; 
+ + if (buffer.len < size) { + LOG(WARNING) << "Buffer capacity may be not enough, expect " << size << ", actual " << buffer.len; + } + + size_t buffer_offset = 0; + for (size_t chunk_id = begin_chunk_id; chunk_id < end_chunk_id; ++chunk_id) { + size_t chunk_start = std::max(chunk_id * chunk_size, start); + size_t chunk_stop = std::min((chunk_id + 1) * chunk_size, end); + if (chunk_stop <= chunk_start) + return; + GetChunkRequestV2 item; + item.user_key = key; + item.chunk_id = chunk_id; + item.chunk_start = chunk_start % chunk_size; + item.chunk_len = chunk_stop - chunk_start; + item.chunk_granularity = chunk_size; + item.buffer.data = buffer.data + buffer_offset; + item.buffer.len = item.chunk_len; + buffer_offset += item.chunk_len; + requests.emplace_back(item); + } + LOG_ASSERT(buffer_offset == size); +} diff --git a/global_cache/ReplicationWriteCacheClient.h b/global_cache/ReplicationWriteCacheClient.h new file mode 100644 index 0000000..1bb8dbf --- /dev/null +++ b/global_cache/ReplicationWriteCacheClient.h @@ -0,0 +1,57 @@ +#ifndef MADFS_REPLICATION_WRITE_CACHE_CLIENT_H +#define MADFS_REPLICATION_WRITE_CACHE_CLIENT_H + +#include "WriteCacheClient.h" + +using HybridCache::ByteBuffer; + +class GlobalDataAdaptor; + +using PutResult = WriteCacheClient::PutResult; + +class ReplicationWriteCacheClient : public WriteCacheClient { + friend class GetChunkContext; + +public: + ReplicationWriteCacheClient(GlobalDataAdaptor *parent) : parent_(parent) {} + + ~ReplicationWriteCacheClient() {} + + virtual folly::Future Put(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map &headers, + size_t off = 0); + + virtual folly::Future Get(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + Json::Value &root); + +public: + std::vector GetReplica(const std::string &key); + + struct GetChunkRequestV2 { + std::string user_key; + size_t chunk_id; + size_t chunk_start; + size_t chunk_len; + size_t chunk_granularity; + ByteBuffer 
buffer; + }; + + static void GenerateGetChunkRequestsV2(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + std::vector &requests, + size_t chunk_size); + + folly::Future GetChunkAsync(int server_id, GetChunkRequestV2 context, std::string &internal_key); + +private: + GlobalDataAdaptor *parent_; +}; + +#endif // MADFS_REPLICATION_WRITE_CACHE_CLIENT_H \ No newline at end of file diff --git a/global_cache/S3DataAdaptor.cpp b/global_cache/S3DataAdaptor.cpp new file mode 100644 index 0000000..ba0e6a2 --- /dev/null +++ b/global_cache/S3DataAdaptor.cpp @@ -0,0 +1,188 @@ +#include "S3DataAdaptor.h" + +#include +#include +#include +#include +#include +#include + +#define STRINGIFY_HELPER(val) #val +#define STRINGIFY(val) STRINGIFY_HELPER(val) +#define AWS_ALLOCATE_TAG __FILE__ ":" STRINGIFY(__LINE__) + +std::once_flag S3INIT_FLAG; +std::once_flag S3SHUTDOWN_FLAG; +Aws::SDKOptions AWS_SDK_OPTIONS; + +// https://github.com/aws/aws-sdk-cpp/issues/1430 +class PreallocatedIOStream : public Aws::IOStream { +public: + PreallocatedIOStream(char *buf, size_t size) + : Aws::IOStream(new Aws::Utils::Stream::PreallocatedStreamBuf( + reinterpret_cast(buf), size)) {} + + PreallocatedIOStream(const char *buf, size_t size) + : PreallocatedIOStream(const_cast(buf), size) {} + + ~PreallocatedIOStream() { + // corresponding new in constructor + delete rdbuf(); + } +}; + +Aws::String GetObjectRequestRange(uint64_t offset, uint64_t len) { + auto range = + "bytes=" + std::to_string(offset) + "-" + std::to_string(offset + len); + return {range.data(), range.size()}; +} + +S3DataAdaptor::S3DataAdaptor() { + auto initSDK = [&]() { + Aws::InitAPI(AWS_SDK_OPTIONS); + }; + std::call_once(S3INIT_FLAG, initSDK); + auto &s3_config = GetGlobalConfig().s3_config; + setenv("AWS_EC2_METADATA_DISABLED", "true", 1); + clientCfg_ = Aws::New(AWS_ALLOCATE_TAG, true); + clientCfg_->scheme = Aws::Http::Scheme::HTTP; + clientCfg_->verifySSL = false; + clientCfg_->maxConnections = 10; + 
clientCfg_->endpointOverride = s3_config.address; + clientCfg_->executor = Aws::MakeShared("S3Adapter.S3Client", s3_config.bg_threads); + + s3Client_ = Aws::New(AWS_ALLOCATE_TAG, + Aws::Auth::AWSCredentials(s3_config.access_key, s3_config.secret_access_key), + *clientCfg_, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false); +} + +S3DataAdaptor::~S3DataAdaptor() { + if (clientCfg_ != nullptr) { + Aws::Delete(clientCfg_); + clientCfg_ = nullptr; + } + if (s3Client_ != nullptr) { + Aws::Delete(s3Client_); + s3Client_ = nullptr; + } + auto shutdownSDK = [&]() { + Aws::ShutdownAPI(AWS_SDK_OPTIONS); + }; + std::call_once(S3SHUTDOWN_FLAG, shutdownSDK); +} + +folly::Future S3DataAdaptor::DownLoad(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer) { + Aws::S3::Model::GetObjectRequest request; + request.SetBucket(GetGlobalConfig().s3_config.bucket); + request.SetKey(Aws::String{key.c_str(), key.size()}); + request.SetRange(GetObjectRequestRange(start, size)); + request.SetResponseStreamFactory( + [&buffer]() { return Aws::New(AWS_ALLOCATE_TAG, buffer.data, buffer.len); }); + auto promise = std::make_shared < folly::Promise < int >> (); + Aws::S3::GetObjectResponseReceivedHandler handler = + [&buffer, size, promise]( + const Aws::S3::S3Client */*client*/, + const Aws::S3::Model::GetObjectRequest &/*request*/, + const Aws::S3::Model::GetObjectOutcome &response, + const std::shared_ptr &awsCtx) { + if (response.IsSuccess()) { + promise->setValue(OK); + } else if (response.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) { + promise->setValue(NOT_FOUND); + } else { + LOG(ERROR) << "GetObjectAsync error: " + << response.GetError().GetExceptionName() + << "message: " << response.GetError().GetMessage(); + promise->setValue(S3_INTERNAL_ERROR); + } + }; + s3Client_->GetObjectAsync(request, handler, nullptr); + return promise->getFuture(); +} + +folly::Future S3DataAdaptor::UpLoad(const std::string &key, + size_t size, + const 
ByteBuffer &buffer, + const std::map &headers) { + Aws::S3::Model::PutObjectRequest request; + request.SetBucket(GetGlobalConfig().s3_config.bucket); + request.SetKey(key); + request.SetMetadata(headers); + request.SetBody(Aws::MakeShared(AWS_ALLOCATE_TAG, buffer.data, buffer.len)); + auto promise = std::make_shared < folly::Promise < int >> (); + Aws::S3::PutObjectResponseReceivedHandler handler = + [promise]( + const Aws::S3::S3Client */*client*/, + const Aws::S3::Model::PutObjectRequest &/*request*/, + const Aws::S3::Model::PutObjectOutcome &response, + const std::shared_ptr &awsCtx) { + LOG_IF(ERROR, !response.IsSuccess()) + << "PutObjectAsync error: " + << response.GetError().GetExceptionName() + << "message: " << response.GetError().GetMessage(); + promise->setValue(response.IsSuccess() ? OK : S3_INTERNAL_ERROR); + }; + s3Client_->PutObjectAsync(request, handler, nullptr); + return promise->getFuture(); +} + +folly::Future S3DataAdaptor::Delete(const std::string &key) { + Aws::S3::Model::DeleteObjectRequest request; + request.SetBucket(GetGlobalConfig().s3_config.bucket); + request.SetKey(key); + auto promise = std::make_shared < folly::Promise < int >> (); + Aws::S3::DeleteObjectResponseReceivedHandler handler = + [promise]( + const Aws::S3::S3Client */*client*/, + const Aws::S3::Model::DeleteObjectRequest &/*request*/, + const Aws::S3::Model::DeleteObjectOutcome &response, + const std::shared_ptr &awsCtx) { + if (response.IsSuccess()) { + promise->setValue(OK); + } else if (response.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) { + promise->setValue(NOT_FOUND); + } else { + LOG(ERROR) << "DeleteObjectAsync error: " + << response.GetError().GetExceptionName() + << "message: " << response.GetError().GetMessage(); + promise->setValue(S3_INTERNAL_ERROR); + } + }; + s3Client_->DeleteObjectAsync(request, handler, nullptr); + return promise->getFuture(); +} + +folly::Future S3DataAdaptor::Head(const std::string &key, + size_t &size, + std::map 
&headers) { + Aws::S3::Model::HeadObjectRequest request; + request.SetBucket(GetGlobalConfig().s3_config.bucket); + request.SetKey(key); + auto promise = std::make_shared < folly::Promise < int >> (); + Aws::S3::HeadObjectResponseReceivedHandler handler = + [promise, &size, &headers]( + const Aws::S3::S3Client */*client*/, + const Aws::S3::Model::HeadObjectRequest &/*request*/, + const Aws::S3::Model::HeadObjectOutcome &response, + const std::shared_ptr &awsCtx) { + if (response.IsSuccess()) { + headers = response.GetResult().GetMetadata(); + size = response.GetResult().GetContentLength(); + promise->setValue(OK); + } else if (response.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) { + promise->setValue(NOT_FOUND); + } else { + LOG(ERROR) << "HeadObjectAsync error: " + << response.GetError().GetExceptionName() + << "message: " << response.GetError().GetMessage(); + promise->setValue(S3_INTERNAL_ERROR); + } + }; + s3Client_->HeadObjectAsync(request, handler, nullptr); + return promise->getFuture(); +} diff --git a/global_cache/S3DataAdaptor.h b/global_cache/S3DataAdaptor.h new file mode 100644 index 0000000..677e1c8 --- /dev/null +++ b/global_cache/S3DataAdaptor.h @@ -0,0 +1,47 @@ +#ifndef MADFS_S3_DATA_ADAPTOR_H +#define MADFS_S3_DATA_ADAPTOR_H + +#include +#include +#include +#include + +#include "data_adaptor.h" + +#include "Common.h" + +using HybridCache::ByteBuffer; +using HybridCache::DataAdaptor; + +class S3DataAdaptor : public DataAdaptor { +public: + S3DataAdaptor(); + + ~S3DataAdaptor(); + + // 从数据服务器加载数据 + virtual folly::Future DownLoad(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer); + + // 上传数据到数据服务器 + virtual folly::Future UpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map &headers); + + // 删除数据服务器的数据 + virtual folly::Future Delete(const std::string &key); + + // 获取数据的元数据 + virtual folly::Future Head(const std::string &key, + size_t &size, + std::map &headers); + 
+private: + Aws::Client::ClientConfiguration *clientCfg_; + Aws::S3::S3Client *s3Client_; +}; + +#endif // MADFS_S3_DATA_ADAPTOR_H \ No newline at end of file diff --git a/global_cache/WriteCache.cpp b/global_cache/WriteCache.cpp new file mode 100644 index 0000000..2f27582 --- /dev/null +++ b/global_cache/WriteCache.cpp @@ -0,0 +1,404 @@ +#include + +#include "WriteCache.h" +#include "FileSystemDataAdaptor.h" +#include +#include "write_cache.h" + + +//#define BRPC_WITH_RDMA 1 +//#include + +class WriteCache4RocksDB : public WriteCacheImpl { +public: + explicit WriteCache4RocksDB(std::shared_ptr executor); + + ~WriteCache4RocksDB(); + + virtual GetOutput Get(const std::string &internal_key, uint64_t start, uint64_t length); + + virtual PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf); + + virtual int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set &except_keys); + +private: + std::string rocksdb_path_; + rocksdb::DB *db_; +}; + +WriteCache4RocksDB::WriteCache4RocksDB(std::shared_ptr executor) + : WriteCacheImpl(executor) { + rocksdb::Options options; + options.create_if_missing = true; + rocksdb_path_ = PathJoin(GetGlobalConfig().write_cache_dir, ".write_cache.db"); + if (CreateParentDirectories(rocksdb_path_)) { + LOG(WARNING) << "Failed to create directory: " << rocksdb_path_; + abort(); + } + auto status = rocksdb::DB::Open(options, rocksdb_path_, &db_); + if (!status.ok()) { + LOG(WARNING) << "Failed to open RocksDB: " << status.ToString(); + abort(); + } +} + +WriteCache4RocksDB::~WriteCache4RocksDB() { + if (db_) { + db_->Close(); + } +} + +GetOutput WriteCache4RocksDB::Get(const std::string &internal_key, uint64_t start, uint64_t length) { + rocksdb::ReadOptions options; + std::string value; + auto status = db_->Get(options, internal_key, &value); + GetOutput output; + if (status.IsNotFound()) { + output.status = CACHE_ENTRY_NOT_FOUND; + return output; + } else if (!status.ok()) { + LOG(WARNING) << 
"Failed to get key " << internal_key << " from RocksDB: " << status.ToString(); + output.status = IO_ERROR; + return output; + } + if (length == 0 || start + length > value.size()) { + output.status = INVALID_ARGUMENT; + return output; + } + output.status = OK; + output.buf.append(&value[start], length); + LOG_IF(INFO, FLAGS_verbose) << "GetWriteCache internal_key: " << internal_key << ", size: " << length; + return output; +} + +PutOutput WriteCache4RocksDB::Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) { + auto oid = next_object_id_.fetch_add(1); + auto internal_key = key + "-" + std::to_string(oid); + rocksdb::WriteOptions options; + std::string value = buf.to_string(); + auto status = db_->Put(options, internal_key, value); + if (!status.ok()) { + LOG(WARNING) << "Failed to put key " << internal_key << " from RocksDB: " << status.ToString(); + return {IO_ERROR, ""}; + } + LOG_IF(INFO, FLAGS_verbose) << "PutWriteCache key: " << key << ", internal_key: " << internal_key << ", size: " << length; + return {OK, internal_key}; +} + +static bool HasPrefix(const std::string &key, const std::string &key_prefix) { + return key.substr(0, key_prefix.size()) == key_prefix; +} + +static uint64_t ParseTS(const std::string &key) { + size_t pos = key.rfind('-'); + if (pos != std::string::npos) { + std::string lastSubStr = key.substr(pos + 1); + uint64_t number; + std::istringstream(lastSubStr) >> number; + if (!std::cin.fail()) { + return number; + } else { + return UINT64_MAX; + } + } else { + return UINT64_MAX; + } +} + +// Delete all entries that: match the prefix, < ts, and not in except_keys +int WriteCache4RocksDB::Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set &except_keys) { + LOG(INFO) << "Request key_prefix = " << key_prefix << ", ts = " << ts; + rocksdb::ReadOptions read_options; + rocksdb::WriteOptions write_options; + auto iter = db_->NewIterator(read_options); + iter->Seek(key_prefix); + for (; iter->Valid(); 
iter->Next()) { + std::string key = iter->key().ToString(); + LOG(INFO) << "Processing key " << key; + if (!HasPrefix(key, key_prefix)) { + break; + } + if (ParseTS(key) >= ts || except_keys.count(key)) { + continue; + } + auto status = db_->Delete(write_options, key); + if (!status.ok() && !status.IsNotFound()) { + LOG(WARNING) << "Failed to delete key " << key << " from RocksDB: " << status.ToString(); + iter->Reset(); + return IO_ERROR; + } + LOG(INFO) << "Deleted key " << key; + } + iter->Reset(); + return OK; +} + +// ---------------------------------------------------------------------------- + +class WriteCache4Disk : public WriteCacheImpl { +public: + explicit WriteCache4Disk(std::shared_ptr executor); + + ~WriteCache4Disk(); + + virtual GetOutput Get(const std::string &internal_key, uint64_t start, uint64_t length); + + virtual PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf); + + virtual int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set &except_keys); + +private: + std::shared_ptr cache_fs_adaptor_; +}; + +WriteCache4Disk::WriteCache4Disk(std::shared_ptr executor) + : WriteCacheImpl(executor) { + cache_fs_adaptor_ = std::make_shared(GetGlobalConfig().write_cache_dir, nullptr, false, nullptr, false); +} + +WriteCache4Disk::~WriteCache4Disk() {} + +GetOutput WriteCache4Disk::Get(const std::string &internal_key, uint64_t start, uint64_t length) { + butil::Timer t; + t.start(); +#ifndef BRPC_WITH_RDMA + auto wrap = HybridCache::ByteBuffer(new char[length], length); +#else + auto wrap = HybridCache::ByteBuffer((char *) brpc::rdma::AllocBlock(length), length); +#endif + int res = cache_fs_adaptor_->DownLoad(internal_key, start, length, wrap).get(); + GetOutput output; + output.status = res; +#ifndef BRPC_WITH_RDMA + if (res == OK) { + output.buf.append(wrap.data, wrap.len); + } + delete []wrap.data; +#else + if (res == OK) { + output.buf.append_user_data(wrap.data, wrap.len, brpc::rdma::DeallocBlock); 
+ } else { + brpc::rdma::DeallocBlock(wrap.data); + } +#endif + t.stop(); + LOG_IF(INFO, FLAGS_verbose) << "Get key: " << internal_key + << ", start: " << start + << ", length: " << length + << ", status: " << res + << ", latency: " << t.u_elapsed(); + return output; +} + +uint64_t ReportAvailableDiskSpace(std::string &path) { + struct statvfs stat; + if (statvfs(path.c_str(), &stat)) { + PLOG(ERROR) << "Failed to statvfs"; + return 0; + } + return stat.f_bavail * stat.f_bsize; +} + +const static size_t kMinDiskFreeSpace = 1024 * 1024 * 512; + +PutOutput WriteCache4Disk::Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) { + butil::Timer t; + t.start(); + auto oid = next_object_id_.fetch_add(1); + auto internal_key = key + "-" + std::to_string(oid); + + if (ReportAvailableDiskSpace(GetGlobalConfig().write_cache_dir) < std::max(length, kMinDiskFreeSpace)) { + // LOG(WARNING) << "No enough space to persist data, please perform one GC immediately"; + return {NO_ENOUGH_DISKSPACE, ""}; + } + + t.stop(); + // LOG_IF(INFO, FLAGS_verbose) << "duration: " << t.u_elapsed(); + + auto data_len = buf.length(); + + thread_local void *aux_buffer = nullptr; + if (!aux_buffer) + posix_memalign(&aux_buffer, 4096, GetGlobalConfig().write_chunk_size); + + auto data = buf.fetch(aux_buffer, data_len); + auto wrap = HybridCache::ByteBuffer((char *) data, data_len); + std::map headers; + + t.stop(); + // LOG_IF(INFO, FLAGS_verbose) << "duration: " << t.u_elapsed(); + + int res = cache_fs_adaptor_->UpLoad(internal_key, length, wrap, headers).get(); + // free(aux_buffer); + if (res) { + LOG(WARNING) << "Failed to put key " << internal_key << " to disk"; + return {IO_ERROR, ""}; + } + t.stop(); + LOG_IF(INFO, FLAGS_verbose) << "PutWriteCache key: " << key << ", internal_key: " << internal_key << ", size: " << length << ", duration: " << t.u_elapsed(); + return {OK, internal_key}; +} + + +void listFilesRecursively(const std::string &directoryPath, + std::vector &to_remove, 
+ const std::string &key_prefix, + uint64_t ts, + const std::unordered_set &except_keys) { + DIR* dir = opendir(directoryPath.c_str()); + if (dir == nullptr) { + std::cerr << "Error opening directory: " << directoryPath << std::endl; + return; + } + + struct dirent* entry; + while ((entry = readdir(dir)) != nullptr) { + // Skip "." and ".." entries + if (std::string(entry->d_name) == "." || std::string(entry->d_name) == "..") { + continue; + } + + std::string fullPath = PathJoin(directoryPath, entry->d_name); + std::string rootPath = GetGlobalConfig().write_cache_dir; + struct stat statbuf; + if (stat(fullPath.c_str(), &statbuf) == 0) { + if (S_ISDIR(statbuf.st_mode)) { + // It's a directory, recurse into it + listFilesRecursively(fullPath, to_remove, key_prefix, ts, except_keys); + } else if (S_ISREG(statbuf.st_mode)) { + std::string key = fullPath.substr(rootPath.length()); + if (!key.empty() && key[0] == '/') { + key = key.substr(1); + } + if (!HasPrefix(key, key_prefix) || except_keys.count(key) || ParseTS(key) >= ts) { + continue; + } + to_remove.push_back(fullPath); + // LOG(INFO) << "Deleted key " << key << ", location " << fullPath; + } + } + } + closedir(dir); +} + + +// Delete all entries that: match the prefix, < ts, and not in except_keys +int WriteCache4Disk::Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set &except_keys) { + LOG(INFO) << "Request key_prefix = " << key_prefix << ", ts = " << ts; + std::vector to_remove; + listFilesRecursively(GetGlobalConfig().write_cache_dir, + to_remove, + key_prefix, + ts, + except_keys); + for (auto &entry : to_remove) { + if (remove(entry.c_str())) { + LOG(WARNING) << "Failed to remove file: " << entry; + return IO_ERROR; + } + } + return OK; +} + + +class WriteCache4Fake : public WriteCacheImpl { +public: + explicit WriteCache4Fake(std::shared_ptr executor) : WriteCacheImpl(executor) {} + + virtual ~WriteCache4Fake() {} + + virtual GetOutput Get(const std::string &internal_key, uint64_t 
start, uint64_t length) { + LOG_IF(INFO, FLAGS_verbose) << "Get internal_key " << internal_key << " start " << start << " length " << length; + GetOutput ret; + ret.status = OK; + ret.buf.resize(length, 'x'); + return ret; + } + + virtual PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) { + LOG_IF(INFO, FLAGS_verbose) << "Put key " << key << " length " << length; + PutOutput ret; + ret.status = OK; + ret.internal_key = key; + return ret; + } + + virtual int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set &except_keys) { + return OK; + } +}; + +class WriteCache4Cachelib : public WriteCacheImpl { +public: + explicit WriteCache4Cachelib(std::shared_ptr executor) : WriteCacheImpl(executor) { + HybridCache::EnableLogging = false; + impl_ = std::make_shared(GetGlobalConfig().write_cache); + } + + virtual ~WriteCache4Cachelib() {} + + virtual GetOutput Get(const std::string &internal_key, uint64_t start, uint64_t length) { + butil::Timer t; + t.start(); + std::vector> dataBoundary; +#ifndef BRPC_WITH_RDMA + auto wrap = HybridCache::ByteBuffer(new char[length], length); +#else + auto wrap = HybridCache::ByteBuffer((char *) brpc::rdma::AllocBlock(length), length); +#endif + int res = impl_->Get(internal_key, start, length, wrap, dataBoundary); + GetOutput output; + output.status = res; +#ifndef BRPC_WITH_RDMA + if (res == OK) { + output.buf.append(wrap.data, wrap.len); + } + delete []wrap.data; +#else + if (res == OK) { + output.buf.append_user_data(wrap.data, wrap.len, brpc::rdma::DeallocBlock); + } else { + brpc::rdma::DeallocBlock(wrap.data); + } +#endif + t.stop(); + LOG_IF(INFO, FLAGS_verbose) << "Get key: " << internal_key + << ", start: " << start + << ", length: " << length + << ", status: " << res + << ", latency: " << t.u_elapsed(); + return output; + } + + virtual PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) { + LOG_IF(INFO, FLAGS_verbose) << "Put key " << key << " 
length " << length; + PutOutput ret; + ret.status = OK; + ret.internal_key = key; + return ret; + } + + virtual int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set &except_keys) { + return OK; + } + +private: + std::shared_ptr impl_; + +}; + + +DEFINE_string(write_cache_engine, "disk", "Write cache engine: rocksdb | disk"); + +WriteCache::WriteCache(std::shared_ptr executor) { + if (FLAGS_write_cache_engine == "rocksdb") + impl_ = new WriteCache4RocksDB(executor); + else if (FLAGS_write_cache_engine == "disk") + impl_ = new WriteCache4Disk(executor); + else if (FLAGS_write_cache_engine == "fake") + impl_ = new WriteCache4Fake(executor); + else { + LOG(WARNING) << "unsupported write cache engine"; + exit(EXIT_FAILURE); + } +} diff --git a/global_cache/WriteCache.h b/global_cache/WriteCache.h new file mode 100644 index 0000000..52e1028 --- /dev/null +++ b/global_cache/WriteCache.h @@ -0,0 +1,53 @@ +#ifndef MADFS_WRITE_CACHE_H +#define MADFS_WRITE_CACHE_H + +#include +#include +#include +#include +#include + +#include +#include + +#include "Common.h" + +class WriteCacheImpl { +public: + WriteCacheImpl(std::shared_ptr executor) : executor_(executor), next_object_id_(0) {} + virtual GetOutput Get(const std::string &internal_key, uint64_t start, uint64_t length) = 0; + virtual PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) = 0; + virtual uint64_t QueryTS() { return next_object_id_.load(); } + virtual int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set &except_keys) = 0; + + std::shared_ptr executor_; + std::atomic next_object_id_; +}; + +class WriteCache { +public: + explicit WriteCache(std::shared_ptr executor); + + ~WriteCache() { + delete impl_; + } + + GetOutput Get(const std::string &internal_key, uint64_t start, uint64_t length) { + return impl_->Get(internal_key, start, length); + } + + PutOutput Put(const std::string &key, uint64_t length, const butil::IOBuf &buf) { + return 
impl_->Put(key, length, buf); + } + + uint64_t QueryTS() { return impl_->QueryTS(); } + + int Delete(const std::string &key_prefix, uint64_t ts, const std::unordered_set &except_keys) { + return impl_->Delete(key_prefix, ts, except_keys); + } + +private: + WriteCacheImpl *impl_; +}; + +#endif // MADFS_WRITE_CACHE_H \ No newline at end of file diff --git a/global_cache/WriteCacheClient.h b/global_cache/WriteCacheClient.h new file mode 100644 index 0000000..c0112a0 --- /dev/null +++ b/global_cache/WriteCacheClient.h @@ -0,0 +1,42 @@ +#ifndef MADFS_WRITE_CACHE_CLIENT_H +#define MADFS_WRITE_CACHE_CLIENT_H + +#include +#include +#include + +#include "Common.h" +#include "Placement.h" +#include "data_adaptor.h" +#include "EtcdClient.h" + +using HybridCache::ByteBuffer; + +class GlobalDataAdaptor; + +class WriteCacheClient { +public: + struct PutResult { + int status; + Json::Value root; + }; + +public: + WriteCacheClient() {} + + ~WriteCacheClient() {} + + virtual folly::Future Put(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map &headers, + size_t off = 0) = 0; + + virtual folly::Future Get(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer, + Json::Value &root) = 0; +}; + +#endif // MADFS_WRITE_CACHE_CLIENT_H \ No newline at end of file diff --git a/global_cache/gcache.proto b/global_cache/gcache.proto new file mode 100644 index 0000000..9d350ba --- /dev/null +++ b/global_cache/gcache.proto @@ -0,0 +1,72 @@ +syntax="proto2"; +package gcache; + +option cc_generic_services = true; + +message GetEntryRequest { + required string key = 1; + required uint64 start = 2; + required uint64 length = 3; +}; + +message GetEntryResponse { + required int32 status_code = 1; + optional bytes data = 2; +}; + +message PutEntryRequest { + required string key = 1; + required uint64 length = 2; + optional bytes data = 3; +}; + +message PutEntryResponse { + required int32 status_code = 1; + optional string internal_key = 2; // for 
write cache +}; + +message DeleteEntryRequest { + required string key = 1; // actually 'prefix' + optional uint64 chunk_size = 2; + optional uint64 max_chunk_id = 3; +}; + +message DeleteEntryRequestForWriteCache { + required string key_prefix = 1; + required uint64 max_ts = 2; + repeated string except_keys = 3; +}; + +message DeleteEntryResponse { + required int32 status_code = 1; +}; + +message RegisterRequest { + // nothing +}; + +message QueryTsRequest { + // nothing +}; + +message QueryTsResponse { + required int32 status_code = 1; + required uint64 timestamp = 2; +}; + +message RegisterResponse { + required int32 status_code = 1; +}; + +service GlobalCacheService { + rpc GetEntryFromReadCache(GetEntryRequest) returns (GetEntryResponse); + rpc PutEntryFromReadCache(PutEntryRequest) returns (PutEntryResponse); + rpc DeleteEntryFromReadCache(DeleteEntryRequest) returns (DeleteEntryResponse); + + rpc GetEntryFromWriteCache(GetEntryRequest) returns (GetEntryResponse); + rpc PutEntryFromWriteCache(PutEntryRequest) returns (PutEntryResponse); + rpc DeleteEntryFromWriteCache(DeleteEntryRequestForWriteCache) returns (DeleteEntryResponse); + rpc QueryTsFromWriteCache(QueryTsRequest) returns (QueryTsResponse); + + rpc Register(RegisterRequest) returns (RegisterResponse); +}; diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..cde4575 --- /dev/null +++ b/install.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +if [ ! -d "./JYCache_Env" ]; then + wget https://madstorage.s3.cn-north-1.jdcloud-oss.com/JYCache_Env_x64.tgz + md5=`md5sum JYCache_Env_x64.tgz | awk {'print $1'}` + if [ "$md5" != "cd27e0db8b1fc33b88bf1c467ed012b8" ]; then +   echo 'JYCache_Env version inconsistency!' 
+ exit 1 + fi + tar -zxvf JYCache_Env_x64.tgz +fi + +cp ./build/intercept/intercept_server JYCache_Env/ +cp ./build/intercept/libintercept_client.so JYCache_Env/ +cp ./build/global_cache/madfs_gc JYCache_Env/ +cp ./build/global_cache/madfs_global_server JYCache_Env/ +cp ./build/bin/s3fs JYCache_Env/ diff --git a/intercept/CMakeLists.txt b/intercept/CMakeLists.txt new file mode 100644 index 0000000..170886d --- /dev/null +++ b/intercept/CMakeLists.txt @@ -0,0 +1,34 @@ +link_libraries(-lrt) + +find_library(ICEORYX_POSH_LIB iceoryx_posh PATHS ../thirdparties/iceoryx/lib) +find_library(ICEORYX_HOOFS_LIB iceoryx_hoofs PATHS ../thirdparties/iceoryx/lib) +find_library(ICEORYX_PLATFORM_LIB iceoryx_platform PATHS ../thirdparties/iceoryx/lib) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2") + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../thirdparties/spdlog/include) + +add_subdirectory(common) +add_subdirectory(internal) +add_subdirectory(discovery) +add_subdirectory(filesystem) +add_subdirectory(registry) +add_subdirectory(middleware) +add_subdirectory(posix) + +add_executable(intercept_server server.cpp) +target_link_libraries(intercept_server PUBLIC intercept_discovery intercept_internal common_lib intercept_filesystem intercept_middleware intercept_registry hybridcache_local madfs_global s3fs_lib ${THIRD_PARTY_LIBRARIES} ${ICEORYX_POSH_LIB} ${ICEORYX_HOOFS_LIB} ${ICEORYX_PLATFORM_LIB} -pthread -lcurl -lxml2 -lcrypto -ldl -laio -lrt -lacl) + +add_library(intercept_client SHARED client.cpp) +target_link_libraries(intercept_client PUBLIC + intercept_posix_interface_client + -lsyscall_intercept + -pthread + ${ICEORYX_POSH_LIB} + ${ICEORYX_HOOFS_LIB} + ${ICEORYX_PLATFORM_LIB} + -lrt + -L${CMAKE_CURRENT_SOURCE_DIR}/../thirdparties/intercept/lib +) +target_compile_options(intercept_client PUBLIC -DCLIENT_BUILD -mavx2) diff --git a/intercept/client.cpp b/intercept/client.cpp new file mode 100644 index 0000000..04c14dc 
--- /dev/null +++ b/intercept/client.cpp @@ -0,0 +1,138 @@ + + + +// int main2(int argc, char *argv[]) { +// InitSyscall(); +// GlobalInit(); +// long args[6]; +// const char* pathname = "/curvefs/test_mount/testfile"; +// args[0] = (long)(pathname); +// args[1] = O_CREAT | O_WRONLY | O_TRUNC; +// args[2] = S_IRUSR | S_IWUSR; +// long result = 0; +// PosixOpOpen(args, &result); +// PosixOpAccess(args, &result); +// return 0; +// } + +#include "registry/client_server_registry.h" +// ! 暂时注释,使用时不能注释 +#include "posix/posix_helper.h" +using intercept::middleware::ReqResMiddlewareWrapper; +int main() { + constexpr char APP_NAME[] = "iox-intercept-client"; + iox::runtime::PoshRuntime::initRuntime(APP_NAME); + + intercept::internal::ServiceMetaInfo info; + info.service = SERVICE_FLAG; + info.instance = INTERCEPT_INSTANCE_FLAG; + intercept::registry::ClientServerRegistry registry(ICEORYX, info); + auto dummyserver = registry.CreateDummyServer(); + sleep(2); + + info = dummyserver->GetServiceMetaInfo(); + info.service = SERVICE_FLAG; + info.instance = INTERCEPT_INSTANCE_FLAG; + + std::shared_ptr wrapper = registry.CreateClient(info); + + intercept::internal::OpenOpReqRes req("/testdir/hellofile1", O_CREAT|O_RDWR, S_IRUSR | S_IWUSR); + wrapper->OnRequest(req); + const auto& openRes = static_cast (req.GetResponse()); + + + char* writebuf = (char*)malloc(sizeof(char) * 1024); + char str[] = "hello world"; + memcpy(writebuf, str, sizeof(str)); + intercept::internal::WriteOpReqRes writeReq(openRes.fd, writebuf, strlen(writebuf)); + wrapper->OnRequest(writeReq); + + // open and read + intercept::internal::OpenOpReqRes req2("/testdir/hellofile1", O_RDWR, S_IRUSR | S_IWUSR); + wrapper->OnRequest(req2); + const auto& openRes2 = static_cast (req2.GetResponse()); + char* buf2 = (char*)malloc(sizeof(char) * 1024); + + intercept::internal::ReadOpReqRes readReq2(openRes2.fd, buf2, 8); + wrapper->OnRequest(readReq2); + free((void*)buf2); + + dummyserver->StopServer(); + std::cout << "stop 
dummyserver in main" << std::endl; + //sleep(5); + return 0; +} + +int mainposix() { + char filename[256]; + + // 循环执行流程 + while (true) { + std::cout << "Enter filename (or 'exit' to quit): "; + std::cin >> filename; + + if (strcmp(filename, "exit") == 0) { + std::cout << "Exiting program..." << std::endl; + break; + } + + std::cout << "Enter 'write' to write to file, 'read' to read from file: "; + std::string operation; + std::cin >> operation; + + if (operation == "write") { + // 打开文件进行写入 + int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); + if (fd == -1) { + std::cerr << "Error: Failed to open file for writing." << std::endl; + continue; + } + + std::string content; + std::cout << "Enter content to write to file (end with 'EOF'): " << std::endl; + std::cin.ignore(); // 忽略前一个输入的换行符 + while (true) { + std::string line; + std::getline(std::cin, line); + if (line == "EOF") { + break; + } + content += line + "\n"; + } + + ssize_t bytes_written = write(fd, content.c_str(), content.size()); + std::cout << "the write byte: " << bytes_written << std::endl; + close(fd); + } else if (operation == "read") { + // 打开文件进行读取 + int fd = open(filename, O_RDONLY); + if (fd == -1) { + std::cerr << "Error: Failed to open file for reading." << std::endl; + continue; + } + + char buffer[4096]; + ssize_t bytesRead; + std::cout << "Content read from file:" << std::endl; + while ((bytesRead = read(fd, buffer, sizeof(buffer))) > 0) { + std::cout.write(buffer, bytesRead); + } + std::cout << std::endl; + + // 获取文件的状态信息 + struct stat fileStat; + if (fstat(fd, &fileStat) == 0) { + std::cout << "File size: " << fileStat.st_size << " bytes" << std::endl; + std::cout << "File permissions: " << (fileStat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) << std::endl; + } else { + std::cerr << "Error: Failed to get file status." << std::endl; + } + + close(fd); + } else { + std::cerr << "Error: Invalid operation. Please enter 'write' or 'read'." 
<< std::endl; + } + } + + return 0; +} diff --git a/intercept/common/CMakeLists.txt b/intercept/common/CMakeLists.txt new file mode 100644 index 0000000..84081d6 --- /dev/null +++ b/intercept/common/CMakeLists.txt @@ -0,0 +1,8 @@ +file(GLOB COMMON_SOURCES *.cpp) + +add_library(common_lib ${COMMON_SOURCES}) +target_include_directories(common_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +add_library(common_lib_client ${COMMON_SOURCES}) +target_compile_options(common_lib_client PUBLIC -fPIC -DCLIENT_BUILD) +target_include_directories(common_lib_client PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/intercept/common/common.cpp b/intercept/common/common.cpp new file mode 100644 index 0000000..6ee255d --- /dev/null +++ b/intercept/common/common.cpp @@ -0,0 +1,175 @@ +#include + +#include "common.h" +#include "spdlog/sinks/basic_file_sink.h" +#include "spdlog/sinks/stdout_color_sinks.h" + + + +namespace intercept{ +namespace common { +void dummy() { + +} + + +// Constructor starts the timer +Timer::Timer() : m_startTimePoint(std::chrono::high_resolution_clock::now()) {} + +Timer::Timer(const std::string& message) : m_message(message), + m_startTimePoint(std::chrono::high_resolution_clock::now()) {} + +// Destructor prints the elapsed time if the timer hasn't been stopped manually +Timer::~Timer() { + if (!m_stopped) { + Stop(); + } + // std::cout << m_message << " Elapsed time: " << m_elapsedTime << " ms" << std::endl; +} + +// Method to stop the timer and return the elapsed time in milliseconds +void Timer::Stop() { + if (!m_stopped) { + auto endTimePoint = std::chrono::high_resolution_clock::now(); + auto start = std::chrono::time_point_cast(m_startTimePoint).time_since_epoch().count(); + auto end = std::chrono::time_point_cast(endTimePoint).time_since_epoch().count(); + + m_elapsedTime = end - start; + m_stopped = true; + if (m_elapsedTime > 0) { + // std::cout << m_message << ", Elapsed time: " << m_elapsedTime << " us" << std::endl; + spdlog::warn("{}, Elapsed time: {} 
us ", m_message, m_elapsedTime); + } + } +} + +// Method to get the elapsed time in microseconds +long long Timer::ElapsedMicroseconds() const { + return m_elapsedTime; +} + +// Method to restart the timer +void Timer::Restart() { + m_startTimePoint = std::chrono::high_resolution_clock::now(); + m_stopped = false; +} + +ThreadPool::ThreadPool(size_t numThreads) : stop(false) { + for (size_t i = 0; i < numThreads; ++i) { + workers.emplace_back([this] { + for (;;) { + std::function task; + { + std::unique_lock lock(this->queueMutex); + this->condition.wait(lock, [this] { return this->stop || !this->tasks.empty(); }); + if (this->stop && this->tasks.empty()) + return; + task = std::move(this->tasks.front()); + this->tasks.pop(); + } + task(); + } + }); + } +} + +ThreadPool::~ThreadPool() { + { + std::unique_lock lock(queueMutex); + stop = true; + } + condition.notify_all(); + for (std::thread &worker : workers) + worker.join(); +} + +void ThreadPool::enqueue(std::function task) { + { + std::unique_lock lock(queueMutex); + if (stop) + throw std::runtime_error("enqueue on stopped ThreadPool"); + tasks.emplace(task); + } + condition.notify_one(); +} + + +std::string generateRandomSuffix() { + // 使用当前时间作为随机数生成器的种子,以获得更好的随机性 + unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); + std::default_random_engine generator(seed); + std::uniform_int_distribution distribution(0, 25); // 生成0-25之间的整数,对应字母'a'到'z' + + std::string suffix; + suffix.reserve(5); // 假设我们想要生成5个随机字符的后缀 + for (size_t i = 0; i < 5; ++i) { + suffix += static_cast('a' + distribution(generator)); + } + return suffix; +} + +std::atomic running(true); +void UpdateLogLevelPeriodically(int intervalSeconds) { + auto& config = Configure::getInstance(); + while (running) { + std::this_thread::sleep_for(std::chrono::seconds(intervalSeconds)); + // std::cout << "reload the config: " << CONFIG_FILE << std::endl; + config.loadConfig(CONFIG_FILE); // Assuming this reloads the configuration + 
std::string loglevel = config.getConfig("loglevel"); + if (loglevel == "debug") { + spdlog::set_level(spdlog::level::debug); + } else if (loglevel == "warning") { + spdlog::set_level(spdlog::level::warn); + } else if (loglevel == "info") { + spdlog::set_level(spdlog::level::info); + } else if (loglevel == "error") { + spdlog::set_level(spdlog::level::err); + } else { + std::cerr << "Invalid log level specified in the config file" << std::endl; + } + } +} +void InitLog() { + const auto& config = Configure::getInstance(); + std::string pid = std::to_string((long)getpid()); + std::string logpath = config.getConfig("logpath") + "." + pid; + std::string loglevel = config.getConfig("loglevel"); + try + { + std::shared_ptr logger; + std::string printtype = config.getConfig("logprinttype"); + if (printtype == "console") { + logger = spdlog::stdout_color_mt("console"); + } else { + logger = spdlog::basic_logger_mt("basic_logger", logpath); + } + spdlog::set_default_logger(logger); + if (loglevel == "debug") { + spdlog::set_level(spdlog::level::debug); + } + else if (loglevel == "warning") { + spdlog::set_level(spdlog::level::warn); + } + else if (loglevel == "info") { + spdlog::set_level(spdlog::level::info); + } + else if (loglevel == "error") { + spdlog::set_level(spdlog::level::err); + } + else { + std::cerr << "Invalid log level specified in the config file" << std::endl; + } + //spdlog::set_pattern("[%H:%M:%S %z] [%n] [%^---%L---%$] [thread %t] %v"); + spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%n] [%l] [pid %P tid %t] %v"); + spdlog::flush_every(std::chrono::seconds(5)); + // Start the periodic log level updater thread + std::thread updateThread(UpdateLogLevelPeriodically, 5); // Check every 60 seconds + updateThread.detach(); // Detach the thread so it runs independently + } + catch (const spdlog::spdlog_ex &ex) { + std::cout << "Log init failed: " << ex.what() << std::endl; + } +} + +} // namespace common +} // namespace intercept diff --git 
a/intercept/common/common.h b/intercept/common/common.h new file mode 100644 index 0000000..00ebff2 --- /dev/null +++ b/intercept/common/common.h @@ -0,0 +1,143 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "spdlog/spdlog.h" +#include "spdlog/fmt/fmt.h" + +namespace intercept { +namespace common { + +#ifndef CLIENT_BUILD +const std::string CONFIG_FILE = "conf/server.conf"; +#else +const std::string CONFIG_FILE = "conf/client.conf"; +#endif + +using Ino = uint64_t; +struct DirStream { + Ino ino; + uint64_t fh; + uint64_t offset; +}; + +class Timer { +public: + // Constructor starts the timer + Timer(); + Timer(const std::string& message); + + // Destructor prints the elapsed time if the timer hasn't been stopped manually + ~Timer(); + + // Method to stop the timer and return the elapsed time in milliseconds + void Stop(); + + // Method to get the elapsed time in microseconds + long long ElapsedMicroseconds() const; + + // Method to restart the timer + void Restart(); + +private: + std::chrono::time_point m_startTimePoint; + long long m_elapsedTime = 0; + bool m_stopped = false; + std::string m_message; +}; + + +class Configure { +public: + // 获取单例实例的静态方法 + static Configure& getInstance() { + static Configure instance; + return instance; + } + + // 加载配置文件的方法 + bool loadConfig(const std::string& filePath) { + std::ifstream file(filePath); + if (!file.is_open()) { + std::cerr << "Failed to open config file: " << filePath << std::endl; + return false; + } + + std::string line; + while (std::getline(file, line)) { + // Ignore comments and empty lines + if (line.empty() || line[0] == '#') { + continue; + } + + std::istringstream iss(line); + std::string key, value; + + // Split line into key and value + if (std::getline(iss, key, '=') && std::getline(iss, value)) { + // Remove whitespace from the key and value + 
key.erase(key.find_last_not_of(" \t\n\r\f\v") + 1); + key.erase(0, key.find_first_not_of(" \t\n\r\f\v")); + value.erase(value.find_last_not_of(" \t\n\r\f\v") + 1); + value.erase(0, value.find_first_not_of(" \t\n\r\f\v")); + + configMap[key] = value; + } + } + + file.close(); + return true; + } + + // 获取配置值的方法 + std::string getConfig(const std::string& key) const { + auto it = configMap.find(key); + if (it != configMap.end()) { + return it->second; + } + return ""; + } + +private: + std::map configMap; // 存储配置键值对 + Configure() {} // 私有构造函数,防止外部直接实例化 + Configure(const Configure&) = delete; // 禁止拷贝构造 + Configure& operator=(const Configure&) = delete; // 禁止赋值操作 +}; + +class ThreadPool { +public: + ThreadPool(size_t numThreads = 30); + ~ThreadPool(); + void enqueue(std::function task); + +private: + std::vector workers; + std::queue> tasks; + std::mutex queueMutex; + std::condition_variable condition; + bool stop; +}; + + + + +std::string generateRandomSuffix(); +void InitLog(); + +} // namespace common +} // namespace intercept \ No newline at end of file diff --git a/intercept/discovery/CMakeLists.txt b/intercept/discovery/CMakeLists.txt new file mode 100644 index 0000000..e48f08a --- /dev/null +++ b/intercept/discovery/CMakeLists.txt @@ -0,0 +1,22 @@ +# discovery/CMakeLists.txt + +file(GLOB DISCOVERY_SOURCES *.cpp) + +find_library(ICEORYX_POSH_LIB NAMES iceoryx_posh PATHS ../../thirdparties/iceoryx/lib) +find_library(ICEORYX_HOOFS_LIB NAMES iceoryx_hoofs PATHS ../../thirdparties/iceoryx/lib) +find_library(ICEORYX_PLATFORM_LIB NAMES iceoryx_platform PATHS ../../thirdparties/iceoryx/lib) + +add_library(intercept_discovery ${DISCOVERY_SOURCES}) +target_include_directories(intercept_discovery PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/include +) +target_link_libraries(intercept_discovery PUBLIC intercept_internal ${ICEORYX_POSH_LIB}) + +add_library(intercept_discovery_client ${DISCOVERY_SOURCES}) 
+target_include_directories(intercept_discovery_client PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/include +) +target_compile_options(intercept_discovery_client PUBLIC -fPIC ) +target_link_libraries(intercept_discovery_client PUBLIC -lrt intercept_internal_client ${ICEORYX_POSH_LIB} ${ICEORYX_HOOFS_LIB} ${ICEORYX_PLATFORM_LIB} ) \ No newline at end of file diff --git a/intercept/discovery/discovery.h b/intercept/discovery/discovery.h new file mode 100644 index 0000000..59a8a91 --- /dev/null +++ b/intercept/discovery/discovery.h @@ -0,0 +1,66 @@ +#pragma once +#include +#include +#include +#include + +#include "internal/metainfo.h" + +namespace intercept { +namespace discovery { + +using intercept::internal::ServiceMetaInfo; + + + +// Discovery : use to discover the existing servers +// and the servers to be deleted +class Discovery { +public: + // Constructor + Discovery() { + // Initialization code + } + + // Initialize the discovery + virtual void Init() = 0; + + // Start the discovery loop + virtual void Start() = 0; + + // Stop the discovery loop + virtual void Stop() = 0; + + // Get the existing servers + virtual std::vector GetServers() const { + // Return the existing servers + return std::vector(); + } + + // Get the servers to be deleted + virtual std::set GetServersToDelete() const { + // Return the servers to be deleted + return std::set(); + } + + virtual std::vector FindServices(const ServiceMetaInfo& info) = 0; + + // Create a new server + virtual void CreateServer(const ServiceMetaInfo& serverInfo) { + // Create a new server using the serverInfo + } + + // Delete a server + virtual void DeleteServer(const ServiceMetaInfo& serverInfo) { + // Delete a server using the serverInfo + } + +protected: + std::vector existingServers; + std::set serversToDelete; + bool DISCOVERY_RUNNING; +}; + +} +} + diff --git a/intercept/discovery/iceoryx_discovery.cpp b/intercept/discovery/iceoryx_discovery.cpp new file 
mode 100644 index 0000000..de06b3c --- /dev/null +++ b/intercept/discovery/iceoryx_discovery.cpp @@ -0,0 +1,125 @@ +#include +#include + +#include "iceoryx_discovery.h" + + +#include "iox/signal_watcher.hpp" +#include "iceoryx_posh/runtime/posh_runtime.hpp" +#include "iceoryx_posh/runtime/service_discovery.hpp" + +namespace intercept { +namespace discovery { + +// TODO: Add your own discovery service implementation here +#define DISCOVERY_SERVICE_NAME "IceoryxDiscoveryService" +#define DISCOVERY_SERVICE_VERSION "1.0.0" +#define DISCOVERY_SERVICE_DESCRIPTION "IceoryxDiscoveryServiceDescription" +#define DISCOVERY_SERVICE_PROVIDER "IceoryxDiscoveryServiceProvider" +// constexpr char APP_NAME[] = "iox-discovery-service"; + +IceoryxDiscovery::IceoryxDiscovery() { + +} + +IceoryxDiscovery::~IceoryxDiscovery() { + // TODO: Clean up your discovery service implementation here +} + +void IceoryxDiscovery::Init() { + // TODO: Initialize your discovery service implementation here +} + +void IceoryxDiscovery::Start() { + // TODO: Start your discovery service implementation here + while (!iox::hasTerminationRequested()) { + // TODO: Implement discovery service logic here + const auto& servers = GetServers(); + const auto& newservers = GetNewServers(existingServers, servers); + for (auto& server : newservers) { + // TODO: Implement logic to handle new servers here + CreateServer(server); + } + + const auto& removedServers = GetRemovedServers(existingServers, servers); + for (auto& server : removedServers) { + // TODO: Implement logic to handle deleted servers here + DeleteServer(server); + } + existingServers = servers; + + } +} + +void IceoryxDiscovery::Stop() { + // TODO: Stop your discovery service implementation here +} + +std::vector IceoryxDiscovery::GetServers() const { + return {}; +} + +std::vector IceoryxDiscovery::GetNewServers(const std::vector& existingServers, const std::vector& newServers) { + std::vector newServersList; + return newServersList; +} + +std::set 
IceoryxDiscovery::GetRemovedServers(const std::vector& existingServers, const std::vector& newServers) { + std::set removedServersList; + return removedServersList; +} + +std::vector IceoryxDiscovery::FindServices(const ServiceMetaInfo& info) { + iox::capro::IdString_t serviceStr(iox::TruncateToCapacity, info.service.c_str()); + iox::capro::IdString_t instanceStr(iox::TruncateToCapacity, info.instance.c_str()); + iox::capro::IdString_t eventStr(iox::TruncateToCapacity, info.event.c_str()); // fix: was info.instance (copy-paste bug) -- event filter silently matched on the instance string + + iox::optional service = serviceStr; + iox::optional instance = instanceStr; + iox::optional event = eventStr; + + if (info.service == "") { + //service = iox::capro::Wildcard; + service = iox::optional(iox::capro::Wildcard); + + } + if (info.instance == "") { + //instance = iox::capro::Wildcard; + instance = iox::optional(iox::capro::Wildcard); + + } + if (info.event == "") { + //event = iox::capro::Wildcard; + event = iox::optional(iox::capro::Wildcard); + } + + std::vector results; + serviceDiscovery_.findService(service, instance, event, + [&results](const iox::capro::ServiceDescription& serviceDescription) { + results.push_back(serviceDescription); + }, + iox::popo::MessagingPattern::REQ_RES + ); + std::vector metainfos; + for (const iox::capro::ServiceDescription& result : results) { + ServiceMetaInfo metaInfo; + metaInfo.service = result.getServiceIDString().c_str(); + metaInfo.instance = result.getInstanceIDString().c_str(); + metaInfo.event = result.getEventIDString().c_str(); + metainfos.push_back(metaInfo); + // std::cout << "Found service: " << metaInfo.service + // << " instance: " << metaInfo.instance << " event: " << metaInfo.event << std::endl; + } + return metainfos; +} + +void IceoryxDiscovery::CreateServer(const ServiceMetaInfo& server) { + // TODO: Implement logic to handle new servers here +} + +void IceoryxDiscovery::DeleteServer(const ServiceMetaInfo& server) { + +} + +} // namespace discovery
} // namespace intercept \ No newline at end of file diff
--git a/intercept/discovery/iceoryx_discovery.h b/intercept/discovery/iceoryx_discovery.h new file mode 100644 index 0000000..96be187 --- /dev/null +++ b/intercept/discovery/iceoryx_discovery.h @@ -0,0 +1,41 @@ +#pragma once +#include "discovery.h" +#include "iceoryx_posh/runtime/service_discovery.hpp" + +namespace intercept { +namespace discovery { + +class IceoryxDiscovery : public Discovery +{ +public: + IceoryxDiscovery(); + + virtual ~IceoryxDiscovery(); + + virtual void Init(); + + virtual void Start(); + + virtual void Stop(); + + virtual std::vector GetServers() const; + + virtual std::vector GetNewServers(const std::vector& oldservers, + const std::vector& newservers); + + virtual std::set GetRemovedServers( + const std::vector& oldservers, const std::vector& newservers); + + virtual std::vector FindServices(const ServiceMetaInfo& info); + + virtual void CreateServer(const ServiceMetaInfo& serverInfo); + + virtual void DeleteServer(const ServiceMetaInfo& serverInfo); + +private: + iox::runtime::ServiceDiscovery serviceDiscovery_; +}; + +} // namespace discovery +} // namespace intercept + diff --git a/intercept/filesystem/CMakeLists.txt b/intercept/filesystem/CMakeLists.txt new file mode 100644 index 0000000..aeaeffe --- /dev/null +++ b/intercept/filesystem/CMakeLists.txt @@ -0,0 +1,28 @@ +find_library(ICEORYX_POSH_LIB iceoryx_posh PATHS ../thirdparties/iceoryx/lib) +find_library(ICEORYX_HOOFS_LIB iceoryx_hoofs PATHS ../thirdparties/iceoryx/lib) +find_library(ICEORYX_PLATFORM_LIB iceoryx_platform PATHS ../thirdparties/iceoryx/lib) + +file(GLOB FILESYSTEM_SOURCES *.cpp) +file(GLOB FILESYSTEM_HEADERS *.h) + +add_library(intercept_filesystem ${FILESYSTEM_SOURCES}) +target_include_directories(intercept_filesystem PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(intercept_filesystem PUBLIC + ${ICEORYX_POSH_LIB} ${ICEORYX_HOOFS_LIB} ${ICEORYX_PLATFORM_LIB} + hybridcache_local madfs_global s3fs_lib ${THIRD_PARTY_LIBRARIES} common_lib + -pthread + -lcurl 
+ -lxml2 + -lcrypto + -ldl + -laio + -lrt +) + +add_library(intercept_filesystem_client INTERFACE) +target_include_directories(intercept_filesystem_client INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(intercept_filesystem_client INTERFACE + common_lib_client + -lrt +) +target_compile_options(intercept_filesystem_client INTERFACE -DCLIENT_BUILD) \ No newline at end of file diff --git a/intercept/filesystem/abstract_filesystem.h b/intercept/filesystem/abstract_filesystem.h new file mode 100644 index 0000000..7dc3c9b --- /dev/null +++ b/intercept/filesystem/abstract_filesystem.h @@ -0,0 +1,57 @@ +#ifndef ABSTRACT_FILESYSTEM_H +#define ABSTRACT_FILESYSTEM_H + +#include +#include +#include +#include +#include + +#include "common/common.h" + +namespace intercept { +namespace filesystem { + +using intercept::common::DirStream; +class AbstractFileSystem { +public: + virtual ~AbstractFileSystem() {} + virtual void Init() = 0; + virtual void Shutdown() = 0; + virtual int Open(const char* path, int flags, int mode) = 0; + virtual ssize_t Read(int fd, void* buf, size_t count) = 0; + virtual ssize_t Write(int fd, const void* buf, size_t count) = 0; + virtual int Close(int fd) = 0; + virtual off_t Lseek(int fd, off_t offset, int whence) = 0; + virtual int Stat(const char* path, struct stat* st) = 0; + virtual int Fstat(int fd, struct stat* st) = 0; + virtual int Fsync(int fd) = 0; + virtual int Truncate(const char* path, off_t length) = 0; + virtual int Ftruncate(int fd, off_t length) = 0; + virtual int Unlink(const char* path) = 0; + virtual int Mkdir(const char* path, mode_t mode) = 0; + virtual int Opendir(const char* path, DirStream* dirstream) = 0; + virtual int Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes) = 0; + virtual int Closedir(DirStream* dirstream) = 0; + virtual int Rmdir(const char* path) = 0; + virtual int Chmod(const char* path, mode_t mode) = 0; + virtual int Chown(const char* path, uid_t owner, gid_t group) 
= 0; + virtual int Rename(const char* oldpath, const char* newpath) = 0; + virtual int Link(const char* oldpath, const char* newpath) = 0; + virtual int Symlink(const char* oldpath, const char* newpath) = 0; + virtual int Readlink(const char* path, char* buf, size_t bufsize) = 0; + virtual int Utime(const char* path, const struct utimbuf* times) = 0; + + virtual ssize_t MultiRead(int fd, void* buf, size_t count) { return -1; } // fix: body was empty -- flowing off a non-void function is UB; default to "not supported" + virtual ssize_t MultiWrite(int fd, const void* buf, size_t count) { return -1; } // fix: same missing-return UB as MultiRead + +protected: + virtual std::string NormalizePath(const std::string& path) = 0; +}; + +} // namespace filesystem +} // namespace intercept + + + +#endif // ABSTRACT_FILESYSTEM_H diff --git a/intercept/filesystem/curve_filesystem.cpp b/intercept/filesystem/curve_filesystem.cpp new file mode 100644 index 0000000..67aea1b --- /dev/null +++ b/intercept/filesystem/curve_filesystem.cpp @@ -0,0 +1,166 @@ +#include +#include + +#include "curve_filesystem.h" +#include "libcurvefs_external.h" + +namespace intercept { +namespace filesystem { + +#define POSIX_SET_ATTR_SIZE (1 << 3) +CurveFileSystem::CurveFileSystem() {} +CurveFileSystem::~CurveFileSystem() { + curvefs_release(instance_); +} + +void CurveFileSystem::Init() { + instance_ = curvefs_create(); + curvefs_load_config(instance_, "./curve_posix_client.conf"); + //curvefs_mount(instance_, "s3cy1", "/tmp/curvefs"); + curvefs_mount(instance_, "s3cy1", "/"); + std::cout << "finish curvefs create" << std::endl; +} + +void CurveFileSystem::Shutdown() { + +} + +int CurveFileSystem::Open(const char* path, int flags, int mode) { + std::cout << "open, the path: " << path << std::endl; + int ret = curvefs_open(instance_, path, flags, mode); + // NOTE(review): a valid fd can equal EEXIST (17), so this positive-value check can misfire; curvefs likely reports errors as -errno -- confirm and compare against -EEXIST. + if (ret == EEXIST) { // retry without creating + ret = curvefs_open(instance_, path, flags & ~O_CREAT, mode); + } + //std::cout << "the path: " << path << " , the stat: " << tmp.st_size << " , the time: " << tmp.st_mtime << std::endl; + return ret; +} + +ssize_t
CurveFileSystem::Read(int fd, void* buf, size_t count) { + int ret = curvefs_read(instance_, fd, (char*)buf, count); + //int ret = count; + //std::cout << "read, the fd: " << fd << " the buf: " << (char*)buf << ", the count: " << count << ", the ret: " << ret << std::endl; + return ret; +} + +ssize_t CurveFileSystem::Write(int fd, const void* buf, size_t count) { + int ret = curvefs_write(instance_, fd, (char*)buf, count); + //int ret = count; + //std::cout << "write, the fd: " << fd << " the buf: " << (char*)buf << ", the count: " << count << ", the ret: " << ret << std::endl; + return ret; +} + +int CurveFileSystem::Close(int fd) { + int ret = curvefs_close(instance_, fd); + std::cout << "curve close, the fd: " << fd << std::endl; + return ret; +} + +off_t CurveFileSystem::Lseek(int fd, off_t offset, int whence) { + int ret = curvefs_lseek(instance_, fd, offset, whence); + std::cout << "curve lseek, the fd: " << fd << ", the offset: " << offset << ", the whence: " << whence << ", the ret: " << ret << std::endl; + return ret; +} + +int CurveFileSystem::Stat(const char* path, struct stat* st) { + int ret = curvefs_lstat(instance_, path, st); + return ret; +} + +int CurveFileSystem::Fstat(int fd, struct stat* st) { + int ret = curvefs_fstat(instance_, fd, st); + return ret; +} + +int CurveFileSystem::Fsync(int fd) { + int ret = curvefs_fsync(instance_, fd); + return ret; +} + +int CurveFileSystem::Ftruncate(int fd, off_t length) { + throw std::runtime_error("未实现"); +} + +int CurveFileSystem::Unlink(const char* path) { + int ret = curvefs_unlink(instance_, path); + std::cout << "unlink, the path: " << path << ", the ret: " << ret << std::endl; + return ret; +} + +int CurveFileSystem::Mkdir(const char* path, mode_t mode) { + int ret = curvefs_mkdir(instance_, path, mode); + std::cout << "mkdir, the path: " << path << ", the mode: " << mode << ", the ret: " << ret << std::endl; + return ret; +} + +int CurveFileSystem::Opendir(const char* path, DirStream* dirstream) { + 
int ret = curvefs_opendir(instance_, path, (dir_stream_t*)dirstream); + std::cout << "opendir, the path: " << path << ", the dirstream ino: " << dirstream->ino << ", the ret: " << ret << std::endl; + return ret; +} + +int CurveFileSystem::Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes) { + int ret = curvefs_getdents(instance_, (dir_stream_t*)dirstream, contents, maxread, realbytes); + std::cout << "getdents, the dirstream ino: " << dirstream->ino << ", the maxread: " << maxread << ", the realbytes: " << realbytes << ", the ret: " << ret << std::endl; + return ret; +} + +int CurveFileSystem::Closedir(DirStream* dirstream) { + int ret = curvefs_closedir(instance_, (dir_stream_t*)dirstream);; + std::cout << "closedir, the fd: " << dirstream->fh << " ino:" << dirstream->ino << std::endl; + return ret; +} + +int CurveFileSystem::Rmdir(const char* path) { + int ret = curvefs_rmdir(instance_, path); + std::cout << "rmdir, the path: " << path << ", the ret: " << ret << std::endl; + return ret; +} + +int CurveFileSystem::Rename(const char* oldpath, const char* newpath) { + int ret = curvefs_rename(instance_, oldpath, newpath); + std::cout << "rename, the oldpath: " << oldpath << ", the newpath: " << newpath << ", the ret: " << ret << std::endl; + return ret; + +} +int CurveFileSystem::Link(const char* oldpath, const char* newpath) { + throw std::runtime_error("未实现"); +} + +int CurveFileSystem::Symlink(const char* oldpath, const char* newpath) { + throw std::runtime_error("未实现"); +} + +int CurveFileSystem::Readlink(const char* path, char* buf, size_t bufsize) { + throw std::runtime_error("未实现"); +} + +int CurveFileSystem::Chmod(const char* path, mode_t mode) { + throw std::runtime_error("未实现"); +} + +int CurveFileSystem::Chown(const char* path, uid_t uid, gid_t gid) { + throw std::runtime_error("未实现"); +} + +int CurveFileSystem::Truncate(const char* path, off_t length) { + struct stat attr; + attr.st_size = length; + int set = 
POSIX_SET_ATTR_SIZE ; + int ret = curvefs_setattr(instance_, path, &attr, set); + return ret; +} + +int CurveFileSystem::Utime(const char* path, const struct utimbuf* ubuf) { + throw std::runtime_error("未实现"); +} + + + +std::string CurveFileSystem::NormalizePath(const std::string& path) { + throw std::runtime_error("未实现"); +} + + +} // namespace filesystem +} // namespace intercept diff --git a/intercept/filesystem/curve_filesystem.h b/intercept/filesystem/curve_filesystem.h new file mode 100644 index 0000000..306b802 --- /dev/null +++ b/intercept/filesystem/curve_filesystem.h @@ -0,0 +1,47 @@ +#ifndef CURVE_FILESYSTEM_H +#define CURVE_FILESYSTEM_H + +#include "abstract_filesystem.h" +namespace intercept { +namespace filesystem { +class CurveFileSystem : public AbstractFileSystem { +public: + CurveFileSystem(); + ~CurveFileSystem() override; + void Init() override; + void Shutdown() override; + int Open(const char* path, int flags, int mode) override; + ssize_t Read(int fd, void* buf, size_t count) override; + ssize_t Write(int fd, const void* buf, size_t count) override; + int Close(int fd) override; + off_t Lseek(int fd, off_t offset, int whence) override; + int Stat(const char* path, struct stat* st) override; + int Fstat(int fd, struct stat* st) override; + int Fsync(int fd) override; + int Ftruncate(int fd, off_t length) override; + int Unlink(const char* path) override; + int Mkdir(const char* path, mode_t mode) override; + int Opendir(const char* path, DirStream* dirstream); + int Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes); + int Closedir(DirStream* dirstream); + int Rmdir(const char* path) override; + int Rename(const char* from, const char* to) override; + int Link(const char* from, const char* to) override; + int Symlink(const char* from, const char* to) override; + int Readlink(const char* path, char* buf, size_t bufsize) override; + int Chmod(const char* path, mode_t mode) override; + int Chown(const char* path, 
uid_t uid, gid_t gid) override; + int Truncate(const char* path, off_t length) override; + int Utime(const char* path, const struct utimbuf* times) override; + + +protected: + std::string NormalizePath(const std::string& path) override; + uintptr_t instance_; +}; + +} // namespace filesystem +} // namespace intercept + + +#endif // CURVE_FILESYSTEM_H diff --git a/intercept/filesystem/dummy_filesystem.cpp b/intercept/filesystem/dummy_filesystem.cpp new file mode 100644 index 0000000..79825bf --- /dev/null +++ b/intercept/filesystem/dummy_filesystem.cpp @@ -0,0 +1,186 @@ +#include +#include + +#include "dummy_filesystem.h" + +namespace intercept { +namespace filesystem{ + +std::size_t g_size = 10240000000; +char* DummyFileSystem::memory_ = nullptr; + +DummyFileSystem::DummyFileSystem() +{ + if (memory_ == nullptr) { + memory_ = new char[g_size]; + //memset(memory_, 'j', g_size); + std::cout << "Memory allocated for shared_memory" << std::endl; + } + std::cout << "DummyFileSystem created" << std::endl; +} + +DummyFileSystem::~DummyFileSystem() +{ + std::cout << "DummyFileSystem destroyed, copy num: " << copynum_ << std::endl; + if (memory_ != nullptr) { + delete[] memory_; + memory_ = nullptr; + std::cout << "Memory deallocated for shared_memory" << std::endl; + } +} + +void DummyFileSystem::Init() { + std::cout << "DummyFileSystem Init" << std::endl; +} + +void DummyFileSystem::Shutdown() { + std::cout << "DummyFileSystem Shutdown" << std::endl; +} + +int DummyFileSystem::Open(const char* path, int flags, int mode) { + fd_.fetch_add(1); + std::cout << "DummyFileSystem Open: " << path << " ret: " << fd_.load() << std::endl; + + return fd_.load(); +} + +char buffer[] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; +ssize_t DummyFileSystem::Read(int fd, void* buf, size_t count) { + // std::cout << "DummyFileSystem Read: " << fd << std::endl; + offset_ += count; + if (offset_ > g_size - 
count) { + // std::cout << "begin offset_: " << offset_ << " g_size: "<< g_size << ", count: " << count << std::endl; + offset_ = offset_ % (g_size - 10000000); + // std::cout << "after offset_: " << offset_ << std::endl; + } + if (offset_ < (g_size - 10000000)) { + memcpy((char*)buf, memory_ + offset_, count); + // memcpy((char*)buf, buffer, count); + } else { + memcpy((char*)buf, memory_ + 128, count); + // memcpy((char*)buf, buffer, count); + } + copynum_++; + return count; +} + +ssize_t DummyFileSystem::Write(int fd, const void* buf, size_t count) { + std::cout << "DummyFileSystem Write: " << fd << ", count: " << count << std::endl; + //memcpy(memory_ + offset_, buf, count); + return count; +} + +int DummyFileSystem::Close(int fd) { + std::cout << "DummyFileSystem Close: " << fd << " ,copynum_ :" << copynum_ << std::endl; + return 0; +} + + +off_t DummyFileSystem::Lseek(int fd, off_t offset, int whence) { + std::cout << "DummyFileSystem Lseek: " << fd << std::endl; + + if (offset_ > g_size - 10000000) { + offset_ = offset_ % (g_size-10000000); + } else { + offset_ = offset; + } + return 0; +} + +int DummyFileSystem::Stat(const char* path, struct stat* buf) { + buf->st_ino = 111111; + std::cout << "DummyFileSystem Stat: " << path << std::endl; + return 0; +} + +int DummyFileSystem::Fstat(int fd, struct stat* buf) { + std::cout << "DummyFileSystem Fstat: " << fd << std::endl; + return 0; +} + +int DummyFileSystem::Fsync(int fd) { + std::cout << "DummyFileSystem Fsync: " << fd << std::endl; + return 0; +} + +int DummyFileSystem::Ftruncate(int fd, off_t length) { + std::cout << "DummyFileSystem Ftruncate: " << fd << std::endl; + return 0; +} + + +int DummyFileSystem::Unlink(const char* path) { + std::cout << "DummyFileSystem Unlink: " << path << std::endl; + return 0; +} + + +int DummyFileSystem::Mkdir(const char* path, mode_t mode) { + std::cout << "DummyFileSystem Mkdir: " << path << std::endl; + return 0; +} + +int DummyFileSystem::Opendir(const char* path, 
DirStream* dirstream) { + std::cout << "DummyFileSystem Opendir: " << path << std::endl; + return 0; +} + +int DummyFileSystem::Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes) { + std::cout << "DummyFileSystem getdentes: " << std::endl; + return 0; +} + + +int DummyFileSystem::Closedir(DirStream* dirstream) { + std::cout << "DummyFileSystem Closedir: " << std::endl; + return 0; +} + +int DummyFileSystem::Rmdir(const char* path) { + std::cout << "DummyFileSystem Rmdir: " << path << std::endl; + return 0; +} + +int DummyFileSystem::Rename(const char* oldpath, const char* newpath) { + std::cout << "DummyFileSystem Rename: " << oldpath << " to " << newpath << std::endl; + return 0; +} + +int DummyFileSystem::Link(const char* oldpath, const char* newpath) { + std::cout << "DummyFileSystem Link: " << oldpath << " to " << newpath << std::endl; + return 0; +} + +int DummyFileSystem::Symlink(const char* oldpath, const char* newpath) { + std::cout << "DummyFileSystem Symlink: " << oldpath << std::endl; + return 0; +} + +int DummyFileSystem::Readlink(const char* path, char* buf, size_t bufsize) { + throw std::runtime_error("未实现"); +} + +int DummyFileSystem::Chmod(const char* path, mode_t mode) { + throw std::runtime_error("未实现"); +} + +int DummyFileSystem::Chown(const char* path, uid_t uid, gid_t gid) { + throw std::runtime_error("未实现"); +} + +int DummyFileSystem::Truncate(const char* path, off_t length) { + return 0; +} + +int DummyFileSystem::Utime(const char* path, const struct utimbuf* ubuf) { + throw std::runtime_error("未实现"); +} + + + +std::string DummyFileSystem::NormalizePath(const std::string& path) { + throw std::runtime_error("未实现"); +} + +} // namespace intercept +} // namespace filesystem diff --git a/intercept/filesystem/dummy_filesystem.h b/intercept/filesystem/dummy_filesystem.h new file mode 100644 index 0000000..aa0b02b --- /dev/null +++ b/intercept/filesystem/dummy_filesystem.h @@ -0,0 +1,50 @@ +#ifndef DUMMY_FILESYSTEM_H 
+#define DUMMY_FILESYSTEM_H +#include + +#include "abstract_filesystem.h" +namespace intercept { +namespace filesystem { +class DummyFileSystem : public AbstractFileSystem { +public: + DummyFileSystem(); + ~DummyFileSystem() override; + void Init() override; + void Shutdown() override; + int Open(const char* path, int flags, int mode) override; + ssize_t Read(int fd, void* buf, size_t count) override; + ssize_t Write(int fd, const void* buf, size_t count) override; + int Close(int fd) override; + off_t Lseek(int fd, off_t offset, int whence) override; + int Stat(const char* path, struct stat* st) override; + int Fstat(int fd, struct stat* st) override; + int Fsync(int fd) override; + int Ftruncate(int fd, off_t length) override; + int Unlink(const char* path) override; + int Mkdir(const char* path, mode_t mode) override; + int Opendir(const char* path, DirStream* dirstream); + int Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes); + int Closedir(DirStream* dirstream); + int Rmdir(const char* path) override; + int Rename(const char* from, const char* to) override; + int Link(const char* from, const char* to) override; + int Symlink(const char* from, const char* to) override; + int Readlink(const char* path, char* buf, size_t bufsize) override; + int Chmod(const char* path, mode_t mode) override; + int Chown(const char* path, uid_t uid, gid_t gid) override; + int Truncate(const char* path, off_t length) override; + int Utime(const char* path, const struct utimbuf* times) override; + + +protected: + std::string NormalizePath(const std::string& path) override; + uintptr_t instance_; + std::atomic fd_ = 0; + off_t offset_ = 0; + long copynum_ = 0; + static char* memory_; +}; + +} // namespace filesystem +} // namespace intercept +#endif // DUMMY_FILESYSTEM_H \ No newline at end of file diff --git a/intercept/filesystem/libcurvefs_external.cpp b/intercept/filesystem/libcurvefs_external.cpp new file mode 100644 index 0000000..5cb3078 --- 
/dev/null +++ b/intercept/filesystem/libcurvefs_external.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2023 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Project: Curve + * Created Date: 2023-07-12 + * Author: Jingli Chen (Wine93) + */ + + +#include +#include +#include +#include + +#include "libcurvefs_external.h" + + +uintptr_t curvefs_create() {return 0;} + +void curvefs_load_config(uintptr_t instance_ptr, + const char* config_file) {} + +void curvefs_release(uintptr_t instance_ptr) {} + +// NOTE: instance_ptr is the pointer of curvefs_mount_t instance. 
+void curvefs_conf_set(uintptr_t instance_ptr, + const char* key, + const char* value) {} + +int curvefs_mount(uintptr_t instance_ptr, + const char* fsname, + const char* mountpoint) {return 0;} + +int curvefs_umonut(uintptr_t instance_ptr, + const char* fsname, + const char* mountpoint) {return 0;} + +// directory +int curvefs_mkdir(uintptr_t instance_ptr, const char* path, uint16_t mode) {return 0;} + +int curvefs_mkdirs(uintptr_t instance_ptr, const char* path, uint16_t mode) {return 0;} + +int curvefs_rmdir(uintptr_t instance_ptr, const char* path) {return 0;} + +int curvefs_opendir(uintptr_t instance_ptr, + const char* path, + dir_stream_t* dir_stream) {return 0;} + +ssize_t curvefs_readdir(uintptr_t instance_ptr, + dir_stream_t* dir_stream, + dirent_t* dirent) {return 0;} + +int curvefs_getdents(uintptr_t instance_ptr, + dir_stream_t* dir_stream, + char* data, size_t maxread, ssize_t* realbytes) {return 0;} + +int curvefs_closedir(uintptr_t instance_ptr, dir_stream_t* dir_stream) {return 0;} + +// file +int curvefs_open(uintptr_t instance_ptr, + const char* path, + uint32_t flags, + uint16_t mode) {return 0;} + +int curvefs_lseek(uintptr_t instance_ptr, + int fd, + uint64_t offset, + int whence){return 0;} + +ssize_t curvefs_read(uintptr_t instance_ptr, + int fd, + char* buffer, + size_t count) {return 0;} + +ssize_t curvefs_write(uintptr_t instance_ptr, + int fd, + char* buffer, + size_t count) {return 0;} + +int curvefs_fsync(uintptr_t instance_ptr, int fd) {return 0;} + +int curvefs_close(uintptr_t instance_ptr, int fd) {return 0;} + +int curvefs_unlink(uintptr_t instance_ptr, const char* path) {return 0;} + +// others +int curvefs_statfs(uintptr_t instance_ptr, struct statvfs* statvfs) {return 0;} + +int curvefs_lstat(uintptr_t instance_ptr, const char* path, struct stat* stat) {return 0;} + +int curvefs_fstat(uintptr_t instance_ptr, int fd, struct stat* stat) {return 0;} + +int curvefs_setattr(uintptr_t instance_ptr, + const char* path, + struct stat* 
stat, + int to_set) {return 0;} + +int curvefs_chmod(uintptr_t instance_ptr, const char* path, uint16_t mode) {return 0;} + +int curvefs_chown(uintptr_t instance_ptr, + const char* path, + uint32_t uid, + uint32_t gid) {return 0;} + +int curvefs_rename(uintptr_t instance_ptr, + const char* oldpath, + const char* newpath) {return 0;} diff --git a/intercept/filesystem/libcurvefs_external.h b/intercept/filesystem/libcurvefs_external.h new file mode 100644 index 0000000..7909596 --- /dev/null +++ b/intercept/filesystem/libcurvefs_external.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2023 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Project: Curve + * Created Date: 2023-07-12 + * Author: Jingli Chen (Wine93) + */ + +#ifndef CURVEFS_SDK_LIBCURVEFS_LIBCURVEFS_H_ +#define CURVEFS_SDK_LIBCURVEFS_LIBCURVEFS_H_ + +#include +#include +#include +#include + + +// Must be synchronized with DirStream if changed +typedef struct { + uint64_t ino; + uint64_t fh; + uint64_t offset; +} dir_stream_t; + +typedef struct { + struct stat stat; + char name[256]; +} dirent_t; + +#ifdef __cplusplus +extern "C" { +#endif + +uintptr_t curvefs_create(); + +void curvefs_load_config(uintptr_t instance_ptr, + const char* config_file); + +void curvefs_release(uintptr_t instance_ptr); + +// NOTE: instance_ptr is the pointer of curvefs_mount_t instance. 
+void curvefs_conf_set(uintptr_t instance_ptr, + const char* key, + const char* value); + +int curvefs_mount(uintptr_t instance_ptr, + const char* fsname, + const char* mountpoint); + +int curvefs_umonut(uintptr_t instance_ptr, + const char* fsname, + const char* mountpoint); + +// directory +int curvefs_mkdir(uintptr_t instance_ptr, const char* path, uint16_t mode); + +int curvefs_mkdirs(uintptr_t instance_ptr, const char* path, uint16_t mode); + +int curvefs_rmdir(uintptr_t instance_ptr, const char* path); + +int curvefs_opendir(uintptr_t instance_ptr, + const char* path, + dir_stream_t* dir_stream); + +ssize_t curvefs_readdir(uintptr_t instance_ptr, + dir_stream_t* dir_stream, + dirent_t* dirent); + +int curvefs_getdents(uintptr_t instance_ptr, + dir_stream_t* dir_stream, + char* data, size_t maxread, ssize_t* realbytes); + +int curvefs_closedir(uintptr_t instance_ptr, dir_stream_t* dir_stream); + +// file +int curvefs_open(uintptr_t instance_ptr, + const char* path, + uint32_t flags, + uint16_t mode); + +int curvefs_lseek(uintptr_t instance_ptr, + int fd, + uint64_t offset, + int whence); + +ssize_t curvefs_read(uintptr_t instance_ptr, + int fd, + char* buffer, + size_t count); + +ssize_t curvefs_write(uintptr_t instance_ptr, + int fd, + char* buffer, + size_t count); + +int curvefs_fsync(uintptr_t instance_ptr, int fd); + +int curvefs_close(uintptr_t instance_ptr, int fd); + +int curvefs_unlink(uintptr_t instance_ptr, const char* path); + +// others +int curvefs_statfs(uintptr_t instance_ptr, struct statvfs* statvfs); + +int curvefs_lstat(uintptr_t instance_ptr, const char* path, struct stat* stat); + +int curvefs_fstat(uintptr_t instance_ptr, int fd, struct stat* stat); + +int curvefs_setattr(uintptr_t instance_ptr, + const char* path, + struct stat* stat, + int to_set); + +int curvefs_chmod(uintptr_t instance_ptr, const char* path, uint16_t mode); + +int curvefs_chown(uintptr_t instance_ptr, + const char* path, + uint32_t uid, + uint32_t gid); + +int 
curvefs_rename(uintptr_t instance_ptr, + const char* oldpath, + const char* newpath); +#ifdef __cplusplus +} +#endif + +#endif // CURVEFS_SDK_LIBCURVEFS_LIBCURVEFS_H_ diff --git a/intercept/filesystem/s3fs_filesystem.cpp b/intercept/filesystem/s3fs_filesystem.cpp new file mode 100644 index 0000000..9dabc1a --- /dev/null +++ b/intercept/filesystem/s3fs_filesystem.cpp @@ -0,0 +1,222 @@ +#include +#include +#include "spdlog/spdlog.h" + +#include "s3fs_filesystem.h" +#include "s3fs_lib.h" + +namespace intercept { +namespace filesystem { + +S3fsFileSystem::S3fsFileSystem() { + +} + +S3fsFileSystem::~S3fsFileSystem() { + s3fs_global_uninit(); +} + + +void S3fsFileSystem::Init() { + s3fs_global_init(); +} + +void S3fsFileSystem::Shutdown() { + std::cout << "S3fsFileSystem::Shutdown" << std::endl; +} + +int S3fsFileSystem::Open(const char* path, int flags, int mode) { + // std::cout << "S3fsFileSystem::Open: " << path << std::endl; + spdlog::info("S3fsFileSystem::Open:{}", path); + return posix_s3fs_open(path, flags, mode); +} + +ssize_t S3fsFileSystem::MultiRead(int fd, void* buf, size_t count) { + intercept::common::Timer timer("server S3fsFileSystem::MultiRead"); + int numThreads = intercept::common::Configure::getInstance().getConfig("opThreadnum") == "" ? + 1 : atoi(intercept::common::Configure::getInstance().getConfig("opThreadnum").c_str()); + size_t partSize = count / numThreads; // Part size for each thread + size_t remaining = count % numThreads; // Remaining part + + std::vector threads; + char* charBuf = static_cast(buf); + + std::atomic totalBytesRead(0); // Atomic variable to accumulate bytes read + std::mutex readMutex; // Mutex to protect shared variable + + for (size_t i = 0; i < numThreads; ++i) { + size_t offset = i * partSize; + size_t size = (i == numThreads - 1) ? 
(partSize + remaining) : partSize; + threads.emplace_back([=, &totalBytesRead, &readMutex]() { + ssize_t bytesRead = posix_s3fs_multiread(fd, charBuf + offset, size, offset); + spdlog::debug("S3fsFileSystem::MultiRead, fd: {}, offset: {}, size: {}, bytesRead: {}", fd, offset, size, bytesRead); + std::lock_guard lock(readMutex); + totalBytesRead += bytesRead; + }); + } + for (auto& th : threads) { + th.join(); + } + posix_s3fs_lseek(fd, totalBytesRead.load(), SEEK_CUR); + spdlog::info("S3fsFileSystem::MultiRead, read bytes: {}", totalBytesRead.load()); + return totalBytesRead.load(); // Return the total bytes read +} + +ssize_t S3fsFileSystem::Read(int fd, void* buf, size_t count) { + // std::cout << "S3fsFileSystem::Read: " << fd << std::endl; + spdlog::debug("S3fsFileSystem::Read, fd: {}, count: {}", fd, count); + return posix_s3fs_read(fd, buf, count); +} + +ssize_t S3fsFileSystem::MultiWrite(int fd, const void* buf, size_t count) { + intercept::common::Timer timer("server S3fsFileSystem::MultiWrite"); + int numThreads = intercept::common::Configure::getInstance().getConfig("opThreadnum") == "" ? + 1 : atoi(intercept::common::Configure::getInstance().getConfig("opThreadnum").c_str()); + size_t partSize = count / numThreads; // Part size for each thread + size_t remaining = count % numThreads; // Remaining part + + std::vector threads; + const char* charBuf = static_cast(buf); + + std::atomic totalBytesWrite(0); // Atomic variable to accumulate bytes write + std::mutex writeMutex; // Mutex to protect shared variable + + for (size_t i = 0; i < numThreads; ++i) { + size_t offset = i * partSize; + size_t size = (i == numThreads - 1) ? 
(partSize + remaining) : partSize; + threads.emplace_back([=, &totalBytesWrite, &writeMutex]() { + ssize_t bytesWrite = posix_s3fs_multiwrite(fd, charBuf + offset, size, offset); + spdlog::debug("finish S3fsFileSystem::Multiwrite, fd: {}, offset: {}, size: {}, bytesRead: {}", fd, offset, size, bytesWrite); + std::lock_guard lock(writeMutex); + totalBytesWrite += bytesWrite; + }); + } + for (auto& th : threads) { + th.join(); + } + posix_s3fs_lseek(fd, totalBytesWrite.load(), SEEK_CUR); + spdlog::debug("S3fsFileSystem::Multiwrite, multiwrite bytes: {}", totalBytesWrite.load()); + return totalBytesWrite.load(); // Return the total bytes write +} + +ssize_t S3fsFileSystem::Write(int fd, const void* buf, size_t count) { + // std::cout << "S3fsFileSystem::Write: " << fd << std::endl; + spdlog::debug("S3fsFileSystem::Write, fd: {}, count: {}", fd, count); + return posix_s3fs_write(fd, buf, count); +} +int S3fsFileSystem::Close(int fd) { + //std::cout << "S3fsFileSystem::Close: " << fd << std::endl; + spdlog::info("S3fsFileSystem::Close, fd: {}", fd); + return posix_s3fs_close(fd); +} + +off_t S3fsFileSystem::Lseek(int fd, off_t offset, int whence) { + //std::cout << "S3fsFileSystem::Lseek: " << fd << std::endl; + spdlog::debug("S3fsFileSystem::Lseek, fd: {}, offset: {}, whence: {}", fd, offset, whence); + return posix_s3fs_lseek(fd, offset, whence); +} + +int S3fsFileSystem::Stat(const char* path, struct stat* statbuf) { + // std::cout << "S3fsFileSystem::Stat: " << path << std::endl; + spdlog::info("S3fsFileSystem::Stat, path: {}", path); + int ret = posix_s3fs_stat(path, statbuf); + return ret; +} + +int S3fsFileSystem::Fstat(int fd, struct stat* statbuf) { + // std::cout << "S3fsFileSystem::Fstat: " << fd << std::endl; + spdlog::info("S3fsFileSystem::Stat, fd: {}", fd); + int ret = posix_s3fs_fstat(fd, statbuf); + return ret; +} + +int S3fsFileSystem::Fsync(int fd) { + // std::cout << "S3fsFileSystem::Fsync: " << fd << std::endl; + spdlog::info("S3fsFileSystem::Fsync, 
fd: {} no implement....", fd); + return 0; +} + +int S3fsFileSystem::Ftruncate(int fd, off_t length) { + // std::cout << "S3fsFileSystem::Ftruncate: " << fd << " " << length << std::endl; + spdlog::info("S3fsFileSystem::Ftruncate, fd: {} length: {} no implement...", fd, length); + return 0; +} + +int S3fsFileSystem::Unlink(const char* path) { + // std::cout << "S3fsFileSystem::Unlink: " << path << std::endl; + spdlog::info("S3fsFileSystem::Unlink, path: {}", path); + return posix_s3fs_unlink(path); +} +int S3fsFileSystem::Mkdir(const char* path, mode_t mode) { + // std::cout << "S3fsFileSystem::Mkdir: " << path << " " << mode << std::endl; + spdlog::info("S3fsFileSystem::Mkdir, path: {} mode: {}", path, mode); + return posix_s3fs_mkdir(path, mode); +} + +int S3fsFileSystem::Opendir(const char* path, DirStream* dirstream) { + int ret = posix_s3fs_opendir(path, (S3DirStream*)dirstream); + // std::cout << "S3fsFileSystem::Opendir: " << path << std::endl; + spdlog::info("S3fsFileSystem::Opendir path: {} ret {}", path, ret); + return 0; +} + +int S3fsFileSystem::Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes) { + //std::cout << "S3fsFileSystem::Getdents: " << dirstream << " " << maxread << " " << realbytes << std::endl; + int ret = posix_s3fs_getdents((S3DirStream*)dirstream, contents, maxread, realbytes); + spdlog::info("S3fsFileSystem::Getdents, maxread: {}, realbytes: {}", maxread, *realbytes); + return ret; +} + +int S3fsFileSystem::Closedir(DirStream* dirstream) { + // std::cout << "S3fsFileSystem::Closedir: " << dirstream << std::endl; + int ret = posix_s3fs_closedir((S3DirStream*)dirstream); + spdlog::info("S3fsFileSystem::Closedir, ret: {}", ret); + return ret; +} + +int S3fsFileSystem::Rmdir(const char* path) { + std::cout << "S3fsFileSystem::Rmdir: " << path << std::endl; + return 0; +} + +int S3fsFileSystem::Rename(const char* from, const char* to) { + std::cout << "S3fsFileSystem::Rename: " << from << " to " << to << 
std::endl; + return 0; +} + +int S3fsFileSystem::Link(const char* oldpath, const char* newpath) { + throw std::runtime_error("未实现"); +} + +int S3fsFileSystem::Symlink(const char* oldpath, const char* newpath) { + throw std::runtime_error("未实现"); +} + +int S3fsFileSystem::Readlink(const char* path, char* buf, size_t bufsize) { + throw std::runtime_error("未实现"); +} + +int S3fsFileSystem::Chmod(const char* path, mode_t mode) { + throw std::runtime_error("未实现"); +} + +int S3fsFileSystem::Chown(const char* path, uid_t uid, gid_t gid) { + throw std::runtime_error("未实现"); +} + +int S3fsFileSystem::Truncate(const char* path, off_t length) { + std::cout << "S3fsFileSystem::Truncate" << std::endl; + return 0; +} + +int S3fsFileSystem::Utime(const char* path, const struct utimbuf* ubuf) { + throw std::runtime_error("未实现"); +} + + +std::string S3fsFileSystem::NormalizePath(const std::string& path) { + throw std::runtime_error("未实现"); +} + +} // namespace filesystem +} // namespace intercept diff --git a/intercept/filesystem/s3fs_filesystem.h b/intercept/filesystem/s3fs_filesystem.h new file mode 100644 index 0000000..33b84c7 --- /dev/null +++ b/intercept/filesystem/s3fs_filesystem.h @@ -0,0 +1,49 @@ +#ifndef S3FS_FILESYSTEM_H +#define S3FS_FILESYSTEM_H + +#include "abstract_filesystem.h" +namespace intercept { +namespace filesystem { + +class S3fsFileSystem : public AbstractFileSystem { +public: + S3fsFileSystem(); + ~S3fsFileSystem() override; + void Init() override; + void Shutdown() override; + int Open(const char* path, int flags, int mode) override; + ssize_t Read(int fd, void* buf, size_t count) override; + ssize_t Write(int fd, const void* buf, size_t count) override; + int Close(int fd) override; + off_t Lseek(int fd, off_t offset, int whence) override; + int Stat(const char* path, struct stat* st) override; + int Fstat(int fd, struct stat* st) override; + int Fsync(int fd) override; + int Ftruncate(int fd, off_t length) override; + int Unlink(const char* path) 
override; + int Mkdir(const char* path, mode_t mode) override; + int Opendir(const char* path, DirStream* dirstream); + int Getdents(DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes); + int Closedir(DirStream* dirstream); + int Rmdir(const char* path) override; + int Rename(const char* from, const char* to) override; + int Link(const char* from, const char* to) override; + int Symlink(const char* from, const char* to) override; + int Readlink(const char* path, char* buf, size_t bufsize) override; + int Chmod(const char* path, mode_t mode) override; + int Chown(const char* path, uid_t uid, gid_t gid) override; + int Truncate(const char* path, off_t length) override; + int Utime(const char* path, const struct utimbuf* times) override; + + ssize_t MultiRead(int fd, void* buf, size_t count) override; + ssize_t MultiWrite(int fd, const void* buf, size_t count) override; + +protected: + std::string NormalizePath(const std::string& path) override; + +}; + +} // namespace filesystem +} // namespace intercept + +#endif \ No newline at end of file diff --git a/intercept/filesystem/s3fs_lib.h b/intercept/filesystem/s3fs_lib.h new file mode 100644 index 0000000..69a5980 --- /dev/null +++ b/intercept/filesystem/s3fs_lib.h @@ -0,0 +1,63 @@ +#ifndef S3FS_S3FS_LIB_H_ +#define S3FS_S3FS_LIB_H_ + +#ifdef S3FS_MALLOC_TRIM +#ifdef HAVE_MALLOC_TRIM +#include +#define S3FS_MALLOCTRIM(pad) malloc_trim(pad) +#else // HAVE_MALLOC_TRIM +#define S3FS_MALLOCTRIM(pad) +#endif // HAVE_MALLOC_TRIM +#else // S3FS_MALLOC_TRIM +#define S3FS_MALLOCTRIM(pad) +#endif // S3FS_MALLOC_TRIM + + +//------------------------------------------------------------------- +// posix interface functions +//------------------------------------------------------------------- +#ifdef __cplusplus +extern "C" { +#endif + +struct S3DirStream; + +void s3fs_global_init(); + +void s3fs_global_uninit(); + +int posix_s3fs_create(const char* _path, int flags, mode_t mode); + +int posix_s3fs_open(const 
char* _path, int flags, mode_t mode); + +int posix_s3fs_multiread(int fd, void* buf, size_t size, off_t file_offset); + +int posix_s3fs_read(int fd, void* buf, size_t size); + +int posix_s3fs_multiwrite(int fd, const void* buf, size_t size, off_t file_offset); + +int posix_s3fs_write(int fd, const void* buf, size_t size); + +off_t posix_s3fs_lseek(int fd, off_t offset, int whence); + +int posix_s3fs_close(int fd); + +int posix_s3fs_stat(const char* _path, struct stat* stbuf); + +int posix_s3fs_fstat(int fd, struct stat* stbuf) ; + +int posix_s3fs_mkdir(const char* _path, mode_t mode); + +int posix_s3fs_opendir(const char* _path, S3DirStream* dirstream); + +int posix_s3fs_getdents(S3DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes); + +int posix_s3fs_closedir(S3DirStream* dirstream); + +int posix_s3fs_unlink(const char* _path); + +#ifdef __cplusplus +} +#endif + +#endif // S3FS_S3FS_LIB_H_ \ No newline at end of file diff --git a/intercept/internal/CMakeLists.txt b/intercept/internal/CMakeLists.txt new file mode 100644 index 0000000..0ca488e --- /dev/null +++ b/intercept/internal/CMakeLists.txt @@ -0,0 +1,12 @@ +# internal/CMakeLists.txt + +file(GLOB INTERNAL_SOURCES *.cpp) + +add_library(intercept_internal ${INTERNAL_SOURCES}) +target_include_directories(intercept_internal PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(intercept_internal PUBLIC common_lib) + +add_library(intercept_internal_client ${INTERNAL_SOURCES}) +target_include_directories(intercept_internal_client PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_compile_options(intercept_internal_client PUBLIC -fPIC -mavx2) +target_link_libraries(intercept_internal_client PUBLIC common_lib_client) diff --git a/intercept/internal/metainfo.h b/intercept/internal/metainfo.h new file mode 100644 index 0000000..078eca5 --- /dev/null +++ b/intercept/internal/metainfo.h @@ -0,0 +1,112 @@ +// Copyright (c) 2022 by Apex.AI Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef IOX_EXAMPLES_REQUEST_AND_RESPONSE_TYPES_HPP +#define IOX_EXAMPLES_REQUEST_AND_RESPONSE_TYPES_HPP + +#include +#include +#include +#include + +#include + + + +#define SERVICE_FLAG "interceptservice" +#define DUMMY_INSTANCE_FLAG "dummyserver" +#define INTERCEPT_INSTANCE_FLAG "interceptserver" + +#define ICEORYX "ICEORYX" + +namespace intercept { +namespace internal { +//! [request] +struct AddRequest +{ + uint64_t augend{0}; + uint64_t addend{0}; +}; +//! [request] + +//! [response] +struct AddResponse +{ + uint64_t sum{0}; +}; +//! 
[response] + +struct UserRequest +{ + uint64_t pid{0}; + uint64_t threadid{0}; +}; + +struct UserResponse +{ + uint64_t pid{0}; + uint64_t threadid{0}; +}; + + +struct Metainfo { + int type = 0; + int fd = 0; + size_t count = 0; +}; + + + +struct ServiceMetaInfo { + std::string service = ""; + std::string instance = ""; + std::string event = ""; + std::string serverType = ""; // server类型 : normal dummy +}; + +} // namespace internal +} // namespace intercept + + +#define MAX_LENGTH 2000000 +// 生成随机字符,不包括 '\0' +// char randomChar() { +// const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; +// return charset[rand() % (sizeof(charset) - 1)]; +// } + +// // 生成随机字符串 +// char* generateRandomString(size_t length) { +// if (length > MAX_LENGTH) { +// fprintf(stderr, "String length is too long.\n"); +// } + +// char *str = (char*)malloc((length + 1) * sizeof(char)); // +1 为字符串的终止符 '\0' 预留空间 +// if (str == NULL) { +// perror("malloc"); +// } + +// for (size_t i = 0; i < length; ++i) { +// str[i] = randomChar(); +// } +// str[length] = '\0'; // 确保字符串以空字符结尾 + +// return str; +// } + + + +#endif // IOX_EXAMPLES_REQUEST_AND_RESPONSE_TYPES_HPP diff --git a/intercept/internal/posix_op_req_res.cpp b/intercept/internal/posix_op_req_res.cpp new file mode 100644 index 0000000..209903b --- /dev/null +++ b/intercept/internal/posix_op_req_res.cpp @@ -0,0 +1,1014 @@ +#include +#include +#include +#include +#include +#include + +#include "posix_op_req_res.h" + +namespace intercept { +namespace internal { +std::string TypeToStr(PosixOpType opType) { + switch(opType) { + case PosixOpType::OPEN: return "OPEN"; + case PosixOpType::WRITE: return "WRITE"; + case PosixOpType::READ: return "READ"; + case PosixOpType::ACCESS: return "ACCESS"; + case PosixOpType::CLOSE: return "CLOSE"; + case PosixOpType::FSYNC: return "FSYNC"; + case PosixOpType::TRUNCATE: return "TRUNCATE"; + case PosixOpType::FTRUNCATE: return "FTRUNCATE"; + case PosixOpType::FUTIMES: return 
"FUTIMES"; + case PosixOpType::LSEEK: return "LSEEK"; + case PosixOpType::MKDIR: return "MKDIR"; + case PosixOpType::MKNOD: return "MKNOD"; + case PosixOpType::OPENDIR: return "OPENDIR"; + case PosixOpType::READDIR: return "READDIR"; + case PosixOpType::GETDENTS: return "GETDENTS"; + case PosixOpType::CLOSEDIR: return "CLOSEDIR"; + case PosixOpType::RENAME: return "RENAME"; + case PosixOpType::STAT: return "STAT"; + case PosixOpType::FSTAT: return "FSTAT"; + case PosixOpType::UNLINK: return "UNLINK"; + case PosixOpType::UTIMES: return "UTIMES"; + case PosixOpType::TERMINAL: return "TERMINAL"; + // ... 其他操作类型 + default: return "UNKNOWN"; + } +} + + +// PosixOpReqRes 类的实现 +PosixOpReqRes::PosixOpReqRes(PosixOpType opType) : opType_(opType) {} + +PosixOpReqRes::PosixOpReqRes(const long* args, long* result) {} + +void PosixOpReqRes::SetOpType(PosixOpType type) { opType_ = type; } + +PosixOpType PosixOpReqRes::GetOpType() const { return opType_; } + + +// ------------------------------open--------------------------- +OpenOpReqRes::OpenOpReqRes(const char* path, int flags, mode_t mode) + : PosixOpReqRes(PosixOpType::OPEN) { + strcpy(requestData_.path, path); + requestData_.flags = flags; + requestData_.mode = mode; + requestData_.opType = opType_; +} + +OpenOpReqRes::OpenOpReqRes(const long *args, long *result) : PosixOpReqRes(PosixOpType::OPEN) { + strcpy(requestData_.path, reinterpret_cast(args[0])); + requestData_.flags = static_cast(args[1]); + requestData_.mode = static_cast(args[2]); + requestData_.opType = opType_; +} + +OpenOpReqRes::~OpenOpReqRes() { + +} + +void OpenOpReqRes::CopyRequestDataToBuf(void* buf) { + // 将请求数据复制到缓冲区 + memcpy(buf, &requestData_.opType, sizeof(requestData_.opType)); + + memcpy(buf + sizeof(requestData_.opType), requestData_.path, sizeof(requestData_.path)); + + memcpy(buf + sizeof(requestData_.opType) + sizeof(requestData_.path), &requestData_.flags, sizeof(requestData_.flags)); + + memcpy(buf + sizeof(requestData_.opType) + 
sizeof(requestData_.path) + sizeof(requestData_.flags), &requestData_.mode, sizeof(requestData_.mode)); + return; +} + +int OpenOpReqRes::GetRequestSize() { + return sizeof(OpenRequestData); +} + +int OpenOpReqRes::GetRequestAlignSize() { + return alignof(OpenRequestData); +} + +int OpenOpReqRes::GetResponseSize() { + return sizeof(OpenResponseData); +} + +int OpenOpReqRes::GetResponseAlignSize() { + return alignof(OpenResponseData); +} + +// 将response转化为Response +PosixOpResponse& OpenOpReqRes::GetResponse() { + return responseData_; +} + +void OpenOpReqRes::SetResponse(void* response) { + OpenResponseData + *responseData = reinterpret_cast(response); + responseData_.opType = responseData->opType; + responseData_.fd = responseData->fd; + spdlog::info("OpenOpReqRes::SetResponse: fd ={}",responseData_.fd); +} + + +// ------------------------------read---------------------------- +ReadOpReqRes::ReadOpReqRes(int fd, void* buf, size_t count) + : PosixOpReqRes(PosixOpType::READ) { + // for request + requestData_.fd = fd; + requestData_.count = count; + requestData_.opType = opType_; + // for response + responseData_.buf = buf; +} + +ReadOpReqRes::ReadOpReqRes(const long *args, long *result): PosixOpReqRes(PosixOpType::READ) { + requestData_.opType = opType_; + // for reqeust + requestData_.fd = static_cast(args[0]); + requestData_.count = static_cast(args[2]); + // for response + responseData_.buf = reinterpret_cast(args[1]); +} + +ReadOpReqRes::~ReadOpReqRes() { + // 析构函数 +} + +void ReadOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int ReadOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int ReadOpReqRes::GetRequestAlignSize() { + return alignof(ReadRequestData); +} + +int ReadOpReqRes::GetResponseSize() { + // 响应数据大小 结构体大小+需要的长度 + return sizeof(responseData_) + requestData_.count; +} + +int ReadOpReqRes::GetResponseAlignSize() { + return alignof(ReadResponseData); +} + +PosixOpResponse& 
ReadOpReqRes::GetResponse() { + return responseData_; +} + +void ReadOpReqRes::SetResponse(void* response) { + ReadResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; + responseData_.length = responseData->length; + if (intercept::common::Configure::getInstance().getConfig("multiop") == "true" + && responseData_.length >= atol(intercept::common::Configure::getInstance().getConfig("blocksize").c_str())) { + SetResponseMultithreads(response); + } else { + if (responseData_.length > 0 && responseData_.buf != nullptr) { + intercept::common::Timer timer("client ReadOpReqRes::SetResponse time "); + memcpy(responseData_.buf, responseData->content, responseData->length); + //std::cout << "the read response, the length: " << responseData->length << " , the buf: " << (char*)responseData_.buf << std::endl; + } else { + spdlog::debug("the length: {}, the buf maybe nullptr", responseData_.length); + } + } + +} + +void initialize_memory(char* ptr, size_t size) { + // 通过访问每个页面确保内存已分配 + for (size_t i = 0; i < size; i += sysconf(_SC_PAGESIZE)) { + ptr[i] = 0; + } + ptr[size - 1] = 0; // 访问最后一个字节确保全部内存已分配 +} +void ReadOpReqRes::SetResponseMultithreads(void* response) { + ReadResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; + responseData_.length = responseData->length; + + if (responseData_.length > 0 && responseData_.buf != nullptr) { + intercept::common::Timer timer("client ReadOpReqRes::SetResponseMultithreads time "); + // Determine the number of threads to use (for example, 4) + int numThreads = intercept::common::Configure::getInstance().getConfig("opThreadnum") == "" ? 
+ 1 : atoi(intercept::common::Configure::getInstance().getConfig("opThreadnum").c_str()); + size_t chunkSize = responseData_.length / numThreads; + size_t remainder = responseData_.length % numThreads; + auto copyChunk = [](char* dest, const char* src, size_t len) { + + // initialize_memory(dest, len); + // mlock(dest, len); + // memmove(dest, src, len); + memcpy(dest, src, len); + + // size_t i = 0; + // // 处理前面未对齐的部分 + // while (i < len && reinterpret_cast(dest + i) % 32 != 0) { + // dest[i] = src[i]; + // i++; + // } + + // // 处理对齐的中间部分 + // for (; i + 31 < len; i += 32) { + // __m256i data = _mm256_loadu_si256(reinterpret_cast(src + i)); + // _mm256_storeu_si256(reinterpret_cast<__m256i*>(dest + i), data); + // } + + // // 处理末尾未对齐的部分 + // for (; i < len; ++i) { + // dest[i] = src[i]; + // } + // munlock(dest, len); + }; + + std::vector threads; + std::vector numaNode1Cores = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95}; + + for (int i = 0; i < numThreads; ++i) { + size_t startIdx = i * chunkSize; + size_t len = (i == numThreads - 1) ? 
(chunkSize + remainder) : chunkSize; + + // threads.emplace_back([&, startIdx, len, i]() { + // cpu_set_t cpuset; + // CPU_ZERO(&cpuset); + // CPU_SET(numaNode1Cores[i % numaNode1Cores.size()], &cpuset); + // sched_setaffinity(0, sizeof(cpu_set_t), &cpuset); + + // copyChunk(static_cast(responseData_.buf) + startIdx, responseData->content + startIdx, len); + // }); + threads.emplace_back(copyChunk, static_cast(responseData_.buf) + startIdx, responseData->content + startIdx, len); + } + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } + spdlog::debug("the read response, the length: {}" ,responseData_.length); + } else { + spdlog::debug("the length: {}, the buf maybe nullptr", responseData_.length); + } +} + +// void ReadOpReqRes::SetResponse(void* response) { +// ReadResponseData* responseData = static_cast(response); +// responseData_.opType = responseData->opType; +// responseData_.ret = responseData->ret; +// responseData_.length = responseData->length; + +// if (responseData_.length > 0 && responseData_.buf != nullptr) { + +// int numThreads = intercept::common::Configure::getInstance().getConfig("opThreadnum").empty() ? +// 1 : std::stoi(intercept::common::Configure::getInstance().getConfig("opThreadnum")); +// size_t chunkSize = responseData_.length / numThreads; +// size_t remainder = responseData_.length % numThreads; + +// if (intercept::common::Configure::getInstance().getConfig("multiop") == "true") { +// intercept::common::Timer timer("client ReadOpReqRes::SetResponse time "); +// auto copyChunk = [](char* dest, const char* src, size_t len) { +// memcpy(dest, src, len); +// }; +// std::atomic tasksRemaining(numThreads); +// auto tasksMutex = std::make_shared(); +// auto tasksCondition = std::make_shared(); + +// for (int i = 0; i < numThreads; ++i) { +// size_t startIdx = i * chunkSize; +// size_t len = (i == numThreads - 1) ? 
(chunkSize + remainder) : chunkSize; +// threadPool_.enqueue([=, &tasksRemaining, tasksMutex, tasksCondition]() { +// copyChunk(static_cast(responseData_.buf) + startIdx, responseData->content + startIdx, len); +// if (--tasksRemaining == 0) { +// std::unique_lock lock(*tasksMutex); +// tasksCondition->notify_all(); +// } +// }); +// } + +// { +// std::unique_lock lock(*tasksMutex); +// tasksCondition->wait(lock, [&tasksRemaining] { return tasksRemaining.load() == 0; }); +// } + +// } else { +// memcpy(responseData_.buf, responseData->content, responseData->length); +// } + +// spdlog::debug("The read response, length: {}", responseData_.length); +// } else { +// spdlog::debug("The length: {}, the buffer may be nullptr", responseData_.length); +// } +// } + + +// -----------------------------write------------------------- +WriteOpReqRes::WriteOpReqRes(int fd, void* buf, size_t count) + : PosixOpReqRes(PosixOpType::WRITE) { + requestData_.opType = opType_; + requestData_.fd = fd; + requestData_.buf = buf; + requestData_.count = count; +} + +WriteOpReqRes::WriteOpReqRes(const long *args, long *result) + : PosixOpReqRes(PosixOpType::WRITE) { + // 从参数中初始化 + requestData_.opType = opType_; + requestData_.fd = static_cast(args[0]); + requestData_.buf = reinterpret_cast(args[1]); + requestData_.count = static_cast(args[2]); +} + +WriteOpReqRes::~WriteOpReqRes() { + // 析构函数 +} + +void WriteOpReqRes::CopyRequestDataToBuf(void* buf) { + // 元信息 + memcpy(buf, &requestData_, sizeof(requestData_)); + // 数据 + if (intercept::common::Configure::getInstance().getConfig("multiop") == "true" && + requestData_.count >= atoi(intercept::common::Configure::getInstance().getConfig("blocksize").c_str()) ) { + int numThreads = intercept::common::Configure::getInstance().getConfig("opThreadnum") == "" ? 
+ 1 : atoi(intercept::common::Configure::getInstance().getConfig("opThreadnum").c_str()); + CopyRequestDataToBufMultithread((char*)buf + sizeof(requestData_), requestData_.buf, requestData_.count, numThreads); + } else { + memcpy((char*)buf + sizeof(requestData_), requestData_.buf, requestData_.count); + } +} + +void WriteOpReqRes::CopyRequestDataToBufMultithread(void* dest, const void* src, size_t count, int numThreads) { + size_t chunkSize = count / numThreads; + size_t remainder = count % numThreads; + intercept::common::Timer timer("client WriteOpReqRes::CopyRequestDataToBufMultithread time:"); + auto copyChunk = [](char* dest, const char* src, size_t len) { + memcpy(dest, src, len); + }; + spdlog::info("copy request with multithread for writing, chunksize: {}, remainder: {}", chunkSize, remainder); + std::vector threads; + for (int i = 0; i < numThreads; ++i) { + size_t startIdx = i * chunkSize; + size_t len = (i == numThreads - 1) ? (chunkSize + remainder) : chunkSize; + threads.emplace_back(copyChunk, static_cast(dest + startIdx), static_cast(src + startIdx), len); + } + + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } +} + +int WriteOpReqRes::GetRequestSize() { + return sizeof(requestData_) + requestData_.count; +} + +int WriteOpReqRes::GetRequestAlignSize() { + return alignof(WriteRequestData); +} + +int WriteOpReqRes::GetResponseSize() { + return sizeof(WriteResponseData); +} + +int WriteOpReqRes::GetResponseAlignSize() { + return alignof(WriteResponseData); +} + +PosixOpResponse& WriteOpReqRes::GetResponse() { + return responseData_; +} + +void WriteOpReqRes::SetResponse(void* response) { + WriteResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; + responseData_.length = responseData->length; + // std::cout << "write response, optype: " << (int)responseData_.opType << " , ret: " << responseData_.ret << " , length: " << responseData_.length << 
std::endl; +} + +// -----------------------close------------------------------- +CloseOpReqRes::CloseOpReqRes(int fd) : PosixOpReqRes(PosixOpType::CLOSE) { + requestData_.opType = PosixOpType::CLOSE; + requestData_.fd = fd; +} + +CloseOpReqRes::CloseOpReqRes(const long* args, long* result) : PosixOpReqRes(PosixOpType::CLOSE) { + requestData_.opType = PosixOpType::CLOSE; + requestData_.fd = static_cast(args[0]); +} + +CloseOpReqRes::~CloseOpReqRes() { + // 析构函数 +} + +void CloseOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int CloseOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int CloseOpReqRes::GetRequestAlignSize() { + return alignof(CloseRequestData); +} + +int CloseOpReqRes::GetResponseSize() { + return sizeof(CloseResponseData); +} + +int CloseOpReqRes::GetResponseAlignSize() { + return alignof(CloseResponseData); +} + +PosixOpResponse& CloseOpReqRes::GetResponse() { + return responseData_; +} + +void CloseOpReqRes::SetResponse(void* response) { + CloseResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; +} + +// ------------------------fysnc------------------------------------- +FsyncOpReqRes::FsyncOpReqRes(int fd) { + requestData_.opType = PosixOpType::FSYNC; + requestData_.fd = fd; +} + +FsyncOpReqRes::FsyncOpReqRes(const long* args, long* result) { + requestData_.opType = PosixOpType::FSYNC; + requestData_.fd = static_cast(args[0]); +} + +FsyncOpReqRes::~FsyncOpReqRes() { + +} +void FsyncOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int FsyncOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int FsyncOpReqRes::GetRequestAlignSize() { + return alignof(FsyncRequestData); +} + +int FsyncOpReqRes::GetResponseSize() { + return sizeof(FsyncResponseData); +} + +int FsyncOpReqRes::GetResponseAlignSize() { + return 
alignof(FsyncResponseData); +} + +PosixOpResponse& FsyncOpReqRes::GetResponse() { + return responseData_; +} + +void FsyncOpReqRes::SetResponse(void* response) { + FsyncResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; +} + +// -------------------stat--------------------------- +StatOpReqRes::StatOpReqRes(const char* path, struct stat* st) + : PosixOpReqRes(PosixOpType::STAT){ + requestData_.opType = opType_; + strncpy(requestData_.path, path, strlen(path)); + requestData_.path[strlen(path)] = '\0'; + responseData_.st = st; + spdlog::debug("StatOpReqRes, the type: {}, the path: {}", TypeToStr(requestData_.opType), requestData_.path); +} + +StatOpReqRes::StatOpReqRes(const long* args, long* result) + : PosixOpReqRes(PosixOpType::STAT){ + requestData_.opType = opType_; + strncpy(requestData_.path, (const char*)args[1], strlen((const char*)args[1])); + requestData_.path[strlen((const char*)args[1])] = '\0'; + responseData_.st = (struct stat*)(args[1]); + spdlog::debug("StatOpReqRes, the type: {}, the path: {}", TypeToStr(requestData_.opType), requestData_.path ); +} +StatOpReqRes::~StatOpReqRes() { +} + +void StatOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int StatOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int StatOpReqRes::GetRequestAlignSize() { + return alignof(StatRequestData); +} + +int StatOpReqRes::GetResponseSize() { + return sizeof(StatResponseData); +} + +int StatOpReqRes::GetResponseAlignSize() { + return alignof(StatResponseData); +} + +PosixOpResponse& StatOpReqRes::GetResponse() { + return responseData_; +} + +void StatOpReqRes::SetResponse(void* response) { + StatResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; + memcpy(responseData_.st, &responseData->fileStat, sizeof(struct stat)); +} + +// 
--------------------------------fstat-------------------------------------- + +FstatOpReqRes::FstatOpReqRes(int fd, struct stat* st) + : PosixOpReqRes(PosixOpType::FSTAT){ + requestData_.opType = opType_; + requestData_.fd = fd; + responseData_.st = st; + spdlog::debug("FstatOpReqRes, the type: {}, the fd: {}", TypeToStr(requestData_.opType), requestData_.fd ); + +} + +FstatOpReqRes::FstatOpReqRes(const long* args, long* result) + : PosixOpReqRes(PosixOpType::FSTAT){ + requestData_.opType = opType_; + requestData_.fd = (int)args[0]; + responseData_.st = (struct stat*)(args[1]); + spdlog::debug("FstatOpReqRes, the type: {}, the fd: {}", TypeToStr(requestData_.opType), requestData_.fd); + +} +FstatOpReqRes::~FstatOpReqRes() { +} + +void FstatOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int FstatOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int FstatOpReqRes::GetRequestAlignSize() { + return alignof(FstatRequestData); +} + +int FstatOpReqRes::GetResponseSize() { + return sizeof(FstatResponseData); +} + +int FstatOpReqRes::GetResponseAlignSize() { + return alignof(FstatResponseData); +} + +PosixOpResponse& FstatOpReqRes::GetResponse() { + return responseData_; +} + +void FstatOpReqRes::SetResponse(void* response) { + FstatResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; + memcpy(responseData_.st, &responseData->fileStat, sizeof(struct stat)); +} + +// --------------------------------lseek--------------------------------------- +LseekOpReqRes::LseekOpReqRes(int fd, uint64_t offset, int whence) + : PosixOpReqRes(PosixOpType::LSEEK){ + requestData_.opType = opType_; + requestData_.fd = fd; + requestData_.offset = offset; + requestData_.whence = whence; +} + +LseekOpReqRes::LseekOpReqRes(const long* args, long* result) + : PosixOpReqRes(PosixOpType::LSEEK){ + requestData_.opType = opType_; + requestData_.fd 
= (int)args[0]; + requestData_.offset = (off_t)args[1]; + requestData_.whence = (int)args[2]; +} + +LseekOpReqRes::~LseekOpReqRes() { +} + +void LseekOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int LseekOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int LseekOpReqRes::GetRequestAlignSize() { + return alignof(LseekRequestData); +} + +int LseekOpReqRes::GetResponseSize() { + return sizeof(LseekResponseData); +} + +int LseekOpReqRes::GetResponseAlignSize() { + return alignof(LseekResponseData); +} + +PosixOpResponse& LseekOpReqRes::GetResponse() { + return responseData_; +} + +void LseekOpReqRes::SetResponse(void* response) { + LseekResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; +} + +// -------------------------------mkdir---------------------------------------- +MkdirOpReqRes::MkdirOpReqRes(const char* path, mode_t mode) + : PosixOpReqRes(PosixOpType::MKDIR){ + requestData_.opType = opType_; + strncpy(requestData_.path, path, strlen(path)); + requestData_.path[strlen(path)] = '\0'; + requestData_.mode = mode; +} + +MkdirOpReqRes::MkdirOpReqRes(const long* args, long* result) + : PosixOpReqRes(PosixOpType::MKDIR){ + requestData_.opType = opType_; + strncpy(requestData_.path, (const char*)args[0], strlen((const char*)args[0])); + requestData_.path[strlen((const char*)args[0])] = '\0'; + requestData_.mode = (mode_t)args[1]; +} + +MkdirOpReqRes::~MkdirOpReqRes() { +} + +void MkdirOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int MkdirOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int MkdirOpReqRes::GetRequestAlignSize() { + return alignof(MkdirRequestData); +} + +int MkdirOpReqRes::GetResponseSize() { + return sizeof(MkdirResponseData); +} + +int MkdirOpReqRes::GetResponseAlignSize() { + return alignof(MkdirResponseData); +} + 
+PosixOpResponse& MkdirOpReqRes::GetResponse() { + return responseData_; +} +void MkdirOpReqRes::SetResponse(void* response) { + MkdirResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; +} + +// -------------------------------opendir--------------------------------------- +OpendirOpReqRes::OpendirOpReqRes(const char* path) + : PosixOpReqRes(PosixOpType::OPENDIR){ + requestData_.opType = opType_; + strncpy(requestData_.path, path, strlen(path)); + requestData_.path[strlen(path)] = '\0'; +} + +OpendirOpReqRes::OpendirOpReqRes(const long* args, long* result) { + requestData_.opType = opType_; + strncpy(requestData_.path, (const char*)args[0], strlen((const char*)args[0])); + requestData_.path[strlen((const char*)args[0])] = '\0'; +} + +OpendirOpReqRes::~OpendirOpReqRes() { +} + +void OpendirOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int OpendirOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int OpendirOpReqRes::GetRequestAlignSize() { + return alignof(OpendirRequestData); +} + +int OpendirOpReqRes::GetResponseSize() { + return sizeof(OpendirResponseData); +} + +int OpendirOpReqRes::GetResponseAlignSize() { + return alignof(OpendirResponseData); +} + +PosixOpResponse& OpendirOpReqRes::GetResponse() { + return responseData_; +} + +void OpendirOpReqRes::SetResponse(void* response) { + OpendirResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; + responseData_.dirStream = responseData->dirStream; +} + +//------------------------------getdents--------------------------- +GetdentsOpReqRes::GetdentsOpReqRes(DirStream dirinfo, char* data, size_t maxread) + : PosixOpReqRes(PosixOpType::GETDENTS){ + requestData_.opType = opType_; + requestData_.dirinfo = dirinfo; + requestData_.maxread = maxread; + + responseData_.data = data; +} 
+ +GetdentsOpReqRes::GetdentsOpReqRes(const long* args, long* result) { + requestData_.opType = opType_; + // TODO +} + +GetdentsOpReqRes::~GetdentsOpReqRes() { + +} + +void GetdentsOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int GetdentsOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int GetdentsOpReqRes::GetRequestAlignSize() { + return alignof(GetdentsRequestData); +} + +int GetdentsOpReqRes::GetResponseSize() { + return sizeof(GetdentsResponseData); +} + +int GetdentsOpReqRes::GetResponseAlignSize() { + return alignof(GetdentsResponseData); +} + +PosixOpResponse& GetdentsOpReqRes::GetResponse() { + return responseData_; +} + +void GetdentsOpReqRes::SetResponse(void* response) { + GetdentsResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.dirinfo = responseData->dirinfo; + responseData_.realbytes = responseData->realbytes;; + responseData_.ret = responseData->ret; + memcpy(responseData_.data, responseData->contents, responseData->realbytes); +} + +// ------------------------------closedir------------------------------- +ClosedirOpReqRes::ClosedirOpReqRes(const DirStream& dirstream) + : PosixOpReqRes(PosixOpType::CLOSEDIR){ + requestData_.opType = opType_; + requestData_.dirstream = dirstream; +} + +ClosedirOpReqRes::ClosedirOpReqRes(const long* args, long* result) { + requestData_.opType = opType_; + // TODO +} + +ClosedirOpReqRes::~ClosedirOpReqRes() { +} + +void ClosedirOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int ClosedirOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int ClosedirOpReqRes::GetRequestAlignSize() { + return alignof(ClosedirRequestData); +} + +int ClosedirOpReqRes::GetResponseSize() { + return sizeof(ClosedirResponseData); +} + +int ClosedirOpReqRes::GetResponseAlignSize() { + return alignof(ClosedirResponseData); +} + 
+PosixOpResponse& ClosedirOpReqRes::GetResponse() { + return responseData_; +} + +void ClosedirOpReqRes::SetResponse(void* response) { + ClosedirResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; +} + +// -------------------------------unlink------------------------------- +UnlinkOpReqRes::UnlinkOpReqRes(const char* path) + : PosixOpReqRes(PosixOpType::UNLINK){ + requestData_.opType = opType_; + strncpy(requestData_.path, path, strlen(path)); + requestData_.path[strlen(path)] = '\0'; +} + +UnlinkOpReqRes::UnlinkOpReqRes(const long* args, long* result) { + requestData_.opType = opType_; + strncpy(requestData_.path, (const char*)args[0], strlen((const char*)args[0])); + requestData_.path[strlen((const char*)args[0])] = '\0'; +} + +UnlinkOpReqRes::~UnlinkOpReqRes() { +} + +void UnlinkOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int UnlinkOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int UnlinkOpReqRes::GetRequestAlignSize() { + return alignof(UnlinkRequestData); +} + +int UnlinkOpReqRes::GetResponseSize() { + return sizeof(UnlinkResponseData); +} + +int UnlinkOpReqRes::GetResponseAlignSize() { + return alignof(UnlinkResponseData); +} + +PosixOpResponse& UnlinkOpReqRes::GetResponse() { + return responseData_; +} +void UnlinkOpReqRes::SetResponse(void* response) { + UnlinkResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; +} + +// ------------------------------rename------------------------------------- +RenameOpReqRes::RenameOpReqRes(const char* oldpath, const char* newpath) + : PosixOpReqRes(PosixOpType::RENAME){ + requestData_.opType = opType_; + strncpy(requestData_.oldpath, oldpath, strlen(oldpath)); + requestData_.oldpath[strlen(oldpath)] = '\0'; + strncpy(requestData_.newpath, newpath, strlen(newpath)); + 
requestData_.newpath[strlen(newpath)] = '\0'; +} + +RenameOpReqRes::RenameOpReqRes(const long* args, long* result) { + requestData_.opType = opType_; + strncpy(requestData_.oldpath, (const char*)args[0], strlen((const char*)args[0])); + requestData_.oldpath[strlen((const char*)args[0])] = '\0'; + strncpy(requestData_.newpath, (const char*)args[1], strlen((const char*)args[1])); + requestData_.newpath[strlen((const char*)args[1])] = '\0'; +} + +RenameOpReqRes::~RenameOpReqRes() { +} + +void RenameOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int RenameOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int RenameOpReqRes::GetRequestAlignSize() { + return alignof(RenameRequestData); +} + +int RenameOpReqRes::GetResponseSize() { + return sizeof(RenameResponseData); +} + +int RenameOpReqRes::GetResponseAlignSize() { + return alignof(RenameResponseData); +} + +PosixOpResponse& RenameOpReqRes::GetResponse() { + return responseData_; +} +void RenameOpReqRes::SetResponse(void* response) { + RenameResponseData* responseData = static_cast(response); + responseData_.ret = responseData->ret; + responseData_.opType = responseData->opType; +} + +// -------------------------truncate--------------------------------- +TruncateOpReqRes::TruncateOpReqRes(const char* path, off_t length) + : PosixOpReqRes(PosixOpType::TRUNCATE){ + requestData_.opType = opType_; + strncpy(requestData_.path, path, strlen(path)); + requestData_.path[strlen(path)] = '\0'; + requestData_.length = length; +} + +TruncateOpReqRes::TruncateOpReqRes(const long* args, long* result) { + requestData_.opType = opType_; + strncpy(requestData_.path, (const char*)args[0], strlen((const char*)args[0])); + requestData_.path[strlen((const char*)args[0])] = '\0'; + requestData_.length = (off_t)args[1]; +} + +TruncateOpReqRes::~TruncateOpReqRes() { +} + +void TruncateOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, 
sizeof(requestData_)); +} + +int TruncateOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int TruncateOpReqRes::GetRequestAlignSize() { + return alignof(TruncateRequestData); +} + +int TruncateOpReqRes::GetResponseSize() { + return sizeof(TruncateResponseData); +} + +int TruncateOpReqRes::GetResponseAlignSize() { + return alignof(TruncateResponseData); +} + +PosixOpResponse& TruncateOpReqRes::GetResponse() { + return responseData_; +} + +void TruncateOpReqRes::SetResponse(void* response) { + TruncateResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; +} + +// -------------------------terminal--------------------------------- +TerminalOpReqRes::TerminalOpReqRes() + : PosixOpReqRes(PosixOpType::TERMINAL){ + requestData_.opType = opType_; +} + +void TerminalOpReqRes::CopyRequestDataToBuf(void* buf) { + memcpy(buf, &requestData_, sizeof(requestData_)); +} + +int TerminalOpReqRes::GetRequestSize() { + return sizeof(requestData_); +} + +int TerminalOpReqRes::GetRequestAlignSize() { + return alignof(TerminalRequestData); +} + +int TerminalOpReqRes::GetResponseSize() { + return sizeof(TerminalResponseData); +} + +int TerminalOpReqRes::GetResponseAlignSize() { + return alignof(TerminalResponseData); +} + +PosixOpResponse& TerminalOpReqRes::GetResponse() { + return responseData_; +} + +void TerminalOpReqRes::SetResponse(void* response) { + TerminalResponseData* responseData = static_cast(response); + responseData_.opType = responseData->opType; + responseData_.ret = responseData->ret; +} + +} // namespace internal +} // namespace intercept diff --git a/intercept/internal/posix_op_req_res.h b/intercept/internal/posix_op_req_res.h new file mode 100644 index 0000000..6be289f --- /dev/null +++ b/intercept/internal/posix_op_req_res.h @@ -0,0 +1,650 @@ +#pragma once +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "common/common.h" + 
+namespace intercept { +namespace internal { +using intercept::common::DirStream; +// 操作类型枚举 +enum class FileType { + FILE = 0, + DIR = 1, +}; + +enum class PosixOpType { + OPEN = 0, + WRITE, + READ, + ACCESS, + CLOSE, + FSYNC, + TRUNCATE, + FTRUNCATE, + FUTIMES, + LSEEK, + MKDIR, + MKNOD, + OPENDIR, + READDIR, + GETDENTS, + CLOSEDIR, + RENAME, + STAT, + FSTAT, + UNLINK, + UTIMES, + TERMINAL, // 程序退出时的操作 + // ... 其他操作类型 +}; + +std::string TypeToStr(PosixOpType opType); + +// 请求数据结构体 +struct PosixOpRequest { + PosixOpType opType; + //virtual ~PosixOpRequest() = default; // 添加虚析构函数使类变为多态 +}; + +// 响应数据结构体 +struct PosixOpResponse{ + PosixOpType opType; + //virtual ~PosixOpResponse() = default; // 添加虚析构函数使类变为多态 +}; + +// 请求/响应类 +class PosixOpReqRes { +public: + PosixOpReqRes() = default; + + PosixOpReqRes(PosixOpType opType); + + PosixOpReqRes(const long* args, long* result); + + virtual ~PosixOpReqRes() = default; // 添加虚析构函数使类变为多态 + + void SetOpType(PosixOpType type); + + PosixOpType GetOpType() const; + + // virtual void Init() = 0; + + // virtual void Shutdown() = 0; + + // 设置和获取请求数据 + // virtual const PosixOpRequest& GetRequestData() const = 0; + // virtual void SetRequestData(const PosixOpRequest& requestData) = 0; + // virtual void SetRequestData(const long* args, long* result) = 0; + + // 复制请求数据到缓冲区 + virtual void CopyRequestDataToBuf(void* buf) = 0; + + // 获取请求大小 + virtual int GetRequestSize() = 0; + virtual int GetRequestAlignSize() = 0; + virtual int GetResponseSize() = 0; + virtual int GetResponseAlignSize() = 0; + + // 设置和获取响应数据 + virtual PosixOpResponse& GetResponse() = 0; + + virtual void SetResponse(void* response) = 0; + +protected: + PosixOpType opType_; +}; + +// ---------------------------------open------------------------------------------------ +struct OpenRequestData : PosixOpRequest { + char path[200]; + int flags; + mode_t mode; +}; + +struct OpenResponseData : PosixOpResponse { + int fd; +}; +class OpenOpReqRes : public PosixOpReqRes { +public: 
+ OpenOpReqRes(const char* path, int flags, mode_t mode); + + OpenOpReqRes(const long *args, long *result); + + ~OpenOpReqRes() override; + + // 复制请求数据到缓冲区 + virtual void CopyRequestDataToBuf(void* buf); + + // 获取请求大小 + int GetRequestSize() override; + int GetRequestAlignSize() override; + + int GetResponseSize() override; + int GetResponseAlignSize() override; + + // 获取和设置响应数据 + PosixOpResponse& GetResponse() override; + void SetResponse(void* request) override; + +private: + OpenRequestData requestData_; + OpenResponseData responseData_; +}; + + +// --------------------------------------read---------------------------------------- +struct ReadRequestData : PosixOpRequest { + int fd; + size_t count; + // void* buf; +}; + +struct ReadResponseData : PosixOpResponse { + int ret; // 返回值 + ssize_t length; // 返回长度 + void* buf; // 为上游保存数据的指针 + char content[0]; // server返回数据 +}; + +class ReadOpReqRes : public PosixOpReqRes { +public: + ReadOpReqRes(int fd, void* buf, size_t count); + ReadOpReqRes(const long *args, long *result); + virtual ~ReadOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; + void SetResponseMultithreads(void* response); + +private: + ReadRequestData requestData_; + ReadResponseData responseData_; + // intercept::common::ThreadPool threadPool_; +}; + +// ---------------------------------write------------------------------------------- +struct WriteRequestData : PosixOpRequest { + int fd; + size_t count; // 要求长度 + void* buf; + char content[0]; // 传输时保存数据 +}; + +struct WriteResponseData : PosixOpResponse { + int ret; // 返回值 + ssize_t length; // 返回长度 +}; + +class WriteOpReqRes : public PosixOpReqRes { +public: + WriteOpReqRes() + : PosixOpReqRes(PosixOpType::WRITE) 
{} + WriteOpReqRes(int fd, void* buf, size_t count); + WriteOpReqRes(const long *args, long *result); + ~WriteOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + void CopyRequestDataToBufMultithread(void* dest, const void* src, size_t count, int numThreads); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; + + +private: + WriteRequestData requestData_; + WriteResponseData responseData_; +}; + +//-------------------------------------close--------------------------------------- +struct CloseRequestData : PosixOpRequest { + int fd; +}; + +struct CloseResponseData : PosixOpResponse { + int ret; // 返回值 +}; + +class CloseOpReqRes : public PosixOpReqRes { +public: + CloseOpReqRes() + : PosixOpReqRes(PosixOpType::CLOSE) {} + CloseOpReqRes(int fd); + CloseOpReqRes(const long *args, long *result); + ~CloseOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; + +private: + CloseRequestData requestData_; + CloseResponseData responseData_; +}; + +// ----------------------------------------fsync------------------------------- +struct FsyncRequestData : PosixOpRequest { + int fd; +}; + +struct FsyncResponseData : PosixOpResponse { + int ret; // 返回值 +}; + +class FsyncOpReqRes : public PosixOpReqRes { +public: + FsyncOpReqRes() + : PosixOpReqRes(PosixOpType::CLOSE) {} + FsyncOpReqRes(int fd); + FsyncOpReqRes(const long *args, long *result); + ~FsyncOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() 
override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; + +private: + FsyncRequestData requestData_; + FsyncResponseData responseData_; +}; +// -----------------------------------stat---------------------------------------- +struct StatRequestData : PosixOpRequest { + char path[200]; +}; + +struct StatResponseData : PosixOpResponse { + int ret; // 返回值 + void* st; // 为上游保存数据的指针 + struct stat fileStat; // server返回数据 +}; + +class StatOpReqRes : public PosixOpReqRes { +public: + StatOpReqRes() + : PosixOpReqRes(PosixOpType::STAT) {} + StatOpReqRes(const char *path, struct stat *st); + StatOpReqRes(const long *args, long *result); + ~StatOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; +private: + StatRequestData requestData_; + StatResponseData responseData_; +}; + +// ----------------------------------fstat------------------------------------------ +struct FstatRequestData : PosixOpRequest { + int fd; +}; + +struct FstatResponseData : PosixOpResponse { + int ret; // 返回值 + void* st; // 为上游保存数据的指针 + struct stat fileStat; // server返回数据 +}; + +class FstatOpReqRes : public PosixOpReqRes { +public: + FstatOpReqRes() + : PosixOpReqRes(PosixOpType::FSTAT) {} + FstatOpReqRes(int fd, struct stat *st); + FstatOpReqRes(const long *args, long *result); + ~FstatOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; 
+ + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; +private: + FstatRequestData requestData_; + FstatResponseData responseData_; +}; + +// -----------------------------------lseek------------------------------------------ +struct LseekRequestData : PosixOpRequest { + int fd; + uint64_t offset; + int whence; +}; + +struct LseekResponseData : PosixOpResponse { + off_t ret; // 返回值 +}; + +class LseekOpReqRes : public PosixOpReqRes { +public: + LseekOpReqRes() + : PosixOpReqRes(PosixOpType::LSEEK) {} + + LseekOpReqRes(int fd, uint64_t offset, int whence); + LseekOpReqRes(const long *args, long *result); + ~LseekOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; +private: + LseekRequestData requestData_; + LseekResponseData responseData_; +}; + +// ----------------------------------mkdir----------------------------------------------- +struct MkdirRequestData : PosixOpRequest { + char path[200]; + mode_t mode; +}; + +struct MkdirResponseData : PosixOpResponse { + int ret; // 返回值 +}; + +class MkdirOpReqRes : public PosixOpReqRes { +public: + MkdirOpReqRes() + : PosixOpReqRes(PosixOpType::MKDIR) {} + MkdirOpReqRes(const char *path, mode_t mode); + MkdirOpReqRes(const long *args, long *result); + + ~MkdirOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; +private: + MkdirRequestData requestData_; + MkdirResponseData responseData_; +}; + +// 
----------------------------------opendir------------------------------------ + + +struct OpendirRequestData : PosixOpRequest { + char path[200]; +}; + +struct OpendirResponseData : PosixOpResponse { + int ret; // 返回值 + DIR* dir; // 上游保存dir的指针 + DirStream dirStream; // 保存server获取的结果 +}; + +class OpendirOpReqRes : public PosixOpReqRes { +public: + OpendirOpReqRes() + : PosixOpReqRes(PosixOpType::OPENDIR) {} + OpendirOpReqRes(const char *path); + OpendirOpReqRes(const long *args, long *result); + + ~OpendirOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; +private: + OpendirRequestData requestData_; + OpendirResponseData responseData_; +}; + +// ----------------------------------getdents------------------------ + +struct GetdentsRequestData : PosixOpRequest { + DirStream dirinfo; + size_t maxread; +}; + +struct GetdentsResponseData : PosixOpResponse { + int ret; // 返回值 + DirStream dirinfo; + ssize_t realbytes; + char* data; // 上游数据指针 + char contents[0]; // 保存server获取的结果 +}; + +class GetdentsOpReqRes : public PosixOpReqRes { +public: + GetdentsOpReqRes() + : PosixOpReqRes(PosixOpType::GETDENTS) {} + GetdentsOpReqRes(DirStream dirinfo, char* data, size_t maxread); + GetdentsOpReqRes(const long *args, long *result); + + ~GetdentsOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; +private: + GetdentsRequestData requestData_; + GetdentsResponseData responseData_; +}; + +// 
----------------------------------closedir------------------------------------ +struct ClosedirRequestData : PosixOpRequest { + DirStream dirstream; +}; + +struct ClosedirResponseData : PosixOpResponse { + int ret; // 返回值 +}; + +class ClosedirOpReqRes : public PosixOpReqRes { +public: + ClosedirOpReqRes() + : PosixOpReqRes(PosixOpType::CLOSEDIR) {} + ClosedirOpReqRes(const DirStream& dirstream); + ClosedirOpReqRes(const long *args, long *result); + + ~ClosedirOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + virtual PosixOpResponse& GetResponse() override; + virtual void SetResponse(void* response) override; + +private: + ClosedirRequestData requestData_; + ClosedirResponseData responseData_; +}; + +// -------------------------unlink----------------------------------------- +struct UnlinkRequestData : PosixOpRequest { + char path[200]; +}; + +struct UnlinkResponseData : PosixOpResponse { + int ret; // 返回值 +}; + +class UnlinkOpReqRes : public PosixOpReqRes { +public: + UnlinkOpReqRes() + : PosixOpReqRes(PosixOpType::UNLINK) {} + UnlinkOpReqRes(const char *path); + UnlinkOpReqRes(const long *args, long *result); + ~UnlinkOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; +private: + UnlinkRequestData requestData_; + UnlinkResponseData responseData_; +}; + +struct RenameRequestData : PosixOpRequest { + char oldpath[200]; + char newpath[200]; +}; + +struct RenameResponseData : PosixOpResponse { + int ret; // 返回值 +}; + +class RenameOpReqRes : public PosixOpReqRes { +public: + 
RenameOpReqRes() + : PosixOpReqRes(PosixOpType::RENAME) {} + RenameOpReqRes(const char *oldpath, const char *newpath); + RenameOpReqRes(const long *args, long *result); + + ~RenameOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; +private: + RenameRequestData requestData_; + RenameResponseData responseData_; +}; + +// ----------------------truncate----------------------------------------- +class TruncateRequestData : public PosixOpRequest { +public: + char path[200]; + off_t length; +}; + +class TruncateResponseData : public PosixOpResponse { +public: + int ret; // 返回值 +}; + +class TruncateOpReqRes : public PosixOpReqRes { +public: + TruncateOpReqRes() + : PosixOpReqRes(PosixOpType::TRUNCATE) {} + TruncateOpReqRes(const char *path, off_t length); + TruncateOpReqRes(const long *args, long *result); + + ~TruncateOpReqRes() override; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int GetRequestAlignSize() override; + virtual int GetResponseSize() override; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override; +private: + TruncateRequestData requestData_; + TruncateResponseData responseData_; +}; + +// ----------------------TERMINAL------------------- +class TerminalRequestData : public PosixOpRequest { +public: + +}; + +class TerminalResponseData : public PosixOpResponse { +public: + int ret; // 返回值 +}; + +class TerminalOpReqRes : public PosixOpReqRes { +public: + TerminalOpReqRes(); + ~TerminalOpReqRes() override {}; + + virtual void CopyRequestDataToBuf(void* buf); + + virtual int GetRequestSize() override; + virtual int 
GetRequestAlignSize() override; + virtual int GetResponseSize() override ; + virtual int GetResponseAlignSize() override; + + virtual PosixOpResponse& GetResponse() override; + void SetResponse(void* response) override;; +private: + TerminalRequestData requestData_; + TerminalResponseData responseData_; +}; + +} // namespace internal +} // namespace intercept + diff --git a/intercept/middleware/CMakeLists.txt b/intercept/middleware/CMakeLists.txt new file mode 100644 index 0000000..d506dcb --- /dev/null +++ b/intercept/middleware/CMakeLists.txt @@ -0,0 +1,45 @@ +# src/middleware/CMakeLists.txt + +find_library(ICEORYX_POSH_LIB NAMES iceoryx_posh PATHS ../../thirdparties/iceoryx/lib) +find_library(ICEORYX_HOOFS_LIB NAMES iceoryx_hoofs PATHS ../../thirdparties/iceoryx/lib) + +file(GLOB MIDDLEWARE_SOURCES *.cpp) +file(GLOB MIDDLEWARE_HEADERS *.h) + +add_library(intercept_middleware ${MIDDLEWARE_SOURCES}) +target_include_directories(intercept_middleware PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/include +) +target_link_libraries(intercept_middleware PUBLIC + intercept_internal + intercept_filesystem + ${ICEORYX_HOOFS_LIB} + ${ICEORYX_POSH_LIB} + +) + + +set(CMAKE_FIND_LIBRARY_SUFFIXES ".so" ".a") + +find_library(ICEORYX_POSH_LIB NAMES iceoryx_posh PATHS ../../thirdparties/iceoryx/lib) +find_library(ICEORYX_HOOFS_LIB NAMES iceoryx_hoofs PATHS ../../thirdparties/iceoryx/lib) +find_library(ICEORYX_PLATFORM_LIB NAMES iceoryx_hoofs PATHS ../../thirdparties/iceoryx/lib) + +file(GLOB CLIENT_MIDDLEWARE_SOURCES *.cpp) +file(GLOB CLIENT_MIDDLEWARE_HEADERS *.h) + +add_library(intercept_middleware_client ${CLIENT_MIDDLEWARE_SOURCES}) +target_include_directories(intercept_middleware_client PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/include +) +target_link_libraries(intercept_middleware_client PUBLIC + -lrt + intercept_internal_client + intercept_filesystem_client + 
${ICEORYX_POSH_LIB} + ${ICEORYX_HOOFS_LIB} + ${ICEORYX_PLATFORM_LIB} +) +target_compile_options(intercept_middleware_client PUBLIC -DCLIENT_BUILD -fPIC ) \ No newline at end of file diff --git a/intercept/middleware/iceoryx_wrapper.cpp b/intercept/middleware/iceoryx_wrapper.cpp new file mode 100644 index 0000000..82ad99a --- /dev/null +++ b/intercept/middleware/iceoryx_wrapper.cpp @@ -0,0 +1,645 @@ +#include "filesystem/abstract_filesystem.h" +#ifndef CLIENT_BUILD +#include "filesystem/curve_filesystem.h" +#endif +#include "iox/signal_watcher.hpp" +#include "iceoryx_wrapper.h" + +#include "iceoryx_posh/mepoo/chunk_header.hpp" + +namespace intercept { +namespace middleware { + +using intercept::internal::PosixOpReqRes; +using intercept::internal::PosixOpRequest; +using intercept::internal::PosixOpResponse; +using intercept::internal::PosixOpType; + +using intercept::internal::OpenRequestData; +using intercept::internal::OpenResponseData; +using intercept::internal::ReadRequestData; +using intercept::internal::ReadResponseData; +using intercept::internal::WriteRequestData; +using intercept::internal::WriteResponseData; +using intercept::internal::CloseRequestData; +using intercept::internal::CloseResponseData; +using intercept::internal::StatRequestData; +using intercept::internal::StatResponseData; +using intercept::internal::FstatRequestData; +using intercept::internal::FstatResponseData; +using intercept::internal::FsyncRequestData; +using intercept::internal::FsyncResponseData; +using intercept::internal::LseekRequestData; +using intercept::internal::LseekResponseData; +using intercept::internal::MkdirRequestData; +using intercept::internal::MkdirResponseData; +using intercept::internal::OpendirRequestData; +using intercept::internal::OpendirResponseData; +using intercept::internal::GetdentsRequestData; +using intercept::internal::GetdentsResponseData; +using intercept::internal::ClosedirRequestData; +using intercept::internal::ClosedirResponseData; +using 
intercept::internal::UnlinkRequestData; +using intercept::internal::UnlinkResponseData; +using intercept::internal::RenameRequestData; +using intercept::internal::RenameResponseData; +using intercept::internal::TruncateRequestData; +using intercept::internal::TruncateResponseData; +using intercept::internal::TerminalRequestData; +using intercept::internal::TerminalResponseData; + +std::shared_ptr ReqResMiddlewareWrapper::fileSystem_ = nullptr; + +IceoryxWrapper::IceoryxWrapper(const ServiceMetaInfo& info) : + ReqResMiddlewareWrapper(info){ +} + +IceoryxWrapper::~IceoryxWrapper() { + Shutdown(); +} + +void IceoryxWrapper::Init() { + +} +void IceoryxWrapper::InitClient() { + // 创建client + iox::capro::IdString_t service(iox::TruncateToCapacity, + info_.service.c_str(), info_.service.length()); + iox::capro::IdString_t instance(iox::TruncateToCapacity, + info_.instance.c_str(), info_.instance.length()); + iox::capro::IdString_t event(iox::TruncateToCapacity, + info_.event.c_str(), info_.event.length()); + + client_.reset(new iox::popo::UntypedClient({service, instance, event})); + spdlog::info("client init, service: {}, instance: {}, event: {}", + info_.service, info_.instance, info_.event); +} + +void IceoryxWrapper::InitServer() { + // 创建server + ReqResMiddlewareWrapper::InitServer(); + iox::capro::IdString_t service(iox::TruncateToCapacity, + info_.service.c_str(), info_.service.length()); + iox::capro::IdString_t instance(iox::TruncateToCapacity, + info_.instance.c_str(), info_.instance.length()); + iox::capro::IdString_t event(iox::TruncateToCapacity, + info_.event.c_str(), info_.event.length()); + server_.reset(new iox::popo::UntypedServer({service, instance, event})); + // std::cout << "server init, service: " << info_.service << ", instance: " << info_.instance << ", event: " << info_.event << std::endl; + spdlog::info("IceoryxWrapper::InitServer, server: {}, instance: {}, event: {} ", info_.service, info_.instance, info_.event); +} + +void 
IceoryxWrapper::InitDummyServer() { + iox::capro::IdString_t service(iox::TruncateToCapacity, + info_.service.c_str(), info_.service.length()); + iox::capro::IdString_t instance(iox::TruncateToCapacity, + info_.instance.c_str(), info_.instance.length()); + iox::capro::IdString_t event(iox::TruncateToCapacity, + info_.event.c_str(), info_.event.length()); + server_.reset(new iox::popo::UntypedServer({service, instance, event})); + // std::cout << "server init, service: " << info_.service << ", instance: " << info_.instance << ", event: " << info_.event << std::endl; + spdlog::info("IceoryxWrapper::InitDummyServer, server: {}, instance: {}, event: {} ", info_.service, info_.instance, info_.event); +} + +void IceoryxWrapper::Shutdown() { + spdlog::info("shutdown IceoryxWrapper"); + if (servicetype_ == ServiceType::SERVER) { + spdlog::info("stop the server...."); + // StopServer(); + } else if (servicetype_ == ServiceType::CLIENT) { + StopClient(); + spdlog::info("stop the client...."); + } else if (servicetype_ == ServiceType::DUMMYSERVER) { + spdlog::info("stop the dummyserver, do nothing"); + } else { + spdlog::info("unknown service type : {}", (int)servicetype_); + } +} + +void IceoryxWrapper::StartServer() { + // 启动server + if (server_.get() == nullptr) { + std::cerr << "server is nullptr" << std::endl; + return; + } + spdlog::info("enter IceoryxWrapper::StartServer, bgein OnResponse"); + running_ = true; + OnResponse(); + spdlog::info("enter IceoryxWrapper::StartServer, end OnResponse"); +} + +// 暂时没有调用 +void IceoryxWrapper::StartClient() { + // 启动client + InitClient(); +} + +void IceoryxWrapper::StopServer() { + kill(getpid(), SIGINT); + running_ = false; +} + +void IceoryxWrapper::StopClient() { + intercept::internal::TerminalOpReqRes terminal; + spdlog::info("wait stop client, service: {}, instance: {}, event: {}, client count: {}", + info_.service, info_.instance, info_.event, client_.use_count()); + OnRequest(terminal); +} + +// client: 这里组织请求并处理返回的响应 +void 
IceoryxWrapper::OnRequest(PosixOpReqRes& reqRes) { + // 上游用户侧需要调用 + // 假设我们直接将请求的响应数据复制回响应对象 + int reqsize = reqRes.GetRequestSize(); + int alignsize = reqRes.GetRequestAlignSize(); + int64_t expectedResponseSequenceId = requestSequenceId_; + + { + // intercept::common::Timer timer("client request"); + client_->loan(reqsize, alignsize) + .and_then([&](auto& requestPayload) { + + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + requestHeader->setSequenceId(requestSequenceId_); + expectedResponseSequenceId = requestSequenceId_; + requestSequenceId_ += 1; + char* request = static_cast(requestPayload); + + const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload); + spdlog::info("to loan chunk in client, head info, chunksize: {}", chunkHeader->chunkSize()); + + reqRes.CopyRequestDataToBuf((void*)request); + client_->send(request).or_else( + [&](auto& error) { std::cout << "Could not send Request! Error: " << error << std::endl; }); + }) + .or_else([](auto& error) { std::cout << "Could not allocate Request! Error: " << error << std::endl; }); + + } + //! 
[take response] + { + // intercept::common::Timer timer("client response"); + bool hasReceivedResponse{false}; + do{ + client_->take().and_then([&](const auto& responsePayload) { + auto responseHeader = iox::popo::ResponseHeader::fromPayload(responsePayload); + if (responseHeader->getSequenceId() == expectedResponseSequenceId) + { + const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(responsePayload); + spdlog::info("to release chunk in client, head info, type: {} typestr: {} , chunksize: {}", int(reqRes.GetOpType()), TypeToStr(reqRes.GetOpType()), chunkHeader->chunkSize()); + + reqRes.SetResponse((void*)responsePayload); + + client_->releaseResponse(responsePayload); + // sleep(10); + const iox::mepoo::ChunkHeader* nowheader = iox::mepoo::ChunkHeader::fromUserPayload(responsePayload); + if (nowheader == nullptr) { + spdlog::error("the chunkheader is nullptr!!!!"); + } + spdlog::info("chunkheader info, chunksize {}", nowheader->chunkSize()); + // std::cout << "Got Response with expected sequence ID! -> continue" << std::endl; + } + else + { + spdlog::error("Got Response with outdated sequence ID! Expected = {}; Actual = {} ! -> skip", + expectedResponseSequenceId, responseHeader->getSequenceId()); + } + hasReceivedResponse = true; + }); + } while (!hasReceivedResponse); + } + +} + +// server: 这里获取、处理请求并返回响应结果 +void IceoryxWrapper::OnResponse() { + auto lastRequestTime = std::chrono::steady_clock::now(); // 初始化上一次处理请求的时间戳 + int intervalSeconds = intercept::common::Configure::getInstance().getConfig("waitRequestMaxSeconds") == "" ? 5 : std::stoi(intercept::common::Configure::getInstance().getConfig("waitRequestMaxSeconds")); + int trynumber = 0; + int getnum = 0; + int missnum = 0; + + std::chrono::steady_clock::duration totalDuration = std::chrono::steady_clock::duration::zero(); // 总耗时 + while (!iox::hasTerminationRequested() && running_) { + trynumber++; + if(trynumber > 2000000) { + // ! 
注意的判断可能会导致某些连接过早被中断,使得client无法正常响应 + auto now = std::chrono::steady_clock::now(); // 获取当前时间 + if (now - lastRequestTime > std::chrono::seconds(intervalSeconds)) { // 检查是否超过n秒无请求处理 + spdlog::info("No request handled in the last {} seconds. Exiting loop.", intervalSeconds); + break; + } + } + server_->take().and_then([&](auto& requestPayload) { + auto begintime = std::chrono::steady_clock::now(); + auto request = static_cast(requestPayload); + // std::cout << "request type: " << (int)request->opType << std::endl; + switch (request->opType) { + case PosixOpType::OPEN: + HandleOpenRequest(requestPayload); + break; + case PosixOpType::READ: + HandleReadRequest(requestPayload); + break; + case PosixOpType::WRITE: + HandleWriteRequest(requestPayload); + break; + case PosixOpType::CLOSE: + HandleCloseRequest(requestPayload); + break; + case PosixOpType::STAT: + HandleStatRequest(requestPayload); + break; + case PosixOpType::FSTAT: + HandleFstatRequest(requestPayload); + break; + case PosixOpType::FSYNC: + HandleFsyncRequest(requestPayload); + break; + case PosixOpType::LSEEK: + HandleLseekRequest(requestPayload); + break; + case PosixOpType::MKDIR: + HandleMkdirRequest(requestPayload); + break; + case PosixOpType::UNLINK: + HandleUnlinkRequest(requestPayload); + break; + case PosixOpType::OPENDIR: + HandleOpendirRequest(requestPayload); + break; + case PosixOpType::GETDENTS: + HandleGetdentsRequest(requestPayload); + break; + case PosixOpType::CLOSEDIR: + HandleClosedirRequest(requestPayload); + break; + case PosixOpType::RENAME: + HandleRenameRequest(requestPayload); + break; + case PosixOpType::TRUNCATE: + HandleTruncateRequest(requestPayload); + break; + case PosixOpType::TERMINAL: + HandleTerminalRequest(requestPayload); + break; + default: + spdlog::error("Unsupported request type: {}", (int)request->opType); + break; + } + + // 更新最后处理请求的时间戳 + lastRequestTime = std::chrono::steady_clock::now(); + trynumber = 0; // 归零 + getnum++; + totalDuration += (lastRequestTime - 
begintime); + } + ); + // TODO: 如果不sleep 获取不到数据 待排查 + // sleep(1); + } + std::cout << "exit Server OnResponse... " << info_.service << " " << info_.instance << " " << info_.event << std::endl; + + // if (getnum > 0) { + // std::cout << "total request time: " << totalDuration.count() << " , average time : " << totalDuration.count()/ getnum << std::endl; + // } +} + +void IceoryxWrapper::HandleOpenRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + spdlog::info("Open file request, path: {}, flags: {}, mode: {}", request->path, request->flags, request->mode); + // 这里可以调用posix open函数 + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(OpenResponseData), alignof(OpenResponseData)) + .and_then([&](auto& responsePayload) { + const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload); + spdlog::info("to loan chunk in server open , head info, chunksize: {}", chunkHeader->chunkSize()); + + auto response = static_cast(responsePayload); + response->opType = request->opType; + response->fd = fileSystem_->Open(request->path, request->flags, request->mode); + server_->send(responsePayload).or_else( + [&](auto& error) { std::cout << "Could not send Response! Error: " << error << std::endl; }); + spdlog::info("open response info, the type: {}, the fd: {}", intercept::internal::TypeToStr(response->opType), response->fd ); + }) + .or_else( + [&](auto& error) { std::cout << "Could not allocate Open Response! 
Error: " << error << std::endl; }); + + const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload); + spdlog::info("to release chunk in server open , head info, chunksize: {}", chunkHeader->chunkSize()); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleReadRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(ReadResponseData) + request->count, alignof(ReadResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + char* buf = (char*) response + sizeof(ReadResponseData); + + const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload); + // spdlog::info("to loan chunk in server read , head info, chunksize: {} real size: {}", chunkHeader->chunkSize(), sizeof(ReadResponseData) + request->count); + + if (intercept::common::Configure::getInstance().getConfig("multiop") == "true" + && request->count >= atol(intercept::common::Configure::getInstance().getConfig("blocksize").c_str())) { + response->length = fileSystem_->MultiRead(request->fd, buf, request->count); + } else { + response->length = fileSystem_->Read(request->fd, buf, request->count); + } + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Read! Error: " << error << std::endl;}); + spdlog::debug("read response, fd: {}, count: {}, read response info, the type: {}, the length: {}", + request->fd, request->count, intercept::internal::TypeToStr(response->opType), response->length); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Read Response! 
Error: " << error << std::endl; }); + + const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload); + // spdlog::info("to release chunk in server read , head info, chunksize: {}", chunkHeader->chunkSize()); + + server_->releaseRequest(request); + +} + +void IceoryxWrapper::HandleWriteRequest(const auto& requestPayload) { + spdlog::debug("handle one write request"); + auto request = static_cast(requestPayload); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(WriteResponseData), alignof(WriteResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + if (intercept::common::Configure::getInstance().getConfig("multiop") == "true" + && request->count >= atol(intercept::common::Configure::getInstance().getConfig("blocksize").c_str())) { + response->length = fileSystem_->MultiWrite(request->fd, request->content, request->count); + } else { + response->length = fileSystem_->Write(request->fd, request->content, request->count); + } + + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Write! Error: " << error << std::endl;}); + spdlog::debug("write response, fd: {}, count: {}, write response info, the type: {}, the length: {}", + request->fd, request->count, intercept::internal::TypeToStr(response->opType), response->length); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! 
Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleCloseRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + spdlog::info("close request, fd: {}", request->fd); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(CloseResponseData), alignof(CloseResponseData)) + .and_then([&](auto& responsePayload) { + + const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload); + spdlog::info("to loan chunk in server close , head info, chunksize: {}", chunkHeader->chunkSize()); + + auto response = static_cast(responsePayload); + response->opType = request->opType; + response->ret = fileSystem_->Close(request->fd); + spdlog::info("finish close, fd: {}", request->fd); + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Close! Error: " << error << std::endl;}); + + spdlog::info("close response info, the type: {}, the ret: {}", intercept::internal::TypeToStr(response->opType), response->ret); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! 
Error: " << error << std::endl; }); + + const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload); + spdlog::info("to release chunk in server close , head info, chunksize: {}", chunkHeader->chunkSize()); + + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleFsyncRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + spdlog::info("fsync reqeust, fd: {}", request->fd); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(FsyncResponseData), alignof(FsyncResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + response->ret = fileSystem_->Fsync(request->fd); + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Stat! Error: " << error << std::endl;}); + spdlog::info("fsync response info, ret: {}", response->ret); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleStatRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + spdlog::info("stat request, pathname: {}", request->path); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(StatResponseData), alignof(StatResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + response->ret = fileSystem_->Stat(request->path, &(response->fileStat)); + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Stat! 
Error: " << error << std::endl;}); + spdlog::info("stat response info, the ino: {}, size: {}, the ret: {}", + (int)response->fileStat.st_ino, response->fileStat.st_size, response->ret); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleFstatRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + spdlog::info("fstat request, fd: {}", request->fd); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(FstatResponseData), alignof(FstatResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + response->ret = fileSystem_->Fstat(request->fd, &(response->fileStat)); + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Stat! Error: " << error << std::endl;}); + spdlog::info("fstat response info, the ino: {}, size: {}, the ret: {}", + (int)response->fileStat.st_ino, response->fileStat.st_size, response->ret); + + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! 
Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleLseekRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + spdlog::debug("lseek request, fd: {}, offset: {}", request->fd, request->offset); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(LseekResponseData), alignof(LseekResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + response->ret = fileSystem_->Lseek(request->fd, request->offset, request->whence); + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Stat! Error: " << error << std::endl;}); + spdlog::debug("lseek response, ret: {}", response->ret); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleMkdirRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + spdlog::info("mkdir request, pathname: {}, mode: {}", request->path, request->mode); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(MkdirResponseData), alignof(MkdirResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + response->ret = fileSystem_->Mkdir(request->path, request->mode); + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Stat! Error: " << error << std::endl;}); + spdlog::info("mkdir resposne, ret: {}", response->ret); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! 
Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleGetdentsRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + int maxread = request->maxread; + maxread = 200; // 暂时读取目录下的200个文件,否则分配会失败 + spdlog::info("getdents request, fd: {}, the info: {}", request->dirinfo.fh, request->dirinfo.ino); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(GetdentsResponseData) + maxread * sizeof(dirent64), alignof(GetdentsResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + auto req = const_cast(request); + response->ret = fileSystem_->Getdents(&req->dirinfo, response->contents, maxread, &response->realbytes); + response->dirinfo = req->dirinfo; + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Stat! Error: " << error << std::endl;}); + spdlog::info("getdents response, ret: {}, thre realbytes: {}, the offset: {}", + response->ret, response->realbytes, response->dirinfo.offset); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleOpendirRequest(const auto&requestPayload) { + auto request = static_cast(requestPayload); + spdlog::info("opendir request, path: {}", request->path); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(OpendirResponseData), alignof(OpendirResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + response->ret = fileSystem_->Opendir(request->path, &response->dirStream); + server_->send(responsePayload).or_else( + [&](auto& error) { std::cout << "Could not send Response! 
Error: " << error << std::endl; }); + spdlog::info("opendir response, the type: {}, the fd: {}", TypeToStr(response->opType), response->dirStream.fh); + }) + .or_else( + [&](auto& error) { std::cout << "Could not allocate Open Response! Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleClosedirRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + spdlog::info("closedir requset, fd: {}", request->dirstream.fh); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(ClosedirResponseData), alignof(ClosedirResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + response->ret = fileSystem_->Closedir(const_cast(&request->dirstream)); + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Closedir! Error: " << error << std::endl;}); + spdlog::info("closedir response, the type: {}, the ret: {}", TypeToStr(response->opType), response->ret ); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + + +void IceoryxWrapper::HandleUnlinkRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + spdlog::info("unlink reqeust, pathname: {}", request->path); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(UnlinkResponseData), alignof(UnlinkResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + response->ret = fileSystem_->Unlink(request->path); + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Stat! 
Error: " << error << std::endl;}); + spdlog::info("unlink response, ret: ", response->ret); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleRenameRequest(const auto& requestPayload) { + auto request = static_cast(requestPayload); + spdlog::info("rename request, oldpath: {}, newpath: {}", request->oldpath, request->newpath); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(RenameResponseData), alignof(RenameResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + response->opType = request->opType; + response->ret = fileSystem_->Rename(request->oldpath, request->newpath); + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Stat! Error: " << error << std::endl;}); + spdlog::info("rename response, ret: {}", response->ret); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleTruncateRequest(const auto& requestPayload) { + + auto request = static_cast(requestPayload); + spdlog::info("truncate request, path: {}, length: {}", request->path, request->length); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(TruncateResponseData), alignof(TruncateResponseData)) + .and_then([&](auto& responsePayload) { + auto response = static_cast(responsePayload); + + response->opType = request->opType; + response->ret = fileSystem_->Truncate(request->path, request->length); + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Stat! 
Error: " << error << std::endl;}); + spdlog::info("truncate response, ret: {}", response->ret); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Write Response! Error: " << error << std::endl; }); + server_->releaseRequest(request); +} + +void IceoryxWrapper::HandleTerminalRequest(const auto& requestPayload) { + + auto request = static_cast(requestPayload); + spdlog::info("terminal request."); + auto requestHeader = iox::popo::RequestHeader::fromPayload(requestPayload); + server_->loan(requestHeader, sizeof(TerminalResponseData), alignof(TerminalResponseData)) + .and_then([&](auto& responsePayload) { + + auto response = static_cast(responsePayload); + + response->opType = request->opType; + response->ret = 0; + running_ = false; // 终结退出 + server_->send(responsePayload).or_else([&](auto& error){ std::cout << "Could not send Response for Terminal! Error: " << error << std::endl;}); + + const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(responsePayload); + spdlog::info("terminal response, ret: {}, pid: {}, tid: {}, loan chunk chunksize: {}", + response->ret, (unsigned int) getpid(), (unsigned int) pthread_self(), chunkHeader->chunkSize()); + sleep(0.1); + }).or_else( + [&](auto& error) { std::cout << "Could not allocate Terminal Response! 
Error: " << error << std::endl; }); + + const iox::mepoo::ChunkHeader * chunkHeader = iox::mepoo::ChunkHeader::fromUserPayload(requestPayload); + spdlog::info("to release chunk in server terminal , head info, chunksize: {}", chunkHeader->chunkSize()); + server_->releaseRequest(request); +} + +} // namespace middleware +} // namespace intercept + + +int test() { + // std::string servicename = "MyService"; + // std::unique_ptr middleware = std::make_unique(servicename); + // AddClientService(servicename); + // WriteOpReqRes writeReqRes(1, "data".data(), 4, 0); + // int ret = middleware->OnRequest(writeReqRes); + // const auto& response = middleware->GetResponse(writeRequest); + // if (response.result >= 0) { + // std::cout << "Write operation successful!" << std::endl; + // } else { + // std::cout << "Write operation failed with error code: " << response.result << std::endl; + // } + return 0; +} diff --git a/intercept/middleware/iceoryx_wrapper.h b/intercept/middleware/iceoryx_wrapper.h new file mode 100644 index 0000000..eabc093 --- /dev/null +++ b/intercept/middleware/iceoryx_wrapper.h @@ -0,0 +1,76 @@ +#pragma once + +#include "req_res_middleware_wrapper.h" + +#include "iceoryx_posh/popo/untyped_server.hpp" +#include "iceoryx_posh/popo/untyped_client.hpp" + +namespace intercept { +namespace filesystem { + class AbstractFileSystem; // Forward declaration +} +} + +namespace intercept { +namespace middleware { + +class IceoryxWrapper : public ReqResMiddlewareWrapper { +public: + explicit IceoryxWrapper(const ServiceMetaInfo& info); + + ~IceoryxWrapper() override; + + virtual void Init() override; + + virtual void InitClient() override; + + virtual void InitServer() override; + + virtual void InitDummyServer() override; + + virtual void StartServer(); + + virtual void StartClient(); + + virtual void StopServer() override; + + virtual void StopClient() override; + + virtual void OnRequest(PosixOpReqRes& reqRes) override; + + virtual void OnResponse() override; + + 
virtual void Shutdown() override; + + virtual ServiceMetaInfo GetServiceMetaInfo() override {return info_;} + +private: + void HandleOpenRequest(const auto& requestPayload); + void HandleReadRequest(const auto& requestPayload); + void HandleWriteRequest(const auto& requestPayload); + void HandleCloseRequest(const auto& requestPayload); + void HandleLseekRequest(const auto& requestPayload); + void HandleFsyncRequest(const auto& requestPayload); + void HandleStatRequest(const auto& requestPayload); + void HandleFstatRequest(const auto& requestPayload); + void HandleMkdirRequest(const auto& requestPayload); + void HandleOpendirRequest(const auto& requestPayload); + void HandleGetdentsRequest(const auto& requestPayload); + void HandleClosedirRequest(const auto& requestPayload); + void HandleUnlinkRequest(const auto& requestPayload); + void HandleRenameRequest(const auto& requestPayload); + void HandleTruncateRequest(const auto& requestPayload); + void HandleTerminalRequest(const auto& requestPayload); + +private: + std::shared_ptr server_; + + std::shared_ptr client_; + + int64_t requestSequenceId_ = 0; + bool running_ = false; +}; + + +} // namespace middleware +} // namespace intercept diff --git a/intercept/middleware/req_res_middleware_wrapper.cpp b/intercept/middleware/req_res_middleware_wrapper.cpp new file mode 100644 index 0000000..ffbea21 --- /dev/null +++ b/intercept/middleware/req_res_middleware_wrapper.cpp @@ -0,0 +1,49 @@ +#include + +#include "middleware/req_res_middleware_wrapper.h" +#ifndef CLIENT_BUILD +#include "filesystem/curve_filesystem.h" +#include "filesystem/s3fs_filesystem.h" +#include "filesystem/dummy_filesystem.h" +#endif +#include "filesystem/abstract_filesystem.h" + + +namespace intercept { +namespace middleware { +using intercept::common::Configure; +void ReqResMiddlewareWrapper::Init() { + +} + +void ReqResMiddlewareWrapper::InitServer() { + if (info_.serverType == "dummy") { + spdlog::info("dont create fileSystem in 
ReqResMiddlewareWrapper::InitServer"); + return; + } + if (!fileSystem_) { + #ifndef CLIENT_BUILD + if (Configure::getInstance().getConfig("backendFilesystem") == "s3fs") { + fileSystem_.reset(new intercept::filesystem::S3fsFileSystem); + } else if (Configure::getInstance().getConfig("backendFilesystem") == "curvefs") { + fileSystem_.reset(new intercept::filesystem::CurveFileSystem); + } else if (Configure::getInstance().getConfig("backendFilesystem") == "dummyfs") { + fileSystem_.reset(new intercept::filesystem::DummyFileSystem); + } else { + spdlog::error("dont create fileSystem in ReqResMiddlewareWrapper::InitServer"); + return; + } + fileSystem_->Init(); + spdlog::info("Initserver, filesystem: {}", Configure::getInstance().getConfig("backendFilesystem")); + #endif + } else { + spdlog::info("ReqResMiddlewareWrapper::InitServer, have inited, donot need to init again"); + } +} + +void ReqResMiddlewareWrapper::InitClient() { + +} + +} // namespace middleware +} // namespace intercept \ No newline at end of file diff --git a/intercept/middleware/req_res_middleware_wrapper.h b/intercept/middleware/req_res_middleware_wrapper.h new file mode 100644 index 0000000..99157eb --- /dev/null +++ b/intercept/middleware/req_res_middleware_wrapper.h @@ -0,0 +1,80 @@ +#pragma once +#include + +#include "internal/posix_op_req_res.h" +#include "internal/metainfo.h" + +namespace intercept { +namespace filesystem { + class AbstractFileSystem; // Forward declaration +} +} + +namespace intercept +{ +namespace middleware +{ +using intercept::internal::ServiceMetaInfo; +using intercept::internal::PosixOpReqRes; + +enum class ServiceType { + CLIENT = 0, + SERVER = 1, + DUMMYSERVER = 2, +}; + +class ReqResMiddlewareWrapper { +public: + ReqResMiddlewareWrapper() { + spdlog::info("construct ReqResMiddlewareWrapper"); + } + + ReqResMiddlewareWrapper(ServiceMetaInfo info) : info_(info) { + spdlog::info("construct ReqResMiddlewareWrapper"); + + } + + virtual ~ReqResMiddlewareWrapper() { + 
spdlog::info("deconstruct ReqResMiddlewareWrapper"); + + } + + virtual void Init(); + + virtual void InitClient(); + + virtual void InitServer(); + + virtual void SetServiceType(ServiceType type) { + servicetype_ = type; + } + + virtual void InitDummyServer() {} + + virtual void StartServer() = 0; + + virtual void StartClient() = 0; + + virtual void StopServer() = 0; + + virtual void StopClient() = 0; + + // 对外request接口 + virtual void OnRequest(PosixOpReqRes& reqRes) = 0; + + // 对外response接口 + virtual void OnResponse() = 0; + + virtual void Shutdown() = 0; + + virtual ServiceMetaInfo GetServiceMetaInfo() = 0; + +protected: + static std::shared_ptr fileSystem_; + ServiceMetaInfo info_; + ServiceType servicetype_; +}; + +} // namespace middleware +} // namespace intercept + diff --git a/intercept/posix/CMakeLists.txt b/intercept/posix/CMakeLists.txt new file mode 100644 index 0000000..eb35a58 --- /dev/null +++ b/intercept/posix/CMakeLists.txt @@ -0,0 +1,13 @@ +# src/posix/CMakeLists.txt + +file(GLOB POSIX_SOURCES *.cpp) +file(GLOB POSIX_HEADERS *.h) + +add_library(intercept_posix_interface_client ${POSIX_SOURCES}) +target_include_directories(intercept_posix_interface_client PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} +) +target_link_libraries(intercept_posix_interface_client PUBLIC + intercept_registry_client +) +target_compile_options(intercept_posix_interface_client PUBLIC -DCLIENT_BUILD -fPIC) diff --git a/intercept/posix/libsyscall_intercept_hook_point.h b/intercept/posix/libsyscall_intercept_hook_point.h new file mode 100644 index 0000000..2fe7d57 --- /dev/null +++ b/intercept/posix/libsyscall_intercept_hook_point.h @@ -0,0 +1,102 @@ +/* + * Copyright 2016-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following 
disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LIBSYSCALL_INTERCEPT_HOOK_POINT_H +#define LIBSYSCALL_INTERCEPT_HOOK_POINT_H + +/* + * The inteface for using the intercepting library. + * This callback function should be implemented by + * the code using the library. + * + * The syscall_number, and the six args describe the syscall + * currently being intercepted. + * A non-zero return value means libsyscall_intercept + * should execute the original syscall, use its result. A zero return value + * means libsyscall_intercept should not execute the syscall, and + * use the integer stored to *result as the result of the syscall + * to be returned in RAX to libc. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +extern int (*intercept_hook_point)(long syscall_number, + long arg0, long arg1, + long arg2, long arg3, + long arg4, long arg5, + long *result); + +extern void (*intercept_hook_point_clone_child)(void); +extern void (*intercept_hook_point_clone_parent)(long pid); + +/* + * syscall_no_intercept - syscall without interception + * + * Call syscall_no_intercept to make syscalls + * from the interceptor library, once glibc is already patched. + * Don't use the syscall function from glibc, that + * would just result in an infinite recursion. + */ +long syscall_no_intercept(long syscall_number, ...); + +/* + * syscall_error_code - examines a return value from + * syscall_no_intercept, and returns an error code if said + * return value indicates an error. + */ +static inline int +syscall_error_code(long result) +{ + if (result < 0 && result >= -0x1000) + return (int)-result; + + return 0; +} + +/* + * The syscall intercepting library checks for the + * INTERCEPT_HOOK_CMDLINE_FILTER environment variable, with which one can + * control in which processes interception should actually happen. + * If the library is loaded in this process, but syscall interception + * is not allowed, the syscall_hook_in_process_allowed function returns zero, + * otherwise, it returns one. The user of the library can use it to notice + * such situations, where the code is loaded, but no syscall will be hooked. + */ +int syscall_hook_in_process_allowed(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/intercept/posix/posix_helper.h b/intercept/posix/posix_helper.h new file mode 100644 index 0000000..0a06226 --- /dev/null +++ b/intercept/posix/posix_helper.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/* + * Project: curve + * Created Date: Thur May 27 2021 + * Author: xuchaojie + */ + +#include +#include +#include + +#include +#include + +#include "posix_op.h" +#include "syscall_client.h" + +// 仅用于联编 +int help(int argc, char *argv[]) { + return 0; +} diff --git a/intercept/posix/posix_op.cpp b/intercept/posix/posix_op.cpp new file mode 100644 index 0000000..979b04b --- /dev/null +++ b/intercept/posix/posix_op.cpp @@ -0,0 +1,657 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/common.h" +#include "posix_op.h" +#include "middleware/iceoryx_wrapper.h" +#include "registry/client_server_registry.h" + +using intercept::internal::FileType; +struct PosixInfo { + std::string fileName; + FileType fileType; + uint64_t fd; + intercept::internal::DirStream dirinfo; +}; + +// key : 返回给上游的fd, value: 存储文件信息 +std::unordered_map g_fdtofile(10000); +// 以BEGIN_COUNTER为起始值在map中保存,避免fd从0开始与系统内部fd冲突 +constexpr uint32_t BEGIN_COUNTER = 10000; +std::atomic g_fdCounter(BEGIN_COUNTER); + +std::chrono::steady_clock::duration totalDuration = std::chrono::steady_clock::duration::zero(); // 总耗时 +int readnum = 0; + +unsigned long g_processid = -1; +thread_local std::shared_ptr g_wrapper; +thread_local bool g_initflag = false; +std::mutex global_mutex; + +thread_local struct ThreadCleanup { + ThreadCleanup() { + std::cout << "Thread cleanup object created\n"; + } + + ~ThreadCleanup() { + std::cout << "Thread cleanup object destroyed\n"; + } +} cleanup; + 
+struct syscall_desc table[1000] = { + {0, 0, {argNone, argNone, argNone, argNone, argNone, argNone}}}; + +#define FUNC_NAME(name) PosixOp##name +#define REGISTER_CALL(sysname, funcname, ...) \ + table[SYS_##sysname] = syscall_desc { \ + #sysname, (FUNC_NAME(funcname)), { __VA_ARGS__, } \ + } + + +// ---------------------------init and unint---------------------------------- + + + +int ThreadInit() { + // std::lock_guard lock(global_mutex); + if (g_initflag == true) { + return 0; + } + std::stringstream ss; + auto myid = std::this_thread::get_id(); + ss << myid; + std::string threadid = ss.str(); + pthread_t tid = pthread_self(); + pid_t pid = getpid(); + if (g_processid == -1) { + // 进程级初始化 + g_processid = (unsigned long)pid; + GlobalInit(); + } + spdlog::warn("thread init, processid: {}, threadid: {}, flag id: {}", + (unsigned long) pid, (unsigned long)tid, g_initflag); + // sleep(10); + + intercept::internal::ServiceMetaInfo info; + info.service = SERVICE_FLAG; + info.instance = INTERCEPT_INSTANCE_FLAG; + intercept::registry::ClientServerRegistry registry(ICEORYX, info); + auto dummyserver = registry.CreateDummyServer(); + std::cout << "wait dummy server for client...." 
<< std::endl; + sleep(5); + + info = dummyserver->GetServiceMetaInfo(); + info.service = SERVICE_FLAG; + info.instance = INTERCEPT_INSTANCE_FLAG; + g_wrapper = registry.CreateClient(info); + g_initflag = true; + return 0; +} +int GlobalInit() { + if (intercept::common::Configure::getInstance().loadConfig(intercept::common::CONFIG_FILE)) { + std::cout << "Config file loaded : " << intercept::common::CONFIG_FILE << std::endl; + } else { + std::cout << "Config file not loaded:" << intercept::common::CONFIG_FILE << std::endl; + return 0; + } + intercept::common::InitLog(); + + constexpr char BASE_APP_NAME[] = "iox-intercept-client"; + std::string appNameWithRandom = BASE_APP_NAME + intercept::common::generateRandomSuffix(); + iox::string appname(iox::TruncateToCapacity, appNameWithRandom.c_str(), appNameWithRandom.length()); + spdlog::info("create app name: {}", appNameWithRandom); + iox::runtime::PoshRuntime::initRuntime(appname); + return 0; +} + +void UnInitPosixClient() { +} + +// 初始化函数 +static __attribute__((constructor)) void Init(void) { + printf("Library loaded: PID %d TID: %lu\n", getpid(), (unsigned long)pthread_self()); + //GlobalInit(); +} + +// 退出函数 +static __attribute__((destructor)) void Clean(void) { + // std::cout << "readnum: " << readnum << " , total time : " << totalDuration.count() << " , average time : " << totalDuration.count() / readnum << std::endl; + pthread_t tid = pthread_self(); + pid_t pid = getpid(); + std::cout << "exit and kill, pid:" << (unsigned long)pid + << " threadid:" << (unsigned long) tid << std::endl; + //kill(getpid(), SIGINT); + //sleep(5); +} + +// ---------------------------posix func---------------------------------------------- + +// 判断字符串是否以指定挂载点开头 +bool StartsWithMountPath(const char *str) { + // 指定路径 + const std::string mountpath = "/testdir"; + //"/home/caiyi/shared_memory_code/iceoryx/iceoryx_examples/intercept/testdir"; + size_t prefixLen = mountpath.length(); + return strncmp(str, mountpath.c_str(), prefixLen) == 
0; +} + +std::string GetPath(const char* path) { + return ""; +} + +// 获取相对路径 +std::string GetRelativeFilePath(const std::string& fullPath) { + size_t found = fullPath.find_last_of("/\\"); + return fullPath.substr(found+1); +} + +// 判断路径是否有效 +bool IsValidPath(arg_type type, long arg0, long arg1) { + int fd = -1; + switch (type) { + case argFd: + fd = (int)arg0; + if (fd >= BEGIN_COUNTER && + (g_fdtofile.empty() == false && g_fdtofile.count(fd)) > 0) { + return true; + } else { + return false; + } + case argCstr: + if (StartsWithMountPath(reinterpret_cast(arg0))) { + return true; + } else { + // printf("cstr, not right filepath: %s\n", reinterpret_cast(arg0)); + return false; + } + case argAtfd: + if (StartsWithMountPath(reinterpret_cast(arg1)) || + (g_fdtofile.empty() == false && g_fdtofile.count((int)arg0)) > 0) { + return true; + } else { + // printf("atfd, not right filepath: %s\n", reinterpret_cast(arg1)); + return false; + } + case arg_: + return true; + default: + return false; + } +} + +// 判断系统调用是否需要拦截 +bool ShouldInterceptSyscall(const struct syscall_desc *desc, const long *args) { + return IsValidPath(desc->args[0], args[0], args[1]); +} + +const struct syscall_desc *GetSyscallDesc(long syscallNumber, + const long args[6]) { + //char buffer[1024]; + if (syscallNumber < 0 || + static_cast(syscallNumber) >= + sizeof(table) / sizeof(table[0]) || + table[syscallNumber].name == NULL || + ShouldInterceptSyscall(&table[syscallNumber], args) == false) { + return nullptr; + } + //sprintf(buffer, "right number:%ld, name:%s\n", syscallNumber, table[syscallNumber].name); + //printSyscall(buffer); + return table + syscallNumber; +} + +uint32_t GetNextFileDescriptor() { return g_fdCounter.fetch_add(1); } + +void InitSyscall() { + #ifdef __aarch64__ + //REGISTER_CALL(access, Access, argCstr, argMode); + REGISTER_CALL(faccessat, Faccessat, argAtfd, argCstr, argMode); + //REGISTER_CALL(open, Open, argCstr, argOpenFlags, argMode); + REGISTER_CALL(close, Close, argFd); + 
REGISTER_CALL(openat, Openat, argAtfd, argCstr, argOpenFlags, argMode); + //REGISTER_CALL(creat, Creat, argCstr, argMode); + REGISTER_CALL(write, Write, argFd); + REGISTER_CALL(read, Read, argFd); + REGISTER_CALL(fsync, Fsync, argFd); + REGISTER_CALL(lseek, Lseek, argFd); + //REGISTER_CALL(stat, Stat, argCstr); + // for fstatat + REGISTER_CALL(newfstatat, Newfstatat, argAtfd, argCstr); + REGISTER_CALL(fstat, Fstat, argFd); + REGISTER_CALL(statx, Statx, argAtfd, argCstr); + //REGISTER_CALL(lstat, Lstat, argCstr); + //REGISTER_CALL(mkdir, MkDir, argCstr, argMode); + REGISTER_CALL(mkdirat, MkDirat, argAtfd, argCstr, argMode); + REGISTER_CALL(getdents64, Getdents64, argFd, argCstr, arg_); + //REGISTER_CALL(unlink, Unlink, argCstr); + REGISTER_CALL(unlinkat, Unlinkat, argAtfd, argCstr, argMode); + //REGISTER_CALL(rmdir, Rmdir, argCstr); + REGISTER_CALL(chdir, Chdir, argCstr); + REGISTER_CALL(utimensat, Utimensat, argAtfd, argCstr); + REGISTER_CALL(statfs, Statfs, argCstr); + REGISTER_CALL(fstatfs, Fstatfs, argFd); + + REGISTER_CALL(truncate, Truncate, argCstr); + REGISTER_CALL(ftruncate, Ftruncate, argFd); + REGISTER_CALL(renameat, Renameat, argAtfd, argCstr); + #else + REGISTER_CALL(access, Access, argCstr, argMode); + REGISTER_CALL(faccessat, Faccessat, argAtfd, argCstr, argMode); + REGISTER_CALL(open, Open, argCstr, argOpenFlags, argMode); + REGISTER_CALL(close, Close, argFd); + REGISTER_CALL(openat, Openat, argAtfd, argCstr, argOpenFlags, argMode); + REGISTER_CALL(creat, Creat, argCstr, argMode); + REGISTER_CALL(write, Write, argFd); + REGISTER_CALL(read, Read, argFd); + REGISTER_CALL(fsync, Fsync, argFd); + REGISTER_CALL(lseek, Lseek, argFd); + REGISTER_CALL(stat, Stat, argCstr); + // for fstatat + REGISTER_CALL(newfstatat, Newfstatat, argAtfd, argCstr); + REGISTER_CALL(fstat, Fstat, argFd); + REGISTER_CALL(lstat, Lstat, argCstr); + REGISTER_CALL(mkdir, MkDir, argCstr, argMode); + REGISTER_CALL(getdents64, Getdents64, argFd, argCstr, arg_); + REGISTER_CALL(unlink, 
Unlink, argCstr); + REGISTER_CALL(unlinkat, Unlinkat, argAtfd, argCstr, argMode); + REGISTER_CALL(rmdir, Rmdir, argCstr); + REGISTER_CALL(chdir, Chdir, argCstr); + REGISTER_CALL(utimensat, Utimensat, argAtfd, argCstr); + REGISTER_CALL(statfs, Statfs, argCstr); + REGISTER_CALL(fstatfs, Fstatfs, argFd); + + REGISTER_CALL(truncate, Truncate, argCstr); + REGISTER_CALL(ftruncate, Ftruncate, argFd); + REGISTER_CALL(rename, Rename, argCstr, argCstr); + #endif +} + +int PosixOpAccess(const long *args, long *result) { + return 0; +} + +int PosixOpFaccessat(const long *args, long *result) { + return PosixOpAccess(args + 1, result); +} + +int PosixOpOpen(const long *args, long *result) { + ThreadInit(); + const char* path = (const char*)args[0]; + int flags = args[1]; + mode_t mode = args[2]; + + if (flags & O_DIRECTORY) { + intercept::internal::OpendirOpReqRes req(path); + g_wrapper->OnRequest(req); + const auto& openRes = static_cast (req.GetResponse()); + // 向上游返回的fd + *result = openRes.dirStream.fh + BEGIN_COUNTER; + // 记录打开的fd + PosixInfo info; + info.fd = *result; + info.dirinfo = openRes.dirStream; + info.fileType = FileType::DIR; + g_fdtofile[*result] = info; + std::cout << "the opendir result fd is: " << *result << std::endl; + } else { + intercept::internal::OpenOpReqRes req(path, flags, mode); + g_wrapper->OnRequest(req); + const auto& openRes = static_cast (req.GetResponse()); + // 向上游返回的fd + *result = openRes.fd + BEGIN_COUNTER; + // 记录打开的fd + PosixInfo info; + info.fd = *result; + info.fileType = FileType::FILE; + info.fileName = path; + g_fdtofile[*result] = info; + spdlog::info("the open result fd: {}, path: {}", *result, path); + } + return 0; +} + +int PosixOpOpenat(const long *args, long *result) { + return PosixOpOpen(args + 1, result); // args[0] is dir fd, jump +} + +int PosixOpCreat(const long *args, long *result) { + return 0; +} + +int PosixOpRead(const long *args, long *result) { + ThreadInit(); + int fd = args[0] - BEGIN_COUNTER; + char* buf = 
(char*)args[1]; + int count = args[2]; + const auto& info = g_fdtofile[fd]; + std::string timeinfo = "client read, count: " + std::to_string(count) + " filename: " + info.fileName; + intercept::common::Timer timer(timeinfo); + + intercept::internal::ReadOpReqRes readReq(fd, buf, count); + //intercept::common::Timer timer("client OnRequest"); + g_wrapper->OnRequest(readReq); + + const auto& readRes = static_cast (readReq.GetResponse()); + *result = readRes.length; + spdlog::debug("read fd: {}, length: {}", fd, readRes.length); + return 0; +} + +int PosixOpWrite(const long *args, long *result) { + spdlog::debug("get write request..."); + ThreadInit(); + int fd = args[0] - BEGIN_COUNTER; + char* writebuf = (char*)args[1]; + int count = args[2]; + std::string timeinfo = "client write, count: " + std::to_string(count); + intercept::common::Timer timer(timeinfo); + intercept::internal::WriteOpReqRes writeReq(fd, writebuf, count); + g_wrapper->OnRequest(writeReq); + const auto& writeRes = static_cast (writeReq.GetResponse()); + *result = writeRes.length; + spdlog::debug("write fd: {}, length: {}", fd, writeRes.length); + return 0; +} +int PosixOpFsync(const long *args, long *result) { + ThreadInit(); + int fd = args[0] - BEGIN_COUNTER; + spdlog::info("begin fsync, fd: {}", fd); + intercept::internal::FsyncOpReqRes fsyncReq(fd); + g_wrapper->OnRequest(fsyncReq); + const auto& fsyncRes = static_cast (fsyncReq.GetResponse()); + *result = fsyncRes.ret; + spdlog::info("the fysnc result is: {}", *result); + return 0; +} + +int PosixOpLseek(const long *args, long *result) { + ThreadInit(); + int fd = args[0] - BEGIN_COUNTER; + long offset = args[1]; + int whence = args[2]; + intercept::internal::LseekOpReqRes lseekReq(fd, offset, whence); + g_wrapper->OnRequest(lseekReq); + const auto& lseekRes = static_cast (lseekReq.GetResponse()); + *result = lseekRes.ret; + // std::cout << "the lseek result is: " << *result << " , the offset: "<< offset << std::endl; + spdlog::debug("lseek, 
fd: {}, offset: {}, whence: {}, result: {}", fd, offset, whence, *result); + return 0; +} + +int PosixOpStat(const long *args, long *result) { + ThreadInit(); + spdlog::debug("it is opstat..."); + const char* filename = (const char*) args[0]; + struct stat* statbuf = (struct stat*) args[1]; + intercept::internal::StatOpReqRes statReq(filename, statbuf); + g_wrapper->OnRequest(statReq); + const auto& statRes = static_cast (statReq.GetResponse()); + // 向上游返回的fd + *result = statRes.ret; + spdlog::debug("the stat result fd: {}", *result); + return 0; +} +int PosixOpNewfstatat(const long *args, long *result) { + std::cout << "newfstatat" << std::endl; + // TODO: 以args[0]为起点,找到args[1]路径 + int ret = 0; + if (strlen((char*)args[1]) == 0) { + // 空目录 + long newargs[4]; + newargs[0] = args[0]; + newargs[1] = args[2]; + return PosixOpFstat(newargs, result); + } + return PosixOpStat(args + 1, result); +} + +int PosixOpLstat(const long *args, long *result) { + std::cout << "call PosixOpLstat" << std::endl; + return PosixOpStat(args, result); +} + +int PosixOpFstat(const long *args, long *result) { + ThreadInit(); + spdlog::debug("it is opfstat..."); + int fd = args[0] - BEGIN_COUNTER; + struct stat* statbuf = (struct stat*) args[1]; + intercept::internal::FstatOpReqRes statReq(fd, statbuf); + g_wrapper->OnRequest(statReq); + const auto& statRes = static_cast (statReq.GetResponse()); + // 向上游返回的fd + *result = statRes.ret; + spdlog::debug("the fstat result fd: {}, the stat ino: {}, size: {}", + fd, statbuf->st_ino, statbuf->st_size); + return 0; +} + +int PosixOpFstat64(const long *args, long *result) { + std::cout << "it is opfstat64" << std::endl; + return 0; +} + +int PosixOpStatx(const long *args, long *result) { + ThreadInit(); + std::cout << "it is opstatx" << std::endl; + const char* filename = (const char*) args[1]; + struct statx* fileStat = (struct statx*) args[4]; + struct stat tmpStat; + intercept::internal::StatOpReqRes statReq(filename, &tmpStat); + 
g_wrapper->OnRequest(statReq); + const auto& statRes = static_cast (statReq.GetResponse()); + if (statRes.ret != 0 ) { + std::cout << "get stat failed.." << std::endl; + } + + *result = statRes.ret; + // inode number + fileStat->stx_ino = (uint64_t)tmpStat.st_ino; + + // total size, in bytes + fileStat->stx_size = (uint64_t)tmpStat.st_size; + + // protection + fileStat->stx_mode = (uint32_t)tmpStat.st_mode; + + // number of hard links + fileStat->stx_nlink = (uint32_t)tmpStat.st_nlink; + + // user ID of owner + fileStat->stx_uid = (uint32_t)tmpStat.st_uid; + + // group ID of owner + fileStat->stx_gid = (uint32_t)tmpStat.st_gid; + + // last access time + fileStat->stx_atime.tv_sec = tmpStat.st_atim.tv_sec; + fileStat->stx_atime.tv_nsec = tmpStat.st_atim.tv_nsec; + + // last modification time + fileStat->stx_mtime.tv_sec = tmpStat.st_mtim.tv_sec; + fileStat->stx_mtime.tv_nsec = tmpStat.st_mtim.tv_nsec; + + // last status change time + fileStat->stx_ctime.tv_sec = tmpStat.st_ctim.tv_sec; + fileStat->stx_ctime.tv_nsec = tmpStat.st_ctim.tv_nsec; + + // 示意性地为stx_attributes设置一个默认值,实际上这需要更具体的场景考虑 + fileStat->stx_attributes = 0; // 假设没有额外的属性 + + // stx_attributes_mask通常和stx_attributes一起使用,表示希望查询或设置哪些属性 + fileStat->stx_attributes_mask = 0; // 示意性地设置,可能需要根据场景具体调整 + return 0; +} + +int PosixOpClose(const long *args, long *result) { + if (g_fdtofile.find((int)args[0]) == g_fdtofile.end()) { + std::cout << "fd not found: " << args[0] << std::endl; + } + const auto& info = g_fdtofile[(int)args[0]]; + if (info.fileType == FileType::FILE) { + int fd = args[0] - BEGIN_COUNTER; + intercept::internal::CloseOpReqRes req(fd); + spdlog::info("begin close, fd: {}", fd); + g_wrapper->OnRequest(req); + const auto& closeRes = static_cast (req.GetResponse()); + // 向上游返回的fd + *result = closeRes.ret; + spdlog::info("the close result, fd: {}", fd); + } else if (info.fileType == FileType::DIR) { + int fd = args[0] - BEGIN_COUNTER; + intercept::internal::ClosedirOpReqRes req(info.dirinfo); + 
g_wrapper->OnRequest(req); + const auto& closeRes = static_cast (req.GetResponse()); + // 向上游返回的fd + *result = closeRes.ret; + std::cout << "the closedir result fd is: " << fd << std::endl; + } else { + std::cout << "unknown file type for close" << std::endl; + } + g_fdtofile.erase((int)args[0]); + return 0; +} + +int PosixOpMkDir(const long *args, long *result) { + ThreadInit(); + const char* path = (const char*) args[0]; + mode_t mode = args[1]; + intercept::internal::MkdirOpReqRes req(path, mode); + g_wrapper->OnRequest(req); + const auto& mkdirRes = static_cast (req.GetResponse()); + // 向上游返回的fd + *result = mkdirRes.ret; + std::cout << "the mkdir result fd is: " << *result << std::endl; + return 0; +} + +int PosixOpMkDirat(const long *args, long *result) { + // 直接按照绝对路径处理 + return PosixOpMkDir(args + 1, result); +} + +int PosixOpOpenDir(const long *args, long *result) { + std::cout << "open dir....." << std::endl; + return 0; +} + +int PosixOpGetdents64(const long *args, long *result) { + int fd = args[0] - BEGIN_COUNTER; + char* data = (char*)args[1]; + size_t maxread = args[2]; + if (g_fdtofile.find(args[0]) == g_fdtofile.end()) { + std::cout << "fd not found" << std::endl; + *result = 0; + return 0; + } + std::cout << "getdents request, fd: " << fd << " maxread: " << maxread << std::endl; + PosixInfo& posixinfo = g_fdtofile[args[0]]; + intercept::internal::GetdentsOpReqRes req(posixinfo.dirinfo, data, maxread); + g_wrapper->OnRequest(req); + const auto& getdentsRes = static_cast (req.GetResponse()); + posixinfo.dirinfo.offset = getdentsRes.dirinfo.offset; + *result = getdentsRes.realbytes; + std::cout << "the getdents result bytes:" << getdentsRes.realbytes << ", offset is: " << getdentsRes.dirinfo.offset << std::endl; + return 0; +} + +int PosixOpRmdir(const long *args, long *result) { + std::cout << "rmdir, call thePosixOpUnlink " << std::endl; + PosixOpUnlink(args, result); + return 0; +} + +int PosixOpChdir(const long *args, long *result) { + return 0; 
+} + +int PosixOpUnlink(const long *args, long *result) { + const char* path = (const char*) args[0]; + intercept::internal::UnlinkOpReqRes req(path); + g_wrapper->OnRequest(req); + const auto& unlinkRes = static_cast (req.GetResponse()); + // 向上游返回的fd + *result = unlinkRes.ret; + std::cout << "the unlink path: " << path << " ,result fd is: " << *result << std::endl; + return 0; +} + +int PosixOpUnlinkat(const long *args, long *result) { + const char *filename = (const char *)args[1]; + int flags = args[2]; + if (flags & AT_REMOVEDIR) { + // 删除目录 + std::cout << "unlinkat remove dir..." << std::endl; + PosixOpRmdir(args + 1, result); + return 0; + } + int ret = 0; + // 暂不支持从指定位置开始删除 + ret = PosixOpUnlink(args + 1, result); + std::cout << "unlinkat... ret: " << ret << std::endl; + return ret; +} + +int PosixOpUtimensat(const long* args, long *result) { + int dirfd = args[0]; + return 0; +} + +int PosixOpExitgroup(const long* args, long *result) { + return 0; +} + +int PosixOpStatfs(const long* args, long *result) { + return 0; +} + +int PosixOpFstatfs(const long* args, long *result) { + return 0; +} + +int PosixOpTruncate(const long* args, long *result) { + const char* path = (const char*) args[0]; + off_t length = args[1]; + intercept::internal::TruncateOpReqRes req(path, length); + g_wrapper->OnRequest(req); + const auto& truncateRes = static_cast (req.GetResponse()); + // 向上游返回的fd + *result = truncateRes.ret; + std::cout << "the truncate path: " << path << " ,result fd is: " << *result << std::endl; + return 0; +} + +int PosixOpFtruncate(const long* args, long *result) { + return 0; +} + +int PosixOpRename(const long *args, long *result) { + return 0; +} + +int PosixOpRenameat(const long *args, long *result) { + // 假设都从根目录开始 + const char *oldpath = (const char *)args[1]; + const char* newpath = (const char*)args[3]; + intercept::internal::RenameOpReqRes req(oldpath, newpath); + g_wrapper->OnRequest(req); + const auto& renameRes = static_cast (req.GetResponse()); + 
// 向上游返回的fd + *result = renameRes.ret; + std::cout << "the rename path: " << oldpath << " ,result fd is: " << *result << std::endl; + return 0; +} + + diff --git a/intercept/posix/posix_op.h b/intercept/posix/posix_op.h new file mode 100644 index 0000000..cba3267 --- /dev/null +++ b/intercept/posix/posix_op.h @@ -0,0 +1,493 @@ + +#ifndef CURVEFS_SRC_CLIENT_CURVE_POSIX_OP_H_ +#define CURVEFS_SRC_CLIENT_CURVE_POSIX_OP_H_ + +#include +#include + +// #include "curvefs/src/client/filesystem/meta.h" +// using ::curvefs::client::filesystem::PosixFile; + +// extern std::unordered_map g_fdtofile; + +typedef int (*syscallFunction_t)(const long *args, long *result); + +enum arg_type { + argNone, + argFd, + argAtfd, + argCstr, + argOpenFlags, + argMode, + arg_ /* no special formatting implemented yet, print as hex number */ +}; + +struct syscall_desc { + const char *name; + syscallFunction_t syscallFunction; + enum arg_type args[6]; +}; + +extern struct syscall_desc table[1000]; + + +bool ShouldInterceptSyscall(const struct syscall_desc* desc, const long* args); + +void InitSyscall(); + +const struct syscall_desc* GetSyscallDesc(long syscallNumber, const long args[6]); + +bool StartsWithMountPath(const char* str); + +int GlobalInit(); + +void UnInitPosixClient(); + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * The access() function is used to check the permissions of a file or directory. + * + * @param args[0] const char* path The path name of the file or directory to be checked. + * @param args[1] int: mode The mode specifies the desired permissions to be verified, and can be a combination of the following constants using bitwise OR: + * - R_OK: Check if the file or directory is readable. + * - W_OK: Check if the file or directory is writable. + * - X_OK: Check if the file or directory is executable. + * - F_OK: Check if the file or directory exists. + * @return If the file or directory has the specified permissions (or exists), it returns 0. 
Otherwise, it returns -1 (with an errno error code set). + */ +int PosixOpAccess(const long *args, long *result); + +/** + * The faccessat() function is used to check the permissions of a file or directory relative to a specified directory file descriptor. + * + * @param args[0] int: dirfd The file descriptor of the directory from which the path is relative. + * @param args[1] const char* pathname The relative path name of the file or directory to be checked. + * @param args[2] int The mode specifies the desired permissions to be verified, and can be a combination of the following constants using bitwise OR: + * - R_OK: Check if the file or directory is readable. + * - W_OK: Check if the file or directory is writable. + * - X_OK: Check if the file or directory is executable. + * - F_OK: Check if the file or directory exists. + * @param args[3] int Flags for controlling how the function operates, such as AT_SYMLINK_NOFOLLOW to not follow symbolic links. + * @return If the file or directory has the specified permissions (or exists), it returns 0. Otherwise, it returns -1 (with an errno error code set). + */ +int PosixOpFaccessat(const long *args, long *result); + +/** + * Open a file + * + * Opens the file specified by 'path' with the given 'flags'. + * The 'flags' parameter provides information about the access mode + * (read, write, read-write) and other options for opening the file. + * + * args[0]: path The path of the file to be opened + * args[1]: flags The flags controlling the file open operation + * args[2]: mode The mode for accessing file, only be used for creating new file + * result: The file descriptor on success, or -1 on failure with errno set + */ +int PosixOpOpen(const long *args, long *result); + +int PosixOpOpenat(const long *args, long *result); + +/** + * Creates a new file or truncates an existing file. + * + * args[0] pathname The path to the file to be created. + * args[1] mode The permissions to be set for the newly created file. 
+ * + * result: On success, the file descriptor for the newly created file is returned. + * On error, -1 is returned, and errno is set appropriately. + */ +int PosixOpCreat(const long *args, long *result); + + +/** + * Read data from a file + * + * Reads up to 'count' bytes from the file associated with the file + * descriptor 'fd' into the buffer pointed to by 'buf', + * The actual number of bytes read is returned. + * + * args[0]: int fd: The file descriptor of the file to read from + * args[1]: void* buf: The buffer to store the read data + * args[2]: size_t count: The maximum number of bytes to read + * result: The number of bytes read on success, or -1 on failure with errno set + */ +int PosixOpRead(const long *args, long *result); + + +/** + * Read data from a file + * + * Reads up to 'count' bytes from the file associated with the file + * descriptor 'fd' into the buffer pointed to by 'buf', starting at + * the specified 'offset'. The actual number of bytes read is returned. + * + * args[0] int fd: The file descriptor of the file to read from + * args[1] void* buf: The buffer to store the read data + * args[2] size_t count: The maximum number of bytes to read + * args[3] off_t offset: The offset within the file to start reading from + * result: The number of bytes read on success, or -1 on failure with errno set + */ +int PosixOpPread(const long *args, long *result); + + +/** + * Write data to a file + * + * Writes up to 'count' bytes from the buffer pointed to by 'buf' + * to the file associated with the file descriptor 'fd'. + * The actual number of bytes written is returned. 
+ * + * args[0] int fd: The file descriptor of the file to write to + * args[1] const void* buf: The buffer containing the data to be written + * args[2] size_t count: The number of bytes to write + * result: The number of bytes written on success, or -1 on failure with errno set + */ +int PosixOpWrite(const long *args, long *result); + +/** + * Write data to a file + * + * Writes up to 'count' bytes from the buffer pointed to by 'buf' + * to the file associated with the file descriptor 'fd', starting at + * the specified 'offset'. The actual number of bytes written is returned. + * + * args[0] int fd: The file descriptor of the file to write to + * args[1] const void* buf: The buffer containing the data to be written + * args[2] size_t count: The number of bytes to write + * args[3] off_t offset: The offset within the file to start writing to + * result: The number of bytes written on success, or -1 on failure with errno set + */ +int PosixOpPwrite(const long *args, long *result); + + +/** + * Sets the current read/write position of a file descriptor. + * + * args[0] int fd: The file descriptor representing the file. + * args[1] off_t offset: The offset relative to the 'whence' position. + * args[2] int whence: The reference position for calculating the offset: + * - SEEK_SET: Calculates from the beginning of the file. + * - SEEK_CUR: Calculates from the current position. + * - SEEK_END: Calculates from the end of the file. + * + * result The new offset of the file, or -1 if an error occurs. + */ +int PosixOpLseek(const long *args, long *result); + +/** + * Close a file + * + * args[0] int fd: The file descriptor of the file to close + * result: 0 on success, or -1 on failure with errno set + */ +int PosixOpClose(const long *args, long *result); + +/** + * Create a directory. 
+ * + * args[0] const char* name: Name of the directory to create + * args[1] mode_t mode: Mode with which to create the new directory + * result: 0 on success, -1 on failure + */ +int PosixOpMkDir(const long *args, long *result); + +/** + * mkdirat - create a new directory relative to a directory file descriptor + * @dirfd: the file descriptor of the base directory + * @pathname: the pathname of the new directory to be created + * @mode: the permissions to be set for the new directory + * + * Returns: 0 on success, or -1 on failure + */ +int PosixOpMkDirat(const long *args, long *result); + +/** + * Open a directory + * + * @args[0] const char* name: dirname The path to the directory you want to open. + * + * @result: If successful, returns a pointer to a DIR structure that can be + * used for subsequent directory operations. If there's an error, + * it returns NULL, and you can use the errno variable to check the + * specific error. + */ +int PosixOpOpenDir(const long *args, long *result); + +/** + * Read directory entries from a directory file descriptor. + * + * @args[0]: fd File descriptor of the directory to read. + * @args[1]: dirp Pointer to a buffer where the directory entries will be stored. + * @args[2]: count The size of the buffer `dirp` in bytes. + * + * @result: realbytes, On success, returns the number of bytes read into the buffer `dirp`. + * On error, returns -1 and sets the appropriate errno. + */ +//ssize_t PosixOpGetdents64(int fd, struct linux_dirent64 *dirp, size_t count); +int PosixOpGetdents64(const long *args, long *result); + +/** + * Deletes a directory, which must be empty. + * + * + * args[0] const char* name: Name of the directory to remove + * result: 0 on success, -1 on failure + */ +int PosixOpRmdir(const long *args, long *result); + +/** + A function to change the current working directory of the calling process. 
+ @param args - A pointer to a null-terminated string specifying the path to the new working directory + @param result - A pointer to an integer where the result of the operation will be stored. + On successful completion, 0 will be returned. + In case of failure, a non-zero value is returned. + @return - On successful completion, the function should return 0. + If the function encounters an error, it will return -1 and set errno accordingly. +*/ +int PosixOpChdir(const long *args, long *result); + +/** + * Rename a file + * + * Renames the file specified by 'oldpath' to 'newpath'. + * If 'newpath' already exists, it should be replaced atomically. + * If the target's inode's lookup count is non-zero, the file system + * is expected to postpone any removal of the inode until the lookup + * count reaches zero.s + * + * args[0] const char* oldpath: The path of the file to be renamed + * args[1] const char* newpath: The new path of the file + * result: 0 on success, or -1 on failure with errno set + */ +int PosixOpRename(const long *args, long *result); + +/* + * Renameat renames a file, moving it between directories if required. + * + * args[0] int olddirfd: The file descriptor of the directory containing the file to be renamed + * args[1] const char* oldpath: The path of the file to be renamed + * args[2] int newdirfd: The file descriptor of the directory containing the new path of the file + * args[3] const char* newpath: The new path of the file + * result: 0 on success, or -1 on failure with errno set + * +*/ +int PosixOpRenameat(const long *args, long *result); + + +/** + * Get pathname attributes. + * + * args[0] const char* pathname: The path name + * args[1] struct stat* attr: Pointer to struct stat to store the file attributes + + * result: 0 on success, -1 on failure + */ +int PosixOpStat(const long *args, long *result); + +/** + * Get file attributes. 
+ * + * args[0] int fd: file descriptor + * args[1] struct stat* attr: Pointer to struct stat to store the file attributes + + * result: 0 on success, -1 on failure + */ +int PosixOpFstat(const long *args, long *result); + +/** + * Get file status relative to a directory file descriptor + * args[0] int dirfd + * args[1] pathname + * args[2] struct stat* buf + * args[3] flags :can either be 0, or include one or more of the following flags ORed: + * AT_EMPTY_PATH AT_NO_AUTOMOUNT AT_SYMLINK_NOFOLLOW +*/ +int PosixOpNewfstatat(const long *args, long *result); + +/** + * Get file status information for a symbolic link or file. + * + * args[0] const char* pathname The path to the symbolic link or file. + * args[1] struct stat* statbuf A pointer to a struct stat object where the file status + * information will be stored. + * + * result: On success, 0 is returned. On error, -1 is returned, and errno is + * set appropriately. If the symbolic link is encountered and the + * 'pathname' argument refers to a symbolic link, then the 'statbuf' + * parameter will contain information about the link itself rather + * than the file it refers to. + */ +int PosixOpLstat(const long *args, long *result); + +/* + Obtain file status information. + + Parameters: + - args[0] dirfd: A file descriptor referring to the directory in which the file resides. + Use AT_FDCWD to refer to the current working directory. + - args[1] pathname: The path to the file whose status information is to be retrieved. + - args[2] flags: Flags controlling the behavior of the call. + - args[3] mask: Mask specifying which fields in the returned 'statx' structure should be populated. + - args[4] statxbuf: Pointer to the 'statx' structure where the retrieved status information is stored. + + Return Value: + - On success, returns 0. The 'statxbuf' structure contains the requested file status information. + - On failure, returns -1 and sets errno to indicate the error. 
+*/ +int PosixOpStatx(const long *args, long *result); + +/** + * Creates a symbolic link. + * + * args[0] const char* target: The target file or directory that the symbolic link should point to. + * args[1] const cahr* linkpath: The path and name of the symbolic link to be created. + * + * result: On success, 0 is returned. On error, -1 is returned, and errno is + * set appropriately. + */ +int PosixOpSymlink(const long *args, long *result); + + +/** + * Create a hard link + * + * Creates a hard link between the file specified by 'oldpath' + * and the 'newpath'. + * + * args[0] const char* oldpath: The path of the existing file + * args[1] const char* newpath: The path of the new link to be created + * result: 0 on success, or -1 on failure with errno set + */ +void PosixOpLink(const long *args, long *result); + +/** + * Deletes a file by removing its directory entry. + * + * args[0] const char* pathname: The path to the file to be deleted. + * + * result: On success, 0 is returned. On error, -1 is returned, and errno is + * set appropriately. + */ +int PosixOpUnlink(const long *args, long *result); + +/* + * Deletes a specified file or directory at a given path + * + * args[0] dirfd: A file descriptor representing the directory in which to perform the unlinkat operation. + * Typically, you can use AT_FDCWD to indicate the current working directory. + * This parameter specifies the base directory for the operation. + * args[1] pathname: The path to the file to be removed. It can be either a relative or absolute path, + * depending on the setting of dirfd. + * args[2] flags: An integer value used to control the behavior of the unlinkat operation. + * You can use flags to influence the operation. Common flags include 0 (default behavior) + * and AT_REMOVEDIR (to remove a directory instead of a file). + + * result: On success, returns 0, indicating the successful removal of the file or directory. 
+ * On failure, returns -1 and sets the global variable errno to indicate the type of error. + */ +int PosixOpUnlinkat(const long *args, long *result); + + +/** + * Synchronize the file data and metadata to disk. + * + * arg[0] int fd The file descriptor associated with the file. + * + * result: On success, the function should return 0. On error, it should + * return a negative value, + */ +int PosixOpFsync(const long* args, long *result); + +/* + * int utimensat(int dirfd, const char *pathname, const struct timespec *times, int flags); + * + * args[0] dirfd:The file descriptor of the directory containing the file or directory to be modified. + * If dirfd is AT_FDCWD, then the current working directory is used. + * + * args[1] pathname: The path to the file or directory to be modified. + * + * args[2] times: A pointer to a structure containing the new access and modification times for the file or directory. + * If times is NULL, then the current time is used for both times. + * + * args[3] flags: A bitwise OR of flags that modify the behavior of the call. + * See the `man utimensat` page for a list of supported flags. + * + * result: 0 on success; -1 on error, with errno set to the error number. + */ +int PosixOpUtimensat(const long* args, long *result); + + +/** + * Terminate all threads in a process and exit. + * + * This system call terminates all threads in the calling process and + * causes the process to exit. The exit status of the process is + * specified by the parameter "status". + * + * args[0] status The exit status of the process. + */ +int PosixOpExitgroup(const long* args, long *result); + + +/** + * statfs() - Get filesystem statistics + * + * @param args[0] path The path to the filesystem to query. + * @param args[1] buf A pointer to a statfs structure to store the results. + * + * @return 0 on success, or a negative error code on failure. 
+ * + */ +int PosixOpStatfs(const long* args, long *result); + +/** + * fstatfs() - Get filesystem statistics for a file descriptor + * + * @param args[0] fd The file descriptor of the filesystem to query. + * @param args[1] buf A pointer to a statfs structure to store the results. + * + * @return 0 on success, or a negative error code on failure. + */ +int PosixOpFstatfs(const long* args, long *result); + +/** + * @brief Truncate a file to the specified length. + * + * This function truncates the file specified by the given path to the specified + * length. If the file is larger than the specified length, it is truncated to + * the specified size; if it is smaller, it is extended and filled with zeros. + * + * @param args[0] path: The path to the file to be truncated. + * @param args[1] length:The desired length to which the file should be truncated. + * + * @return On success, returns 0. On failure, returns -1, and sets errno to indicate + * the error type. + */ +int PosixOpTruncate(const long* args, long *result); + +/** + * @brief Truncate a file opened with the specified file descriptor to the specified length. + * + * This function truncates the file associated with the given file descriptor to the + * specified length. If the file is larger than the specified length, it is truncated; + * if it is smaller, it is extended and filled with zeros. + * + * @param args[0] :fd The file descriptor of the file to be truncated. + * @param args[1]: length The desired length to which the file should be truncated. + * + * @return On success, returns 0. On failure, returns -1, and sets errno to indicate + * the error type. 
+ */ +int PosixOpFtruncate(const long* args, long *result); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // CURVEFS_SRC_CLIENT_CURVE_POSIX_OP_H_ + diff --git a/intercept/posix/syscall_client.h b/intercept/posix/syscall_client.h new file mode 100644 index 0000000..3cd5cf8 --- /dev/null +++ b/intercept/posix/syscall_client.h @@ -0,0 +1,44 @@ +#ifndef CURVEFS_SRC_CLIENT_SYSCALL_CLIENT_ +#define CURVEFS_SRC_CLIENT_SYSCALL_CLIENT_ + +#include +#include +#include +#include + +#include "posix/libsyscall_intercept_hook_point.h" +//#include "syscall_interception.h" +#include "posix/posix_op.h" +#include +#include +#include + +// 拦截函数 + +static int hook(long syscallNumber, + long arg0, long arg1, + long arg2, long arg3, + long arg4, long arg5, + long* result) { + + long args[6] = {arg0, arg1, arg2, arg3, arg4, arg5}; + const struct syscall_desc* desc = GetSyscallDesc(syscallNumber, args); + if (desc != nullptr) { + int ret = desc->syscallFunction(args, result); + //return 0; // 接管 + return ret; + } + + return 1; // 如果不需要拦截,返回1 +} + +// 初始化函数 +static __attribute__((constructor)) void start(void) { + InitSyscall(); + intercept_hook_point = &hook; +} + +#endif + + + diff --git a/intercept/registry/CMakeLists.txt b/intercept/registry/CMakeLists.txt new file mode 100644 index 0000000..34d412f --- /dev/null +++ b/intercept/registry/CMakeLists.txt @@ -0,0 +1,40 @@ +# src/registry/CMakeLists.txt + +find_library(ICEORYX_POSH_LIB NAMES iceoryx_posh PATHS ../../thirdparties/iceoryx/lib) +find_library(ICEORYX_HOOFS_LIB iceoryx_hoofs PATHS ../thirdparties/iceoryx/lib) +find_library(ICEORYX_PLATFORM_LIB iceoryx_platform PATHS ../thirdparties/iceoryx/lib) + +file(GLOB REGISTRY_SOURCES *.cpp) +file(GLOB REGISTRY_HEADERS *.h) + +add_library(intercept_registry ${REGISTRY_SOURCES}) +target_include_directories(intercept_registry PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/include +) +target_link_libraries(intercept_registry PUBLIC + 
intercept_middleware + intercept_discovery + ${ICEORYX_HOOFS_LIB} + ${ICEORYX_PLATFORM_LIB} + ${ICEORYX_POSH_LIB} +) + + +file(GLOB CLIENT_REGISTRY_SOURCES *.cpp) +file(GLOB CLIENT_REGISTRY_HEADERS *.h) + +add_library(intercept_registry_client ${CLIENT_REGISTRY_SOURCES}) +target_include_directories(intercept_registry_client PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparties/iceoryx/include +) +target_link_libraries(intercept_registry_client PUBLIC + intercept_middleware_client + intercept_discovery_client + ${ICEORYX_POSH_LIB} + ${ICEORYX_HOOFS_LIB} + ${ICEORYX_PLATFORM_LIB} + -lrt +) +target_compile_options(intercept_registry_client PUBLIC -DCLIENT_BUILD ) \ No newline at end of file diff --git a/intercept/registry/client_server_registry.cpp b/intercept/registry/client_server_registry.cpp new file mode 100644 index 0000000..7a87bf0 --- /dev/null +++ b/intercept/registry/client_server_registry.cpp @@ -0,0 +1,169 @@ +#include +#include +#include + +#include "middleware/iceoryx_wrapper.h" +#include "client_server_registry.h" + +namespace intercept { +namespace registry { + +using intercept::discovery::IceoryxDiscovery; +using intercept::middleware::IceoryxWrapper; +std::string generateRandomString(int length) { + std::string result; + const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + srand(time(0)); // 初始化随机数生成器 + for (int i = 0; i < length; i++) { + int randomIndex = rand() % strlen(charset); + result += charset[randomIndex]; + } + return result; +} + +ClientServerRegistry::ClientServerRegistry(const std::string& middlewareType, const ServiceMetaInfo& info) { + // 根据middlewareType创建对应的ServiceDiscovery + discovery_ = std::make_shared(); + serviceInfo_ = info; + middlewareType_ = middlewareType; + spdlog::info("ClientServerRegistry init"); +} + +ClientServerRegistry::~ClientServerRegistry() { + spdlog::info("ClientServerRegistry destory"); +} + +// 在用户侧,创建dummpserver + +std::shared_ptr 
ClientServerRegistry::CreateDummyServer() {
+    // Event name = random string + pid + tid, so each calling thread is unique.
+    ServiceMetaInfo info;
+    info.service = SERVICE_FLAG;
+    info.instance = DUMMY_INSTANCE_FLAG;
+    pid_t pid = getpid();
+    auto myid = std::this_thread::get_id();
+    std::stringstream ss;
+    ss << myid;
+    std::string threadid = ss.str();
+    info.event = generateRandomString(10) + std::to_string((long)pid) + threadid;
+    info.serverType = "dummy";
+
+    spdlog::info("ClientServerRegistry try to create dummy server, the service: {}, instance: {}, event: {}",
+        info.service, info.instance, info.event);
+
+    if (middlewareType_ != ICEORYX) {
+        return nullptr;  // unknown middleware type: avoid dereferencing a null wrapper below
+    }
+    auto wrapper = std::make_shared(info);
+    wrapper->SetServiceType(intercept::middleware::ServiceType::DUMMYSERVER);
+    wrapper->InitDummyServer();
+    spdlog::info("ClientServerRegistry finish creating dummy server, server: {}, instance: {}, event: {}",
+        info.service, info.instance, info.event);
+    return wrapper;
+}
+
+void ClientServerRegistry::DestroyDummyServer() {
+    // TODO: not implemented; dummy servers currently die with their owning process.
+}
+
+std::shared_ptr
+    ClientServerRegistry::CreateClient(const ServiceMetaInfo& info) {
+    // 1. Take the caller's request to create a client.
+    // 2. Create the matching client wrapper.
+    // 3. Hand the initialized client back to the caller.
+    if (middlewareType_ == ICEORYX) {
+        spdlog::info("ClientServerRegistry begin creating client, service: {}, instance: {}, event: {}",
+            info.service, info.instance, info.event);
+        std::shared_ptr wrapper = std::make_shared(info);
+        wrapper->SetServiceType(intercept::middleware::ServiceType::CLIENT);
+        wrapper->InitClient();
+        return wrapper;
+    }
+    return nullptr;
+}
+
+std::shared_ptr
+    ClientServerRegistry::CreateServer(const ServiceMetaInfo& info) {
+    // 1. 获取客户端创建server的请求
+    // 2. 创建对应的server
+    // 3. 
返回对应的server + if (middlewareType_ == ICEORYX) { + std::shared_ptr wrapper = std::make_shared(info); + wrapper->SetServiceType(intercept::middleware::ServiceType::SERVER); + // wrapper->InitServer(); + return wrapper; + } + return nullptr; +} + + +// 作用于服务端 +void ClientServerRegistry::CreateServers() { + // 1. 获取客户端创建server的请求 + std::vector results = discovery_->FindServices(serviceInfo_); + std::vector neededServers; + + // 通过dummy请求获取创建server的需求 + for (auto& result : results) { + if (result.instance == DUMMY_INSTANCE_FLAG && + serverMap_.find(result.event) == serverMap_.end()){ + // 根据dummy 创建一个serveiceinfo + ServiceMetaInfo info; + info.service = result.service; + info.instance = INTERCEPT_INSTANCE_FLAG; + info.event = result.event; + neededServers.push_back(info); + + spdlog::info("ClientServerRegistry create server, service: {}, instance: {}, event: {}", + info.service, info.instance, info.event); + } + } + + // 2. 创建对应的server + for (const auto& result : neededServers) { + // 启动一个线程,创建ReqResMiddlewareWrapper 并调用它的StartServer函数 + // 2.1 是否已经创建对应server + // 2.2 如果没有创建, 创建server,并添加到serverMap_中 + // 2.3 如果已经创建,跳过 + if (middlewareType_ == ICEORYX) { + std::thread t([this, result]() { + // 创建server + auto wrapper = std::make_shared(result); + wrapper->SetServiceType(intercept::middleware::ServiceType::SERVER); + this->serverMap_[result.event] = wrapper; + // 启动server + wrapper->InitServer(); + wrapper->StartServer(); + // 添加到serverMap_中 + }); + threads_.push_back(std::move(t)); + } + sleep(0.1); + } + +} + +void ClientServerRegistry::DestroyServers() { + // 1. 获取客户端销毁server的请求 + // 2. 
Destroy the corresponding servers (not implemented yet).
+}
+
+void ClientServerRegistry::MonitorServers() {
+    spdlog::info("ClientServerRegistry monitor servers");
+    while (1) {
+        // Create servers for newly discovered dummy events.
+        CreateServers();
+        // Tear down servers whose clients are gone.
+        DestroyServers();
+        // TODO: this wait is important; NB: the loop never exits, so the join below is unreachable.
+        sleep(1);
+    }
+    for (auto& t : threads_) {
+        t.join();
+    }
+}
+
+} // namespace registry
+} // namespace intercept
+
+
+
diff --git a/intercept/registry/client_server_registry.h b/intercept/registry/client_server_registry.h
new file mode 100644
index 0000000..0cb7cc1
--- /dev/null
+++ b/intercept/registry/client_server_registry.h
@@ -0,0 +1,78 @@
+#pragma once
+
+#include "middleware/req_res_middleware_wrapper.h"
+#include "discovery/iceoryx_discovery.h"
+#include "discovery/discovery.h"
+
+#define CREATE_FLAG "create"
+#define DESTROY_FLAG "destroy"
+#define SERVER_FLAG "server"
+
+namespace intercept {
+namespace registry {
+
+using intercept::middleware::ReqResMiddlewareWrapper;
+using intercept::discovery::Discovery;
+using intercept::internal::OpenOpReqRes;
+using intercept::internal::ServiceMetaInfo;
+
+
+class ClientServerRegistry {
+
+public:
+    // Registry that matches intercept clients with data-exchange servers.
+    ClientServerRegistry(const std::string& middlewareType, const ServiceMetaInfo& info);
+    ~ClientServerRegistry();
+    // Create a transient "dummy" server used to request a real data-exchange server.
+    std::shared_ptr CreateDummyServer();
+    void DestroyDummyServer();
+
+    // Each returns an already-initialized middleware wrapper.
+    std::shared_ptr CreateClient(const ServiceMetaInfo& info);
+    std::shared_ptr CreateServer(const ServiceMetaInfo& info);
+
+    // Runs on the daemon side to keep the server set up to date.
+    void MonitorServers();
+
+private:
+    // Driven by the information the clients publish.
+    void CreateServers(); // create services
+    void DestroyServers(); // destroy services
+
+private:
+    // ...
+ std::string middlewareType_; + ServiceMetaInfo serviceInfo_; // 这里一个service由:service instance构成 + std::shared_ptr discovery_; + + std::vector> clientWrapper_; + std::vector> serverWrapper_; + + std::set dummyevent_; + std::unordered_map> serverMap_; + + // 存放创建的线程 + std::vector threads_; + +}; + +/// +// int client() { +// ServiceMetaInfo info = {"Service", "Instance", "Event"}; +// ClientServerRegistry registry("ICE", info); +// registry.CreateDummyServer(); +// auto client = registry.CreateClient(ServiceMetaInfo{"Service", "Instance", "Event"}); +// OpenOpReqRes reqres("test", 1, 1); +// client->OnRequest(reqres); +// // 全局使用这一个client去操作请求 + +// registry.DestroyDummyServer(); +// return 0; +// } + +} +} + + + + diff --git a/intercept/server.cpp b/intercept/server.cpp new file mode 100644 index 0000000..9b46596 --- /dev/null +++ b/intercept/server.cpp @@ -0,0 +1,32 @@ + +#include +#include +#include +#include +#include + +#include "registry/client_server_registry.h" + +using namespace intercept::internal; +using namespace intercept::registry; +std::mutex mtx; +std::condition_variable cv; +std::atomic discovery_thread_running{false}; + +int main() { + constexpr char APP_NAME[] = "iox-intercept-server"; + if (intercept::common::Configure::getInstance().loadConfig(intercept::common::CONFIG_FILE)) { + std::cout << "Config file loaded" << std::endl; + } else { + std::cout << "Config file not loaded: server.conf" << std::endl; + return 0; + } + intercept::common::InitLog(); + iox::runtime::PoshRuntime::initRuntime(APP_NAME); + ServiceMetaInfo info = {SERVICE_FLAG, "", ""}; + std::string type = ICEORYX; + ClientServerRegistry registry(type, info); + spdlog::info("begin to monitor servers"); + registry.MonitorServers(); + return 0; +} \ No newline at end of file diff --git a/local_cache/CMakeLists.txt b/local_cache/CMakeLists.txt new file mode 100644 index 0000000..f3e3e61 --- /dev/null +++ b/local_cache/CMakeLists.txt @@ -0,0 +1,5 @@ +SET(EXECUTABLE_OUTPUT_PATH 
${PROJECT_BINARY_DIR}/bin) + +file (GLOB_RECURSE LOCAL_CACHE_SOURCES CONFIGURE_DEPENDS "*.cpp") +add_library(hybridcache_local STATIC ${LOCAL_CACHE_SOURCES}) +target_link_libraries(hybridcache_local PUBLIC ${THIRD_PARTY_LIBRARIES} -laio) diff --git a/local_cache/accessor.h b/local_cache/accessor.h new file mode 100644 index 0000000..2733e37 --- /dev/null +++ b/local_cache/accessor.h @@ -0,0 +1,52 @@ +/* + * Project: HybridCache + * Created Date: 24-3-25 + * Author: lshb + */ +#ifndef HYBRIDCACHE_ACCESSOR_H_ +#define HYBRIDCACHE_ACCESSOR_H_ + +#include "read_cache.h" +#include "write_cache.h" + +namespace HybridCache { + +class HybridCacheAccessor { + public: + HybridCacheAccessor(const HybridCacheConfig& cfg) : cfg_(cfg) {} + ~HybridCacheAccessor() {} + + // Put in write cache. + // If the write cache is full, block waiting for asynchronous flush to release the write cache space + virtual int Put(const std::string &key, size_t start, size_t len, const char* buf) = 0; + + // 1.Read from write cache. 2.Read from read cache. + virtual int Get(const std::string &key, size_t start, size_t len, char* buf) = 0; + + // Get4ReadHandle(); + + // File flush. Need to handle flush/write concurrency. + virtual int Flush(const std::string &key) = 0; + + // Flush to the final data source, such as global cache to s3. + virtual int DeepFlush(const std::string &key) = 0; + + virtual int Delete(const std::string &key) = 0; + + // Invalidated the local read cache. + // Delete read cache when open the file. That is a configuration item. + virtual int Invalidate(const std::string &key) = 0; + + // Background asynchronous flush all files and releases write cache space. 
+ virtual int FsSync() = 0; + + protected: + HybridCacheConfig cfg_; + std::shared_ptr writeCache_; + std::shared_ptr readCache_; + std::shared_ptr dataAdaptor_; +}; + +} // namespace HybridCache + +#endif // HYBRIDCACHE_ACCESSOR_H_ diff --git a/local_cache/common.cpp b/local_cache/common.cpp new file mode 100644 index 0000000..b77a980 --- /dev/null +++ b/local_cache/common.cpp @@ -0,0 +1,18 @@ +#include "common.h" + +namespace HybridCache { + +bool EnableLogging = true; + +void split(const std::string& str, const char delim, + std::vector& items) { + std::istringstream iss(str); + std::string tmp; + while (std::getline(iss, tmp, delim)) { + if (!tmp.empty()) { + items.emplace_back(std::move(tmp)); + } + } +} + +} // namespace HybridCache diff --git a/local_cache/common.h b/local_cache/common.h new file mode 100644 index 0000000..bb33bfb --- /dev/null +++ b/local_cache/common.h @@ -0,0 +1,40 @@ +/* + * Project: HybridCache + * Created Date: 24-2-21 + * Author: lshb + */ +#ifndef HYBRIDCACHE_COMMON_H_ +#define HYBRIDCACHE_COMMON_H_ + +#include +#include +#include +#include + +#include "folly/executors/CPUThreadPoolExecutor.h" + +namespace HybridCache { + +typedef folly::CPUThreadPoolExecutor ThreadPool; + +static const char PAGE_SEPARATOR = 26; + +static const uint32_t BYTE_LEN = 8; + +// ConcurrentSkipList height +static const int SKIP_LIST_HEIGHT = 2; + +extern bool EnableLogging; + +struct ByteBuffer { + char* data; + size_t len; + ByteBuffer(char* buf = nullptr, size_t bufLen = 0) : data(buf), len(bufLen) {} +}; + +void split(const std::string& str, const char delim, + std::vector& items); + +} // namespace HybridCache + +#endif // HYBRIDCACHE_COMMON_H_ diff --git a/local_cache/config.cpp b/local_cache/config.cpp new file mode 100644 index 0000000..db5ce4e --- /dev/null +++ b/local_cache/config.cpp @@ -0,0 +1,187 @@ +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" + +#include "common.h" +#include "config.h" + 
+namespace HybridCache { + +std::vector SplitString(const std::string &input) { + std::vector result; + std::stringstream ss(input); + std::string item; + while (std::getline(ss, item, ',')) { + result.push_back(item); + } + return result; +} + +bool GetHybridCacheConfig(const std::string& file, HybridCacheConfig& cfg) { + Configuration conf; + if (!conf.LoadConfig(file)) return false; + + // ReadCache + conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheName", + cfg.ReadCacheCfg.CacheCfg.CacheName); + conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.MaxCacheSize", + cfg.ReadCacheCfg.CacheCfg.MaxCacheSize); + conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.PageBodySize", + cfg.ReadCacheCfg.CacheCfg.PageBodySize); + conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.PageMetaSize", + cfg.ReadCacheCfg.CacheCfg.PageMetaSize); + conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.EnableCAS", + cfg.ReadCacheCfg.CacheCfg.EnableCAS); + conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheLibConfig.EnableNvmCache", + cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.EnableNvmCache); + if (cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.EnableNvmCache) { + conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheLibConfig.RaidPath", + cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.RaidPath); + conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheLibConfig.RaidFileNum", + cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.RaidFileNum); + conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheLibConfig.RaidFileSize", + cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.RaidFileSize); + conf.GetValueFatalIfFail("ReadCacheConfig.CacheConfig.CacheLibConfig.DataChecksum", + cfg.ReadCacheCfg.CacheCfg.CacheLibCfg.DataChecksum); + } + conf.GetValueFatalIfFail("ReadCacheConfig.DownloadNormalFlowLimit", + cfg.ReadCacheCfg.DownloadNormalFlowLimit); + conf.GetValueFatalIfFail("ReadCacheConfig.DownloadBurstFlowLimit", + cfg.ReadCacheCfg.DownloadBurstFlowLimit); + + // WriteCache + 
conf.GetValueFatalIfFail("WriteCacheConfig.CacheConfig.CacheName", + cfg.WriteCacheCfg.CacheCfg.CacheName); + conf.GetValueFatalIfFail("WriteCacheConfig.CacheConfig.MaxCacheSize", + cfg.WriteCacheCfg.CacheCfg.MaxCacheSize); + conf.GetValueFatalIfFail("WriteCacheConfig.CacheConfig.PageBodySize", + cfg.WriteCacheCfg.CacheCfg.PageBodySize); + conf.GetValueFatalIfFail("WriteCacheConfig.CacheConfig.PageMetaSize", + cfg.WriteCacheCfg.CacheCfg.PageMetaSize); + conf.GetValueFatalIfFail("WriteCacheConfig.CacheConfig.EnableCAS", + cfg.WriteCacheCfg.CacheCfg.EnableCAS); + conf.GetValueFatalIfFail("WriteCacheConfig.CacheSafeRatio", + cfg.WriteCacheCfg.CacheSafeRatio); + + // GlobalCache + conf.GetValueFatalIfFail("UseGlobalCache", cfg.UseGlobalCache); + if (cfg.UseGlobalCache) { + conf.GetValueFatalIfFail("GlobalCacheConfig.EnableWriteCache", + cfg.GlobalCacheCfg.EnableWriteCache); + conf.GetValueFatalIfFail("GlobalCacheConfig.EtcdAddress", + cfg.GlobalCacheCfg.EtcdAddress); + std::string servers; + conf.GetValueFatalIfFail("GlobalCacheConfig.GlobalServers", + servers); + cfg.GlobalCacheCfg.GlobalServers = std::move(SplitString(servers)); + conf.GetValueFatalIfFail("GlobalCacheConfig.GflagFile", + cfg.GlobalCacheCfg.GflagFile); + } + + conf.GetValueFatalIfFail("ThreadNum", cfg.ThreadNum); + conf.GetValueFatalIfFail("BackFlushCacheRatio", cfg.BackFlushCacheRatio); + conf.GetValueFatalIfFail("UploadNormalFlowLimit", cfg.UploadNormalFlowLimit); + conf.GetValueFatalIfFail("UploadBurstFlowLimit", cfg.UploadBurstFlowLimit); + conf.GetValueFatalIfFail("LogPath", cfg.LogPath); + conf.GetValueFatalIfFail("LogLevel", cfg.LogLevel); + conf.GetValueFatalIfFail("EnableLog", cfg.EnableLog); + conf.GetValueFatalIfFail("FlushToRead", cfg.FlushToRead); + conf.GetValueFatalIfFail("CleanCacheByOpen", cfg.CleanCacheByOpen); + + conf.PrintConfig(); + return CheckConfig(cfg); +} + +bool CheckConfig(const HybridCacheConfig& cfg) { + if (cfg.WriteCacheCfg.CacheCfg.CacheLibCfg.EnableNvmCache) { + 
LOG(FATAL) << "Config error. Write Cache not support nvm cache!"; + return false; + } + + if (cfg.ReadCacheCfg.CacheCfg.PageBodySize % BYTE_LEN || + cfg.WriteCacheCfg.CacheCfg.PageBodySize % BYTE_LEN) { + LOG(FATAL) << "Config error. Page body size must be a multiple of " << BYTE_LEN; + return false; + } + + return true; +} + +bool ParseFlagFromFile(const std::string& file) { + std::ifstream config_file(file); + if (config_file.is_open()) { + std::string line; + std::vector args; + args.push_back("hybridcache"); + while (std::getline(config_file, line)) { + args.push_back(line); + } + char* dummy_argv[args.size()]; + for (size_t i = 0; i < args.size(); ++i) { + dummy_argv[i] = const_cast(args[i].c_str()); + } + int size = args.size(); + char** tmp = const_cast(dummy_argv); + google::ParseCommandLineFlags(&size, &tmp, true); + config_file.close(); + } else { + LOG(ERROR) << "Unable to open gflag file '" << file << "' failed: " + << strerror(errno); + return false; + } + return true; +} + +bool Configuration::LoadConfig(const std::string& file) { + confFile_ = file; + std::ifstream cFile(confFile_); + + if (cFile.is_open()) { + std::string line; + while (getline(cFile, line)) { + // FIXME: may not remove middle spaces + line.erase(std::remove_if(line.begin(), line.end(), isspace), + line.end()); + if (line[0] == '#' || line.empty()) + continue; + + int delimiterPos = line.find("="); + std::string key = line.substr(0, delimiterPos); + int commentPos = line.find("#"); + std::string value = line.substr(delimiterPos + 1, + commentPos - delimiterPos - 1); + config_[key] = value; + } + } else { + LOG(ERROR) << "Open config file '" << confFile_ << "' failed: " + << strerror(errno); + return false; + } + + return true; +} + +void Configuration::PrintConfig() { + LOG(INFO) << std::string(30, '=') << "BEGIN" << std::string(30, '='); + for (auto &item : config_) { + LOG(INFO) << item.first << std::string(60 - item.first.size(), ' ') + << ": " << item.second; + } + LOG(INFO) << 
std::string(31, '=') << "END" << std::string(31, '='); +} + +template +void Configuration::GetValueFatalIfFail(const std::string& key, T& value) { + if (config_.find(key) != config_.end()) { + std::stringstream sstream(config_[key]); + sstream >> value; + return; + } + LOG(FATAL) << "Get " << key << " from " << confFile_ << " fail"; +} + +} // namespace HybridCache diff --git a/local_cache/config.h b/local_cache/config.h new file mode 100644 index 0000000..7026527 --- /dev/null +++ b/local_cache/config.h @@ -0,0 +1,93 @@ +/* + * Project: HybridCache + * Created Date: 24-2-21 + * Author: lshb + */ +#ifndef HYBRIDCACHE_CONFIG_H_ +#define HYBRIDCACHE_CONFIG_H_ + +#include +#include + +namespace HybridCache { + +struct CacheLibConfig { + bool EnableNvmCache = false; + std::string RaidPath; + uint64_t RaidFileNum; + size_t RaidFileSize; + bool DataChecksum = false; +}; + +struct CacheConfig { + std::string CacheName; + size_t MaxCacheSize; + uint32_t PageBodySize; + uint32_t PageMetaSize; + bool EnableCAS; + CacheLibConfig CacheLibCfg; +}; + +struct ReadCacheConfig { + CacheConfig CacheCfg; + uint64_t DownloadNormalFlowLimit; + uint64_t DownloadBurstFlowLimit; +}; + +struct WriteCacheConfig { + CacheConfig CacheCfg; + uint32_t CacheSafeRatio; // cache safety concern threshold (percent) +}; + +struct GlobalCacheConfig { + bool EnableWriteCache; + std::string EtcdAddress; + std::vector GlobalServers; + std::string GflagFile; +}; + +struct HybridCacheConfig { + ReadCacheConfig ReadCacheCfg; + WriteCacheConfig WriteCacheCfg; + GlobalCacheConfig GlobalCacheCfg; + uint32_t ThreadNum; + uint32_t BackFlushCacheRatio; + uint64_t UploadNormalFlowLimit; + uint64_t UploadBurstFlowLimit; + std::string LogPath; + uint32_t LogLevel; + bool EnableLog = true; + bool UseGlobalCache = false; + bool FlushToRead = false; // write to read cache after flush + bool CleanCacheByOpen = false; // clean read cache when open file +}; + +bool GetHybridCacheConfig(const std::string& file, 
HybridCacheConfig& cfg); +bool CheckConfig(const HybridCacheConfig& cfg); +bool ParseFlagFromFile(const std::string& file); + +class Configuration { + public: + bool LoadConfig(const std::string& file); + void PrintConfig(); + + /* + * @brief GetValueFatalIfFail Get the value of the specified config item + * log it if get error + * + * @param[in] key config name + * @param[out] value config value + * + * @return + */ + template + void GetValueFatalIfFail(const std::string& key, T& value); + + private: + std::string confFile_; + std::map config_; +}; + +} // namespace HybridCache + +#endif // HYBRIDCACHE_CONFIG_H_ diff --git a/local_cache/data_adaptor.h b/local_cache/data_adaptor.h new file mode 100644 index 0000000..0694c15 --- /dev/null +++ b/local_cache/data_adaptor.h @@ -0,0 +1,89 @@ +/* + * Project: HybridCache + * Created Date: 24-2-26 + * Author: lshb + */ +#ifndef HYBRIDCACHE_DATA_ADAPTOR_H_ +#define HYBRIDCACHE_DATA_ADAPTOR_H_ + +#include + +#include "folly/futures/Future.h" +#include "glog/logging.h" + +#include "common.h" +#include "errorcode.h" + +namespace HybridCache { + +class DataAdaptor { + public: + virtual folly::Future DownLoad(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer) = 0; + + virtual folly::Future UpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map& headers) = 0; + + virtual folly::Future Delete(const std::string &key) = 0; + + // for global cache + virtual folly::Future DeepFlush(const std::string &key) { + return folly::makeFuture(0); + } + + virtual folly::Future Head(const std::string &key, + size_t& size, + std::map& headers) = 0; + + void SetExecutor(std::shared_ptr executor) { + executor_ = executor; + } + + protected: + std::shared_ptr executor_; +}; + +class DataAdaptor4Test : public DataAdaptor { + public: + folly::Future DownLoad(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer) { + assert(executor_); + return 
folly::via(executor_.get(), [key, start, size, buffer]() -> int { + LOG(INFO) << "[DataAdaptor]DownLoad start, key:" << key + << ", start:" << start << ", size:" << size; + std::this_thread::sleep_for(std::chrono::seconds(3)); + LOG(INFO) << "[DataAdaptor]DownLoad error, key:" << key + << ", start:" << start << ", size:" << size; + return REMOTE_FILE_NOT_FOUND; + }); + } + + folly::Future UpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map& headers) { + return folly::makeFuture(REMOTE_FILE_NOT_FOUND); + } + + folly::Future Delete(const std::string &key) { + return folly::makeFuture(REMOTE_FILE_NOT_FOUND); + } + + folly::Future Head(const std::string &key, + size_t& size, + std::map& headers) { + return folly::makeFuture(REMOTE_FILE_NOT_FOUND); + } +}; + +} // namespace HybridCache + +#endif // HYBRIDCACHE_DATA_ADAPTOR_H_ diff --git a/local_cache/errorcode.h b/local_cache/errorcode.h new file mode 100644 index 0000000..addd210 --- /dev/null +++ b/local_cache/errorcode.h @@ -0,0 +1,21 @@ +/* + * Project: HybridCache + * Created Date: 24-3-18 + * Author: lshb + */ +#ifndef HYBRIDCACHE_ERRORCODE_H_ +#define HYBRIDCACHE_ERRORCODE_H_ + +namespace HybridCache { + +enum ErrCode { + SUCCESS = 0, + PAGE_NOT_FOUND = -1, + PAGE_DEL_FAIL = -2, + ADAPTOR_NOT_FOUND = -3, + REMOTE_FILE_NOT_FOUND = -4, +}; + +} // namespace HybridCache + +#endif // HYBRIDCACHE_ERRORCODE_H_ diff --git a/local_cache/page_cache.cpp b/local_cache/page_cache.cpp new file mode 100644 index 0000000..b6a023c --- /dev/null +++ b/local_cache/page_cache.cpp @@ -0,0 +1,440 @@ +#include "glog/logging.h" + +#include "common.h" +#include "errorcode.h" +#include "page_cache.h" + +namespace HybridCache { + +bool PageCache::Lock(char* pageMemory) { + if (!cfg_.EnableCAS) return true; + uint8_t* lock = reinterpret_cast(pageMemory + int(MetaPos::LOCK)); + uint8_t lockExpected = 0; + return __atomic_compare_exchange_n(lock, &lockExpected, 1, true, + __ATOMIC_SEQ_CST, 
__ATOMIC_SEQ_CST); +} + +void PageCache::UnLock(char* pageMemory) { + if (!cfg_.EnableCAS) return; + uint8_t* lock = reinterpret_cast(pageMemory + int(MetaPos::LOCK)); + __atomic_store_n(lock, 0, __ATOMIC_SEQ_CST); +} + +uint8_t PageCache::AddNewVer(char* pageMemory) { + if (!cfg_.EnableCAS) return 0; + uint8_t* newVer = reinterpret_cast(pageMemory + int(MetaPos::NEWVER)); + return __atomic_add_fetch(newVer, 1, __ATOMIC_SEQ_CST); +} + +void PageCache::SetLastVer(char* pageMemory, uint8_t newVer) { + if (!cfg_.EnableCAS) return; + uint8_t* lastVer = reinterpret_cast(pageMemory + int(MetaPos::LASTVER)); + __atomic_store_n(lastVer, newVer, __ATOMIC_SEQ_CST); +} + +uint8_t PageCache::GetLastVer(const char* pageMemory) { + if (!cfg_.EnableCAS) return 0; + const uint8_t* lastVer = reinterpret_cast(pageMemory + int(MetaPos::LASTVER)); + return __atomic_load_n(lastVer, __ATOMIC_SEQ_CST); +} + +uint8_t PageCache::GetNewVer(const char* pageMemory) { + if (!cfg_.EnableCAS) return 0; + const uint8_t* newVer = reinterpret_cast(pageMemory + int(MetaPos::NEWVER)); + return __atomic_load_n(newVer, __ATOMIC_SEQ_CST); +} + +void PageCache::SetFastBitmap(char* pageMemory, bool valid) { + uint8_t* fastBitmap = reinterpret_cast(pageMemory + int(MetaPos::FAST_BITMAP)); + if (valid) *fastBitmap = 1; + else *fastBitmap = 0; +} + +bool PageCache::GetFastBitmap(const char* pageMemory) { + const uint8_t* fastBitmap = reinterpret_cast(pageMemory + int(MetaPos::FAST_BITMAP)); + return *fastBitmap == 1; +} + +void PageCache::SetBitMap(char* pageMemory, int pos, int len, bool valid) { + if (len == cfg_.PageBodySize && valid) + SetFastBitmap(pageMemory, valid); + if (!valid) + SetFastBitmap(pageMemory, valid); + + char* x = pageMemory + cfg_.PageMetaSize; + uint32_t startByte = pos / BYTE_LEN; + // head byte + if (pos % BYTE_LEN > 0) { + int headByteSetLen = BYTE_LEN - pos % BYTE_LEN; + headByteSetLen = headByteSetLen > len ? 
len : headByteSetLen; + len -= headByteSetLen; + while (headByteSetLen) { + if (valid) + SetBit(x+startByte, pos%BYTE_LEN+(--headByteSetLen)); + else + ClearBit(x+startByte, pos%BYTE_LEN+(--headByteSetLen)); + } + ++startByte; + } + // mid bytes + int midLen = len / BYTE_LEN; + if (midLen > 0) { + if (valid) + memset(x+startByte, UINT8_MAX, midLen); + else + memset(x+startByte, 0, midLen); + len -= BYTE_LEN * midLen; + startByte += midLen; + } + // tail byte + while (len > 0) { + if (valid) + SetBit(x+startByte, --len); + else + ClearBit(x+startByte, --len); + } +} + +int PageCacheImpl::Init() { + const uint64_t REDUNDANT_SIZE = 1024 * 1024 * 1024; + const unsigned bucketsPower = 25; + const unsigned locksPower = 15; + + Cache::Config config; + config + .setCacheSize(cfg_.MaxCacheSize + REDUNDANT_SIZE) + .setCacheName(cfg_.CacheName) + .setAccessConfig({bucketsPower, locksPower}) + .validate(); + if (cfg_.CacheLibCfg.EnableNvmCache) { + Cache::NvmCacheConfig nvmConfig; + std::vector raidPaths; + for (int i=0; i(config); + pool_ = cache_->addPool(cfg_.CacheName + "_pool", cfg_.MaxCacheSize); + + LOG(WARNING) << "[PageCache]Init, name:" << config.getCacheName() + << ", size:" << config.getCacheSize() + << ", dir:" << config.getCacheDir(); + return SUCCESS; +} + +int PageCacheImpl::Close() { + if (cache_) + cache_.reset(); + LOG(WARNING) << "[PageCache]Close, name:" << cfg_.CacheName; + return SUCCESS; +} + +int PageCacheImpl::Write(const std::string &key, + uint32_t pagePos, + uint32_t length, + const char *buf) { + assert(cfg_.PageBodySize >= pagePos + length); + assert(cache_); + + Cache::WriteHandle writeHandle = nullptr; + char* pageValue = nullptr; + while (true) { + writeHandle = std::move(FindOrCreateWriteHandle(key)); + pageValue = reinterpret_cast(writeHandle->getMemory()); + if (Lock(pageValue)) break; + } + + uint64_t realOffset = cfg_.PageMetaSize + bitmapSize_ + pagePos; + uint8_t newVer = AddNewVer(pageValue); + std::memcpy(pageValue + realOffset, buf, 
length); + SetBitMap(pageValue, pagePos, length, true); + SetLastVer(pageValue, newVer); + UnLock(pageValue); + return SUCCESS; +} + +int PageCacheImpl::Read(const std::string &key, + uint32_t pagePos, + uint32_t length, + char *buf, + std::vector>& dataBoundary) { + assert(cfg_.PageBodySize >= pagePos + length); + assert(cache_); + + int res = SUCCESS; + while (true) { + auto readHandle = cache_->find(key); + if (!readHandle) { + res = PAGE_NOT_FOUND; + break; + } + while (!readHandle.isReady()); + + const char* pageValue = reinterpret_cast( + readHandle->getMemory()); + uint8_t lastVer = GetLastVer(pageValue); + uint8_t newVer = GetNewVer(pageValue); + if (lastVer != newVer) continue; + + dataBoundary.clear(); + uint32_t cur = pagePos; + if (GetFastBitmap(pageValue)) { + uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_ + pagePos; + std::memcpy(buf, pageValue + pageOff, length); + dataBoundary.push_back(std::make_pair(0, length)); + cur += length; + } + + bool continuousDataValid = false; // continuous Data valid or invalid + uint32_t continuousLen = 0; + while (cur < pagePos+length) { + const char *byte = pageValue + cfg_.PageMetaSize + cur / BYTE_LEN; + + // fast to judge full byte of bitmap + uint16_t batLen = 0; + bool batByteValid = false, isBatFuncValid = false; + + batLen = 64; + if (cur % batLen == 0 && (pagePos+length-cur) >= batLen) { + uint64_t byteValue = *reinterpret_cast(byte); + if (byteValue == UINT64_MAX) { + batByteValid = true; + isBatFuncValid = true; + } else if (byteValue == 0) { + isBatFuncValid = true; + } + } + + if (isBatFuncValid && (continuousLen == 0 || + continuousDataValid == batByteValid)) { + continuousDataValid = batByteValid; + continuousLen += batLen; + cur += batLen; + continue; + } + + bool curByteValid = GetBit(byte, cur % BYTE_LEN); + if (continuousLen == 0 || continuousDataValid == curByteValid) { + continuousDataValid = curByteValid; + ++continuousLen; + ++cur; + continue; + } + + if (continuousDataValid) { + uint32_t 
bufOff = cur - continuousLen - pagePos; + uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_ + + cur - continuousLen; + std::memcpy(buf + bufOff, pageValue + pageOff, continuousLen); + dataBoundary.push_back(std::make_pair(bufOff, continuousLen)); + } + + continuousDataValid = curByteValid; + continuousLen = 1; + ++cur; + } + if (continuousDataValid) { + uint32_t bufOff = cur - continuousLen - pagePos; + uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_ + + cur - continuousLen; + std::memcpy(buf + bufOff, pageValue + pageOff, continuousLen); + dataBoundary.push_back(std::make_pair(bufOff, continuousLen)); + } + + newVer = GetNewVer(pageValue); + if (lastVer == newVer) break; + } + return res; +} + +int PageCacheImpl::GetAllCache(const std::string &key, + std::vector>& dataSegments) { + assert(cache_); + uint32_t pageSize = cfg_.PageBodySize; + + int res = SUCCESS; + while (true) { + auto readHandle = cache_->find(key); + if (!readHandle) { + res = PAGE_NOT_FOUND; + break; + } + while (!readHandle.isReady()); + + const char* pageValue = reinterpret_cast( + readHandle->getMemory()); + uint8_t lastVer = GetLastVer(pageValue); + uint8_t newVer = GetNewVer(pageValue); + if (lastVer != newVer) continue; + + dataSegments.clear(); + uint32_t cur = 0; + if (GetFastBitmap(pageValue)) { + uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_; + dataSegments.push_back(std::make_pair( + ByteBuffer(const_cast(pageValue + pageOff), pageSize), 0)); + cur += pageSize; + } + + bool continuousDataValid = false; // continuous Data valid or invalid + uint32_t continuousLen = 0; + while (cur < pageSize) { + const char *byte = pageValue + cfg_.PageMetaSize + cur / BYTE_LEN; + + // fast to judge full byte of bitmap + uint16_t batLen = 0; + bool batByteValid = false, isBatFuncValid = false; + + batLen = 64; + if (cur % batLen == 0 && (pageSize-cur) >= batLen) { + uint64_t byteValue = *reinterpret_cast(byte); + if (byteValue == UINT64_MAX) { + batByteValid = true; + isBatFuncValid = true; + } 
else if (byteValue == 0) { + isBatFuncValid = true; + } + } + + if (isBatFuncValid && (continuousLen == 0 || + continuousDataValid == batByteValid)) { + continuousDataValid = batByteValid; + continuousLen += batLen; + cur += batLen; + continue; + } + + bool curByteValid = GetBit(byte, cur % BYTE_LEN); + if (continuousLen == 0 || continuousDataValid == curByteValid) { + continuousDataValid = curByteValid; + ++continuousLen; + ++cur; + continue; + } + + if (continuousDataValid) { + uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_ + + cur - continuousLen; + dataSegments.push_back(std::make_pair( + ByteBuffer(const_cast(pageValue + pageOff), continuousLen), + cur - continuousLen)); + } + + continuousDataValid = curByteValid; + continuousLen = 1; + ++cur; + } + if (continuousDataValid) { + uint32_t pageOff = cfg_.PageMetaSize + bitmapSize_ + + cur - continuousLen; + dataSegments.push_back(std::make_pair( + ByteBuffer(const_cast(pageValue + pageOff), continuousLen), + cur - continuousLen)); + } + + newVer = GetNewVer(pageValue); + if (lastVer == newVer) break; + } + return res; +} + +int PageCacheImpl::DeletePart(const std::string &key, + uint32_t pagePos, + uint32_t length) { + assert(cfg_.PageBodySize >= pagePos + length); + assert(cache_); + + int res = SUCCESS; + Cache::WriteHandle writeHandle = nullptr; + char* pageValue = nullptr; + while (true) { + writeHandle = cache_->findToWrite(key); + if (!writeHandle) { + res = PAGE_NOT_FOUND; + break; + } + pageValue = reinterpret_cast(writeHandle->getMemory()); + if (Lock(pageValue)) break; + } + + if (SUCCESS == res) { + uint8_t newVer = AddNewVer(pageValue); + SetBitMap(pageValue, pagePos, length, false); + + bool isEmpty = true; + uint32_t pos = 0; + while (pos < bitmapSize_) { + if (*(pageValue + cfg_.PageMetaSize + pos) != 0) { + isEmpty = false; + break; + } + ++pos; + } + + bool isDel = false; + if (isEmpty) { + if (cache_->remove(writeHandle) == Cache::RemoveRes::kSuccess) { + pageNum_.fetch_sub(1); + 
pagesList_.erase(key);
+                isDel = true;
+            } else {
+                res = PAGE_DEL_FAIL;
+            }
+        }
+
+        if (!isDel) {
+            // Page survives (still has data, or remove failed): publish the
+            // bumped version so readers retry, then release the page lock.
+            SetLastVer(pageValue, newVer);
+            UnLock(pageValue);
+        }
+    }
+    return res;
+}
+
+// Remove the whole page `key` from cachelib.
+// Returns SUCCESS (and updates pageNum_/pagesList_ accounting) when the
+// item was removed, PAGE_NOT_FOUND when cachelib has no such item.
+int PageCacheImpl::Delete(const std::string &key) {
+    assert(cache_);
+    int res = cache_->remove(key) == Cache::RemoveRes::kSuccess ? SUCCESS : PAGE_NOT_FOUND;
+    if (SUCCESS == res) {
+        pageNum_.fetch_sub(1);
+        pagesList_.erase(key);
+    }
+    return res;
+}
+
+// Look up the page for writing; if absent, allocate a fresh page and
+// register it. Only the metadata + bitmap prefix is zeroed — the body is
+// left uninitialized and is masked by the bitmap.
+Cache::WriteHandle PageCacheImpl::FindOrCreateWriteHandle(const std::string &key) {
+    auto writeHandle = cache_->findToWrite(key);
+    if (!writeHandle) {
+        writeHandle = cache_->allocate(pool_, key, GetRealPageSize());
+        assert(writeHandle);
+        assert(writeHandle->getMemory());
+        // need init: new pages must start with zeroed metadata + bitmap
+        memset(writeHandle->getMemory(), 0, cfg_.PageMetaSize + bitmapSize_);
+
+        if (cfg_.CacheLibCfg.EnableNvmCache) {
+            // insertOrReplace will insert or replace existing item for the key,
+            // and return the handle of the replaced old item
+            // (a falsy return means nothing was replaced, i.e. a new page).
+            // Note: write cache nonsupport NVM, because it will be replaced
+            if (!cache_->insertOrReplace(writeHandle)) {
+                pageNum_.fetch_add(1);
+                pagesList_.insert(key);
+            }
+        } else {
+            if (cache_->insert(writeHandle)) {
+                pageNum_.fetch_add(1);
+                pagesList_.insert(key);
+            } else {
+                // NOTE(review): a concurrent inserter won the insert() race;
+                // fall back to the winner's item for writing.
+                writeHandle = cache_->findToWrite(key);
+            }
+        }
+    }
+    return writeHandle;
+}
+
+} // namespace HybridCache
diff --git a/local_cache/page_cache.h b/local_cache/page_cache.h
new file mode 100644
index 0000000..dbfb72a
--- /dev/null
+++ b/local_cache/page_cache.h
@@ -0,0 +1,161 @@
+/*
+ * Project: HybridCache
+ * Created Date: 24-2-21
+ * Author: lshb
+ */
+#ifndef HYBRIDCACHE_PAGE_CACHE_H_
+#define HYBRIDCACHE_PAGE_CACHE_H_
+
+#include 
+#include 
+
+#include "folly/ConcurrentSkipList.h"
+#include "cachelib/allocator/CacheAllocator.h"
+
+#include "common.h"
+#include "config.h"
+
+namespace HybridCache {
+
+// Ordered, concurrent index of all page keys currently cached.
+typedef folly::ConcurrentSkipList StringSkipList;
+using facebook::cachelib::PoolId;
+using Cache = facebook::cachelib::LruAllocator;
+
+
+// Byte offsets of the per-page metadata fields inside a page's memory.
+enum class MetaPos {
+    LOCK = 0,
+    LASTVER,
+    NEWVER,
+    FAST_BITMAP
+};
+
+// Abstract page cache: fixed-size pages keyed by string, each page carrying
+// a small metadata header (lock + versions), a validity bitmap, and the body.
+class PageCache {
+ public:
+    PageCache(const CacheConfig& cfg): cfg_(cfg) {}
+    virtual ~PageCache() {}
+
+    virtual int Init() = 0;
+    virtual int Close() = 0;
+
+    virtual int Write(const std::string &key,  // page key
+                      uint32_t pagePos,
+                      uint32_t length,
+                      const char *buf  // user buf
+                      ) = 0;
+
+    virtual int Read(const std::string &key,
+                     uint32_t pagePos,
+                     uint32_t length,
+                     char *buf,  // user buf
+                     std::vector>& dataBoundary  // valid data segment boundary
+                     ) = 0;
+
+    // upper layer need to guarantee that the page will not be deleted
+    virtual int GetAllCache(const std::string &key,
+                            std::vector>& dataSegments  //
+                            ) = 0;
+
+    // delete part data from page
+    // if the whole page is empty then delete that page
+    virtual int DeletePart(const std::string &key,
+                           uint32_t pagePos,
+                           uint32_t length
+                           ) = 0;
+
+    virtual int Delete(const std::string &key) = 0;
+
+    virtual size_t GetCacheSize() = 0;
+    virtual size_t GetCacheMaxSize() = 0;
+
+    // Accessor over the ordered set of page keys (used for prefix scans).
+    const folly::ConcurrentSkipList::Accessor& GetPageList() {
+        return this->pagesList_;
+    }
+
+ protected:
+    // CAS operate on the per-page metadata header
+    bool Lock(char* pageMemory);
+    void UnLock(char* pageMemory);
+    uint8_t AddNewVer(char* pageMemory);
+    void SetLastVer(char* pageMemory, uint8_t newVer);
+    uint8_t GetLastVer(const char* pageMemory);
+    uint8_t GetNewVer(const char* pageMemory);
+
+    // bitmap operate (one bit per body byte; set = byte holds valid data)
+    void SetFastBitmap(char* pageMemory, bool valid);
+    bool GetFastBitmap(const char* pageMemory);
+    void SetBitMap(char* pageMemory, int pos, int len, bool valid);
+    void SetBit(char *x, int n) { *x |= (1 << n); }
+    void ClearBit(char *x, int n) { *x &= ~ (1 << n); }
+    bool GetBit(const char *x, int n) { return *x & (1 << n); }
+
+ protected:
+    StringSkipList::Accessor pagesList_ = StringSkipList::create(SKIP_LIST_HEIGHT);
+    CacheConfig cfg_;
+};
+
+// CacheLib-backed implementation of PageCache.
+class PageCacheImpl : public PageCache {
+ public:
+    PageCacheImpl(const CacheConfig& cfg): PageCache(cfg) {
+        // one validity bit per body byte
+        bitmapSize_ = 
cfg_.PageBodySize / BYTE_LEN; + } + ~PageCacheImpl() {} + + int Init(); + + int Close(); + + int Write(const std::string &key, + uint32_t pagePos, + uint32_t length, + const char *buf + ); + + int Read(const std::string &key, + uint32_t pagePos, + uint32_t length, + char *buf, + std::vector>& dataBoundary + ); + + int GetAllCache(const std::string &key, + std::vector>& dataSegments + ); + + int DeletePart(const std::string &key, + uint32_t pagePos, + uint32_t length + ); + + int Delete(const std::string &key); + + size_t GetCacheSize() { + return GetPageNum() * GetRealPageSize(); + } + size_t GetCacheMaxSize() { + if (!cfg_.CacheLibCfg.EnableNvmCache) + return cfg_.MaxCacheSize; + size_t nvmMaxSize = cfg_.CacheLibCfg.RaidFileNum * + cfg_.CacheLibCfg.RaidFileSize; + return cfg_.MaxCacheSize + nvmMaxSize; + } + + private: + uint64_t GetPageNum() { + return pageNum_.load(); + } + + uint32_t GetRealPageSize() { + return cfg_.PageMetaSize + bitmapSize_ + cfg_.PageBodySize; + } + + Cache::WriteHandle FindOrCreateWriteHandle(const std::string &key); + + private: + std::shared_ptr cache_; + PoolId pool_; + std::atomic pageNum_{0}; + uint32_t bitmapSize_; +}; + +} // namespace HybridCache + +#endif // HYBRIDCACHE_PAGE_CACHE_H_ diff --git a/local_cache/read_cache.cpp b/local_cache/read_cache.cpp new file mode 100644 index 0000000..31aed1a --- /dev/null +++ b/local_cache/read_cache.cpp @@ -0,0 +1,257 @@ +#include "errorcode.h" +#include "read_cache.h" + +namespace HybridCache { + +ReadCache::ReadCache(const ReadCacheConfig& cfg, + std::shared_ptr dataAdaptor, + std::shared_ptr executor) : + cfg_(cfg), dataAdaptor_(dataAdaptor), executor_(executor) { + Init(); +} + +folly::Future ReadCache::Get(const std::string &key, size_t start, + size_t len, ByteBuffer &buffer) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = SUCCESS; + uint32_t pageSize = cfg_.CacheCfg.PageBodySize; + size_t index = start 
/ pageSize; + uint32_t pagePos = start % pageSize; + size_t readLen = 0; + size_t realReadLen = 0; + size_t bufOffset = 0; + size_t remainLen = len; + uint64_t readPageCnt = 0; + std::vector> dataBoundary; + + while (remainLen > 0) { + readLen = pagePos + remainLen > pageSize ? pageSize - pagePos : remainLen; + std::string pageKey = std::move(GetPageKey(key, index)); + std::vector> stepDataBoundary; + int tmpRes = pageCache_->Read(pageKey, pagePos, readLen, + (buffer.data + bufOffset), stepDataBoundary); + if (SUCCESS == tmpRes) { + ++readPageCnt; + } else if (PAGE_NOT_FOUND != tmpRes) { + res = tmpRes; + break; + } + + for (auto& it : stepDataBoundary) { + dataBoundary.push_back(std::make_pair(it.first + bufOffset, it.second)); + realReadLen += it.second; + } + remainLen -= readLen; + ++index; + bufOffset += readLen; + pagePos = (pagePos + readLen) % pageSize; + } + + remainLen = len - realReadLen; + if (remainLen > 0 && !dataAdaptor_) { + res = ADAPTOR_NOT_FOUND; + } + + // handle cache misses + readLen = 0; + size_t stepStart = 0; + size_t fileStartOff = 0; + std::vector> fs; + auto it = dataBoundary.begin(); + while (remainLen > 0 && SUCCESS == res) { + ByteBuffer stepBuffer(buffer.data + stepStart); + fileStartOff = start + stepStart; + if (it != dataBoundary.end()) { + readLen = it->first - stepStart; + if (!readLen) { + stepStart = it->first + it->second; + ++it; + continue; + } + stepStart = it->first + it->second; + ++it; + } else { + readLen = remainLen; + } + stepBuffer.len = readLen; + remainLen -= readLen; + + auto download = folly::via(executor_.get(), [this, readLen]() { + // download flow control + while(!this->tokenBucket_->consume(readLen)); + return SUCCESS; + }).thenValue([this, key, fileStartOff, readLen, stepBuffer](int i) { + // LOG(INFO) << "Extra download: " << key << " " << readLen; + ByteBuffer tmpBuffer(stepBuffer.data, readLen); + return this->dataAdaptor_->DownLoad(key, fileStartOff, readLen, tmpBuffer).get(); + }).thenValue([this, 
key, fileStartOff, readLen, stepBuffer](int downRes) { + if (EnableLogging && SUCCESS != downRes) { + LOG(ERROR) << "[ReadCache]DownLoad failed, file:" << key + << ", start:" << fileStartOff << ", len:" << readLen + << ", res:" << downRes; + return downRes; + } + return this->Put(key, fileStartOff, readLen, stepBuffer); + }); + + fs.emplace_back(std::move(download)); + } + + if (!fs.empty()) { + return collectAll(fs).via(executor_.get()) + .thenValue([key, start, len, readPageCnt, startTime]( + std::vector, std::allocator>>&& tups) { + int finalRes = SUCCESS; + for (const auto& t : tups) { + if (SUCCESS != t.value()) finalRes = t.value(); + } + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[ReadCache]Get, key:" << key << ", start:" << start + << ", len:" << len << ", res:" << finalRes + << ", readPageCnt:" << readPageCnt + << ", time:" << totalTime << "ms"; + } + return finalRes; + }); + // auto tups = collectAll(fs).get(); + // int finalRes = SUCCESS; + // for (const auto& t : tups) { + // if (SUCCESS != t.value()) finalRes = t.value(); + // } + // if (EnableLogging) { + // double totalTime = std::chrono::duration( + // std::chrono::steady_clock::now() - startTime).count(); + // LOG(INFO) << "[ReadCache]Get, key:" << key << ", start:" << start + // << ", len:" << len << ", res:" << finalRes + // << ", readPageCnt:" << readPageCnt + // << ", time:" << totalTime << "ms"; + // } + // return finalRes; + } + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[ReadCache]Get, key:" << key << ", start:" << start + << ", len:" << len << ", res:" << res + << ", readPageCnt:" << readPageCnt + << ", time:" << totalTime << "ms"; + } + return folly::makeFuture(res); +} + +int ReadCache::Put(const std::string &key, size_t start, size_t len, + const ByteBuffer &buffer) { + 
std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = SUCCESS; + uint32_t pageSize = cfg_.CacheCfg.PageBodySize; + uint64_t index = start / pageSize; + uint64_t pagePos = start % pageSize; + uint64_t writeLen = 0; + uint64_t writeOffset = 0; + uint64_t writePageCnt = 0; + size_t remainLen = len; + + while (remainLen > 0) { + writeLen = pagePos + remainLen > pageSize ? pageSize - pagePos : remainLen; + std::string pageKey = std::move(GetPageKey(key, index)); + res = pageCache_->Write(pageKey, pagePos, writeLen, + (buffer.data + writeOffset)); + if (SUCCESS != res) break; + ++writePageCnt; + remainLen -= writeLen; + ++index; + writeOffset += writeLen; + pagePos = (pagePos + writeLen) % pageSize; + } + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[ReadCache]Put, key:" << key << ", start:" << start + << ", len:" << len << ", res:" << res + << ", writePageCnt:" << writePageCnt + << ", time:" << totalTime << "ms"; + } + return res; +} + +int ReadCache::Delete(const std::string &key) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = SUCCESS; + size_t delPageNum = 0; + std::string firstPage = std::move(GetPageKey(key, 0)); + auto pageKey = pageCache_->GetPageList().lower_bound(firstPage); + while (pageKey != pageCache_->GetPageList().end()) { + std::vector tokens; + split(*pageKey, PAGE_SEPARATOR, tokens); + if (key != tokens[0]) break; + int tmpRes = pageCache_->Delete(*pageKey); + if (SUCCESS == tmpRes) { + ++delPageNum; + } else if (PAGE_NOT_FOUND != tmpRes) { + res = tmpRes; + break; + } + ++pageKey; + } + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[ReadCache]Delete, key:" << key << ", res:" << res + << ", delPageCnt:" << 
delPageNum
+                  << ", time:" << totalTime << "ms";
+    }
+    return res;
+}
+
+// Collect the distinct file keys that currently have at least one cached
+// page. Page keys are "<fileKey><PAGE_SEPARATOR><pageIndex>", so the file
+// key is the first split token.
+int ReadCache::GetAllKeys(std::set& keys) {
+    std::chrono::steady_clock::time_point startTime;
+    if (EnableLogging) startTime = std::chrono::steady_clock::now();
+
+    auto pageKey = pageCache_->GetPageList().begin();
+    while (pageKey != pageCache_->GetPageList().end()) {
+        std::vector tokens;
+        split(*pageKey, PAGE_SEPARATOR, tokens);
+        keys.insert(tokens[0]);
+        ++pageKey;
+    }
+    if (EnableLogging) {
+        double totalTime = std::chrono::duration(
+                std::chrono::steady_clock::now() - startTime).count();
+        LOG(INFO) << "[ReadCache]Get all keys, keyCnt:" << keys.size()
+                  << ", time:" << totalTime << "ms";
+    }
+    return SUCCESS;
+}
+
+// Shut down the underlying page cache (idempotence depends on PageCache::Close).
+void ReadCache::Close() {
+    pageCache_->Close();
+    LOG(WARNING) << "[ReadCache]Close";
+}
+
+// Build the page cache and the download-rate token bucket.
+// Called from the constructor; returns the page cache's init result.
+int ReadCache::Init() {
+    pageCache_ = std::make_shared(cfg_.CacheCfg);
+    tokenBucket_ = std::make_shared(
+            cfg_.DownloadNormalFlowLimit, cfg_.DownloadBurstFlowLimit);
+    int res = pageCache_->Init();
+    LOG(WARNING) << "[ReadCache]Init, res:" << res;
+    return res;
+}
+
+// Compose "<key><PAGE_SEPARATOR><pageIndex>" — must stay in sync with the
+// parsing in Delete/GetAllKeys (and WriteCache's identical helper).
+std::string ReadCache::GetPageKey(const std::string &key, size_t pageIndex) {
+    std::string pageKey(key);
+    pageKey.append(std::string(1, PAGE_SEPARATOR)).append(std::to_string(pageIndex));
+    return pageKey;
+}
+
+} // namespace HybridCache
diff --git a/local_cache/read_cache.h b/local_cache/read_cache.h
new file mode 100644
index 0000000..f3f5fe5
--- /dev/null
+++ b/local_cache/read_cache.h
@@ -0,0 +1,57 @@
+/*
+ * Project: HybridCache
+ * Created Date: 24-2-29
+ * Author: lshb
+ */
+#ifndef HYBRIDCACHE_READ_CACHE_H_
+#define HYBRIDCACHE_READ_CACHE_H_
+
+#include "folly/TokenBucket.h"
+
+#include "page_cache.h"
+#include "data_adaptor.h"
+
+namespace HybridCache {
+
+// Read-through cache: page cache in front of a remote DataAdaptor,
+// with token-bucket flow control on downloads.
+class ReadCache {
+ public:
+    ReadCache(const ReadCacheConfig& cfg,
+              std::shared_ptr dataAdaptor,
+              std::shared_ptr executor);
+    ReadCache() = default;
+    ~ReadCache() { Close(); }
+
+    // Read the local page cache first, and get it from the DataAdaptor 
if it misses + folly::Future Get(const std::string &key, + size_t start, + size_t len, + ByteBuffer &buffer // user buf + ); + + int Put(const std::string &key, + size_t start, + size_t len, + const ByteBuffer &buffer); + + int Delete(const std::string &key); + + int GetAllKeys(std::set& keys); + + void Close(); + + private: + int Init(); + + std::string GetPageKey(const std::string &key, size_t pageIndex); + + private: + ReadCacheConfig cfg_; + std::shared_ptr pageCache_; + std::shared_ptr dataAdaptor_; + std::shared_ptr executor_; + std::shared_ptr tokenBucket_; // download flow limit +}; + +} // namespace HybridCache + +#endif // HYBRIDCACHE_READ_CACHE_H_ diff --git a/local_cache/write_cache.cpp b/local_cache/write_cache.cpp new file mode 100644 index 0000000..364a0df --- /dev/null +++ b/local_cache/write_cache.cpp @@ -0,0 +1,286 @@ +#include "glog/logging.h" + +#include "errorcode.h" +#include "write_cache.h" + +namespace HybridCache { + +int WriteCache::Put(const std::string &key, size_t start, size_t len, + const ByteBuffer &buffer) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = SUCCESS; + uint32_t pageSize = cfg_.CacheCfg.PageBodySize; + uint64_t index = start / pageSize; + uint64_t pagePos = start % pageSize; + uint64_t writeLen = 0; + uint64_t writeOffset = 0; + uint64_t writePageCnt = 0; + size_t remainLen = len; + + while (remainLen > 0) { + writeLen = pagePos + remainLen > pageSize ? 
pageSize - pagePos : remainLen; + std::string pageKey = std::move(GetPageKey(key, index)); + res = pageCache_->Write(pageKey, pagePos, writeLen, + (buffer.data + writeOffset)); + if (SUCCESS != res) break; + ++writePageCnt; + remainLen -= writeLen; + ++index; + writeOffset += writeLen; + pagePos = (pagePos + writeLen) % pageSize; + } + if (0 < writePageCnt) + keys_.insert(key, time(nullptr)); + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[WriteCache]Put, key:" << key << ", start:" << start + << ", len:" << len << ", res:" << res + << ", writePageCnt:" << writePageCnt + << ", time:" << totalTime << "ms"; + } + return res; +} + +int WriteCache::Get(const std::string &key, size_t start, size_t len, + ByteBuffer &buffer, + std::vector>& dataBoundary) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = SUCCESS; + uint32_t pageSize = cfg_.CacheCfg.PageBodySize; + size_t index = start / pageSize; + uint32_t pagePos = start % pageSize; + size_t readLen = 0; + size_t bufOffset = 0; + size_t remainLen = len; + uint64_t readPageCnt = 0; + + while (remainLen > 0) { + readLen = pagePos + remainLen > pageSize ? 
pageSize - pagePos : remainLen; + std::string pageKey = std::move(GetPageKey(key, index)); + std::vector> stepDataBoundary; + int tmpRes = pageCache_->Read(pageKey, pagePos, readLen, + (buffer.data + bufOffset), stepDataBoundary); + if (SUCCESS == tmpRes) { + ++readPageCnt; + } else if (PAGE_NOT_FOUND != tmpRes) { + res = tmpRes; + break; + } + + for (auto& it : stepDataBoundary) { + size_t realStart = it.first + bufOffset; + auto last = dataBoundary.rbegin(); + if (last != dataBoundary.rend() && (last->first + last->second) == realStart) { + last->second += it.second; + } else { + dataBoundary.push_back(std::make_pair(realStart, it.second)); + } + } + remainLen -= readLen; + ++index; + bufOffset += readLen; + pagePos = (pagePos + readLen) % pageSize; + } + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[WriteCache]Get, key:" << key << ", start:" << start + << ", len:" << len << ", res:" << res + << ", boundaryVecSize:" << dataBoundary.size() + << ", readPageCnt:" << readPageCnt + << ", time:" << totalTime << "ms"; + } + return res; +} + +int WriteCache::GetAllCacheWithLock(const std::string &key, + std::vector>& dataSegments) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = SUCCESS; + Lock(key); + + std::string firstPage = std::move(GetPageKey(key, 0)); + auto pageKey = pageCache_->GetPageList().lower_bound(firstPage); + while (pageKey != pageCache_->GetPageList().end()) { + std::vector tokens; + split(*pageKey, PAGE_SEPARATOR, tokens); + if (key != tokens[0]) break; + + size_t pageIdx = 0; + std::stringstream sstream(tokens[1]); + sstream >> pageIdx; + size_t wholeValueOff = pageIdx * cfg_.CacheCfg.PageBodySize; + + std::vector> stepDataSegments; + res = pageCache_->GetAllCache(*pageKey, stepDataSegments); + if (SUCCESS != res) break; + for (auto& it : stepDataSegments) { + 
dataSegments.push_back(std::make_pair(it.first, + it.second + wholeValueOff)); + } + ++pageKey; + } + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[WriteCache]Get all cache with lock, key:" << key + << ", res:" << res << ", dataVecSize:" << dataSegments.size() + << ", time:" << totalTime << "ms"; + } + return res; +} + +int WriteCache::Delete(const std::string &key, LockType type) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = SUCCESS; + if (LockType::ALREADY_LOCKED != type) { + Lock(key); + } + + keys_.erase(key); + size_t delPageNum = 0; + std::string firstPage = std::move(GetPageKey(key, 0)); + auto pageKey = pageCache_->GetPageList().lower_bound(firstPage); + while (pageKey != pageCache_->GetPageList().end()) { + std::vector tokens; + split(*pageKey, PAGE_SEPARATOR, tokens); + if (key != tokens[0]) break; + int tmpRes = pageCache_->Delete(*pageKey); + if (SUCCESS == tmpRes) { + ++delPageNum; + } else if (PAGE_NOT_FOUND != tmpRes) { + res = tmpRes; + break; + } + ++pageKey; + } + + UnLock(key); + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[WriteCache]Delete, key:" << key << ", res:" << res + << ", delPageCnt:" << delPageNum + << ", time:" << totalTime << "ms"; + } + return res; +} + +int WriteCache::Truncate(const std::string &key, size_t len) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = SUCCESS; + uint32_t pageSize = cfg_.CacheCfg.PageBodySize; + uint64_t index = len / pageSize; + uint64_t pagePos = len % pageSize; + + if (0 != pagePos) { + uint32_t TruncateLen = pageSize - pagePos; + std::string TruncatePage = std::move(GetPageKey(key, index)); + int tmpRes = pageCache_->DeletePart(TruncatePage, 
pagePos, TruncateLen); + if (SUCCESS != tmpRes && PAGE_NOT_FOUND != tmpRes) { + res = tmpRes; + } + ++index; + } + + size_t delPageNum = 0; + if (SUCCESS == res) { + Lock(key); + std::string firstPage = std::move(GetPageKey(key, index)); + auto pageKey = pageCache_->GetPageList().lower_bound(firstPage); + while (pageKey != pageCache_->GetPageList().end()) { + std::vector tokens; + split(*pageKey, PAGE_SEPARATOR, tokens); + if (key != tokens[0]) break; + int tmpRes = pageCache_->Delete(*pageKey); + if (SUCCESS == tmpRes) { + ++delPageNum; + } else if (PAGE_NOT_FOUND != tmpRes) { + res = tmpRes; + break; + } + ++pageKey; + } + UnLock(key); + } + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[WriteCache]Truncate, key:" << key << ", len:" << len + << ", res:" << res << ", delPageCnt:" << delPageNum + << ", time:" << totalTime << "ms"; + } + return res; +} + +void WriteCache::UnLock(const std::string &key) { + keyLocks_.erase(key); + if (EnableLogging) { + LOG(INFO) << "[WriteCache]UnLock, key:" << key; + } +} + +int WriteCache::GetAllKeys(std::map& keys) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + for (auto& it : keys_) { + keys[it.first] = it.second; + } + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[WriteCache]Get all keys, keyCnt:" << keys.size() + << ", time:" << totalTime << "ms"; + } + return SUCCESS; +} + +void WriteCache::Close() { + pageCache_->Close(); + keys_.clear(); + LOG(WARNING) << "[WriteCache]Close"; +} + +size_t WriteCache::GetCacheSize() { + return pageCache_->GetCacheSize(); +} + +size_t WriteCache::GetCacheMaxSize() { + return pageCache_->GetCacheMaxSize(); +} + +int WriteCache::Init() { + pageCache_ = std::make_shared(cfg_.CacheCfg); + int res = pageCache_->Init(); + LOG(WARNING) << 
"[WriteCache]Init, res:" << res; + return res; +} + +void WriteCache::Lock(const std::string &key) { + while(!keyLocks_.add(key)); +} + +std::string WriteCache::GetPageKey(const std::string &key, size_t pageIndex) { + std::string pageKey(key); + pageKey.append(std::string(1, PAGE_SEPARATOR)).append(std::to_string(pageIndex)); + return pageKey; +} + +} // namespace HybridCache diff --git a/local_cache/write_cache.h b/local_cache/write_cache.h new file mode 100644 index 0000000..82e005e --- /dev/null +++ b/local_cache/write_cache.h @@ -0,0 +1,74 @@ +/* + * Project: HybridCache + * Created Date: 24-3-18 + * Author: lshb + */ +#ifndef HYBRIDCACHE_WRITE_CACHE_H_ +#define HYBRIDCACHE_WRITE_CACHE_H_ + +#include "folly/concurrency/ConcurrentHashMap.h" + +#include "page_cache.h" + +namespace HybridCache { + +class WriteCache { + public: + WriteCache(const WriteCacheConfig& cfg) : cfg_(cfg) { Init(); } + WriteCache() = default; + ~WriteCache() { Close(); } + + enum class LockType { + NONE = 0, + ALREADY_LOCKED = -1, + }; + + int Put(const std::string &key, + size_t start, + size_t len, + const ByteBuffer &buffer + ); + + int Get(const std::string &key, + size_t start, + size_t len, + ByteBuffer &buffer, + std::vector>& dataBoundary // valid data segment boundar + ); + + // lock to ensure the availability of the returned buf + // After being locked, it can be read and written, but cannot be deleted + int GetAllCacheWithLock(const std::string &key, + std::vector>& dataSegments // ByteBuffer + off of key value(file) + ); + + int Delete(const std::string &key, LockType type = LockType::NONE); + + int Truncate(const std::string &key, size_t len); + + void UnLock(const std::string &key); + + int GetAllKeys(std::map& keys); + + void Close(); + + size_t GetCacheSize(); + size_t GetCacheMaxSize(); + + private: + int Init(); + + void Lock(const std::string &key); + + std::string GetPageKey(const std::string &key, size_t pageIndex); + + private: + WriteCacheConfig cfg_; + 
std::shared_ptr pageCache_; + folly::ConcurrentHashMap keys_; // + StringSkipList::Accessor keyLocks_ = StringSkipList::create(SKIP_LIST_HEIGHT); // presence key indicates lock +}; + +} // namespace HybridCache + +#endif // HYBRIDCACHE_WRITE_CACHE_H_ diff --git a/s3fs/CMakeLists.txt b/s3fs/CMakeLists.txt new file mode 100644 index 0000000..fb1985b --- /dev/null +++ b/s3fs/CMakeLists.txt @@ -0,0 +1,16 @@ +SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -O3 -D_FILE_OFFSET_BITS=64 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O3 -D_FILE_OFFSET_BITS=64 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3") + +file(GLOB_RECURSE ALL_SOURCES CONFIGURE_DEPENDS "*.cpp") +list(REMOVE_ITEM ALL_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/s3fs_lib.cpp") +add_executable(s3fs ${ALL_SOURCES}) +target_include_directories(s3fs PRIVATE /usr/include/fuse /usr/include/libxml2) +target_link_libraries(s3fs PUBLIC hybridcache_local madfs_global -lfuse -pthread -lcurl -lxml2 -lcrypto -ldl) + +file(GLOB_RECURSE LIB_SOURCES CONFIGURE_DEPENDS "*.cpp") +list(REMOVE_ITEM LIB_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/s3fs.cpp") +add_library(s3fs_lib STATIC ${LIB_SOURCES}) +target_include_directories(s3fs_lib PRIVATE /usr/include/fuse /usr/include/libxml2) +target_link_libraries(s3fs_lib PUBLIC hybridcache_local madfs_global -pthread -lcurl -lxml2 -lcrypto -ldl) diff --git a/s3fs/addhead.cpp b/s3fs/addhead.cpp new file mode 100644 index 0000000..bfcd5da --- /dev/null +++ b/s3fs/addhead.cpp @@ -0,0 +1,248 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "s3fs.h" +#include "addhead.h" +#include "curl_util.h" +#include "s3fs_logger.h" + +//------------------------------------------------------------------- +// Symbols +//------------------------------------------------------------------- +static constexpr char ADD_HEAD_REGEX[] = "reg:"; + +//------------------------------------------------------------------- +// Class AdditionalHeader +//------------------------------------------------------------------- +AdditionalHeader AdditionalHeader::singleton; + +//------------------------------------------------------------------- +// Class AdditionalHeader method +//------------------------------------------------------------------- +AdditionalHeader::AdditionalHeader() +{ + if(this == AdditionalHeader::get()){ + is_enable = false; + }else{ + abort(); + } +} + +AdditionalHeader::~AdditionalHeader() +{ + if(this == AdditionalHeader::get()){ + Unload(); + }else{ + abort(); + } +} + +bool AdditionalHeader::Load(const char* file) +{ + if(!file){ + S3FS_PRN_WARN("file is nullptr."); + return false; + } + Unload(); + + std::ifstream AH(file); + if(!AH.good()){ + S3FS_PRN_WARN("Could not open file(%s).", file); + return false; + } + + // read file + std::string line; + while(getline(AH, line)){ + if(line.empty()){ + continue; + } + if('#' == line[0]){ + continue; + } + // load a line + std::istringstream ss(line); + std::string key; // suffix(key) + std::string head; // 
additional HTTP header + std::string value; // header value + if(0 == isblank(line[0])){ + ss >> key; + } + if(ss){ + ss >> head; + if(ss && static_cast(ss.tellg()) < line.size()){ + value = line.substr(static_cast(ss.tellg()) + 1); + } + } + + // check it + if(head.empty()){ + if(key.empty()){ + continue; + } + S3FS_PRN_ERR("file format error: %s key(suffix) is no HTTP header value.", key.c_str()); + Unload(); + return false; + } + + if(0 == strncasecmp(key.c_str(), ADD_HEAD_REGEX, strlen(ADD_HEAD_REGEX))){ + // regex + if(key.size() <= strlen(ADD_HEAD_REGEX)){ + S3FS_PRN_ERR("file format error: %s key(suffix) does not have key std::string.", key.c_str()); + continue; + } + key.erase(0, strlen(ADD_HEAD_REGEX)); + + // compile + std::unique_ptr preg(new regex_t); + int result; + if(0 != (result = regcomp(preg.get(), key.c_str(), REG_EXTENDED | REG_NOSUB))){ // we do not need matching info + char errbuf[256]; + regerror(result, preg.get(), errbuf, sizeof(errbuf)); + S3FS_PRN_ERR("failed to compile regex from %s key by %s.", key.c_str(), errbuf); + continue; + } + + addheadlist.emplace_back(std::move(preg), key, head, value); + }else{ + // not regex, directly comparing + addheadlist.emplace_back(nullptr, key, head, value); + } + + // set flag + is_enable = true; + } + return true; +} + +void AdditionalHeader::Unload() +{ + is_enable = false; + + addheadlist.clear(); +} + +bool AdditionalHeader::AddHeader(headers_t& meta, const char* path) const +{ + if(!is_enable){ + return true; + } + if(!path){ + S3FS_PRN_WARN("path is nullptr."); + return false; + } + + size_t pathlength = strlen(path); + + // loop + // + // [NOTE] + // Because to allow duplicate key, and then scanning the entire table. 
+ // + for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter){ + const add_header *paddhead = &*iter; + + if(paddhead->pregex){ + // regex + regmatch_t match; // not use + if(0 == regexec(paddhead->pregex.get(), path, 1, &match, 0)){ + // match -> adding header + meta[paddhead->headkey] = paddhead->headvalue; + } + }else{ + // directly comparing + if(paddhead->basestring.length() < pathlength){ + if(paddhead->basestring.empty() || paddhead->basestring == &path[pathlength - paddhead->basestring.length()]){ + // match -> adding header + meta[paddhead->headkey] = paddhead->headvalue; + } + } + } + } + return true; +} + +struct curl_slist* AdditionalHeader::AddHeader(struct curl_slist* list, const char* path) const +{ + headers_t meta; + + if(!AddHeader(meta, path)){ + return list; + } + for(headers_t::iterator iter = meta.begin(); iter != meta.end(); ++iter){ + // Adding header + list = curl_slist_sort_insert(list, iter->first.c_str(), iter->second.c_str()); + } + meta.clear(); + S3FS_MALLOCTRIM(0); + return list; +} + +bool AdditionalHeader::Dump() const +{ + if(!S3fsLog::IsS3fsLogDbg()){ + return true; + } + + std::ostringstream ssdbg; + int cnt = 1; + + ssdbg << "Additional Header list[" << addheadlist.size() << "] = {" << std::endl; + + for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter, ++cnt){ + const add_header *paddhead = &*iter; + + ssdbg << " [" << cnt << "] = {" << std::endl; + + if(paddhead->pregex){ + ssdbg << " type\t\t--->\tregex" << std::endl; + }else{ + ssdbg << " type\t\t--->\tsuffix matching" << std::endl; + } + ssdbg << " base std::string\t--->\t" << paddhead->basestring << std::endl; + ssdbg << " add header\t--->\t" << paddhead->headkey << ": " << paddhead->headvalue << std::endl; + ssdbg << " }" << std::endl; + } + + + ssdbg << "}" << std::endl; + + // print all + S3FS_PRN_DBG("%s", ssdbg.str().c_str()); + + return true; +} + +/* +* Local variables: +* 
tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/addhead.h b/s3fs/addhead.h new file mode 100644 index 0000000..adb77a1 --- /dev/null +++ b/s3fs/addhead.h @@ -0,0 +1,98 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_ADDHEAD_H_ +#define S3FS_ADDHEAD_H_ + +#include +#include +#include + +#include "metaheader.h" + +//---------------------------------------------- +// Structure / Typedef +//---------------------------------------------- +struct add_header{ + add_header(std::unique_ptr pregex, std::string basestring, std::string headkey, std::string headvalue) + : pregex(std::move(pregex)) + , basestring(std::move(basestring)) + , headkey(std::move(headkey)) + , headvalue(std::move(headvalue)) + {} + ~add_header() { + if(pregex){ + regfree(pregex.get()); + } + } + + add_header(const add_header&) = delete; + add_header(add_header&& val) = default; + add_header& operator=(const add_header&) = delete; + add_header& operator=(add_header&&) = delete; + + std::unique_ptr pregex; // not nullptr means using regex, nullptr means comparing suffix directly. 
+ std::string basestring; + std::string headkey; + std::string headvalue; +}; + +typedef std::vector addheadlist_t; + +//---------------------------------------------- +// Class AdditionalHeader +//---------------------------------------------- +class AdditionalHeader +{ + private: + static AdditionalHeader singleton; + bool is_enable; + addheadlist_t addheadlist; + + protected: + AdditionalHeader(); + ~AdditionalHeader(); + AdditionalHeader(const AdditionalHeader&) = delete; + AdditionalHeader(AdditionalHeader&&) = delete; + AdditionalHeader& operator=(const AdditionalHeader&) = delete; + AdditionalHeader& operator=(AdditionalHeader&&) = delete; + + public: + // Reference singleton + static AdditionalHeader* get() { return &singleton; } + + bool Load(const char* file); + void Unload(); + + bool AddHeader(headers_t& meta, const char* path) const; + struct curl_slist* AddHeader(struct curl_slist* list, const char* path) const; + bool Dump() const; +}; + +#endif // S3FS_ADDHEAD_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/autolock.cpp b/s3fs/autolock.cpp new file mode 100644 index 0000000..b8c4371 --- /dev/null +++ b/s3fs/autolock.cpp @@ -0,0 +1,78 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include + +#include "autolock.h" +#include "s3fs_logger.h" + +//------------------------------------------------------------------- +// Class AutoLock +//------------------------------------------------------------------- +AutoLock::AutoLock(pthread_mutex_t* pmutex, Type type) : auto_mutex(pmutex) +{ + if (type == ALREADY_LOCKED) { + is_lock_acquired = false; + } else if (type == NO_WAIT) { + int result = pthread_mutex_trylock(auto_mutex); + if(result == 0){ + is_lock_acquired = true; + }else if(result == EBUSY){ + is_lock_acquired = false; + }else{ + S3FS_PRN_CRIT("pthread_mutex_trylock returned: %d", result); + abort(); + } + } else { + int result = pthread_mutex_lock(auto_mutex); + if(result == 0){ + is_lock_acquired = true; + }else{ + S3FS_PRN_CRIT("pthread_mutex_lock returned: %d", result); + abort(); + } + } +} + +bool AutoLock::isLockAcquired() const +{ + return is_lock_acquired; +} + +AutoLock::~AutoLock() +{ + if (is_lock_acquired) { + int result = pthread_mutex_unlock(auto_mutex); + if(result != 0){ + S3FS_PRN_CRIT("pthread_mutex_unlock returned: %d", result); + abort(); + } + } +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/autolock.h b/s3fs/autolock.h new file mode 100644 index 0000000..2202cfd --- /dev/null +++ b/s3fs/autolock.h @@ -0,0 +1,63 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_AUTOLOCK_H_ +#define S3FS_AUTOLOCK_H_ + +#include + +//------------------------------------------------------------------- +// AutoLock Class +//------------------------------------------------------------------- +class AutoLock +{ + public: + enum Type { + NO_WAIT = 1, + ALREADY_LOCKED = 2, + NONE = 0 + }; + + private: + pthread_mutex_t* const auto_mutex; + bool is_lock_acquired; + + private: + AutoLock(const AutoLock&) = delete; + AutoLock(AutoLock&&) = delete; + AutoLock& operator=(const AutoLock&) = delete; + AutoLock& operator=(AutoLock&&) = delete; + + public: + explicit AutoLock(pthread_mutex_t* pmutex, Type type = NONE); + ~AutoLock(); + bool isLockAcquired() const; +}; + +#endif // S3FS_AUTOLOCK_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/cache.cpp b/s3fs/cache.cpp new file mode 100644 index 0000000..76e3720 --- /dev/null +++ b/s3fs/cache.cpp @@ -0,0 +1,933 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include + +#include "s3fs.h" +#include "s3fs_logger.h" +#include "s3fs_util.h" +#include "cache.h" +#include "autolock.h" +#include "string_util.h" + +//------------------------------------------------------------------- +// Utility +//------------------------------------------------------------------- +inline void SetStatCacheTime(struct timespec& ts) +{ + if(-1 == clock_gettime(static_cast(CLOCK_MONOTONIC_COARSE), &ts)){ + S3FS_PRN_CRIT("clock_gettime failed: %d", errno); + abort(); + } +} + +inline void InitStatCacheTime(struct timespec& ts) +{ + ts.tv_sec = 0; + ts.tv_nsec = 0; +} + +inline int CompareStatCacheTime(const struct timespec& ts1, const struct timespec& ts2) +{ + // return -1: ts1 < ts2 + // 0: ts1 == ts2 + // 1: ts1 > ts2 + if(ts1.tv_sec < ts2.tv_sec){ + return -1; + }else if(ts1.tv_sec > ts2.tv_sec){ + return 1; + }else{ + if(ts1.tv_nsec < ts2.tv_nsec){ + return -1; + }else if(ts1.tv_nsec > ts2.tv_nsec){ + return 1; + } + } + return 0; +} + +inline bool IsExpireStatCacheTime(const struct timespec& ts, const time_t& expire) +{ + struct timespec nowts; + SetStatCacheTime(nowts); + nowts.tv_sec -= expire; + + return (0 < CompareStatCacheTime(nowts, ts)); +} + +// +// For stats cache out +// +typedef std::vector statiterlist_t; + +struct sort_statiterlist{ + // ascending order + bool operator()(const stat_cache_t::iterator& src1, const stat_cache_t::iterator& src2) const + { + int result = CompareStatCacheTime(src1->second.cache_date, 
src2->second.cache_date); + if(0 == result){ + if(src1->second.hit_count < src2->second.hit_count){ + result = -1; + } + } + return (result < 0); + } +}; + +// +// For symbolic link cache out +// +typedef std::vector symlinkiterlist_t; + +struct sort_symlinkiterlist{ + // ascending order + bool operator()(const symlink_cache_t::iterator& src1, const symlink_cache_t::iterator& src2) const + { + int result = CompareStatCacheTime(src1->second.cache_date, src2->second.cache_date); // use the same as Stats + if(0 == result){ + if(src1->second.hit_count < src2->second.hit_count){ + result = -1; + } + } + return (result < 0); + } +}; + +//------------------------------------------------------------------- +// Static +//------------------------------------------------------------------- +StatCache StatCache::singleton; +pthread_mutex_t StatCache::stat_cache_lock; + +//------------------------------------------------------------------- +// Constructor/Destructor +//------------------------------------------------------------------- +StatCache::StatCache() : IsExpireTime(true), IsExpireIntervalType(false), ExpireTime(15 * 60), CacheSize(100000), IsCacheNoObject(true) +{ + if(this == StatCache::getStatCacheData()){ + stat_cache.clear(); + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + int result; + if(0 != (result = pthread_mutex_init(&StatCache::stat_cache_lock, &attr))){ + S3FS_PRN_CRIT("failed to init stat_cache_lock: %d", result); + abort(); + } + }else{ + abort(); + } +} + +StatCache::~StatCache() +{ + if(this == StatCache::getStatCacheData()){ + Clear(); + int result = pthread_mutex_destroy(&StatCache::stat_cache_lock); + if(result != 0){ + S3FS_PRN_CRIT("failed to destroy stat_cache_lock: %d", result); + abort(); + } + }else{ + abort(); + } +} + +//------------------------------------------------------------------- +// Methods 
+//------------------------------------------------------------------- +unsigned long StatCache::GetCacheSize() const +{ + return CacheSize; +} + +unsigned long StatCache::SetCacheSize(unsigned long size) +{ + unsigned long old = CacheSize; + CacheSize = size; + return old; +} + +time_t StatCache::GetExpireTime() const +{ + return (IsExpireTime ? ExpireTime : (-1)); +} + +time_t StatCache::SetExpireTime(time_t expire, bool is_interval) +{ + time_t old = ExpireTime; + ExpireTime = expire; + IsExpireTime = true; + IsExpireIntervalType = is_interval; + return old; +} + +time_t StatCache::UnsetExpireTime() +{ + time_t old = IsExpireTime ? ExpireTime : (-1); + ExpireTime = 0; + IsExpireTime = false; + IsExpireIntervalType = false; + return old; +} + +bool StatCache::SetCacheNoObject(bool flag) +{ + bool old = IsCacheNoObject; + IsCacheNoObject = flag; + return old; +} + +void StatCache::Clear() +{ + AutoLock lock(&StatCache::stat_cache_lock); + + stat_cache.clear(); + S3FS_MALLOCTRIM(0); +} + +bool StatCache::GetStat(const std::string& key, struct stat* pst, headers_t* meta, bool overcheck, const char* petag, bool* pisforce) +{ + bool is_delete_cache = false; + std::string strpath = key; + + AutoLock lock(&StatCache::stat_cache_lock); + + stat_cache_t::iterator iter = stat_cache.end(); + if(overcheck && '/' != *strpath.rbegin()){ + strpath += "/"; + iter = stat_cache.find(strpath); + } + if(iter == stat_cache.end()){ + strpath = key; + iter = stat_cache.find(strpath); + } + + if(iter != stat_cache.end()){ + stat_cache_entry* ent = &iter->second; + if(0 < ent->notruncate || !IsExpireTime || !IsExpireStatCacheTime(ent->cache_date, ExpireTime)){ + if(ent->noobjcache){ + if(!IsCacheNoObject){ + // need to delete this cache. + DelStat(strpath, AutoLock::ALREADY_LOCKED); + }else{ + // noobjcache = true means no object. 
+ } + return false; + } + // hit without checking etag + std::string stretag; + if(petag){ + // find & check ETag + for(headers_t::iterator hiter = ent->meta.begin(); hiter != ent->meta.end(); ++hiter){ + std::string tag = lower(hiter->first); + if(tag == "etag"){ + stretag = hiter->second; + if('\0' != petag[0] && petag != stretag){ + is_delete_cache = true; + } + break; + } + } + } + if(is_delete_cache){ + // not hit by different ETag + S3FS_PRN_DBG("stat cache not hit by ETag[path=%s][time=%lld.%09ld][hit count=%lu][ETag(%s)!=(%s)]", + strpath.c_str(), static_cast(ent->cache_date.tv_sec), ent->cache_date.tv_nsec, ent->hit_count, petag ? petag : "null", stretag.c_str()); + }else{ + // hit + S3FS_PRN_DBG("stat cache hit [path=%s][time=%lld.%09ld][hit count=%lu]", + strpath.c_str(), static_cast(ent->cache_date.tv_sec), ent->cache_date.tv_nsec, ent->hit_count); + + if(pst!= nullptr){ + *pst= ent->stbuf; + } + if(meta != nullptr){ + *meta = ent->meta; + } + if(pisforce != nullptr){ + (*pisforce) = ent->isforce; + } + ent->hit_count++; + + if(IsExpireIntervalType){ + SetStatCacheTime(ent->cache_date); + } + return true; + } + + }else{ + // timeout + is_delete_cache = true; + } + } + + if(is_delete_cache){ + DelStat(strpath, AutoLock::ALREADY_LOCKED); + } + return false; +} + +bool StatCache::IsNoObjectCache(const std::string& key, bool overcheck) +{ + bool is_delete_cache = false; + std::string strpath = key; + + if(!IsCacheNoObject){ + return false; + } + + AutoLock lock(&StatCache::stat_cache_lock); + + stat_cache_t::iterator iter = stat_cache.end(); + if(overcheck && '/' != *strpath.rbegin()){ + strpath += "/"; + iter = stat_cache.find(strpath); + } + if(iter == stat_cache.end()){ + strpath = key; + iter = stat_cache.find(strpath); + } + + if(iter != stat_cache.end()) { + const stat_cache_entry* ent = &iter->second; + if(0 < ent->notruncate || !IsExpireTime || !IsExpireStatCacheTime(iter->second.cache_date, ExpireTime)){ + if(iter->second.noobjcache){ + // 
noobjcache = true means no object. + SetStatCacheTime((*iter).second.cache_date); + return true; + } + }else{ + // timeout + is_delete_cache = true; + } + } + + if(is_delete_cache){ + DelStat(strpath, AutoLock::ALREADY_LOCKED); + } + return false; +} + +bool StatCache::AddStat(const std::string& key, const headers_t& meta, bool forcedir, bool no_truncate) +{ + if(!no_truncate && CacheSize< 1){ + return true; + } + S3FS_PRN_INFO3("add stat cache entry[path=%s]", key.c_str()); + + AutoLock lock(&StatCache::stat_cache_lock); + + if(stat_cache.end() != stat_cache.find(key)){ + // found cache + DelStat(key.c_str(), AutoLock::ALREADY_LOCKED); + }else{ + // check: need to truncate cache + if(stat_cache.size() > CacheSize){ + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(!TruncateCache(AutoLock::ALREADY_LOCKED)){ + return false; + } + } + } + + // make new + stat_cache_entry ent; + if(!convert_header_to_stat(key.c_str(), meta, &ent.stbuf, forcedir)){ + return false; + } + ent.hit_count = 0; + ent.isforce = forcedir; + ent.noobjcache = false; + ent.notruncate = (no_truncate ? 1L : 0L); + ent.meta.clear(); + SetStatCacheTime(ent.cache_date); // Set time. + //copy only some keys + for(headers_t::const_iterator iter = meta.begin(); iter != meta.end(); ++iter){ + std::string tag = lower(iter->first); + std::string value = iter->second; + if(tag == "content-type"){ + ent.meta[iter->first] = value; + }else if(tag == "content-length"){ + ent.meta[iter->first] = value; + }else if(tag == "etag"){ + ent.meta[iter->first] = value; + }else if(tag == "last-modified"){ + ent.meta[iter->first] = value; + }else if(is_prefix(tag.c_str(), "x-amz")){ + ent.meta[tag] = value; // key is lower case for "x-amz" + } + } + + const auto& value = stat_cache[key] = std::move(ent); + + // check symbolic link cache + if(!S_ISLNK(value.stbuf.st_mode)){ + if(symlink_cache.end() != symlink_cache.find(key)){ + // if symbolic link cache has key, thus remove it. 
+ DelSymlink(key.c_str(), AutoLock::ALREADY_LOCKED); + } + } + + // If no_truncate flag is set, set file name to notruncate_file_cache + // + if(no_truncate){ + AddNotruncateCache(key); + } + + return true; +} + +// [NOTE] +// Updates only meta data if cached data exists. +// And when these are updated, it also updates the cache time. +// +// Since the file mode may change while the file is open, it is +// updated as well. +// +bool StatCache::UpdateMetaStats(const std::string& key, const headers_t& meta) +{ + if(CacheSize < 1){ + return true; + } + S3FS_PRN_INFO3("update stat cache entry[path=%s]", key.c_str()); + + AutoLock lock(&StatCache::stat_cache_lock); + stat_cache_t::iterator iter = stat_cache.find(key); + if(stat_cache.end() == iter){ + return true; + } + stat_cache_entry* ent = &iter->second; + + // update only meta keys + for(headers_t::const_iterator metaiter = meta.begin(); metaiter != meta.end(); ++metaiter){ + std::string tag = lower(metaiter->first); + std::string value = metaiter->second; + if(tag == "content-type"){ + ent->meta[metaiter->first] = value; + }else if(tag == "content-length"){ + ent->meta[metaiter->first] = value; + }else if(tag == "etag"){ + ent->meta[metaiter->first] = value; + }else if(tag == "last-modified"){ + ent->meta[metaiter->first] = value; + }else if(is_prefix(tag.c_str(), "x-amz")){ + ent->meta[tag] = value; // key is lower case for "x-amz" + } + } + + // Update time. 
+ SetStatCacheTime(ent->cache_date); + + // Update only mode + ent->stbuf.st_mode = get_mode(meta, key); + + return true; +} + +bool StatCache::AddNoObjectCache(const std::string& key) +{ + if(!IsCacheNoObject){ + return true; // pretend successful + } + if(CacheSize < 1){ + return true; + } + S3FS_PRN_INFO3("add no object cache entry[path=%s]", key.c_str()); + + AutoLock lock(&StatCache::stat_cache_lock); + + if(stat_cache.end() != stat_cache.find(key)){ + // found + DelStat(key.c_str(), AutoLock::ALREADY_LOCKED); + }else{ + // check: need to truncate cache + if(stat_cache.size() > CacheSize){ + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(!TruncateCache(AutoLock::ALREADY_LOCKED)){ + return false; + } + } + } + + // make new + stat_cache_entry ent; + memset(&ent.stbuf, 0, sizeof(struct stat)); + ent.hit_count = 0; + ent.isforce = false; + ent.noobjcache = true; + ent.notruncate = 0L; + ent.meta.clear(); + SetStatCacheTime(ent.cache_date); // Set time. + + stat_cache[key] = std::move(ent); + + // check symbolic link cache + if(symlink_cache.end() != symlink_cache.find(key)){ + // if symbolic link cache has key, thus remove it. + DelSymlink(key.c_str(), AutoLock::ALREADY_LOCKED); + } + return true; +} + +void StatCache::ChangeNoTruncateFlag(const std::string& key, bool no_truncate) +{ + AutoLock lock(&StatCache::stat_cache_lock); + stat_cache_t::iterator iter = stat_cache.find(key); + + if(stat_cache.end() != iter){ + stat_cache_entry* ent = &iter->second; + if(no_truncate){ + if(0L == ent->notruncate){ + // need to add no truncate cache. + AddNotruncateCache(key); + } + ++(ent->notruncate); + }else{ + if(0L < ent->notruncate){ + --(ent->notruncate); + if(0L == ent->notruncate){ + // need to delete from no truncate cache. 
+ DelNotruncateCache(key); + } + } + } + } +} + +bool StatCache::TruncateCache(AutoLock::Type locktype) +{ + AutoLock lock(&StatCache::stat_cache_lock, locktype); + + if(stat_cache.empty()){ + return true; + } + + // 1) erase over expire time + if(IsExpireTime){ + for(stat_cache_t::iterator iter = stat_cache.begin(); iter != stat_cache.end(); ){ + const stat_cache_entry* entry = &iter->second; + if(0L == entry->notruncate && IsExpireStatCacheTime(entry->cache_date, ExpireTime)){ + iter = stat_cache.erase(iter); + }else{ + ++iter; + } + } + } + + // 2) check stat cache count + if(stat_cache.size() < CacheSize){ + return true; + } + + // 3) erase from the old cache in order + size_t erase_count= stat_cache.size() - CacheSize + 1; + statiterlist_t erase_iters; + for(stat_cache_t::iterator iter = stat_cache.begin(); iter != stat_cache.end() && 0 < erase_count; ++iter){ + // check no truncate + const stat_cache_entry* ent = &iter->second; + if(0L < ent->notruncate){ + // skip for no truncate entry and keep extra counts for this entity. 
+ if(0 < erase_count){ + --erase_count; // decrement + } + }else{ + // iter is not have notruncate flag + erase_iters.push_back(iter); + } + if(erase_count < erase_iters.size()){ + std::sort(erase_iters.begin(), erase_iters.end(), sort_statiterlist()); + while(erase_count < erase_iters.size()){ + erase_iters.pop_back(); + } + } + } + for(statiterlist_t::iterator iiter = erase_iters.begin(); iiter != erase_iters.end(); ++iiter){ + stat_cache_t::iterator siter = *iiter; + + S3FS_PRN_DBG("truncate stat cache[path=%s]", siter->first.c_str()); + stat_cache.erase(siter); + } + S3FS_MALLOCTRIM(0); + + return true; +} + +bool StatCache::DelStat(const char* key, AutoLock::Type locktype) +{ + if(!key){ + return false; + } + S3FS_PRN_INFO3("delete stat cache entry[path=%s]", key); + + AutoLock lock(&StatCache::stat_cache_lock, locktype); + + stat_cache_t::iterator iter; + if(stat_cache.end() != (iter = stat_cache.find(key))){ + stat_cache.erase(iter); + DelNotruncateCache(key); + } + if(0 < strlen(key) && 0 != strcmp(key, "/")){ + std::string strpath = key; + if('/' == *strpath.rbegin()){ + // If there is "path" cache, delete it. + strpath.erase(strpath.length() - 1); + }else{ + // If there is "path/" cache, delete it. 
+ strpath += "/"; + } + if(stat_cache.end() != (iter = stat_cache.find(strpath))){ + stat_cache.erase(iter); + DelNotruncateCache(strpath); + } + } + S3FS_MALLOCTRIM(0); + + return true; +} + +bool StatCache::GetSymlink(const std::string& key, std::string& value) +{ + bool is_delete_cache = false; + const std::string& strpath = key; + + AutoLock lock(&StatCache::stat_cache_lock); + + symlink_cache_t::iterator iter = symlink_cache.find(strpath); + if(iter != symlink_cache.end()){ + symlink_cache_entry* ent = &iter->second; + if(!IsExpireTime || !IsExpireStatCacheTime(ent->cache_date, ExpireTime)){ // use the same as Stats + // found + S3FS_PRN_DBG("symbolic link cache hit [path=%s][time=%lld.%09ld][hit count=%lu]", + strpath.c_str(), static_cast(ent->cache_date.tv_sec), ent->cache_date.tv_nsec, ent->hit_count); + + value = ent->link; + + ent->hit_count++; + if(IsExpireIntervalType){ + SetStatCacheTime(ent->cache_date); + } + return true; + }else{ + // timeout + is_delete_cache = true; + } + } + + if(is_delete_cache){ + DelSymlink(strpath.c_str(), AutoLock::ALREADY_LOCKED); + } + return false; +} + +bool StatCache::AddSymlink(const std::string& key, const std::string& value) +{ + if(CacheSize< 1){ + return true; + } + S3FS_PRN_INFO3("add symbolic link cache entry[path=%s, value=%s]", key.c_str(), value.c_str()); + + AutoLock lock(&StatCache::stat_cache_lock); + + if(symlink_cache.end() != symlink_cache.find(key)){ + // found + DelSymlink(key.c_str(), AutoLock::ALREADY_LOCKED); + }else{ + // check: need to truncate cache + if(symlink_cache.size() > CacheSize){ + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(!TruncateSymlink(AutoLock::ALREADY_LOCKED)){ + return false; + } + } + } + + // make new + symlink_cache_entry ent; + ent.link = value; + ent.hit_count = 0; + SetStatCacheTime(ent.cache_date); // Set time(use the same as Stats). 
+ + symlink_cache[key] = std::move(ent); + + return true; +} + +bool StatCache::TruncateSymlink(AutoLock::Type locktype) +{ + AutoLock lock(&StatCache::stat_cache_lock, locktype); + + if(symlink_cache.empty()){ + return true; + } + + // 1) erase over expire time + if(IsExpireTime){ + for(symlink_cache_t::iterator iter = symlink_cache.begin(); iter != symlink_cache.end(); ){ + const symlink_cache_entry* entry = &iter->second; + if(IsExpireStatCacheTime(entry->cache_date, ExpireTime)){ // use the same as Stats + iter = symlink_cache.erase(iter); + }else{ + ++iter; + } + } + } + + // 2) check stat cache count + if(symlink_cache.size() < CacheSize){ + return true; + } + + // 3) erase from the old cache in order + size_t erase_count= symlink_cache.size() - CacheSize + 1; + symlinkiterlist_t erase_iters; + for(symlink_cache_t::iterator iter = symlink_cache.begin(); iter != symlink_cache.end(); ++iter){ + erase_iters.push_back(iter); + sort(erase_iters.begin(), erase_iters.end(), sort_symlinkiterlist()); + if(erase_count < erase_iters.size()){ + erase_iters.pop_back(); + } + } + for(symlinkiterlist_t::iterator iiter = erase_iters.begin(); iiter != erase_iters.end(); ++iiter){ + symlink_cache_t::iterator siter = *iiter; + + S3FS_PRN_DBG("truncate symbolic link cache[path=%s]", siter->first.c_str()); + symlink_cache.erase(siter); + } + S3FS_MALLOCTRIM(0); + + return true; +} + +bool StatCache::DelSymlink(const char* key, AutoLock::Type locktype) +{ + if(!key){ + return false; + } + S3FS_PRN_INFO3("delete symbolic link cache entry[path=%s]", key); + + AutoLock lock(&StatCache::stat_cache_lock, locktype); + + symlink_cache_t::iterator iter; + if(symlink_cache.end() != (iter = symlink_cache.find(key))){ + symlink_cache.erase(iter); + } + S3FS_MALLOCTRIM(0); + + return true; +} + +// [NOTE] +// Need to lock StatCache::stat_cache_lock before calling this method. 
+// +bool StatCache::AddNotruncateCache(const std::string& key) +{ + if(key.empty() || '/' == *key.rbegin()){ + return false; + } + + std::string parentdir = mydirname(key); + std::string filename = mybasename(key); + if(parentdir.empty() || filename.empty()){ + return false; + } + parentdir += '/'; // directory path must be '/' termination. + + notruncate_dir_map_t::iterator iter = notruncate_file_cache.find(parentdir); + if(iter == notruncate_file_cache.end()){ + // add new list + notruncate_filelist_t list; + list.push_back(filename); + notruncate_file_cache[parentdir] = list; + }else{ + // add filename to existed list + notruncate_filelist_t& filelist = iter->second; + notruncate_filelist_t::const_iterator fiter = std::find(filelist.begin(), filelist.end(), filename); + if(fiter == filelist.end()){ + filelist.push_back(filename); + } + } + return true; +} + +// [NOTE] +// Need to lock StatCache::stat_cache_lock before calling this method. +// +bool StatCache::DelNotruncateCache(const std::string& key) +{ + if(key.empty() || '/' == *key.rbegin()){ + return false; + } + + std::string parentdir = mydirname(key); + std::string filename = mybasename(key); + if(parentdir.empty() || filename.empty()){ + return false; + } + parentdir += '/'; // directory path must be '/' termination. 
+ + notruncate_dir_map_t::iterator iter = notruncate_file_cache.find(parentdir); + if(iter != notruncate_file_cache.end()){ + // found directory in map + notruncate_filelist_t& filelist = iter->second; + notruncate_filelist_t::iterator fiter = std::find(filelist.begin(), filelist.end(), filename); + if(fiter != filelist.end()){ + // found filename in directory file list + filelist.erase(fiter); + if(filelist.empty()){ + notruncate_file_cache.erase(parentdir); + } + } + } + return true; +} + +// [Background] +// When s3fs creates a new file, the file does not exist until the file contents +// are uploaded.(because it doesn't create a 0 byte file) +// From the time this file is created(opened) until it is uploaded(flush), it +// will have a Stat cache with the No truncate flag added. +// This avoids file not existing errors in operations such as chmod and utimens +// that occur in the short period before file upload. +// Besides this, we also need to support readdir(list_bucket), this method is +// called to maintain the cache for readdir and return its value. +// +// [NOTE] +// Add the file names under parentdir to the list. +// However, if the same file name exists in the list, it will not be added. +// parentdir must be terminated with a '/'. 
+// +bool StatCache::GetNotruncateCache(const std::string& parentdir, notruncate_filelist_t& list) +{ + if(parentdir.empty()){ + return false; + } + + std::string dirpath = parentdir; + if('/' != *dirpath.rbegin()){ + dirpath += '/'; + } + + AutoLock lock(&StatCache::stat_cache_lock); + + notruncate_dir_map_t::iterator iter = notruncate_file_cache.find(dirpath); + if(iter == notruncate_file_cache.end()){ + // not found directory map + return true; + } + + // found directory in map + const notruncate_filelist_t& filelist = iter->second; + for(notruncate_filelist_t::const_iterator fiter = filelist.begin(); fiter != filelist.end(); ++fiter){ + if(list.end() == std::find(list.begin(), list.end(), *fiter)){ + // found notuncate file that does not exist in the list, so add it. + list.push_back(*fiter); + } + } + return true; +} + +//------------------------------------------------------------------- +// Functions +//------------------------------------------------------------------- +bool convert_header_to_stat(const char* path, const headers_t& meta, struct stat* pst, bool forcedir) +{ + if(!path || !pst){ + return false; + } + memset(pst, 0, sizeof(struct stat)); + + pst->st_nlink = 1; // see fuse FAQ + + // mode + pst->st_mode = get_mode(meta, path, true, forcedir); + + // blocks + if(S_ISREG(pst->st_mode)){ + pst->st_blocks = get_blocks(pst->st_size); + } + pst->st_blksize = 4096; + + // mtime + struct timespec mtime = get_mtime(meta); + if(pst->st_mtime < 0){ + pst->st_mtime = 0L; + }else{ + if(mtime.tv_sec < 0){ + mtime.tv_sec = 0; + mtime.tv_nsec = 0; + } + set_timespec_to_stat(*pst, stat_time_type::MTIME, mtime); + } + + // ctime + struct timespec ctime = get_ctime(meta); + if(pst->st_ctime < 0){ + pst->st_ctime = 0L; + }else{ + if(ctime.tv_sec < 0){ + ctime.tv_sec = 0; + ctime.tv_nsec = 0; + } + set_timespec_to_stat(*pst, stat_time_type::CTIME, ctime); + } + + // atime + struct timespec atime = get_atime(meta); + if(pst->st_atime < 0){ + pst->st_atime = 0L; + 
}else{ + if(atime.tv_sec < 0){ + atime.tv_sec = 0; + atime.tv_nsec = 0; + } + set_timespec_to_stat(*pst, stat_time_type::ATIME, atime); + } + + // size + if(S_ISDIR(pst->st_mode)){ + pst->st_size = 4096; + }else{ + pst->st_size = get_size(meta); + } + + // uid/gid + pst->st_uid = get_uid(meta); + pst->st_gid = get_gid(meta); + + return true; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/cache.h b/s3fs/cache.h new file mode 100644 index 0000000..5157f74 --- /dev/null +++ b/s3fs/cache.h @@ -0,0 +1,214 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_CACHE_H_ +#define S3FS_CACHE_H_ + +#include + +#include "autolock.h" +#include "metaheader.h" + +//------------------------------------------------------------------- +// Structure +//------------------------------------------------------------------- +// +// Struct for stats cache +// +struct stat_cache_entry { + struct stat stbuf; + unsigned long hit_count; + struct timespec cache_date; + headers_t meta; + bool isforce; + bool noobjcache; // Flag: cache is no object for no listing. 
    unsigned long notruncate; // 0<: not remove automatically at checking truncate

    // Zero-initialize the embedded struct stat and timestamps so a
    // freshly inserted entry is always in a defined state.
    stat_cache_entry() : hit_count(0), isforce(false), noobjcache(false), notruncate(0L)
    {
        memset(&stbuf, 0, sizeof(struct stat));
        cache_date.tv_sec = 0;
        cache_date.tv_nsec = 0;
        meta.clear();
    }
};

// NOTE(review): the template arguments of this typedef appear to have been
// lost in extraction (likely std::map<std::string, stat_cache_entry>) - TODO confirm.
typedef std::map stat_cache_t; // key=path

//
// Struct for symbolic link cache
//
struct symlink_cache_entry {
    std::string link;
    unsigned long hit_count;
    struct timespec cache_date; // The function that operates timespec uses the same as Stats

    symlink_cache_entry() : link(""), hit_count(0)
    {
        cache_date.tv_sec = 0;
        cache_date.tv_nsec = 0;
    }
};

// NOTE(review): template arguments appear lost here as well
// (likely std::map<std::string, symlink_cache_entry>) - TODO confirm.
typedef std::map symlink_cache_t;

//
// Typedefs for No truncate file name cache
//
// NOTE(review): template arguments appear lost in the next two typedefs
// (likely std::vector<std::string> and std::map<std::string, notruncate_filelist_t>).
typedef std::vector notruncate_filelist_t; // untruncated file name list in dir
typedef std::map notruncate_dir_map_t; // key is parent dir path

//-------------------------------------------------------------------
// Class StatCache
//-------------------------------------------------------------------
// [NOTE] About Symbolic link cache
// The Stats cache class now also has a symbolic link cache.
// It is possible to take out the Symbolic link cache in another class,
// but the cache out etc. should be synchronized with the Stats cache
// and implemented in this class.
// Symbolic link cache size and timeout use the same settings as Stats
// cache. This simplifies user configuration, and from a user perspective,
// the symbolic link cache appears to be included in the Stats cache.
//
class StatCache
{
    private:
        static StatCache singleton;              // process-wide single instance
        static pthread_mutex_t stat_cache_lock;  // guards all caches below
        stat_cache_t stat_cache;
        bool IsExpireTime;
        bool IsExpireIntervalType; // if this flag is true, cache data is updated at last access time.
        time_t ExpireTime;
        unsigned long CacheSize;
        bool IsCacheNoObject;
        symlink_cache_t symlink_cache;
        notruncate_dir_map_t notruncate_file_cache;

    private:
        StatCache();
        ~StatCache();

        void Clear();
        // Core lookup; all public GetStat/HasStat overloads funnel here.
        bool GetStat(const std::string& key, struct stat* pst, headers_t* meta, bool overcheck, const char* petag, bool* pisforce);
        // Truncate stat cache
        bool TruncateCache(AutoLock::Type locktype = AutoLock::NONE);
        // Truncate symbolic link cache
        bool TruncateSymlink(AutoLock::Type locktype = AutoLock::NONE);

        // Both require stat_cache_lock to already be held by the caller.
        bool AddNotruncateCache(const std::string& key);
        bool DelNotruncateCache(const std::string& key);

    public:
        // Reference singleton
        static StatCache* getStatCacheData()
        {
            return &singleton;
        }

        // Attribute
        unsigned long GetCacheSize() const;
        unsigned long SetCacheSize(unsigned long size);
        time_t GetExpireTime() const;
        time_t SetExpireTime(time_t expire, bool is_interval = false);
        time_t UnsetExpireTime();
        bool SetCacheNoObject(bool flag);
        bool EnableCacheNoObject()
        {
            return SetCacheNoObject(true);
        }
        bool DisableCacheNoObject()
        {
            return SetCacheNoObject(false);
        }
        bool GetCacheNoObject() const
        {
            return IsCacheNoObject;
        }

        // Get stat cache
        bool GetStat(const std::string& key, struct stat* pst, headers_t* meta, bool overcheck = true, bool* pisforce = nullptr)
        {
            return GetStat(key, pst, meta, overcheck, nullptr, pisforce);
        }
        bool GetStat(const std::string& key, struct stat* pst, bool overcheck = true)
        {
            return GetStat(key, pst, nullptr, overcheck, nullptr, nullptr);
        }
        bool GetStat(const std::string& key, headers_t* meta, bool overcheck = true)
        {
            return GetStat(key, nullptr, meta, overcheck, nullptr, nullptr);
        }
        bool HasStat(const std::string& key, bool overcheck = true)
        {
            return GetStat(key, nullptr, nullptr, overcheck, nullptr, nullptr);
        }
        bool HasStat(const std::string& key, const char* etag, bool overcheck = true)
        {
            return GetStat(key, nullptr, nullptr, overcheck, etag, nullptr);
        }
        bool HasStat(const std::string& key, struct stat* pst, const char* etag)
        {
            return GetStat(key, pst, nullptr, true, etag, nullptr);
        }

        // Cache For no object
        bool IsNoObjectCache(const std::string& key, bool overcheck = true);
        bool AddNoObjectCache(const std::string& key);

        // Add stat cache
        bool AddStat(const std::string& key, const headers_t& meta, bool forcedir = false, bool no_truncate = false);

        // Update meta stats
        bool UpdateMetaStats(const std::string& key, const headers_t& meta);

        // Change no truncate flag
        void ChangeNoTruncateFlag(const std::string& key, bool no_truncate);

        // Delete stat cache
        bool DelStat(const char* key, AutoLock::Type locktype = AutoLock::NONE);
        bool DelStat(const std::string& key, AutoLock::Type locktype = AutoLock::NONE)
        {
            return DelStat(key.c_str(), locktype);
        }

        // Cache for symbolic link
        bool GetSymlink(const std::string& key, std::string& value);
        bool AddSymlink(const std::string& key, const std::string& value);
        bool DelSymlink(const char* key, AutoLock::Type locktype = AutoLock::NONE);

        // Cache for Notruncate file
        bool GetNotruncateCache(const std::string& parentdir, notruncate_filelist_t& list);
};

//-------------------------------------------------------------------
// Functions
//-------------------------------------------------------------------
bool convert_header_to_stat(const char* path, const headers_t& meta, struct stat* pst, bool forcedir = false);

#endif // S3FS_CACHE_H_

/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: expandtab sw=4 ts=4 fdm=marker
* vim<600: expandtab sw=4 ts=4
*/
diff --git a/s3fs/common.h b/s3fs/common.h
new file mode 100644
index 0000000..6f49c98
--- /dev/null
+++ b/s3fs/common.h
@@ -0,0 +1,68 @@
/*
 * s3fs - FUSE-based file system backed by Amazon S3
 *
 * Copyright(C) 2007 Randy Rizun
 *
 * This program is free software; you can redistribute it and/or
 *
modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_COMMON_H_ +#define S3FS_COMMON_H_ + +#include + +#include "config.h" +#include "types.h" +#include "hybridcache_accessor_4_s3fs.h" + +//------------------------------------------------------------------- +// Global variables +//------------------------------------------------------------------- +// TODO: namespace these +static constexpr int64_t FIVE_GB = 5LL * 1024LL * 1024LL * 1024LL; +static constexpr off_t MIN_MULTIPART_SIZE = 5 * 1024 * 1024; +static constexpr int NEW_CACHE_FAKE_FD = -2; + +extern bool foreground; +extern bool nomultipart; +extern bool pathrequeststyle; +extern bool complement_stat; +extern bool noxmlns; +extern bool use_newcache; +extern std::string program_name; +extern std::string service_path; +extern std::string s3host; +extern std::string mount_prefix; +extern std::string endpoint; +extern std::string cipher_suites; +extern std::string instance_name; + +extern std::shared_ptr accessor; + +//------------------------------------------------------------------- +// For weak attribute +//------------------------------------------------------------------- +#define S3FS_FUNCATTR_WEAK __attribute__ ((weak,unused)) + +#endif // S3FS_COMMON_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab 
sw=4 ts=4
*/
diff --git a/s3fs/common_auth.cpp b/s3fs/common_auth.cpp
new file mode 100644
index 0000000..b42a39f
--- /dev/null
+++ b/s3fs/common_auth.cpp
@@ -0,0 +1,71 @@
/*
 * s3fs - FUSE-based file system backed by Amazon S3
 *
 * Copyright(C) 2007 Randy Rizun
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

// NOTE(review): the header names of the next two includes were lost in
// extraction - TODO confirm against upstream common_auth.cpp.
#include
#include

#include "s3fs_auth.h"
#include "string_util.h"

//-------------------------------------------------------------------
// Utility Function
//-------------------------------------------------------------------
// Compute the base64-encoded MD5 of the whole file behind fd
// (offset 0, size -1 means "to EOF"); returns "" on failure.
std::string s3fs_get_content_md5(int fd)
{
    md5_t md5;
    if(!s3fs_md5_fd(fd, 0, -1, &md5)){
        // TODO: better return value?
        return "";
    }
    return s3fs_base64(md5.data(), md5.size());
}

// Compute the lowercase-hex SHA-256 of [start, start+size) of fd;
// returns "" on failure.
std::string s3fs_sha256_hex_fd(int fd, off_t start, off_t size)
{
    sha256_t sha256;

    if(!s3fs_sha256_fd(fd, start, size, &sha256)){
        // TODO: better return value?
        return "";
    }

    std::string sha256hex = s3fs_hex_lower(sha256.data(), sha256.size());

    return sha256hex;
}


// Compute the base64-encoded MD5 of an in-memory buffer of fsize bytes;
// returns "" on failure.
std::string s3fs_get_content_md5(off_t fsize, char* buf) {
    md5_t md5;
    // NOTE(review): the reinterpret_cast template argument was lost in
    // extraction (s3fs_md5 presumably takes const unsigned char*) - TODO confirm.
    if(!s3fs_md5(reinterpret_cast(buf), fsize, &md5)){
        // TODO: better return value?
        return "";
    }
    return s3fs_base64(md5.data(), md5.size());
}

/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: expandtab sw=4 ts=4 fdm=marker
* vim<600: expandtab sw=4 ts=4
*/
diff --git a/s3fs/config.h b/s3fs/config.h
new file mode 100644
index 0000000..808fe47
--- /dev/null
+++ b/s3fs/config.h
@@ -0,0 +1,92 @@
/* config.h.  Generated from config.h.in by configure.  */
/* config.h.in.  Generated from configure.ac by autoheader.  */

/* short commit hash value on github */
#define COMMIT_HASH_VAL "70a30d6"

/* Define to 1 if you have the header file. */
#define HAVE_ATTR_XATTR_H 1

/* Define to 1 if you have the `clock_gettime' function. */
#define HAVE_CLOCK_GETTIME 1

/* Define to 1 if libcurl has CURLOPT_KEEP_SENDING_ON_ERROR CURLoption */
#define HAVE_CURLOPT_KEEP_SENDING_ON_ERROR 1

/* Define to 1 if libcurl has CURLOPT_SSL_ENABLE_ALPN CURLoption */
#define HAVE_CURLOPT_SSL_ENABLE_ALPN 1

/* Define to 1 if libcurl has CURLOPT_TCP_KEEPALIVE CURLoption */
#define HAVE_CURLOPT_TCP_KEEPALIVE 1

/* Define to 1 if you have the `fallocate' function. */
#define HAVE_FALLOCATE 1

/* Define to 1 if you have the header file. */
#define HAVE_INTTYPES_H 1

/* Define to 1 if you have the `dl' library (-ldl). */
#define HAVE_LIBDL 1

/* Define to 1 if you have the `malloc_trim' function. */
#define HAVE_MALLOC_TRIM 1

/* Define to 1 if you have the header file. */
#define HAVE_MEMORY_H 1

/* Define to 1 if you have the header file. */
#define HAVE_STDINT_H 1

/* Define to 1 if you have the header file. */
#define HAVE_STDLIB_H 1

/* Define to 1 if you have the header file. */
#define HAVE_STRINGS_H 1

/* Define to 1 if you have the header file. */
#define HAVE_STRING_H 1

/* Define to 1 if you have the header file. */
/* #undef HAVE_SYS_EXTATTR_H */

/* Define to 1 if you have the header file. */
#define HAVE_SYS_STAT_H 1

/* Define to 1 if you have the header file.
*/ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_XATTR_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Name of package */ +#define PACKAGE "s3fs" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "s3fs" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "s3fs 1.94" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "s3fs" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "1.94" + +/* Define if you have PTHREAD_MUTEX_RECURSIVE_NP */ +#define S3FS_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Version number of package */ +#define VERSION "1.94" diff --git a/s3fs/curl.cpp b/s3fs/curl.cpp new file mode 100644 index 0000000..6f76185 --- /dev/null +++ b/s3fs/curl.cpp @@ -0,0 +1,4576 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
 */

// NOTE(review): the header names of the next ten system includes were lost
// in extraction - TODO confirm against upstream curl.cpp.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "common.h"
#include "s3fs.h"
#include "s3fs_logger.h"
#include "curl.h"
#include "curl_multi.h"
#include "curl_util.h"
#include "s3fs_auth.h"
#include "autolock.h"
#include "curl_handlerpool.h"
#include "s3fs_cred.h"
#include "s3fs_util.h"
#include "string_util.h"
#include "addhead.h"

//-------------------------------------------------------------------
// Symbols
//-------------------------------------------------------------------
// SHA-256 of the empty string (used for unsigned/empty payload signing).
static constexpr char EMPTY_PAYLOAD_HASH[] = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
// Base64 MD5 of the empty string.
static constexpr char EMPTY_MD5_BASE64_HASH[] = "1B2M2Y8AsgTpgAmY7PhCfg==";

//-------------------------------------------------------------------
// Class S3fsCurl
//-------------------------------------------------------------------
static constexpr int MULTIPART_SIZE = 10 * 1024 * 1024;
static constexpr int GET_OBJECT_RESPONSE_LIMIT = 1024;

// [NOTE] about default mime.types file
// If no mime.types file is specified in the mime option, s3fs
// will look for /etc/mime.types on all operating systems and
// load mime information.
// However, in the case of macOS, when this file does not exist,
// it tries to detect the /etc/apache2/mime.types file.
// The reason for this is that apache2 is preinstalled on macOS,
// and the mime.types file is expected to exist in this path.
// If the mime.types file is not found, s3fs will exit with an
// error.
//
static constexpr char DEFAULT_MIME_FILE[] = "/etc/mime.types";
static constexpr char SPECIAL_DARWIN_MIME_FILE[] = "/etc/apache2/mime.types";

// [NOTICE]
// This symbol is for libcurl under 7.23.0
#ifndef CURLSHE_NOT_BUILT_IN
#define CURLSHE_NOT_BUILT_IN 5
#endif

// Prefer the 64-bit-safe XFERINFO callback when libcurl >= 7.32.0.
#if LIBCURL_VERSION_NUM >= 0x073100
#define S3FS_CURLOPT_XFERINFOFUNCTION CURLOPT_XFERINFOFUNCTION
#else
#define S3FS_CURLOPT_XFERINFOFUNCTION CURLOPT_PROGRESSFUNCTION
#endif

//-------------------------------------------------------------------
// Class S3fsCurl
//-------------------------------------------------------------------
pthread_mutex_t S3fsCurl::curl_warnings_lock;
pthread_mutex_t S3fsCurl::curl_handles_lock;
S3fsCurl::callback_locks_t S3fsCurl::callback_locks;
bool S3fsCurl::is_initglobal_done = false;
CurlHandlerPool* S3fsCurl::sCurlPool = nullptr;
int S3fsCurl::sCurlPoolSize = 32;
CURLSH* S3fsCurl::hCurlShare = nullptr;
bool S3fsCurl::is_cert_check = true; // default
bool S3fsCurl::is_dns_cache = true; // default
bool S3fsCurl::is_ssl_session_cache= true; // default
long S3fsCurl::connect_timeout = 300; // default
time_t S3fsCurl::readwrite_timeout = 120; // default
int S3fsCurl::retries = 5; // default
bool S3fsCurl::is_public_bucket = false;
acl_t S3fsCurl::default_acl = acl_t::PRIVATE;
std::string S3fsCurl::storage_class = "STANDARD";
sseckeylist_t S3fsCurl::sseckeys;
std::string S3fsCurl::ssekmsid;
sse_type_t S3fsCurl::ssetype = sse_type_t::SSE_DISABLE;
bool S3fsCurl::is_content_md5 = false;
bool S3fsCurl::is_verbose = false;
bool S3fsCurl::is_dump_body = false;
S3fsCred* S3fsCurl::ps3fscred = nullptr;
long S3fsCurl::ssl_verify_hostname = 1; // default(original code...)

// protected by curl_warnings_lock
bool S3fsCurl::curl_warnings_once = false;

// protected by curl_handles_lock
curltime_t S3fsCurl::curl_times;
curlprogress_t S3fsCurl::curl_progress;

std::string S3fsCurl::curl_ca_bundle;
mimes_t S3fsCurl::mimeTypes;
std::string S3fsCurl::userAgent;
int S3fsCurl::max_parallel_cnt = 5; // default
int S3fsCurl::max_multireq = 20; // default
off_t S3fsCurl::multipart_size = MULTIPART_SIZE; // default
off_t S3fsCurl::multipart_copy_size = 512 * 1024 * 1024; // default
signature_type_t S3fsCurl::signature_type = signature_type_t::V2_OR_V4; // default
bool S3fsCurl::is_unsigned_payload = false; // default
bool S3fsCurl::is_ua = true; // default
bool S3fsCurl::listobjectsv2 = false; // default
bool S3fsCurl::requester_pays = false; // default
std::string S3fsCurl::proxy_url;
bool S3fsCurl::proxy_http = false;
std::string S3fsCurl::proxy_userpwd;

//-------------------------------------------------------------------
// Class methods for S3fsCurl
//-------------------------------------------------------------------
// One-time process-wide initialization: mutexes, libcurl global/share
// state, crypto mutexes and the curl handle pool, in that order.
// Returns false as soon as any step fails.
bool S3fsCurl::InitS3fsCurl()
{
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
#if S3FS_PTHREAD_ERRORCHECK
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#endif
    if(0 != pthread_mutex_init(&S3fsCurl::curl_warnings_lock, &attr)){
        return false;
    }
    if(0 != pthread_mutex_init(&S3fsCurl::curl_handles_lock, &attr)){
        return false;
    }
    if(0 != pthread_mutex_init(&S3fsCurl::callback_locks.dns, &attr)){
        return false;
    }
    if(0 != pthread_mutex_init(&S3fsCurl::callback_locks.ssl_session, &attr)){
        return false;
    }
    if(!S3fsCurl::InitGlobalCurl()){
        return false;
    }
    if(!S3fsCurl::InitShareCurl()){
        return false;
    }
    if(!S3fsCurl::InitCryptMutex()){
        return false;
    }
    // [NOTE]
    // sCurlPoolSize must be over parallel(or multireq) count.
    //
    if(sCurlPoolSize < std::max(GetMaxParallelCount(), GetMaxMultiRequest())){
        sCurlPoolSize = std::max(GetMaxParallelCount(), GetMaxMultiRequest());
    }
    sCurlPool = new CurlHandlerPool(sCurlPoolSize);
    if (!sCurlPool->Init()) {
        return false;
    }
    return true;
}

// Tear down everything InitS3fsCurl created, in reverse order.
// Continues past individual failures and reports the overall result.
bool S3fsCurl::DestroyS3fsCurl()
{
    bool result = true;

    if(!S3fsCurl::DestroyCryptMutex()){
        result = false;
    }
    if(!sCurlPool->Destroy()){
        result = false;
    }
    delete sCurlPool;
    sCurlPool = nullptr;
    if(!S3fsCurl::DestroyShareCurl()){
        result = false;
    }
    if(!S3fsCurl::DestroyGlobalCurl()){
        result = false;
    }
    if(0 != pthread_mutex_destroy(&S3fsCurl::callback_locks.dns)){
        result = false;
    }
    if(0 != pthread_mutex_destroy(&S3fsCurl::callback_locks.ssl_session)){
        result = false;
    }
    if(0 != pthread_mutex_destroy(&S3fsCurl::curl_handles_lock)){
        result = false;
    }
    if(0 != pthread_mutex_destroy(&S3fsCurl::curl_warnings_lock)){
        result = false;
    }
    return result;
}

// curl_global_init wrapper; guarded so it runs at most once.
bool S3fsCurl::InitGlobalCurl()
{
    if(S3fsCurl::is_initglobal_done){
        return false;
    }
    if(CURLE_OK != curl_global_init(CURL_GLOBAL_ALL)){
        S3FS_PRN_ERR("init_curl_global_all returns error.");
        return false;
    }
    S3fsCurl::is_initglobal_done = true;
    return true;
}

// curl_global_cleanup wrapper; only valid after InitGlobalCurl succeeded.
bool S3fsCurl::DestroyGlobalCurl()
{
    if(!S3fsCurl::is_initglobal_done){
        return false;
    }
    curl_global_cleanup();
    S3fsCurl::is_initglobal_done = false;
    return true;
}

// Set up a CURLSH share handle so all easy handles can share DNS and/or
// SSL session caches, protected by the callback_locks mutexes.
bool S3fsCurl::InitShareCurl()
{
    CURLSHcode nSHCode;

    if(!S3fsCurl::is_dns_cache && !S3fsCurl::is_ssl_session_cache){
        S3FS_PRN_INFO("Curl does not share DNS data.");
        return true;
    }
    if(S3fsCurl::hCurlShare){
        S3FS_PRN_WARN("already initiated.");
        return false;
    }
    if(nullptr == (S3fsCurl::hCurlShare = curl_share_init())){
        S3FS_PRN_ERR("curl_share_init failed");
        return false;
    }
    if(CURLSHE_OK != (nSHCode = curl_share_setopt(S3fsCurl::hCurlShare, CURLSHOPT_LOCKFUNC, S3fsCurl::LockCurlShare))){
        S3FS_PRN_ERR("curl_share_setopt(LOCKFUNC) returns %d(%s)", nSHCode, curl_share_strerror(nSHCode));
        return false;
    }
    if(CURLSHE_OK != (nSHCode = curl_share_setopt(S3fsCurl::hCurlShare, CURLSHOPT_UNLOCKFUNC, S3fsCurl::UnlockCurlShare))){
        S3FS_PRN_ERR("curl_share_setopt(UNLOCKFUNC) returns %d(%s)", nSHCode, curl_share_strerror(nSHCode));
        return false;
    }
    if(S3fsCurl::is_dns_cache){
        nSHCode = curl_share_setopt(S3fsCurl::hCurlShare, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
        // BAD_OPTION / NOT_BUILT_IN are tolerated: run without shared DNS.
        if(CURLSHE_OK != nSHCode && CURLSHE_BAD_OPTION != nSHCode && CURLSHE_NOT_BUILT_IN != nSHCode){
            S3FS_PRN_ERR("curl_share_setopt(DNS) returns %d(%s)", nSHCode, curl_share_strerror(nSHCode));
            return false;
        }else if(CURLSHE_BAD_OPTION == nSHCode || CURLSHE_NOT_BUILT_IN == nSHCode){
            S3FS_PRN_WARN("curl_share_setopt(DNS) returns %d(%s), but continue without shared dns data.", nSHCode, curl_share_strerror(nSHCode));
        }
    }
    if(S3fsCurl::is_ssl_session_cache){
        nSHCode = curl_share_setopt(S3fsCurl::hCurlShare, CURLSHOPT_SHARE, CURL_LOCK_DATA_SSL_SESSION);
        // Same tolerance as the DNS case above.
        if(CURLSHE_OK != nSHCode && CURLSHE_BAD_OPTION != nSHCode && CURLSHE_NOT_BUILT_IN != nSHCode){
            S3FS_PRN_ERR("curl_share_setopt(SSL SESSION) returns %d(%s)", nSHCode, curl_share_strerror(nSHCode));
            return false;
        }else if(CURLSHE_BAD_OPTION == nSHCode || CURLSHE_NOT_BUILT_IN == nSHCode){
            S3FS_PRN_WARN("curl_share_setopt(SSL SESSION) returns %d(%s), but continue without shared ssl session data.", nSHCode, curl_share_strerror(nSHCode));
        }
    }
    if(CURLSHE_OK != (nSHCode = curl_share_setopt(S3fsCurl::hCurlShare, CURLSHOPT_USERDATA, &S3fsCurl::callback_locks))){
        S3FS_PRN_ERR("curl_share_setopt(USERDATA) returns %d(%s)", nSHCode, curl_share_strerror(nSHCode));
        return false;
    }
    return true;
}

// Release the CURLSH share handle created by InitShareCurl.
bool S3fsCurl::DestroyShareCurl()
{
    if(!S3fsCurl::hCurlShare){
        if(!S3fsCurl::is_dns_cache && !S3fsCurl::is_ssl_session_cache){
            return true;    // sharing was never enabled, nothing to do
        }
        S3FS_PRN_WARN("already destroy share curl.");
        return false;
    }
    if(CURLSHE_OK != curl_share_cleanup(S3fsCurl::hCurlShare)){
        return false;
    }
    S3fsCurl::hCurlShare = nullptr;
    return true;
}

// CURLSHOPT_LOCKFUNC callback: take the mutex matching the shared data kind.
void S3fsCurl::LockCurlShare(CURL* handle, curl_lock_data nLockData, curl_lock_access laccess, void* useptr)
{
    if(!hCurlShare){
        return;
    }
    // NOTE(review): the static_cast template argument was lost in extraction
    // (presumably S3fsCurl::callback_locks_t*) - TODO confirm.
    S3fsCurl::callback_locks_t* locks = static_cast(useptr);
    int result;
    if(CURL_LOCK_DATA_DNS == nLockData){
        if(0 != (result = pthread_mutex_lock(&locks->dns))){
            S3FS_PRN_CRIT("pthread_mutex_lock returned: %d", result);
            abort();
        }
    }else if(CURL_LOCK_DATA_SSL_SESSION == nLockData){
        if(0 != (result = pthread_mutex_lock(&locks->ssl_session))){
            S3FS_PRN_CRIT("pthread_mutex_lock returned: %d", result);
            abort();
        }
    }
}

// CURLSHOPT_UNLOCKFUNC callback: mirror of LockCurlShare.
void S3fsCurl::UnlockCurlShare(CURL* handle, curl_lock_data nLockData, void* useptr)
{
    if(!hCurlShare){
        return;
    }
    // NOTE(review): static_cast template argument lost in extraction - TODO confirm.
    S3fsCurl::callback_locks_t* locks = static_cast(useptr);
    int result;
    if(CURL_LOCK_DATA_DNS == nLockData){
        if(0 != (result = pthread_mutex_unlock(&locks->dns))){
            S3FS_PRN_CRIT("pthread_mutex_unlock returned: %d", result);
            abort();
        }
    }else if(CURL_LOCK_DATA_SSL_SESSION == nLockData){
        if(0 != (result = pthread_mutex_unlock(&locks->ssl_session))){
            S3FS_PRN_CRIT("pthread_mutex_unlock returned: %d", result);
            abort();
        }
    }
}

bool S3fsCurl::InitCryptMutex()
{
    return s3fs_init_crypt_mutex();
}

bool S3fsCurl::DestroyCryptMutex()
{
    return s3fs_destroy_crypt_mutex();
}

// homegrown timeout mechanism
// Progress callback: abort the transfer when no bytes have moved for
// longer than readwrite_timeout seconds.
int S3fsCurl::CurlProgress(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
{
    // NOTE(review): static_cast template argument lost in extraction
    // (presumably CURL*) - TODO confirm.
    CURL* curl = static_cast(clientp);
    time_t now = time(nullptr);
    progress_t p(dlnow, ulnow);

    AutoLock lock(&S3fsCurl::curl_handles_lock);

    // any progress?
    if(p != S3fsCurl::curl_progress[curl]){
        // yes!
        S3fsCurl::curl_times[curl]    = now;
        S3fsCurl::curl_progress[curl] = p;
    }else{
        // timeout?
        if(now - S3fsCurl::curl_times[curl] > readwrite_timeout){
            // NOTE(review): static_cast template arguments lost in extraction
            // (presumably long long for the %lld format) - TODO confirm.
            S3FS_PRN_ERR("timeout  now: %lld, curl_times[curl]: %lld, readwrite_timeout: %lld",
                          static_cast(now), static_cast((S3fsCurl::curl_times[curl])), static_cast(readwrite_timeout));
            return CURLE_ABORTED_BY_CALLBACK;
        }
    }
    return 0;
}

// Install the single process-wide credential object; rejects a second call.
bool S3fsCurl::InitCredentialObject(S3fsCred* pcredobj)
{
    // Set the only Credential object
    if(!pcredobj || S3fsCurl::ps3fscred){
        S3FS_PRN_ERR("Unable to set the only Credential object.");
        return false;
    }
    S3fsCurl::ps3fscred = pcredobj;

    return true;
}

// Load the extension -> mime-type table from strFile, or from the
// platform default locations when strFile is empty. Replaces any
// previously loaded table on success.
bool S3fsCurl::InitMimeType(const std::string& strFile)
{
    std::string MimeFile;
    if(!strFile.empty()){
        MimeFile = strFile;
    }else{
        // search default mime.types
        std::string errPaths = DEFAULT_MIME_FILE;
        struct stat st;
        if(0 == stat(DEFAULT_MIME_FILE, &st)){
            MimeFile = DEFAULT_MIME_FILE;
        }else if(compare_sysname("Darwin")){
            // for macOS, search another default file.
            if(0 == stat(SPECIAL_DARWIN_MIME_FILE, &st)){
                MimeFile = SPECIAL_DARWIN_MIME_FILE;
            }else{
                errPaths += " and ";
                errPaths += SPECIAL_DARWIN_MIME_FILE;
            }
        }
        if(MimeFile.empty()){
            S3FS_PRN_WARN("Could not find mime.types files, you have to create file(%s) or specify mime option for existing mime.types file.", errPaths.c_str());
            return false;
        }
    }
    S3FS_PRN_DBG("Try to load mime types from %s file.", MimeFile.c_str());

    std::ifstream MT(MimeFile.c_str());
    if(MT.good()){
        S3FS_PRN_DBG("The old mime types are cleared to load new mime types.");
        S3fsCurl::mimeTypes.clear();
        std::string line;

        while(getline(MT, line)){
            if(line.empty()){
                continue;
            }
            if(line[0]=='#'){
                continue;      // skip comment lines
            }

            // Format: "<mime-type> <ext> [<ext> ...]"
            std::istringstream tmp(line);
            std::string mimeType;
            tmp >> mimeType;
            std::string ext;
            while(tmp >> ext){
                S3fsCurl::mimeTypes[ext] = mimeType;
            }
        }
        S3FS_PRN_INIT_INFO("Loaded mime information from %s", MimeFile.c_str());
    }else{
        S3FS_PRN_WARN("Could not load mime types from %s, please check the existence and permissions of this file.", MimeFile.c_str());
        return false;
    }
    return true;
}

// Build the User-Agent string once; later calls are no-ops.
void S3fsCurl::InitUserAgent()
{
    if(S3fsCurl::userAgent.empty()){
        S3fsCurl::userAgent =  "s3fs/";
        S3fsCurl::userAgent += VERSION;
        S3fsCurl::userAgent += " (commit hash ";
        S3fsCurl::userAgent += COMMIT_HASH_VAL;
        S3fsCurl::userAgent += "; ";
        S3fsCurl::userAgent += s3fs_crypt_lib_name();
        S3fsCurl::userAgent += ")";
        S3fsCurl::userAgent += instance_name;
    }
}

//
// @param s e.g., "index.html"
// @return e.g., "text/html"
//
std::string S3fsCurl::LookupMimeType(const std::string& name)
{
    if(!name.empty() && name[name.size() - 1] == '/'){
        return "application/x-directory";
    }

    std::string result("application/octet-stream");
    std::string::size_type last_pos  = name.find_last_of('.');
    std::string::size_type first_pos = name.find_first_of('.');
    std::string prefix, ext, ext2;

    // No dots in name, just return
    if(last_pos == std::string::npos){
        return result;
    }
    // extract the last extension
    ext = name.substr(1+last_pos, std::string::npos);

    if (last_pos != std::string::npos) {
        // one dot was found, now look for another
        if (first_pos != std::string::npos && first_pos < last_pos) {
            prefix = name.substr(0, last_pos);
            // Now get the second to last file extension
            std::string::size_type next_pos = prefix.find_last_of('.');
            if (next_pos != std::string::npos) {
                ext2 = prefix.substr(1+next_pos, std::string::npos);
            }
        }
    }

    // if we get here, then we have an extension (ext)
    mimes_t::const_iterator iter = S3fsCurl::mimeTypes.find(ext);
    // if the last extension matches a mimeType, then return
    // that mime type
    if (iter != S3fsCurl::mimeTypes.end()) {
        result = (*iter).second;
        return result;
    }

    // return with the default result if there isn't a second extension
    if(first_pos == last_pos){
        return result;
    }

    // Didn't find a mime-type for the first extension
    // Look for second
extension in mimeTypes, return if found + iter = S3fsCurl::mimeTypes.find(ext2); + if (iter != S3fsCurl::mimeTypes.end()) { + result = (*iter).second; + return result; + } + + // neither the last extension nor the second-to-last extension + // matched a mimeType, return the default mime type + return result; +} + +bool S3fsCurl::LocateBundle() +{ + // See if environment variable CURL_CA_BUNDLE is set + // if so, check it, if it is a good path, then set the + // curl_ca_bundle variable to it + if(S3fsCurl::curl_ca_bundle.empty()){ + char* CURL_CA_BUNDLE = getenv("CURL_CA_BUNDLE"); + if(CURL_CA_BUNDLE != nullptr) { + // check for existence and readability of the file + std::ifstream BF(CURL_CA_BUNDLE); + if(!BF.good()){ + S3FS_PRN_ERR("%s: file specified by CURL_CA_BUNDLE environment variable is not readable", program_name.c_str()); + return false; + } + BF.close(); + S3fsCurl::curl_ca_bundle = CURL_CA_BUNDLE; + return true; + } + }else{ + // Already set ca bundle variable + return true; + } + + // not set via environment variable, look in likely locations + + /////////////////////////////////////////// + // following comment from curl's (7.21.2) acinclude.m4 file + /////////////////////////////////////////// + // dnl CURL_CHECK_CA_BUNDLE + // dnl ------------------------------------------------- + // dnl Check if a default ca-bundle should be used + // dnl + // dnl regarding the paths this will scan: + // dnl /etc/ssl/certs/ca-certificates.crt Debian systems + // dnl /etc/pki/tls/certs/ca-bundle.crt Redhat and Mandriva + // dnl /usr/share/ssl/certs/ca-bundle.crt old(er) Redhat + // dnl /usr/local/share/certs/ca-root.crt FreeBSD + // dnl /etc/ssl/cert.pem OpenBSD + // dnl /etc/ssl/certs/ (ca path) SUSE + /////////////////////////////////////////// + // Within CURL the above path should have been checked + // according to the OS. Thus, although we do not need + // to check files here, we will only examine some files. 
+ // + std::ifstream BF("/etc/pki/tls/certs/ca-bundle.crt"); + if(BF.good()){ + BF.close(); + S3fsCurl::curl_ca_bundle = "/etc/pki/tls/certs/ca-bundle.crt"; + }else{ + BF.open("/etc/ssl/certs/ca-certificates.crt"); + if(BF.good()){ + BF.close(); + S3fsCurl::curl_ca_bundle = "/etc/ssl/certs/ca-certificates.crt"; + }else{ + BF.open("/usr/share/ssl/certs/ca-bundle.crt"); + if(BF.good()){ + BF.close(); + S3fsCurl::curl_ca_bundle = "/usr/share/ssl/certs/ca-bundle.crt"; + }else{ + BF.open("/usr/local/share/certs/ca-root.crt"); + if(BF.good()){ + BF.close(); + S3fsCurl::curl_ca_bundle = "/usr/share/ssl/certs/ca-bundle.crt"; + }else{ + S3FS_PRN_ERR("%s: /.../ca-bundle.crt is not readable", program_name.c_str()); + return false; + } + } + } + } + return true; +} + +size_t S3fsCurl::WriteMemoryCallback(void* ptr, size_t blockSize, size_t numBlocks, void* data) +{ + std::string* body = static_cast(data); + body->append(static_cast(ptr), blockSize * numBlocks); + return (blockSize * numBlocks); +} + +size_t S3fsCurl::ReadCallback(void* ptr, size_t size, size_t nmemb, void* userp) +{ + S3fsCurl* pCurl = static_cast(userp); + + if(1 > (size * nmemb)){ + return 0; + } + if(0 >= pCurl->postdata_remaining){ + return 0; + } + size_t copysize = std::min(static_cast(size * nmemb), pCurl->postdata_remaining); + memcpy(ptr, pCurl->postdata, copysize); + + pCurl->postdata_remaining = (pCurl->postdata_remaining > static_cast(copysize) ? 
(pCurl->postdata_remaining - copysize) : 0); + pCurl->postdata += static_cast(copysize); + + return copysize; +} + +size_t S3fsCurl::UploadReadCallbackByMemory(void *ptr, size_t size, size_t nmemb, void *stream) +{ + drp_upload_ctx *ctx = static_cast(stream); + S3FS_PRN_INFO("Upload [path=%s][size=%zu][pos=%zu]", ctx->path.c_str(), ctx->len, ctx->pos); + + if(1 > (size * nmemb)){ + return 0; + } + if(ctx->pos >= ctx->len){ + return 0; + } + + size_t len = std::min(size * nmemb, ctx->len - ctx->pos); + memcpy(ptr, ctx->data + ctx->pos, len); + + if(len < ctx->len){ + S3FS_PRN_WARN("Upload send data copy [path=%s][size=%zu][pos=%zu][sendlen=%zu]", + ctx->path.c_str(), ctx->len, ctx->pos, len); + } + + ctx->pos += len; + return len; +} + +size_t S3fsCurl::HeaderCallback(void* data, size_t blockSize, size_t numBlocks, void* userPtr) +{ + headers_t* headers = static_cast(userPtr); + std::string header(static_cast(data), blockSize * numBlocks); + std::string key; + std::istringstream ss(header); + + if(getline(ss, key, ':')){ + // Force to lower, only "x-amz" + std::string lkey = key; + transform(lkey.begin(), lkey.end(), lkey.begin(), static_cast(std::tolower)); + if(is_prefix(lkey.c_str(), "x-amz")){ + key = lkey; + } + std::string value; + getline(ss, value); + (*headers)[key] = trim(value); + } + return blockSize * numBlocks; +} + +size_t S3fsCurl::UploadReadCallback(void* ptr, size_t size, size_t nmemb, void* userp) +{ + S3fsCurl* pCurl = static_cast(userp); + + if(1 > (size * nmemb)){ + return 0; + } + if(-1 == pCurl->partdata.fd || 0 >= pCurl->partdata.size){ + return 0; + } + // read size + ssize_t copysize = (size * nmemb) < (size_t)pCurl->partdata.size ? 
(size * nmemb) : (size_t)pCurl->partdata.size; + ssize_t readbytes; + ssize_t totalread; + // read and set + if(use_newcache){ + std::memcpy(static_cast(ptr), pCurl->partdata.buf, copysize); + readbytes = copysize; + totalread = copysize; + pCurl->partdata.buf += copysize; + if(copysize < pCurl->partdata.size){ + std::string uploadPath = pCurl->path + "@" + std::to_string(pCurl->partdata.get_part_number()); + S3FS_PRN_WARN("Upload send data copy [path=%s][startpos:%zu][partsize=%zu][uploadlen=%zu]", + uploadPath.c_str(), pCurl->partdata.startpos, pCurl->partdata.size, copysize); + } + }else{ + for(totalread = 0, readbytes = 0; totalread < copysize; totalread += readbytes){ + readbytes = pread(pCurl->partdata.fd, &(static_cast(ptr))[totalread], (copysize - totalread), pCurl->partdata.startpos + totalread); + if(0 == readbytes){ + // eof + break; + }else if(-1 == readbytes){ + // error + S3FS_PRN_ERR("read file error(%d).", errno); + return 0; + } + } + } + pCurl->partdata.startpos += totalread; + pCurl->partdata.size -= totalread; + + return totalread; +} + +size_t S3fsCurl::DownloadWriteCallback(void* ptr, size_t size, size_t nmemb, void* userp) +{ + S3fsCurl* pCurl = static_cast(userp); + + if(1 > (size * nmemb)){ + return 0; + } + if(-1 == pCurl->partdata.fd || 0 >= pCurl->partdata.size){ + return 0; + } + + // Buffer initial bytes in case it is an XML error response. + if(pCurl->bodydata.size() < GET_OBJECT_RESPONSE_LIMIT){ + pCurl->bodydata.append(static_cast(ptr), std::min(size * nmemb, GET_OBJECT_RESPONSE_LIMIT - pCurl->bodydata.size())); + } + + // write size + ssize_t copysize = (size * nmemb) < (size_t)pCurl->partdata.size ? 
(size * nmemb) : (size_t)pCurl->partdata.size; + ssize_t writebytes; + ssize_t totalwrite; + + // write + if(use_newcache && nullptr != pCurl->partdata.buf){ + std::memcpy(pCurl->partdata.buf, static_cast(ptr), copysize); + writebytes = copysize; + totalwrite = copysize; + pCurl->partdata.buf += copysize; + if(copysize < pCurl->partdata.size){ + S3FS_PRN_WARN("Download recv data copy [path=%s][startpos:%zu][partsize=%zu][downlen=%zu]", + pCurl->path.c_str(), pCurl->partdata.startpos, pCurl->partdata.size, copysize); + } + }else{ + for(totalwrite = 0, writebytes = 0; totalwrite < copysize; totalwrite += writebytes){ + writebytes = pwrite(pCurl->partdata.fd, &(static_cast(ptr))[totalwrite], (copysize - totalwrite), pCurl->partdata.startpos + totalwrite); + if(0 == writebytes){ + // eof? + break; + }else if(-1 == writebytes){ + // error + S3FS_PRN_ERR("write file error(%d).", errno); + return 0; + } + } + } + pCurl->partdata.startpos += totalwrite; + pCurl->partdata.size -= totalwrite; + + return totalwrite; +} + +bool S3fsCurl::SetCheckCertificate(bool isCertCheck) +{ + bool old = S3fsCurl::is_cert_check; + S3fsCurl::is_cert_check = isCertCheck; + return old; +} + +bool S3fsCurl::SetDnsCache(bool isCache) +{ + bool old = S3fsCurl::is_dns_cache; + S3fsCurl::is_dns_cache = isCache; + return old; +} + +void S3fsCurl::ResetOffset(S3fsCurl* pCurl) +{ + pCurl->partdata.startpos = pCurl->b_partdata_startpos; + pCurl->partdata.size = pCurl->b_partdata_size; +} + +bool S3fsCurl::SetSslSessionCache(bool isCache) +{ + bool old = S3fsCurl::is_ssl_session_cache; + S3fsCurl::is_ssl_session_cache = isCache; + return old; +} + +long S3fsCurl::SetConnectTimeout(long timeout) +{ + long old = S3fsCurl::connect_timeout; + S3fsCurl::connect_timeout = timeout; + return old; +} + +time_t S3fsCurl::SetReadwriteTimeout(time_t timeout) +{ + time_t old = S3fsCurl::readwrite_timeout; + S3fsCurl::readwrite_timeout = timeout; + return old; +} + +int S3fsCurl::SetRetries(int count) +{ + int old = 
S3fsCurl::retries; + S3fsCurl::retries = count; + return old; +} + +bool S3fsCurl::SetPublicBucket(bool flag) +{ + bool old = S3fsCurl::is_public_bucket; + S3fsCurl::is_public_bucket = flag; + return old; +} + +acl_t S3fsCurl::SetDefaultAcl(acl_t acl) +{ + acl_t old = S3fsCurl::default_acl; + S3fsCurl::default_acl = acl; + return old; +} + +acl_t S3fsCurl::GetDefaultAcl() +{ + return S3fsCurl::default_acl; +} + +std::string S3fsCurl::SetStorageClass(const std::string& storage_class) +{ + std::string old = S3fsCurl::storage_class; + S3fsCurl::storage_class = storage_class; + // AWS requires uppercase storage class values + transform(S3fsCurl::storage_class.begin(), S3fsCurl::storage_class.end(), S3fsCurl::storage_class.begin(), ::toupper); + return old; +} + +bool S3fsCurl::PushbackSseKeys(const std::string& input) +{ + std::string onekey = trim(input); + if(onekey.empty()){ + return false; + } + if('#' == onekey[0]){ + return false; + } + // make base64 if the key is short enough, otherwise assume it is already so + std::string base64_key; + std::string raw_key; + if(onekey.length() > 256 / 8){ + std::string p_key(s3fs_decode64(onekey.c_str(), onekey.size())); + raw_key = p_key; + base64_key = onekey; + } else { + base64_key = s3fs_base64(reinterpret_cast(onekey.c_str()), onekey.length()); + raw_key = onekey; + } + + // make MD5 + std::string strMd5; + if(!make_md5_from_binary(raw_key.c_str(), raw_key.length(), strMd5)){ + S3FS_PRN_ERR("Could not make MD5 from SSE-C keys(%s).", raw_key.c_str()); + return false; + } + // mapped MD5 = SSE Key + sseckeymap_t md5map; + md5map.clear(); + md5map[strMd5] = base64_key; + S3fsCurl::sseckeys.push_back(md5map); + + return true; +} + +sse_type_t S3fsCurl::SetSseType(sse_type_t type) +{ + sse_type_t old = S3fsCurl::ssetype; + S3fsCurl::ssetype = type; + return old; +} + +bool S3fsCurl::SetSseCKeys(const char* filepath) +{ + if(!filepath){ + S3FS_PRN_ERR("SSE-C keys filepath is empty."); + return false; + } + struct stat st; + 
if(0 != stat(filepath, &st)){ + S3FS_PRN_ERR("could not open use_sse keys file(%s).", filepath); + return false; + } + if(st.st_mode & (S_IXUSR | S_IRWXG | S_IRWXO)){ + S3FS_PRN_ERR("use_sse keys file %s should be 0600 permissions.", filepath); + return false; + } + + S3fsCurl::sseckeys.clear(); + + std::ifstream ssefs(filepath); + if(!ssefs.good()){ + S3FS_PRN_ERR("Could not open SSE-C keys file(%s).", filepath); + return false; + } + + std::string line; + while(getline(ssefs, line)){ + S3fsCurl::PushbackSseKeys(line); + } + if(S3fsCurl::sseckeys.empty()){ + S3FS_PRN_ERR("There is no SSE Key in file(%s).", filepath); + return false; + } + return true; +} + +bool S3fsCurl::SetSseKmsid(const char* kmsid) +{ + if(!kmsid || '\0' == kmsid[0]){ + S3FS_PRN_ERR("SSE-KMS kms id is empty."); + return false; + } + S3fsCurl::ssekmsid = kmsid; + return true; +} + +// [NOTE] +// Because SSE is set by some options and environment, +// this function check the integrity of the SSE data finally. +bool S3fsCurl::FinalCheckSse() +{ + switch(S3fsCurl::ssetype){ + case sse_type_t::SSE_DISABLE: + S3fsCurl::ssekmsid.erase(); + return true; + case sse_type_t::SSE_S3: + S3fsCurl::ssekmsid.erase(); + return true; + case sse_type_t::SSE_C: + if(S3fsCurl::sseckeys.empty()){ + S3FS_PRN_ERR("sse type is SSE-C, but there is no custom key."); + return false; + } + S3fsCurl::ssekmsid.erase(); + return true; + case sse_type_t::SSE_KMS: + if(S3fsCurl::ssekmsid.empty()){ + S3FS_PRN_ERR("sse type is SSE-KMS, but there is no specified kms id."); + return false; + } + if(S3fsCurl::GetSignatureType() == signature_type_t::V2_ONLY){ + S3FS_PRN_ERR("sse type is SSE-KMS, but signature type is not v4. SSE-KMS require signature v4."); + return false; + } + + // SSL/TLS is required for KMS + // + if(!is_prefix(s3host.c_str(), "https://")){ + S3FS_PRN_ERR("The sse type is SSE-KMS, but it is not configured to use SSL/TLS. 
SSE-KMS requires SSL/TLS communication."); + return false; + } + return true; + } + S3FS_PRN_ERR("sse type is unknown(%d).", static_cast(S3fsCurl::ssetype)); + + return false; +} + +bool S3fsCurl::LoadEnvSseCKeys() +{ + char* envkeys = getenv("AWSSSECKEYS"); + if(nullptr == envkeys){ + // nothing to do + return true; + } + S3fsCurl::sseckeys.clear(); + + std::istringstream fullkeys(envkeys); + std::string onekey; + while(getline(fullkeys, onekey, ':')){ + S3fsCurl::PushbackSseKeys(onekey); + } + + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(S3fsCurl::sseckeys.empty()){ + S3FS_PRN_ERR("There is no SSE Key in environment(AWSSSECKEYS=%s).", envkeys); + return false; + } + return true; +} + +bool S3fsCurl::LoadEnvSseKmsid() +{ + const char* envkmsid = getenv("AWSSSEKMSID"); + if(nullptr == envkmsid){ + // nothing to do + return true; + } + return S3fsCurl::SetSseKmsid(envkmsid); +} + +// +// If md5 is empty, returns first(current) sse key. +// +bool S3fsCurl::GetSseKey(std::string& md5, std::string& ssekey) +{ + for(sseckeylist_t::const_iterator iter = S3fsCurl::sseckeys.begin(); iter != S3fsCurl::sseckeys.end(); ++iter){ + if(md5.empty() || md5 == (*iter).begin()->first){ + md5 = iter->begin()->first; + ssekey = iter->begin()->second; + return true; + } + } + return false; +} + +bool S3fsCurl::GetSseKeyMd5(size_t pos, std::string& md5) +{ + if(S3fsCurl::sseckeys.size() <= static_cast(pos)){ + return false; + } + size_t cnt = 0; + for(sseckeylist_t::const_iterator iter = S3fsCurl::sseckeys.begin(); iter != S3fsCurl::sseckeys.end(); ++iter, ++cnt){ + if(pos == cnt){ + md5 = iter->begin()->first; + return true; + } + } + return false; +} + +size_t S3fsCurl::GetSseKeyCount() +{ + return S3fsCurl::sseckeys.size(); +} + +bool S3fsCurl::SetContentMd5(bool flag) +{ + bool old = S3fsCurl::is_content_md5; + S3fsCurl::is_content_md5 = flag; + return old; +} + +bool S3fsCurl::SetVerbose(bool flag) +{ + bool old = 
S3fsCurl::is_verbose; + S3fsCurl::is_verbose = flag; + return old; +} + +bool S3fsCurl::SetDumpBody(bool flag) +{ + bool old = S3fsCurl::is_dump_body; + S3fsCurl::is_dump_body = flag; + return old; +} + +long S3fsCurl::SetSslVerifyHostname(long value) +{ + if(0 != value && 1 != value){ + return -1; + } + long old = S3fsCurl::ssl_verify_hostname; + S3fsCurl::ssl_verify_hostname = value; + return old; +} + +bool S3fsCurl::SetMultipartSize(off_t size) +{ + size = size * 1024 * 1024; + if(size < MIN_MULTIPART_SIZE){ + return false; + } + S3fsCurl::multipart_size = size; + return true; +} + +bool S3fsCurl::SetMultipartCopySize(off_t size) +{ + size = size * 1024 * 1024; + if(size < MIN_MULTIPART_SIZE){ + return false; + } + S3fsCurl::multipart_copy_size = size; + return true; +} + +int S3fsCurl::SetMaxParallelCount(int value) +{ + int old = S3fsCurl::max_parallel_cnt; + S3fsCurl::max_parallel_cnt = value; + return old; +} + +int S3fsCurl::SetMaxMultiRequest(int max) +{ + int old = S3fsCurl::max_multireq; + S3fsCurl::max_multireq = max; + return old; +} + +// [NOTE] +// This proxy setting is as same as the "--proxy" option of the curl command, +// and equivalent to the "CURLOPT_PROXY" option of the curl_easy_setopt() +// function. +// However, currently s3fs does not provide another option to set the schema +// and port, so you need to specify these it in this function. (Other than +// this function, there is no means of specifying the schema and port.) +// Therefore, it should be specified "url" as "[://][:]". +// s3fs passes this string to curl_easy_setopt() function with "CURLOPT_PROXY". +// If no "schema" is specified, "http" will be used as default, and if no port +// is specified, "443" will be used for "HTTPS" and "1080" otherwise. +// (See the description of "CURLOPT_PROXY" in libcurl document.) 
+// +bool S3fsCurl::SetProxy(const char* url) +{ + if(!url || '\0' == url[0]){ + return false; + } + std::string tmpurl = url; + + // check schema + bool is_http = true; + size_t pos = 0; + if(std::string::npos != (pos = tmpurl.find("://", pos))){ + if(0 == pos){ + // no schema string before "://" + return false; + } + pos += strlen("://"); + + // Check if it is other than "http://" + if(0 != tmpurl.find("http://", 0)){ + is_http = false; + } + }else{ + // not have schema string + pos = 0; + } + // check fqdn and port number string + if(std::string::npos != (pos = tmpurl.find(':', pos))){ + // specify port + if(0 == pos){ + // no fqdn(hostname) string before ":" + return false; + } + pos += strlen(":"); + if(std::string::npos != tmpurl.find(':', pos)){ + // found wrong separator + return false; + } + } + + S3fsCurl::proxy_url = tmpurl; + S3fsCurl::proxy_http = is_http; + return true; +} + +// [NOTE] +// This function loads proxy credentials(username and passphrase) +// from a file. +// The loaded values is set to "CURLOPT_PROXYUSERPWD" in the +// curl_easy_setopt() function. (But only used if the proxy is HTTP +// schema.) +// +// The file is expected to contain only one valid line: +// ------------------------ +// # comment line +// : +// ------------------------ +// Lines starting with a '#' character are treated as comments. +// Lines with only space characters and blank lines are ignored. +// If the user name contains spaces, it must be url encoded(ex. %20). 
+// +bool S3fsCurl::SetProxyUserPwd(const char* file) +{ + if(!file || '\0' == file[0]){ + return false; + } + if(!S3fsCurl::proxy_userpwd.empty()){ + S3FS_PRN_WARN("Already set username and passphrase for proxy."); + return false; + } + + std::ifstream credFileStream(file); + if(!credFileStream.good()){ + S3FS_PRN_WARN("Could not load username and passphrase for proxy from %s.", file); + return false; + } + + std::string userpwd; + std::string line; + while(getline(credFileStream, line)){ + line = trim(line); + if(line.empty()){ + continue; + } + if(line[0]=='#'){ + continue; + } + if(!userpwd.empty()){ + S3FS_PRN_WARN("Multiple valid username and passphrase found in %s file. Should specify only one pair.", file); + return false; + } + // check separator for username and passphrase + size_t pos = 0; + if(std::string::npos == (pos = line.find(':', pos))){ + S3FS_PRN_WARN("Found string for username and passphrase in %s file does not have separator ':'.", file); + return false; + } + if(0 == pos || (pos + 1) == line.length()){ + S3FS_PRN_WARN("Found string for username or passphrase in %s file is empty.", file); + return false; + } + if(std::string::npos != line.find(':', ++pos)){ + S3FS_PRN_WARN("Found string for username and passphrase in %s file has multiple separator ':'.", file); + return false; + } + userpwd = line; + } + if(userpwd.empty()){ + S3FS_PRN_WARN("No valid username and passphrase found in %s.", file); + return false; + } + + S3fsCurl::proxy_userpwd = userpwd; + return true; +} + +// cppcheck-suppress unmatchedSuppression +// cppcheck-suppress constParameter +// cppcheck-suppress constParameterCallback +bool S3fsCurl::UploadMultipartPostCallback(S3fsCurl* s3fscurl, void* param) +{ + if(!s3fscurl || param){ // this callback does not need a parameter + return false; + } + + return s3fscurl->UploadMultipartPostComplete(); +} + +// cppcheck-suppress unmatchedSuppression +// cppcheck-suppress constParameter +// cppcheck-suppress constParameterCallback 
+bool S3fsCurl::MixMultipartPostCallback(S3fsCurl* s3fscurl, void* param) +{ + if(!s3fscurl || param){ // this callback does not need a parameter + return false; + } + + return s3fscurl->MixMultipartPostComplete(); +} + +std::unique_ptr S3fsCurl::UploadMultipartPostRetryCallback(S3fsCurl* s3fscurl) +{ + if(!s3fscurl){ + return nullptr; + } + // parse and get part_num, upload_id. + std::string upload_id; + std::string part_num_str; + int part_num; + off_t tmp_part_num = 0; + if(!get_keyword_value(s3fscurl->url, "uploadId", upload_id)){ + return nullptr; + } + upload_id = urlDecode(upload_id); // decode + if(!get_keyword_value(s3fscurl->url, "partNumber", part_num_str)){ + return nullptr; + } + if(!s3fs_strtoofft(&tmp_part_num, part_num_str.c_str(), /*base=*/ 10)){ + return nullptr; + } + part_num = static_cast(tmp_part_num); + + if(s3fscurl->retry_count >= S3fsCurl::retries){ + S3FS_PRN_ERR("Over retry count(%d) limit(%s:%d).", s3fscurl->retry_count, s3fscurl->path.c_str(), part_num); + return nullptr; + } + + // duplicate request + std::unique_ptr newcurl(new S3fsCurl(s3fscurl->IsUseAhbe())); + newcurl->partdata.petag = s3fscurl->partdata.petag; + newcurl->partdata.fd = s3fscurl->partdata.fd; + newcurl->partdata.startpos = s3fscurl->b_partdata_startpos; + newcurl->partdata.size = s3fscurl->b_partdata_size; + newcurl->partdata.buf = s3fscurl->b_partdata_buf; + newcurl->b_partdata_startpos = s3fscurl->b_partdata_startpos; + newcurl->b_partdata_size = s3fscurl->b_partdata_size; + newcurl->b_partdata_buf = s3fscurl->b_partdata_buf; + newcurl->retry_count = s3fscurl->retry_count + 1; + newcurl->op = s3fscurl->op; + newcurl->type = s3fscurl->type; + + // setup new curl object + if(0 != newcurl->UploadMultipartPostSetup(s3fscurl->path.c_str(), part_num, upload_id)){ + S3FS_PRN_ERR("Could not duplicate curl object(%s:%d).", s3fscurl->path.c_str(), part_num); + return nullptr; + } + return newcurl; +} + +std::unique_ptr S3fsCurl::CopyMultipartPostRetryCallback(S3fsCurl* 
s3fscurl) +{ + if(!s3fscurl){ + return nullptr; + } + // parse and get part_num, upload_id. + std::string upload_id; + std::string part_num_str; + int part_num; + off_t tmp_part_num = 0; + if(!get_keyword_value(s3fscurl->url, "uploadId", upload_id)){ + return nullptr; + } + upload_id = urlDecode(upload_id); // decode + if(!get_keyword_value(s3fscurl->url, "partNumber", part_num_str)){ + return nullptr; + } + if(!s3fs_strtoofft(&tmp_part_num, part_num_str.c_str(), /*base=*/ 10)){ + return nullptr; + } + part_num = static_cast(tmp_part_num); + + if(s3fscurl->retry_count >= S3fsCurl::retries){ + S3FS_PRN_ERR("Over retry count(%d) limit(%s:%d).", s3fscurl->retry_count, s3fscurl->path.c_str(), part_num); + return nullptr; + } + + // duplicate request + std::unique_ptr newcurl(new S3fsCurl(s3fscurl->IsUseAhbe())); + newcurl->partdata.petag = s3fscurl->partdata.petag; + newcurl->b_from = s3fscurl->b_from; + newcurl->b_meta = s3fscurl->b_meta; + newcurl->retry_count = s3fscurl->retry_count + 1; + newcurl->op = s3fscurl->op; + newcurl->type = s3fscurl->type; + + // setup new curl object + if(0 != newcurl->CopyMultipartPostSetup(s3fscurl->b_from.c_str(), s3fscurl->path.c_str(), part_num, upload_id, s3fscurl->b_meta)){ + S3FS_PRN_ERR("Could not duplicate curl object(%s:%d).", s3fscurl->path.c_str(), part_num); + return nullptr; + } + return newcurl; +} + +std::unique_ptr S3fsCurl::MixMultipartPostRetryCallback(S3fsCurl* s3fscurl) +{ + if(!s3fscurl){ + return nullptr; + } + + if(-1 == s3fscurl->partdata.fd){ + return S3fsCurl::CopyMultipartPostRetryCallback(s3fscurl); + }else{ + return S3fsCurl::UploadMultipartPostRetryCallback(s3fscurl); + } +} + +int S3fsCurl::MapPutErrorResponse(int result) +{ + if(result != 0){ + return result; + } + // PUT returns 200 status code with something error, thus + // we need to check body. 
+ // + // example error body: + // + // + // AccessDenied + // Access Denied + // E4CA6F6767D6685C + // BHzLOATeDuvN8Es1wI8IcERq4kl4dc2A9tOB8Yqr39Ys6fl7N4EJ8sjGiVvu6wLP + // + // + const char* pstrbody = bodydata.c_str(); + std::string code; + if(simple_parse_xml(pstrbody, bodydata.size(), "Code", code)){ + S3FS_PRN_ERR("Put request get 200 status response, but it included error body(or nullptr). The request failed during copying the object in S3. Code: %s", code.c_str()); + // TODO: parse more specific error from + result = -EIO; + } + return result; +} + +// [NOTE] +// It is a factory method as utility because it requires an S3fsCurl object +// initialized for multipart upload from outside this class. +// +std::unique_ptr S3fsCurl::CreateParallelS3fsCurl(const char* tpath, int fd, off_t start, off_t size, int part_num, bool is_copy, etagpair* petag, const std::string& upload_id, int& result) +{ + // duplicate fd + if(!tpath || -1 == fd || start < 0 || size <= 0 || !petag){ + S3FS_PRN_ERR("Parameters are wrong: tpath(%s), fd(%d), start(%lld), size(%lld), petag(%s)", SAFESTRPTR(tpath), fd, static_cast(start), static_cast(size), (petag ? 
"not null" : "null")); + result = -EIO; + return nullptr; + } + result = 0; + + std::unique_ptr s3fscurl(new S3fsCurl(true)); + + if(!is_copy){ + s3fscurl->partdata.fd = fd; + s3fscurl->partdata.startpos = start; + s3fscurl->partdata.size = size; + s3fscurl->partdata.is_copy = is_copy; + s3fscurl->partdata.petag = petag; // [NOTE] be careful, the value is set directly + s3fscurl->b_partdata_startpos = s3fscurl->partdata.startpos; + s3fscurl->b_partdata_size = s3fscurl->partdata.size; + + S3FS_PRN_INFO3("Upload Part [tpath=%s][start=%lld][size=%lld][part=%d]", SAFESTRPTR(tpath), static_cast(start), static_cast(size), part_num); + + if(0 != (result = s3fscurl->UploadMultipartPostSetup(tpath, part_num, upload_id))){ + S3FS_PRN_ERR("failed uploading part setup(%d)", result); + return nullptr; + } + }else{ + headers_t meta; + std::string srcresource; + std::string srcurl; + MakeUrlResource(get_realpath(tpath).c_str(), srcresource, srcurl); + meta["x-amz-copy-source"] = srcresource; + + std::ostringstream strrange; + strrange << "bytes=" << start << "-" << (start + size - 1); + meta["x-amz-copy-source-range"] = strrange.str(); + + s3fscurl->b_from = SAFESTRPTR(tpath); + s3fscurl->b_meta = meta; + s3fscurl->partdata.petag = petag; // [NOTE] be careful, the value is set directly + + S3FS_PRN_INFO3("Copy Part [tpath=%s][start=%lld][size=%lld][part=%d]", SAFESTRPTR(tpath), static_cast(start), static_cast(size), part_num); + + if(0 != (result = s3fscurl->CopyMultipartPostSetup(tpath, tpath, part_num, upload_id, meta))){ + S3FS_PRN_ERR("failed uploading part setup(%d)", result); + return nullptr; + } + } + + // Call lazy function + if(!s3fscurl->fpLazySetup || !s3fscurl->fpLazySetup(s3fscurl.get())){ + S3FS_PRN_ERR("failed lazy function setup for uploading part"); + result = -EIO; + return nullptr; + } + return s3fscurl; +} + +int S3fsCurl::ParallelMultipartUploadRequest(const char* tpath, headers_t& meta, int fd, off_t fsize, char* buf) +{ + int result; + std::string 
upload_id; + struct stat st; + etaglist_t list; + off_t remaining_bytes; + S3fsCurl s3fscurl(true); + + S3FS_PRN_INFO3("[tpath=%s][fd=%d]", SAFESTRPTR(tpath), fd); + + if(!use_newcache && -1 == fstat(fd, &st)){ + S3FS_PRN_ERR("Invalid file descriptor(errno=%d)", errno); + return -errno; + } + + if(0 != (result = s3fscurl.PreMultipartPostRequest(tpath, meta, upload_id, false))){ + return result; + } + s3fscurl.DestroyCurlHandle(); + + // Initialize S3fsMultiCurl + S3fsMultiCurl curlmulti(GetMaxParallelCount()); + curlmulti.SetSuccessCallback(S3fsCurl::UploadMultipartPostCallback); + curlmulti.SetRetryCallback(S3fsCurl::UploadMultipartPostRetryCallback); + off_t real_size = 0; + if (use_newcache) { + real_size = fsize; + } else { + real_size = st.st_size; + } + + // cycle through open fd, pulling off 10MB chunks at a time + for(remaining_bytes = real_size; 0 < remaining_bytes; ){ + off_t chunk = remaining_bytes > S3fsCurl::multipart_size ? S3fsCurl::multipart_size : remaining_bytes; + + // s3fscurl sub object + std::unique_ptr s3fscurl_para(new S3fsCurl(true)); + s3fscurl_para->partdata.fd = fd; + s3fscurl_para->partdata.startpos = real_size - remaining_bytes; + s3fscurl_para->partdata.size = chunk; + s3fscurl_para->partdata.buf = buf + real_size - remaining_bytes; + s3fscurl_para->b_partdata_startpos = s3fscurl_para->partdata.startpos; + s3fscurl_para->b_partdata_size = s3fscurl_para->partdata.size; + s3fscurl_para->b_partdata_buf = s3fscurl_para->partdata.buf; + + s3fscurl_para->partdata.add_etag_list(list); + + // initiate upload part for parallel + if(0 != (result = s3fscurl_para->UploadMultipartPostSetup(tpath, s3fscurl_para->partdata.get_part_number(), upload_id))){ + S3FS_PRN_ERR("failed uploading part setup(%d)", result); + return result; + } + + // set into parallel object + if(!curlmulti.SetS3fsCurlObject(std::move(s3fscurl_para))){ + S3FS_PRN_ERR("Could not make curl object into multi curl(%s).", tpath); + return -EIO; + } + remaining_bytes -= chunk; + } + 
+ // Multi request + if(0 != (result = curlmulti.Request())){ + S3FS_PRN_ERR("error occurred in multi request(errno=%d).", result); + + S3fsCurl s3fscurl_abort(true); + int result2 = s3fscurl_abort.AbortMultipartUpload(tpath, upload_id); + s3fscurl_abort.DestroyCurlHandle(); + if(result2 != 0){ + S3FS_PRN_ERR("error aborting multipart upload(errno=%d).", result2); + } + + return result; + } + + if(0 != (result = s3fscurl.CompleteMultipartPostRequest(tpath, upload_id, list))){ + return result; + } + return 0; +} + +int S3fsCurl::ParallelMixMultipartUploadRequest(const char* tpath, headers_t& meta, int fd, const fdpage_list_t& mixuppages) +{ + int result; + std::string upload_id; + struct stat st; + etaglist_t list; + S3fsCurl s3fscurl(true); + + S3FS_PRN_INFO3("[tpath=%s][fd=%d]", SAFESTRPTR(tpath), fd); + + if(-1 == fstat(fd, &st)){ + S3FS_PRN_ERR("Invalid file descriptor(errno=%d)", errno); + return -errno; + } + + if(0 != (result = s3fscurl.PreMultipartPostRequest(tpath, meta, upload_id, true))){ + return result; + } + s3fscurl.DestroyCurlHandle(); + + // for copy multipart + std::string srcresource; + std::string srcurl; + MakeUrlResource(get_realpath(tpath).c_str(), srcresource, srcurl); + meta["Content-Type"] = S3fsCurl::LookupMimeType(tpath); + meta["x-amz-copy-source"] = srcresource; + + // Initialize S3fsMultiCurl + S3fsMultiCurl curlmulti(GetMaxParallelCount()); + curlmulti.SetSuccessCallback(S3fsCurl::MixMultipartPostCallback); + curlmulti.SetRetryCallback(S3fsCurl::MixMultipartPostRetryCallback); + + for(fdpage_list_t::const_iterator iter = mixuppages.begin(); iter != mixuppages.end(); ++iter){ + if(iter->modified){ + // Multipart upload + std::unique_ptr s3fscurl_para(new S3fsCurl(true)); + s3fscurl_para->partdata.fd = fd; + s3fscurl_para->partdata.startpos = iter->offset; + s3fscurl_para->partdata.size = iter->bytes; + s3fscurl_para->b_partdata_startpos = s3fscurl_para->partdata.startpos; + s3fscurl_para->b_partdata_size = s3fscurl_para->partdata.size; 
+ s3fscurl_para->partdata.add_etag_list(list); + + S3FS_PRN_INFO3("Upload Part [tpath=%s][start=%lld][size=%lld][part=%d]", SAFESTRPTR(tpath), static_cast(iter->offset), static_cast(iter->bytes), s3fscurl_para->partdata.get_part_number()); + + // initiate upload part for parallel + if(0 != (result = s3fscurl_para->UploadMultipartPostSetup(tpath, s3fscurl_para->partdata.get_part_number(), upload_id))){ + S3FS_PRN_ERR("failed uploading part setup(%d)", result); + return result; + } + + // set into parallel object + if(!curlmulti.SetS3fsCurlObject(std::move(s3fscurl_para))){ + S3FS_PRN_ERR("Could not make curl object into multi curl(%s).", tpath); + return -EIO; + } + }else{ + // Multipart copy + for(off_t i = 0, bytes = 0; i < iter->bytes; i += bytes){ + std::unique_ptr s3fscurl_para(new S3fsCurl(true)); + + bytes = std::min(static_cast(GetMultipartCopySize()), iter->bytes - i); + /* every part should be larger than MIN_MULTIPART_SIZE and smaller than FIVE_GB */ + off_t remain_bytes = iter->bytes - i - bytes; + + if ((MIN_MULTIPART_SIZE > remain_bytes) && (0 < remain_bytes)){ + if(FIVE_GB < (bytes + remain_bytes)){ + bytes = (bytes + remain_bytes)/2; + } else{ + bytes += remain_bytes; + } + } + + std::ostringstream strrange; + strrange << "bytes=" << (iter->offset + i) << "-" << (iter->offset + i + bytes - 1); + meta["x-amz-copy-source-range"] = strrange.str(); + + s3fscurl_para->b_from = SAFESTRPTR(tpath); + s3fscurl_para->b_meta = meta; + s3fscurl_para->partdata.add_etag_list(list); + + S3FS_PRN_INFO3("Copy Part [tpath=%s][start=%lld][size=%lld][part=%d]", SAFESTRPTR(tpath), static_cast(iter->offset + i), static_cast(bytes), s3fscurl_para->partdata.get_part_number()); + + // initiate upload part for parallel + if(0 != (result = s3fscurl_para->CopyMultipartPostSetup(tpath, tpath, s3fscurl_para->partdata.get_part_number(), upload_id, meta))){ + S3FS_PRN_ERR("failed uploading part setup(%d)", result); + return result; + } + + // set into parallel object + 
if(!curlmulti.SetS3fsCurlObject(std::move(s3fscurl_para))){ + S3FS_PRN_ERR("Could not make curl object into multi curl(%s).", tpath); + return -EIO; + } + } + } + } + + // Multi request + if(0 != (result = curlmulti.Request())){ + S3FS_PRN_ERR("error occurred in multi request(errno=%d).", result); + + S3fsCurl s3fscurl_abort(true); + int result2 = s3fscurl_abort.AbortMultipartUpload(tpath, upload_id); + s3fscurl_abort.DestroyCurlHandle(); + if(result2 != 0){ + S3FS_PRN_ERR("error aborting multipart upload(errno=%d).", result2); + } + return result; + } + + if(0 != (result = s3fscurl.CompleteMultipartPostRequest(tpath, upload_id, list))){ + return result; + } + return 0; +} + +std::unique_ptr S3fsCurl::ParallelGetObjectRetryCallback(S3fsCurl* s3fscurl) +{ + int result; + + if(!s3fscurl){ + return nullptr; + } + if(s3fscurl->retry_count >= S3fsCurl::retries){ + S3FS_PRN_ERR("Over retry count(%d) limit(%s).", s3fscurl->retry_count, s3fscurl->path.c_str()); + return nullptr; + } + + // duplicate request(setup new curl object) + std::unique_ptr newcurl(new S3fsCurl(s3fscurl->IsUseAhbe())); + + if(0 != (result = newcurl->PreGetObjectRequest(s3fscurl->path.c_str(), s3fscurl->partdata.fd, s3fscurl->partdata.startpos, s3fscurl->partdata.size, s3fscurl->b_ssetype, s3fscurl->b_ssevalue, s3fscurl->partdata.buf))){ + S3FS_PRN_ERR("failed downloading part setup(%d)", result); + return nullptr; + } + newcurl->retry_count = s3fscurl->retry_count + 1; + + return newcurl; +} + +int S3fsCurl::ParallelGetObjectRequest(const char* tpath, int fd, off_t start, off_t size, char* buf) +{ + S3FS_PRN_INFO3("[tpath=%s][fd=%d]", SAFESTRPTR(tpath), fd); + + sse_type_t ssetype = sse_type_t::SSE_DISABLE; + std::string ssevalue; + if(!get_object_sse_type(tpath, ssetype, ssevalue)){ + S3FS_PRN_WARN("Failed to get SSE type for file(%s).", SAFESTRPTR(tpath)); + } + int result = 0; + off_t remaining_bytes; + + // cycle through open fd, pulling off 10MB chunks at a time + for(remaining_bytes = size; 0 
< remaining_bytes; ){ + S3fsMultiCurl curlmulti(GetMaxParallelCount()); + int para_cnt; + off_t chunk; + + // Initialize S3fsMultiCurl + //curlmulti.SetSuccessCallback(nullptr); // not need to set success callback + curlmulti.SetRetryCallback(S3fsCurl::ParallelGetObjectRetryCallback); + + // Loop for setup parallel upload(multipart) request. + for(para_cnt = 0; para_cnt < S3fsCurl::max_parallel_cnt && 0 < remaining_bytes; para_cnt++, remaining_bytes -= chunk){ + // chunk size + chunk = remaining_bytes > S3fsCurl::multipart_size ? S3fsCurl::multipart_size : remaining_bytes; + + // s3fscurl sub object + std::unique_ptr s3fscurl_para(new S3fsCurl(true)); + char* realBuf = nullptr; + if (buf) realBuf = buf + size - remaining_bytes; + if(0 != (result = s3fscurl_para->PreGetObjectRequest(tpath, fd, (start + size - remaining_bytes), chunk, ssetype, ssevalue, realBuf))){ + S3FS_PRN_ERR("failed downloading part setup(%d)", result); + return result; + } + + // set into parallel object + if(!curlmulti.SetS3fsCurlObject(std::move(s3fscurl_para))){ + S3FS_PRN_ERR("Could not make curl object into multi curl(%s).", tpath); + return -EIO; + } + } + + // Multi request + if(0 != (result = curlmulti.Request())){ + S3FS_PRN_ERR("error occurred in multi request(errno=%d).", result); + break; + } + + // reinit for loop. 
+ curlmulti.Clear(); + } + return result; +} + +bool S3fsCurl::UploadMultipartPostSetCurlOpts(S3fsCurl* s3fscurl) +{ + if(!s3fscurl){ + return false; + } + if(!s3fscurl->CreateCurlHandle()){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_URL, s3fscurl->url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_UPLOAD, true)){ // HTTP PUT + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&s3fscurl->bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_HEADERDATA, reinterpret_cast(&s3fscurl->responseHeaders))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_HEADERFUNCTION, HeaderCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_INFILESIZE_LARGE, static_cast(s3fscurl->partdata.size))){ // Content-Length + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_READFUNCTION, UploadReadCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_READDATA, reinterpret_cast(s3fscurl))){ + return false; + } + if(!S3fsCurl::AddUserAgent(s3fscurl->hCurl)){ // put User-Agent + return false; + } + + return true; +} + +bool S3fsCurl::CopyMultipartPostSetCurlOpts(S3fsCurl* s3fscurl) +{ + if(!s3fscurl){ + return false; + } + if(!s3fscurl->CreateCurlHandle()){ + return false; + } + + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_URL, s3fscurl->url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_UPLOAD, true)){ // HTTP PUT + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&s3fscurl->bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, 
CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_HEADERDATA, reinterpret_cast(&s3fscurl->headdata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_HEADERFUNCTION, WriteMemoryCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_INFILESIZE, 0)){ // Content-Length + return false; + } + if(!S3fsCurl::AddUserAgent(s3fscurl->hCurl)){ // put User-Agent + return false; + } + + return true; +} + +bool S3fsCurl::PreGetObjectRequestSetCurlOpts(S3fsCurl* s3fscurl) +{ + if(!s3fscurl){ + return false; + } + if(!s3fscurl->CreateCurlHandle()){ + return false; + } + + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_URL, s3fscurl->url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_WRITEFUNCTION, DownloadWriteCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_WRITEDATA, reinterpret_cast(s3fscurl))){ + return false; + } + if(!S3fsCurl::AddUserAgent(s3fscurl->hCurl)){ // put User-Agent + return false; + } + + return true; +} + +bool S3fsCurl::PreHeadRequestSetCurlOpts(S3fsCurl* s3fscurl) +{ + if(!s3fscurl){ + return false; + } + if(!s3fscurl->CreateCurlHandle()){ + return false; + } + + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_URL, s3fscurl->url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_NOBODY, true)){ // HEAD + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_FILETIME, true)){ // Last-Modified + return false; + } + + // responseHeaders + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_HEADERDATA, reinterpret_cast(&s3fscurl->responseHeaders))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(s3fscurl->hCurl, CURLOPT_HEADERFUNCTION, HeaderCallback)){ + return false; + } + if(!S3fsCurl::AddUserAgent(s3fscurl->hCurl)){ // put User-Agent + return 
false; + } + + return true; +} + +bool S3fsCurl::AddUserAgent(CURL* hCurl) +{ + if(!hCurl){ + return false; + } + if(S3fsCurl::IsUserAgentFlag()){ + curl_easy_setopt(hCurl, CURLOPT_USERAGENT, S3fsCurl::userAgent.c_str()); + } + return true; +} + +int S3fsCurl::CurlDebugFunc(const CURL* hcurl, curl_infotype type, char* data, size_t size, void* userptr) +{ + return S3fsCurl::RawCurlDebugFunc(hcurl, type, data, size, userptr, CURLINFO_END); +} + +int S3fsCurl::CurlDebugBodyInFunc(const CURL* hcurl, curl_infotype type, char* data, size_t size, void* userptr) +{ + return S3fsCurl::RawCurlDebugFunc(hcurl, type, data, size, userptr, CURLINFO_DATA_IN); +} + +int S3fsCurl::CurlDebugBodyOutFunc(const CURL* hcurl, curl_infotype type, char* data, size_t size, void* userptr) +{ + return S3fsCurl::RawCurlDebugFunc(hcurl, type, data, size, userptr, CURLINFO_DATA_OUT); +} + +int S3fsCurl::RawCurlDebugFunc(const CURL* hcurl, curl_infotype type, char* data, size_t size, void* userptr, curl_infotype datatype) +{ + if(!hcurl){ + // something wrong... 
+ return 0; + } + + switch(type){ + case CURLINFO_TEXT: + // Swap tab indentation with spaces so it stays pretty in syslog + int indent; + indent = 0; + while (*data == '\t' && size > 0) { + indent += 4; + size--; + data++; + } + if(foreground && 0 < size && '\n' == data[size - 1]){ + size--; + } + S3FS_PRN_CURL("* %*s%.*s", indent, "", (int)size, data); + break; + + case CURLINFO_DATA_IN: + case CURLINFO_DATA_OUT: + if(type != datatype || !S3fsCurl::is_dump_body){ + // not put + break; + } + case CURLINFO_HEADER_IN: + case CURLINFO_HEADER_OUT: + size_t remaining; + char* p; + + // Print each line individually for tidy output + remaining = size; + p = data; + do { + char* eol = reinterpret_cast(memchr(p, '\n', remaining)); + int newline = 0; + if (eol == nullptr) { + eol = reinterpret_cast(memchr(p, '\r', remaining)); + } else { + if (eol > p && *(eol - 1) == '\r') { + newline++; + } + newline++; + eol++; + } + size_t length = eol - p; + S3FS_PRN_CURL("%s %.*s", getCurlDebugHead(type), (int)length - newline, p); + remaining -= length; + p = eol; + } while (p != nullptr && remaining > 0); + break; + + case CURLINFO_SSL_DATA_IN: + case CURLINFO_SSL_DATA_OUT: + // not put + break; + default: + // why + break; + } + return 0; +} + +//------------------------------------------------------------------- +// Methods for S3fsCurl +//------------------------------------------------------------------- +S3fsCurl::S3fsCurl(bool ahbe) : + hCurl(nullptr), type(REQTYPE::UNSET), requestHeaders(nullptr), + LastResponseCode(S3FSCURL_RESPONSECODE_NOTSET), postdata(nullptr), postdata_remaining(0), is_use_ahbe(ahbe), + retry_count(0), b_infile(nullptr), b_postdata(nullptr), b_postdata_remaining(0), b_partdata_startpos(0), b_partdata_size(0), + b_ssekey_pos(-1), b_ssetype(sse_type_t::SSE_DISABLE), + sem(nullptr), completed_tids_lock(nullptr), completed_tids(nullptr), fpLazySetup(nullptr), curlCode(CURLE_OK) +{ + if(!S3fsCurl::ps3fscred){ + S3FS_PRN_CRIT("The object of S3fs Credential 
class is not initialized."); + abort(); + } +} + +S3fsCurl::~S3fsCurl() +{ + DestroyCurlHandle(); +} + +bool S3fsCurl::ResetHandle(AutoLock::Type locktype) +{ + bool run_once; + { + AutoLock lock(&S3fsCurl::curl_warnings_lock); + run_once = curl_warnings_once; + curl_warnings_once = true; + } + + sCurlPool->ResetHandler(hCurl); + + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_NOSIGNAL, 1)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_FOLLOWLOCATION, true)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_CONNECTTIMEOUT, S3fsCurl::connect_timeout)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_NOPROGRESS, 0)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, S3FS_CURLOPT_XFERINFOFUNCTION, S3fsCurl::CurlProgress)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_PROGRESSDATA, hCurl)){ + return false; + } + // curl_easy_setopt(hCurl, CURLOPT_FORBID_REUSE, 1); + if(CURLE_OK != curl_easy_setopt(hCurl, S3FS_CURLOPT_TCP_KEEPALIVE, 1) && !run_once){ + S3FS_PRN_WARN("The CURLOPT_TCP_KEEPALIVE option could not be set. For maximize performance you need to enable this option and you should use libcurl 7.25.0 or later."); + } + if(CURLE_OK != curl_easy_setopt(hCurl, S3FS_CURLOPT_SSL_ENABLE_ALPN, 0) && !run_once){ + S3FS_PRN_WARN("The CURLOPT_SSL_ENABLE_ALPN option could not be unset. S3 server does not support ALPN, then this option should be disabled to maximize performance. you need to use libcurl 7.36.0 or later."); + } + if(CURLE_OK != curl_easy_setopt(hCurl, S3FS_CURLOPT_KEEP_SENDING_ON_ERROR, 1) && !run_once){ + S3FS_PRN_WARN("The S3FS_CURLOPT_KEEP_SENDING_ON_ERROR option could not be set. 
For maximize performance you need to enable this option and you should use libcurl 7.51.0 or later."); + } + + if(type != REQTYPE::IAMCRED && type != REQTYPE::IAMROLE){ + // REQTYPE::IAMCRED and REQTYPE::IAMROLE are always HTTP + if(0 == S3fsCurl::ssl_verify_hostname){ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_SSL_VERIFYHOST, 0)){ + return false; + } + } + if(!S3fsCurl::curl_ca_bundle.empty()){ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_CAINFO, S3fsCurl::curl_ca_bundle.c_str())){ + return false; + } + } + } + if((S3fsCurl::is_dns_cache || S3fsCurl::is_ssl_session_cache) && S3fsCurl::hCurlShare){ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_SHARE, S3fsCurl::hCurlShare)){ + return false; + } + } + if(!S3fsCurl::is_cert_check) { + S3FS_PRN_DBG("'no_check_certificate' option in effect."); + S3FS_PRN_DBG("The server certificate won't be checked against the available certificate authorities."); + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_SSL_VERIFYPEER, false)){ + return false; + } + } + if(S3fsCurl::is_verbose){ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_VERBOSE, true)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_DEBUGFUNCTION, S3fsCurl::CurlDebugFunc)){ + return false; + } + } + if(!cipher_suites.empty()) { + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_SSL_CIPHER_LIST, cipher_suites.c_str())){ + return false; + } + } + if(!S3fsCurl::proxy_url.empty()){ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_PROXY, S3fsCurl::proxy_url.c_str())){ + return false; + } + if(S3fsCurl::proxy_http){ + if(!S3fsCurl::proxy_userpwd.empty()){ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_PROXYUSERPWD, S3fsCurl::proxy_userpwd.c_str())){ + return false; + } + } + }else if(!S3fsCurl::proxy_userpwd.empty()){ + S3FS_PRN_DBG("Username and passphrase are specified even though proxy is not 'http' scheme, so skip to set those."); + } + } + + AutoLock lock(&S3fsCurl::curl_handles_lock, locktype); + S3fsCurl::curl_times[hCurl] = 
time(nullptr); + S3fsCurl::curl_progress[hCurl] = progress_t(-1, -1); + + return true; +} + +bool S3fsCurl::CreateCurlHandle(bool only_pool, bool remake) +{ + AutoLock lock(&S3fsCurl::curl_handles_lock); + + if(hCurl && remake){ + if(!DestroyCurlHandle(false, true, AutoLock::ALREADY_LOCKED)){ + S3FS_PRN_ERR("could not destroy handle."); + return false; + } + S3FS_PRN_INFO3("already has handle, so destroyed it or restored it to pool."); + } + + if(!hCurl){ + if(nullptr == (hCurl = sCurlPool->GetHandler(only_pool))){ + if(!only_pool){ + S3FS_PRN_ERR("Failed to create handle."); + return false; + }else{ + // [NOTE] + // Further initialization processing is left to lazy processing to be executed later. + // (Currently we do not use only_pool=true, but this code is remained for the future) + return true; + } + } + } + ResetHandle(AutoLock::ALREADY_LOCKED); + + return true; +} + +bool S3fsCurl::DestroyCurlHandle(bool restore_pool, bool clear_internal_data, AutoLock::Type locktype) +{ + // [NOTE] + // If type is REQTYPE::IAMCRED or REQTYPE::IAMROLE, do not clear type. + // Because that type only uses HTTP protocol, then the special + // logic in ResetHandle function. 
+ // + if(type != REQTYPE::IAMCRED && type != REQTYPE::IAMROLE){ + type = REQTYPE::UNSET; + } + + AutoLock lock(&S3fsCurl::curl_handles_lock, locktype); + + if(clear_internal_data){ + ClearInternalData(); + } + + if(hCurl){ + S3fsCurl::curl_times.erase(hCurl); + S3fsCurl::curl_progress.erase(hCurl); + sCurlPool->ReturnHandler(hCurl, restore_pool); + hCurl = nullptr; + }else{ + return false; + } + return true; +} + +bool S3fsCurl::ClearInternalData() +{ + // Always clear internal data + // + type = REQTYPE::UNSET; + path = ""; + base_path = ""; + saved_path = ""; + url = ""; + op = ""; + query_string= ""; + if(requestHeaders){ + curl_slist_free_all(requestHeaders); + requestHeaders = nullptr; + } + responseHeaders.clear(); + bodydata.clear(); + headdata.clear(); + LastResponseCode = S3FSCURL_RESPONSECODE_NOTSET; + postdata = nullptr; + postdata_remaining = 0; + retry_count = 0; + b_infile = nullptr; + b_postdata = nullptr; + b_postdata_remaining = 0; + b_partdata_startpos = 0; + b_partdata_size = 0; + partdata.clear(); + + fpLazySetup = nullptr; + + S3FS_MALLOCTRIM(0); + + return true; +} + +bool S3fsCurl::SetUseAhbe(bool ahbe) +{ + bool old = is_use_ahbe; + is_use_ahbe = ahbe; + return old; +} + +bool S3fsCurl::GetResponseCode(long& responseCode, bool from_curl_handle) const +{ + responseCode = -1; + + if(!from_curl_handle){ + responseCode = LastResponseCode; + }else{ + if(!hCurl){ + return false; + } + if(CURLE_OK != curl_easy_getinfo(hCurl, CURLINFO_RESPONSE_CODE, &LastResponseCode)){ + return false; + } + responseCode = LastResponseCode; + } + return true; +} + +// +// Reset all options for retrying +// +bool S3fsCurl::RemakeHandle() +{ + S3FS_PRN_INFO3("Retry request. 
[type=%d][url=%s][path=%s]", static_cast(type), url.c_str(), path.c_str()); + + if(REQTYPE::UNSET == type){ + return false; + } + + // rewind file + struct stat st; + if(b_infile){ + if(-1 == fseek(b_infile, 0, SEEK_SET)){ + S3FS_PRN_WARN("Could not reset position(fd=%d)", fileno(b_infile)); + return false; + } + if(-1 == fstat(fileno(b_infile), &st)){ + S3FS_PRN_WARN("Could not get file stat(fd=%d)", fileno(b_infile)); + return false; + } + } + + // reinitialize internal data + requestHeaders = curl_slist_remove(requestHeaders, "Authorization"); + responseHeaders.clear(); + bodydata.clear(); + headdata.clear(); + LastResponseCode = S3FSCURL_RESPONSECODE_NOTSET; + + // count up(only use for multipart) + retry_count++; + + // set from backup + postdata = b_postdata; + postdata_remaining = b_postdata_remaining; + partdata.startpos = b_partdata_startpos; + partdata.size = b_partdata_size; + + // reset handle + ResetHandle(); + + // set options + switch(type){ + case REQTYPE::DELETE: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_CUSTOMREQUEST, "DELETE")){ + return false; + } + break; + + case REQTYPE::HEAD: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_NOBODY, true)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_FILETIME, true)){ + return false; + } + // responseHeaders + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_HEADERDATA, reinterpret_cast(&responseHeaders))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_HEADERFUNCTION, HeaderCallback)){ + return false; + } + break; + + case REQTYPE::PUTHEAD: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_UPLOAD, true)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, 
reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILESIZE, 0)){ + return false; + } + break; + + case REQTYPE::PUT: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_UPLOAD, true)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + if(b_infile){ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILESIZE_LARGE, static_cast(st.st_size))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILE, b_infile)){ + return false; + } + }else{ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILESIZE, 0)){ + return false; + } + } + break; + + case REQTYPE::GET: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, S3fsCurl::DownloadWriteCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(this))){ + return false; + } + break; + + case REQTYPE::CHKBUCKET: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + break; + + case REQTYPE::LISTBUCKET: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, 
WriteMemoryCallback)){ + return false; + } + break; + + case REQTYPE::PREMULTIPOST: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POST, true)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POSTFIELDSIZE, 0)){ + return false; + } + break; + + case REQTYPE::COMPLETEMULTIPOST: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POST, true)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POSTFIELDSIZE, static_cast(postdata_remaining))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READDATA, reinterpret_cast(this))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READFUNCTION, S3fsCurl::ReadCallback)){ + return false; + } + break; + + case REQTYPE::UPLOADMULTIPOST: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_UPLOAD, true)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_HEADERDATA, reinterpret_cast(&responseHeaders))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_HEADERFUNCTION, HeaderCallback)){ + return false; + } + 
if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILESIZE_LARGE, static_cast(partdata.size))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READFUNCTION, S3fsCurl::UploadReadCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READDATA, reinterpret_cast(this))){ + return false; + } + break; + + case REQTYPE::COPYMULTIPOST: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_UPLOAD, true)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_HEADERDATA, reinterpret_cast(&headdata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_HEADERFUNCTION, WriteMemoryCallback)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILESIZE, 0)){ + return false; + } + break; + + case REQTYPE::MULTILIST: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + break; + + case REQTYPE::IAMCRED: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + if(S3fsCurl::ps3fscred->IsIBMIAMAuth()){ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POST, true)){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POSTFIELDSIZE, static_cast(postdata_remaining))){ + return 
false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READDATA, reinterpret_cast(this))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READFUNCTION, S3fsCurl::ReadCallback)){ + return false; + } + } + break; + + case REQTYPE::ABORTMULTIUPLOAD: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_CUSTOMREQUEST, "DELETE")){ + return false; + } + break; + + case REQTYPE::IAMROLE: + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + break; + + default: + S3FS_PRN_ERR("request type is unknown(%d)", static_cast(type)); + return false; + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return false; + } + + return true; +} + +// +// returns curl return code +// +int S3fsCurl::RequestPerform(bool dontAddAuthHeaders /*=false*/) +{ + if(S3fsLog::IsS3fsLogDbg()){ + char* ptr_url = nullptr; + curl_easy_getinfo(hCurl, CURLINFO_EFFECTIVE_URL , &ptr_url); + S3FS_PRN_DBG("connecting to URL %s", SAFESTRPTR(ptr_url)); + } + + LastResponseCode = S3FSCURL_RESPONSECODE_NOTSET; + long responseCode = S3FSCURL_RESPONSECODE_NOTSET; + int result = S3FSCURL_PERFORM_RESULT_NOTSET; + + // 1 attempt + retries... 
+ for(int retrycnt = 0; S3FSCURL_PERFORM_RESULT_NOTSET == result && retrycnt < S3fsCurl::retries; ++retrycnt){ + // Reset response code + responseCode = S3FSCURL_RESPONSECODE_NOTSET; + + // Insert headers + if(!dontAddAuthHeaders) { + insertAuthHeaders(); + } + + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_HTTPHEADER, requestHeaders)){ + return false; + } + + // Requests + curlCode = curl_easy_perform(hCurl); + + // Check result + switch(curlCode){ + case CURLE_OK: + // Need to look at the HTTP response code + if(0 != curl_easy_getinfo(hCurl, CURLINFO_RESPONSE_CODE, &responseCode)){ + S3FS_PRN_ERR("curl_easy_getinfo failed while trying to retrieve HTTP response code"); + responseCode = S3FSCURL_RESPONSECODE_FATAL_ERROR; + result = -EIO; + break; + } + if(responseCode >= 200 && responseCode < 300){ + S3FS_PRN_INFO3("HTTP response code %ld", responseCode); + result = 0; + break; + } + + { + // Try to parse more specific AWS error code otherwise fall back to HTTP error code. + std::string value; + if(simple_parse_xml(bodydata.c_str(), bodydata.size(), "Code", value)){ + // TODO: other error codes + if(value == "EntityTooLarge"){ + result = -EFBIG; + break; + }else if(value == "InvalidObjectState"){ + result = -EREMOTE; + break; + }else if(value == "KeyTooLongError"){ + result = -ENAMETOOLONG; + break; + } + } + } + + // Service response codes which are >= 300 && < 500 + switch(responseCode){ + case 301: + case 307: + S3FS_PRN_ERR("HTTP response code 301(Moved Permanently: also happens when bucket's region is incorrect), returning EIO. 
Body Text: %s", bodydata.c_str()); + S3FS_PRN_ERR("The options of url and endpoint may be useful for solving, please try to use both options."); + result = -EIO; + break; + + case 400: + if(op == "HEAD"){ + if(path.size() > 1024){ + S3FS_PRN_ERR("HEAD HTTP response code %ld with path longer than 1024, returning ENAMETOOLONG.", responseCode); + return -ENAMETOOLONG; + } + S3FS_PRN_ERR("HEAD HTTP response code %ld, returning EPERM.", responseCode); + result = -EPERM; + }else{ + S3FS_PRN_ERR("HTTP response code %ld, returning EIO. Body Text: %s", responseCode, bodydata.c_str()); + result = -EIO; + } + break; + + case 403: + S3FS_PRN_ERR("HTTP response code %ld, returning EPERM. Body Text: %s", responseCode, bodydata.c_str()); + result = -EPERM; + break; + + case 404: + S3FS_PRN_INFO3("HTTP response code 404 was returned, returning ENOENT"); + S3FS_PRN_DBG("Body Text: %s", bodydata.c_str()); + result = -ENOENT; + break; + + case 416: // 416 Requested Range Not Satisfiable + if(use_newcache){ + S3FS_PRN_INFO3("HTTP response code 416 was returned, returning ENOENT"); + result = -ENOENT; + }else{ + S3FS_PRN_INFO3("HTTP response code 416 was returned, returning EIO"); + result = -EIO; + } + break; + + case 501: + S3FS_PRN_INFO3("HTTP response code 501 was returned, returning ENOTSUP"); + S3FS_PRN_DBG("Body Text: %s", bodydata.c_str()); + result = -ENOTSUP; + break; + + case 500: + case 503: { + S3FS_PRN_INFO3("HTTP response code %ld was returned, slowing down", responseCode); + S3FS_PRN_DBG("Body Text: %s", bodydata.c_str()); + // Add jitter to avoid thundering herd. + unsigned int sleep_time = 2 << retry_count; + sleep(sleep_time + static_cast(random()) % sleep_time); + break; + } + default: + S3FS_PRN_ERR("HTTP response code %ld, returning EIO. 
Body Text: %s", responseCode, bodydata.c_str()); + result = -EIO; + break; + } + break; + + case CURLE_WRITE_ERROR: + S3FS_PRN_ERR("### CURLE_WRITE_ERROR"); + sleep(2); + break; + + case CURLE_OPERATION_TIMEDOUT: + S3FS_PRN_ERR("### CURLE_OPERATION_TIMEDOUT"); + sleep(2); + break; + + case CURLE_COULDNT_RESOLVE_HOST: + S3FS_PRN_ERR("### CURLE_COULDNT_RESOLVE_HOST"); + sleep(2); + break; + + case CURLE_COULDNT_CONNECT: + S3FS_PRN_ERR("### CURLE_COULDNT_CONNECT"); + sleep(4); + break; + + case CURLE_GOT_NOTHING: + S3FS_PRN_ERR("### CURLE_GOT_NOTHING"); + sleep(4); + break; + + case CURLE_ABORTED_BY_CALLBACK: + S3FS_PRN_ERR("### CURLE_ABORTED_BY_CALLBACK"); + sleep(4); + { + AutoLock lock(&S3fsCurl::curl_handles_lock); + S3fsCurl::curl_times[hCurl] = time(nullptr); + } + break; + + case CURLE_PARTIAL_FILE: + S3FS_PRN_ERR("### CURLE_PARTIAL_FILE"); + sleep(4); + break; + + case CURLE_SEND_ERROR: + S3FS_PRN_ERR("### CURLE_SEND_ERROR"); + sleep(2); + break; + + case CURLE_RECV_ERROR: + S3FS_PRN_ERR("### CURLE_RECV_ERROR"); + sleep(2); + break; + + case CURLE_SSL_CONNECT_ERROR: + S3FS_PRN_ERR("### CURLE_SSL_CONNECT_ERROR"); + sleep(2); + break; + + case CURLE_SSL_CACERT: + S3FS_PRN_ERR("### CURLE_SSL_CACERT"); + + // try to locate cert, if successful, then set the + // option and continue + if(S3fsCurl::curl_ca_bundle.empty()){ + if(!S3fsCurl::LocateBundle()){ + S3FS_PRN_ERR("could not get CURL_CA_BUNDLE."); + result = -EIO; + } + // retry with CAINFO + }else{ + S3FS_PRN_ERR("curlCode: %d msg: %s", curlCode, curl_easy_strerror(curlCode)); + result = -EIO; + } + break; + +#ifdef CURLE_PEER_FAILED_VERIFICATION + case CURLE_PEER_FAILED_VERIFICATION: + S3FS_PRN_ERR("### CURLE_PEER_FAILED_VERIFICATION"); + + first_pos = S3fsCred::GetBucket().find_first_of('.'); + if(first_pos != std::string::npos){ + S3FS_PRN_INFO("curl returned a CURL_PEER_FAILED_VERIFICATION error"); + S3FS_PRN_INFO("security issue found: buckets with periods in their name are incompatible with http"); + 
S3FS_PRN_INFO("This check can be over-ridden by using the -o ssl_verify_hostname=0"); + S3FS_PRN_INFO("The certificate will still be checked but the hostname will not be verified."); + S3FS_PRN_INFO("A more secure method would be to use a bucket name without periods."); + }else{ + S3FS_PRN_INFO("my_curl_easy_perform: curlCode: %d -- %s", curlCode, curl_easy_strerror(curlCode)); + } + result = -EIO; + break; +#endif + + // This should be invalid since curl option HTTP FAILONERROR is now off + case CURLE_HTTP_RETURNED_ERROR: + S3FS_PRN_ERR("### CURLE_HTTP_RETURNED_ERROR"); + + if(0 != curl_easy_getinfo(hCurl, CURLINFO_RESPONSE_CODE, &responseCode)){ + result = -EIO; + }else{ + S3FS_PRN_INFO3("HTTP response code =%ld", responseCode); + + // Let's try to retrieve the + if(404 == responseCode){ + result = -ENOENT; + }else if(500 > responseCode){ + result = -EIO; + } + } + break; + + // Unknown CURL return code + default: + S3FS_PRN_ERR("###curlCode: %d msg: %s", curlCode, curl_easy_strerror(curlCode)); + result = -EIO; + break; + } // switch + + if(S3FSCURL_PERFORM_RESULT_NOTSET == result){ + S3FS_PRN_INFO("### retrying..."); + + if(!RemakeHandle()){ + S3FS_PRN_INFO("Failed to reset handle and internal data for retrying."); + result = -EIO; + break; + } + } + } // for + + // set last response code + if(S3FSCURL_RESPONSECODE_NOTSET == responseCode){ + LastResponseCode = S3FSCURL_RESPONSECODE_FATAL_ERROR; + }else{ + LastResponseCode = responseCode; + } + + if(S3FSCURL_PERFORM_RESULT_NOTSET == result){ + S3FS_PRN_ERR("### giving up"); + result = -EIO; + } + return result; +} + +// +// Returns the Amazon AWS signature for the given parameters. 
+// +// @param method e.g., "GET" +// @param content_type e.g., "application/x-directory" +// @param date e.g., get_date_rfc850() +// @param resource e.g., "/pub" +// +std::string S3fsCurl::CalcSignatureV2(const std::string& method, const std::string& strMD5, const std::string& content_type, const std::string& date, const std::string& resource, const std::string& secret_access_key, const std::string& access_token) +{ + std::string Signature; + std::string StringToSign; + + if(!access_token.empty()){ + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-security-token", access_token.c_str()); + } + + StringToSign += method + "\n"; + StringToSign += strMD5 + "\n"; // md5 + StringToSign += content_type + "\n"; + StringToSign += date + "\n"; + StringToSign += get_canonical_headers(requestHeaders, true); + StringToSign += resource; + + const void* key = secret_access_key.data(); + size_t key_len = secret_access_key.size(); + const unsigned char* sdata = reinterpret_cast(StringToSign.data()); + size_t sdata_len = StringToSign.size(); + unsigned int md_len = 0; + + std::unique_ptr md = s3fs_HMAC(key, key_len, sdata, sdata_len, &md_len); + + Signature = s3fs_base64(md.get(), md_len); + + return Signature; +} + +std::string S3fsCurl::CalcSignature(const std::string& method, const std::string& canonical_uri, const std::string& query_string, const std::string& strdate, const std::string& payload_hash, const std::string& date8601, const std::string& secret_access_key, const std::string& access_token) +{ + std::string StringCQ, StringToSign; + std::string uriencode; + + if(!access_token.empty()){ + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-security-token", access_token.c_str()); + } + + uriencode = urlEncodePath(canonical_uri); + StringCQ = method + "\n"; + if(method == "HEAD" || method == "PUT" || method == "DELETE"){ + StringCQ += uriencode + "\n"; + }else if(method == "GET" && uriencode.empty()){ + StringCQ +="/\n"; + }else if(method == "GET" 
&& is_prefix(uriencode.c_str(), "/")){ + StringCQ += uriencode +"\n"; + }else if(method == "GET" && !is_prefix(uriencode.c_str(), "/")){ + StringCQ += "/\n" + urlEncodeQuery(canonical_uri) +"\n"; + }else if(method == "POST"){ + StringCQ += uriencode + "\n"; + } + StringCQ += urlEncodeQuery(query_string) + "\n"; + StringCQ += get_canonical_headers(requestHeaders) + "\n"; + StringCQ += get_sorted_header_keys(requestHeaders) + "\n"; + StringCQ += payload_hash; + + std::string kSecret = "AWS4" + secret_access_key; + unsigned int kDate_len,kRegion_len, kService_len, kSigning_len = 0; + + std::unique_ptr kDate = s3fs_HMAC256(kSecret.c_str(), kSecret.size(), reinterpret_cast(strdate.data()), strdate.size(), &kDate_len); + std::unique_ptr kRegion = s3fs_HMAC256(kDate.get(), kDate_len, reinterpret_cast(endpoint.c_str()), endpoint.size(), &kRegion_len); + std::unique_ptr kService = s3fs_HMAC256(kRegion.get(), kRegion_len, reinterpret_cast("s3"), sizeof("s3") - 1, &kService_len); + std::unique_ptr kSigning = s3fs_HMAC256(kService.get(), kService_len, reinterpret_cast("aws4_request"), sizeof("aws4_request") - 1, &kSigning_len); + + const unsigned char* cRequest = reinterpret_cast(StringCQ.c_str()); + size_t cRequest_len = StringCQ.size(); + sha256_t sRequest; + s3fs_sha256(cRequest, cRequest_len, &sRequest); + + StringToSign = "AWS4-HMAC-SHA256\n"; + StringToSign += date8601 + "\n"; + StringToSign += strdate + "/" + endpoint + "/s3/aws4_request\n"; + StringToSign += s3fs_hex_lower(sRequest.data(), sRequest.size()); + + const unsigned char* cscope = reinterpret_cast(StringToSign.c_str()); + size_t cscope_len = StringToSign.size(); + unsigned int md_len = 0; + + std::unique_ptr md = s3fs_HMAC256(kSigning.get(), kSigning_len, cscope, cscope_len, &md_len); + + return s3fs_hex_lower(md.get(), md_len); +} + +void S3fsCurl::insertV4Headers(const std::string& access_key_id, const std::string& secret_access_key, const std::string& access_token) +{ + std::string server_path = type == 
REQTYPE::LISTBUCKET ? "/" : path; + std::string payload_hash; + switch (type) { + case REQTYPE::PUT: + if(GetUnsignedPayload()){ + payload_hash = "UNSIGNED-PAYLOAD"; + }else{ + if(use_newcache && !sha256.empty()){ + payload_hash = sha256; + }else{ + payload_hash = s3fs_sha256_hex_fd(b_infile == nullptr ? -1 : fileno(b_infile), 0, -1); + } + } + break; + + case REQTYPE::COMPLETEMULTIPOST: + { + size_t cRequest_len = strlen(reinterpret_cast(b_postdata)); + sha256_t sRequest; + s3fs_sha256(b_postdata, cRequest_len, &sRequest); + payload_hash = s3fs_hex_lower(sRequest.data(), sRequest.size()); + break; + } + + case REQTYPE::UPLOADMULTIPOST: + if(GetUnsignedPayload()){ + payload_hash = "UNSIGNED-PAYLOAD"; + }else{ + if(use_newcache){ + sha256_t sRequest; + s3fs_sha256(reinterpret_cast(partdata.buf), partdata.size, &sRequest); + payload_hash = s3fs_hex_lower(sRequest.data(), sRequest.size()); + }else{ + payload_hash = s3fs_sha256_hex_fd(partdata.fd, partdata.startpos, partdata.size); + } + } + break; + default: + break; + } + + if(b_infile != nullptr && payload_hash.empty()){ + S3FS_PRN_ERR("Failed to make SHA256."); + // TODO: propagate error + } + + S3FS_PRN_INFO3("computing signature [%s] [%s] [%s] [%s]", op.c_str(), server_path.c_str(), query_string.c_str(), payload_hash.c_str()); + std::string strdate; + std::string date8601; + get_date_sigv3(strdate, date8601); + + std::string contentSHA256 = payload_hash.empty() ? EMPTY_PAYLOAD_HASH : payload_hash; + const std::string realpath = pathrequeststyle ? 
"/" + S3fsCred::GetBucket() + server_path : server_path; + + //string canonical_headers, signed_headers; + requestHeaders = curl_slist_sort_insert(requestHeaders, "host", get_bucket_host().c_str()); + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-content-sha256", contentSHA256.c_str()); + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-date", date8601.c_str()); + + if (S3fsCurl::IsRequesterPays()) { + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-request-payer", "requester"); + } + + if(!S3fsCurl::IsPublicBucket()){ + std::string Signature = CalcSignature(op, realpath, query_string + (type == REQTYPE::PREMULTIPOST || type == REQTYPE::MULTILIST ? "=" : ""), strdate, contentSHA256, date8601, secret_access_key, access_token); + std::string auth = "AWS4-HMAC-SHA256 Credential=" + access_key_id + "/" + strdate + "/" + endpoint + "/s3/aws4_request, SignedHeaders=" + get_sorted_header_keys(requestHeaders) + ", Signature=" + Signature; + requestHeaders = curl_slist_sort_insert(requestHeaders, "Authorization", auth.c_str()); + } +} + +void S3fsCurl::insertV2Headers(const std::string& access_key_id, const std::string& secret_access_key, const std::string& access_token) +{ + std::string resource; + std::string turl; + std::string server_path = type == REQTYPE::LISTBUCKET ? "/" : path; + MakeUrlResource(server_path.c_str(), resource, turl); + if(!query_string.empty() && type != REQTYPE::CHKBUCKET && type != REQTYPE::LISTBUCKET){ + resource += "?" 
+ query_string; + } + + std::string date = get_date_rfc850(); + requestHeaders = curl_slist_sort_insert(requestHeaders, "Date", date.c_str()); + if(op != "PUT" && op != "POST"){ + requestHeaders = curl_slist_sort_insert(requestHeaders, "Content-Type", nullptr); + } + + if(!S3fsCurl::IsPublicBucket()){ + std::string Signature = CalcSignatureV2(op, get_header_value(requestHeaders, "Content-MD5"), get_header_value(requestHeaders, "Content-Type"), date, resource, secret_access_key, access_token); + requestHeaders = curl_slist_sort_insert(requestHeaders, "Authorization", ("AWS " + access_key_id + ":" + Signature).c_str()); + } +} + +void S3fsCurl::insertIBMIAMHeaders(const std::string& access_key_id, const std::string& access_token) +{ + requestHeaders = curl_slist_sort_insert(requestHeaders, "Authorization", ("Bearer " + access_token).c_str()); + + if(op == "PUT" && path == mount_prefix + "/"){ + // ibm-service-instance-id header is required for bucket creation requests + requestHeaders = curl_slist_sort_insert(requestHeaders, "ibm-service-instance-id", access_key_id.c_str()); + } +} + +void S3fsCurl::insertAuthHeaders() +{ + std::string access_key_id; + std::string secret_access_key; + std::string access_token; + + // check and get credential variables + if(!S3fsCurl::ps3fscred->CheckIAMCredentialUpdate(&access_key_id, &secret_access_key, &access_token)){ + S3FS_PRN_ERR("An error occurred in checking IAM credential."); + return; // do not insert auth headers on error + } + + if(S3fsCurl::ps3fscred->IsIBMIAMAuth()){ + insertIBMIAMHeaders(access_key_id, access_token); + }else if(S3fsCurl::signature_type == signature_type_t::V2_ONLY){ + insertV2Headers(access_key_id, secret_access_key, access_token); + }else{ + insertV4Headers(access_key_id, secret_access_key, access_token); + } +} + +int S3fsCurl::DeleteRequest(const char* tpath) +{ + S3FS_PRN_INFO3("[tpath=%s]", SAFESTRPTR(tpath)); + + if(!tpath){ + return -EINVAL; + } + if(!CreateCurlHandle()){ + return -EIO; + } + 
std::string resource; + std::string turl; + MakeUrlResource(get_realpath(tpath).c_str(), resource, turl); + + url = prepare_url(turl.c_str()); + path = get_realpath(tpath); + requestHeaders = nullptr; + responseHeaders.clear(); + + op = "DELETE"; + type = REQTYPE::DELETE; + + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_CUSTOMREQUEST, "DELETE")){ + return -EIO; + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return -EIO; + } + + return RequestPerform(); +} + +int S3fsCurl::GetIAMv2ApiToken(const char* token_url, int token_ttl, const char* token_ttl_hdr, std::string& response) +{ + if(!token_url || !token_ttl_hdr){ + S3FS_PRN_ERR("IAMv2 token url(%s) or ttl_hdr(%s) parameter are wrong.", token_url ? token_url : "null", token_ttl_hdr ? token_ttl_hdr : "null"); + return -EIO; + } + response.erase(); + url = token_url; + if(!CreateCurlHandle()){ + return -EIO; + } + requestHeaders = nullptr; + responseHeaders.clear(); + bodydata.clear(); + + std::string ttlstr = std::to_string(token_ttl); + requestHeaders = curl_slist_sort_insert(requestHeaders, token_ttl_hdr, ttlstr.c_str()); + + // Curl appends an "Expect: 100-continue" header to the token request, + // and aws responds with a 417 Expectation Failed. This ensures the + // Expect header is empty before the request is sent. 
+ requestHeaders = curl_slist_sort_insert(requestHeaders, "Expect", ""); + + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_UPLOAD, true)){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILESIZE, 0)){ + return false; + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return -EIO; + } + + // [NOTE] + // Be sure to give "dontAddAuthHeaders=true". + // If set to false(default), it will deadlock in S3fsCred. + // + int result = RequestPerform(true); + + if(0 == result){ + response.swap(bodydata); + }else{ + S3FS_PRN_ERR("Error(%d) occurred, could not get IAMv2 api token.", result); + } + bodydata.clear(); + + return result; +} + +// +// Get AccessKeyId/SecretAccessKey/AccessToken/Expiration by IAM role, +// and Set these value to class variable. 
+// +bool S3fsCurl::GetIAMCredentials(const char* cred_url, const char* iam_v2_token, const char* ibm_secret_access_key, std::string& response) +{ + if(!cred_url){ + S3FS_PRN_ERR("url is null."); + return false; + } + url = cred_url; + response.erase(); + + // at first set type for handle + type = REQTYPE::IAMCRED; + + if(!CreateCurlHandle()){ + return false; + } + requestHeaders = nullptr; + responseHeaders.clear(); + bodydata.clear(); + std::string postContent; + + if(ibm_secret_access_key){ + // make contents + postContent += "grant_type=urn:ibm:params:oauth:grant-type:apikey"; + postContent += "&response_type=cloud_iam"; + postContent += "&apikey="; + postContent += ibm_secret_access_key; + + // set postdata + postdata = reinterpret_cast(postContent.c_str()); + b_postdata = postdata; + postdata_remaining = postContent.size(); // without null + b_postdata_remaining = postdata_remaining; + + requestHeaders = curl_slist_sort_insert(requestHeaders, "Authorization", "Basic Yng6Yng="); + + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POST, true)){ // POST + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POSTFIELDSIZE, static_cast(postdata_remaining))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READDATA, reinterpret_cast(this))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READFUNCTION, S3fsCurl::ReadCallback)){ + return false; + } + } + + if(iam_v2_token){ + requestHeaders = curl_slist_sort_insert(requestHeaders, S3fsCred::IAMv2_token_hdr, iam_v2_token); + } + + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return false; + } + + // [NOTE] + // Be sure to give 
"dontAddAuthHeaders=true". + // If set to false(default), it will deadlock in S3fsCred. + // + int result = RequestPerform(true); + + // analyzing response + if(0 == result){ + response.swap(bodydata); + }else{ + S3FS_PRN_ERR("Error(%d) occurred, could not get IAM role name.", result); + } + bodydata.clear(); + + return (0 == result); +} + +// +// Get IAM role name automatically. +// +bool S3fsCurl::GetIAMRoleFromMetaData(const char* cred_url, const char* iam_v2_token, std::string& token) +{ + if(!cred_url){ + S3FS_PRN_ERR("url is null."); + return false; + } + url = cred_url; + token.erase(); + + S3FS_PRN_INFO3("Get IAM Role name"); + + // at first set type for handle + type = REQTYPE::IAMROLE; + + if(!CreateCurlHandle()){ + return false; + } + requestHeaders = nullptr; + responseHeaders.clear(); + bodydata.clear(); + + if(iam_v2_token){ + requestHeaders = curl_slist_sort_insert(requestHeaders, S3fsCred::IAMv2_token_hdr, iam_v2_token); + } + + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return false; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return false; + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return false; + } + + // [NOTE] + // Be sure to give "dontAddAuthHeaders=true". + // If set to false(default), it will deadlock in S3fsCred. 
+ // + int result = RequestPerform(true); + + // analyzing response + if(0 == result){ + token.swap(bodydata); + }else{ + S3FS_PRN_ERR("Error(%d) occurred, could not get IAM role name from meta data.", result); + } + bodydata.clear(); + + return (0 == result); +} + +bool S3fsCurl::AddSseRequestHead(sse_type_t ssetype, const std::string& input, bool is_copy) +{ + std::string ssevalue = input; + switch(ssetype){ + case sse_type_t::SSE_DISABLE: + return true; + case sse_type_t::SSE_S3: + if(!is_copy){ + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-server-side-encryption", "AES256"); + } + return true; + case sse_type_t::SSE_C: + { + std::string sseckey; + if(S3fsCurl::GetSseKey(ssevalue, sseckey)){ + if(is_copy){ + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-copy-source-server-side-encryption-customer-algorithm", "AES256"); + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-copy-source-server-side-encryption-customer-key", sseckey.c_str()); + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-copy-source-server-side-encryption-customer-key-md5", ssevalue.c_str()); + }else{ + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-server-side-encryption-customer-algorithm", "AES256"); + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-server-side-encryption-customer-key", sseckey.c_str()); + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-server-side-encryption-customer-key-md5", ssevalue.c_str()); + } + }else{ + S3FS_PRN_WARN("Failed to insert SSE-C header."); + } + return true; + } + case sse_type_t::SSE_KMS: + if(!is_copy){ + if(ssevalue.empty()){ + ssevalue = S3fsCurl::GetSseKmsId(); + } + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-server-side-encryption", "aws:kms"); + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-server-side-encryption-aws-kms-key-id", ssevalue.c_str()); + } + return true; + } + S3FS_PRN_ERR("sse type is 
unknown(%d).", static_cast(S3fsCurl::ssetype)); + + return false; +} + +// +// tpath : target path for head request +// bpath : saved into base_path +// savedpath : saved into saved_path +// ssekey_pos : -1 means "not" SSE-C type +// 0 - X means SSE-C type and position for SSE-C key(0 is latest key) +// +bool S3fsCurl::PreHeadRequest(const char* tpath, const char* bpath, const char* savedpath, size_t ssekey_pos) +{ + S3FS_PRN_INFO3("[tpath=%s][bpath=%s][save=%s][sseckeypos=%zu]", SAFESTRPTR(tpath), SAFESTRPTR(bpath), SAFESTRPTR(savedpath), ssekey_pos); + + if(!tpath){ + return false; + } + std::string resource; + std::string turl; + MakeUrlResource(get_realpath(tpath).c_str(), resource, turl); + + // libcurl 7.17 does deep copy of url, deep copy "stable" url + url = prepare_url(turl.c_str()); + path = get_realpath(tpath); + base_path = SAFESTRPTR(bpath); + saved_path = SAFESTRPTR(savedpath); + requestHeaders = nullptr; + responseHeaders.clear(); + + // requestHeaders(SSE-C) + if(0 <= static_cast(ssekey_pos) && ssekey_pos < S3fsCurl::sseckeys.size()){ + std::string md5; + if(!S3fsCurl::GetSseKeyMd5(ssekey_pos, md5) || !AddSseRequestHead(sse_type_t::SSE_C, md5, false)){ + S3FS_PRN_ERR("Failed to set SSE-C headers for sse-c key pos(%zu)(=md5(%s)).", ssekey_pos, md5.c_str()); + return false; + } + } + b_ssekey_pos = ssekey_pos; + + op = "HEAD"; + type = REQTYPE::HEAD; + + // set lazy function + fpLazySetup = PreHeadRequestSetCurlOpts; + + return true; +} + +int S3fsCurl::HeadRequest(const char* tpath, headers_t& meta) +{ + int result = -1; + + S3FS_PRN_INFO3("[tpath=%s]", SAFESTRPTR(tpath)); + + // At first, try to get without SSE-C headers + if(!PreHeadRequest(tpath) || !fpLazySetup || !fpLazySetup(this) || 0 != (result = RequestPerform())){ + // If has SSE-C keys, try to get with all SSE-C keys. 
+ for(size_t pos = 0; pos < S3fsCurl::sseckeys.size(); pos++){ + if(!DestroyCurlHandle()){ + break; + } + if(!PreHeadRequest(tpath, nullptr, nullptr, pos)){ + break; + } + if(!fpLazySetup || !fpLazySetup(this)){ + S3FS_PRN_ERR("Failed to lazy setup in single head request."); + break; + } + if(0 == (result = RequestPerform())){ + break; + } + } + if(0 != result){ + DestroyCurlHandle(); // not check result. + return result; + } + } + + // file exists in s3 + // fixme: clean this up. + meta.clear(); + for(headers_t::iterator iter = responseHeaders.begin(); iter != responseHeaders.end(); ++iter){ + std::string key = lower(iter->first); + std::string value = iter->second; + if(key == "content-type"){ + meta[iter->first] = value; + }else if(key == "content-length"){ + meta[iter->first] = value; + }else if(key == "etag"){ + meta[iter->first] = value; + }else if(key == "last-modified"){ + meta[iter->first] = value; + }else if(is_prefix(key.c_str(), "x-amz")){ + meta[key] = value; // key is lower case for "x-amz" + } + } + return 0; +} + +int S3fsCurl::PutHeadRequest(const char* tpath, headers_t& meta, bool is_copy) +{ + S3FS_PRN_INFO3("[tpath=%s]", SAFESTRPTR(tpath)); + + if(!tpath){ + return -EINVAL; + } + if(!CreateCurlHandle()){ + return -EIO; + } + std::string resource; + std::string turl; + MakeUrlResource(get_realpath(tpath).c_str(), resource, turl); + + url = prepare_url(turl.c_str()); + path = get_realpath(tpath); + requestHeaders = nullptr; + responseHeaders.clear(); + bodydata.clear(); + + std::string contype = S3fsCurl::LookupMimeType(tpath); + requestHeaders = curl_slist_sort_insert(requestHeaders, "Content-Type", contype.c_str()); + + // Make request headers + for(headers_t::iterator iter = meta.begin(); iter != meta.end(); ++iter){ + std::string key = lower(iter->first); + std::string value = iter->second; + if(is_prefix(key.c_str(), "x-amz-acl")){ + // not set value, but after set it. 
+ }else if(is_prefix(key.c_str(), "x-amz-meta")){ + requestHeaders = curl_slist_sort_insert(requestHeaders, iter->first.c_str(), value.c_str()); + }else if(key == "x-amz-copy-source"){ + requestHeaders = curl_slist_sort_insert(requestHeaders, iter->first.c_str(), value.c_str()); + }else if(key == "x-amz-server-side-encryption" && value != "aws:kms"){ + // skip this header, because this header is specified after logic. + }else if(key == "x-amz-server-side-encryption-aws-kms-key-id"){ + // skip this header, because this header is specified after logic. + }else if(key == "x-amz-server-side-encryption-customer-key-md5"){ + // Only copy mode. + if(is_copy){ + if(!AddSseRequestHead(sse_type_t::SSE_C, value, true)){ + S3FS_PRN_WARN("Failed to insert SSE-C header."); + } + } + } + } + + // "x-amz-acl", storage class, sse + if(S3fsCurl::default_acl != acl_t::PRIVATE){ + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-acl", str(S3fsCurl::default_acl)); + } + if(strcasecmp(GetStorageClass().c_str(), "STANDARD") != 0){ + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-storage-class", GetStorageClass().c_str()); + } + // SSE + if(S3fsCurl::GetSseType() != sse_type_t::SSE_DISABLE){ + std::string ssevalue; + if(!AddSseRequestHead(S3fsCurl::GetSseType(), ssevalue, false)){ + S3FS_PRN_WARN("Failed to set SSE header, but continue..."); + } + } + if(is_use_ahbe){ + // set additional header by ahbe conf + requestHeaders = AdditionalHeader::get()->AddHeader(requestHeaders, tpath); + } + + op = "PUT"; + type = REQTYPE::PUTHEAD; + + // setopt + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_UPLOAD, true)){ // HTTP PUT + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return -EIO; + } + if(CURLE_OK != 
curl_easy_setopt(hCurl, CURLOPT_INFILESIZE, 0)){ // Content-Length + return -EIO; + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return -EIO; + } + + S3FS_PRN_INFO3("copying... [path=%s]", tpath); + + int result = RequestPerform(); + result = MapPutErrorResponse(result); + bodydata.clear(); + + return result; +} + +int S3fsCurl::PutRequest(const char* tpath, headers_t& meta, int fd, off_t fsize, char* buf) +{ + struct stat st; + std::unique_ptr file(nullptr, &s3fs_fclose); + + S3FS_PRN_INFO3("[tpath=%s]", SAFESTRPTR(tpath)); + + if(!tpath){ + return -EINVAL; + } + if (use_newcache) { + }else if(-1 != fd){ + // duplicate fd + // + // [NOTE] + // This process requires FILE*, then it is linked to fd with fdopen. + // After processing, the FILE* is closed with fclose, and fd is closed together. + // The fd should not be closed here, so call dup here to duplicate it. + // + int fd2; + if(-1 == (fd2 = dup(fd)) || -1 == fstat(fd2, &st) || 0 != lseek(fd2, 0, SEEK_SET) || nullptr == (file = {fdopen(fd2, "rb"), &s3fs_fclose})){ + S3FS_PRN_ERR("Could not duplicate file descriptor(errno=%d)", errno); + if(-1 != fd2){ + close(fd2); + } + return -errno; + } + b_infile = file.get(); + }else{ + // This case is creating zero byte object.(calling by create_file_object()) + S3FS_PRN_INFO3("create zero byte file object."); + } + + if(!CreateCurlHandle()){ + return -EIO; + } + std::string resource; + std::string turl; + MakeUrlResource(get_realpath(tpath).c_str(), resource, turl); + + url = prepare_url(turl.c_str()); + path = get_realpath(tpath); + requestHeaders = nullptr; + responseHeaders.clear(); + bodydata.clear(); + + // Make request headers + if(S3fsCurl::is_content_md5){ + std::string strMD5; + if(use_newcache){ + strMD5 = s3fs_get_content_md5(fsize, buf); + if(0 == strMD5.length()){ + S3FS_PRN_ERR("Failed to make MD5."); + return -EIO; + } + }else if(-1 != fd){ + strMD5 = s3fs_get_content_md5(fd); + if(0 == strMD5.length()){ + S3FS_PRN_ERR("Failed to make 
MD5."); + return -EIO; + } + }else{ + strMD5 = EMPTY_MD5_BASE64_HASH; + } + requestHeaders = curl_slist_sort_insert(requestHeaders, "Content-MD5", strMD5.c_str()); + } + + std::string contype = S3fsCurl::LookupMimeType(tpath); + requestHeaders = curl_slist_sort_insert(requestHeaders, "Content-Type", contype.c_str()); + + for(headers_t::iterator iter = meta.begin(); iter != meta.end(); ++iter){ + std::string key = lower(iter->first); + std::string value = iter->second; + if(is_prefix(key.c_str(), "x-amz-acl")){ + // not set value, but after set it. + }else if(is_prefix(key.c_str(), "x-amz-meta")){ + requestHeaders = curl_slist_sort_insert(requestHeaders, iter->first.c_str(), value.c_str()); + }else if(key == "x-amz-server-side-encryption" && value != "aws:kms"){ + // skip this header, because this header is specified after logic. + }else if(key == "x-amz-server-side-encryption-aws-kms-key-id"){ + // skip this header, because this header is specified after logic. + }else if(key == "x-amz-server-side-encryption-customer-key-md5"){ + // skip this header, because this header is specified after logic. 
+ } + } + // "x-amz-acl", storage class, sse + if(S3fsCurl::default_acl != acl_t::PRIVATE){ + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-acl", str(S3fsCurl::default_acl)); + } + if(strcasecmp(GetStorageClass().c_str(), "STANDARD") != 0){ + requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-storage-class", GetStorageClass().c_str()); + } + // SSE + // do not add SSE for create bucket + if(0 != strcmp(tpath, "/")){ + std::string ssevalue; + if(!AddSseRequestHead(S3fsCurl::GetSseType(), ssevalue, false)){ + S3FS_PRN_WARN("Failed to set SSE header, but continue..."); + } + } + if(is_use_ahbe){ + // set additional header by ahbe conf + requestHeaders = AdditionalHeader::get()->AddHeader(requestHeaders, tpath); + } + + op = "PUT"; + type = REQTYPE::PUT; + + // setopt + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_UPLOAD, true)){ // HTTP PUT + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return -EIO; + } + + if(use_newcache){ + // ps: Minio not support PostObject + if(0 < fsize){ + sha256_t sRequest; + s3fs_sha256(reinterpret_cast(buf), fsize, &sRequest); + sha256 = s3fs_hex_lower(sRequest.data(), sRequest.size()); + + drp_upload_ctx ctx(path, buf, 0, fsize); + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READFUNCTION, UploadReadCallbackByMemory)){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READDATA, ctx)){ // set memory data + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILESIZE_LARGE, fsize)){ // Content-Length + return -EIO; + } + }else if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILESIZE, 0)){ // Content-Length: 0 + return -EIO; + } + }else if(file){ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILESIZE_LARGE, 
static_cast(st.st_size))){ // Content-Length + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILE, file.get())){ + return -EIO; + } + }else{ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILESIZE, 0)){ // Content-Length: 0 + return -EIO; + } + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return -EIO; + } + + off_t size = 0; + if(use_newcache){ + size = fsize; + }else{ + size = -1 != fd ? st.st_size : 0; + } + S3FS_PRN_INFO3("uploading... [path=%s][fd=%d][size=%lld]", tpath, fd, static_cast(size)); + + int result = RequestPerform(); + result = MapPutErrorResponse(result); + bodydata.clear(); + return result; +} + +int S3fsCurl::PreGetObjectRequest(const char* tpath, int fd, off_t start, off_t size, sse_type_t ssetype, const std::string& ssevalue, char* buf) +{ + S3FS_PRN_INFO3("[tpath=%s][start=%lld][size=%lld]", SAFESTRPTR(tpath), static_cast(start), static_cast(size)); + + if(!tpath || -1 == fd || 0 > start || 0 > size){ + return -EINVAL; + } + + std::string resource; + std::string turl; + MakeUrlResource(get_realpath(tpath).c_str(), resource, turl); + + url = prepare_url(turl.c_str()); + path = get_realpath(tpath); + requestHeaders = nullptr; + responseHeaders.clear(); + + if(0 < size){ + std::string range = "bytes="; + range += std::to_string(start); + range += "-"; + range += std::to_string(start + size - 1); + requestHeaders = curl_slist_sort_insert(requestHeaders, "Range", range.c_str()); + } + // SSE-C + if(sse_type_t::SSE_C == ssetype){ + if(!AddSseRequestHead(ssetype, ssevalue, false)){ + S3FS_PRN_WARN("Failed to set SSE header, but continue..."); + } + } + + op = "GET"; + type = REQTYPE::GET; + + // set lazy function + fpLazySetup = PreGetObjectRequestSetCurlOpts; + + // set info for callback func. + // (use only fd, startpos and size, other member is not used.) 
+ partdata.clear(); + partdata.fd = fd; + partdata.startpos = start; + partdata.size = size; + partdata.buf = buf; + b_partdata_startpos = start; + b_partdata_size = size; + b_partdata_buf = buf; + b_ssetype = ssetype; + b_ssevalue = ssevalue; + b_ssekey_pos = -1; // not use this value for get object. + + return 0; +} + +int S3fsCurl::GetObjectRequest(const char* tpath, int fd, off_t start, off_t size, char* buf) +{ + int result; + + S3FS_PRN_INFO3("[tpath=%s][start=%lld][size=%lld]", SAFESTRPTR(tpath), static_cast(start), static_cast(size)); + + if(!tpath){ + return -EINVAL; + } + sse_type_t local_ssetype = sse_type_t::SSE_DISABLE; + std::string ssevalue; + if(!get_object_sse_type(tpath, local_ssetype, ssevalue)){ + S3FS_PRN_WARN("Failed to get SSE type for file(%s).", SAFESTRPTR(tpath)); + } + + if(0 != (result = PreGetObjectRequest(tpath, fd, start, size, local_ssetype, ssevalue, buf))){ + return result; + } + if(!fpLazySetup || !fpLazySetup(this)){ + S3FS_PRN_ERR("Failed to lazy setup in single get object request."); + return -EIO; + } + + S3FS_PRN_INFO3("downloading... [path=%s][fd=%d]", tpath, fd); + + result = RequestPerform(); + partdata.clear(); + + return result; +} + +int S3fsCurl::CheckBucket(const char* check_path, bool compat_dir, bool force_no_sse) +{ + S3FS_PRN_INFO3("check a bucket path(%s)%s.", (check_path && 0 < strlen(check_path)) ? check_path : "", compat_dir ? 
" containing compatible directory paths" : ""); + + if(!check_path || 0 == strlen(check_path)){ + return -EIO; + } + if(!CreateCurlHandle()){ + return -EIO; + } + + std::string strCheckPath; + std::string urlargs; + if(S3fsCurl::IsListObjectsV2()){ + query_string = "list-type=2"; + }else{ + query_string.clear(); + } + if(!compat_dir){ + // do not check compatibility directories + strCheckPath = check_path; + + }else{ + // check path including compatibility directory + strCheckPath = "/"; + + if(1 < strlen(check_path)){ // for directory path ("/...") not root path("/") + if(!query_string.empty()){ + query_string += '&'; + } + query_string += "prefix="; + query_string += &check_path[1]; // skip first '/' charactor + } + } + if(!query_string.empty()){ + urlargs = "?" + query_string; + } + + std::string resource; + std::string turl; + MakeUrlResource(strCheckPath.c_str(), resource, turl); + + turl += urlargs; + url = prepare_url(turl.c_str()); + path = strCheckPath; + requestHeaders = nullptr; + responseHeaders.clear(); + bodydata.clear(); + + // SSE + if(!force_no_sse && S3fsCurl::GetSseType() != sse_type_t::SSE_DISABLE){ + std::string ssevalue; + if(!AddSseRequestHead(S3fsCurl::GetSseType(), ssevalue, false)){ + S3FS_PRN_WARN("Failed to set SSE header, but continue..."); + } + } + + op = "GET"; + type = REQTYPE::CHKBUCKET; + + // setopt + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_UNRESTRICTED_AUTH, 1L)){ + return -EIO; + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return -EIO; + } + + int result = RequestPerform(); + if (result != 0) { + S3FS_PRN_ERR("Check bucket failed, S3 response: %s", bodydata.c_str()); + } + return result; +} + +int 
S3fsCurl::ListBucketRequest(const char* tpath, const char* query) +{ + S3FS_PRN_INFO3("[tpath=%s]", SAFESTRPTR(tpath)); + + if(!tpath){ + return -EINVAL; + } + if(!CreateCurlHandle()){ + return -EIO; + } + std::string resource; + std::string turl; + MakeUrlResource("", resource, turl); // NOTICE: path is "". + if(query){ + turl += "?"; + turl += query; + query_string = query; + } + + url = prepare_url(turl.c_str()); + path = get_realpath(tpath); + requestHeaders = nullptr; + responseHeaders.clear(); + bodydata.clear(); + + op = "GET"; + type = REQTYPE::LISTBUCKET; + + // setopt + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return -EIO; + } + if(S3fsCurl::is_verbose){ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_DEBUGFUNCTION, S3fsCurl::CurlDebugBodyInFunc)){ // replace debug function + return -EIO; + } + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return -EIO; + } + + return RequestPerform(); +} + +// +// Initialize multipart upload +// +// Example : +// POST /example-object?uploads HTTP/1.1 +// Host: example-bucket.s3.amazonaws.com +// Date: Mon, 1 Nov 2010 20:34:56 GMT +// Authorization: AWS VGhpcyBtZXNzYWdlIHNpZ25lZCBieSBlbHZpbmc= +// +int S3fsCurl::PreMultipartPostRequest(const char* tpath, headers_t& meta, std::string& upload_id, bool is_copy) +{ + S3FS_PRN_INFO3("[tpath=%s]", SAFESTRPTR(tpath)); + + if(!tpath){ + return -EINVAL; + } + if(!CreateCurlHandle()){ + return -EIO; + } + std::string resource; + std::string turl; + MakeUrlResource(get_realpath(tpath).c_str(), resource, turl); + + query_string = "uploads"; + turl += "?" 
+ query_string; + url = prepare_url(turl.c_str()); + path = get_realpath(tpath); + requestHeaders = nullptr; + bodydata.clear(); + responseHeaders.clear(); + + std::string contype = S3fsCurl::LookupMimeType(tpath); + + for(headers_t::iterator iter = meta.begin(); iter != meta.end(); ++iter){ + std::string key = lower(iter->first); + std::string value = iter->second; + if(is_prefix(key.c_str(), "x-amz-acl")){ + // not set value, but after set it. + }else if(is_prefix(key.c_str(), "x-amz-meta")){ + requestHeaders = curl_slist_sort_insert(requestHeaders, iter->first.c_str(), value.c_str()); + }else if(key == "x-amz-server-side-encryption" && value != "aws:kms"){ + // skip this header, because this header is specified after logic. + }else if(key == "x-amz-server-side-encryption-aws-kms-key-id"){ + // skip this header, because this header is specified after logic. + }else if(key == "x-amz-server-side-encryption-customer-key-md5"){ + // skip this header, because this header is specified after logic. 
+        }
+    }
+    // "x-amz-acl", storage class, sse
+    if(S3fsCurl::default_acl != acl_t::PRIVATE){
+        requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-acl", str(S3fsCurl::default_acl));
+    }
+    if(strcasecmp(GetStorageClass().c_str(), "STANDARD") != 0){
+        requestHeaders = curl_slist_sort_insert(requestHeaders, "x-amz-storage-class", GetStorageClass().c_str());
+    }
+    // SSE
+    if(S3fsCurl::GetSseType() != sse_type_t::SSE_DISABLE){
+        std::string ssevalue;
+        if(!AddSseRequestHead(S3fsCurl::GetSseType(), ssevalue, false)){
+            S3FS_PRN_WARN("Failed to set SSE header, but continue...");
+        }
+    }
+    if(is_use_ahbe){
+        // set additional header by ahbe conf
+        requestHeaders = AdditionalHeader::get()->AddHeader(requestHeaders, tpath);
+    }
+
+    requestHeaders = curl_slist_sort_insert(requestHeaders, "Accept", nullptr);
+    requestHeaders = curl_slist_sort_insert(requestHeaders, "Content-Type", contype.c_str());
+
+    op   = "POST";
+    type = REQTYPE::PREMULTIPOST;
+
+    // setopt
+    if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){
+        return -EIO;
+    }
+    if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POST, true)){              // POST
+        return -EIO;
+    }
+    if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast<void*>(&bodydata))){
+        return -EIO;
+    }
+    if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){
+        return -EIO;
+    }
+    if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POSTFIELDSIZE, 0)){
+        return -EIO;
+    }
+    if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_INFILESIZE, 0)){           // Content-Length
+        return -EIO;
+    }
+    if(!S3fsCurl::AddUserAgent(hCurl)){                                       // put User-Agent
+        return -EIO;
+    }
+
+    // request
+    int result;
+    if(0 != (result = RequestPerform())){
+        bodydata.clear();
+        return result;
+    }
+
+    if(!simple_parse_xml(bodydata.c_str(), bodydata.size(), "UploadId", upload_id)){
+        bodydata.clear();
+        return -EIO;
+    }
+
+    bodydata.clear();
+    return 0;
+}
+
+int S3fsCurl::CompleteMultipartPostRequest(const char* tpath, const std::string& upload_id,
etaglist_t& parts)
+{
+    S3FS_PRN_INFO3("[tpath=%s][parts=%zu]", SAFESTRPTR(tpath), parts.size());
+
+    if(!tpath){
+        return -EINVAL;
+    }
+
+    // make contents
+    // Build the CompleteMultipartUpload XML body listing every part number
+    // with its ETag; a part without an ETag has not finished uploading.
+    std::string postContent;
+    postContent += "<CompleteMultipartUpload>\n";
+    for(etaglist_t::iterator it = parts.begin(); it != parts.end(); ++it){
+        if(it->etag.empty()){
+            S3FS_PRN_ERR("%d file part is not finished uploading.", it->part_num);
+            return -EIO;
+        }
+        postContent += "<Part>\n";
+        postContent += "  <PartNumber>" + std::to_string(it->part_num) + "</PartNumber>\n";
+        postContent += "  <ETag>" + it->etag + "</ETag>\n";
+        postContent += "</Part>\n";
+    }
+    postContent += "</CompleteMultipartUpload>\n";
+
+    // set postdata
+    postdata             = reinterpret_cast<const unsigned char*>(postContent.c_str());
+    b_postdata           = postdata;
+    postdata_remaining   = postContent.size();  // without null
+    b_postdata_remaining = postdata_remaining;
+
+    if(!CreateCurlHandle()){
+        postdata   = nullptr;
+        b_postdata = nullptr;
+        return -EIO;
+    }
+    std::string resource;
+    std::string turl;
+    MakeUrlResource(get_realpath(tpath).c_str(), resource, turl);
+
+    // [NOTE]
+    // Encode the upload_id here.
+    // In compatible S3 servers(Cloudflare, etc), there are cases where characters that require URL encoding are included.
+    //
+    query_string = "uploadId=" + urlEncodeGeneral(upload_id);
+    turl += "?"
+ query_string; + url = prepare_url(turl.c_str()); + path = get_realpath(tpath); + requestHeaders = nullptr; + bodydata.clear(); + responseHeaders.clear(); + std::string contype = "application/xml"; + + requestHeaders = curl_slist_sort_insert(requestHeaders, "Accept", nullptr); + requestHeaders = curl_slist_sort_insert(requestHeaders, "Content-Type", contype.c_str()); + + if(sse_type_t::SSE_C == S3fsCurl::GetSseType()){ + std::string ssevalue; + if(!AddSseRequestHead(S3fsCurl::GetSseType(), ssevalue, false)){ + S3FS_PRN_WARN("Failed to set SSE header, but continue..."); + } + } + + op = "POST"; + type = REQTYPE::COMPLETEMULTIPOST; + + // setopt + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POST, true)){ // POST + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_POSTFIELDSIZE, static_cast(postdata_remaining))){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READDATA, reinterpret_cast(this))){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_READFUNCTION, S3fsCurl::ReadCallback)){ + return -EIO; + } + if(S3fsCurl::is_verbose){ + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_DEBUGFUNCTION, S3fsCurl::CurlDebugBodyOutFunc)){ // replace debug function + return -EIO; + } + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return -EIO; + } + + // request + int result = RequestPerform(); + bodydata.clear(); + postdata = nullptr; + b_postdata = nullptr; + + return result; +} + +int S3fsCurl::MultipartListRequest(std::string& body) +{ + S3FS_PRN_INFO3("list request(multipart)"); + + if(!CreateCurlHandle()){ + return -EIO; + } + std::string resource; + std::string turl; + path = get_realpath("/"); + 
MakeUrlResource(path.c_str(), resource, turl); + + query_string = "uploads"; + turl += "?" + query_string; + url = prepare_url(turl.c_str()); + requestHeaders = nullptr; + responseHeaders.clear(); + bodydata.clear(); + + requestHeaders = curl_slist_sort_insert(requestHeaders, "Accept", nullptr); + + op = "GET"; + type = REQTYPE::MULTILIST; + + // setopt + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEDATA, reinterpret_cast(&bodydata))){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback)){ + return -EIO; + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return -EIO; + } + + int result; + if(0 == (result = RequestPerform()) && !bodydata.empty()){ + body.swap(bodydata); + }else{ + body = ""; + } + bodydata.clear(); + + return result; +} + +int S3fsCurl::AbortMultipartUpload(const char* tpath, const std::string& upload_id) +{ + S3FS_PRN_INFO3("[tpath=%s]", SAFESTRPTR(tpath)); + + if(!tpath){ + return -EINVAL; + } + if(!CreateCurlHandle()){ + return -EIO; + } + std::string resource; + std::string turl; + MakeUrlResource(get_realpath(tpath).c_str(), resource, turl); + + // [NOTE] + // Encode the upload_id here. + // In compatible S3 servers(Cloudflare, etc), there are cases where characters that require URL encoding are included. + // + query_string = "uploadId=" + urlEncodeGeneral(upload_id); + turl += "?" 
+ query_string; + url = prepare_url(turl.c_str()); + path = get_realpath(tpath); + requestHeaders = nullptr; + responseHeaders.clear(); + + op = "DELETE"; + type = REQTYPE::ABORTMULTIUPLOAD; + + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_URL, url.c_str())){ + return -EIO; + } + if(CURLE_OK != curl_easy_setopt(hCurl, CURLOPT_CUSTOMREQUEST, "DELETE")){ + return -EIO; + } + if(!S3fsCurl::AddUserAgent(hCurl)){ // put User-Agent + return -EIO; + } + + return RequestPerform(); +} + +// +// PUT /ObjectName?partNumber=PartNumber&uploadId=UploadId HTTP/1.1 +// Host: BucketName.s3.amazonaws.com +// Date: date +// Content-Length: Size +// Authorization: Signature +// +// PUT /my-movie.m2ts?partNumber=1&uploadId=VCVsb2FkIElEIGZvciBlbZZpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZR HTTP/1.1 +// Host: example-bucket.s3.amazonaws.com +// Date: Mon, 1 Nov 2010 20:34:56 GMT +// Content-Length: 10485760 +// Content-MD5: pUNXr/BjKK5G2UKvaRRrOA== +// Authorization: AWS VGhpcyBtZXNzYWdlIHNpZ25lZGGieSRlbHZpbmc= +// +int S3fsCurl::UploadMultipartPostSetup(const char* tpath, int part_num, const std::string& upload_id) +{ + S3FS_PRN_INFO3("[tpath=%s][start=%lld][size=%lld][part=%d]", SAFESTRPTR(tpath), static_cast(partdata.startpos), static_cast(partdata.size), part_num); + + if(-1 == partdata.fd || -1 == partdata.startpos || -1 == partdata.size){ + return -EINVAL; + } + + requestHeaders = nullptr; + + // make md5 and file pointer + if(S3fsCurl::is_content_md5){ + md5_t md5raw; + if(use_newcache){ + if(!s3fs_md5(reinterpret_cast(partdata.buf), partdata.size, &md5raw)){ + S3FS_PRN_ERR("Could not make md5 for file(part %d)", part_num); + return -EIO; + } + }else if(!s3fs_md5_fd(partdata.fd, partdata.startpos, partdata.size, &md5raw)){ + S3FS_PRN_ERR("Could not make md5 for file(part %d)", part_num); + return -EIO; + } + partdata.etag = s3fs_hex_lower(md5raw.data(), md5raw.size()); + std::string md5base64 = s3fs_base64(md5raw.data(), md5raw.size()); + requestHeaders = 
curl_slist_sort_insert(requestHeaders, "Content-MD5", md5base64.c_str());
+    }
+
+    // make request
+    //
+    // [NOTE]
+    // Encode the upload_id here.
+    // In compatible S3 servers(Cloudflare, etc), there are cases where characters that require URL encoding are included.
+    //
+    query_string        = "partNumber=" + std::to_string(part_num) + "&uploadId=" + urlEncodeGeneral(upload_id);
+    std::string urlargs = "?" + query_string;
+    std::string resource;
+    std::string turl;
+    MakeUrlResource(get_realpath(tpath).c_str(), resource, turl);
+
+    turl += urlargs;
+    url   = prepare_url(turl.c_str());
+    path  = get_realpath(tpath);
+    bodydata.clear();
+    headdata.clear();
+    responseHeaders.clear();
+
+    // SSE-C
+    if(sse_type_t::SSE_C == S3fsCurl::GetSseType()){
+        std::string ssevalue;
+        if(!AddSseRequestHead(S3fsCurl::GetSseType(), ssevalue, false)){
+            S3FS_PRN_WARN("Failed to set SSE header, but continue...");
+        }
+    }
+
+    requestHeaders = curl_slist_sort_insert(requestHeaders, "Accept", nullptr);
+
+    op   = "PUT";
+    type = REQTYPE::UPLOADMULTIPOST;
+
+    // set lazy function
+    fpLazySetup = UploadMultipartPostSetCurlOpts;
+
+    return 0;
+}
+
+// Upload a single part: setup, lazy curl option binding, perform, then verify
+// the returned ETag via UploadMultipartPostComplete().
+int S3fsCurl::UploadMultipartPostRequest(const char* tpath, int part_num, const std::string& upload_id)
+{
+    int result;
+
+    S3FS_PRN_INFO3("[tpath=%s][start=%lld][size=%lld][part=%d]", SAFESTRPTR(tpath), static_cast<long long>(partdata.startpos), static_cast<long long>(partdata.size), part_num);
+
+    // setup
+    if(0 != (result = S3fsCurl::UploadMultipartPostSetup(tpath, part_num, upload_id))){
+        return result;
+    }
+
+    if(!fpLazySetup || !fpLazySetup(this)){
+        S3FS_PRN_ERR("Failed to lazy setup in multipart upload post request.");
+        return -EIO;
+    }
+
+    // request
+    if(0 == (result = RequestPerform())){
+        // UploadMultipartPostComplete returns true on success -> convert to 0
+        result = !UploadMultipartPostComplete();
+    }
+
+    // closing
+    bodydata.clear();
+    headdata.clear();
+
+    return result;
+}
+
+int S3fsCurl::CopyMultipartPostSetup(const char* from, const char* to, int
part_num, const std::string& upload_id, headers_t& meta)
+{
+    S3FS_PRN_INFO3("[from=%s][to=%s][part=%d]", SAFESTRPTR(from), SAFESTRPTR(to), part_num);
+
+    if(!from || !to){
+        return -EINVAL;
+    }
+    // [NOTE]
+    // Encode the upload_id here.
+    // In compatible S3 servers(Cloudflare, etc), there are cases where characters that require URL encoding are included.
+    //
+    query_string        = "partNumber=" + std::to_string(part_num) + "&uploadId=" + urlEncodeGeneral(upload_id);
+    std::string urlargs = "?" + query_string;
+    std::string resource;
+    std::string turl;
+    MakeUrlResource(get_realpath(to).c_str(), resource, turl);
+
+    turl += urlargs;
+    url   = prepare_url(turl.c_str());
+    path  = get_realpath(to);
+    requestHeaders = nullptr;
+    responseHeaders.clear();
+    bodydata.clear();
+    headdata.clear();
+
+    std::string contype = S3fsCurl::LookupMimeType(to);
+    requestHeaders = curl_slist_sort_insert(requestHeaders, "Content-Type", contype.c_str());
+
+    // Make request headers
+    for(headers_t::iterator iter = meta.begin(); iter != meta.end(); ++iter){
+        std::string key   = lower(iter->first);
+        std::string value = iter->second;
+        if(key == "x-amz-copy-source"){
+            requestHeaders = curl_slist_sort_insert(requestHeaders, iter->first.c_str(), value.c_str());
+        }else if(key == "x-amz-copy-source-range"){
+            requestHeaders = curl_slist_sort_insert(requestHeaders, iter->first.c_str(), value.c_str());
+        }else if(key == "x-amz-server-side-encryption" && value != "aws:kms"){
+            // skip this header
+        }else if(key == "x-amz-server-side-encryption-aws-kms-key-id"){
+            // skip this header
+        }else if(key == "x-amz-server-side-encryption-customer-key-md5"){
+            if(!AddSseRequestHead(sse_type_t::SSE_C, value, true)){
+                S3FS_PRN_WARN("Failed to insert SSE-C header.");
+            }
+        }
+    }
+    // SSE-C
+    if(sse_type_t::SSE_C == S3fsCurl::GetSseType()){
+        std::string ssevalue;
+        if(!AddSseRequestHead(S3fsCurl::GetSseType(), ssevalue, false)){
+            S3FS_PRN_WARN("Failed to set SSE header, but continue...");
+        }
+    }
+
+    op =
"PUT"; + type = REQTYPE::COPYMULTIPOST; + + // set lazy function + fpLazySetup = CopyMultipartPostSetCurlOpts; + + // request + S3FS_PRN_INFO3("copying... [from=%s][to=%s][part=%d]", from, to, part_num); + + return 0; +} + +bool S3fsCurl::UploadMultipartPostComplete() +{ + headers_t::iterator it = responseHeaders.find("ETag"); + if (it == responseHeaders.end()) { + return false; + } + std::string etag = peeloff(it->second); + + // check etag(md5); + // + // The ETAG when using SSE_C and SSE_KMS does not reflect the MD5 we sent + // SSE_C: https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html + // SSE_KMS is ignored in the above, but in the following it states the same in the highlights: + // https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html + // + if(S3fsCurl::is_content_md5 && sse_type_t::SSE_C != S3fsCurl::GetSseType() && sse_type_t::SSE_KMS != S3fsCurl::GetSseType()){ + if(!etag_equals(etag, partdata.etag)){ + return false; + } + } + partdata.petag->etag = etag; + partdata.uploaded = true; + + return true; +} + +// cppcheck-suppress unmatchedSuppression +// cppcheck-suppress constParameter +// cppcheck-suppress constParameterCallback +bool S3fsCurl::CopyMultipartPostCallback(S3fsCurl* s3fscurl, void* param) +{ + if(!s3fscurl || param){ // this callback does not need a parameter + return false; + } + + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + return s3fscurl->CopyMultipartPostComplete(); +} + +bool S3fsCurl::CopyMultipartPostComplete() +{ + std::string etag; + partdata.uploaded = simple_parse_xml(bodydata.c_str(), bodydata.size(), "ETag", etag); + partdata.petag->etag = peeloff(etag); + + bodydata.clear(); + headdata.clear(); + + return true; +} + +bool S3fsCurl::MixMultipartPostComplete() +{ + bool result; + if(-1 == partdata.fd){ + result = CopyMultipartPostComplete(); + }else{ + result = UploadMultipartPostComplete(); + } + return result; +} + +int 
S3fsCurl::MultipartHeadRequest(const char* tpath, off_t size, headers_t& meta, bool is_copy)
+{
+    int result;
+    std::string upload_id;
+    off_t chunk;
+    off_t bytes_remaining;
+    etaglist_t list;
+
+    S3FS_PRN_INFO3("[tpath=%s]", SAFESTRPTR(tpath));
+
+    if(0 != (result = PreMultipartPostRequest(tpath, meta, upload_id, is_copy))){
+        return result;
+    }
+    DestroyCurlHandle();
+
+    // Initialize S3fsMultiCurl
+    S3fsMultiCurl curlmulti(GetMaxParallelCount());
+    curlmulti.SetSuccessCallback(S3fsCurl::CopyMultipartPostCallback);
+    curlmulti.SetRetryCallback(S3fsCurl::CopyMultipartPostRetryCallback);
+
+    for(bytes_remaining = size, chunk = 0; 0 < bytes_remaining; bytes_remaining -= chunk){
+        chunk = bytes_remaining > GetMultipartCopySize() ? GetMultipartCopySize() : bytes_remaining;
+
+        std::ostringstream strrange;
+        strrange << "bytes=" << (size - bytes_remaining) << "-" << (size - bytes_remaining + chunk - 1);
+        meta["x-amz-copy-source-range"] = strrange.str();
+
+        // s3fscurl sub object
+        std::unique_ptr<S3fsCurl> s3fscurl_para(new S3fsCurl(true));
+        s3fscurl_para->b_from = SAFESTRPTR(tpath);
+        s3fscurl_para->b_meta = meta;
+        s3fscurl_para->partdata.add_etag_list(list);
+
+        // initiate upload part for parallel
+        if(0 != (result = s3fscurl_para->CopyMultipartPostSetup(tpath, tpath, s3fscurl_para->partdata.get_part_number(), upload_id, meta))){
+            S3FS_PRN_ERR("failed uploading part setup(%d)", result);
+            return result;
+        }
+
+        // set into parallel object
+        if(!curlmulti.SetS3fsCurlObject(std::move(s3fscurl_para))){
+            S3FS_PRN_ERR("Could not make curl object into multi curl(%s).", tpath);
+            return -EIO;
+        }
+    }
+
+    // Multi request
+    if(0 != (result = curlmulti.Request())){
+        S3FS_PRN_ERR("error occurred in multi request(errno=%d).", result);
+
+        S3fsCurl s3fscurl_abort(true);
+        int result2 = s3fscurl_abort.AbortMultipartUpload(tpath, upload_id);
+        s3fscurl_abort.DestroyCurlHandle();
+        if(result2 != 0){
+            S3FS_PRN_ERR("error aborting multipart upload(errno=%d).", result2);
+        }
+        return
result;
+    }
+
+    if(0 != (result = CompleteMultipartPostRequest(tpath, upload_id, list))){
+        return result;
+    }
+    return 0;
+}
+
+// Upload one part of an already-initiated multipart upload; the ETag result
+// is written into *petagpair by the completion handler.
+int S3fsCurl::MultipartUploadRequest(const std::string& upload_id, const char* tpath, int fd, off_t offset, off_t size, etagpair* petagpair)
+{
+    S3FS_PRN_INFO3("[upload_id=%s][tpath=%s][fd=%d][offset=%lld][size=%lld]", upload_id.c_str(), SAFESTRPTR(tpath), fd, static_cast<long long>(offset), static_cast<long long>(size));
+
+    // set
+    partdata.fd         = fd;
+    partdata.startpos   = offset;
+    partdata.size       = size;
+    b_partdata_startpos = partdata.startpos;
+    b_partdata_size     = partdata.size;
+    partdata.set_etag(petagpair);
+
+    // upload part
+    int result;
+    if(0 != (result = UploadMultipartPostRequest(tpath, petagpair->part_num, upload_id))){
+        S3FS_PRN_ERR("failed uploading %d part by error(%d)", petagpair->part_num, result);
+        return result;
+    }
+    DestroyCurlHandle();
+
+    return 0;
+}
+
+// Server-side rename implemented as a multipart copy from "from" to "to".
+int S3fsCurl::MultipartRenameRequest(const char* from, const char* to, headers_t& meta, off_t size)
+{
+    int result;
+    std::string upload_id;
+    off_t chunk;
+    off_t bytes_remaining;
+    etaglist_t list;
+
+    S3FS_PRN_INFO3("[from=%s][to=%s]", SAFESTRPTR(from), SAFESTRPTR(to));
+
+    std::string srcresource;
+    std::string srcurl;
+    MakeUrlResource(get_realpath(from).c_str(), srcresource, srcurl);
+
+    meta["Content-Type"]      = S3fsCurl::LookupMimeType(to);
+    meta["x-amz-copy-source"] = srcresource;
+
+    if(0 != (result = PreMultipartPostRequest(to, meta, upload_id, true))){
+        return result;
+    }
+    DestroyCurlHandle();
+
+    // Initialize S3fsMultiCurl
+    S3fsMultiCurl curlmulti(GetMaxParallelCount());
+    curlmulti.SetSuccessCallback(S3fsCurl::CopyMultipartPostCallback);
+    curlmulti.SetRetryCallback(S3fsCurl::CopyMultipartPostRetryCallback);
+
+    for(bytes_remaining = size, chunk = 0; 0 < bytes_remaining; bytes_remaining -= chunk){
+        chunk = bytes_remaining > GetMultipartCopySize() ?
GetMultipartCopySize() : bytes_remaining; + + std::ostringstream strrange; + strrange << "bytes=" << (size - bytes_remaining) << "-" << (size - bytes_remaining + chunk - 1); + meta["x-amz-copy-source-range"] = strrange.str(); + + // s3fscurl sub object + std::unique_ptr s3fscurl_para(new S3fsCurl(true)); + s3fscurl_para->b_from = SAFESTRPTR(from); + s3fscurl_para->b_meta = meta; + s3fscurl_para->partdata.add_etag_list(list); + + // initiate upload part for parallel + if(0 != (result = s3fscurl_para->CopyMultipartPostSetup(from, to, s3fscurl_para->partdata.get_part_number(), upload_id, meta))){ + S3FS_PRN_ERR("failed uploading part setup(%d)", result); + return result; + } + + // set into parallel object + if(!curlmulti.SetS3fsCurlObject(std::move(s3fscurl_para))){ + S3FS_PRN_ERR("Could not make curl object into multi curl(%s).", to); + return -EIO; + } + } + + // Multi request + if(0 != (result = curlmulti.Request())){ + S3FS_PRN_ERR("error occurred in multi request(errno=%d).", result); + + S3fsCurl s3fscurl_abort(true); + int result2 = s3fscurl_abort.AbortMultipartUpload(to, upload_id); + s3fscurl_abort.DestroyCurlHandle(); + if(result2 != 0){ + S3FS_PRN_ERR("error aborting multipart upload(errno=%d).", result2); + } + return result; + } + + if(0 != (result = CompleteMultipartPostRequest(to, upload_id, list))){ + return result; + } + return 0; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/curl.h b/s3fs/curl.h new file mode 100644 index 0000000..06d8c42 --- /dev/null +++ b/s3fs/curl.h @@ -0,0 +1,418 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later 
version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_CURL_H_ +#define S3FS_CURL_H_ + +#include +#include +#include +#include + +#include "autolock.h" +#include "metaheader.h" +#include "fdcache_page.h" + +//---------------------------------------------- +// Avoid dependency on libcurl version +//---------------------------------------------- +// [NOTE] +// The following symbols (enum) depend on the version of libcurl. +// CURLOPT_TCP_KEEPALIVE 7.25.0 and later +// CURLOPT_SSL_ENABLE_ALPN 7.36.0 and later +// CURLOPT_KEEP_SENDING_ON_ERROR 7.51.0 and later +// +// s3fs uses these, if you build s3fs with the old libcurl, +// substitute the following symbols to avoid errors. +// If the version of libcurl linked at runtime is old, +// curl_easy_setopt results in an error(CURLE_UNKNOWN_OPTION) and +// a message is output. 
+// +#if defined(HAVE_CURLOPT_TCP_KEEPALIVE) && (HAVE_CURLOPT_TCP_KEEPALIVE == 1) + #define S3FS_CURLOPT_TCP_KEEPALIVE CURLOPT_TCP_KEEPALIVE +#else + #define S3FS_CURLOPT_TCP_KEEPALIVE static_cast(213) +#endif + +#if defined(HAVE_CURLOPT_SSL_ENABLE_ALPN) && (HAVE_CURLOPT_SSL_ENABLE_ALPN == 1) + #define S3FS_CURLOPT_SSL_ENABLE_ALPN CURLOPT_SSL_ENABLE_ALPN +#else + #define S3FS_CURLOPT_SSL_ENABLE_ALPN static_cast(226) +#endif + +#if defined(HAVE_CURLOPT_KEEP_SENDING_ON_ERROR) && (HAVE_CURLOPT_KEEP_SENDING_ON_ERROR == 1) + #define S3FS_CURLOPT_KEEP_SENDING_ON_ERROR CURLOPT_KEEP_SENDING_ON_ERROR +#else + #define S3FS_CURLOPT_KEEP_SENDING_ON_ERROR static_cast(245) +#endif + +//---------------------------------------------- +// Structure / Typedefs +//---------------------------------------------- +typedef std::pair progress_t; +typedef std::map curltime_t; +typedef std::map curlprogress_t; + +//---------------------------------------------- +// class S3fsCurl +//---------------------------------------------- +class CurlHandlerPool; +class S3fsCred; +class S3fsCurl; +class Semaphore; + +// Prototype function for lazy setup options for curl handle +typedef bool (*s3fscurl_lazy_setup)(S3fsCurl* s3fscurl); + +typedef std::map sseckeymap_t; +typedef std::vector sseckeylist_t; + +// Class for lapping curl +// +class S3fsCurl +{ + friend class S3fsMultiCurl; + + private: + enum class REQTYPE { + UNSET = -1, + DELETE = 0, + HEAD, + PUTHEAD, + PUT, + GET, + CHKBUCKET, + LISTBUCKET, + PREMULTIPOST, + COMPLETEMULTIPOST, + UPLOADMULTIPOST, + COPYMULTIPOST, + MULTILIST, + IAMCRED, + ABORTMULTIUPLOAD, + IAMROLE + }; + + // class variables + static pthread_mutex_t curl_warnings_lock; + static bool curl_warnings_once; // emit older curl warnings only once + static pthread_mutex_t curl_handles_lock; + static struct callback_locks_t { + pthread_mutex_t dns; + pthread_mutex_t ssl_session; + } callback_locks; + static bool is_initglobal_done; + static CurlHandlerPool* sCurlPool; + static 
int sCurlPoolSize; + static CURLSH* hCurlShare; + static bool is_cert_check; + static bool is_dns_cache; + static bool is_ssl_session_cache; + static long connect_timeout; + static time_t readwrite_timeout; + static int retries; + static bool is_public_bucket; + static acl_t default_acl; + static std::string storage_class; + static sseckeylist_t sseckeys; + static std::string ssekmsid; + static sse_type_t ssetype; + static bool is_content_md5; + static bool is_verbose; + static bool is_dump_body; + static S3fsCred* ps3fscred; + static long ssl_verify_hostname; + static curltime_t curl_times; + static curlprogress_t curl_progress; + static std::string curl_ca_bundle; + static mimes_t mimeTypes; + static std::string userAgent; + static int max_parallel_cnt; + static int max_multireq; + static off_t multipart_size; + static off_t multipart_copy_size; + static signature_type_t signature_type; + static bool is_unsigned_payload; + static bool is_ua; // User-Agent + static bool listobjectsv2; + static bool requester_pays; + static std::string proxy_url; + static bool proxy_http; + static std::string proxy_userpwd; // load from file(:) + + // variables + CURL* hCurl; + REQTYPE type; // type of request + std::string path; // target object path + std::string base_path; // base path (for multi curl head request) + std::string saved_path; // saved path = cache key (for multi curl head request) + std::string url; // target object path(url) + struct curl_slist* requestHeaders; + headers_t responseHeaders; // header data by HeaderCallback + std::string bodydata; // body data by WriteMemoryCallback + std::string headdata; // header data by WriteMemoryCallback + long LastResponseCode; + const unsigned char* postdata; // use by post method and read callback function. + off_t postdata_remaining; // use by post method and read callback function. 
+ filepart partdata; // use by multipart upload/get object callback + bool is_use_ahbe; // additional header by extension + int retry_count; // retry count for multipart + FILE* b_infile; // backup for retrying + const unsigned char* b_postdata; // backup for retrying + off_t b_postdata_remaining; // backup for retrying + off_t b_partdata_startpos; // backup for retrying + off_t b_partdata_size; // backup for retrying + char* b_partdata_buf; // backup for retrying + size_t b_ssekey_pos; // backup for retrying + std::string b_ssevalue; // backup for retrying + sse_type_t b_ssetype; // backup for retrying + std::string b_from; // backup for retrying(for copy request) + headers_t b_meta; // backup for retrying(for copy request) + std::string op; // the HTTP verb of the request ("PUT", "GET", etc.) + std::string query_string; // request query string + Semaphore *sem; + pthread_mutex_t *completed_tids_lock; + std::vector *completed_tids; + s3fscurl_lazy_setup fpLazySetup; // curl options for lazy setting function + CURLcode curlCode; // handle curl return + std::string sha256; // sha256 + + public: + static constexpr long S3FSCURL_RESPONSECODE_NOTSET = -1; + static constexpr long S3FSCURL_RESPONSECODE_FATAL_ERROR = -2; + static constexpr int S3FSCURL_PERFORM_RESULT_NOTSET = 1; + + public: + // constructor/destructor + explicit S3fsCurl(bool ahbe = false); + ~S3fsCurl(); + S3fsCurl(const S3fsCurl&) = delete; + S3fsCurl(S3fsCurl&&) = delete; + S3fsCurl& operator=(const S3fsCurl&) = delete; + S3fsCurl& operator=(S3fsCurl&&) = delete; + + private: + // class methods + static bool InitGlobalCurl(); + static bool DestroyGlobalCurl(); + static bool InitShareCurl(); + static bool DestroyShareCurl(); + static void LockCurlShare(CURL* handle, curl_lock_data nLockData, curl_lock_access laccess, void* useptr); + static void UnlockCurlShare(CURL* handle, curl_lock_data nLockData, void* useptr); + static bool InitCryptMutex(); + static bool DestroyCryptMutex(); + static int 
CurlProgress(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow); + + static bool LocateBundle(); + static size_t HeaderCallback(void *data, size_t blockSize, size_t numBlocks, void *userPtr); + static size_t WriteMemoryCallback(void *ptr, size_t blockSize, size_t numBlocks, void *data); + static size_t ReadCallback(void *ptr, size_t size, size_t nmemb, void *userp); + static size_t UploadReadCallback(void *ptr, size_t size, size_t nmemb, void *userp); + static size_t DownloadWriteCallback(void* ptr, size_t size, size_t nmemb, void* userp); + + struct drp_upload_ctx { + std::string path; + char* data; + size_t pos; + size_t len; + drp_upload_ctx(std::string path_t, char* data_t, size_t pos_t, size_t len_t) : + path(path_t), data(data_t), pos(pos_t), len(len_t) {} + }; + static size_t UploadReadCallbackByMemory(void *ptr, size_t size, size_t nmemb, void *stream); + + static bool UploadMultipartPostCallback(S3fsCurl* s3fscurl, void* param); + static bool CopyMultipartPostCallback(S3fsCurl* s3fscurl, void* param); + static bool MixMultipartPostCallback(S3fsCurl* s3fscurl, void* param); + static std::unique_ptr UploadMultipartPostRetryCallback(S3fsCurl* s3fscurl); + static std::unique_ptr CopyMultipartPostRetryCallback(S3fsCurl* s3fscurl); + static std::unique_ptr MixMultipartPostRetryCallback(S3fsCurl* s3fscurl); + static std::unique_ptr ParallelGetObjectRetryCallback(S3fsCurl* s3fscurl); + + // lazy functions for set curl options + static bool CopyMultipartPostSetCurlOpts(S3fsCurl* s3fscurl); + static bool PreGetObjectRequestSetCurlOpts(S3fsCurl* s3fscurl); + static bool PreHeadRequestSetCurlOpts(S3fsCurl* s3fscurl); + + static bool LoadEnvSseCKeys(); + static bool LoadEnvSseKmsid(); + static bool PushbackSseKeys(const std::string& onekey); + static bool AddUserAgent(CURL* hCurl); + + static int CurlDebugFunc(const CURL* hcurl, curl_infotype type, char* data, size_t size, void* userptr); + static int CurlDebugBodyInFunc(const CURL* hcurl, 
curl_infotype type, char* data, size_t size, void* userptr); + static int CurlDebugBodyOutFunc(const CURL* hcurl, curl_infotype type, char* data, size_t size, void* userptr); + static int RawCurlDebugFunc(const CURL* hcurl, curl_infotype type, char* data, size_t size, void* userptr, curl_infotype datatype); + + // methods + bool ResetHandle(AutoLock::Type locktype = AutoLock::NONE); + bool RemakeHandle(); + bool ClearInternalData(); + void insertV4Headers(const std::string& access_key_id, const std::string& secret_access_key, const std::string& access_token); + void insertV2Headers(const std::string& access_key_id, const std::string& secret_access_key, const std::string& access_token); + void insertIBMIAMHeaders(const std::string& access_key_id, const std::string& access_token); + void insertAuthHeaders(); + bool AddSseRequestHead(sse_type_t ssetype, const std::string& ssevalue, bool is_copy); + std::string CalcSignatureV2(const std::string& method, const std::string& strMD5, const std::string& content_type, const std::string& date, const std::string& resource, const std::string& secret_access_key, const std::string& access_token); + std::string CalcSignature(const std::string& method, const std::string& canonical_uri, const std::string& query_string, const std::string& strdate, const std::string& payload_hash, const std::string& date8601, const std::string& secret_access_key, const std::string& access_token); + int UploadMultipartPostSetup(const char* tpath, int part_num, const std::string& upload_id); + int CopyMultipartPostSetup(const char* from, const char* to, int part_num, const std::string& upload_id, headers_t& meta); + bool UploadMultipartPostComplete(); + bool CopyMultipartPostComplete(); + int MapPutErrorResponse(int result); + + public: + // class methods + static bool InitS3fsCurl(); + static bool InitCredentialObject(S3fsCred* pcredobj); + static bool InitMimeType(const std::string& strFile); + static bool DestroyS3fsCurl(); + static std::unique_ptr 
CreateParallelS3fsCurl(const char* tpath, int fd, off_t start, off_t size, int part_num, bool is_copy, etagpair* petag, const std::string& upload_id, int& result); + static int ParallelMultipartUploadRequest(const char* tpath, headers_t& meta, int fd, off_t fsize = -1, char* buf = nullptr); + static int ParallelMixMultipartUploadRequest(const char* tpath, headers_t& meta, int fd, const fdpage_list_t& mixuppages); + static int ParallelGetObjectRequest(const char* tpath, int fd, off_t start, off_t size, char* buf = nullptr); + + // lazy functions for set curl options(public) + static bool UploadMultipartPostSetCurlOpts(S3fsCurl* s3fscurl); + + // class methods(variables) + static std::string LookupMimeType(const std::string& name); + static bool SetCheckCertificate(bool isCertCheck); + static bool SetDnsCache(bool isCache); + static bool SetSslSessionCache(bool isCache); + static long SetConnectTimeout(long timeout); + static time_t SetReadwriteTimeout(time_t timeout); + static time_t GetReadwriteTimeout() { return S3fsCurl::readwrite_timeout; } + static int SetRetries(int count); + static bool SetPublicBucket(bool flag); + static bool IsPublicBucket() { return S3fsCurl::is_public_bucket; } + static acl_t SetDefaultAcl(acl_t acl); + static acl_t GetDefaultAcl(); + static std::string SetStorageClass(const std::string& storage_class); + static std::string GetStorageClass() { return S3fsCurl::storage_class; } + static bool LoadEnvSse() { return (S3fsCurl::LoadEnvSseCKeys() && S3fsCurl::LoadEnvSseKmsid()); } + static sse_type_t SetSseType(sse_type_t type); + static sse_type_t GetSseType() { return S3fsCurl::ssetype; } + static bool IsSseDisable() { return (sse_type_t::SSE_DISABLE == S3fsCurl::ssetype); } + static bool IsSseS3Type() { return (sse_type_t::SSE_S3 == S3fsCurl::ssetype); } + static bool IsSseCType() { return (sse_type_t::SSE_C == S3fsCurl::ssetype); } + static bool IsSseKmsType() { return (sse_type_t::SSE_KMS == S3fsCurl::ssetype); } + static bool 
FinalCheckSse(); + static bool SetSseCKeys(const char* filepath); + static bool SetSseKmsid(const char* kmsid); + static bool IsSetSseKmsId() { return !S3fsCurl::ssekmsid.empty(); } + static const char* GetSseKmsId() { return S3fsCurl::ssekmsid.c_str(); } + static bool GetSseKey(std::string& md5, std::string& ssekey); + static bool GetSseKeyMd5(size_t pos, std::string& md5); + static size_t GetSseKeyCount(); + static bool SetContentMd5(bool flag); + static bool SetVerbose(bool flag); + static bool GetVerbose() { return S3fsCurl::is_verbose; } + static bool SetDumpBody(bool flag); + static bool IsDumpBody() { return S3fsCurl::is_dump_body; } + static long SetSslVerifyHostname(long value); + static long GetSslVerifyHostname() { return S3fsCurl::ssl_verify_hostname; } + static void ResetOffset(S3fsCurl* pCurl); + // maximum parallel GET and PUT requests + static int SetMaxParallelCount(int value); + static int GetMaxParallelCount() { return S3fsCurl::max_parallel_cnt; } + // maximum parallel HEAD requests + static int SetMaxMultiRequest(int max); + static int GetMaxMultiRequest() { return S3fsCurl::max_multireq; } + static bool SetMultipartSize(off_t size); + static off_t GetMultipartSize() { return S3fsCurl::multipart_size; } + static bool SetMultipartCopySize(off_t size); + static off_t GetMultipartCopySize() { return S3fsCurl::multipart_copy_size; } + static signature_type_t SetSignatureType(signature_type_t signature_type) { signature_type_t bresult = S3fsCurl::signature_type; S3fsCurl::signature_type = signature_type; return bresult; } + static signature_type_t GetSignatureType() { return S3fsCurl::signature_type; } + static bool SetUnsignedPayload(bool issset) { bool bresult = S3fsCurl::is_unsigned_payload; S3fsCurl::is_unsigned_payload = issset; return bresult; } + static bool GetUnsignedPayload() { return S3fsCurl::is_unsigned_payload; } + static bool SetUserAgentFlag(bool isset) { bool bresult = S3fsCurl::is_ua; S3fsCurl::is_ua = isset; return bresult; } + 
static bool IsUserAgentFlag() { return S3fsCurl::is_ua; } + static void InitUserAgent(); + static bool SetListObjectsV2(bool isset) { bool bresult = S3fsCurl::listobjectsv2; S3fsCurl::listobjectsv2 = isset; return bresult; } + static bool IsListObjectsV2() { return S3fsCurl::listobjectsv2; } + static bool SetRequesterPays(bool flag) { bool old_flag = S3fsCurl::requester_pays; S3fsCurl::requester_pays = flag; return old_flag; } + static bool IsRequesterPays() { return S3fsCurl::requester_pays; } + static bool SetProxy(const char* url); + static bool SetProxyUserPwd(const char* userpwd); + + // methods + bool CreateCurlHandle(bool only_pool = false, bool remake = false); + bool DestroyCurlHandle(bool restore_pool = true, bool clear_internal_data = true, AutoLock::Type locktype = AutoLock::NONE); + + bool GetIAMCredentials(const char* cred_url, const char* iam_v2_token, const char* ibm_secret_access_key, std::string& response); + bool GetIAMRoleFromMetaData(const char* cred_url, const char* iam_v2_token, std::string& token); + bool GetResponseCode(long& responseCode, bool from_curl_handle = true) const; + int RequestPerform(bool dontAddAuthHeaders=false); + int DeleteRequest(const char* tpath); + int GetIAMv2ApiToken(const char* token_url, int token_ttl, const char* token_ttl_hdr, std::string& response); + bool PreHeadRequest(const char* tpath, const char* bpath = nullptr, const char* savedpath = nullptr, size_t ssekey_pos = -1); + bool PreHeadRequest(const std::string& tpath, const std::string& bpath, const std::string& savedpath, size_t ssekey_pos = -1) { + return PreHeadRequest(tpath.c_str(), bpath.c_str(), savedpath.c_str(), ssekey_pos); + } + int HeadRequest(const char* tpath, headers_t& meta); + int PutHeadRequest(const char* tpath, headers_t& meta, bool is_copy); + int PutRequest(const char* tpath, headers_t& meta, int fd, off_t fsize = -1, char* buf = nullptr); + int PreGetObjectRequest(const char* tpath, int fd, off_t start, off_t size, sse_type_t ssetype, 
const std::string& ssevalue, char* buf = nullptr); + int GetObjectRequest(const char* tpath, int fd, off_t start = -1, off_t size = -1, char* buf = nullptr); + int CheckBucket(const char* check_path, bool compat_dir, bool force_no_sse); + int ListBucketRequest(const char* tpath, const char* query); + int PreMultipartPostRequest(const char* tpath, headers_t& meta, std::string& upload_id, bool is_copy); + int CompleteMultipartPostRequest(const char* tpath, const std::string& upload_id, etaglist_t& parts); + int UploadMultipartPostRequest(const char* tpath, int part_num, const std::string& upload_id); + bool MixMultipartPostComplete(); + int MultipartListRequest(std::string& body); + int AbortMultipartUpload(const char* tpath, const std::string& upload_id); + int MultipartHeadRequest(const char* tpath, off_t size, headers_t& meta, bool is_copy); + int MultipartUploadRequest(const std::string& upload_id, const char* tpath, int fd, off_t offset, off_t size, etagpair* petagpair); + int MultipartRenameRequest(const char* from, const char* to, headers_t& meta, off_t size); + + // methods(variables) + CURL* GetCurlHandle() const { return hCurl; } + std::string GetPath() const { return path; } + std::string GetBasePath() const { return base_path; } + std::string GetSpecialSavedPath() const { return saved_path; } + std::string GetUrl() const { return url; } + std::string GetOp() const { return op; } + const headers_t* GetResponseHeaders() const { return &responseHeaders; } + const std::string* GetBodyData() const { return &bodydata; } + const std::string* GetHeadData() const { return &headdata; } + CURLcode GetCurlCode() const { return curlCode; } + long GetLastResponseCode() const { return LastResponseCode; } + bool SetUseAhbe(bool ahbe); + bool EnableUseAhbe() { return SetUseAhbe(true); } + bool DisableUseAhbe() { return SetUseAhbe(false); } + bool IsUseAhbe() const { return is_use_ahbe; } + int GetMultipartRetryCount() const { return retry_count; } + void 
SetMultipartRetryCount(int retrycnt) { retry_count = retrycnt; } + bool IsOverMultipartRetryCount() const { return (retry_count >= S3fsCurl::retries); } + size_t GetLastPreHeadSeecKeyPos() const { return b_ssekey_pos; } +}; + +#endif // S3FS_CURL_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/curl_handlerpool.cpp b/s3fs/curl_handlerpool.cpp new file mode 100644 index 0000000..8646f2f --- /dev/null +++ b/s3fs/curl_handlerpool.cpp @@ -0,0 +1,137 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include + +#include "s3fs_logger.h" +#include "curl_handlerpool.h" +#include "autolock.h" + +//------------------------------------------------------------------- +// Class CurlHandlerPool +//------------------------------------------------------------------- +bool CurlHandlerPool::Init() +{ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + if (0 != pthread_mutex_init(&mLock, &attr)) { + S3FS_PRN_ERR("Init curl handlers lock failed"); + return false; + } + + for(int cnt = 0; cnt < mMaxHandlers; ++cnt){ + CURL* hCurl = curl_easy_init(); + if(!hCurl){ + S3FS_PRN_ERR("Init curl handlers pool failed"); + Destroy(); + return false; + } + mPool.push_back(hCurl); + } + return true; +} + +bool CurlHandlerPool::Destroy() +{ + { + AutoLock lock(&mLock); + + while(!mPool.empty()){ + CURL* hCurl = mPool.back(); + mPool.pop_back(); + if(hCurl){ + curl_easy_cleanup(hCurl); + } + } + } + if (0 != pthread_mutex_destroy(&mLock)) { + S3FS_PRN_ERR("Destroy curl handlers lock failed"); + return false; + } + return true; +} + +CURL* CurlHandlerPool::GetHandler(bool only_pool) +{ + AutoLock lock(&mLock); + + CURL* hCurl = nullptr; + + if(!mPool.empty()){ + hCurl = mPool.back(); + mPool.pop_back(); + S3FS_PRN_DBG("Get handler from pool: rest = %d", static_cast(mPool.size())); + } + if(only_pool){ + return hCurl; + } + if(!hCurl){ + S3FS_PRN_INFO("Pool empty: force to create new handler"); + hCurl = curl_easy_init(); + } + return hCurl; +} + +void CurlHandlerPool::ReturnHandler(CURL* hCurl, bool restore_pool) +{ + if(!hCurl){ + return; + } + AutoLock lock(&mLock); + + if(restore_pool){ + S3FS_PRN_DBG("Return handler to pool"); + mPool.push_back(hCurl); + + while(mMaxHandlers < static_cast(mPool.size())){ + CURL* hOldCurl = mPool.front(); + mPool.pop_front(); + if(hOldCurl){ + S3FS_PRN_INFO("Pool full: destroy the oldest handler"); + curl_easy_cleanup(hOldCurl); + } + } 
+ }else{ + S3FS_PRN_INFO("Pool full: destroy the handler"); + curl_easy_cleanup(hCurl); + } +} + +void CurlHandlerPool::ResetHandler(CURL* hCurl) +{ + if(!hCurl){ + return; + } + AutoLock lock(&mLock); + + curl_easy_reset(hCurl); +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/curl_handlerpool.h b/s3fs/curl_handlerpool.h new file mode 100644 index 0000000..a55c9b0 --- /dev/null +++ b/s3fs/curl_handlerpool.h @@ -0,0 +1,70 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef S3FS_CURL_HANDLERPOOL_H_ +#define S3FS_CURL_HANDLERPOOL_H_ + +#include +#include +#include + +//---------------------------------------------- +// Typedefs +//---------------------------------------------- +typedef std::list hcurllist_t; + +//---------------------------------------------- +// class CurlHandlerPool +//---------------------------------------------- +class CurlHandlerPool +{ + public: + explicit CurlHandlerPool(int maxHandlers) : mMaxHandlers(maxHandlers) + { + assert(maxHandlers > 0); + } + CurlHandlerPool(const CurlHandlerPool&) = delete; + CurlHandlerPool(CurlHandlerPool&&) = delete; + CurlHandlerPool& operator=(const CurlHandlerPool&) = delete; + CurlHandlerPool& operator=(CurlHandlerPool&&) = delete; + + bool Init(); + bool Destroy(); + + CURL* GetHandler(bool only_pool); + void ReturnHandler(CURL* hCurl, bool restore_pool); + void ResetHandler(CURL* hCurl); + + private: + int mMaxHandlers; + pthread_mutex_t mLock; + hcurllist_t mPool; +}; + +#endif // S3FS_CURL_HANDLERPOOL_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/curl_multi.cpp b/s3fs/curl_multi.cpp new file mode 100644 index 0000000..8a761f6 --- /dev/null +++ b/s3fs/curl_multi.cpp @@ -0,0 +1,394 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include + +#include "s3fs.h" +#include "s3fs_logger.h" +#include "curl_multi.h" +#include "curl.h" +#include "autolock.h" +#include "psemaphore.h" + +//------------------------------------------------------------------- +// Class S3fsMultiCurl +//------------------------------------------------------------------- +S3fsMultiCurl::S3fsMultiCurl(int maxParallelism, bool not_abort) : maxParallelism(maxParallelism), not_abort(not_abort), SuccessCallback(nullptr), NotFoundCallback(nullptr), RetryCallback(nullptr), pSuccessCallbackParam(nullptr), pNotFoundCallbackParam(nullptr) +{ + int result; + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + if (0 != (result = pthread_mutex_init(&completed_tids_lock, &attr))) { + S3FS_PRN_ERR("could not initialize completed_tids_lock: %i", result); + abort(); + } +} + +S3fsMultiCurl::~S3fsMultiCurl() +{ + Clear(); + int result; + if(0 != (result = pthread_mutex_destroy(&completed_tids_lock))){ + S3FS_PRN_ERR("could not destroy completed_tids_lock: %i", result); + } +} + +bool S3fsMultiCurl::ClearEx(bool is_all) +{ + s3fscurllist_t::iterator iter; + for(iter = clist_req.begin(); iter != clist_req.end(); ++iter){ + S3fsCurl* s3fscurl = iter->get(); + if(s3fscurl){ + s3fscurl->DestroyCurlHandle(); + } + } + clist_req.clear(); + + if(is_all){ + for(iter = clist_all.begin(); iter != clist_all.end(); ++iter){ + S3fsCurl* s3fscurl = iter->get(); + s3fscurl->DestroyCurlHandle(); + } + clist_all.clear(); + } + + S3FS_MALLOCTRIM(0); + + return true; +} + +S3fsMultiSuccessCallback S3fsMultiCurl::SetSuccessCallback(S3fsMultiSuccessCallback function) +{ + S3fsMultiSuccessCallback old 
= SuccessCallback; + SuccessCallback = function; + return old; +} + +S3fsMultiNotFoundCallback S3fsMultiCurl::SetNotFoundCallback(S3fsMultiNotFoundCallback function) +{ + S3fsMultiNotFoundCallback old = NotFoundCallback; + NotFoundCallback = function; + return old; +} + +S3fsMultiRetryCallback S3fsMultiCurl::SetRetryCallback(S3fsMultiRetryCallback function) +{ + S3fsMultiRetryCallback old = RetryCallback; + RetryCallback = function; + return old; +} + +void* S3fsMultiCurl::SetSuccessCallbackParam(void* param) +{ + void* old = pSuccessCallbackParam; + pSuccessCallbackParam = param; + return old; +} + +void* S3fsMultiCurl::SetNotFoundCallbackParam(void* param) +{ + void* old = pNotFoundCallbackParam; + pNotFoundCallbackParam = param; + return old; +} + +bool S3fsMultiCurl::SetS3fsCurlObject(std::unique_ptr s3fscurl) +{ + if(!s3fscurl){ + return false; + } + clist_all.push_back(std::move(s3fscurl)); + + return true; +} + +int S3fsMultiCurl::MultiPerform() +{ + std::vector threads; + bool success = true; + bool isMultiHead = false; + Semaphore sem(GetMaxParallelism()); + int rc; + + for(s3fscurllist_t::iterator iter = clist_req.begin(); iter != clist_req.end(); ++iter) { + pthread_t thread; + S3fsCurl* s3fscurl = iter->get(); + if(!s3fscurl){ + continue; + } + + sem.wait(); + + { + AutoLock lock(&completed_tids_lock); + for(std::vector::iterator it = completed_tids.begin(); it != completed_tids.end(); ++it){ + void* retval; + + rc = pthread_join(*it, &retval); + if (rc) { + success = false; + S3FS_PRN_ERR("failed pthread_join - rc(%d) %s", rc, strerror(rc)); + } else { + long int_retval = reinterpret_cast(retval); + if (int_retval && !(int_retval == -ENOENT && isMultiHead)) { + S3FS_PRN_WARN("thread terminated with non-zero return code: %ld", int_retval); + } + } + } + completed_tids.clear(); + } + s3fscurl->sem = &sem; + s3fscurl->completed_tids_lock = &completed_tids_lock; + s3fscurl->completed_tids = &completed_tids; + + isMultiHead |= s3fscurl->GetOp() == "HEAD"; + 
+ rc = pthread_create(&thread, nullptr, S3fsMultiCurl::RequestPerformWrapper, static_cast(s3fscurl)); + if (rc != 0) { + success = false; + S3FS_PRN_ERR("failed pthread_create - rc(%d)", rc); + break; + } + threads.push_back(thread); + } + + for(int i = 0; i < sem.get_value(); ++i){ + sem.wait(); + } + + AutoLock lock(&completed_tids_lock); + for (std::vector::iterator titer = completed_tids.begin(); titer != completed_tids.end(); ++titer) { + void* retval; + + rc = pthread_join(*titer, &retval); + if (rc) { + success = false; + S3FS_PRN_ERR("failed pthread_join - rc(%d)", rc); + } else { + long int_retval = reinterpret_cast(retval); + if (int_retval && !(int_retval == -ENOENT && isMultiHead)) { + S3FS_PRN_WARN("thread terminated with non-zero return code: %ld", int_retval); + } + } + } + completed_tids.clear(); + + return success ? 0 : -EIO; +} + +int S3fsMultiCurl::MultiRead() +{ + int result = 0; + + for(s3fscurllist_t::iterator iter = clist_req.begin(); iter != clist_req.end(); ){ + std::unique_ptr s3fscurl(std::move(*iter)); + + bool isRetry = false; + bool isPostpone = false; + bool isNeedResetOffset = true; + long responseCode = S3fsCurl::S3FSCURL_RESPONSECODE_NOTSET; + CURLcode curlCode = s3fscurl->GetCurlCode(); + + if(s3fscurl->GetResponseCode(responseCode, false) && curlCode == CURLE_OK){ + if(S3fsCurl::S3FSCURL_RESPONSECODE_NOTSET == responseCode){ + // This is a case where the processing result has not yet been updated (should be very rare). 
+ isPostpone = true; + }else if(400 > responseCode){ + // add into stat cache + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownPointerToBool + if(SuccessCallback && !SuccessCallback(s3fscurl.get(), pSuccessCallbackParam)){ + S3FS_PRN_WARN("error from success callback function(%s).", s3fscurl->url.c_str()); + } + }else if(400 == responseCode){ + // as possibly in multipart + S3FS_PRN_WARN("failed a request(%ld: %s)", responseCode, s3fscurl->url.c_str()); + isRetry = true; + }else if(404 == responseCode){ + // not found + // HEAD requests on readdir_multi_head can return 404 + if(s3fscurl->GetOp() != "HEAD"){ + S3FS_PRN_WARN("failed a request(%ld: %s)", responseCode, s3fscurl->url.c_str()); + } + // Call callback function + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownPointerToBool + if(NotFoundCallback && !NotFoundCallback(s3fscurl.get(), pNotFoundCallbackParam)){ + S3FS_PRN_WARN("error from not found callback function(%s).", s3fscurl->url.c_str()); + } + }else if(500 == responseCode){ + // case of all other result, do retry.(11/13/2013) + // because it was found that s3fs got 500 error from S3, but could success + // to retry it. + S3FS_PRN_WARN("failed a request(%ld: %s)", responseCode, s3fscurl->url.c_str()); + isRetry = true; + }else{ + // Retry in other case. 
+ S3FS_PRN_WARN("failed a request(%ld: %s)", responseCode, s3fscurl->url.c_str()); + isRetry = true; + } + }else{ + S3FS_PRN_ERR("failed a request(Unknown response code: %s)", s3fscurl->url.c_str()); + // Reuse partical file + switch(curlCode){ + case CURLE_OPERATION_TIMEDOUT: + isRetry = true; + isNeedResetOffset = false; + break; + + case CURLE_PARTIAL_FILE: + isRetry = true; + isNeedResetOffset = false; + break; + + default: + S3FS_PRN_ERR("###curlCode: %d msg: %s", curlCode, curl_easy_strerror(curlCode)); + isRetry = true; + break; + } + } + + if(isPostpone){ + clist_req.erase(iter); + clist_req.push_back(std::move(s3fscurl)); // Re-evaluate at the end + iter = clist_req.begin(); + }else{ + if(!isRetry || (!not_abort && 0 != result)){ + // If an EIO error has already occurred, it will be terminated + // immediately even if retry processing is required. + s3fscurl->DestroyCurlHandle(); + }else{ + // Reset offset + if(isNeedResetOffset){ + S3fsCurl::ResetOffset(s3fscurl.get()); + } + + // For retry + std::unique_ptr retrycurl; + const S3fsCurl* retrycurl_ptr = retrycurl.get(); // save this due to std::move below + if(RetryCallback){ + retrycurl = RetryCallback(s3fscurl.get()); + if(nullptr != retrycurl){ + clist_all.push_back(std::move(retrycurl)); + }else{ + // set EIO and wait for other parts. + result = -EIO; + } + } + // cppcheck-suppress mismatchingContainers + if(s3fscurl.get() != retrycurl_ptr){ + s3fscurl->DestroyCurlHandle(); + } + } + iter = clist_req.erase(iter); + } + } + clist_req.clear(); + + if(!not_abort && 0 != result){ + // If an EIO error has already occurred, clear all retry objects. + for(s3fscurllist_t::iterator iter = clist_all.begin(); iter != clist_all.end(); ++iter){ + S3fsCurl* s3fscurl = iter->get(); + s3fscurl->DestroyCurlHandle(); + } + clist_all.clear(); + } + return result; +} + +int S3fsMultiCurl::Request() +{ + S3FS_PRN_INFO3("[count=%zu]", clist_all.size()); + + // Make request list. 
+ // + // Send multi request loop( with retry ) + // (When many request is sends, sometimes gets "Couldn't connect to server") + // + while(!clist_all.empty()){ + // set curl handle to multi handle + int result; + s3fscurllist_t::iterator iter; + for(iter = clist_all.begin(); iter != clist_all.end(); ++iter){ + clist_req.push_back(std::move(*iter)); + } + clist_all.clear(); + + // Send multi request. + if(0 != (result = MultiPerform())){ + Clear(); + return result; + } + + // Read the result + if(0 != (result = MultiRead())){ + Clear(); + return result; + } + + // Cleanup curl handle in multi handle + ClearEx(false); + } + return 0; +} + +// +// thread function for performing an S3fsCurl request +// +void* S3fsMultiCurl::RequestPerformWrapper(void* arg) +{ + S3fsCurl* s3fscurl= static_cast(arg); + void* result = nullptr; + if(!s3fscurl){ + return reinterpret_cast(static_cast(-EIO)); + } + if(s3fscurl->fpLazySetup){ + if(!s3fscurl->fpLazySetup(s3fscurl)){ + S3FS_PRN_ERR("Failed to lazy setup, then respond EIO."); + result = reinterpret_cast(static_cast(-EIO)); + } + } + + if(!result){ + result = reinterpret_cast(static_cast(s3fscurl->RequestPerform())); + s3fscurl->DestroyCurlHandle(true, false); + } + + AutoLock lock(s3fscurl->completed_tids_lock); + s3fscurl->completed_tids->push_back(pthread_self()); + s3fscurl->sem->post(); + + return result; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/curl_multi.h b/s3fs/curl_multi.h new file mode 100644 index 0000000..604f0b1 --- /dev/null +++ b/s3fs/curl_multi.h @@ -0,0 +1,90 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your 
option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef S3FS_CURL_MULTI_H_
+#define S3FS_CURL_MULTI_H_
+
+#include <memory>
+#include <vector>
+
+//----------------------------------------------
+// Typedef
+//----------------------------------------------
+class S3fsCurl;
+
+typedef std::vector<std::unique_ptr<S3fsCurl>> s3fscurllist_t;
+typedef bool (*S3fsMultiSuccessCallback)(S3fsCurl* s3fscurl, void* param);           // callback for succeed multi request
+typedef bool (*S3fsMultiNotFoundCallback)(S3fsCurl* s3fscurl, void* param);          // callback for not found object in multi request
+typedef std::unique_ptr<S3fsCurl> (*S3fsMultiRetryCallback)(S3fsCurl* s3fscurl);     // callback for failure and retrying
+
+//----------------------------------------------
+// class S3fsMultiCurl
+//----------------------------------------------
+class S3fsMultiCurl
+{
+    private:
+        const int maxParallelism;
+
+        s3fscurllist_t clist_all;   // all of curl requests
+        s3fscurllist_t clist_req;   // curl requests are sent
+        bool           not_abort;   // complete all requests without aborting on errors
+
+        S3fsMultiSuccessCallback  SuccessCallback;
+        S3fsMultiNotFoundCallback NotFoundCallback;
+        S3fsMultiRetryCallback    RetryCallback;
+        void*                     pSuccessCallbackParam;
+        void*                     pNotFoundCallbackParam;
+
+        pthread_mutex_t        completed_tids_lock;
+        std::vector<pthread_t> completed_tids;
+
+    private:
+        bool ClearEx(bool is_all);
+        int MultiPerform();
+        int MultiRead();
+
+        static void* RequestPerformWrapper(void* arg);
+
+    public:
+        explicit S3fsMultiCurl(int maxParallelism, bool not_abort = false);
+        ~S3fsMultiCurl();
+
+        int GetMaxParallelism() const { return maxParallelism; }
+
+        S3fsMultiSuccessCallback SetSuccessCallback(S3fsMultiSuccessCallback function);
+        S3fsMultiNotFoundCallback SetNotFoundCallback(S3fsMultiNotFoundCallback function);
+        S3fsMultiRetryCallback SetRetryCallback(S3fsMultiRetryCallback function);
+        void* SetSuccessCallbackParam(void* param);
+        void* SetNotFoundCallbackParam(void* param);
+        bool Clear() { return ClearEx(true); }
+        bool SetS3fsCurlObject(std::unique_ptr<S3fsCurl> s3fscurl);
+        int Request();
+};
+
+#endif // S3FS_CURL_MULTI_H_
+
+/*
+* Local variables:
+* tab-width: 4
+* c-basic-offset: 4
+* End:
+* vim600: expandtab sw=4 ts=4 fdm=marker
+* vim<600: expandtab sw=4 ts=4
+*/
diff --git a/s3fs/curl_util.cpp b/s3fs/curl_util.cpp
new file mode 100644
index 0000000..bfd0244
--- /dev/null
+++ b/s3fs/curl_util.cpp
@@ -0,0 +1,334 @@
+/*
+ * s3fs - FUSE-based file system backed by Amazon S3
+ *
+ * Copyright(C) 2007 Randy Rizun <rrizun@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <cstdlib>
+#include <cstring>
+#include <string>
+
+#include "common.h"
+#include "s3fs_logger.h"
+#include "curl_util.h"
+#include "string_util.h"
+#include "s3fs_auth.h"
+#include "s3fs_cred.h"
+
+//-------------------------------------------------------------------
+// Utility Functions
+//-------------------------------------------------------------------
+//
+// curl_slist_sort_insert
+// This function is like curl_slist_append function, but this adds data by a-sorting.
+// Because AWS signature needs sorted header.
+//
+struct curl_slist* curl_slist_sort_insert(struct curl_slist* list, const char* key, const char* value)
+{
+    if(!key){
+        return list;
+    }
+
+    // key & value are trimmed and lower (only key)
+    std::string strkey = trim(key);
+    std::string strval = value ? trim(value) : "";
+    std::string strnew = key + std::string(": ") + strval;
+    char* data;
+    if(nullptr == (data = strdup(strnew.c_str()))){
+        return list;
+    }
+
+    struct curl_slist **p = &list;
+    for(;*p; p = &(*p)->next){
+        std::string strcur = (*p)->data;
+        size_t pos;
+        if(std::string::npos != (pos = strcur.find(':', 0))){
+            strcur.erase(pos);
+        }
+
+        int result = strcasecmp(strkey.c_str(), strcur.c_str());
+        if(0 == result){
+            // same header found, replace it with new data
+            free((*p)->data);
+            (*p)->data = data;
+            return list;
+        }else if(result < 0){
+            break;
+        }
+    }
+
+    struct curl_slist* new_item;
+    // Must use malloc since curl_slist_free_all calls free.
+    if(nullptr == (new_item = static_cast<struct curl_slist*>(malloc(sizeof(*new_item))))){
+        free(data);
+        return list;
+    }
+
+    struct curl_slist* before = *p;
+    *p             = new_item;
+    new_item->data = data;
+    new_item->next = before;
+
+    return list;
+}
+
+struct curl_slist* curl_slist_remove(struct curl_slist* list, const char* key)
+{
+    if(!key){
+        return list;
+    }
+
+    std::string strkey = trim(key);
+    struct curl_slist **p = &list;
+    while(*p){
+        std::string strcur = (*p)->data;
+        size_t pos;
+        if(std::string::npos != (pos = strcur.find(':', 0))){
+            strcur.erase(pos);
+        }
+
+        int result = strcasecmp(strkey.c_str(), strcur.c_str());
+        if(0 == result){
+            free((*p)->data);
+            struct curl_slist *tmp = *p;
+            *p = (*p)->next;
+            free(tmp);
+        }else{
+            p = &(*p)->next;
+        }
+    }
+
+    return list;
+}
+
+std::string get_sorted_header_keys(const struct curl_slist* list)
+{
+    std::string sorted_headers;
+
+    if(!list){
+        return sorted_headers;
+    }
+
+    for( ; list; list = list->next){
+        std::string strkey = list->data;
+        size_t pos;
+        if(std::string::npos != (pos = strkey.find(':', 0))){
+            if (trim(strkey.substr(pos + 1)).empty()) {
+                // skip empty-value headers (as they are discarded by libcurl)
+                continue;
+            }
+            strkey.erase(pos);
+        }
+        if(!sorted_headers.empty()){
+            sorted_headers += ";";
+        }
+        sorted_headers += lower(strkey);
+    }
+
+    return sorted_headers;
+}
+
+std::string get_header_value(const struct curl_slist* list, const std::string &key)
+{
+    if(!list){
+        return "";
+    }
+
+    for( ; list; list = list->next){
+        std::string strkey = list->data;
+        size_t pos;
+        if(std::string::npos != (pos = strkey.find(':', 0))){
+            if(0 == strcasecmp(trim(strkey.substr(0, pos)).c_str(), key.c_str())){
+                return trim(strkey.substr(pos+1));
+            }
+        }
+    }
+
+    return "";
+}
+
+std::string get_canonical_headers(const struct curl_slist* list, bool only_amz)
+{
+    std::string canonical_headers;
+
+    if(!list){
+        canonical_headers = "\n";
+        return canonical_headers;
+    }
+
+    for( ; list; list = list->next){
+        std::string strhead = list->data;
+        size_t pos;
+        if(std::string::npos != (pos = strhead.find(':', 0))){
+            std::string strkey = trim(lower(strhead.substr(0, pos)));
+            std::string strval = trim(strhead.substr(pos + 1));
+            if (strval.empty()) {
+                // skip empty-value headers (as they are discarded by libcurl)
+                continue;
+            }
+            strhead = strkey;
+            strhead += ":";
+            strhead += strval;
+        }else{
+            strhead = trim(lower(strhead));
+        }
+        if(only_amz && strhead.substr(0, 5) != "x-amz"){
+            continue;
+        }
+        canonical_headers += strhead;
+        canonical_headers += "\n";
+    }
+    return canonical_headers;
+}
+
+// function for using global values
+bool MakeUrlResource(const char* realpath, std::string& resourcepath, std::string& url)
+{
+    if(!realpath){
+        return false;
+    }
+    resourcepath = urlEncodePath(service_path + S3fsCred::GetBucket() + realpath);
+    url          = s3host + resourcepath;
+    return true;
+}
+
+std::string prepare_url(const char* url)
+{
+    S3FS_PRN_INFO3("URL is %s", url);
+
+    std::string uri;
+    std::string hostname;
+    std::string path;
+    std::string url_str = url;
+    std::string token   = "/" + S3fsCred::GetBucket();
+    size_t bucket_pos;
+    size_t bucket_length = token.size();
+    size_t uri_length    = 0;
+
+    if(!strncasecmp(url_str.c_str(), "https://", 8)){
+        uri_length = 8;
+    } else if(!strncasecmp(url_str.c_str(), "http://", 7)) {
+        uri_length = 7;
+    }
+    uri        = url_str.substr(0, uri_length);
+    bucket_pos = url_str.find(token, uri_length);
+
+    if(!pathrequeststyle){
+        // virtual-hosted style: bucket name becomes part of the hostname
+        hostname = S3fsCred::GetBucket() + "." + url_str.substr(uri_length, bucket_pos - uri_length);
+        path     = url_str.substr((bucket_pos + bucket_length));
+    }else{
+        // path style: bucket name stays in the path component
+        hostname = url_str.substr(uri_length, bucket_pos - uri_length);
+        std::string part = url_str.substr((bucket_pos + bucket_length));
+        if('/' != part[0]){
+            part = "/" + part;
+        }
+        path = "/" + S3fsCred::GetBucket() + part;
+    }
+
+    url_str = uri + hostname + path;
+
+    S3FS_PRN_INFO3("URL changed is %s", url_str.c_str());
+
+    return url_str;
+}
+
+bool make_md5_from_binary(const char* pstr, size_t length, std::string& md5)
+{
+    if(!pstr || '\0' == pstr[0]){
+        S3FS_PRN_ERR("Parameter is wrong.");
+        return false;
+    }
+    md5_t binary;
+    if(!s3fs_md5(reinterpret_cast<const unsigned char*>(pstr), length, &binary)){
+        return false;
+    }
+
+    md5 = s3fs_base64(binary.data(), binary.size());
+    return true;
+}
+
+std::string url_to_host(const std::string &url)
+{
+    S3FS_PRN_INFO3("url is %s", url.c_str());
+
+    static constexpr char HTTP[] = "http://";
+    static constexpr char HTTPS[] = "https://";
+    std::string hostname;
+
+    if (is_prefix(url.c_str(), HTTP)) {
+        hostname = url.substr(sizeof(HTTP) - 1);
+    } else if (is_prefix(url.c_str(), HTTPS)) {
+        hostname = url.substr(sizeof(HTTPS) - 1);
+    } else {
+        S3FS_PRN_EXIT("url does not begin with http:// or https://");
+        abort();
+    }
+
+    size_t idx;
+    if ((idx = hostname.find('/')) != std::string::npos) {
+        return hostname.substr(0, idx);
+    } else {
+        return hostname;
+    }
+}
+
+std::string get_bucket_host()
+{
+    if(!pathrequeststyle){
+        return S3fsCred::GetBucket() + "." + url_to_host(s3host);
+    }
+    return url_to_host(s3host);
+}
+
+const char* getCurlDebugHead(curl_infotype type)
+{
+    const char* unknown = "";
+    const char* dataIn  = "BODY <";
+    const char* dataOut = "BODY >";
+    const char* headIn  = "<";
+    const char* headOut = ">";
+
+    switch(type){
+        case CURLINFO_DATA_IN:
+            return dataIn;
+        case CURLINFO_DATA_OUT:
+            return dataOut;
+        case CURLINFO_HEADER_IN:
+            return headIn;
+        case CURLINFO_HEADER_OUT:
+            return headOut;
+        default:
+            break;
+    }
+    return unknown;
+}
+
+//
+// compare ETag ignoring quotes and case
+//
+bool etag_equals(const std::string& s1, const std::string& s2)
+{
+    return 0 == strcasecmp(peeloff(s1).c_str(), peeloff(s2).c_str());
+}
+
+/*
+* Local variables:
+* tab-width: 4
+* c-basic-offset: 4
+* End:
+* vim600: expandtab sw=4 ts=4 fdm=marker
+* vim<600: expandtab sw=4 ts=4
+*/
diff --git a/s3fs/curl_util.h b/s3fs/curl_util.h
new file mode 100644
index 0000000..596c6d4
--- /dev/null
+++ b/s3fs/curl_util.h
@@ -0,0 +1,56 @@
+/*
+ * s3fs - FUSE-based file system backed by Amazon S3
+ *
+ * Copyright(C) 2007 Randy Rizun <rrizun@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef S3FS_CURL_UTIL_H_
+#define S3FS_CURL_UTIL_H_
+
+#include <curl/curl.h>
+
+enum class sse_type_t;
+
+//----------------------------------------------
+// Functions
+//----------------------------------------------
+struct curl_slist* curl_slist_sort_insert(struct curl_slist* list, const char* key, const char* value);
+struct curl_slist* curl_slist_remove(struct curl_slist* list, const char* key);
+std::string get_sorted_header_keys(const struct curl_slist* list);
+std::string get_canonical_headers(const struct curl_slist* list, bool only_amz = false);
+std::string get_header_value(const struct curl_slist* list, const std::string &key);
+bool MakeUrlResource(const char* realpath, std::string& resourcepath, std::string& url);
+std::string prepare_url(const char* url);
+bool get_object_sse_type(const char* path, sse_type_t& ssetype, std::string& ssevalue);   // implement in s3fs.cpp
+
+bool make_md5_from_binary(const char* pstr, size_t length, std::string& md5);
+std::string url_to_host(const std::string &url);
+std::string get_bucket_host();
+const char* getCurlDebugHead(curl_infotype type);
+
+bool etag_equals(const std::string& s1, const std::string& s2);
+
+#endif // S3FS_CURL_UTIL_H_
+
+/*
+* Local variables:
+* tab-width: 4
+* c-basic-offset: 4
+* End:
+* vim600: expandtab sw=4 ts=4 fdm=marker
+* vim<600: expandtab sw=4 ts=4
+*/
diff --git a/s3fs/fdcache.cpp b/s3fs/fdcache.cpp
new file mode 100644
index 0000000..77630d6
--- /dev/null
+++ b/s3fs/fdcache.cpp
@@ -0,0 +1,1157 @@
+/*
+ * s3fs - FUSE-based file system backed by Amazon S3
+ *
+ * Copyright(C) 2007 Takeshi Nakatani <ggtakec@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fdcache.h" +#include "fdcache_stat.h" +#include "s3fs_util.h" +#include "s3fs_logger.h" +#include "s3fs_cred.h" +#include "string_util.h" +#include "autolock.h" + +// +// The following symbols are used by FdManager::RawCheckAllCache(). +// +// These must be #defines due to string literal concatenation. +#define CACHEDBG_FMT_HEAD "---------------------------------------------------------------------------\n" \ + "Check cache file and its stats file consistency at %s\n" \ + "---------------------------------------------------------------------------" +#define CACHEDBG_FMT_FOOT "---------------------------------------------------------------------------\n" \ + "Summary - Total files: %d\n" \ + " Detected error files: %d\n" \ + " Detected error directories: %d\n" \ + "---------------------------------------------------------------------------" +#define CACHEDBG_FMT_FILE_OK "File: %s%s -> [OK] no problem" +#define CACHEDBG_FMT_FILE_PROB "File: %s%s" +#define CACHEDBG_FMT_DIR_PROB "Directory: %s" +#define CACHEDBG_FMT_ERR_HEAD " -> [E] there is a mark that data exists in stats, but there is no data in the cache file." +#define CACHEDBG_FMT_WARN_HEAD " -> [W] These show no data in stats, but there is evidence of data in the cache file(no problem)." +#define CACHEDBG_FMT_WARN_OPEN "\n -> [W] This file is currently open and may not provide accurate analysis results." 
+#define CACHEDBG_FMT_CRIT_HEAD " -> [C] %s" +#define CACHEDBG_FMT_CRIT_HEAD2 " -> [C] " +#define CACHEDBG_FMT_PROB_BLOCK " 0x%016zx(0x%016zx bytes)" + +// [NOTE] +// NOCACHE_PATH_PREFIX symbol needs for not using cache mode. +// Now s3fs I/F functions in s3fs.cpp has left the processing +// to FdManager and FdEntity class. FdManager class manages +// the list of local file stat and file descriptor in conjunction +// with the FdEntity class. +// When s3fs is not using local cache, it means FdManager must +// return new temporary file descriptor at each opening it. +// Then FdManager caches fd by key which is dummy file path +// instead of real file path. +// This process may not be complete, but it is easy way can +// be realized. +// +static constexpr char NOCACHE_PATH_PREFIX_FORM[] = " __S3FS_UNEXISTED_PATH_%lx__ / "; // important space words for simply + +//------------------------------------------------ +// FdManager class variable +//------------------------------------------------ +FdManager FdManager::singleton; +pthread_mutex_t FdManager::fd_manager_lock; +pthread_mutex_t FdManager::cache_cleanup_lock; +pthread_mutex_t FdManager::reserved_diskspace_lock; +bool FdManager::is_lock_init(false); +std::string FdManager::cache_dir; +bool FdManager::check_cache_dir_exist(false); +off_t FdManager::free_disk_space = 0; +off_t FdManager::fake_used_disk_space = 0; +std::string FdManager::check_cache_output; +bool FdManager::checked_lseek(false); +bool FdManager::have_lseek_hole(false); +std::string FdManager::tmp_dir = "/tmp"; + +//------------------------------------------------ +// FdManager class methods +//------------------------------------------------ +bool FdManager::SetCacheDir(const char* dir) +{ + if(!dir || '\0' == dir[0]){ + cache_dir = ""; + }else{ + cache_dir = dir; + } + return true; +} + +bool FdManager::SetCacheCheckOutput(const char* path) +{ + if(!path || '\0' == path[0]){ + check_cache_output.erase(); + }else{ + check_cache_output = path; + } + 
return true; +} + +bool FdManager::DeleteCacheDirectory() +{ + if(FdManager::cache_dir.empty()){ + return true; + } + + std::string cache_path; + if(!FdManager::MakeCachePath(nullptr, cache_path, false)){ + return false; + } + if(!delete_files_in_dir(cache_path.c_str(), true)){ + return false; + } + + std::string mirror_path = FdManager::cache_dir + "/." + S3fsCred::GetBucket() + ".mirror"; + if(!delete_files_in_dir(mirror_path.c_str(), true)){ + return false; + } + + return true; +} + +int FdManager::DeleteCacheFile(const char* path) +{ + S3FS_PRN_INFO3("[path=%s]", SAFESTRPTR(path)); + + if(!path){ + return -EIO; + } + if(FdManager::cache_dir.empty()){ + return 0; + } + std::string cache_path; + if(!FdManager::MakeCachePath(path, cache_path, false)){ + return 0; + } + int result = 0; + if(0 != unlink(cache_path.c_str())){ + if(ENOENT == errno){ + S3FS_PRN_DBG("failed to delete file(%s): errno=%d", path, errno); + }else{ + S3FS_PRN_ERR("failed to delete file(%s): errno=%d", path, errno); + } + return -errno; + } + if(0 != (result = CacheFileStat::DeleteCacheFileStat(path))){ + if(-ENOENT == result){ + S3FS_PRN_DBG("failed to delete stat file(%s): errno=%d", path, result); + }else{ + S3FS_PRN_ERR("failed to delete stat file(%s): errno=%d", path, result); + } + } + return result; +} + +bool FdManager::MakeCachePath(const char* path, std::string& cache_path, bool is_create_dir, bool is_mirror_path) +{ + if(FdManager::cache_dir.empty()){ + cache_path = ""; + return true; + } + + std::string resolved_path(FdManager::cache_dir); + if(!is_mirror_path){ + resolved_path += "/"; + resolved_path += S3fsCred::GetBucket(); + }else{ + resolved_path += "/."; + resolved_path += S3fsCred::GetBucket(); + resolved_path += ".mirror"; + } + + if(is_create_dir){ + int result; + if(0 != (result = mkdirp(resolved_path + mydirname(path), 0777))){ + S3FS_PRN_ERR("failed to create dir(%s) by errno(%d).", path, result); + return false; + } + } + if(!path || '\0' == path[0]){ + cache_path = 
resolved_path; + }else{ + cache_path = resolved_path + SAFESTRPTR(path); + } + return true; +} + +bool FdManager::CheckCacheTopDir() +{ + if(FdManager::cache_dir.empty()){ + return true; + } + std::string toppath(FdManager::cache_dir + "/" + S3fsCred::GetBucket()); + + return check_exist_dir_permission(toppath.c_str()); +} + +bool FdManager::MakeRandomTempPath(const char* path, std::string& tmppath) +{ + char szBuff[64]; + + snprintf(szBuff, sizeof(szBuff), NOCACHE_PATH_PREFIX_FORM, random()); // worry for performance, but maybe don't worry. + szBuff[sizeof(szBuff) - 1] = '\0'; // for safety + tmppath = szBuff; + tmppath += path ? path : ""; + return true; +} + +bool FdManager::SetCheckCacheDirExist(bool is_check) +{ + bool old = FdManager::check_cache_dir_exist; + FdManager::check_cache_dir_exist = is_check; + return old; +} + +bool FdManager::CheckCacheDirExist() +{ + if(!FdManager::check_cache_dir_exist){ + return true; + } + if(FdManager::cache_dir.empty()){ + return true; + } + return IsDir(&cache_dir); +} + +off_t FdManager::GetEnsureFreeDiskSpace() +{ + AutoLock auto_lock(&FdManager::reserved_diskspace_lock); + return FdManager::free_disk_space; +} + +off_t FdManager::SetEnsureFreeDiskSpace(off_t size) +{ + AutoLock auto_lock(&FdManager::reserved_diskspace_lock); + off_t old = FdManager::free_disk_space; + FdManager::free_disk_space = size; + return old; +} + +bool FdManager::InitFakeUsedDiskSize(off_t fake_freesize) +{ + FdManager::fake_used_disk_space = 0; // At first, clear this value because this value is used in GetFreeDiskSpace. 
+ + off_t actual_freesize = FdManager::GetFreeDiskSpace(nullptr); + + if(fake_freesize < actual_freesize){ + FdManager::fake_used_disk_space = actual_freesize - fake_freesize; + }else{ + FdManager::fake_used_disk_space = 0; + } + return true; +} + +off_t FdManager::GetTotalDiskSpaceByRatio(int ratio) +{ + return FdManager::GetTotalDiskSpace(nullptr) * ratio / 100; +} + +off_t FdManager::GetTotalDiskSpace(const char* path) +{ + struct statvfs vfsbuf; + int result = FdManager::GetVfsStat(path, &vfsbuf); + if(result == -1){ + return 0; + } + + off_t actual_totalsize = vfsbuf.f_blocks * vfsbuf.f_frsize; + + return actual_totalsize; +} + +off_t FdManager::GetFreeDiskSpace(const char* path) +{ + struct statvfs vfsbuf; + int result = FdManager::GetVfsStat(path, &vfsbuf); + if(result == -1){ + return 0; + } + + off_t actual_freesize = vfsbuf.f_bavail * vfsbuf.f_frsize; + + return (FdManager::fake_used_disk_space < actual_freesize ? (actual_freesize - FdManager::fake_used_disk_space) : 0); +} + +int FdManager::GetVfsStat(const char* path, struct statvfs* vfsbuf){ + std::string ctoppath; + if(!FdManager::cache_dir.empty()){ + ctoppath = FdManager::cache_dir + "/"; + ctoppath = get_exist_directory_path(ctoppath); // existed directory + if(ctoppath != "/"){ + ctoppath += "/"; + } + }else{ + ctoppath = tmp_dir + "/"; + } + if(path && '\0' != *path){ + ctoppath += path; + }else{ + ctoppath += "."; + } + if(-1 == statvfs(ctoppath.c_str(), vfsbuf)){ + S3FS_PRN_ERR("could not get vfs stat by errno(%d)", errno); + return -1; + } + + return 0; +} + +bool FdManager::IsSafeDiskSpace(const char* path, off_t size) +{ + off_t fsize = FdManager::GetFreeDiskSpace(path); + return size + FdManager::GetEnsureFreeDiskSpace() <= fsize; +} + +bool FdManager::IsSafeDiskSpaceWithLog(const char* path, off_t size) +{ + off_t fsize = FdManager::GetFreeDiskSpace(path); + off_t needsize = size + FdManager::GetEnsureFreeDiskSpace(); + if(needsize <= fsize){ + return true; + } else { + 
S3FS_PRN_EXIT("There is no enough disk space for used as cache(or temporary) directory by s3fs. Requires %.3f MB, already has %.3f MB.", static_cast(needsize) / 1024 / 1024, static_cast(fsize) / 1024 / 1024); + return false; + } +} + +bool FdManager::HaveLseekHole() +{ + if(FdManager::checked_lseek){ + return FdManager::have_lseek_hole; + } + + // create temporary file + int fd; + std::unique_ptr ptmpfp(MakeTempFile(), &s3fs_fclose); + if(nullptr == ptmpfp || -1 == (fd = fileno(ptmpfp.get()))){ + S3FS_PRN_ERR("failed to open temporary file by errno(%d)", errno); + FdManager::checked_lseek = true; + FdManager::have_lseek_hole = false; + return false; + } + + // check SEEK_DATA/SEEK_HOLE options + bool result = true; + if(-1 == lseek(fd, 0, SEEK_DATA)){ + if(EINVAL == errno){ + S3FS_PRN_ERR("lseek does not support SEEK_DATA"); + result = false; + } + } + if(result && -1 == lseek(fd, 0, SEEK_HOLE)){ + if(EINVAL == errno){ + S3FS_PRN_ERR("lseek does not support SEEK_HOLE"); + result = false; + } + } + + FdManager::checked_lseek = true; + FdManager::have_lseek_hole = result; + return FdManager::have_lseek_hole; +} + +bool FdManager::SetTmpDir(const char *dir) +{ + if(!dir || '\0' == dir[0]){ + tmp_dir = "/tmp"; + }else{ + tmp_dir = dir; + } + return true; +} + +bool FdManager::IsDir(const std::string* dir) +{ + // check the directory + struct stat st; + if(0 != stat(dir->c_str(), &st)){ + S3FS_PRN_ERR("could not stat() directory %s by errno(%d).", dir->c_str(), errno); + return false; + } + if(!S_ISDIR(st.st_mode)){ + S3FS_PRN_ERR("the directory %s is not a directory.", dir->c_str()); + return false; + } + return true; +} + +bool FdManager::CheckTmpDirExist() +{ + if(FdManager::tmp_dir.empty()){ + return true; + } + return IsDir(&tmp_dir); +} + +FILE* FdManager::MakeTempFile() { + int fd; + char cfn[PATH_MAX]; + std::string fn = tmp_dir + "/s3fstmp.XXXXXX"; + strncpy(cfn, fn.c_str(), sizeof(cfn) - 1); + cfn[sizeof(cfn) - 1] = '\0'; + + fd = mkstemp(cfn); + if (-1 == fd) 
{ + S3FS_PRN_ERR("failed to create tmp file. errno(%d)", errno); + return nullptr; + } + if (-1 == unlink(cfn)) { + S3FS_PRN_ERR("failed to delete tmp file. errno(%d)", errno); + return nullptr; + } + return fdopen(fd, "rb+"); +} + +bool FdManager::HasOpenEntityFd(const char* path) +{ + AutoLock auto_lock(&FdManager::fd_manager_lock); + + const FdEntity* ent; + int fd = -1; + if(nullptr == (ent = FdManager::singleton.GetFdEntity(path, fd, false, AutoLock::ALREADY_LOCKED))){ + return false; + } + return (0 < ent->GetOpenCount()); +} + +// [NOTE] +// Returns the number of open pseudo fd. +// +int FdManager::GetOpenFdCount(const char* path) +{ + AutoLock auto_lock(&FdManager::fd_manager_lock); + + return FdManager::singleton.GetPseudoFdCount(path); +} + +//------------------------------------------------ +// FdManager methods +//------------------------------------------------ +FdManager::FdManager() +{ + if(this == FdManager::get()){ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + int result; + if(0 != (result = pthread_mutex_init(&FdManager::fd_manager_lock, &attr))){ + S3FS_PRN_CRIT("failed to init fd_manager_lock: %d", result); + abort(); + } + if(0 != (result = pthread_mutex_init(&FdManager::cache_cleanup_lock, &attr))){ + S3FS_PRN_CRIT("failed to init cache_cleanup_lock: %d", result); + abort(); + } + if(0 != (result = pthread_mutex_init(&FdManager::reserved_diskspace_lock, &attr))){ + S3FS_PRN_CRIT("failed to init reserved_diskspace_lock: %d", result); + abort(); + } + FdManager::is_lock_init = true; + }else{ + abort(); + } +} + +FdManager::~FdManager() +{ + if(this == FdManager::get()){ + for(fdent_map_t::iterator iter = fent.begin(); fent.end() != iter; ++iter){ + FdEntity* ent = (*iter).second.get(); + S3FS_PRN_WARN("To exit with the cache file opened: path=%s, refcnt=%d", ent->GetPath().c_str(), ent->GetOpenCount()); + } + fent.clear(); + + 
if(FdManager::is_lock_init){ + int result; + if(0 != (result = pthread_mutex_destroy(&FdManager::fd_manager_lock))){ + S3FS_PRN_CRIT("failed to destroy fd_manager_lock: %d", result); + abort(); + } + if(0 != (result = pthread_mutex_destroy(&FdManager::cache_cleanup_lock))){ + S3FS_PRN_CRIT("failed to destroy cache_cleanup_lock: %d", result); + abort(); + } + if(0 != (result = pthread_mutex_destroy(&FdManager::reserved_diskspace_lock))){ + S3FS_PRN_CRIT("failed to destroy reserved_diskspace_lock: %d", result); + abort(); + } + FdManager::is_lock_init = false; + } + }else{ + abort(); + } +} + +FdEntity* FdManager::GetFdEntity(const char* path, int& existfd, bool newfd, AutoLock::Type locktype) +{ + S3FS_PRN_INFO3("[path=%s][pseudo_fd=%d]", SAFESTRPTR(path), existfd); + + if(!path || '\0' == path[0]){ + return nullptr; + } + AutoLock auto_lock(&FdManager::fd_manager_lock, locktype); + + fdent_map_t::iterator iter = fent.find(path); + if(fent.end() != iter && iter->second){ + if(-1 == existfd){ + if(newfd){ + existfd = iter->second->OpenPseudoFd(O_RDWR); // [NOTE] O_RDWR flags + } + return iter->second.get(); + }else if(iter->second->FindPseudoFd(existfd)){ + if(newfd){ + existfd = iter->second->Dup(existfd); + } + return iter->second.get(); + } + } + + if(-1 != existfd){ + for(iter = fent.begin(); iter != fent.end(); ++iter){ + if(iter->second && iter->second->FindPseudoFd(existfd)){ + // found opened fd in map + if(iter->second->GetPath() == path){ + if(newfd){ + existfd = iter->second->Dup(existfd); + } + return iter->second.get(); + } + // found fd, but it is used another file(file descriptor is recycled) + // so returns nullptr. + break; + } + } + } + + // If the cache directory is not specified, s3fs opens a temporary file + // when the file is opened. 
+ if(!FdManager::IsCacheDir()){ + for(iter = fent.begin(); iter != fent.end(); ++iter){ + if(iter->second && iter->second->IsOpen() && iter->second->GetPath() == path){ + return iter->second.get(); + } + } + } + return nullptr; +} + +FdEntity* FdManager::Open(int& fd, const char* path, const headers_t* pmeta, off_t size, const struct timespec& ts_mctime, int flags, bool force_tmpfile, bool is_create, bool ignore_modify, AutoLock::Type type) +{ + S3FS_PRN_DBG("[path=%s][size=%lld][ts_mctime=%s][flags=0x%x][force_tmpfile=%s][create=%s][ignore_modify=%s]", SAFESTRPTR(path), static_cast(size), str(ts_mctime).c_str(), flags, (force_tmpfile ? "yes" : "no"), (is_create ? "yes" : "no"), (ignore_modify ? "yes" : "no")); + + if(!path || '\0' == path[0]){ + return nullptr; + } + + AutoLock auto_lock(&FdManager::fd_manager_lock); + + // search in mapping by key(path) + fdent_map_t::iterator iter = fent.find(path); + if(fent.end() == iter && !force_tmpfile && !FdManager::IsCacheDir()){ + // If the cache directory is not specified, s3fs opens a temporary file + // when the file is opened. + // Then if it could not find a entity in map for the file, s3fs should + // search a entity in all which opened the temporary file. + // + for(iter = fent.begin(); iter != fent.end(); ++iter){ + if(iter->second && iter->second->IsOpen() && iter->second->GetPath() == path){ + break; // found opened fd in mapping + } + } + } + + if(fent.end() != iter){ + // found + FdEntity* ent = iter->second.get(); + + // [NOTE] + // If the file is being modified and ignore_modify flag is false, + // the file size will not be changed even if there is a request + // to reduce the size of the modified file. + // If you do, the "test_open_second_fd" test will fail. + // + if(!ignore_modify && ent->IsModified()){ + // If the file is being modified and it's size is larger than size parameter, it will not be resized. 
+ off_t cur_size = 0; + if(ent->GetSize(cur_size) && size <= cur_size){ + size = -1; + } + } + + // (re)open + if(0 > (fd = ent->Open(pmeta, size, ts_mctime, flags, type))){ + S3FS_PRN_ERR("failed to (re)open and create new pseudo fd for path(%s).", path); + return nullptr; + } + + if(use_newcache){ + accessor->Invalidate(path); + } + + return ent; + }else if(is_create){ + // not found + std::string cache_path; + if(!force_tmpfile && !FdManager::MakeCachePath(path, cache_path, true)){ + S3FS_PRN_ERR("failed to make cache path for object(%s).", path); + return nullptr; + } + // make new obj + std::unique_ptr ent(new FdEntity(path, cache_path.c_str())); + // open + if(0 > (fd = ent->Open(pmeta, size, ts_mctime, flags, type))){ + S3FS_PRN_ERR("failed to open and create new pseudo fd for path(%s) errno:%d.", path, fd); + return nullptr; + } + + if(use_newcache){ + ent->UpdateRealsize(size); + } + + if(!cache_path.empty()){ + // using cache + return (fent[path] = std::move(ent)).get(); + }else{ + // not using cache, so the key of fdentity is set not really existing path. + // (but not strictly unexisting path.) + // + // [NOTE] + // The reason why this process here, please look at the definition of the + // comments of NOCACHE_PATH_PREFIX_FORM symbol. + // + std::string tmppath; + FdManager::MakeRandomTempPath(path, tmppath); + return (fent[tmppath] = std::move(ent)).get(); + } + }else{ + return nullptr; + } +} + +// [NOTE] +// This method does not create a new pseudo fd. +// It just finds existfd and returns the corresponding entity. +// +FdEntity* FdManager::GetExistFdEntity(const char* path, int existfd) +{ + S3FS_PRN_DBG("[path=%s][pseudo_fd=%d]", SAFESTRPTR(path), existfd); + + AutoLock auto_lock(&FdManager::fd_manager_lock); + + // search from all entity. 
+ for(fdent_map_t::iterator iter = fent.begin(); iter != fent.end(); ++iter){ + if(iter->second && iter->second->FindPseudoFd(existfd)){ + // found existfd in entity + return iter->second.get(); + } + } + // not found entity + return nullptr; +} + +FdEntity* FdManager::OpenExistFdEntity(const char* path, int& fd, int flags) +{ + S3FS_PRN_DBG("[path=%s][flags=0x%x]", SAFESTRPTR(path), flags); + + // search entity by path, and create pseudo fd + FdEntity* ent = Open(fd, path, nullptr, -1, S3FS_OMIT_TS, flags, false, false, false, AutoLock::NONE); + if(!ent){ + // Not found entity + return nullptr; + } + return ent; +} + +// [NOTE] +// Returns the number of open pseudo fd. +// This method is called from GetOpenFdCount method which is already locked. +// +int FdManager::GetPseudoFdCount(const char* path) +{ + S3FS_PRN_DBG("[path=%s]", SAFESTRPTR(path)); + + if(!path || '\0' == path[0]){ + return 0; + } + + // search from all entity. + for(fdent_map_t::iterator iter = fent.begin(); iter != fent.end(); ++iter){ + if(iter->second && iter->second->GetPath() == path){ + // found the entity for the path + return iter->second->GetOpenCount(); + } + } + // not found entity + return 0; +} + +void FdManager::Rename(const std::string &from, const std::string &to) +{ + AutoLock auto_lock(&FdManager::fd_manager_lock); + + fdent_map_t::iterator iter = fent.find(from); + if(fent.end() == iter && !FdManager::IsCacheDir()){ + // If the cache directory is not specified, s3fs opens a temporary file + // when the file is opened. + // Then if it could not find a entity in map for the file, s3fs should + // search a entity in all which opened the temporary file. 
+        //
+        for(iter = fent.begin(); iter != fent.end(); ++iter){
+            if(iter->second && iter->second->IsOpen() && iter->second->GetPath() == from){
+                break; // found opened fd in mapping
+            }
+        }
+    }
+
+    if(fent.end() != iter){
+        // found
+        S3FS_PRN_DBG("[from=%s][to=%s]", from.c_str(), to.c_str());
+
+        std::unique_ptr<FdEntity> ent(std::move(iter->second));
+
+        // retrieve old fd entity from map
+        fent.erase(iter);
+
+        // rename path and caches in fd entity
+        std::string fentmapkey;
+        if(!ent->RenamePath(to, fentmapkey)){
+            S3FS_PRN_ERR("Failed to rename FdEntity object for %s to %s", from.c_str(), to.c_str());
+            return;
+        }
+
+        // set new fd entity to map
+        fent[fentmapkey] = std::move(ent);
+    }
+}
+
+bool FdManager::Close(FdEntity* ent, int fd)
+{
+    S3FS_PRN_DBG("[ent->file=%s][pseudo_fd=%d]", ent ? ent->GetPath().c_str() : "", fd);
+
+    if(!ent || -1 == fd){
+        return true; // returns success
+    }
+    AutoLock auto_lock(&FdManager::fd_manager_lock);
+
+    for(fdent_map_t::iterator iter = fent.begin(); iter != fent.end(); ++iter){
+        if(iter->second.get() == ent){
+            ent->Close(fd);
+            if(!ent->IsOpen()){
+                // remove found entity from map.
+ iter = fent.erase(iter); + + // check another key name for entity value to be on the safe side + for(; iter != fent.end(); ){ + if(iter->second.get() == ent){ + iter = fent.erase(iter); + }else{ + ++iter; + } + } + } + return true; + } + } + return false; +} + +bool FdManager::ChangeEntityToTempPath(FdEntity* ent, const char* path) +{ + AutoLock auto_lock(&FdManager::fd_manager_lock); + + for(fdent_map_t::iterator iter = fent.begin(); iter != fent.end(); ){ + if(iter->second.get() == ent){ + std::string tmppath; + FdManager::MakeRandomTempPath(path, tmppath); + iter->second.reset(ent); + break; + }else{ + ++iter; + } + } + return false; +} + +void FdManager::CleanupCacheDir() +{ + //S3FS_PRN_DBG("cache cleanup requested"); + + if(!FdManager::IsCacheDir()){ + return; + } + + AutoLock auto_lock_no_wait(&FdManager::cache_cleanup_lock, AutoLock::NO_WAIT); + + if(auto_lock_no_wait.isLockAcquired()){ + //S3FS_PRN_DBG("cache cleanup started"); + CleanupCacheDirInternal(""); + //S3FS_PRN_DBG("cache cleanup ended"); + }else{ + // wait for other thread to finish cache cleanup + AutoLock auto_lock(&FdManager::cache_cleanup_lock); + } +} + +void FdManager::CleanupCacheDirInternal(const std::string &path) +{ + DIR* dp; + struct dirent* dent; + std::string abs_path = cache_dir + "/" + S3fsCred::GetBucket() + path; + + if(nullptr == (dp = opendir(abs_path.c_str()))){ + S3FS_PRN_ERR("could not open cache dir(%s) - errno(%d)", abs_path.c_str(), errno); + return; + } + + for(dent = readdir(dp); dent; dent = readdir(dp)){ + if(0 == strcmp(dent->d_name, "..") || 0 == strcmp(dent->d_name, ".")){ + continue; + } + std::string fullpath = abs_path; + fullpath += "/"; + fullpath += dent->d_name; + struct stat st; + if(0 != lstat(fullpath.c_str(), &st)){ + S3FS_PRN_ERR("could not get stats of file(%s) - errno(%d)", fullpath.c_str(), errno); + closedir(dp); + return; + } + std::string next_path = path + "/" + dent->d_name; + if(S_ISDIR(st.st_mode)){ + CleanupCacheDirInternal(next_path); + 
}else{ + AutoLock auto_lock(&FdManager::fd_manager_lock, AutoLock::NO_WAIT); + if (!auto_lock.isLockAcquired()) { + S3FS_PRN_INFO("could not get fd_manager_lock when clean up file(%s), then skip it.", next_path.c_str()); + continue; + } + fdent_map_t::iterator iter = fent.find(next_path); + if(fent.end() == iter) { + S3FS_PRN_DBG("cleaned up: %s", next_path.c_str()); + FdManager::DeleteCacheFile(next_path.c_str()); + } + } + } + closedir(dp); +} + +bool FdManager::ReserveDiskSpace(off_t size) +{ + if(IsSafeDiskSpace(nullptr, size)){ + AutoLock auto_lock(&FdManager::reserved_diskspace_lock); + free_disk_space += size; + return true; + } + return false; +} + +void FdManager::FreeReservedDiskSpace(off_t size) +{ + AutoLock auto_lock(&FdManager::reserved_diskspace_lock); + free_disk_space -= size; +} + +// +// Inspect all files for stats file for cache file +// +// [NOTE] +// The minimum sub_path parameter is "/". +// The sub_path is a directory path starting from "/" and ending with "/". +// +// This method produces the following output. +// +// * Header +// ------------------------------------------------------------ +// Check cache file and its stats file consistency +// ------------------------------------------------------------ +// * When the cache file and its stats information match +// File path: -> [OK] no problem +// +// * If there is a problem with the cache file and its stats information +// File path: +// -> [P] +// -> [E] there is a mark that data exists in stats, but there is no data in the cache file. +// (bytes) +// ... +// ... +// -> [W] These show no data in stats, but there is evidence of data in the cache file.(no problem.) +// (bytes) +// ... +// ... 
+// +bool FdManager::RawCheckAllCache(FILE* fp, const char* cache_stat_top_dir, const char* sub_path, int& total_file_cnt, int& err_file_cnt, int& err_dir_cnt) +{ + if(!cache_stat_top_dir || '\0' == cache_stat_top_dir[0] || !sub_path || '\0' == sub_path[0]){ + S3FS_PRN_ERR("Parameter cache_stat_top_dir is empty."); + return false; + } + + // open directory of cache file's stats + DIR* statsdir; + std::string target_dir = cache_stat_top_dir; + target_dir += sub_path; + if(nullptr == (statsdir = opendir(target_dir.c_str()))){ + S3FS_PRN_ERR("Could not open directory(%s) by errno(%d)", target_dir.c_str(), errno); + return false; + } + + // loop in directory of cache file's stats + const struct dirent* pdirent = nullptr; + while(nullptr != (pdirent = readdir(statsdir))){ + if(DT_DIR == pdirent->d_type){ + // found directory + if(0 == strcmp(pdirent->d_name, ".") || 0 == strcmp(pdirent->d_name, "..")){ + continue; + } + + // reentrant for sub directory + std::string subdir_path = sub_path; + subdir_path += pdirent->d_name; + subdir_path += '/'; + if(!RawCheckAllCache(fp, cache_stat_top_dir, subdir_path.c_str(), total_file_cnt, err_file_cnt, err_dir_cnt)){ + // put error message for this dir. + ++err_dir_cnt; + S3FS_PRN_CACHE(fp, CACHEDBG_FMT_DIR_PROB, subdir_path.c_str()); + S3FS_PRN_CACHE(fp, CACHEDBG_FMT_CRIT_HEAD, "Something error is occurred in checking this directory"); + } + + }else{ + ++total_file_cnt; + + // make cache file path + std::string strOpenedWarn; + std::string cache_path; + std::string object_file_path = sub_path; + object_file_path += pdirent->d_name; + if(!FdManager::MakeCachePath(object_file_path.c_str(), cache_path, false, false) || cache_path.empty()){ + ++err_file_cnt; + S3FS_PRN_CACHE(fp, CACHEDBG_FMT_FILE_PROB, object_file_path.c_str(), strOpenedWarn.c_str()); + S3FS_PRN_CACHE(fp, CACHEDBG_FMT_CRIT_HEAD, "Could not make cache file path"); + continue; + } + + // check if the target file is currently in operation. 
+ { + AutoLock auto_lock(&FdManager::fd_manager_lock); + + fdent_map_t::iterator iter = fent.find(object_file_path); + if(fent.end() != iter){ + // This file is opened now, then we need to put warning message. + strOpenedWarn = CACHEDBG_FMT_WARN_OPEN; + } + } + + // open cache file + int cache_file_fd; + if(-1 == (cache_file_fd = open(cache_path.c_str(), O_RDONLY))){ + ++err_file_cnt; + S3FS_PRN_CACHE(fp, CACHEDBG_FMT_FILE_PROB, object_file_path.c_str(), strOpenedWarn.c_str()); + S3FS_PRN_CACHE(fp, CACHEDBG_FMT_CRIT_HEAD, "Could not open cache file"); + continue; + } + scope_guard guard([&]() { close(cache_file_fd); }); + + // get inode number for cache file + struct stat st; + if(0 != fstat(cache_file_fd, &st)){ + ++err_file_cnt; + S3FS_PRN_CACHE(fp, CACHEDBG_FMT_FILE_PROB, object_file_path.c_str(), strOpenedWarn.c_str()); + S3FS_PRN_CACHE(fp, CACHEDBG_FMT_CRIT_HEAD, "Could not get file inode number for cache file"); + + continue; + } + ino_t cache_file_inode = st.st_ino; + + // open cache stat file and load page info. 
+            PageList pagelist;
+            CacheFileStat cfstat(object_file_path.c_str());
+            if(!cfstat.ReadOnlyOpen() || !pagelist.Serialize(cfstat, false, cache_file_inode)){
+                ++err_file_cnt;
+                S3FS_PRN_CACHE(fp, CACHEDBG_FMT_FILE_PROB, object_file_path.c_str(), strOpenedWarn.c_str());
+                S3FS_PRN_CACHE(fp, CACHEDBG_FMT_CRIT_HEAD, "Could not load cache file stats information");
+
+                continue;
+            }
+            cfstat.Release();
+
+            // compare cache file size and stats information
+            if(st.st_size != pagelist.Size()){
+                ++err_file_cnt;
+                S3FS_PRN_CACHE(fp, CACHEDBG_FMT_FILE_PROB, object_file_path.c_str(), strOpenedWarn.c_str());
+                S3FS_PRN_CACHE(fp, CACHEDBG_FMT_CRIT_HEAD2 "The cache file size(%lld) and the value(%lld) from cache file stats are different", static_cast<long long>(st.st_size), static_cast<long long>(pagelist.Size()));
+
+                continue;
+            }
+
+            // compare cache file stats and cache file blocks
+            fdpage_list_t err_area_list;
+            fdpage_list_t warn_area_list;
+            if(!pagelist.CompareSparseFile(cache_file_fd, st.st_size, err_area_list, warn_area_list)){
+                // Found some error or warning
+                S3FS_PRN_CACHE(fp, CACHEDBG_FMT_FILE_PROB, object_file_path.c_str(), strOpenedWarn.c_str());
+                if(!warn_area_list.empty()){
+                    S3FS_PRN_CACHE(fp, CACHEDBG_FMT_WARN_HEAD);
+                    for(fdpage_list_t::const_iterator witer = warn_area_list.begin(); witer != warn_area_list.end(); ++witer){
+                        S3FS_PRN_CACHE(fp, CACHEDBG_FMT_PROB_BLOCK, static_cast<long long>(witer->offset), static_cast<long long>(witer->bytes));
+                    }
+                }
+                if(!err_area_list.empty()){
+                    ++err_file_cnt;
+                    S3FS_PRN_CACHE(fp, CACHEDBG_FMT_ERR_HEAD);
+                    for(fdpage_list_t::const_iterator eiter = err_area_list.begin(); eiter != err_area_list.end(); ++eiter){
+                        S3FS_PRN_CACHE(fp, CACHEDBG_FMT_PROB_BLOCK, static_cast<long long>(eiter->offset), static_cast<long long>(eiter->bytes));
+                    }
+                }
+            }else{
+                // There is no problem!
+ if(!strOpenedWarn.empty()){ + strOpenedWarn += "\n "; + } + S3FS_PRN_CACHE(fp, CACHEDBG_FMT_FILE_OK, object_file_path.c_str(), strOpenedWarn.c_str()); + } + err_area_list.clear(); + warn_area_list.clear(); + } + } + closedir(statsdir); + + return true; +} + +bool FdManager::CheckAllCache() +{ + if(!FdManager::HaveLseekHole()){ + S3FS_PRN_ERR("lseek does not support SEEK_DATA/SEEK_HOLE, then could not check cache."); + return false; + } + + FILE* fp; + if(FdManager::check_cache_output.empty()){ + fp = stdout; + }else{ + if(nullptr == (fp = fopen(FdManager::check_cache_output.c_str(), "a+"))){ + S3FS_PRN_ERR("Could not open(create) output file(%s) for checking all cache by errno(%d)", FdManager::check_cache_output.c_str(), errno); + return false; + } + } + + // print head message + S3FS_PRN_CACHE(fp, CACHEDBG_FMT_HEAD, S3fsLog::GetCurrentTime().c_str()); + + // Loop in directory of cache file's stats + std::string top_path = CacheFileStat::GetCacheFileStatTopDir(); + int total_file_cnt = 0; + int err_file_cnt = 0; + int err_dir_cnt = 0; + bool result = RawCheckAllCache(fp, top_path.c_str(), "/", total_file_cnt, err_file_cnt, err_dir_cnt); + if(!result){ + S3FS_PRN_ERR("Processing failed due to some problem."); + } + + // print foot message + S3FS_PRN_CACHE(fp, CACHEDBG_FMT_FOOT, total_file_cnt, err_file_cnt, err_dir_cnt); + + if(stdout != fp){ + fclose(fp); + } + + return result; +} + +void FdManager::ReleaseCache(const std::string &avoid_path, off_t size, const std::string &dir) +{ + DIR* dp; + struct dirent* dent; + std::string abs_path = cache_dir + "/" + S3fsCred::GetBucket() + dir; + + if(nullptr == (dp = opendir(abs_path.c_str()))){ + S3FS_PRN_ERR("could not open cache dir(%s) - errno(%d)", abs_path.c_str(), errno); + return; + } + + for(dent = readdir(dp); dent; dent = readdir(dp)){ + if(GetFreeDiskSpace(nullptr) >= size) + return; + + if(0 == strcmp(dent->d_name, "..") || 0 == strcmp(dent->d_name, ".")){ + continue; + } + std::string fullpath = abs_path; + 
fullpath += "/"; + fullpath += dent->d_name; + struct stat st; + if(0 != lstat(fullpath.c_str(), &st)){ + S3FS_PRN_ERR("could not get stats of file(%s) - errno(%d)", fullpath.c_str(), errno); + continue; + } + std::string next_path = dir + "/" + dent->d_name; + + if(next_path == avoid_path) + continue; + + if(S_ISDIR(st.st_mode)){ + ReleaseCache(avoid_path, size, next_path); + }else{ + AutoLock auto_lock(&FdManager::fd_manager_lock, AutoLock::NO_WAIT); + if (!auto_lock.isLockAcquired()) { + S3FS_PRN_INFO("could not get fd_manager_lock when clean up file(%s), then skip it.", next_path.c_str()); + continue; + } + fdent_map_t::iterator iter = fent.find(next_path); + if(fent.end() == iter) { + S3FS_PRN_DBG("cleaned up: %s", next_path.c_str()); + FdManager::DeleteCacheFile(next_path.c_str()); + }else{ + FdEntity* ent = (*iter).second.get(); + ent->ReleaseCache(); + } + } + } + closedir(dp); +} + +bool FdManager::EnsureDiskSpaceUsable(const std::string &avoid_path, off_t size) +{ + if(GetFreeDiskSpace(nullptr) >= size) + return true; + + ReleaseCache(avoid_path, size, ""); + + return GetFreeDiskSpace(nullptr) >= size; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/fdcache.h b/s3fs/fdcache.h new file mode 100644 index 0000000..3ea8d7a --- /dev/null +++ b/s3fs/fdcache.h @@ -0,0 +1,118 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_FDCACHE_H_ +#define S3FS_FDCACHE_H_ + +#include "fdcache_entity.h" + +//------------------------------------------------ +// class FdManager +//------------------------------------------------ +class FdManager +{ + private: + static FdManager singleton; + static pthread_mutex_t fd_manager_lock; + static pthread_mutex_t cache_cleanup_lock; + static pthread_mutex_t reserved_diskspace_lock; + static bool is_lock_init; + static std::string cache_dir; + static bool check_cache_dir_exist; + static off_t free_disk_space; // limit free disk space + static off_t fake_used_disk_space; // difference between fake free disk space and actual at startup(for test/debug) + static std::string check_cache_output; + static bool checked_lseek; + static bool have_lseek_hole; + static std::string tmp_dir; + + fdent_map_t fent; + + private: + static off_t GetFreeDiskSpace(const char* path); + static off_t GetTotalDiskSpace(const char* path); + static bool IsDir(const std::string* dir); + static int GetVfsStat(const char* path, struct statvfs* vfsbuf); + + int GetPseudoFdCount(const char* path); + void CleanupCacheDirInternal(const std::string &path = ""); + bool RawCheckAllCache(FILE* fp, const char* cache_stat_top_dir, const char* sub_path, int& total_file_cnt, int& err_file_cnt, int& err_dir_cnt); + + void ReleaseCache(const std::string &avoid_path, off_t size, const std::string &dir); + + public: + FdManager(); + ~FdManager(); + + // Reference singleton + static FdManager* get() { return &singleton; } + + static bool DeleteCacheDirectory(); + static int DeleteCacheFile(const char* path); + static bool SetCacheDir(const char* dir); + static bool IsCacheDir() { return 
!FdManager::cache_dir.empty(); } + static const char* GetCacheDir() { return FdManager::cache_dir.c_str(); } + static bool SetCacheCheckOutput(const char* path); + static const char* GetCacheCheckOutput() { return FdManager::check_cache_output.c_str(); } + static bool MakeCachePath(const char* path, std::string& cache_path, bool is_create_dir = true, bool is_mirror_path = false); + static bool CheckCacheTopDir(); + static bool MakeRandomTempPath(const char* path, std::string& tmppath); + static bool SetCheckCacheDirExist(bool is_check); + static bool CheckCacheDirExist(); + static bool HasOpenEntityFd(const char* path); + static int GetOpenFdCount(const char* path); + static off_t GetEnsureFreeDiskSpace(); + static off_t SetEnsureFreeDiskSpace(off_t size); + static bool InitFakeUsedDiskSize(off_t fake_freesize); + static bool IsSafeDiskSpace(const char* path, off_t size); + static bool IsSafeDiskSpaceWithLog(const char* path, off_t size); + static void FreeReservedDiskSpace(off_t size); + static bool ReserveDiskSpace(off_t size); + static bool HaveLseekHole(); + static bool SetTmpDir(const char* dir); + static bool CheckTmpDirExist(); + static FILE* MakeTempFile(); + static off_t GetTotalDiskSpaceByRatio(int ratio); + + // Return FdEntity associated with path, returning nullptr on error. This operation increments the reference count; callers must decrement via Close after use. 
+ FdEntity* GetFdEntity(const char* path, int& existfd, bool newfd = true, AutoLock::Type locktype = AutoLock::NONE); + FdEntity* Open(int& fd, const char* path, const headers_t* pmeta, off_t size, const struct timespec& ts_mctime, int flags, bool force_tmpfile, bool is_create, bool ignore_modify, AutoLock::Type type); + FdEntity* GetExistFdEntity(const char* path, int existfd = -1); + FdEntity* OpenExistFdEntity(const char* path, int& fd, int flags = O_RDONLY); + void Rename(const std::string &from, const std::string &to); + bool Close(FdEntity* ent, int fd); + bool ChangeEntityToTempPath(FdEntity* ent, const char* path); + void CleanupCacheDir(); + + bool CheckAllCache(); + + bool EnsureDiskSpaceUsable(const std::string &avoid_path, off_t size); +}; + +#endif // S3FS_FDCACHE_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/fdcache_auto.cpp b/s3fs/fdcache_auto.cpp new file mode 100644 index 0000000..5a97073 --- /dev/null +++ b/s3fs/fdcache_auto.cpp @@ -0,0 +1,126 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */
+
+#include <cstdio>
+
+#include "s3fs_logger.h"
+#include "fdcache_auto.h"
+#include "fdcache.h"
+
+//------------------------------------------------
+// AutoFdEntity methods
+//------------------------------------------------
+AutoFdEntity::AutoFdEntity() : pFdEntity(nullptr), pseudo_fd(-1)
+{
+}
+
+AutoFdEntity::~AutoFdEntity()
+{
+    Close();
+}
+
+bool AutoFdEntity::Close()
+{
+    if(pFdEntity){
+        if(!FdManager::get()->Close(pFdEntity, pseudo_fd)){
+            S3FS_PRN_ERR("Failed to close fdentity.");
+            return false;
+        }
+        pFdEntity = nullptr;
+        pseudo_fd = -1;
+    }
+    return true;
+}
+
+// [NOTE]
+// This method touches the internal fdentity with.
+// This is used to keep the file open.
+//
+int AutoFdEntity::Detach()
+{
+    if(!pFdEntity){
+        S3FS_PRN_ERR("Does not have a associated FdEntity.");
+        return -1;
+    }
+    int fd = pseudo_fd;
+    pseudo_fd = -1;
+    pFdEntity = nullptr;
+
+    return fd;
+}
+
+FdEntity* AutoFdEntity::Attach(const char* path, int existfd)
+{
+    Close();
+
+    if(nullptr == (pFdEntity = FdManager::get()->GetFdEntity(path, existfd, false))){
+        S3FS_PRN_DBG("Could not find fd entity object(file=%s, pseudo_fd=%d)", path, existfd);
+        return nullptr;
+    }
+    pseudo_fd = existfd;
+    return pFdEntity;
+}
+
+FdEntity* AutoFdEntity::Open(const char* path, const headers_t* pmeta, off_t size, const struct timespec& ts_mctime, int flags, bool force_tmpfile, bool is_create, bool ignore_modify, AutoLock::Type type, int* error)
+{
+    Close();
+
+    if(nullptr == (pFdEntity = FdManager::get()->Open(pseudo_fd, path, pmeta, size, ts_mctime, flags, force_tmpfile, is_create, ignore_modify, type))){
+        if(error){
+            *error = pseudo_fd;
+        }
+        pseudo_fd = -1;
+        return nullptr;
+    }
+    return pFdEntity;
+}
+
+// [NOTE]
+// the fd obtained by this method is not a newly created pseudo fd.
+// +FdEntity* AutoFdEntity::GetExistFdEntity(const char* path, int existfd) +{ + Close(); + + FdEntity* ent; + if(nullptr == (ent = FdManager::get()->GetExistFdEntity(path, existfd))){ + return nullptr; + } + return ent; +} + +FdEntity* AutoFdEntity::OpenExistFdEntity(const char* path, int flags) +{ + Close(); + + if(nullptr == (pFdEntity = FdManager::get()->OpenExistFdEntity(path, pseudo_fd, flags))){ + return nullptr; + } + return pFdEntity; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/fdcache_auto.h b/s3fs/fdcache_auto.h new file mode 100644 index 0000000..c96ee41 --- /dev/null +++ b/s3fs/fdcache_auto.h @@ -0,0 +1,74 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_FDCACHE_AUTO_H_ +#define S3FS_FDCACHE_AUTO_H_ + +#include + +#include "autolock.h" +#include "metaheader.h" + +class FdEntity; + +//------------------------------------------------ +// class AutoFdEntity +//------------------------------------------------ +// A class that opens fdentry and closes it automatically. 
+// This class object is used to prevent inconsistencies in +// the number of references in fdentry. +// The methods are wrappers to the method of the FdManager class. +// +class AutoFdEntity +{ + private: + FdEntity* pFdEntity; + int pseudo_fd; + + private: + AutoFdEntity(const AutoFdEntity&) = delete; + AutoFdEntity(AutoFdEntity&&) = delete; + AutoFdEntity& operator=(const AutoFdEntity&) = delete; + AutoFdEntity& operator=(AutoFdEntity&&) = delete; + + public: + AutoFdEntity(); + ~AutoFdEntity(); + + bool Close(); + int Detach(); + FdEntity* Attach(const char* path, int existfd); + int GetPseudoFd() const { return pseudo_fd; } + + FdEntity* Open(const char* path, const headers_t* pmeta, off_t size, const struct timespec& ts_mctime, int flags, bool force_tmpfile, bool is_create, bool ignore_modify, AutoLock::Type type, int* error = nullptr); + FdEntity* GetExistFdEntity(const char* path, int existfd = -1); + FdEntity* OpenExistFdEntity(const char* path, int flags = O_RDONLY); +}; + +#endif // S3FS_FDCACHE_AUTO_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/fdcache_entity.cpp b/s3fs/fdcache_entity.cpp new file mode 100644 index 0000000..d7d4a6a --- /dev/null +++ b/s3fs/fdcache_entity.cpp @@ -0,0 +1,2907 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "fdcache_entity.h"
+#include "fdcache_stat.h"
+#include "fdcache_untreated.h"
+#include "fdcache.h"
+#include "string_util.h"
+#include "s3fs_logger.h"
+#include "s3fs_util.h"
+#include "autolock.h"
+#include "curl.h"
+#include "s3fs_cred.h"
+
+//------------------------------------------------
+// FdEntity class variables
+//------------------------------------------------
+bool FdEntity::mixmultipart = true;
+bool FdEntity::streamupload = false;
+
+//------------------------------------------------
+// FdEntity class methods
+//------------------------------------------------
+bool FdEntity::SetNoMixMultipart()
+{
+    bool old = mixmultipart;
+    mixmultipart = false;
+    return old;
+}
+
+bool FdEntity::SetStreamUpload(bool isstream)
+{
+    bool old = streamupload;
+    streamupload = isstream;
+    return old;
+}
+
+int FdEntity::FillFile(int fd, unsigned char byte, off_t size, off_t start)
+{
+    unsigned char bytes[1024 * 32]; // 32kb
+    memset(bytes, byte, std::min(static_cast<off_t>(sizeof(bytes)), size));
+
+    for(off_t total = 0, onewrote = 0; total < size; total += onewrote){
+        if(-1 == (onewrote = pwrite(fd, bytes, std::min(static_cast<off_t>(sizeof(bytes)), size - total), start + total))){
+            S3FS_PRN_ERR("pwrite failed. errno(%d)", errno);
+            return -errno;
+        }
+    }
+    return 0;
+}
+
+// [NOTE]
+// If fd is wrong or something error is occurred, return 0.
+// The ino_t is allowed zero, but inode 0 is not realistic.
+// So this method returns 0 on error assuming the correct
+// inode is never 0.
+// The caller must have exclusive control.
+// +ino_t FdEntity::GetInode(int fd) +{ + if(-1 == fd){ + S3FS_PRN_ERR("file descriptor is wrong."); + return 0; + } + + struct stat st; + if(0 != fstat(fd, &st)){ + S3FS_PRN_ERR("could not get stat for physical file descriptor(%d) by errno(%d).", fd, errno); + return 0; + } + return st.st_ino; +} + +//------------------------------------------------ +// FdEntity methods +//------------------------------------------------ +FdEntity::FdEntity(const char* tpath, const char* cpath) : + is_lock_init(false), path(SAFESTRPTR(tpath)), + physical_fd(-1), pfile(nullptr), inode(0), size_orgmeta(0), + cachepath(SAFESTRPTR(cpath)), pending_status(pending_status_t::NO_UPDATE_PENDING) +{ + holding_mtime.tv_sec = -1; + holding_mtime.tv_nsec = 0; + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + int result; + if(0 != (result = pthread_mutex_init(&fdent_lock, &attr))){ + S3FS_PRN_CRIT("failed to init fdent_lock: %d", result); + abort(); + } + if(0 != (result = pthread_mutex_init(&fdent_data_lock, &attr))){ + S3FS_PRN_CRIT("failed to init fdent_data_lock: %d", result); + abort(); + } + is_lock_init = true; +} + +FdEntity::~FdEntity() +{ + Clear(); + + if(is_lock_init){ + int result; + if(0 != (result = pthread_mutex_destroy(&fdent_data_lock))){ + S3FS_PRN_CRIT("failed to destroy fdent_data_lock: %d", result); + abort(); + } + if(0 != (result = pthread_mutex_destroy(&fdent_lock))){ + S3FS_PRN_CRIT("failed to destroy fdent_lock: %d", result); + abort(); + } + is_lock_init = false; + } +} + +void FdEntity::Clear() +{ + AutoLock auto_lock(&fdent_lock); + AutoLock auto_data_lock(&fdent_data_lock); + + pseudo_fd_map.clear(); + + if(-1 != physical_fd){ + if(!cachepath.empty()){ + // [NOTE] + // Compare the inode of the existing cache file with the inode of + // the cache file output by this object, and if they are the same, + // serialize the pagelist. 
+ // + ino_t cur_inode = GetInode(); + if(0 != cur_inode && cur_inode == inode){ + CacheFileStat cfstat(path.c_str()); + if(!pagelist.Serialize(cfstat, true, inode)){ + S3FS_PRN_WARN("failed to save cache stat file(%s).", path.c_str()); + } + } + } + if(pfile){ + fclose(pfile); + pfile = nullptr; + } + physical_fd = -1; + inode = 0; + + if(!mirrorpath.empty()){ + if(-1 == unlink(mirrorpath.c_str())){ + S3FS_PRN_WARN("failed to remove mirror cache file(%s) by errno(%d).", mirrorpath.c_str(), errno); + } + mirrorpath.erase(); + } + } + pagelist.Init(0, false, false); + path = ""; + cachepath = ""; +} + +// [NOTE] +// This method returns the inode of the file in cachepath. +// The return value is the same as the class method GetInode(). +// The caller must have exclusive control. +// +ino_t FdEntity::GetInode() const +{ + if(cachepath.empty()){ + S3FS_PRN_INFO("cache file path is empty, then return inode as 0."); + return 0; + } + + struct stat st; + if(0 != stat(cachepath.c_str(), &st)){ + S3FS_PRN_INFO("could not get stat for file(%s) by errno(%d).", cachepath.c_str(), errno); + return 0; + } + return st.st_ino; +} + +void FdEntity::Close(int fd) +{ + AutoLock auto_lock(&fdent_lock); + + S3FS_PRN_DBG("[path=%s][pseudo_fd=%d][physical_fd=%d]", path.c_str(), fd, physical_fd); + + // search pseudo fd and close it. + fdinfo_map_t::iterator iter = pseudo_fd_map.find(fd); + if(pseudo_fd_map.end() != iter){ + pseudo_fd_map.erase(iter); + }else{ + S3FS_PRN_WARN("Not found pseudo_fd(%d) in entity object(%s)", fd, path.c_str()); + } + + // check pseudo fd count + if(-1 != physical_fd && 0 == GetOpenCount(AutoLock::ALREADY_LOCKED)){ + AutoLock auto_data_lock(&fdent_data_lock); + if(!cachepath.empty()){ + // [NOTE] + // Compare the inode of the existing cache file with the inode of + // the cache file output by this object, and if they are the same, + // serialize the pagelist. 
+ // + ino_t cur_inode = GetInode(); + if(0 != cur_inode && cur_inode == inode){ + CacheFileStat cfstat(path.c_str()); + if(!pagelist.Serialize(cfstat, true, inode)){ + S3FS_PRN_WARN("failed to save cache stat file(%s).", path.c_str()); + } + } + } + if(pfile){ + fclose(pfile); + pfile = nullptr; + } + physical_fd = -1; + inode = 0; + + if(!mirrorpath.empty()){ + if(-1 == unlink(mirrorpath.c_str())){ + S3FS_PRN_WARN("failed to remove mirror cache file(%s) by errno(%d).", mirrorpath.c_str(), errno); + } + mirrorpath.erase(); + } + } +} + +int FdEntity::Dup(int fd, AutoLock::Type locktype) +{ + AutoLock auto_lock(&fdent_lock, locktype); + + S3FS_PRN_DBG("[path=%s][pseudo_fd=%d][physical_fd=%d][pseudo fd count=%zu]", path.c_str(), fd, physical_fd, pseudo_fd_map.size()); + + if(-1 == physical_fd){ + return -1; + } + fdinfo_map_t::iterator iter = pseudo_fd_map.find(fd); + if(pseudo_fd_map.end() == iter){ + S3FS_PRN_ERR("Not found pseudo_fd(%d) in entity object(%s) for physical_fd(%d)", fd, path.c_str(), physical_fd); + return -1; + } + const PseudoFdInfo* org_pseudoinfo = iter->second.get(); + std::unique_ptr ppseudoinfo(new PseudoFdInfo(physical_fd, (org_pseudoinfo ? 
org_pseudoinfo->GetFlags() : 0))); + int pseudo_fd = ppseudoinfo->GetPseudoFd(); + pseudo_fd_map[pseudo_fd] = std::move(ppseudoinfo); + + return pseudo_fd; +} + +int FdEntity::OpenPseudoFd(int flags, AutoLock::Type locktype) +{ + AutoLock auto_lock(&fdent_lock, locktype); + + S3FS_PRN_DBG("[path=%s][physical_fd=%d][pseudo fd count=%zu]", path.c_str(), physical_fd, pseudo_fd_map.size()); + + if(-1 == physical_fd){ + return -1; + } + std::unique_ptr ppseudoinfo(new PseudoFdInfo(physical_fd, flags)); + int pseudo_fd = ppseudoinfo->GetPseudoFd(); + pseudo_fd_map[pseudo_fd] = std::move(ppseudoinfo); + + return pseudo_fd; +} + +int FdEntity::GetOpenCount(AutoLock::Type locktype) const +{ + AutoLock auto_lock(&fdent_lock, locktype); + + return static_cast(pseudo_fd_map.size()); +} + +// +// Open mirror file which is linked cache file. +// +int FdEntity::OpenMirrorFile() +{ + if(cachepath.empty()){ + S3FS_PRN_ERR("cache path is empty, why come here"); + return -EIO; + } + + // make temporary directory + std::string bupdir; + if(!FdManager::MakeCachePath(nullptr, bupdir, true, true)){ + S3FS_PRN_ERR("could not make bup cache directory path or create it."); + return -EIO; + } + + // create seed generating mirror file name + unsigned int seed = static_cast(time(nullptr)); + int urandom_fd; + if(-1 != (urandom_fd = open("/dev/urandom", O_RDONLY))){ + unsigned int rand_data; + if(sizeof(rand_data) == read(urandom_fd, &rand_data, sizeof(rand_data))){ + seed ^= rand_data; + } + close(urandom_fd); + } + + // try to link mirror file + while(true){ + // make random(temp) file path + // (do not care for threading, because allowed any value returned.) 
+ // + char szfile[NAME_MAX + 1]; + snprintf(szfile, sizeof(szfile), "%x.tmp", rand_r(&seed)); + szfile[NAME_MAX] = '\0'; // for safety + mirrorpath = bupdir + "/" + szfile; + + // link mirror file to cache file + if(0 == link(cachepath.c_str(), mirrorpath.c_str())){ + break; + } + if(EEXIST != errno){ + S3FS_PRN_ERR("could not link mirror file(%s) to cache file(%s) by errno(%d).", mirrorpath.c_str(), cachepath.c_str(), errno); + return -errno; + } + ++seed; + } + + // open mirror file + int mirrorfd; + if(-1 == (mirrorfd = open(mirrorpath.c_str(), O_RDWR))){ + S3FS_PRN_ERR("could not open mirror file(%s) by errno(%d).", mirrorpath.c_str(), errno); + return -errno; + } + return mirrorfd; +} + +bool FdEntity::FindPseudoFd(int fd, AutoLock::Type locktype) const +{ + AutoLock auto_lock(&fdent_lock, locktype); + + if(-1 == fd){ + return false; + } + if(pseudo_fd_map.end() == pseudo_fd_map.find(fd)){ + return false; + } + return true; +} + +PseudoFdInfo* FdEntity::CheckPseudoFdFlags(int fd, bool writable, AutoLock::Type locktype) +{ + AutoLock auto_lock(&fdent_lock, locktype); + + if(-1 == fd){ + return nullptr; + } + fdinfo_map_t::iterator iter = pseudo_fd_map.find(fd); + if(pseudo_fd_map.end() == iter || nullptr == iter->second){ + return nullptr; + } + if(writable){ + if(!iter->second->Writable()){ + return nullptr; + } + }else{ + if(!iter->second->Readable()){ + return nullptr; + } + } + return iter->second.get(); +} + +bool FdEntity::IsUploading(AutoLock::Type locktype) +{ + AutoLock auto_lock(&fdent_lock, locktype); + + for(fdinfo_map_t::const_iterator iter = pseudo_fd_map.begin(); iter != pseudo_fd_map.end(); ++iter){ + const PseudoFdInfo* ppseudoinfo = iter->second.get(); + if(ppseudoinfo && ppseudoinfo->IsUploading()){ + return true; + } + } + return false; +} + +// [NOTE] +// If the open is successful, returns pseudo fd. +// If it fails, it returns an error code with a negative value. +// +// ts_mctime argument is a variable for mtime/ctime. 
+// If you want to disable this variable, specify UTIME_OMIT for +// tv_nsec in timespec member(in this case tv_sec member is ignored). +// This is similar to utimens operation. +// You can use "S3FS_OMIT_TS" global variable for UTIME_OMIT. +// +int FdEntity::Open(const headers_t* pmeta, off_t size, const struct timespec& ts_mctime, int flags, AutoLock::Type type) +{ + AutoLock auto_lock(&fdent_lock, type); + + S3FS_PRN_DBG("[path=%s][physical_fd=%d][size=%lld][ts_mctime=%s][flags=0x%x]", path.c_str(), physical_fd, static_cast(size), str(ts_mctime).c_str(), flags); + + if (!auto_lock.isLockAcquired()) { + // had to wait for fd lock, return + S3FS_PRN_ERR("Could not get lock."); + return -EIO; + } + + AutoLock auto_data_lock(&fdent_data_lock); + + // [NOTE] + // When the file size is incremental by truncating, it must be keeped + // as an untreated area, and this area is set to these variables. + // + off_t truncated_start = 0; + off_t truncated_size = 0; + + if(-1 != physical_fd){ + // + // already open file + // + + // check only file size(do not need to save cfs and time. + if(0 <= size && pagelist.Size() != size){ + // truncate temporary file size + if(-1 == ftruncate(physical_fd, size) || -1 == fsync(physical_fd)){ + S3FS_PRN_ERR("failed to truncate temporary file(physical_fd=%d) by errno(%d).", physical_fd, errno); + return -errno; + } + // resize page list + if(!pagelist.Resize(size, false, true)){ // Areas with increased size are modified + S3FS_PRN_ERR("failed to truncate temporary file information(physical_fd=%d).", physical_fd); + return -EIO; + } + } + + // set untreated area + if(0 <= size && size_orgmeta < size){ + // set untreated area + truncated_start = size_orgmeta; + truncated_size = size - size_orgmeta; + } + + // set original headers and set size. + off_t new_size = (0 <= size ? 
size : size_orgmeta); + if(pmeta){ + orgmeta = *pmeta; + size_orgmeta = get_size(orgmeta); + } + if(new_size < size_orgmeta){ + size_orgmeta = new_size; + } + + }else{ + // + // file is not opened yet + // + bool need_save_csf = false; // need to save(reset) cache stat file + bool is_truncate = false; // need to truncate + + std::unique_ptr pcfstat; + + if(!cachepath.empty()){ + // using cache + struct stat st; + if(stat(cachepath.c_str(), &st) == 0){ + if(0 > compare_timespec(st, stat_time_type::MTIME, ts_mctime)){ + S3FS_PRN_DBG("cache file stale, removing: %s", cachepath.c_str()); + if(unlink(cachepath.c_str()) != 0){ + return (0 == errno ? -EIO : -errno); + } + } + } + + // open cache and cache stat file, load page info. + pcfstat.reset(new CacheFileStat(path.c_str())); + // try to open cache file + if( -1 != (physical_fd = open(cachepath.c_str(), O_RDWR)) && + 0 != (inode = FdEntity::GetInode(physical_fd)) && + pagelist.Serialize(*pcfstat, false, inode) ) + { + // succeed to open cache file and to load stats data + memset(&st, 0, sizeof(struct stat)); + if(-1 == fstat(physical_fd, &st)){ + S3FS_PRN_ERR("fstat is failed. errno(%d)", errno); + physical_fd = -1; + inode = 0; + return (0 == errno ? -EIO : -errno); + } + // check size, st_size, loading stat file + if(-1 == size){ + if(st.st_size != pagelist.Size()){ + pagelist.Resize(st.st_size, false, true); // Areas with increased size are modified + need_save_csf = true; // need to update page info + } + size = st.st_size; + }else{ + // First if the current cache file size and pagelist do not match, fix pagelist. 
+ if(st.st_size != pagelist.Size()){ + pagelist.Resize(st.st_size, false, true); // Areas with increased size are modified + need_save_csf = true; // need to update page info + } + if(size != pagelist.Size()){ + pagelist.Resize(size, false, true); // Areas with increased size are modified + need_save_csf = true; // need to update page info + } + if(size != st.st_size){ + is_truncate = true; + } + } + + }else{ + if(-1 != physical_fd){ + close(physical_fd); + } + inode = 0; + // could not open cache file or could not load stats data, so initialize it. + if(-1 == (physical_fd = open(cachepath.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0600))){ + S3FS_PRN_ERR("failed to open file(%s). errno(%d)", cachepath.c_str(), errno); + + // remove cache stat file if it is existed + int result; + if(0 != (result = CacheFileStat::DeleteCacheFileStat(path.c_str()))){ + if(-ENOENT != result){ + S3FS_PRN_WARN("failed to delete current cache stat file(%s) by errno(%d), but continue...", path.c_str(), result); + } + } + return result; + } + need_save_csf = true; // need to update page info + inode = FdEntity::GetInode(physical_fd); + if(-1 == size){ + size = 0; + pagelist.Init(0, false, false); + }else{ + // [NOTE] + // The modify flag must not be set when opening a file, + // if the ts_mctime parameter(mtime) is specified(tv_nsec != UTIME_OMIT) + // and the cache file does not exist. + // If mtime is specified for the file and the cache file + // mtime is older than it, the cache file is removed and + // the processing comes here. + // + pagelist.Resize(size, false, (UTIME_OMIT == ts_mctime.tv_nsec ? true : false)); + + is_truncate = true; + } + } + + // open mirror file + int mirrorfd; + if(0 >= (mirrorfd = OpenMirrorFile())){ + S3FS_PRN_ERR("failed to open mirror file linked cache file(%s).", cachepath.c_str()); + return (0 == mirrorfd ? 
-EIO : mirrorfd); + } + // switch fd + close(physical_fd); + physical_fd = mirrorfd; + + // make file pointer(for being same tmpfile) + if(nullptr == (pfile = fdopen(physical_fd, "wb"))){ + S3FS_PRN_ERR("failed to get fileno(%s). errno(%d)", cachepath.c_str(), errno); + close(physical_fd); + physical_fd = -1; + inode = 0; + return (0 == errno ? -EIO : -errno); + } + + }else{ + // not using cache + inode = 0; + + // open temporary file + if(nullptr == (pfile = FdManager::MakeTempFile()) || -1 ==(physical_fd = fileno(pfile))){ + S3FS_PRN_ERR("failed to open temporary file by errno(%d)", errno); + if(pfile){ + fclose(pfile); + pfile = nullptr; + } + return (0 == errno ? -EIO : -errno); + } + if(-1 == size){ + size = 0; + pagelist.Init(0, false, false); + }else{ + // [NOTE] + // The modify flag must not be set when opening a file, + // if the ts_mctime parameter(mtime) is specified(tv_nsec != UTIME_OMIT) + // and the cache file does not exist. + // If mtime is specified for the file and the cache file + // mtime is older than it, the cache file is removed and + // the processing comes here. + // + pagelist.Resize(size, false, (UTIME_OMIT == ts_mctime.tv_nsec ? true : false)); + + is_truncate = true; + } + } + + // truncate cache(tmp) file + if(is_truncate){ + if(0 != ftruncate(physical_fd, size) || 0 != fsync(physical_fd)){ + S3FS_PRN_ERR("ftruncate(%s) or fsync returned err(%d)", cachepath.c_str(), errno); + fclose(pfile); + pfile = nullptr; + physical_fd = -1; + inode = 0; + return (0 == errno ? -EIO : -errno); + } + } + + // reset cache stat file + if(need_save_csf && pcfstat.get()){ + if(!pagelist.Serialize(*pcfstat, true, inode)){ + S3FS_PRN_WARN("failed to save cache stat file(%s), but continue...", path.c_str()); + } + } + + // set original headers and size in it. 
+ if(pmeta){ + orgmeta = *pmeta; + size_orgmeta = get_size(orgmeta); + }else{ + orgmeta.clear(); + size_orgmeta = 0; + } + + // set untreated area + if(0 <= size && size_orgmeta < size){ + truncated_start = size_orgmeta; + truncated_size = size - size_orgmeta; + } + + // set mtime and ctime(set "x-amz-meta-mtime" and "x-amz-meta-ctime" in orgmeta) + if(UTIME_OMIT != ts_mctime.tv_nsec){ + if(0 != SetMCtime(ts_mctime, ts_mctime, AutoLock::ALREADY_LOCKED)){ + S3FS_PRN_ERR("failed to set mtime/ctime. errno(%d)", errno); + fclose(pfile); + pfile = nullptr; + physical_fd = -1; + inode = 0; + return (0 == errno ? -EIO : -errno); + } + } + } + + // create new pseudo fd, and set it to map + std::unique_ptr ppseudoinfo(new PseudoFdInfo(physical_fd, flags)); + int pseudo_fd = ppseudoinfo->GetPseudoFd(); + pseudo_fd_map[pseudo_fd] = std::move(ppseudoinfo); + + // if there is untreated area, set it to pseudo object. + if(0 < truncated_size){ + if(!AddUntreated(truncated_start, truncated_size)){ + pseudo_fd_map.erase(pseudo_fd); + if(pfile){ + fclose(pfile); + pfile = nullptr; + } + } + } + + return pseudo_fd; +} + +// [NOTE] +// This method is called for only nocopyapi functions. +// So we do not check disk space for this option mode, if there is no enough +// disk space this method will be failed. 
+// +bool FdEntity::LoadAll(int fd, headers_t* pmeta, off_t* size, bool force_load) +{ + AutoLock auto_lock(&fdent_lock); + + S3FS_PRN_INFO3("[path=%s][pseudo_fd=%d][physical_fd=%d]", path.c_str(), fd, physical_fd); + + if(-1 == physical_fd || !FindPseudoFd(fd, AutoLock::ALREADY_LOCKED)){ + S3FS_PRN_ERR("pseudo_fd(%d) and physical_fd(%d) for path(%s) is not opened yet", fd, physical_fd, path.c_str()); + return false; + } + + AutoLock auto_data_lock(&fdent_data_lock); + + if(force_load){ + SetAllStatusUnloaded(); + } + // + // TODO: possibly do background for delay loading + // + int result; + if(0 != (result = Load(/*start=*/ 0, /*size=*/ 0, AutoLock::ALREADY_LOCKED))){ + S3FS_PRN_ERR("could not download, result(%d)", result); + return false; + } + if(size){ + *size = pagelist.Size(); + } + return true; +} + +// +// Rename file path. +// +// This method sets the FdManager::fent map registration key to fentmapkey. +// +// [NOTE] +// This method changes the file path of FdEntity. +// Old file is deleted after linking to the new file path, and this works +// without problem because the file descriptor is not affected even if the +// cache file is open. +// The mirror file descriptor is also the same. The mirror file path does +// not need to be changed and will remain as it is. 
+// +bool FdEntity::RenamePath(const std::string& newpath, std::string& fentmapkey) +{ + if(!cachepath.empty()){ + // has cache path + + // make new cache path + std::string newcachepath; + if(!FdManager::MakeCachePath(newpath.c_str(), newcachepath, true)){ + S3FS_PRN_ERR("failed to make cache path for object(%s).", newpath.c_str()); + return false; + } + + // rename cache file + if(-1 == rename(cachepath.c_str(), newcachepath.c_str())){ + S3FS_PRN_ERR("failed to rename old cache path(%s) to new cache path(%s) by errno(%d).", cachepath.c_str(), newcachepath.c_str(), errno); + return false; + } + + // link and unlink cache file stat + if(!CacheFileStat::RenameCacheFileStat(path.c_str(), newpath.c_str())){ + S3FS_PRN_ERR("failed to rename cache file stat(%s to %s).", path.c_str(), newpath.c_str()); + return false; + } + fentmapkey = newpath; + cachepath = newcachepath; + + }else{ + // does not have cache path + fentmapkey.erase(); + FdManager::MakeRandomTempPath(newpath.c_str(), fentmapkey); + } + // set new path + path = newpath; + + return true; +} + +bool FdEntity::IsModified() const +{ + if(use_newcache){ + return GetUpdateMark(); + } + + AutoLock auto_lock(&fdent_lock); + AutoLock auto_data_lock2(&fdent_data_lock); + return pagelist.IsModified(); +} + +bool FdEntity::GetStats(struct stat& st, AutoLock::Type locktype) const +{ + AutoLock auto_lock(&fdent_lock, locktype); + if(-1 == physical_fd){ + return false; + } + + memset(&st, 0, sizeof(struct stat)); + if(-1 == fstat(physical_fd, &st)){ + S3FS_PRN_ERR("fstat failed. 
errno(%d)", errno); + return false; + } + + if(use_newcache){ + st.st_size = GetRealsize(); + } + + return true; +} + +int FdEntity::SetCtime(struct timespec time, AutoLock::Type locktype) +{ + AutoLock auto_lock(&fdent_lock, locktype); + + S3FS_PRN_INFO3("[path=%s][physical_fd=%d][time=%s]", path.c_str(), physical_fd, str(time).c_str()); + + if(-1 == time.tv_sec){ + return 0; + } + orgmeta["x-amz-meta-ctime"] = str(time); + return 0; +} + +int FdEntity::SetAtime(struct timespec time, AutoLock::Type locktype) +{ + AutoLock auto_lock(&fdent_lock, locktype); + + S3FS_PRN_INFO3("[path=%s][physical_fd=%d][time=%s]", path.c_str(), physical_fd, str(time).c_str()); + + if(-1 == time.tv_sec){ + return 0; + } + orgmeta["x-amz-meta-atime"] = str(time); + return 0; +} + +// [NOTE] +// This method updates mtime as well as ctime. +// +int FdEntity::SetMCtime(struct timespec mtime, struct timespec ctime, AutoLock::Type locktype) +{ + AutoLock auto_lock(&fdent_lock, locktype); + + S3FS_PRN_INFO3("[path=%s][physical_fd=%d][mtime=%s][ctime=%s]", path.c_str(), physical_fd, str(mtime).c_str(), str(ctime).c_str()); + + if(mtime.tv_sec < 0 || ctime.tv_sec < 0){ + return 0; + } + + if(-1 != physical_fd){ + struct timespec ts[2]; + ts[0].tv_sec = mtime.tv_sec; + ts[0].tv_nsec = mtime.tv_nsec; + ts[1].tv_sec = ctime.tv_sec; + ts[1].tv_nsec = ctime.tv_nsec; + if(-1 == futimens(physical_fd, ts)){ + S3FS_PRN_ERR("futimens failed. errno(%d)", errno); + return -errno; + } + }else if(!cachepath.empty()){ + // not opened file yet. + struct timespec ts[2]; + ts[0].tv_sec = ctime.tv_sec; + ts[0].tv_nsec = ctime.tv_nsec; + ts[1].tv_sec = mtime.tv_sec; + ts[1].tv_nsec = mtime.tv_nsec; + if(-1 == utimensat(AT_FDCWD, cachepath.c_str(), ts, 0)){ + S3FS_PRN_ERR("utimensat failed. 
errno(%d)", errno); + return -errno; + } + } + + orgmeta["x-amz-meta-mtime"] = str(mtime); + orgmeta["x-amz-meta-ctime"] = str(ctime); + + return 0; +} + +bool FdEntity::UpdateCtime() +{ + AutoLock auto_lock(&fdent_lock); + struct stat st; + if(!GetStats(st, AutoLock::ALREADY_LOCKED)){ + return false; + } + + orgmeta["x-amz-meta-ctime"] = str_stat_time(st, stat_time_type::CTIME); + + return true; +} + +bool FdEntity::UpdateAtime() +{ + AutoLock auto_lock(&fdent_lock); + struct stat st; + if(!GetStats(st, AutoLock::ALREADY_LOCKED)){ + return false; + } + + orgmeta["x-amz-meta-atime"] = str_stat_time(st, stat_time_type::ATIME); + + return true; +} + +bool FdEntity::UpdateMtime(bool clear_holding_mtime) +{ + AutoLock auto_lock(&fdent_lock); + + if(0 <= holding_mtime.tv_sec){ + // [NOTE] + // This conditional statement is very special. + // If you copy a file with "cp -p" etc., utimens or chown will be + // called after opening the file, after that call to write, flush. + // If normally utimens are not called(cases like "cp" only), mtime + // should be updated at the file flush. + // Here, check the holding_mtime value to prevent mtime from being + // overwritten. + // + if(clear_holding_mtime){ + if(!ClearHoldingMtime(AutoLock::ALREADY_LOCKED)){ + return false; + } + // [NOTE] + // If come here after fdatasync has been processed, the file + // content update has already taken place. 
However, the metadata + // update is necessary and needs to be flagged in order to + // perform it with flush, + // + pending_status = pending_status_t::UPDATE_META_PENDING; + } + }else{ + struct stat st; + if(!GetStats(st, AutoLock::ALREADY_LOCKED)){ + return false; + } + orgmeta["x-amz-meta-mtime"] = str_stat_time(st, stat_time_type::MTIME); + } + return true; +} + +bool FdEntity::SetHoldingMtime(struct timespec mtime, AutoLock::Type locktype) +{ + AutoLock auto_lock(&fdent_lock, locktype); + + S3FS_PRN_INFO3("[path=%s][physical_fd=%d][mtime=%s]", path.c_str(), physical_fd, str(mtime).c_str()); + + if(mtime.tv_sec < 0){ + return false; + } + holding_mtime = mtime; + return true; +} + +bool FdEntity::ClearHoldingMtime(AutoLock::Type locktype) +{ + AutoLock auto_lock(&fdent_lock, locktype); + + if(holding_mtime.tv_sec < 0){ + return false; + } + struct stat st; + if(!GetStats(st, AutoLock::ALREADY_LOCKED)){ + return false; + } + if(-1 != physical_fd){ + struct timespec ts[2]; + struct timespec ts_ctime; + + ts[0].tv_sec = holding_mtime.tv_sec; + ts[0].tv_nsec = holding_mtime.tv_nsec; + + set_stat_to_timespec(st, stat_time_type::CTIME, ts_ctime); + ts[1].tv_sec = ts_ctime.tv_sec; + ts[1].tv_nsec = ts_ctime.tv_nsec; + + if(-1 == futimens(physical_fd, ts)){ + S3FS_PRN_ERR("futimens failed. errno(%d)", errno); + return false; + } + }else if(!cachepath.empty()){ + // not opened file yet. + struct timespec ts[2]; + struct timespec ts_ctime; + + set_stat_to_timespec(st, stat_time_type::CTIME, ts_ctime); + ts[0].tv_sec = ts_ctime.tv_sec; + ts[0].tv_nsec = ts_ctime.tv_nsec; + + ts[1].tv_sec = holding_mtime.tv_sec; + ts[1].tv_nsec = holding_mtime.tv_nsec; + if(-1 == utimensat(AT_FDCWD, cachepath.c_str(), ts, 0)){ + S3FS_PRN_ERR("utimensat failed. 
errno(%d)", errno); + return false; + } + } + holding_mtime.tv_sec = -1; + holding_mtime.tv_nsec = 0; + + return true; +} + +bool FdEntity::GetSize(off_t& size) const +{ + AutoLock auto_lock(&fdent_lock); + if(-1 == physical_fd){ + return false; + } + + if(use_newcache){ + size = GetRealsize(); + return true; + } + + AutoLock auto_data_lock(&fdent_data_lock); + size = pagelist.Size(); + + return true; +} + +bool FdEntity::GetXattr(std::string& xattr) const +{ + AutoLock auto_lock(&fdent_lock); + + headers_t::const_iterator iter = orgmeta.find("x-amz-meta-xattr"); + if(iter == orgmeta.end()){ + return false; + } + xattr = iter->second; + return true; +} + +bool FdEntity::SetXattr(const std::string& xattr) +{ + AutoLock auto_lock(&fdent_lock); + orgmeta["x-amz-meta-xattr"] = xattr; + return true; +} + +bool FdEntity::SetMode(mode_t mode) +{ + AutoLock auto_lock(&fdent_lock); + orgmeta["x-amz-meta-mode"] = std::to_string(mode); + return true; +} + +bool FdEntity::SetUId(uid_t uid) +{ + AutoLock auto_lock(&fdent_lock); + orgmeta["x-amz-meta-uid"] = std::to_string(uid); + return true; +} + +bool FdEntity::SetGId(gid_t gid) +{ + AutoLock auto_lock(&fdent_lock); + orgmeta["x-amz-meta-gid"] = std::to_string(gid); + return true; +} + +bool FdEntity::SetContentType(const char* path) +{ + if(!path){ + return false; + } + AutoLock auto_lock(&fdent_lock); + orgmeta["Content-Type"] = S3fsCurl::LookupMimeType(path); + return true; +} + +bool FdEntity::SetAllStatus(bool is_loaded) +{ + S3FS_PRN_INFO3("[path=%s][physical_fd=%d][%s]", path.c_str(), physical_fd, is_loaded ? "loaded" : "unloaded"); + + if(-1 == physical_fd){ + return false; + } + // [NOTE] + // this method is only internal use, and calling after locking. + // so do not lock now. + // + //AutoLock auto_lock(&fdent_lock); + + // get file size + struct stat st; + memset(&st, 0, sizeof(struct stat)); + if(-1 == fstat(physical_fd, &st)){ + S3FS_PRN_ERR("fstat is failed. 
errno(%d)", errno); + return false; + } + // Reinit + pagelist.Init(st.st_size, is_loaded, false); + + return true; +} + +int FdEntity::Load(off_t start, off_t size, AutoLock::Type type, bool is_modified_flag) +{ + AutoLock auto_lock(&fdent_lock, type); + + S3FS_PRN_DBG("[path=%s][physical_fd=%d][offset=%lld][size=%lld]", path.c_str(), physical_fd, static_cast(start), static_cast(size)); + + if(-1 == physical_fd){ + return -EBADF; + } + AutoLock auto_data_lock(&fdent_data_lock, type); + + int result = 0; + + // check loaded area & load + fdpage_list_t unloaded_list; + if(0 < pagelist.GetUnloadedPages(unloaded_list, start, size)){ + for(fdpage_list_t::iterator iter = unloaded_list.begin(); iter != unloaded_list.end(); ++iter){ + if(0 != size && start + size <= iter->offset){ + // reached end + break; + } + // check loading size + off_t need_load_size = 0; + if(iter->offset < size_orgmeta){ + // original file size(on S3) is smaller than request. + need_load_size = (iter->next() <= size_orgmeta ? iter->bytes : (size_orgmeta - iter->offset)); + } + + // download + if(S3fsCurl::GetMultipartSize() <= need_load_size && !nomultipart){ + // parallel request + result = S3fsCurl::ParallelGetObjectRequest(path.c_str(), physical_fd, iter->offset, need_load_size); + }else{ + // single request + if(0 < need_load_size){ + S3fsCurl s3fscurl; + result = s3fscurl.GetObjectRequest(path.c_str(), physical_fd, iter->offset, need_load_size); + }else{ + result = 0; + } + } + + if(0 != result){ + break; + } + // Set loaded flag + pagelist.SetPageLoadedStatus(iter->offset, iter->bytes, (is_modified_flag ? 
PageList::page_status::LOAD_MODIFIED : PageList::page_status::LOADED)); + } + PageList::FreeList(unloaded_list); + } + return result; +} + +int FdEntity::LoadByAdaptor(off_t start, off_t size, AutoLock::Type type, std::shared_ptr dataAdaptor, bool is_modified_flag) +{ + AutoLock auto_lock(&fdent_lock, type); + + S3FS_PRN_DBG("[path=%s][physical_fd=%d][offset=%lld][size=%lld]", path.c_str(), physical_fd, static_cast(start), static_cast(size)); + + if(-1 == physical_fd){ + return -EBADF; + } + AutoLock auto_data_lock(&fdent_data_lock, type); + + int result = 0; + + // check loaded area & load + fdpage_list_t unloaded_list; + if(0 < pagelist.GetUnloadedPages(unloaded_list, start, size)){ + for(fdpage_list_t::iterator iter = unloaded_list.begin(); iter != unloaded_list.end(); ++iter){ + if(0 != size && start + size <= iter->offset){ + // reached end + break; + } + // check loading size + off_t need_load_size = 0; + if(iter->offset < size_orgmeta){ + // original file size(on S3) is smaller than request. + need_load_size = (iter->next() <= size_orgmeta ? iter->bytes : (size_orgmeta - iter->offset)); + } + + if(0 < need_load_size){ + std::unique_ptr buf(new char[need_load_size]); + HybridCache::ByteBuffer buffer(buf.get(), need_load_size); + result = dataAdaptor->DownLoad(path, iter->offset, need_load_size, buffer).get(); + if(0 == result){ + WriteCache(buffer.data, iter->offset, need_load_size, type); + } + }else{ + result = 0; + } + if(0 != result){ + break; + } + // Set loaded flag + pagelist.SetPageLoadedStatus(iter->offset, iter->bytes, (is_modified_flag ? PageList::page_status::LOAD_MODIFIED : PageList::page_status::LOADED)); + } + PageList::FreeList(unloaded_list); + } + return result; +} + +// [NOTE] +// At no disk space for caching object. +// This method is downloading by dividing an object of the specified range +// and uploading by multipart after finishing downloading it. +// +// [NOTICE] +// Need to lock before calling this method. 
+// +int FdEntity::NoCacheLoadAndPost(PseudoFdInfo* pseudo_obj, off_t start, off_t size) +{ + int result = 0; + + S3FS_PRN_INFO3("[path=%s][physical_fd=%d][offset=%lld][size=%lld]", path.c_str(), physical_fd, static_cast(start), static_cast(size)); + + if(!pseudo_obj){ + S3FS_PRN_ERR("Pseudo object is nullptr."); + return -EIO; + } + + if(-1 == physical_fd){ + return -EBADF; + } + + // [NOTE] + // This method calling means that the cache file is never used no more. + // + if(!cachepath.empty()){ + // remove cache files(and cache stat file) + FdManager::DeleteCacheFile(path.c_str()); + // cache file path does not use no more. + cachepath.erase(); + mirrorpath.erase(); + } + + // Change entity key in manager mapping + FdManager::get()->ChangeEntityToTempPath(this, path.c_str()); + + // open temporary file + int tmpfd; + std::unique_ptr ptmpfp(FdManager::MakeTempFile(), &s3fs_fclose); + if(nullptr == ptmpfp || -1 == (tmpfd = fileno(ptmpfp.get()))){ + S3FS_PRN_ERR("failed to open temporary file by errno(%d)", errno); + return (0 == errno ? -EIO : -errno); + } + + // loop uploading by multipart + for(fdpage_list_t::iterator iter = pagelist.pages.begin(); iter != pagelist.pages.end(); ++iter){ + if(iter->end() < start){ + continue; + } + if(0 != size && start + size <= iter->offset){ + break; + } + // download each multipart size(default 10MB) in unit + for(off_t oneread = 0, totalread = (iter->offset < start ? start : 0); totalread < static_cast(iter->bytes); totalread += oneread){ + int upload_fd = physical_fd; + off_t offset = iter->offset + totalread; + oneread = std::min(static_cast(iter->bytes) - totalread, S3fsCurl::GetMultipartSize()); + + // check rest size is over minimum part size + // + // [NOTE] + // If the final part size is smaller than 5MB, it is not allowed by S3 API. + // For this case, if the previous part of the final part is not over 5GB, + // we incorporate the final part to the previous part. 
If the previous part + // is over 5GB, we want to even out the last part and the previous part. + // + if((iter->bytes - totalread - oneread) < MIN_MULTIPART_SIZE){ + if(FIVE_GB < iter->bytes - totalread){ + oneread = (iter->bytes - totalread) / 2; + }else{ + oneread = iter->bytes - totalread; + } + } + + if(!iter->loaded){ + // + // loading or initializing + // + upload_fd = tmpfd; + + // load offset & size + size_t need_load_size = 0; + if(size_orgmeta <= offset){ + // all area is over of original size + need_load_size = 0; + }else{ + if(size_orgmeta < (offset + oneread)){ + // original file size(on S3) is smaller than request. + need_load_size = size_orgmeta - offset; + }else{ + need_load_size = oneread; + } + } + size_t over_size = oneread - need_load_size; + + // [NOTE] + // truncate file to zero and set length to part offset + size + // after this, file length is (offset + size), but file does not use any disk space. + // + if(-1 == ftruncate(tmpfd, 0) || -1 == ftruncate(tmpfd, (offset + oneread))){ + S3FS_PRN_ERR("failed to truncate temporary file(physical_fd=%d).", tmpfd); + result = -EIO; + break; + } + + // single area get request + if(0 < need_load_size){ + S3fsCurl s3fscurl; + if(0 != (result = s3fscurl.GetObjectRequest(path.c_str(), tmpfd, offset, oneread))){ + S3FS_PRN_ERR("failed to get object(start=%lld, size=%lld) for file(physical_fd=%d).", static_cast(offset), static_cast(oneread), tmpfd); + break; + } + } + // initialize fd without loading + if(0 < over_size){ + if(0 != (result = FdEntity::FillFile(tmpfd, 0, over_size, offset + need_load_size))){ + S3FS_PRN_ERR("failed to fill rest bytes for physical_fd(%d). 
errno(%d)", tmpfd, result); + break; + } + } + }else{ + // already loaded area + } + // single area upload by multipart post + if(0 != (result = NoCacheMultipartPost(pseudo_obj, upload_fd, offset, oneread))){ + S3FS_PRN_ERR("failed to multipart post(start=%lld, size=%lld) for file(physical_fd=%d).", static_cast(offset), static_cast(oneread), upload_fd); + break; + } + } + if(0 != result){ + break; + } + + // set loaded flag + if(!iter->loaded){ + if(iter->offset < start){ + fdpage page(iter->offset, start - iter->offset, iter->loaded, false); + iter->bytes -= (start - iter->offset); + iter->offset = start; + pagelist.pages.insert(iter, page); + } + if(0 != size && start + size < iter->next()){ + fdpage page(iter->offset, start + size - iter->offset, true, false); + iter->bytes -= (start + size - iter->offset); + iter->offset = start + size; + pagelist.pages.insert(iter, page); + }else{ + iter->loaded = true; + iter->modified = false; + } + } + } + if(0 == result){ + // compress pagelist + pagelist.Compress(); + + // fd data do empty + if(-1 == ftruncate(physical_fd, 0)){ + S3FS_PRN_ERR("failed to truncate file(physical_fd=%d), but continue...", physical_fd); + } + } + + return result; +} + +// [NOTE] +// At no disk space for caching object. +// This method is starting multipart uploading. +// +int FdEntity::NoCachePreMultipartPost(PseudoFdInfo* pseudo_obj) +{ + if(!pseudo_obj){ + S3FS_PRN_ERR("Internal error, pseudo fd object pointer is null."); + return -EIO; + } + + // initialize multipart upload values + pseudo_obj->ClearUploadInfo(true); + + S3fsCurl s3fscurl(true); + std::string upload_id; + int result; + if(0 != (result = s3fscurl.PreMultipartPostRequest(path.c_str(), orgmeta, upload_id, false))){ + return result; + } + s3fscurl.DestroyCurlHandle(); + + // Clear the dirty flag, because the meta data is updated. 
+ pending_status = pending_status_t::NO_UPDATE_PENDING; + + // reset upload_id + if(!pseudo_obj->InitialUploadInfo(upload_id)){ + return -EIO; + } + return 0; +} + +// [NOTE] +// At no disk space for caching object. +// This method is uploading one part of multipart. +// +int FdEntity::NoCacheMultipartPost(PseudoFdInfo* pseudo_obj, int tgfd, off_t start, off_t size) +{ + if(-1 == tgfd || !pseudo_obj || !pseudo_obj->IsUploading()){ + S3FS_PRN_ERR("Need to initialize for multipart post."); + return -EIO; + } + + // get upload id + std::string upload_id; + if(!pseudo_obj->GetUploadId(upload_id)){ + return -EIO; + } + + // append new part and get it's etag string pointer + etagpair* petagpair = nullptr; + if(!pseudo_obj->AppendUploadPart(start, size, false, &petagpair)){ + return -EIO; + } + + S3fsCurl s3fscurl(true); + return s3fscurl.MultipartUploadRequest(upload_id, path.c_str(), tgfd, start, size, petagpair); +} + +// [NOTE] +// At no disk space for caching object. +// This method is finishing multipart uploading. 
+// +int FdEntity::NoCacheCompleteMultipartPost(PseudoFdInfo* pseudo_obj) +{ + etaglist_t etaglist; + if(!pseudo_obj || !pseudo_obj->IsUploading() || !pseudo_obj->GetEtaglist(etaglist)){ + S3FS_PRN_ERR("There is no upload id or etag list."); + return -EIO; + } + + // get upload id + std::string upload_id; + if(!pseudo_obj->GetUploadId(upload_id)){ + return -EIO; + } + + S3fsCurl s3fscurl(true); + int result = s3fscurl.CompleteMultipartPostRequest(path.c_str(), upload_id, etaglist); + s3fscurl.DestroyCurlHandle(); + if(0 != result){ + S3fsCurl s3fscurl_abort(true); + int result2 = s3fscurl.AbortMultipartUpload(path.c_str(), upload_id); + s3fscurl_abort.DestroyCurlHandle(); + if(0 != result2){ + S3FS_PRN_ERR("failed to abort multipart upload by errno(%d)", result2); + } + return result; + } + + // clear multipart upload info + untreated_list.ClearAll(); + pseudo_obj->ClearUploadInfo(); + + return 0; +} + +off_t FdEntity::BytesModified() +{ + AutoLock auto_lock(&fdent_lock); + AutoLock auto_lock2(&fdent_data_lock); + return pagelist.BytesModified(); +} + +// [NOTE] +// There are conditions that allow you to perform multipart uploads. +// +// According to the AWS spec: +// - 1 to 10,000 parts are allowed +// - minimum size of parts is 5MB (except for the last part) +// +// For example, if you set the minimum part size to 5MB, you can upload +// a maximum (5 * 10,000)MB file. +// The part size can be changed in MB units, then the maximum file size +// that can be handled can be further increased. +// Files smaller than the minimum part size will not be multipart uploaded, +// but will be uploaded as single part(normally). 
+// +int FdEntity::RowFlush(int fd, const char* tpath, AutoLock::Type type, bool force_sync, bool force_tmpfile) +{ + AutoLock auto_lock(&fdent_lock, type); + + S3FS_PRN_INFO3("[tpath=%s][path=%s][pseudo_fd=%d][physical_fd=%d]", SAFESTRPTR(tpath), path.c_str(), fd, physical_fd); + + if(-1 == physical_fd){ + return -EBADF; + } + + // check pseudo fd and its flag + fdinfo_map_t::iterator miter = pseudo_fd_map.find(fd); + if(pseudo_fd_map.end() == miter || nullptr == miter->second){ + return -EBADF; + } + if(!miter->second->Writable() && !(miter->second->GetFlags() & O_CREAT)){ + // If the entity is opened read-only, it will end normally without updating. + return 0; + } + + if(use_newcache && !force_tmpfile){ + if(!force_sync && !GetUpdateMark() && !IsDirtyMetadata()){ + S3FS_PRN_WARN("Nothing to update[path=%s][pseudo_fd=%d][physical_fd=%d]", path.c_str(), fd, physical_fd); + return 0; + } + int res = accessor->Flush(path); + if (0 == res) { + SetUpdateMark(false); + pagelist.ClearAllModified(); + pending_status = pending_status_t::NO_UPDATE_PENDING; + } + return res; + } + + PseudoFdInfo* pseudo_obj = miter->second.get(); + + AutoLock auto_lock2(&fdent_data_lock); + + int result; + if(!force_sync && !pagelist.IsModified() && !IsDirtyMetadata()){ + // nothing to update. 
+ return 0; + } + if(S3fsLog::IsS3fsLogDbg()){ + pagelist.Dump(); + } + + if(nomultipart){ + // No multipart upload + if(!force_sync && !pagelist.IsModified()){ + // for only push pending headers + result = UploadPending(-1, AutoLock::ALREADY_LOCKED); + }else{ + result = RowFlushNoMultipart(pseudo_obj, tpath); + } + }else if(FdEntity::streamupload){ + // Stream multipart upload + result = RowFlushStreamMultipart(pseudo_obj, tpath); + }else if(FdEntity::mixmultipart){ + // Mix multipart upload + result = RowFlushMixMultipart(pseudo_obj, tpath); + }else{ + // Normal multipart upload + result = RowFlushMultipart(pseudo_obj, tpath); + } + + // [NOTE] + // if something went wrong, so if you are using a cache file, + // the cache file may not be correct. So delete cache files. + // + if(0 != result && !cachepath.empty()){ + FdManager::DeleteCacheFile(tpath); + } + + return result; +} + +// [NOTE] +// Both fdent_lock and fdent_data_lock must be locked before calling. +// +int FdEntity::RowFlushNoMultipart(const PseudoFdInfo* pseudo_obj, const char* tpath) +{ + S3FS_PRN_INFO3("[tpath=%s][path=%s][pseudo_fd=%d][physical_fd=%d]", SAFESTRPTR(tpath), path.c_str(), (pseudo_obj ? pseudo_obj->GetPseudoFd() : -1), physical_fd); + + if(-1 == physical_fd || !pseudo_obj){ + return -EBADF; + } + + if(pseudo_obj->IsUploading()){ + S3FS_PRN_ERR("Why uploading now, even though s3fs is No Multipart uploading mode."); + return -EBADF; + } + + int result; + std::string tmppath = path; + headers_t tmporgmeta = orgmeta; + + // If there is no loading all of the area, loading all area. 
+ off_t restsize = pagelist.GetTotalUnloadedPageSize(); + if(0 < restsize){ + // check disk space + if(!ReserveDiskSpace(restsize)){ + // no enough disk space + S3FS_PRN_WARN("Not enough local storage to flush: [path=%s][pseudo_fd=%d][physical_fd=%d]", path.c_str(), pseudo_obj->GetPseudoFd(), physical_fd); + return -ENOSPC; // No space left on device + } + } + FdManager::FreeReservedDiskSpace(restsize); + + // Always load all uninitialized area + if(0 != (result = Load(/*start=*/ 0, /*size=*/ 0, AutoLock::ALREADY_LOCKED))){ + S3FS_PRN_ERR("failed to upload all area(errno=%d)", result); + return result; + } + + // check size + if(pagelist.Size() > MAX_MULTIPART_CNT * S3fsCurl::GetMultipartSize()){ + S3FS_PRN_ERR("Part count exceeds %d. Increase multipart size and try again.", MAX_MULTIPART_CNT); + return -EFBIG; + } + + // backup upload file size + struct stat st; + memset(&st, 0, sizeof(struct stat)); + if(-1 == fstat(physical_fd, &st)){ + S3FS_PRN_ERR("fstat is failed by errno(%d), but continue...", errno); + } + + S3fsCurl s3fscurl(true); + result = s3fscurl.PutRequest(tpath ? tpath : tmppath.c_str(), tmporgmeta, physical_fd); + + // reset uploaded file size + size_orgmeta = st.st_size; + + untreated_list.ClearAll(); + + if(0 == result){ + pagelist.ClearAllModified(); + } + + return result; +} + +// [NOTE] +// Both fdent_lock and fdent_data_lock must be locked before calling. +// +int FdEntity::RowFlushMultipart(PseudoFdInfo* pseudo_obj, const char* tpath) +{ + S3FS_PRN_INFO3("[tpath=%s][path=%s][pseudo_fd=%d][physical_fd=%d]", SAFESTRPTR(tpath), path.c_str(), (pseudo_obj ? pseudo_obj->GetPseudoFd() : -1), physical_fd); + + if(-1 == physical_fd || !pseudo_obj){ + return -EBADF; + } + + int result = 0; + + if(!pseudo_obj->IsUploading()){ + // Start uploading + + // If there is no loading all of the area, loading all area. 
+ off_t restsize = pagelist.GetTotalUnloadedPageSize(); + + // Check rest size and free disk space + if(0 < restsize && !ReserveDiskSpace(restsize)){ + // no enough disk space + if(0 != (result = NoCachePreMultipartPost(pseudo_obj))){ + S3FS_PRN_ERR("failed to switch multipart uploading with no cache(errno=%d)", result); + return result; + } + // upload all by multipart uploading + if(0 != (result = NoCacheLoadAndPost(pseudo_obj))){ + S3FS_PRN_ERR("failed to upload all area by multipart uploading(errno=%d)", result); + return result; + } + + }else{ + // enough disk space or no rest size + std::string tmppath = path; + headers_t tmporgmeta = orgmeta; + + FdManager::FreeReservedDiskSpace(restsize); + + // Load all uninitialized area(no mix multipart uploading) + if(0 != (result = Load(/*start=*/ 0, /*size=*/ 0, AutoLock::ALREADY_LOCKED))){ + S3FS_PRN_ERR("failed to upload all area(errno=%d)", result); + return result; + } + + // backup upload file size + struct stat st; + memset(&st, 0, sizeof(struct stat)); + if(-1 == fstat(physical_fd, &st)){ + S3FS_PRN_ERR("fstat is failed by errno(%d), but continue...", errno); + } + + if(pagelist.Size() > MAX_MULTIPART_CNT * S3fsCurl::GetMultipartSize()){ + S3FS_PRN_ERR("Part count exceeds %d. Increase multipart size and try again.", MAX_MULTIPART_CNT); + return -EFBIG; + + }else if(pagelist.Size() >= S3fsCurl::GetMultipartSize()){ + // multipart uploading + result = S3fsCurl::ParallelMultipartUploadRequest(tpath ? tpath : tmppath.c_str(), tmporgmeta, physical_fd); + + }else{ + // normal uploading (too small part size) + S3fsCurl s3fscurl(true); + result = s3fscurl.PutRequest(tpath ? 
tpath : tmppath.c_str(), tmporgmeta, physical_fd); + } + + // reset uploaded file size + size_orgmeta = st.st_size; + } + untreated_list.ClearAll(); + + }else{ + // Already start uploading + + // upload rest data + off_t untreated_start = 0; + off_t untreated_size = 0; + if(untreated_list.GetLastUpdatedPart(untreated_start, untreated_size, S3fsCurl::GetMultipartSize(), 0) && 0 < untreated_size){ + if(0 != (result = NoCacheMultipartPost(pseudo_obj, physical_fd, untreated_start, untreated_size))){ + S3FS_PRN_ERR("failed to multipart post(start=%lld, size=%lld) for file(physical_fd=%d).", static_cast(untreated_start), static_cast(untreated_size), physical_fd); + return result; + } + untreated_list.ClearParts(untreated_start, untreated_size); + } + // complete multipart uploading. + if(0 != (result = NoCacheCompleteMultipartPost(pseudo_obj))){ + S3FS_PRN_ERR("failed to complete(finish) multipart post for file(physical_fd=%d).", physical_fd); + return result; + } + // truncate file to zero + if(-1 == ftruncate(physical_fd, 0)){ + // So the file has already been removed, skip error. + S3FS_PRN_ERR("failed to truncate file(physical_fd=%d) to zero, but continue...", physical_fd); + } + // put pending headers or create new file + if(0 != (result = UploadPending(-1, AutoLock::ALREADY_LOCKED))){ + return result; + } + } + + if(0 == result){ + pagelist.ClearAllModified(); + pending_status = pending_status_t::NO_UPDATE_PENDING; + } + return result; +} + +// [NOTE] +// Both fdent_lock and fdent_data_lock must be locked before calling. +// +int FdEntity::RowFlushMixMultipart(PseudoFdInfo* pseudo_obj, const char* tpath) +{ + S3FS_PRN_INFO3("[tpath=%s][path=%s][pseudo_fd=%d][physical_fd=%d]", SAFESTRPTR(tpath), path.c_str(), (pseudo_obj ? 
pseudo_obj->GetPseudoFd() : -1), physical_fd); + + if(-1 == physical_fd || !pseudo_obj){ + return -EBADF; + } + + int result = 0; + + if(!pseudo_obj->IsUploading()){ + // Start uploading + + // If there is no loading all of the area, loading all area. + off_t restsize = pagelist.GetTotalUnloadedPageSize(/* start */ 0, /* size = all */ 0, MIN_MULTIPART_SIZE); + + // Check rest size and free disk space + if(0 < restsize && !ReserveDiskSpace(restsize)){ + // no enough disk space + if(0 != (result = NoCachePreMultipartPost(pseudo_obj))){ + S3FS_PRN_ERR("failed to switch multipart uploading with no cache(errno=%d)", result); + return result; + } + // upload all by multipart uploading + if(0 != (result = NoCacheLoadAndPost(pseudo_obj))){ + S3FS_PRN_ERR("failed to upload all area by multipart uploading(errno=%d)", result); + return result; + } + + }else{ + // enough disk space or no rest size + std::string tmppath = path; + headers_t tmporgmeta = orgmeta; + + FdManager::FreeReservedDiskSpace(restsize); + + // backup upload file size + struct stat st; + memset(&st, 0, sizeof(struct stat)); + if(-1 == fstat(physical_fd, &st)){ + S3FS_PRN_ERR("fstat is failed by errno(%d), but continue...", errno); + } + + if(pagelist.Size() > MAX_MULTIPART_CNT * S3fsCurl::GetMultipartSize()){ + S3FS_PRN_ERR("Part count exceeds %d. Increase multipart size and try again.", MAX_MULTIPART_CNT); + return -EFBIG; + + }else if(pagelist.Size() >= S3fsCurl::GetMultipartSize()){ + // mix multipart uploading + + // This is to ensure that each part is 5MB or more. + // If the part is less than 5MB, download it. 
+ fdpage_list_t dlpages; + fdpage_list_t mixuppages; + if(!pagelist.GetPageListsForMultipartUpload(dlpages, mixuppages, S3fsCurl::GetMultipartSize())){ + S3FS_PRN_ERR("something error occurred during getting download pagelist."); + return -1; + } + + // [TODO] should use parallel downloading + // + for(fdpage_list_t::const_iterator iter = dlpages.begin(); iter != dlpages.end(); ++iter){ + if(0 != (result = Load(iter->offset, iter->bytes, AutoLock::ALREADY_LOCKED, /*is_modified_flag=*/ true))){ // set loaded and modified flag + S3FS_PRN_ERR("failed to get parts(start=%lld, size=%lld) before uploading.", static_cast(iter->offset), static_cast(iter->bytes)); + return result; + } + } + + // multipart uploading with copy api + result = S3fsCurl::ParallelMixMultipartUploadRequest(tpath ? tpath : tmppath.c_str(), tmporgmeta, physical_fd, mixuppages); + + }else{ + // normal uploading (too small part size) + + // If there are unloaded pages, they are loaded at here. + if(0 != (result = Load(/*start=*/ 0, /*size=*/ 0, AutoLock::ALREADY_LOCKED))){ + S3FS_PRN_ERR("failed to load parts before uploading object(%d)", result); + return result; + } + + S3fsCurl s3fscurl(true); + result = s3fscurl.PutRequest(tpath ? 
tpath : tmppath.c_str(), tmporgmeta, physical_fd); + } + + // reset uploaded file size + size_orgmeta = st.st_size; + } + untreated_list.ClearAll(); + + }else{ + // Already start uploading + + // upload rest data + off_t untreated_start = 0; + off_t untreated_size = 0; + if(untreated_list.GetLastUpdatedPart(untreated_start, untreated_size, S3fsCurl::GetMultipartSize(), 0) && 0 < untreated_size){ + if(0 != (result = NoCacheMultipartPost(pseudo_obj, physical_fd, untreated_start, untreated_size))){ + S3FS_PRN_ERR("failed to multipart post(start=%lld, size=%lld) for file(physical_fd=%d).", static_cast(untreated_start), static_cast(untreated_size), physical_fd); + return result; + } + untreated_list.ClearParts(untreated_start, untreated_size); + } + // complete multipart uploading. + if(0 != (result = NoCacheCompleteMultipartPost(pseudo_obj))){ + S3FS_PRN_ERR("failed to complete(finish) multipart post for file(physical_fd=%d).", physical_fd); + return result; + } + // truncate file to zero + if(-1 == ftruncate(physical_fd, 0)){ + // So the file has already been removed, skip error. + S3FS_PRN_ERR("failed to truncate file(physical_fd=%d) to zero, but continue...", physical_fd); + } + // put pending headers or create new file + if(0 != (result = UploadPending(-1, AutoLock::ALREADY_LOCKED))){ + return result; + } + } + + if(0 == result){ + pagelist.ClearAllModified(); + pending_status = pending_status_t::NO_UPDATE_PENDING; + } + return result; +} + +// [NOTE] +// Both fdent_lock and fdent_data_lock must be locked before calling. +// +int FdEntity::RowFlushStreamMultipart(PseudoFdInfo* pseudo_obj, const char* tpath) +{ + S3FS_PRN_INFO3("[tpath=%s][path=%s][pseudo_fd=%d][physical_fd=%d][mix_upload=%s]", SAFESTRPTR(tpath), path.c_str(), (pseudo_obj ? pseudo_obj->GetPseudoFd() : -1), physical_fd, (FdEntity::mixmultipart ? 
"true" : "false")); + + if(-1 == physical_fd || !pseudo_obj){ + return -EBADF; + } + int result = 0; + + if(pagelist.Size() <= S3fsCurl::GetMultipartSize()){ + // + // Use normal upload instead of multipart upload(too small part size) + // + + // backup upload file size + struct stat st; + memset(&st, 0, sizeof(struct stat)); + if(-1 == fstat(physical_fd, &st)){ + S3FS_PRN_ERR("fstat is failed by errno(%d), but continue...", errno); + } + + // If there are unloaded pages, they are loaded at here. + if(0 != (result = Load(/*start=*/ 0, /*size=*/ 0, AutoLock::ALREADY_LOCKED))){ + S3FS_PRN_ERR("failed to load parts before uploading object(%d)", result); + return result; + } + + headers_t tmporgmeta = orgmeta; + S3fsCurl s3fscurl(true); + result = s3fscurl.PutRequest(path.c_str(), tmporgmeta, physical_fd); + + // reset uploaded file size + size_orgmeta = st.st_size; + + untreated_list.ClearAll(); + + if(0 == result){ + pagelist.ClearAllModified(); + } + + }else{ + // + // Make upload/download/copy/cancel lists from file + // + mp_part_list_t to_upload_list; + mp_part_list_t to_copy_list; + mp_part_list_t to_download_list; + filepart_list_t cancel_uploaded_list; + bool wait_upload_complete = false; + if(!pseudo_obj->ExtractUploadPartsFromAllArea(untreated_list, to_upload_list, to_copy_list, to_download_list, cancel_uploaded_list, wait_upload_complete, S3fsCurl::GetMultipartSize(), pagelist.Size(), FdEntity::mixmultipart)){ + S3FS_PRN_ERR("Failed to extract various upload parts list from all area: errno(EIO)"); + return -EIO; + } + + // + // Check total size for downloading and Download + // + off_t total_download_size = total_mp_part_list(to_download_list); + if(0 < total_download_size){ + // + // Check if there is enough free disk space for the total download size + // + if(!ReserveDiskSpace(total_download_size)){ + // no enough disk space + // + // [NOTE] + // Because there is no left space size to download, we can't solve this anymore + // in this case which is 
uploading in sequence. + // + S3FS_PRN_WARN("Not enough local storage(%lld byte) to cache write request for whole of the file: [path=%s][physical_fd=%d]", static_cast(total_download_size), path.c_str(), physical_fd); + return -ENOSPC; // No space left on device + } + // enough disk space + + // + // Download all parts + // + // [TODO] + // Execute in parallel downloading with multiple thread. + // + for(mp_part_list_t::const_iterator download_iter = to_download_list.begin(); download_iter != to_download_list.end(); ++download_iter){ + if(0 != (result = Load(download_iter->start, download_iter->size, AutoLock::ALREADY_LOCKED))){ + break; + } + } + FdManager::FreeReservedDiskSpace(total_download_size); + if(0 != result){ + S3FS_PRN_ERR("failed to load uninitialized area before writing(errno=%d)", result); + return result; + } + } + + // + // Has multipart uploading already started? + // + if(!pseudo_obj->IsUploading()){ + // + // Multipart uploading hasn't started yet, so start it. + // + S3fsCurl s3fscurl(true); + std::string upload_id; + if(0 != (result = s3fscurl.PreMultipartPostRequest(path.c_str(), orgmeta, upload_id, true))){ + S3FS_PRN_ERR("failed to setup multipart upload(create upload id) by errno(%d)", result); + return result; + } + if(!pseudo_obj->InitialUploadInfo(upload_id)){ + S3FS_PRN_ERR("failed to setup multipart upload(set upload id to object)"); + return -EIO; + } + + // Clear the dirty flag, because the meta data is updated. + pending_status = pending_status_t::NO_UPDATE_PENDING; + } + + // + // Output debug level information + // + // When canceling(overwriting) a part that has already been uploaded, output it. 
+ // + if(S3fsLog::IsS3fsLogDbg()){ + for(filepart_list_t::const_iterator cancel_iter = cancel_uploaded_list.begin(); cancel_iter != cancel_uploaded_list.end(); ++cancel_iter){ + S3FS_PRN_DBG("Cancel uploaded: start(%lld), size(%lld), part number(%d)", static_cast(cancel_iter->startpos), static_cast(cancel_iter->size), (cancel_iter->petag ? cancel_iter->petag->part_num : -1)); + } + } + + // [NOTE] + // If there is a part where has already been uploading, that part + // is re-updated after finishing uploading, so the part of the last + // uploded must be canceled. + // (These are cancel_uploaded_list, cancellation processing means + // re-uploading the same area.) + // + // In rare cases, the completion of the previous upload and the + // re-upload may be reversed, causing the ETag to be reversed, + // in which case the upload will fail. + // To prevent this, if the upload of the same area as the re-upload + // is incomplete, we must wait for it to complete here. + // + if(wait_upload_complete){ + if(0 != (result = pseudo_obj->WaitAllThreadsExit())){ + S3FS_PRN_ERR("Some cancel area uploads that were waiting to complete failed with %d.", result); + return result; + } + } + + // + // Upload multipart and copy parts and wait exiting them + // + if(!pseudo_obj->ParallelMultipartUploadAll(path.c_str(), to_upload_list, to_copy_list, result)){ + S3FS_PRN_ERR("Failed to upload multipart parts."); + untreated_list.ClearAll(); + pseudo_obj->ClearUploadInfo(); // clear multipart upload info + return -EIO; + } + if(0 != result){ + S3FS_PRN_ERR("An error(%d) occurred in some threads that were uploading parallel multiparts, but continue to clean up..", result); + untreated_list.ClearAll(); + pseudo_obj->ClearUploadInfo(); // clear multipart upload info + return result; + } + + // + // Complete uploading + // + std::string upload_id; + etaglist_t etaglist; + if(!pseudo_obj->GetUploadId(upload_id) || !pseudo_obj->GetEtaglist(etaglist)){ + S3FS_PRN_ERR("There is no upload id or 
etag list."); + untreated_list.ClearAll(); + pseudo_obj->ClearUploadInfo(); // clear multipart upload info + return -EIO; + }else{ + S3fsCurl s3fscurl(true); + result = s3fscurl.CompleteMultipartPostRequest(path.c_str(), upload_id, etaglist); + s3fscurl.DestroyCurlHandle(); + if(0 != result){ + S3FS_PRN_ERR("failed to complete multipart upload by errno(%d)", result); + untreated_list.ClearAll(); + pseudo_obj->ClearUploadInfo(); // clear multipart upload info + + S3fsCurl s3fscurl_abort(true); + int result2 = s3fscurl.AbortMultipartUpload(path.c_str(), upload_id); + s3fscurl_abort.DestroyCurlHandle(); + if(0 != result2){ + S3FS_PRN_ERR("failed to abort multipart upload by errno(%d)", result2); + } + return result; + } + } + untreated_list.ClearAll(); + pseudo_obj->ClearUploadInfo(); // clear multipart upload info + + // put pending headers or create new file + if(0 != (result = UploadPending(-1, AutoLock::ALREADY_LOCKED))){ + return result; + } + } + untreated_list.ClearAll(); + + if(0 == result){ + pagelist.ClearAllModified(); + } + + return result; +} + +// [NOTICE] +// Need to lock before calling this method. +bool FdEntity::ReserveDiskSpace(off_t size) +{ + if(FdManager::ReserveDiskSpace(size)){ + return true; + } + + if(!pagelist.IsModified()){ + // try to clear all cache for this fd. 
+ pagelist.Init(pagelist.Size(), false, false); + if(-1 == ftruncate(physical_fd, 0) || -1 == ftruncate(physical_fd, pagelist.Size())){ + S3FS_PRN_ERR("failed to truncate temporary file(physical_fd=%d).", physical_fd); + return false; + } + + if(FdManager::ReserveDiskSpace(size)){ + return true; + } + } + + FdManager::get()->CleanupCacheDir(); + + return FdManager::ReserveDiskSpace(size); +} + +ssize_t FdEntity::Read(int fd, char* bytes, off_t start, size_t size, bool force_load) +{ + S3FS_PRN_DBG("[path=%s][pseudo_fd=%d][physical_fd=%d][offset=%lld][size=%zu]", path.c_str(), fd, physical_fd, static_cast(start), size); + + if(-1 == physical_fd || nullptr == CheckPseudoFdFlags(fd, false)){ + S3FS_PRN_DBG("pseudo_fd(%d) to physical_fd(%d) for path(%s) is not opened or not readable", fd, physical_fd, path.c_str()); + return -EBADF; + } + + if(use_newcache){ + size_t realSize = GetRealsize(); + if (start >= realSize) return 0; + size_t realReadSize = size; + if (start + size > realSize) { + realReadSize = realSize - start; + } + + int res = accessor->Get(path, start, realReadSize, bytes); + if (!res) { + return realReadSize; + } + return res; + } + + AutoLock auto_lock(&fdent_lock); + AutoLock auto_lock2(&fdent_data_lock); + + if(force_load){ + pagelist.SetPageLoadedStatus(start, size, PageList::page_status::NOT_LOAD_MODIFIED); + } + + ssize_t rsize; + + // check disk space + if(0 < pagelist.GetTotalUnloadedPageSize(start, size)){ + // load size(for prefetch) + size_t load_size = size; + if(start + static_cast(size) < pagelist.Size()){ + ssize_t prefetch_max_size = std::max(static_cast(size), S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount()); + + if(start + prefetch_max_size < pagelist.Size()){ + load_size = prefetch_max_size; + }else{ + load_size = pagelist.Size() - start; + } + } + + if(!ReserveDiskSpace(load_size)){ + S3FS_PRN_WARN("could not reserve disk space for pre-fetch download"); + load_size = size; + if(!ReserveDiskSpace(load_size)){ + 
S3FS_PRN_ERR("could not reserve disk space for pre-fetch download"); + return -ENOSPC; + } + } + + // Loading + int result = 0; + if(0 < size){ + result = Load(start, load_size, AutoLock::ALREADY_LOCKED); + } + + FdManager::FreeReservedDiskSpace(load_size); + + if(0 != result){ + S3FS_PRN_ERR("could not download. start(%lld), size(%zu), errno(%d)", static_cast(start), size, result); + return result; + } + } + + // Reading + if(-1 == (rsize = pread(physical_fd, bytes, size, start))){ + S3FS_PRN_ERR("pread failed. errno(%d)", errno); + return -errno; + } + return rsize; +} + +ssize_t FdEntity::ReadByAdaptor(int fd, char* bytes, off_t start, size_t size, bool force_load, std::shared_ptr dataAdaptor) +{ + S3FS_PRN_DBG("[path=%s][pseudo_fd=%d][physical_fd=%d][offset=%lld][size=%zu]", path.c_str(), fd, physical_fd, static_cast(start), size); + + ssize_t rsize; + { + // AutoLock auto_lock(&fdent_lock); // TODO: If download occurs during flush, the fdent_lock will conflict + AutoLock auto_lock2(&fdent_data_lock); + + if(force_load){ + pagelist.SetPageLoadedStatus(start, size, PageList::page_status::NOT_LOAD_MODIFIED); + } + + // check disk space + if(0 < pagelist.GetTotalUnloadedPageSize(start, size)){ + // load size(for prefetch) + size_t load_size = size; + if(start + static_cast(size) < pagelist.Size()){ + ssize_t prefetch_max_size = std::max(static_cast(size), S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount()); + + if(start + prefetch_max_size < pagelist.Size()){ + load_size = prefetch_max_size; + }else{ + load_size = pagelist.Size() - start; + } + } + + if(!ReserveDiskSpace(load_size)){ + S3FS_PRN_WARN("could not reserve disk space for pre-fetch download"); + load_size = size; + if(!ReserveDiskSpace(load_size)){ + S3FS_PRN_ERR("could not reserve disk space for pre-fetch download"); + return -ENOSPC; + } + } + + // Loading + int result = 0; + if(0 < size){ + result = LoadByAdaptor(start, load_size, AutoLock::ALREADY_LOCKED, dataAdaptor); + } + + 
FdManager::FreeReservedDiskSpace(load_size); + + if(0 != result){ + S3FS_PRN_ERR("could not download. start(%lld), size(%zu), errno(%d)", static_cast(start), size, result); + return result; + } + } + + // read/write/disk release may be concurrent + int lock_res = flock_set(physical_fd, F_RDLCK); + if(lock_res < 0){ + S3FS_PRN_ERR("cache file read lock failed. path(%s), physical_fd(%d)", path.c_str(), physical_fd); + return lock_res; + } + } + + // Reading + if(-1 == (rsize = pread(physical_fd, bytes, size, start))){ + flock_set(physical_fd, F_UNLCK); + S3FS_PRN_ERR("pread failed. errno(%d)", errno); + return -errno; + } + + flock_set(physical_fd, F_UNLCK); + return rsize; +} + +ssize_t FdEntity::Write(int fd, const char* bytes, off_t start, size_t size, bool force_tmpfile) +{ + S3FS_PRN_WARN("[path=%s][pseudo_fd=%d][physical_fd=%d][offset=%lld][size=%zu]", path.c_str(), fd, physical_fd, static_cast(start), size); + + PseudoFdInfo* pseudo_obj = nullptr; + if(-1 == physical_fd || nullptr == (pseudo_obj = CheckPseudoFdFlags(fd, false))){ + S3FS_PRN_ERR("pseudo_fd(%d) to physical_fd(%d) for path(%s) is not opened or not writable", fd, physical_fd, path.c_str()); + return -EBADF; + } + + if(use_newcache && !force_tmpfile){ + int res = accessor->Put(path, start, size, bytes); + if (!res) { + return size; + } + return res; + } + + // check if not enough disk space left BEFORE locking fd + if(FdManager::IsCacheDir() && !FdManager::IsSafeDiskSpace(nullptr, size)){ + FdManager::get()->CleanupCacheDir(); + } + AutoLock auto_lock(&fdent_lock); + AutoLock auto_lock2(&fdent_data_lock); + + // check file size + if(pagelist.Size() < start){ + // grow file size + if(-1 == ftruncate(physical_fd, start)){ + S3FS_PRN_ERR("failed to truncate temporary file(physical_fd=%d).", physical_fd); + return -errno; + } + // set untreated area + if(!AddUntreated(pagelist.Size(), (start - pagelist.Size()))){ + S3FS_PRN_ERR("failed to set untreated area by incremental."); + return -EIO; + } + + // 
add new area + pagelist.SetPageLoadedStatus(pagelist.Size(), start - pagelist.Size(), PageList::page_status::MODIFIED); + } + + ssize_t wsize; + if(nomultipart){ + // No multipart upload + wsize = WriteNoMultipart(pseudo_obj, bytes, start, size); + }else if(FdEntity::streamupload){ + // Stream upload + wsize = WriteStreamUpload(pseudo_obj, bytes, start, size); + }else if(FdEntity::mixmultipart){ + // Mix multipart upload + wsize = WriteMixMultipart(pseudo_obj, bytes, start, size); + }else{ + // Normal multipart upload + wsize = WriteMultipart(pseudo_obj, bytes, start, size); + } + + return wsize; +} + +// [NOTE] +// Both fdent_lock and fdent_data_lock must be locked before calling. +// +ssize_t FdEntity::WriteNoMultipart(const PseudoFdInfo* pseudo_obj, const char* bytes, off_t start, size_t size) +{ + S3FS_PRN_DBG("[path=%s][pseudo_fd=%d][physical_fd=%d][offset=%lld][size=%zu]", path.c_str(), (pseudo_obj ? pseudo_obj->GetPseudoFd() : -1), physical_fd, static_cast(start), size); + + if(-1 == physical_fd || !pseudo_obj){ + S3FS_PRN_ERR("pseudo_fd(%d) to physical_fd(%d) for path(%s) is not opened or not writable", (pseudo_obj ? pseudo_obj->GetPseudoFd() : -1), physical_fd, path.c_str()); + return -EBADF; + } + + int result = 0; + + if(pseudo_obj->IsUploading()){ + S3FS_PRN_ERR("Why uploading now, even though s3fs is No Multipart uploading mode."); + return -EBADF; + } + + // check disk space + off_t restsize = pagelist.GetTotalUnloadedPageSize(0, start) + size; + if(!ReserveDiskSpace(restsize)){ + // no enough disk space + S3FS_PRN_WARN("Not enough local storage to cache write request: [path=%s][physical_fd=%d][offset=%lld][size=%zu]", path.c_str(), physical_fd, static_cast(start), size); + return -ENOSPC; // No space left on device + } + + // Load uninitialized area which starts from 0 to (start + size) before writing. 
+ if(0 < start){ + result = Load(0, start, AutoLock::ALREADY_LOCKED); + } + + FdManager::FreeReservedDiskSpace(restsize); + if(0 != result){ + S3FS_PRN_ERR("failed to load uninitialized area before writing(errno=%d)", result); + return result; + } + + // Writing + ssize_t wsize; + if(-1 == (wsize = pwrite(physical_fd, bytes, size, start))){ + S3FS_PRN_ERR("pwrite failed. errno(%d)", errno); + return -errno; + } + if(0 < wsize){ + pagelist.SetPageLoadedStatus(start, wsize, PageList::page_status::LOAD_MODIFIED); + AddUntreated(start, wsize); + } + + // Load uninitialized area which starts from (start + size) to EOF after writing. + if(pagelist.Size() > start + static_cast(size)){ + result = Load(start + size, pagelist.Size(), AutoLock::ALREADY_LOCKED); + if(0 != result){ + S3FS_PRN_ERR("failed to load uninitialized area after writing(errno=%d)", result); + return result; + } + } + + return wsize; +} + +// [NOTE] +// Both fdent_lock and fdent_data_lock must be locked before calling. +// +ssize_t FdEntity::WriteMultipart(PseudoFdInfo* pseudo_obj, const char* bytes, off_t start, size_t size) +{ + S3FS_PRN_DBG("[path=%s][pseudo_fd=%d][physical_fd=%d][offset=%lld][size=%zu]", path.c_str(), (pseudo_obj ? pseudo_obj->GetPseudoFd() : -1), physical_fd, static_cast(start), size); + + if(-1 == physical_fd || !pseudo_obj){ + S3FS_PRN_ERR("pseudo_fd(%d) to physical_fd(%d) for path(%s) is not opened or not writable", (pseudo_obj ? pseudo_obj->GetPseudoFd() : -1), physical_fd, path.c_str()); + return -EBADF; + } + + int result = 0; + + if(!pseudo_obj->IsUploading()){ + // check disk space + off_t restsize = pagelist.GetTotalUnloadedPageSize(0, start) + size; + if(ReserveDiskSpace(restsize)){ + // enough disk space + + // Load uninitialized area which starts from 0 to (start + size) before writing. 
+ if(0 < start){ + result = Load(0, start, AutoLock::ALREADY_LOCKED); + } + + FdManager::FreeReservedDiskSpace(restsize); + if(0 != result){ + S3FS_PRN_ERR("failed to load uninitialized area before writing(errno=%d)", result); + return result; + } + }else{ + // no enough disk space + if((start + static_cast(size)) <= S3fsCurl::GetMultipartSize()){ + S3FS_PRN_WARN("Not enough local storage to cache write request till multipart upload can start: [path=%s][physical_fd=%d][offset=%lld][size=%zu]", path.c_str(), physical_fd, static_cast(start), size); + return -ENOSPC; // No space left on device + } + if(0 != (result = NoCachePreMultipartPost(pseudo_obj))){ + S3FS_PRN_ERR("failed to switch multipart uploading with no cache(errno=%d)", result); + return result; + } + // start multipart uploading + if(0 != (result = NoCacheLoadAndPost(pseudo_obj, 0, start))){ + S3FS_PRN_ERR("failed to load uninitialized area and multipart uploading it(errno=%d)", result); + return result; + } + untreated_list.ClearAll(); + } + }else{ + // already start multipart uploading + } + + // Writing + ssize_t wsize; + if(-1 == (wsize = pwrite(physical_fd, bytes, size, start))){ + S3FS_PRN_ERR("pwrite failed. errno(%d)", errno); + return -errno; + } + if(0 < wsize){ + pagelist.SetPageLoadedStatus(start, wsize, PageList::page_status::LOAD_MODIFIED); + AddUntreated(start, wsize); + } + + // Load uninitialized area which starts from (start + size) to EOF after writing. 
+ if(pagelist.Size() > start + static_cast(size)){ + result = Load(start + size, pagelist.Size(), AutoLock::ALREADY_LOCKED); + if(0 != result){ + S3FS_PRN_ERR("failed to load uninitialized area after writing(errno=%d)", result); + return result; + } + } + + // check multipart uploading + if(pseudo_obj->IsUploading()){ + // get last untreated part(maximum size is multipart size) + off_t untreated_start = 0; + off_t untreated_size = 0; + if(untreated_list.GetLastUpdatedPart(untreated_start, untreated_size, S3fsCurl::GetMultipartSize())){ + // when multipart max size is reached + if(0 != (result = NoCacheMultipartPost(pseudo_obj, physical_fd, untreated_start, untreated_size))){ + S3FS_PRN_ERR("failed to multipart post(start=%lld, size=%lld) for file(physical_fd=%d).", static_cast(untreated_start), static_cast(untreated_size), physical_fd); + return result; + } + + // [NOTE] + // truncate file to zero and set length to part offset + size + // after this, file length is (offset + size), but file does not use any disk space. + // + if(-1 == ftruncate(physical_fd, 0) || -1 == ftruncate(physical_fd, (untreated_start + untreated_size))){ + S3FS_PRN_ERR("failed to truncate file(physical_fd=%d).", physical_fd); + return -errno; + } + untreated_list.ClearParts(untreated_start, untreated_size); + } + } + return wsize; +} + +// [NOTE] +// Both fdent_lock and fdent_data_lock must be locked before calling. +// +ssize_t FdEntity::WriteMixMultipart(PseudoFdInfo* pseudo_obj, const char* bytes, off_t start, size_t size) +{ + S3FS_PRN_DBG("[path=%s][pseudo_fd=%d][physical_fd=%d][offset=%lld][size=%zu]", path.c_str(), (pseudo_obj ? pseudo_obj->GetPseudoFd() : -1), physical_fd, static_cast(start), size); + + if(-1 == physical_fd || !pseudo_obj){ + S3FS_PRN_ERR("pseudo_fd(%d) to physical_fd(%d) for path(%s) is not opened or not writable", (pseudo_obj ? 
pseudo_obj->GetPseudoFd() : -1), physical_fd, path.c_str()); + return -EBADF; + } + + int result; + + if(!pseudo_obj->IsUploading()){ + // check disk space + off_t restsize = pagelist.GetTotalUnloadedPageSize(0, start, MIN_MULTIPART_SIZE) + size; + if(ReserveDiskSpace(restsize)){ + // enough disk space + FdManager::FreeReservedDiskSpace(restsize); + }else{ + // no enough disk space + if((start + static_cast(size)) <= S3fsCurl::GetMultipartSize()){ + S3FS_PRN_WARN("Not enough local storage to cache write request till multipart upload can start: [path=%s][physical_fd=%d][offset=%lld][size=%zu]", path.c_str(), physical_fd, static_cast(start), size); + return -ENOSPC; // No space left on device + } + if(0 != (result = NoCachePreMultipartPost(pseudo_obj))){ + S3FS_PRN_ERR("failed to switch multipart uploading with no cache(errno=%d)", result); + return result; + } + // start multipart uploading + if(0 != (result = NoCacheLoadAndPost(pseudo_obj, 0, start))){ + S3FS_PRN_ERR("failed to load uninitialized area and multipart uploading it(errno=%d)", result); + return result; + } + untreated_list.ClearAll(); + } + }else{ + // already start multipart uploading + } + + // Writing + ssize_t wsize; + if(-1 == (wsize = pwrite(physical_fd, bytes, size, start))){ + S3FS_PRN_ERR("pwrite failed. 
errno(%d)", errno); + return -errno; + } + if(0 < wsize){ + pagelist.SetPageLoadedStatus(start, wsize, PageList::page_status::LOAD_MODIFIED); + AddUntreated(start, wsize); + } + + // check multipart uploading + if(pseudo_obj->IsUploading()){ + // get last untreated part(maximum size is multipart size) + off_t untreated_start = 0; + off_t untreated_size = 0; + if(untreated_list.GetLastUpdatedPart(untreated_start, untreated_size, S3fsCurl::GetMultipartSize())){ + // when multipart max size is reached + if(0 != (result = NoCacheMultipartPost(pseudo_obj, physical_fd, untreated_start, untreated_size))){ + S3FS_PRN_ERR("failed to multipart post(start=%lld, size=%lld) for file(physical_fd=%d).", static_cast(untreated_start), static_cast(untreated_size), physical_fd); + return result; + } + + // [NOTE] + // truncate file to zero and set length to part offset + size + // after this, file length is (offset + size), but file does not use any disk space. + // + if(-1 == ftruncate(physical_fd, 0) || -1 == ftruncate(physical_fd, (untreated_start + untreated_size))){ + S3FS_PRN_ERR("failed to truncate file(physical_fd=%d).", physical_fd); + return -errno; + } + untreated_list.ClearParts(untreated_start, untreated_size); + } + } + return wsize; +} + +// +// On Stream upload, the uploading is executed in another thread when the +// written area exceeds the maximum size of multipart upload. +// +// [NOTE] +// Both fdent_lock and fdent_data_lock must be locked before calling. +// +ssize_t FdEntity::WriteStreamUpload(PseudoFdInfo* pseudo_obj, const char* bytes, off_t start, size_t size) +{ + S3FS_PRN_DBG("[path=%s][pseudo_fd=%d][physical_fd=%d][offset=%lld][size=%zu]", path.c_str(), (pseudo_obj ? pseudo_obj->GetPseudoFd() : -1), physical_fd, static_cast(start), size); + + if(-1 == physical_fd || !pseudo_obj){ + S3FS_PRN_ERR("pseudo_fd(%d) to physical_fd(%d) for path(%s) is not opened or not writable", (pseudo_obj ? 
pseudo_obj->GetPseudoFd() : -1), physical_fd, path.c_str()); + return -EBADF; + } + + // Writing + ssize_t wsize; + if(-1 == (wsize = pwrite(physical_fd, bytes, size, start))){ + S3FS_PRN_ERR("pwrite failed. errno(%d)", errno); + return -errno; + } + if(0 < wsize){ + pagelist.SetPageLoadedStatus(start, wsize, PageList::page_status::LOAD_MODIFIED); + AddUntreated(start, wsize); + } + + // Check and Upload + // + // If the last updated Untreated area exceeds the maximum upload size, + // upload processing is performed. + // + headers_t tmporgmeta = orgmeta; + bool isuploading = pseudo_obj->IsUploading(); + ssize_t result; + if(0 != (result = pseudo_obj->UploadBoundaryLastUntreatedArea(path.c_str(), tmporgmeta, this))){ + S3FS_PRN_ERR("Failed to upload the last untreated parts(area) : result=%zd", result); + return result; + } + + if(!isuploading && pseudo_obj->IsUploading()){ + // Clear the dirty flag, because the meta data is updated. + pending_status = pending_status_t::NO_UPDATE_PENDING; + } + + return wsize; +} + +// [NOTE] +// Returns true if merged to orgmeta. +// If true is returned, the caller can update the header. +// If it is false, do not update the header because multipart upload is in progress. +// In this case, the header is pending internally and is updated after the upload +// is complete(flush file). +// +bool FdEntity::MergeOrgMeta(headers_t& updatemeta) +{ + AutoLock auto_lock(&fdent_lock); + + merge_headers(orgmeta, updatemeta, true); // overwrite all keys + // [NOTE] + // this is special cases, we remove the key which has empty values. 
+ for(headers_t::iterator hiter = orgmeta.begin(); hiter != orgmeta.end(); ){ + if(hiter->second.empty()){ + hiter = orgmeta.erase(hiter); + }else{ + ++hiter; + } + } + updatemeta = orgmeta; + orgmeta.erase("x-amz-copy-source"); + + // update ctime/mtime/atime + struct timespec mtime = get_mtime(updatemeta, false); // not overcheck + struct timespec ctime = get_ctime(updatemeta, false); // not overcheck + struct timespec atime = get_atime(updatemeta, false); // not overcheck + if(0 <= mtime.tv_sec){ + SetMCtime(mtime, (ctime.tv_sec < 0 ? mtime : ctime), AutoLock::ALREADY_LOCKED); + } + if(0 <= atime.tv_sec){ + SetAtime(atime, AutoLock::ALREADY_LOCKED); + } + + AutoLock auto_lock2(&fdent_data_lock); + if(pending_status_t::NO_UPDATE_PENDING == pending_status && (IsUploading(AutoLock::ALREADY_LOCKED) || pagelist.IsModified())){ + pending_status = pending_status_t::UPDATE_META_PENDING; + } + + return (pending_status_t::NO_UPDATE_PENDING != pending_status); +} + +// global function in s3fs.cpp +int put_headers(const char* path, headers_t& meta, bool is_copy, bool use_st_size = true); + +int FdEntity::UploadPending(int fd, AutoLock::Type type) +{ + AutoLock auto_lock(&fdent_lock, type); + int result; + + if(pending_status_t::NO_UPDATE_PENDING == pending_status){ + // nothing to do + result = 0; + + }else if(pending_status_t::UPDATE_META_PENDING == pending_status){ + headers_t updatemeta = orgmeta; + updatemeta["x-amz-copy-source"] = urlEncodePath(service_path + S3fsCred::GetBucket() + get_realpath(path.c_str())); + updatemeta["x-amz-metadata-directive"] = "REPLACE"; + + // put headers, no need to update mtime to avoid dead lock + result = put_headers(path.c_str(), updatemeta, true); + if(0 != result){ + S3FS_PRN_ERR("failed to put header after flushing file(%s) by(%d).", path.c_str(), result); + }else{ + pending_status = pending_status_t::NO_UPDATE_PENDING; + } + + }else{ // CREATE_FILE_PENDING == pending_status + if(-1 == fd){ + S3FS_PRN_ERR("could not create a new 
file(%s), because fd is not specified.", path.c_str()); + result = -EBADF; + }else{ + result = Flush(fd, AutoLock::ALREADY_LOCKED, true); + if(0 != result){ + S3FS_PRN_ERR("failed to flush for file(%s) by(%d).", path.c_str(), result); + }else{ + pending_status = pending_status_t::NO_UPDATE_PENDING; + } + } + } + return result; +} + +// [NOTE] +// For systems where the fallocate function cannot be detected, use a dummy function. +// ex. OSX +// +#ifndef HAVE_FALLOCATE +static int fallocate(int /*fd*/, int /*mode*/, off_t /*offset*/, off_t /*len*/) +{ + errno = ENOSYS; // This is a bad idea, but the caller can handle it simply. + return -1; +} +#endif // HAVE_FALLOCATE + +// [NOTE] +// If HAVE_FALLOCATE is undefined, or versions prior to 2.6.38(fallocate function exists), +// following flags are undefined. Then we need these symbols defined in fallocate, so we +// define them here. +// The definitions are copied from linux/falloc.h, but if HAVE_FALLOCATE is undefined, +// these values can be anything. +// +#ifndef FALLOC_FL_PUNCH_HOLE +#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */ +#endif +#ifndef FALLOC_FL_KEEP_SIZE +#define FALLOC_FL_KEEP_SIZE 0x01 +#endif + +// [NOTE] +// This method punches an area(on cache file) that has no data at the time it is called. +// This is called to prevent the cache file from growing. +// However, this method uses the non-portable(Linux specific) system call fallocate(). +// Also, depending on the file system, FALLOC_FL_PUNCH_HOLE mode may not work and HOLE +// will not open.(Filesystems for which this method works are ext4, btrfs, xfs, etc.) 
+// +bool FdEntity::PunchHole(off_t start, size_t size) +{ + S3FS_PRN_DBG("[path=%s][physical_fd=%d][offset=%lld][size=%zu]", path.c_str(), physical_fd, static_cast(start), size); + + AutoLock auto_lock(&fdent_lock); + AutoLock auto_lock2(&fdent_data_lock); + + if(-1 == physical_fd){ + return false; + } + + // get page list that have no data + fdpage_list_t nodata_pages; + if(!pagelist.GetNoDataPageLists(nodata_pages)){ + S3FS_PRN_ERR("failed to get page list that have no data."); + return false; + } + if(nodata_pages.empty()){ + S3FS_PRN_DBG("there is no page list that have no data, so nothing to do."); + return true; + } + + // try to punch hole to file + for(fdpage_list_t::const_iterator iter = nodata_pages.begin(); iter != nodata_pages.end(); ++iter){ + if(0 != fallocate(physical_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, iter->offset, iter->bytes)){ + if(ENOSYS == errno || EOPNOTSUPP == errno){ + S3FS_PRN_ERR("failed to fallocate for punching hole to file with errno(%d), it maybe the fallocate function is not implemented in this kernel, or the file system does not support FALLOC_FL_PUNCH_HOLE.", errno); + }else{ + S3FS_PRN_ERR("failed to fallocate for punching hole to file with errno(%d)", errno); + } + return false; + } + if(!pagelist.SetPageLoadedStatus(iter->offset, iter->bytes, PageList::page_status::NOT_LOAD_MODIFIED)){ + S3FS_PRN_ERR("succeed to punch HOLEs in the cache file, but failed to update the cache stat."); + return false; + } + S3FS_PRN_DBG("made a hole at [%lld - %lld bytes](into a boundary) of the cache file.", static_cast(iter->offset), static_cast(iter->bytes)); + } + return true; +} + +// [NOTE] +// Indicate that a new file's is dirty. +// This ensures that both metadata and data are synced during flush. 
+// +void FdEntity::MarkDirtyNewFile() +{ + AutoLock auto_lock(&fdent_lock); + AutoLock auto_lock2(&fdent_data_lock); + + pagelist.Init(0, false, true); + pending_status = pending_status_t::CREATE_FILE_PENDING; +} + +bool FdEntity::IsDirtyNewFile() const +{ + AutoLock auto_lock(&fdent_lock); + + return (pending_status_t::CREATE_FILE_PENDING == pending_status); +} + +// [NOTE] +// The fdatasync call only uploads the content but does not update +// the meta data. In the flush call, if there is no update contents, +// need to upload only metadata, so use these functions. +// +void FdEntity::MarkDirtyMetadata() +{ + AutoLock auto_lock(&fdent_lock); + AutoLock auto_lock2(&fdent_data_lock); + + if(pending_status_t::NO_UPDATE_PENDING == pending_status){ + pending_status = pending_status_t::UPDATE_META_PENDING; + } +} + +bool FdEntity::IsDirtyMetadata() const +{ + // [NOTE] + // fdent_lock must be previously locked. + // + return (pending_status_t::UPDATE_META_PENDING == pending_status); +} + +bool FdEntity::AddUntreated(off_t start, off_t size) +{ + bool result = untreated_list.AddPart(start, size); + if(!result){ + S3FS_PRN_DBG("Failed adding untreated area part."); + }else if(S3fsLog::IsS3fsLogDbg()){ + untreated_list.Dump(); + } + + return result; +} + +bool FdEntity::GetLastUpdateUntreatedPart(off_t& start, off_t& size) const +{ + // Get last untreated area + if(!untreated_list.GetLastUpdatePart(start, size)){ + return false; + } + return true; +} + +bool FdEntity::ReplaceLastUpdateUntreatedPart(off_t front_start, off_t front_size, off_t behind_start, off_t behind_size) +{ + if(0 < front_size){ + if(!untreated_list.ReplaceLastUpdatePart(front_start, front_size)){ + return false; + } + }else{ + if(!untreated_list.RemoveLastUpdatePart()){ + return false; + } + } + if(0 < behind_size){ + if(!untreated_list.AddPart(behind_start, behind_size)){ + return false; + } + } + return true; +} + +size_t FdEntity::GetRealsize() const +{ + return static_cast(realsize.load()); +} + 
+void FdEntity::UpdateRealsize(off_t size) +{ + if (size < 0) return; + while(true){ + size_t curSize = GetRealsize(); + if(curSize >= size) break; + if(realsize.compare_exchange_weak(curSize, size)) break; + } + SetUpdateMark(true); +} + +void FdEntity::TruncateRealsize(off_t size) +{ + while(true){ + size_t curSize = GetRealsize(); + if(realsize.compare_exchange_weak(curSize, size)) break; + } + SetUpdateMark(true); +} + +void FdEntity::SetUpdateMark(bool is_update) +{ + update_mark.store(is_update); +} + +bool FdEntity::GetUpdateMark() const +{ + return update_mark.load(); +} + +const headers_t& FdEntity::GetOriginalHeaders() const +{ + return this->orgmeta; +} + +ssize_t FdEntity::WriteCache(const char* bytes, off_t start, size_t size, AutoLock::Type type) +{ + AutoLock auto_data_lock(&fdent_data_lock, type); + + if(!FdManager::get()->EnsureDiskSpaceUsable(path, size)) { + S3FS_PRN_ERR("disk space not enough. path:%s", path.c_str()); + return 0; + } + + if(flock_set(physical_fd, F_WRLCK) < 0){ + S3FS_PRN_ERR("cache file write lock failed. path(%s), physical_fd(%d)", path.c_str(), physical_fd); + return 0; + } + ssize_t wsize; + if(-1 == (wsize = pwrite(physical_fd, bytes, size, start))){ + flock_set(physical_fd, F_UNLCK); + S3FS_PRN_ERR("pwrite failed. path:%s, errno(%d)", path.c_str(), errno); + return 0; + } + flock_set(physical_fd, F_UNLCK); + + pagelist.SetPageLoadedStatus(start, wsize, PageList::page_status::LOADED); + return wsize; +} + +void FdEntity::ReleaseCache() +{ + AutoLock auto_data_lock(&fdent_data_lock); + pagelist.Init(0, false, false); + + if(flock_set(physical_fd, F_WRLCK) < 0){ + S3FS_PRN_ERR("cache file write lock failed. 
path(%s), physical_fd(%d)", path.c_str(), physical_fd); + return; + } + + if(-1 == ftruncate(physical_fd, 0)) { + flock_set(physical_fd, F_UNLCK); + S3FS_PRN_ERR("failed to truncate temporary file(physical_fd=%d).", physical_fd); + return; + } + lseek(physical_fd, 0, SEEK_SET); + flock_set(physical_fd, F_UNLCK); +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/fdcache_entity.h b/s3fs/fdcache_entity.h new file mode 100644 index 0000000..25b4764 --- /dev/null +++ b/s3fs/fdcache_entity.h @@ -0,0 +1,197 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef S3FS_FDCACHE_ENTITY_H_ +#define S3FS_FDCACHE_ENTITY_H_ + +#include +#include + +#include "autolock.h" +#include "fdcache_page.h" +#include "fdcache_fdinfo.h" +#include "fdcache_untreated.h" +#include "metaheader.h" + +//------------------------------------------------ +// Symbols +//------------------------------------------------ +static constexpr int MAX_MULTIPART_CNT = 10 * 1000; // S3 multipart max count + +//------------------------------------------------ +// class FdEntity +//------------------------------------------------ +class FdEntity +{ + private: + // [NOTE] + // Distinguish between meta pending and new file creation pending, + // because the processing(request) at these updates is different. + // Therefore, the pending state is expressed by this enum type. + // + enum class pending_status_t { + NO_UPDATE_PENDING = 0, + UPDATE_META_PENDING, // pending meta header + CREATE_FILE_PENDING // pending file creation and meta header + }; + + static bool mixmultipart; // whether multipart uploading can use copy api. + static bool streamupload; // whether stream uploading. + + mutable pthread_mutex_t fdent_lock; + bool is_lock_init; + std::string path; // object path + int physical_fd; // physical file(cache or temporary file) descriptor + UntreatedParts untreated_list; // list of untreated parts that have been written and not yet uploaded(for streamupload) + fdinfo_map_t pseudo_fd_map; // pseudo file descriptor information map + FILE* pfile; // file pointer(tmp file or cache file) + ino_t inode; // inode number for cache file + headers_t orgmeta; // original headers at opening + off_t size_orgmeta; // original file size in original headers + + mutable pthread_mutex_t fdent_data_lock;// protects the following members + PageList pagelist; + std::string cachepath; // local cache file path + // (if this is empty, does not load/save pagelist.) 
+ std::string mirrorpath; // mirror file path to local cache file path + pending_status_t pending_status;// status for new file creation and meta update + struct timespec holding_mtime; // if mtime is updated while the file is open, it is set time_t value + + std::atomic realsize{0}; // real file size + std::atomic update_mark{false}; // file update mark + + private: + static int FillFile(int fd, unsigned char byte, off_t size, off_t start); + static ino_t GetInode(int fd); + + void Clear(); + ino_t GetInode() const; + int OpenMirrorFile(); + int NoCacheLoadAndPost(PseudoFdInfo* pseudo_obj, off_t start = 0, off_t size = 0); // size=0 means loading to end + PseudoFdInfo* CheckPseudoFdFlags(int fd, bool writable, AutoLock::Type locktype = AutoLock::NONE); + bool IsUploading(AutoLock::Type locktype = AutoLock::NONE); + bool SetAllStatus(bool is_loaded); // [NOTE] not locking + bool SetAllStatusUnloaded() { return SetAllStatus(false); } + int NoCachePreMultipartPost(PseudoFdInfo* pseudo_obj); + int NoCacheMultipartPost(PseudoFdInfo* pseudo_obj, int tgfd, off_t start, off_t size); + int NoCacheCompleteMultipartPost(PseudoFdInfo* pseudo_obj); + int RowFlushNoMultipart(const PseudoFdInfo* pseudo_obj, const char* tpath); + int RowFlushMultipart(PseudoFdInfo* pseudo_obj, const char* tpath); + int RowFlushMixMultipart(PseudoFdInfo* pseudo_obj, const char* tpath); + int RowFlushStreamMultipart(PseudoFdInfo* pseudo_obj, const char* tpath); + ssize_t WriteNoMultipart(const PseudoFdInfo* pseudo_obj, const char* bytes, off_t start, size_t size); + ssize_t WriteMultipart(PseudoFdInfo* pseudo_obj, const char* bytes, off_t start, size_t size); + ssize_t WriteMixMultipart(PseudoFdInfo* pseudo_obj, const char* bytes, off_t start, size_t size); + ssize_t WriteStreamUpload(PseudoFdInfo* pseudo_obj, const char* bytes, off_t start, size_t size); + + bool ReserveDiskSpace(off_t size); + + bool AddUntreated(off_t start, off_t size); + + bool IsDirtyMetadata() const; + + public: + static 
bool GetNoMixMultipart() { return mixmultipart; } + static bool SetNoMixMultipart(); + static bool GetStreamUpload() { return streamupload; } + static bool SetStreamUpload(bool isstream); + + explicit FdEntity(const char* tpath = nullptr, const char* cpath = nullptr); + ~FdEntity(); + FdEntity(const FdEntity&) = delete; + FdEntity(FdEntity&&) = delete; + FdEntity& operator=(const FdEntity&) = delete; + FdEntity& operator=(FdEntity&&) = delete; + + void Close(int fd); + bool IsOpen() const { return (-1 != physical_fd); } + bool FindPseudoFd(int fd, AutoLock::Type locktype = AutoLock::NONE) const; + int Open(const headers_t* pmeta, off_t size, const struct timespec& ts_mctime, int flags, AutoLock::Type type); + bool LoadAll(int fd, headers_t* pmeta = nullptr, off_t* size = nullptr, bool force_load = false); + int Dup(int fd, AutoLock::Type locktype = AutoLock::NONE); + int OpenPseudoFd(int flags = O_RDONLY, AutoLock::Type locktype = AutoLock::NONE); + int GetOpenCount(AutoLock::Type locktype = AutoLock::NONE) const; + const std::string& GetPath() const { return path; } + bool RenamePath(const std::string& newpath, std::string& fentmapkey); + int GetPhysicalFd() const { return physical_fd; } + bool IsModified() const; + bool MergeOrgMeta(headers_t& updatemeta); + int UploadPending(int fd, AutoLock::Type type); + + bool GetStats(struct stat& st, AutoLock::Type locktype = AutoLock::NONE) const; + int SetCtime(struct timespec time, AutoLock::Type locktype = AutoLock::NONE); + int SetAtime(struct timespec time, AutoLock::Type locktype = AutoLock::NONE); + int SetMCtime(struct timespec mtime, struct timespec ctime, AutoLock::Type locktype = AutoLock::NONE); + bool UpdateCtime(); + bool UpdateAtime(); + bool UpdateMtime(bool clear_holding_mtime = false); + bool UpdateMCtime(); + bool SetHoldingMtime(struct timespec mtime, AutoLock::Type locktype = AutoLock::NONE); + bool ClearHoldingMtime(AutoLock::Type locktype = AutoLock::NONE); + bool GetSize(off_t& size) const; + bool 
GetXattr(std::string& xattr) const; + bool SetXattr(const std::string& xattr); + bool SetMode(mode_t mode); + bool SetUId(uid_t uid); + bool SetGId(gid_t gid); + bool SetContentType(const char* path); + + int Load(off_t start, off_t size, AutoLock::Type type, bool is_modified_flag = false); // size=0 means loading to end + int LoadByAdaptor(off_t start, off_t size, AutoLock::Type type, std::shared_ptr dataAdaptor, bool is_modified_flag = false); + + off_t BytesModified(); + int RowFlush(int fd, const char* tpath, AutoLock::Type type, bool force_sync = false, bool force_tmpfile = false); + int Flush(int fd, AutoLock::Type type, bool force_sync = false, bool force_tmpfile = false) { return RowFlush(fd, nullptr, type, force_sync, force_tmpfile); } + + ssize_t Read(int fd, char* bytes, off_t start, size_t size, bool force_load = false); + ssize_t ReadByAdaptor(int fd, char* bytes, off_t start, size_t size, bool force_load, std::shared_ptr dataAdaptor); + ssize_t Write(int fd, const char* bytes, off_t start, size_t size, bool force_tmpfile = false); + + bool PunchHole(off_t start = 0, size_t size = 0); + + void MarkDirtyNewFile(); + bool IsDirtyNewFile() const; + void MarkDirtyMetadata(); + + bool GetLastUpdateUntreatedPart(off_t& start, off_t& size) const; + bool ReplaceLastUpdateUntreatedPart(off_t front_start, off_t front_size, off_t behind_start, off_t behind_size); + + size_t GetRealsize() const; + void UpdateRealsize(off_t size); + void TruncateRealsize(off_t size); + void SetUpdateMark(bool is_update); + bool GetUpdateMark() const; + const headers_t& GetOriginalHeaders() const; + ssize_t WriteCache(const char* bytes, off_t start, size_t size, AutoLock::Type locktype = AutoLock::NONE); + void ReleaseCache(); +}; + +typedef std::map> fdent_map_t; // key=path, value=FdEntity* + +#endif // S3FS_FDCACHE_ENTITY_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff 
--git a/s3fs/fdcache_fdinfo.cpp b/s3fs/fdcache_fdinfo.cpp new file mode 100644 index 0000000..c71dbb7 --- /dev/null +++ b/s3fs/fdcache_fdinfo.cpp @@ -0,0 +1,1049 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "s3fs_logger.h" +#include "s3fs_util.h" +#include "fdcache_fdinfo.h" +#include "fdcache_pseudofd.h" +#include "fdcache_entity.h" +#include "curl.h" +#include "string_util.h" +#include "threadpoolman.h" + +//------------------------------------------------ +// PseudoFdInfo class variables +//------------------------------------------------ +int PseudoFdInfo::max_threads = -1; +int PseudoFdInfo::opt_max_threads = -1; + +//------------------------------------------------ +// PseudoFdInfo class methods +//------------------------------------------------ +// +// Worker function for uploading +// +void* PseudoFdInfo::MultipartUploadThreadWorker(void* arg) +{ + std::unique_ptr pthparam(static_cast(arg)); + if(!pthparam || !(pthparam->ppseudofdinfo)){ + return reinterpret_cast(-EIO); + } + S3FS_PRN_INFO3("Upload Part Thread [tpath=%s][start=%lld][size=%lld][part=%d]", 
pthparam->path.c_str(), static_cast(pthparam->start), static_cast(pthparam->size), pthparam->part_num); + + int result; + { + AutoLock auto_lock(&(pthparam->ppseudofdinfo->upload_list_lock)); + + if(0 != (result = pthparam->ppseudofdinfo->last_result)){ + S3FS_PRN_DBG("Already occurred error, thus this thread worker is exiting."); + + if(!pthparam->ppseudofdinfo->CompleteInstruction(result, AutoLock::ALREADY_LOCKED)){ // result will be overwritten with the same value. + result = -EIO; + } + return reinterpret_cast(result); + } + } + + // setup and make curl object + std::unique_ptr s3fscurl(S3fsCurl::CreateParallelS3fsCurl(pthparam->path.c_str(), pthparam->upload_fd, pthparam->start, pthparam->size, pthparam->part_num, pthparam->is_copy, pthparam->petag, pthparam->upload_id, result)); + if(nullptr == s3fscurl){ + S3FS_PRN_ERR("failed creating s3fs curl object for uploading [path=%s][start=%lld][size=%lld][part=%d]", pthparam->path.c_str(), static_cast(pthparam->start), static_cast(pthparam->size), pthparam->part_num); + + // set result for exiting + if(!pthparam->ppseudofdinfo->CompleteInstruction(result, AutoLock::NONE)){ + result = -EIO; + } + return reinterpret_cast(result); + } + + // Send request and get result + if(0 == (result = s3fscurl->RequestPerform())){ + S3FS_PRN_DBG("succeed uploading [path=%s][start=%lld][size=%lld][part=%d]", pthparam->path.c_str(), static_cast(pthparam->start), static_cast(pthparam->size), pthparam->part_num); + if(!s3fscurl->MixMultipartPostComplete()){ + S3FS_PRN_ERR("failed completion uploading [path=%s][start=%lld][size=%lld][part=%d]", pthparam->path.c_str(), static_cast(pthparam->start), static_cast(pthparam->size), pthparam->part_num); + result = -EIO; + } + }else{ + S3FS_PRN_ERR("failed uploading with error(%d) [path=%s][start=%lld][size=%lld][part=%d]", result, pthparam->path.c_str(), static_cast(pthparam->start), static_cast(pthparam->size), pthparam->part_num); + } + s3fscurl->DestroyCurlHandle(true, false); + + // set 
result + if(!pthparam->ppseudofdinfo->CompleteInstruction(result, AutoLock::NONE)){ + S3FS_PRN_WARN("This thread worker is about to end, so it doesn't return an EIO here and runs to the end."); + } + + return reinterpret_cast(result); +} + +//------------------------------------------------ +// PseudoFdInfo methods +//------------------------------------------------ +PseudoFdInfo::PseudoFdInfo(int fd, int open_flags) : pseudo_fd(-1), physical_fd(fd), flags(0), upload_fd(-1), uploaded_sem(0), instruct_count(0), completed_count(0), last_result(0) +{ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + int result; + if(0 != (result = pthread_mutex_init(&upload_list_lock, &attr))){ + S3FS_PRN_CRIT("failed to init upload_list_lock: %d", result); + abort(); + } + is_lock_init = true; + + if(-1 != physical_fd){ + pseudo_fd = PseudoFdManager::Get(); + flags = open_flags; + } +} + +PseudoFdInfo::~PseudoFdInfo() +{ + Clear(); // call before destrying the mutex + + if(is_lock_init){ + int result; + if(0 != (result = pthread_mutex_destroy(&upload_list_lock))){ + S3FS_PRN_CRIT("failed to destroy upload_list_lock: %d", result); + abort(); + } + is_lock_init = false; + } +} + +bool PseudoFdInfo::Clear() +{ + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(!CancelAllThreads() || !ResetUploadInfo(AutoLock::NONE)){ + return false; + } + CloseUploadFd(); + + if(-1 != pseudo_fd){ + PseudoFdManager::Release(pseudo_fd); + } + pseudo_fd = -1; + physical_fd = -1; + + return true; +} + +void PseudoFdInfo::CloseUploadFd() +{ + AutoLock auto_lock(&upload_list_lock); + + if(-1 != upload_fd){ + close(upload_fd); + } +} + +bool PseudoFdInfo::OpenUploadFd(AutoLock::Type type) +{ + AutoLock auto_lock(&upload_list_lock, type); + + if(-1 != upload_fd){ + // already initialized + return true; + } + if(-1 == physical_fd){ + 
S3FS_PRN_ERR("physical_fd is not initialized yet."); + return false; + } + + // duplicate fd + int fd; + if(-1 == (fd = dup(physical_fd))){ + S3FS_PRN_ERR("Could not duplicate physical file descriptor(errno=%d)", errno); + return false; + } + scope_guard guard([&]() { close(fd); }); + + if(0 != lseek(fd, 0, SEEK_SET)){ + S3FS_PRN_ERR("Could not seek physical file descriptor(errno=%d)", errno); + return false; + } + struct stat st; + if(-1 == fstat(fd, &st)){ + S3FS_PRN_ERR("Invalid file descriptor for uploading(errno=%d)", errno); + return false; + } + + guard.dismiss(); + upload_fd = fd; + return true; +} + +bool PseudoFdInfo::Set(int fd, int open_flags) +{ + if(-1 == fd){ + return false; + } + Clear(); + physical_fd = fd; + pseudo_fd = PseudoFdManager::Get(); + flags = open_flags; + + return true; +} + +bool PseudoFdInfo::Writable() const +{ + if(-1 == pseudo_fd){ + return false; + } + if(0 == (flags & (O_WRONLY | O_RDWR))){ + return false; + } + return true; +} + +bool PseudoFdInfo::Readable() const +{ + if(-1 == pseudo_fd){ + return false; + } + // O_RDONLY is 0x00, it means any pattern is readable. 
    return true;
}

// Reset multipart upload bookkeeping; when is_cancel_mp is true, running
// upload threads are canceled and awaited first.
bool PseudoFdInfo::ClearUploadInfo(bool is_cancel_mp)
{
    if(is_cancel_mp){
        // cppcheck-suppress unmatchedSuppression
        // cppcheck-suppress knownConditionTrueFalse
        if(!CancelAllThreads()){
            return false;
        }
    }
    return ResetUploadInfo(AutoLock::NONE);
}

bool PseudoFdInfo::ResetUploadInfo(AutoLock::Type type)
{
    AutoLock auto_lock(&upload_list_lock, type);

    upload_id.erase();
    upload_list.clear();
    instruct_count = 0;
    completed_count = 0;
    last_result = 0;

    return true;
}

// Store a fresh multipart upload id after resetting (and optionally canceling)
// the previous upload state.
bool PseudoFdInfo::RowInitialUploadInfo(const std::string& id, bool is_cancel_mp, AutoLock::Type type)
{
    // Canceling requires taking the lock inside ClearUploadInfo, which would
    // deadlock if the caller already holds it.
    if(is_cancel_mp && AutoLock::ALREADY_LOCKED == type){
        S3FS_PRN_ERR("Internal Error: Could not call this with type=AutoLock::ALREADY_LOCKED and is_cancel_mp=true");
        return false;
    }

    if(is_cancel_mp){
        // cppcheck-suppress unmatchedSuppression
        // cppcheck-suppress knownConditionTrueFalse
        if(!ClearUploadInfo(is_cancel_mp)){
            return false;
        }
    }else{
        // cppcheck-suppress unmatchedSuppression
        // cppcheck-suppress knownConditionTrueFalse
        if(!ResetUploadInfo(type)){
            return false;
        }
    }

    AutoLock auto_lock(&upload_list_lock, type);
    upload_id = id;
    return true;
}

// Called by each worker thread when it finishes: records a non-zero result and
// moves one instruction from "in flight" to "completed".
bool PseudoFdInfo::CompleteInstruction(int result, AutoLock::Type type)
{
    AutoLock auto_lock(&upload_list_lock, type);

    if(0 != result){
        last_result = result;
    }

    if(0 >= instruct_count){
        S3FS_PRN_ERR("Internal error: instruct_count caused an underflow.");
        return false;
    }
    --instruct_count;
    ++completed_count;

    return true;
}

bool PseudoFdInfo::GetUploadId(std::string& id) const
{
    if(!IsUploading()){
        S3FS_PRN_ERR("Multipart Upload has not started yet.");
        return false;
    }
    id = upload_id;
    return true;
}

// Collect the etag entries of all registered parts; fails if any part has no
// etag pointer.
bool PseudoFdInfo::GetEtaglist(etaglist_t& list) const
{
    if(!IsUploading()){
        S3FS_PRN_ERR("Multipart Upload has not started yet.");
        return false;
    }

    AutoLock auto_lock(&upload_list_lock);

list.clear(); + for(filepart_list_t::const_iterator iter = upload_list.begin(); iter != upload_list.end(); ++iter){ + if(iter->petag){ + list.push_back(*(iter->petag)); + }else{ + S3FS_PRN_ERR("The pointer to the etag string is null(internal error)."); + return false; + } + } + return !list.empty(); +} + +// [NOTE] +// This method adds a part for a multipart upload. +// The added new part must be an area that is exactly continuous with the +// immediately preceding part. +// An error will occur if it is discontinuous or if it overlaps with an +// existing area. +// +bool PseudoFdInfo::AppendUploadPart(off_t start, off_t size, bool is_copy, etagpair** ppetag) +{ + if(!IsUploading()){ + S3FS_PRN_ERR("Multipart Upload has not started yet."); + return false; + } + + AutoLock auto_lock(&upload_list_lock); + off_t next_start_pos = 0; + if(!upload_list.empty()){ + next_start_pos = upload_list.back().startpos + upload_list.back().size; + } + if(start != next_start_pos){ + S3FS_PRN_ERR("The expected starting position for the next part is %lld, but %lld was specified.", static_cast(next_start_pos), static_cast(start)); + return false; + } + + // make part number + int partnumber = static_cast(upload_list.size()) + 1; + + // add new part + etagpair* petag_entity = etag_entities.add(etagpair(nullptr, partnumber)); // [NOTE] Create the etag entity and register it in the list. 
    upload_list.emplace_back(false, physical_fd, start, size, is_copy, petag_entity);

    // set etag pointer
    if(ppetag){
        *ppetag = petag_entity;
    }

    return true;
}

//
// Utility for sorting upload list
//
static bool filepart_partnum_compare(const filepart& src1, const filepart& src2)
{
    return src1.get_part_number() < src2.get_part_number();
}

// Insert a part with an explicit part number (unlike AppendUploadPart, the
// part need not be contiguous); the list is re-sorted by part number.
bool PseudoFdInfo::InsertUploadPart(off_t start, off_t size, int part_num, bool is_copy, etagpair** ppetag, AutoLock::Type type)
{
    //S3FS_PRN_DBG("[start=%lld][size=%lld][part_num=%d][is_copy=%s]", static_cast(start), static_cast(size), part_num, (is_copy ? "true" : "false"));

    if(!IsUploading()){
        S3FS_PRN_ERR("Multipart Upload has not started yet.");
        return false;
    }
    if(start < 0 || size <= 0 || part_num < 0 || !ppetag){
        S3FS_PRN_ERR("Parameters are wrong.");
        return false;
    }

    AutoLock auto_lock(&upload_list_lock, type);

    // insert new part
    etagpair* petag_entity = etag_entities.add(etagpair(nullptr, part_num));
    upload_list.emplace_back(false, physical_fd, start, size, is_copy, petag_entity);

    // sort by part number
    std::sort(upload_list.begin(), upload_list.end(), filepart_partnum_compare);

    // set etag pointer
    *ppetag = petag_entity;

    return true;
}

// [NOTE]
// This method only launches the upload thread.
// Check the maximum number of threads before calling.
+// +bool PseudoFdInfo::ParallelMultipartUpload(const char* path, const mp_part_list_t& mplist, bool is_copy, AutoLock::Type type) +{ + //S3FS_PRN_DBG("[path=%s][mplist(%zu)]", SAFESTRPTR(path), mplist.size()); + + AutoLock auto_lock(&upload_list_lock, type); + + if(mplist.empty()){ + // nothing to do + return true; + } + if(!OpenUploadFd(AutoLock::ALREADY_LOCKED)){ + return false; + } + + for(mp_part_list_t::const_iterator iter = mplist.begin(); iter != mplist.end(); ++iter){ + // Insert upload part + etagpair* petag = nullptr; + if(!InsertUploadPart(iter->start, iter->size, iter->part_num, is_copy, &petag, AutoLock::ALREADY_LOCKED)){ + S3FS_PRN_ERR("Failed to insert insert upload part(path=%s, start=%lld, size=%lld, part=%d, copy=%s) to mplist", SAFESTRPTR(path), static_cast(iter->start), static_cast(iter->size), iter->part_num, (is_copy ? "true" : "false")); + return false; + } + + // make parameter for my thread + pseudofdinfo_thparam* thargs = new pseudofdinfo_thparam; + thargs->ppseudofdinfo = this; + thargs->path = SAFESTRPTR(path); + thargs->upload_id = upload_id; + thargs->upload_fd = upload_fd; + thargs->start = iter->start; + thargs->size = iter->size; + thargs->is_copy = is_copy; + thargs->part_num = iter->part_num; + thargs->petag = petag; + + // make parameter for thread pool + std::unique_ptr ppoolparam(new thpoolman_param); + ppoolparam->args = thargs; + ppoolparam->psem = &uploaded_sem; + ppoolparam->pfunc = PseudoFdInfo::MultipartUploadThreadWorker; + + // setup instruction + if(!ThreadPoolMan::Instruct(std::move(ppoolparam))){ + S3FS_PRN_ERR("failed setup instruction for uploading."); + delete thargs; + return false; + } + ++instruct_count; + } + return true; +} + +bool PseudoFdInfo::ParallelMultipartUploadAll(const char* path, const mp_part_list_t& to_upload_list, const mp_part_list_t& copy_list, int& result) +{ + S3FS_PRN_DBG("[path=%s][to_upload_list(%zu)][copy_list(%zu)]", SAFESTRPTR(path), to_upload_list.size(), copy_list.size()); + + result 
= 0;

    if(!OpenUploadFd(AutoLock::NONE)){
        return false;
    }

    if(!ParallelMultipartUpload(path, to_upload_list, false, AutoLock::NONE) || !ParallelMultipartUpload(path, copy_list, true, AutoLock::NONE)){
        S3FS_PRN_ERR("Failed setup instruction for uploading(path=%s, to_upload_list=%zu, copy_list=%zu).", SAFESTRPTR(path), to_upload_list.size(), copy_list.size());
        return false;
    }

    // Wait for all thread exiting
    result = WaitAllThreadsExit();

    return true;
}

//
// Upload the last updated Untreated area
//
// [Overview]
// Uploads untreated areas with the maximum multipart upload size as the
// boundary.
//
// * The starting position of the untreated area is aligned with the maximum
//   multipart upload size as the boundary.
// * If there is an uploaded area that overlaps with the aligned untreated
//   area, that uploaded area is canceled and absorbed by the untreated area.
// * Upload only when the aligned untreated area exceeds the maximum multipart
//   upload size.
// * When the start position of the untreated area is changed to boundary
//   alignment(to backward), and if that gap area is remained, that area is
//   rest to untreated area.
//
ssize_t PseudoFdInfo::UploadBoundaryLastUntreatedArea(const char* path, headers_t& meta, FdEntity* pfdent)
{
    S3FS_PRN_DBG("[path=%s][pseudo_fd=%d][physical_fd=%d]", SAFESTRPTR(path), pseudo_fd, physical_fd);

    if(!path || -1 == physical_fd || -1 == pseudo_fd || !pfdent){
        S3FS_PRN_ERR("pseudo_fd(%d) to physical_fd(%d) for path(%s) is not opened or not writable, or pfdent is nullptr.", pseudo_fd, physical_fd, path);
        return -EBADF;
    }
    AutoLock auto_lock(&upload_list_lock);

    //
    // Get last update untreated area
    //
    off_t last_untreated_start = 0;
    off_t last_untreated_size = 0;
    if(!pfdent->GetLastUpdateUntreatedPart(last_untreated_start, last_untreated_size) || last_untreated_start < 0 || last_untreated_size <= 0){
        S3FS_PRN_WARN("Not found last update untreated area or it is empty, thus return without any error.");
        return 0;
    }

    //
    // Aligns the start position of the last updated raw area with the boundary
    //
    // * Align the last updated raw space with the maximum upload size boundary.
    // * The remaining size of the part before the boundary is will not be uploaded.
    //
    // Round the start position up to the next multiple of max_mp_size.
    off_t max_mp_size = S3fsCurl::GetMultipartSize();
    off_t aligned_start = ((last_untreated_start / max_mp_size) + (0 < (last_untreated_start % max_mp_size) ?
1 : 0)) * max_mp_size; + if((last_untreated_start + last_untreated_size) <= aligned_start){ + S3FS_PRN_INFO("After the untreated area(start=%lld, size=%lld) is aligned with the boundary, the aligned start(%lld) exceeds the untreated area, so there is nothing to do.", static_cast(last_untreated_start), static_cast(last_untreated_size), static_cast(aligned_start)); + return 0; + } + + off_t aligned_size = (((last_untreated_start + last_untreated_size) - aligned_start) / max_mp_size) * max_mp_size; + if(0 == aligned_size){ + S3FS_PRN_DBG("After the untreated area(start=%lld, size=%lld) is aligned with the boundary(start is %lld), the aligned size is empty, so nothing to do.", static_cast(last_untreated_start), static_cast(last_untreated_size), static_cast(aligned_start)); + return 0; + } + + off_t front_rem_start = last_untreated_start; // start of the remainder untreated area in front of the boundary + off_t front_rem_size = aligned_start - last_untreated_start; // size of the remainder untreated area in front of the boundary + + // + // Get the area for uploading, if last update treated area can be uploaded. + // + // [NOTE] + // * Create the updoad area list, if the untreated area aligned with the boundary + // exceeds the maximum upload size. + // * If it overlaps with an area that has already been uploaded(unloaded list), + // that area is added to the cancellation list and included in the untreated area. + // + mp_part_list_t to_upload_list; + filepart_list_t cancel_uploaded_list; + if(!ExtractUploadPartsFromUntreatedArea(aligned_start, aligned_size, to_upload_list, cancel_uploaded_list, S3fsCurl::GetMultipartSize())){ + S3FS_PRN_ERR("Failed to extract upload parts from last untreated area."); + return -EIO; + } + if(to_upload_list.empty()){ + S3FS_PRN_INFO("There is nothing to upload. In most cases, the untreated area does not meet the upload size."); + return 0; + } + + // + // Has multipart uploading already started? 
+ // + if(!IsUploading()){ + // Multipart uploading hasn't started yet, so start it. + // + S3fsCurl s3fscurl(true); + std::string tmp_upload_id; + int result; + if(0 != (result = s3fscurl.PreMultipartPostRequest(path, meta, tmp_upload_id, true))){ + S3FS_PRN_ERR("failed to setup multipart upload(create upload id) by errno(%d)", result); + return result; + } + if(!RowInitialUploadInfo(tmp_upload_id, false/* not need to cancel */, AutoLock::ALREADY_LOCKED)){ + S3FS_PRN_ERR("failed to setup multipart upload(set upload id to object)"); + return result; + } + } + + // + // Output debug level information + // + // When canceling(overwriting) a part that has already been uploaded, output it. + // + if(S3fsLog::IsS3fsLogDbg()){ + for(filepart_list_t::const_iterator cancel_iter = cancel_uploaded_list.begin(); cancel_iter != cancel_uploaded_list.end(); ++cancel_iter){ + S3FS_PRN_DBG("Cancel uploaded: start(%lld), size(%lld), part number(%d)", static_cast(cancel_iter->startpos), static_cast(cancel_iter->size), (cancel_iter->petag ? cancel_iter->petag->part_num : -1)); + } + } + + // + // Upload Multipart parts + // + if(!ParallelMultipartUpload(path, to_upload_list, false, AutoLock::ALREADY_LOCKED)){ + S3FS_PRN_ERR("Failed to upload multipart parts."); + return -EIO; + } + + // + // Exclude the uploaded Untreated area and update the last Untreated area. 
    //
    off_t behind_rem_start = aligned_start + aligned_size;
    off_t behind_rem_size = (last_untreated_start + last_untreated_size) - behind_rem_start;

    if(!pfdent->ReplaceLastUpdateUntreatedPart(front_rem_start, front_rem_size, behind_rem_start, behind_rem_size)){
        S3FS_PRN_WARN("The last untreated area could not be detected and the uploaded area could not be excluded from it, but continue because it does not affect the overall processing.");
    }

    return 0;
}

// Block until every instructed worker has finished (instruct_count and
// completed_count both reach zero) and return the aggregated result.
int PseudoFdInfo::WaitAllThreadsExit()
{
    int result;
    bool is_loop = true;
    {
        AutoLock auto_lock(&upload_list_lock);
        if(0 == instruct_count && 0 == completed_count){
            result = last_result;
            is_loop = false;
        }
    }

    while(is_loop){
        // need to wait the worker exiting
        uploaded_sem.wait();    // each worker posts the semaphore once on completion
        {
            AutoLock auto_lock(&upload_list_lock);
            if(0 < completed_count){
                --completed_count;
            }
            if(0 == instruct_count && 0 == completed_count){
                // break loop
                result = last_result;
                is_loop = false;
            }
        }
    }

    return result;
}

bool PseudoFdInfo::CancelAllThreads()
{
    bool need_cancel = false;
    {
        AutoLock auto_lock(&upload_list_lock);
        // NOTE(review): cancellation is only triggered when BOTH counters are
        // non-zero; threads that are instructed but have not completed anything
        // yet (completed_count==0) would not be awaited here — confirm this
        // condition against the intended semantics.
        if(0 < instruct_count && 0 < completed_count){
            S3FS_PRN_INFO("The upload thread is running, so cancel them and wait for the end.");
            need_cancel = true;
            last_result = -ECANCELED;   // to stop thread running
        }
    }
    if(need_cancel){
        WaitAllThreadsExit();
    }
    return true;
}

//
// Extract the list for multipart upload from the Unteated Area
//
// The untreated_start parameter must be set aligning it with the boundaries
// of the maximum multipart upload size. This method expects it to be bounded.
//
// This method creates the upload area aligned from the untreated area by
// maximum size and creates the required list.
// If it overlaps with an area that has already been uploaded, the overlapped
// upload area will be canceled and absorbed by the untreated area.
+// If the list creation process is complete and areas smaller than the maximum +// size remain, those area will be reset to untreated_start and untreated_size +// and returned to the caller. +// If the called untreated area is smaller than the maximum size of the +// multipart upload, no list will be created. +// +// [NOTE] +// Maximum multipart upload size must be uploading boundary. +// +bool PseudoFdInfo::ExtractUploadPartsFromUntreatedArea(const off_t& untreated_start, const off_t& untreated_size, mp_part_list_t& to_upload_list, filepart_list_t& cancel_upload_list, off_t max_mp_size) +{ + if(untreated_start < 0 || untreated_size <= 0){ + S3FS_PRN_ERR("Paramters are wrong(untreated_start=%lld, untreated_size=%lld).", static_cast(untreated_start), static_cast(untreated_size)); + return false; + } + + // Initiliaze lists + to_upload_list.clear(); + cancel_upload_list.clear(); + + // + // Align start position with maximum multipart upload boundaries + // + off_t aligned_start = (untreated_start / max_mp_size) * max_mp_size; + off_t aligned_size = untreated_size + (untreated_start - aligned_start); + + // + // Check aligned untreated size + // + if(aligned_size < max_mp_size){ + S3FS_PRN_INFO("untreated area(start=%lld, size=%lld) to aligned boundary(start=%lld, size=%lld) is smaller than max mp size(%lld), so nothing to do.", static_cast(untreated_start), static_cast(untreated_size), static_cast(aligned_start), static_cast(aligned_size), static_cast(max_mp_size)); + return true; // successful termination + } + + // + // Check each unloaded area in list + // + // [NOTE] + // The uploaded area must be to be aligned by boundary. + // Also, it is assumed that it must not be a copy area. + // So if the areas overlap, include uploaded area as an untreated area. 
+ // + for(filepart_list_t::iterator cur_iter = upload_list.begin(); cur_iter != upload_list.end(); /* ++cur_iter */){ + // Check overlap + if((cur_iter->startpos + cur_iter->size - 1) < aligned_start || (aligned_start + aligned_size - 1) < cur_iter->startpos){ + // Areas do not overlap + ++cur_iter; + + }else{ + // The areas overlap + // + // Since the start position of the uploaded area is aligned with the boundary, + // it is not necessary to check the start position. + // If the uploaded area exceeds the untreated area, expand the untreated area. + // + if((aligned_start + aligned_size - 1) < (cur_iter->startpos + cur_iter->size - 1)){ + aligned_size += (cur_iter->startpos + cur_iter->size) - (aligned_start + aligned_size); + } + + // + // Add this to cancel list + // + cancel_upload_list.push_back(*cur_iter); // Copy and Push to cancel list + cur_iter = upload_list.erase(cur_iter); + } + } + + // + // Add upload area to the list + // + while(max_mp_size <= aligned_size){ + int part_num = static_cast((aligned_start / max_mp_size) + 1); + to_upload_list.emplace_back(aligned_start, max_mp_size, part_num); + + aligned_start += max_mp_size; + aligned_size -= max_mp_size; + } + + return true; +} + +// +// Extract the area lists to be uploaded/downloaded for the entire file. +// +// [Parameters] +// to_upload_list : A list of areas to upload in multipart upload. +// to_copy_list : A list of areas for copy upload in multipart upload. +// to_download_list : A list of areas that must be downloaded before multipart upload. +// cancel_upload_list : A list of areas that have already been uploaded and will be canceled(overwritten). +// wait_upload_complete : If cancellation areas exist, this flag is set to true when it is necessary to wait until the upload of those cancellation areas is complete. +// file_size : The size of the upload file. +// use_copy : Specify true if copy multipart upload is available. 
//
// [NOTE]
// The untreated_list in fdentity does not change, but upload_list is changed.
// (If you want to restore it, you can use cancel_upload_list.)
//
bool PseudoFdInfo::ExtractUploadPartsFromAllArea(UntreatedParts& untreated_list, mp_part_list_t& to_upload_list, mp_part_list_t& to_copy_list, mp_part_list_t& to_download_list, filepart_list_t& cancel_upload_list, bool& wait_upload_complete, off_t max_mp_size, off_t file_size, bool use_copy)
{
    AutoLock auto_lock(&upload_list_lock);

    // Initialize lists
    to_upload_list.clear();
    to_copy_list.clear();
    to_download_list.clear();
    cancel_upload_list.clear();
    wait_upload_complete = false;

    // Duplicate untreated list (work on a copy so the caller's list is untouched)
    untreated_list_t dup_untreated_list;
    untreated_list.Duplicate(dup_untreated_list);

    // Initialize the iterator of each list first
    untreated_list_t::iterator dup_untreated_iter = dup_untreated_list.begin();
    filepart_list_t::iterator uploaded_iter = upload_list.begin();

    //
    // Loop to extract areas to upload and download
    //
    // Check at the boundary of the maximum upload size from the beginning of the file
    //
    for(off_t cur_start = 0, cur_size = 0; cur_start < file_size; cur_start += cur_size){
        //
        // Set part size
        // (To avoid confusion, the area to be checked is called the "current area".)
        //
        cur_size = ((cur_start + max_mp_size) <= file_size ? max_mp_size : (file_size - cur_start));

        //
        // Extract the untreated area that overlaps this current area.
        // (The extracted area is deleted from dup_untreated_list.)
        //
        untreated_list_t cur_untreated_list;
        for(cur_untreated_list.clear(); dup_untreated_iter != dup_untreated_list.end(); ){
            if((dup_untreated_iter->start < (cur_start + cur_size)) && (cur_start < (dup_untreated_iter->start + dup_untreated_iter->size))){
                // this untreated area is overlap
                off_t tmp_untreated_start;
                off_t tmp_untreated_size;
                if(dup_untreated_iter->start < cur_start){
                    // [NOTE]
                    // This untreated area overlaps with the current area, but starts
                    // in front of the target area.
                    // This state should not be possible, but if this state is detected,
                    // the part before the target area will be deleted.
                    //
                    tmp_untreated_start = cur_start;
                    tmp_untreated_size = dup_untreated_iter->size - (cur_start - dup_untreated_iter->start);
                }else{
                    tmp_untreated_start = dup_untreated_iter->start;
                    tmp_untreated_size = dup_untreated_iter->size;
                }

                //
                // Check the end of the overlapping untreated area.
                //
                if((tmp_untreated_start + tmp_untreated_size) <= (cur_start + cur_size)){
                    //
                    // All of untreated areas are within the current area
                    //
                    // - Add this untreated area to cur_untreated_list
                    // - Delete this from dup_untreated_list
                    //
                    cur_untreated_list.emplace_back(tmp_untreated_start, tmp_untreated_size);
                    dup_untreated_iter = dup_untreated_list.erase(dup_untreated_iter);
                }else{
                    //
                    // The untreated area exceeds the end of the current area
                    //

                    // Adjust untreated area
                    tmp_untreated_size = (cur_start + cur_size) - tmp_untreated_start;

                    // Add adjusted untreated area to cur_untreated_list
                    cur_untreated_list.emplace_back(tmp_untreated_start, tmp_untreated_size);

                    // Remove this adjusted untreated area from the area pointed
                    // to by dup_untreated_iter.
+ dup_untreated_iter->size = (dup_untreated_iter->start + dup_untreated_iter->size) - (cur_start + cur_size); + dup_untreated_iter->start = tmp_untreated_start + tmp_untreated_size; + } + + }else if((cur_start + cur_size - 1) < dup_untreated_iter->start){ + // this untreated area is over the current area, thus break loop. + break; + }else{ + ++dup_untreated_iter; + } + } + + // + // Check uploaded area + // + // [NOTE] + // The uploaded area should be aligned with the maximum upload size boundary. + // It also assumes that each size of uploaded area must be a maximum upload + // size. + // + filepart_list_t::iterator overlap_uploaded_iter = upload_list.end(); + for(; uploaded_iter != upload_list.end(); ++uploaded_iter){ + if((cur_start < (uploaded_iter->startpos + uploaded_iter->size)) && (uploaded_iter->startpos < (cur_start + cur_size))){ + if(overlap_uploaded_iter != upload_list.end()){ + // + // Something wrong in this unloaded area. + // + // This area is not aligned with the boundary, then this condition + // is unrecoverable and return failure. + // + S3FS_PRN_ERR("The uploaded list may not be the boundary for the maximum multipart upload size. No further processing is possible."); + return false; + } + // Set this iterator to overlap iter + overlap_uploaded_iter = uploaded_iter; + + }else if((cur_start + cur_size - 1) < uploaded_iter->startpos){ + break; + } + } + + // + // Create upload/download/cancel/copy list for this current area + // + int part_num = static_cast((cur_start / max_mp_size) + 1); + if(cur_untreated_list.empty()){ + // + // No untreated area was detected in this current area + // + if(overlap_uploaded_iter != upload_list.end()){ + // + // This current area already uploaded, then nothing to add to lists. + // + S3FS_PRN_DBG("Already uploaded: start=%lld, size=%lld", static_cast(cur_start), static_cast(cur_size)); + + }else{ + // + // This current area has not been uploaded + // (neither an uploaded area nor an untreated area.) 
+ // + if(use_copy){ + // + // Copy multipart upload available + // + S3FS_PRN_DBG("To copy: start=%lld, size=%lld", static_cast(cur_start), static_cast(cur_size)); + to_copy_list.emplace_back(cur_start, cur_size, part_num); + }else{ + // + // This current area needs to be downloaded and uploaded + // + S3FS_PRN_DBG("To download and upload: start=%lld, size=%lld", static_cast(cur_start), static_cast(cur_size)); + to_download_list.emplace_back(cur_start, cur_size); + to_upload_list.emplace_back(cur_start, cur_size, part_num); + } + } + }else{ + // + // Found untreated area in this current area + // + if(overlap_uploaded_iter != upload_list.end()){ + // + // This current area is also the uploaded area + // + // [NOTE] + // The uploaded area is aligned with boundary, there are all data in + // this current area locally(which includes all data of untreated area). + // So this current area only needs to be uploaded again. + // + S3FS_PRN_DBG("Cancel upload: start=%lld, size=%lld", static_cast(overlap_uploaded_iter->startpos), static_cast(overlap_uploaded_iter->size)); + + if(!overlap_uploaded_iter->uploaded){ + S3FS_PRN_DBG("This cancel upload area is still uploading, so you must wait for it to complete before starting any Stream uploads."); + wait_upload_complete = true; + } + cancel_upload_list.push_back(*overlap_uploaded_iter); // add this uploaded area to cancel_upload_list + uploaded_iter = upload_list.erase(overlap_uploaded_iter); // remove it from upload_list + + S3FS_PRN_DBG("To upload: start=%lld, size=%lld", static_cast(cur_start), static_cast(cur_size)); + to_upload_list.emplace_back(cur_start, cur_size, part_num); // add new uploading area to list + + }else{ + // + // No uploaded area overlap this current area + // (Areas other than the untreated area must be downloaded.) + // + // [NOTE] + // Need to consider the case where there is a gap between the start + // of the current area and the untreated area. 
                // This gap is the area that should normally be downloaded.
                // But it is the area that can be copied if we can use copy multipart
                // upload. Then If we can use copy multipart upload and the previous
                // area is used copy multipart upload, this gap will be absorbed by
                // the previous area.
                // Unifying the copy multipart upload area can reduce the number of
                // upload requests.
                //
                off_t tmp_cur_start = cur_start;
                off_t tmp_cur_size = cur_size;
                off_t changed_start = cur_start;
                off_t changed_size = cur_size;
                bool first_area = true;
                for(untreated_list_t::const_iterator tmp_cur_untreated_iter = cur_untreated_list.begin(); tmp_cur_untreated_iter != cur_untreated_list.end(); ++tmp_cur_untreated_iter, first_area = false){
                    if(tmp_cur_start < tmp_cur_untreated_iter->start){
                        //
                        // Detected a gap at the start of area
                        //
                        bool include_prev_copy_part = false;
                        if(first_area && use_copy && !to_copy_list.empty()){
                            //
                            // Make sure that the area of the last item in to_copy_list
                            // is contiguous with this current area.
                            //
                            // [NOTE]
                            // Areas can be unified if the total size of the areas is
                            // within 5GB and the remaining area after unification is
                            // larger than the minimum multipart upload size.
                            //
                            mp_part_list_t::reverse_iterator copy_riter = to_copy_list.rbegin();

                            if( (copy_riter->start + copy_riter->size) == tmp_cur_start &&
                                (copy_riter->size + (tmp_cur_untreated_iter->start - tmp_cur_start)) <= FIVE_GB &&
                                ((tmp_cur_start + tmp_cur_size) - tmp_cur_untreated_iter->start) >= MIN_MULTIPART_SIZE )
                            {
                                //
                                // Unify to this area to previous copy area.
+ // + copy_riter->size += tmp_cur_untreated_iter->start - tmp_cur_start; + S3FS_PRN_DBG("Resize to copy: start=%lld, size=%lld", static_cast(copy_riter->start), static_cast(copy_riter->size)); + + changed_size -= (tmp_cur_untreated_iter->start - changed_start); + changed_start = tmp_cur_untreated_iter->start; + include_prev_copy_part = true; + } + } + if(!include_prev_copy_part){ + // + // If this area is not unified, need to download this area + // + S3FS_PRN_DBG("To download: start=%lld, size=%lld", static_cast(tmp_cur_start), static_cast(tmp_cur_untreated_iter->start - tmp_cur_start)); + to_download_list.emplace_back(tmp_cur_start, tmp_cur_untreated_iter->start - tmp_cur_start); + } + } + // + // Set next start position + // + tmp_cur_size = (tmp_cur_start + tmp_cur_size) - (tmp_cur_untreated_iter->start + tmp_cur_untreated_iter->size); + tmp_cur_start = tmp_cur_untreated_iter->start + tmp_cur_untreated_iter->size; + } + + // + // Add download area to list, if remaining size + // + if(0 < tmp_cur_size){ + S3FS_PRN_DBG("To download: start=%lld, size=%lld", static_cast(tmp_cur_start), static_cast(tmp_cur_size)); + to_download_list.emplace_back(tmp_cur_start, tmp_cur_size); + } + + // + // Set upload area(whole of area) to list + // + S3FS_PRN_DBG("To upload: start=%lld, size=%lld", static_cast(changed_start), static_cast(changed_size)); + to_upload_list.emplace_back(changed_start, changed_size, part_num); + } + } + } + return true; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/fdcache_fdinfo.h b/s3fs/fdcache_fdinfo.h new file mode 100644 index 0000000..0f1bcc8 --- /dev/null +++ b/s3fs/fdcache_fdinfo.h @@ -0,0 +1,133 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public 
License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_FDCACHE_FDINFO_H_ +#define S3FS_FDCACHE_FDINFO_H_ + +#include + +#include "psemaphore.h" +#include "metaheader.h" +#include "autolock.h" +#include "types.h" + +class FdEntity; +class UntreatedParts; + +//------------------------------------------------ +// Structure of parameters to pass to thread +//------------------------------------------------ +class PseudoFdInfo; + +struct pseudofdinfo_thparam +{ + PseudoFdInfo* ppseudofdinfo; + std::string path; + std::string upload_id; + int upload_fd; + off_t start; + off_t size; + bool is_copy; + int part_num; + etagpair* petag; + + pseudofdinfo_thparam() : ppseudofdinfo(nullptr), path(""), upload_id(""), upload_fd(-1), start(0), size(0), is_copy(false), part_num(-1), petag(nullptr) {} +}; + +//------------------------------------------------ +// Class PseudoFdInfo +//------------------------------------------------ +class PseudoFdInfo +{ + private: + static int max_threads; + static int opt_max_threads; // for option value + + int pseudo_fd; + int physical_fd; + int flags; // flags at open + std::string upload_id; + int upload_fd; // duplicated fd for uploading + filepart_list_t upload_list; + petagpool etag_entities; // list of etag string and part number entities(to maintain the etag entity even if MPPART_INFO is destroyed) + bool is_lock_init; + mutable pthread_mutex_t upload_list_lock; // protects 
upload_id and upload_list + Semaphore uploaded_sem; // use a semaphore to trigger an upload completion like event flag + int instruct_count; // number of instructions for processing by threads + int completed_count; // number of completed processes by thread + int last_result; // the result of thread processing + + private: + static void* MultipartUploadThreadWorker(void* arg); + + bool Clear(); + void CloseUploadFd(); + bool OpenUploadFd(AutoLock::Type type = AutoLock::NONE); + bool ResetUploadInfo(AutoLock::Type type); + bool RowInitialUploadInfo(const std::string& id, bool is_cancel_mp, AutoLock::Type type); + bool CompleteInstruction(int result, AutoLock::Type type = AutoLock::NONE); + bool ParallelMultipartUpload(const char* path, const mp_part_list_t& mplist, bool is_copy, AutoLock::Type type = AutoLock::NONE); + bool InsertUploadPart(off_t start, off_t size, int part_num, bool is_copy, etagpair** ppetag, AutoLock::Type type = AutoLock::NONE); + bool CancelAllThreads(); + bool ExtractUploadPartsFromUntreatedArea(const off_t& untreated_start, const off_t& untreated_size, mp_part_list_t& to_upload_list, filepart_list_t& cancel_upload_list, off_t max_mp_size); + + public: + explicit PseudoFdInfo(int fd = -1, int open_flags = 0); + ~PseudoFdInfo(); + PseudoFdInfo(const PseudoFdInfo&) = delete; + PseudoFdInfo(PseudoFdInfo&&) = delete; + PseudoFdInfo& operator=(const PseudoFdInfo&) = delete; + PseudoFdInfo& operator=(PseudoFdInfo&&) = delete; + + int GetPhysicalFd() const { return physical_fd; } + int GetPseudoFd() const { return pseudo_fd; } + int GetFlags() const { return flags; } + bool Writable() const; + bool Readable() const; + + bool Set(int fd, int open_flags); + bool ClearUploadInfo(bool is_cancel_mp = false); + bool InitialUploadInfo(const std::string& id){ return RowInitialUploadInfo(id, true, AutoLock::NONE); } + + bool IsUploading() const { return !upload_id.empty(); } + bool GetUploadId(std::string& id) const; + bool GetEtaglist(etaglist_t& list) 
const; + + bool AppendUploadPart(off_t start, off_t size, bool is_copy = false, etagpair** ppetag = nullptr); + + bool ParallelMultipartUploadAll(const char* path, const mp_part_list_t& to_upload_list, const mp_part_list_t& copy_list, int& result); + + int WaitAllThreadsExit(); + ssize_t UploadBoundaryLastUntreatedArea(const char* path, headers_t& meta, FdEntity* pfdent); + bool ExtractUploadPartsFromAllArea(UntreatedParts& untreated_list, mp_part_list_t& to_upload_list, mp_part_list_t& to_copy_list, mp_part_list_t& to_download_list, filepart_list_t& cancel_upload_list, bool& wait_upload_complete, off_t max_mp_size, off_t file_size, bool use_copy); +}; + +typedef std::map> fdinfo_map_t; + +#endif // S3FS_FDCACHE_FDINFO_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/fdcache_page.cpp b/s3fs/fdcache_page.cpp new file mode 100644 index 0000000..f5b50ef --- /dev/null +++ b/s3fs/fdcache_page.cpp @@ -0,0 +1,1035 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "s3fs_logger.h" +#include "fdcache_page.h" +#include "fdcache_stat.h" +#include "string_util.h" + +//------------------------------------------------ +// Symbols +//------------------------------------------------ +static constexpr int CHECK_CACHEFILE_PART_SIZE = 1024 * 16; // Buffer size in PageList::CheckZeroAreaInFile() + +//------------------------------------------------ +// fdpage_list_t utility +//------------------------------------------------ +// Inline function for repeated processing +inline void raw_add_compress_fdpage_list(fdpage_list_t& pagelist, const fdpage& orgpage, bool ignore_load, bool ignore_modify, bool default_load, bool default_modify) +{ + if(0 < orgpage.bytes){ + // [NOTE] + // The page variable is subject to change here. + // + fdpage page = orgpage; + + if(ignore_load){ + page.loaded = default_load; + } + if(ignore_modify){ + page.modified = default_modify; + } + pagelist.push_back(page); + } +} + +// Compress the page list +// +// ignore_load: Ignore the flag of loaded member and compress +// ignore_modify: Ignore the flag of modified member and compress +// default_load: loaded flag value in the list after compression when ignore_load=true +// default_modify: modified flag value in the list after compression when default_modify=true +// +// NOTE: ignore_modify and ignore_load cannot both be true. +// Zero size pages will be deleted. However, if the page information is the only one, +// it will be left behind. This is what you need to do to create a new empty file. 
+// +static void raw_compress_fdpage_list(const fdpage_list_t& pages, fdpage_list_t& compressed_pages, bool ignore_load, bool ignore_modify, bool default_load, bool default_modify) +{ + compressed_pages.clear(); + + fdpage* lastpage = nullptr; + fdpage_list_t::iterator add_iter; + for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(0 == iter->bytes){ + continue; + } + if(!lastpage){ + // First item + raw_add_compress_fdpage_list(compressed_pages, (*iter), ignore_load, ignore_modify, default_load, default_modify); + lastpage = &(compressed_pages.back()); + }else{ + // check page continuity + if(lastpage->next() != iter->offset){ + // Non-consecutive with last page, so add a page filled with default values + if( (!ignore_load && (lastpage->loaded != false)) || + (!ignore_modify && (lastpage->modified != false)) ) + { + // add new page + fdpage tmppage(lastpage->next(), (iter->offset - lastpage->next()), false, false); + raw_add_compress_fdpage_list(compressed_pages, tmppage, ignore_load, ignore_modify, default_load, default_modify); + + add_iter = compressed_pages.end(); + --add_iter; + lastpage = &(*add_iter); + }else{ + // Expand last area + lastpage->bytes = iter->offset - lastpage->offset; + } + } + + // add current page + if( (!ignore_load && (lastpage->loaded != iter->loaded )) || + (!ignore_modify && (lastpage->modified != iter->modified)) ) + { + // Add new page + raw_add_compress_fdpage_list(compressed_pages, (*iter), ignore_load, ignore_modify, default_load, default_modify); + + add_iter = compressed_pages.end(); + --add_iter; + lastpage = &(*add_iter); + }else{ + // Expand last area + lastpage->bytes += iter->bytes; + } + } + } +} + +static void compress_fdpage_list_ignore_modify(const fdpage_list_t& pages, fdpage_list_t& compressed_pages, bool default_modify) +{ + raw_compress_fdpage_list(pages, compressed_pages, /* ignore_load= */ false, /* ignore_modify= */ true, /* default_load= */false, /* default_modify= 
*/default_modify); +} + +static void compress_fdpage_list_ignore_load(const fdpage_list_t& pages, fdpage_list_t& compressed_pages, bool default_load) +{ + raw_compress_fdpage_list(pages, compressed_pages, /* ignore_load= */ true, /* ignore_modify= */ false, /* default_load= */default_load, /* default_modify= */false); +} + +static void compress_fdpage_list(const fdpage_list_t& pages, fdpage_list_t& compressed_pages) +{ + raw_compress_fdpage_list(pages, compressed_pages, /* ignore_load= */ false, /* ignore_modify= */ false, /* default_load= */false, /* default_modify= */false); +} + +static fdpage_list_t parse_partsize_fdpage_list(const fdpage_list_t& pages, off_t max_partsize) +{ + fdpage_list_t parsed_pages; + for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(iter->modified){ + // modified page + fdpage tmppage = *iter; + for(off_t start = iter->offset, rest_bytes = iter->bytes; 0 < rest_bytes; ){ + if((max_partsize * 2) < rest_bytes){ + // do parse + tmppage.offset = start; + tmppage.bytes = max_partsize; + parsed_pages.push_back(tmppage); + + start += max_partsize; + rest_bytes -= max_partsize; + }else{ + // Since the number of remaining bytes is less than twice max_partsize, + // one of the divided areas will be smaller than max_partsize. + // Therefore, this area at the end should not be divided. + tmppage.offset = start; + tmppage.bytes = rest_bytes; + parsed_pages.push_back(tmppage); + + start += rest_bytes; + rest_bytes = 0; + } + } + }else{ + // not modified page is not parsed + parsed_pages.push_back(*iter); + } + } + return parsed_pages; +} + +//------------------------------------------------ +// PageList class methods +//------------------------------------------------ +// +// Examine and return the status of each block in the file. +// +// Assuming the file is a sparse file, check the HOLE and DATA areas +// and return it in fdpage_list_t. 
The loaded flag of each fdpage is +// set to false for HOLE blocks and true for DATA blocks. +// +bool PageList::GetSparseFilePages(int fd, size_t file_size, fdpage_list_t& sparse_list) +{ + // [NOTE] + // Express the status of the cache file using fdpage_list_t. + // There is a hole in the cache file(sparse file), and the + // state of this hole is expressed by the "loaded" member of + // struct fdpage. (the "modified" member is not used) + // + if(0 == file_size){ + // file is empty + return true; + } + + bool is_hole = false; + off_t hole_pos = lseek(fd, 0, SEEK_HOLE); + off_t data_pos = lseek(fd, 0, SEEK_DATA); + if(-1 == hole_pos && -1 == data_pos){ + S3FS_PRN_ERR("Could not find the first position both HOLE and DATA in the file(physical_fd=%d).", fd); + return false; + }else if(-1 == hole_pos){ + is_hole = false; + }else if(-1 == data_pos){ + is_hole = true; + }else if(hole_pos < data_pos){ + is_hole = true; + }else{ + is_hole = false; + } + + for(off_t cur_pos = 0, next_pos = 0; 0 <= cur_pos; cur_pos = next_pos, is_hole = !is_hole){ + fdpage page; + page.offset = cur_pos; + page.loaded = !is_hole; + page.modified = false; + + next_pos = lseek(fd, cur_pos, (is_hole ? 
SEEK_DATA : SEEK_HOLE)); + if(-1 == next_pos){ + page.bytes = static_cast(file_size - cur_pos); + }else{ + page.bytes = next_pos - cur_pos; + } + sparse_list.push_back(page); + } + return true; +} + +// +// Confirm that the specified area is ZERO +// +bool PageList::CheckZeroAreaInFile(int fd, off_t start, size_t bytes) +{ + std::unique_ptr readbuff(new char[CHECK_CACHEFILE_PART_SIZE]); + + for(size_t comp_bytes = 0, check_bytes = 0; comp_bytes < bytes; comp_bytes += check_bytes){ + if(CHECK_CACHEFILE_PART_SIZE < (bytes - comp_bytes)){ + check_bytes = CHECK_CACHEFILE_PART_SIZE; + }else{ + check_bytes = bytes - comp_bytes; + } + bool found_bad_data = false; + ssize_t read_bytes; + if(-1 == (read_bytes = pread(fd, readbuff.get(), check_bytes, (start + comp_bytes)))){ + S3FS_PRN_ERR("Something error is occurred in reading %zu bytes at %lld from file(physical_fd=%d).", check_bytes, static_cast(start + comp_bytes), fd); + found_bad_data = true; + }else{ + check_bytes = static_cast(read_bytes); + for(size_t tmppos = 0; tmppos < check_bytes; ++tmppos){ + if('\0' != readbuff[tmppos]){ + // found not ZERO data. + found_bad_data = true; + break; + } + } + } + if(found_bad_data){ + return false; + } + } + return true; +} + +// +// Checks that the specified area matches the state of the sparse file. +// +// [Parameters] +// checkpage: This is one state of the cache file, it is loaded from the stats file. +// sparse_list: This is a list of the results of directly checking the cache file status(HOLE/DATA). +// In the HOLE area, the "loaded" flag of fdpage is false. The DATA area has it set to true. +// fd: opened file discriptor to target cache file. +// +bool PageList::CheckAreaInSparseFile(const struct fdpage& checkpage, const fdpage_list_t& sparse_list, int fd, fdpage_list_t& err_area_list, fdpage_list_t& warn_area_list) +{ + // Check the block status of a part(Check Area: checkpage) of the target file. 
+ // The elements of sparse_list have 5 patterns that overlap this block area. + // + // File |<---...--------------------------------------...--->| + // Check Area (offset)<-------------------->(offset + bytes - 1) + // Area case(0) <-------> + // Area case(1) <-------> + // Area case(2) <--------> + // Area case(3) <----------> + // Area case(4) <-----------> + // Area case(5) <-----------------------------> + // + bool result = true; + + for(fdpage_list_t::const_iterator iter = sparse_list.begin(); iter != sparse_list.end(); ++iter){ + off_t check_start = 0; + off_t check_bytes = 0; + if((iter->offset + iter->bytes) <= checkpage.offset){ + // case 0 + continue; // next + + }else if((checkpage.offset + checkpage.bytes) <= iter->offset){ + // case 1 + break; // finish + + }else if(iter->offset < checkpage.offset && (iter->offset + iter->bytes) < (checkpage.offset + checkpage.bytes)){ + // case 2 + check_start = checkpage.offset; + check_bytes = iter->bytes - (checkpage.offset - iter->offset); + + }else if((checkpage.offset + checkpage.bytes) < (iter->offset + iter->bytes)){ // here, already "iter->offset < (checkpage.offset + checkpage.bytes)" is true. + // case 3 + check_start = iter->offset; + check_bytes = checkpage.bytes - (iter->offset - checkpage.offset); + + }else if(checkpage.offset < iter->offset && (iter->offset + iter->bytes) < (checkpage.offset + checkpage.bytes)){ + // case 4 + check_start = iter->offset; + check_bytes = iter->bytes; + + }else{ // (iter->offset <= checkpage.offset && (checkpage.offset + checkpage.bytes) <= (iter->offset + iter->bytes)) + // case 5 + check_start = checkpage.offset; + check_bytes = checkpage.bytes; + } + + // check target area type + if(checkpage.loaded || checkpage.modified){ + // target area must be not HOLE(DATA) area. + if(!iter->loaded){ + // Found bad area, it is HOLE area. 
+ fdpage page(check_start, check_bytes, false, false); + err_area_list.push_back(page); + result = false; + } + }else{ + // target area should be HOLE area.(If it is not a block boundary, it may be a DATA area.) + if(iter->loaded){ + // need to check this area's each data, it should be ZERO. + if(!PageList::CheckZeroAreaInFile(fd, check_start, static_cast(check_bytes))){ + // Discovered an area that has un-initial status data but it probably does not effect bad. + fdpage page(check_start, check_bytes, true, false); + warn_area_list.push_back(page); + result = false; + } + } + } + } + return result; +} + +//------------------------------------------------ +// PageList methods +//------------------------------------------------ +void PageList::FreeList(fdpage_list_t& list) +{ + list.clear(); +} + +PageList::PageList(off_t size, bool is_loaded, bool is_modified, bool shrinked) : is_shrink(shrinked) +{ + Init(size, is_loaded, is_modified); +} + +PageList::~PageList() +{ + Clear(); +} + +void PageList::Clear() +{ + PageList::FreeList(pages); + is_shrink = false; +} + +bool PageList::Init(off_t size, bool is_loaded, bool is_modified) +{ + Clear(); + if(0 <= size){ + fdpage page(0, size, is_loaded, is_modified); + pages.push_back(page); + } + return true; +} + +off_t PageList::Size() const +{ + if(pages.empty()){ + return 0; + } + fdpage_list_t::const_reverse_iterator riter = pages.rbegin(); + return riter->next(); +} + +bool PageList::Compress() +{ + fdpage* lastpage = nullptr; + for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ){ + if(!lastpage){ + // First item + lastpage = &(*iter); + ++iter; + }else{ + // check page continuity + if(lastpage->next() != iter->offset){ + // Non-consecutive with last page, so add a page filled with default values + if(lastpage->loaded || lastpage->modified){ + // insert new page before current pos + fdpage tmppage(lastpage->next(), (iter->offset - lastpage->next()), false, false); + iter = pages.insert(iter, 
tmppage); + lastpage = &(*iter); + ++iter; + }else{ + // Expand last area + lastpage->bytes = iter->offset - lastpage->offset; + } + } + // check current page + if(lastpage->loaded == iter->loaded && lastpage->modified == iter->modified){ + // Expand last area and remove current pos + lastpage->bytes += iter->bytes; + iter = pages.erase(iter); + }else{ + lastpage = &(*iter); + ++iter; + } + } + } + return true; +} + +bool PageList::Parse(off_t new_pos) +{ + for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(new_pos == iter->offset){ + // nothing to do + return true; + }else if(iter->offset < new_pos && new_pos < iter->next()){ + fdpage page(iter->offset, new_pos - iter->offset, iter->loaded, iter->modified); + iter->bytes -= (new_pos - iter->offset); + iter->offset = new_pos; + pages.insert(iter, page); + return true; + } + } + return false; +} + +bool PageList::Resize(off_t size, bool is_loaded, bool is_modified) +{ + off_t total = Size(); + + if(0 == total){ + // [NOTE] + // The is_shrink flag remains unchanged in this function. 
+ // + bool backup_is_shrink = is_shrink; + + Init(size, is_loaded, is_modified); + is_shrink = backup_is_shrink; + + }else if(total < size){ + // add new area + fdpage page(total, (size - total), is_loaded, is_modified); + pages.push_back(page); + + }else if(size < total){ + // cut area + for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ){ + if(iter->next() <= size){ + ++iter; + }else{ + if(size <= iter->offset){ + iter = pages.erase(iter); + }else{ + iter->bytes = size - iter->offset; + } + } + } + if(is_modified){ + is_shrink = true; + } + }else{ // total == size + // nothing to do + } + // compress area + return Compress(); +} + +bool PageList::IsPageLoaded(off_t start, off_t size) const +{ + for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(iter->end() < start){ + continue; + } + if(!iter->loaded){ + return false; + } + if(0 != size && start + size <= iter->next()){ + break; + } + } + return true; +} + +bool PageList::SetPageLoadedStatus(off_t start, off_t size, PageList::page_status pstatus, bool is_compress) +{ + off_t now_size = Size(); + bool is_loaded = (page_status::LOAD_MODIFIED == pstatus || page_status::LOADED == pstatus); + bool is_modified = (page_status::LOAD_MODIFIED == pstatus || page_status::MODIFIED == pstatus); + + if(now_size <= start){ + if(now_size < start){ + // add + Resize(start, false, is_modified); // set modified flag from now end pos to specified start pos. + } + Resize(start + size, is_loaded, is_modified); + + }else if(now_size <= start + size){ + // cut + Resize(start, false, false); // not changed loaded/modified flags in existing area. 
+ // add + Resize(start + size, is_loaded, is_modified); + + }else{ + // start-size are inner pages area + // parse "start", and "start + size" position + Parse(start); + Parse(start + size); + + // set loaded flag + for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(iter->end() < start){ + continue; + }else if(start + size <= iter->offset){ + break; + }else{ + iter->loaded = is_loaded; + iter->modified = is_modified; + } + } + } + // compress area + return (is_compress ? Compress() : true); +} + +bool PageList::FindUnloadedPage(off_t start, off_t& resstart, off_t& ressize) const +{ + for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(start <= iter->end()){ + if(!iter->loaded && !iter->modified){ // Do not load unloaded and modified areas + resstart = iter->offset; + ressize = iter->bytes; + return true; + } + } + } + return false; +} + +// [NOTE] +// Accumulates the range of unload that is smaller than the Limit size. +// If you want to integrate all unload ranges, set the limit size to 0. +// +off_t PageList::GetTotalUnloadedPageSize(off_t start, off_t size, off_t limit_size) const +{ + // If size is 0, it means loading to end. 
+ if(0 == size){ + if(start < Size()){ + size = Size() - start; + } + } + off_t next = start + size; + off_t restsize = 0; + for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(iter->next() <= start){ + continue; + } + if(next <= iter->offset){ + break; + } + if(iter->loaded || iter->modified){ + continue; + } + off_t tmpsize; + if(iter->offset <= start){ + if(iter->next() <= next){ + tmpsize = (iter->next() - start); + }else{ + tmpsize = next - start; // = size + } + }else{ + if(iter->next() <= next){ + tmpsize = iter->next() - iter->offset; // = iter->bytes + }else{ + tmpsize = next - iter->offset; + } + } + if(0 == limit_size || tmpsize < limit_size){ + restsize += tmpsize; + } + } + return restsize; +} + +size_t PageList::GetUnloadedPages(fdpage_list_t& unloaded_list, off_t start, off_t size) const +{ + // If size is 0, it means loading to end. + if(0 == size){ + if(start < Size()){ + size = Size() - start; + } + } + off_t next = start + size; + + for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(iter->next() <= start){ + continue; + } + if(next <= iter->offset){ + break; + } + if(iter->loaded || iter->modified){ + continue; // already loaded or modified + } + + // page area + off_t page_start = std::max(iter->offset, start); + off_t page_next = std::min(iter->next(), next); + off_t page_size = page_next - page_start; + + // add list + fdpage_list_t::reverse_iterator riter = unloaded_list.rbegin(); + if(riter != unloaded_list.rend() && riter->next() == page_start){ + // merge to before page + riter->bytes += page_size; + }else{ + fdpage page(page_start, page_size, false, false); + unloaded_list.push_back(page); + } + } + return unloaded_list.size(); +} + +// [NOTE] +// This method is called in advance when mixing POST and COPY in multi-part upload. +// The minimum size of each part must be 5 MB, and the data area below this must be +// downloaded from S3. 
+// This method checks the current PageList status and returns the area that needs +// to be downloaded so that each part is at least 5 MB. +// +bool PageList::GetPageListsForMultipartUpload(fdpage_list_t& dlpages, fdpage_list_t& mixuppages, off_t max_partsize) +{ + // compress before this processing + Compress(); // always true + + // make a list by modified flag + fdpage_list_t modified_pages; + fdpage_list_t download_pages; // A non-contiguous page list showing the areas that need to be downloaded + fdpage_list_t mixupload_pages; // A continuous page list showing only modified flags for mixupload + compress_fdpage_list_ignore_load(pages, modified_pages, false); + + fdpage prev_page; + for(fdpage_list_t::const_iterator iter = modified_pages.begin(); iter != modified_pages.end(); ++iter){ + if(iter->modified){ + // current is modified area + if(!prev_page.modified){ + // previous is not modified area + if(prev_page.bytes < MIN_MULTIPART_SIZE){ + // previous(not modified) area is too small for one multipart size, + // then all of previous area is needed to download. + download_pages.push_back(prev_page); + + // previous(not modified) area is set upload area. + prev_page.modified = true; + mixupload_pages.push_back(prev_page); + }else{ + // previous(not modified) area is set copy area. + prev_page.modified = false; + mixupload_pages.push_back(prev_page); + } + // set current to previous + prev_page = *iter; + }else{ + // previous is modified area, too + prev_page.bytes += iter->bytes; + } + + }else{ + // current is not modified area + if(!prev_page.modified){ + // previous is not modified area, too + prev_page.bytes += iter->bytes; + + }else{ + // previous is modified area + if(prev_page.bytes < MIN_MULTIPART_SIZE){ + // previous(modified) area is too small for one multipart size, + // then part or all of current area is needed to download. 
+ off_t missing_bytes = MIN_MULTIPART_SIZE - prev_page.bytes; + + if((missing_bytes + MIN_MULTIPART_SIZE) < iter-> bytes){ + // The current size is larger than the missing size, and the remainder + // after deducting the missing size is larger than the minimum size. + + fdpage missing_page(iter->offset, missing_bytes, false, false); + download_pages.push_back(missing_page); + + // previous(not modified) area is set upload area. + prev_page.bytes = MIN_MULTIPART_SIZE; + mixupload_pages.push_back(prev_page); + + // set current to previous + prev_page = *iter; + prev_page.offset += missing_bytes; + prev_page.bytes -= missing_bytes; + + }else{ + // The current size is less than the missing size, or the remaining + // size less the missing size is less than the minimum size. + download_pages.push_back(*iter); + + // add current to previous + prev_page.bytes += iter->bytes; + } + + }else{ + // previous(modified) area is enough size for one multipart size. + mixupload_pages.push_back(prev_page); + + // set current to previous + prev_page = *iter; + } + } + } + } + // last area + if(0 < prev_page.bytes){ + mixupload_pages.push_back(prev_page); + } + + // compress + compress_fdpage_list_ignore_modify(download_pages, dlpages, false); + compress_fdpage_list_ignore_load(mixupload_pages, mixuppages, false); + + // parse by max pagesize + dlpages = parse_partsize_fdpage_list(dlpages, max_partsize); + mixuppages = parse_partsize_fdpage_list(mixuppages, max_partsize); + + return true; +} + +bool PageList::GetNoDataPageLists(fdpage_list_t& nodata_pages, off_t start, size_t size) +{ + // compress before this processing + Compress(); // always true + + // extract areas without data + fdpage_list_t tmp_pagelist; + off_t stop_pos = (0L == size ? 
-1 : (start + size)); + for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if((iter->offset + iter->bytes) < start){ + continue; + } + if(-1 != stop_pos && stop_pos <= iter->offset){ + break; + } + if(iter->modified){ + continue; + } + + fdpage tmppage; + tmppage.offset = std::max(iter->offset, start); + tmppage.bytes = (-1 == stop_pos ? iter->bytes : std::min(iter->bytes, (stop_pos - tmppage.offset))); + tmppage.loaded = iter->loaded; + tmppage.modified = iter->modified; + + tmp_pagelist.push_back(tmppage); + } + + if(tmp_pagelist.empty()){ + nodata_pages.clear(); + }else{ + // compress + compress_fdpage_list(tmp_pagelist, nodata_pages); + } + return true; +} + +off_t PageList::BytesModified() const +{ + off_t total = 0; + for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(iter->modified){ + total += iter->bytes; + } + } + return total; +} + +bool PageList::IsModified() const +{ + if(is_shrink){ + return true; + } + for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(iter->modified){ + return true; + } + } + return false; +} + +bool PageList::ClearAllModified() +{ + is_shrink = false; + + for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(iter->modified){ + iter->modified = false; + } + } + return Compress(); +} + +bool PageList::Serialize(CacheFileStat& file, bool is_output, ino_t inode) +{ + if(!file.Open()){ + return false; + } + if(is_output){ + // + // put to file + // + std::ostringstream ssall; + ssall << inode << ":" << Size(); + + for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){ + ssall << "\n" << iter->offset << ":" << iter->bytes << ":" << (iter->loaded ? "1" : "0") << ":" << (iter->modified ? 
"1" : "0"); + } + + if(-1 == ftruncate(file.GetFd(), 0)){ + S3FS_PRN_ERR("failed to truncate file(to 0) for stats(%d)", errno); + return false; + } + std::string strall = ssall.str(); + if(0 >= pwrite(file.GetFd(), strall.c_str(), strall.length(), 0)){ + S3FS_PRN_ERR("failed to write stats(%d)", errno); + return false; + } + + }else{ + // + // loading from file + // + struct stat st; + memset(&st, 0, sizeof(struct stat)); + if(-1 == fstat(file.GetFd(), &st)){ + S3FS_PRN_ERR("fstat is failed. errno(%d)", errno); + return false; + } + if(0 >= st.st_size){ + // nothing + Init(0, false, false); + return true; + } + std::unique_ptr ptmp(new char[st.st_size + 1]); + ssize_t result; + // read from file + if(0 >= (result = pread(file.GetFd(), ptmp.get(), st.st_size, 0))){ + S3FS_PRN_ERR("failed to read stats(%d)", errno); + return false; + } + ptmp[result] = '\0'; + std::string oneline; + std::istringstream ssall(ptmp.get()); + + // loaded + Clear(); + + // load head line(for size and inode) + off_t total; + ino_t cache_inode; // if this value is 0, it means old format. + if(!getline(ssall, oneline, '\n')){ + S3FS_PRN_ERR("failed to parse stats."); + return false; + }else{ + std::istringstream sshead(oneline); + std::string strhead1; + std::string strhead2; + + // get first part in head line. + if(!getline(sshead, strhead1, ':')){ + S3FS_PRN_ERR("failed to parse stats."); + return false; + } + // get second part in head line. 
+ if(!getline(sshead, strhead2, ':')){ + // old head format is "\n" + total = cvt_strtoofft(strhead1.c_str(), /* base= */10); + cache_inode = 0; + }else{ + // current head format is ":\n" + total = cvt_strtoofft(strhead2.c_str(), /* base= */10); + cache_inode = static_cast(cvt_strtoofft(strhead1.c_str(), /* base= */10)); + if(0 == cache_inode){ + S3FS_PRN_ERR("wrong inode number in parsed cache stats."); + return false; + } + } + } + // check inode number + if(0 != cache_inode && cache_inode != inode){ + S3FS_PRN_ERR("differ inode and inode number in parsed cache stats."); + return false; + } + + // load each part + bool is_err = false; + while(getline(ssall, oneline, '\n')){ + std::string part; + std::istringstream ssparts(oneline); + // offset + if(!getline(ssparts, part, ':')){ + is_err = true; + break; + } + off_t offset = cvt_strtoofft(part.c_str(), /* base= */10); + // size + if(!getline(ssparts, part, ':')){ + is_err = true; + break; + } + off_t size = cvt_strtoofft(part.c_str(), /* base= */10); + // loaded + if(!getline(ssparts, part, ':')){ + is_err = true; + break; + } + bool is_loaded = (1 == cvt_strtoofft(part.c_str(), /* base= */10) ? true : false); + bool is_modified; + if(!getline(ssparts, part, ':')){ + is_modified = false; // old version does not have this part. + }else{ + is_modified = (1 == cvt_strtoofft(part.c_str(), /* base= */10) ? 
true : false); + } + // add new area + PageList::page_status pstatus = PageList::page_status::NOT_LOAD_MODIFIED; + if(is_loaded){ + if(is_modified){ + pstatus = PageList::page_status::LOAD_MODIFIED; + }else{ + pstatus = PageList::page_status::LOADED; + } + }else{ + if(is_modified){ + pstatus = PageList::page_status::MODIFIED; + } + } + SetPageLoadedStatus(offset, size, pstatus); + } + if(is_err){ + S3FS_PRN_ERR("failed to parse stats."); + Clear(); + return false; + } + + // check size + if(total != Size()){ + S3FS_PRN_ERR("different size(%lld - %lld).", static_cast(total), static_cast(Size())); + Clear(); + return false; + } + } + return true; +} + +void PageList::Dump() const +{ + int cnt = 0; + + S3FS_PRN_DBG("pages (shrinked=%s) = {", (is_shrink ? "yes" : "no")); + for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter, ++cnt){ + S3FS_PRN_DBG(" [%08d] -> {%014lld - %014lld : %s / %s}", cnt, static_cast(iter->offset), static_cast(iter->bytes), iter->loaded ? "loaded" : "unloaded", iter->modified ? "modified" : "not modified"); + } + S3FS_PRN_DBG("}"); +} + +// +// Compare the fdpage_list_t pages of the object with the state of the file. +// +// The loaded=true or modified=true area of pages must be a DATA block +// (not a HOLE block) in the file. +// The other area is a HOLE block in the file or is a DATA block(but the +// data of the target area in that block should be ZERO). +// If it is a bad area in the previous case, it will be reported as an error. +// If the latter case does not match, it will be reported as a warning. +// +bool PageList::CompareSparseFile(int fd, size_t file_size, fdpage_list_t& err_area_list, fdpage_list_t& warn_area_list) +{ + err_area_list.clear(); + warn_area_list.clear(); + + // First, list the block disk allocation area of the cache file. + // The cache file has holes(sparse file) and no disk block areas + // are assigned to any holes. 
+ fdpage_list_t sparse_list; + if(!PageList::GetSparseFilePages(fd, file_size, sparse_list)){ + S3FS_PRN_ERR("Something error is occurred in parsing hole/data of the cache file(physical_fd=%d).", fd); + + fdpage page(0, static_cast(file_size), false, false); + err_area_list.push_back(page); + + return false; + } + + if(sparse_list.empty() && pages.empty()){ + // both file and stats information are empty, it means cache file size is ZERO. + return true; + } + + // Compare each pages and sparse_list + bool result = true; + for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ + if(!PageList::CheckAreaInSparseFile(*iter, sparse_list, fd, err_area_list, warn_area_list)){ + result = false; + } + } + return result; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/fdcache_page.h b/s3fs/fdcache_page.h new file mode 100644 index 0000000..f4ef8c2 --- /dev/null +++ b/s3fs/fdcache_page.h @@ -0,0 +1,136 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef S3FS_FDCACHE_PAGE_H_ +#define S3FS_FDCACHE_PAGE_H_ + +#include +#include + +//------------------------------------------------ +// Symbols +//------------------------------------------------ +// [NOTE] +// If the following symbols in lseek whence are undefined, define them. +// If it is not supported by lseek, s3fs judges by the processing result of lseek. +// +#ifndef SEEK_DATA +#define SEEK_DATA 3 +#endif +#ifndef SEEK_HOLE +#define SEEK_HOLE 4 +#endif + +//------------------------------------------------ +// Structure fdpage +//------------------------------------------------ +// page block information +struct fdpage +{ + off_t offset; + off_t bytes; + bool loaded; + bool modified; + + explicit fdpage(off_t start = 0, off_t size = 0, bool is_loaded = false, bool is_modified = false) : + offset(start), bytes(size), loaded(is_loaded), modified(is_modified) {} + + off_t next() const + { + return (offset + bytes); + } + off_t end() const + { + return (0 < bytes ? offset + bytes - 1 : 0); + } +}; +typedef std::vector fdpage_list_t; + +//------------------------------------------------ +// Class PageList +//------------------------------------------------ +class CacheFileStat; +class FdEntity; + +// cppcheck-suppress copyCtorAndEqOperator +class PageList +{ + friend class FdEntity; // only one method access directly pages. 
+
+    private:
+        fdpage_list_t pages;
+        bool          is_shrink;   // [NOTE] true if it has been shrinked even once
+
+    public:
+        enum class page_status{
+            NOT_LOAD_MODIFIED = 0,
+            LOADED,
+            MODIFIED,
+            LOAD_MODIFIED
+        };
+
+    private:
+        static bool GetSparseFilePages(int fd, size_t file_size, fdpage_list_t& sparse_list);
+        static bool CheckZeroAreaInFile(int fd, off_t start, size_t bytes);
+        static bool CheckAreaInSparseFile(const struct fdpage& checkpage, const fdpage_list_t& sparse_list, int fd, fdpage_list_t& err_area_list, fdpage_list_t& warn_area_list);
+
+        void Clear();
+        bool Parse(off_t new_pos);
+
+    public:
+        static void FreeList(fdpage_list_t& list);
+
+        explicit PageList(off_t size = 0, bool is_loaded = false, bool is_modified = false, bool shrinked = false);
+        PageList(const PageList&) = delete;
+        PageList& operator=(const PageList&) = delete;
+        ~PageList();
+
+        bool Init(off_t size, bool is_loaded, bool is_modified);
+        off_t Size() const;
+        bool Resize(off_t size, bool is_loaded, bool is_modified);
+
+        bool IsPageLoaded(off_t start = 0, off_t size = 0) const;                   // size=0 is checking to end of list
+        bool SetPageLoadedStatus(off_t start, off_t size, PageList::page_status pstatus = page_status::LOADED, bool is_compress = true);
+        bool FindUnloadedPage(off_t start, off_t& resstart, off_t& ressize) const;
+        off_t GetTotalUnloadedPageSize(off_t start = 0, off_t size = 0, off_t limit_size = 0) const;      // size=0 is checking to end of list
+        size_t GetUnloadedPages(fdpage_list_t& unloaded_list, off_t start = 0, off_t size = 0) const;     // size=0 is checking to end of list
+        bool GetPageListsForMultipartUpload(fdpage_list_t& dlpages, fdpage_list_t& mixuppages, off_t max_partsize);
+        bool GetNoDataPageLists(fdpage_list_t& nodata_pages, off_t start = 0, size_t size = 0);
+
+        off_t BytesModified() const;
+        bool IsModified() const;
+        bool ClearAllModified();
+
+        bool Compress();
+        bool Serialize(CacheFileStat& file, bool is_output, ino_t inode);
+        void Dump() const;
+        bool
CompareSparseFile(int fd, size_t file_size, fdpage_list_t& err_area_list, fdpage_list_t& warn_area_list); +}; + +#endif // S3FS_FDCACHE_PAGE_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/fdcache_pseudofd.cpp b/s3fs/fdcache_pseudofd.cpp new file mode 100644 index 0000000..711cada --- /dev/null +++ b/s3fs/fdcache_pseudofd.cpp @@ -0,0 +1,133 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include + +#include "s3fs_logger.h" +#include "fdcache_pseudofd.h" +#include "autolock.h" + +//------------------------------------------------ +// Symbols +//------------------------------------------------ +// [NOTE] +// The minimum pseudo fd value starts 2. +// This is to avoid mistakes for 0(stdout) and 1(stderr), which are usually used. 
+//
+static constexpr int MIN_PSEUDOFD_NUMBER = 2;
+
+//------------------------------------------------
+// PseudoFdManager class methods
+//------------------------------------------------
+PseudoFdManager& PseudoFdManager::GetManager()
+{
+    static PseudoFdManager singleton;
+    return singleton;
+}
+
+int PseudoFdManager::Get()
+{
+    return (PseudoFdManager::GetManager()).CreatePseudoFd();
+}
+
+bool PseudoFdManager::Release(int fd)
+{
+    return (PseudoFdManager::GetManager()).ReleasePseudoFd(fd);
+}
+
+//------------------------------------------------
+// PseudoFdManager methods
+//------------------------------------------------
+PseudoFdManager::PseudoFdManager() : is_lock_init(false)
+{
+    pthread_mutexattr_t attr;
+    pthread_mutexattr_init(&attr);
+#if S3FS_PTHREAD_ERRORCHECK
+    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
+#endif
+    int result;
+    if(0 != (result = pthread_mutex_init(&pseudofd_list_lock, &attr))){
+        S3FS_PRN_CRIT("failed to init pseudofd_list_lock: %d", result);
+        abort();
+    }
+    is_lock_init = true;
+}
+
+PseudoFdManager::~PseudoFdManager()
+{
+    if(is_lock_init){
+        int result;
+        if(0 != (result = pthread_mutex_destroy(&pseudofd_list_lock))){
+            S3FS_PRN_CRIT("failed to destroy pseudofd_list_lock: %d", result);
+            abort();
+        }
+        is_lock_init = false;
+    }
+}
+
+int PseudoFdManager::GetUnusedMinPseudoFd() const
+{
+    int min_fd = MIN_PSEUDOFD_NUMBER;
+
+    // Look for the first discontinuous value.
+    for(pseudofd_list_t::const_iterator iter = pseudofd_list.begin(); iter != pseudofd_list.end(); ++iter){
+        if(min_fd == (*iter)){
+            ++min_fd;
+        }else if(min_fd < (*iter)){
+            break;
+        }
+    }
+    return min_fd;
+}
+
+int PseudoFdManager::CreatePseudoFd()
+{
+    AutoLock auto_lock(&pseudofd_list_lock);
+
+    int new_fd = PseudoFdManager::GetUnusedMinPseudoFd();
+    pseudofd_list.push_back(new_fd);
+    std::sort(pseudofd_list.begin(), pseudofd_list.end());
+
+    return new_fd;
+}
+
+bool PseudoFdManager::ReleasePseudoFd(int fd)
+{
+    AutoLock auto_lock(&pseudofd_list_lock);
+
+    for(pseudofd_list_t::iterator iter = pseudofd_list.begin(); iter != pseudofd_list.end(); ++iter){
+        if(fd == (*iter)){
+            pseudofd_list.erase(iter);
+            return true;
+        }
+    }
+    return false;
+}
+
+/*
+* Local variables:
+* tab-width: 4
+* c-basic-offset: 4
+* End:
+* vim600: expandtab sw=4 ts=4 fdm=marker
+* vim<600: expandtab sw=4 ts=4
+*/
diff --git a/s3fs/fdcache_pseudofd.h b/s3fs/fdcache_pseudofd.h
new file mode 100644
index 0000000..1025264
--- /dev/null
+++ b/s3fs/fdcache_pseudofd.h
@@ -0,0 +1,71 @@
+/*
+ * s3fs - FUSE-based file system backed by Amazon S3
+ *
+ * Copyright(C) 2007 Randy Rizun
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef S3FS_FDCACHE_PSEUDOFD_H_
+#define S3FS_FDCACHE_PSEUDOFD_H_
+
+#include <vector>
+
+//------------------------------------------------
+// Typdefs
+//------------------------------------------------
+// List of pseudo fd in use
+//
+typedef std::vector<int> pseudofd_list_t;
+
+//------------------------------------------------
+// Class PseudoFdManager
+//------------------------------------------------
+class PseudoFdManager
+{
+    private:
+        pseudofd_list_t pseudofd_list;
+        bool            is_lock_init;
+        pthread_mutex_t pseudofd_list_lock;    // protects pseudofd_list
+
+    private:
+        static PseudoFdManager& GetManager();
+
+        PseudoFdManager();
+        ~PseudoFdManager();
+        PseudoFdManager(const PseudoFdManager&) = delete;
+        PseudoFdManager(PseudoFdManager&&) = delete;
+        PseudoFdManager& operator=(const PseudoFdManager&) = delete;
+        PseudoFdManager& operator=(PseudoFdManager&&) = delete;
+
+        int GetUnusedMinPseudoFd() const;
+        int CreatePseudoFd();
+        bool ReleasePseudoFd(int fd);
+
+    public:
+        static int Get();
+        static bool Release(int fd);
+};
+
+#endif // S3FS_FDCACHE_PSEUDOFD_H_
+
+/*
+* Local variables:
+* tab-width: 4
+* c-basic-offset: 4
+* End:
+* vim600: expandtab sw=4 ts=4 fdm=marker
+* vim<600: expandtab sw=4 ts=4
+*/
diff --git a/s3fs/fdcache_stat.cpp b/s3fs/fdcache_stat.cpp
new file mode 100644
index 0000000..c337409
--- /dev/null
+++ b/s3fs/fdcache_stat.cpp
@@ -0,0 +1,282 @@
+/*
+ * s3fs - FUSE-based file system backed by Amazon S3
+ *
+ * Copyright(C) 2007 Takeshi Nakatani
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include + +#include "s3fs_logger.h" +#include "fdcache_stat.h" +#include "fdcache.h" +#include "s3fs_util.h" +#include "s3fs_cred.h" +#include "string_util.h" + +//------------------------------------------------ +// CacheFileStat class methods +//------------------------------------------------ +std::string CacheFileStat::GetCacheFileStatTopDir() +{ + std::string top_path; + if(!FdManager::IsCacheDir() || S3fsCred::GetBucket().empty()){ + return top_path; + } + + // stat top dir( "//..stat" ) + top_path += FdManager::GetCacheDir(); + top_path += "/."; + top_path += S3fsCred::GetBucket(); + top_path += ".stat"; + return top_path; +} + +int CacheFileStat::MakeCacheFileStatPath(const char* path, std::string& sfile_path, bool is_create_dir) +{ + std::string top_path = CacheFileStat::GetCacheFileStatTopDir(); + if(top_path.empty()){ + S3FS_PRN_ERR("The path to cache top dir is empty."); + return -EIO; + } + + if(is_create_dir){ + int result; + if(0 != (result = mkdirp(top_path + mydirname(path), 0777))){ + S3FS_PRN_ERR("failed to create dir(%s) by errno(%d).", path, result); + return result; + } + } + if(!path || '\0' == path[0]){ + sfile_path = top_path; + }else{ + sfile_path = top_path + SAFESTRPTR(path); + } + return 0; +} + +bool CacheFileStat::CheckCacheFileStatTopDir() +{ + std::string top_path = CacheFileStat::GetCacheFileStatTopDir(); + if(top_path.empty()){ + S3FS_PRN_INFO("The path to cache top dir is empty, thus not need to check permission."); + return true; + } + + return check_exist_dir_permission(top_path.c_str()); +} + +int CacheFileStat::DeleteCacheFileStat(const char* path) +{ + if(!path || '\0' == path[0]){ + return -EINVAL; + } + // stat 
path + std::string sfile_path; + int result; + if(0 != (result = CacheFileStat::MakeCacheFileStatPath(path, sfile_path, false))){ + S3FS_PRN_ERR("failed to create cache stat file path(%s)", path); + return result; + } + if(0 != unlink(sfile_path.c_str())){ + result = -errno; + if(-ENOENT == result){ + S3FS_PRN_DBG("failed to delete file(%s): errno=%d", path, result); + }else{ + S3FS_PRN_ERR("failed to delete file(%s): errno=%d", path, result); + } + return result; + } + return 0; +} + +// [NOTE] +// If remove stat file directory, it should do before removing +// file cache directory. +// +bool CacheFileStat::DeleteCacheFileStatDirectory() +{ + std::string top_path = CacheFileStat::GetCacheFileStatTopDir(); + if(top_path.empty()){ + S3FS_PRN_INFO("The path to cache top dir is empty, thus not need to remove it."); + return true; + } + return delete_files_in_dir(top_path.c_str(), true); +} + +bool CacheFileStat::RenameCacheFileStat(const char* oldpath, const char* newpath) +{ + if(!oldpath || '\0' == oldpath[0] || !newpath || '\0' == newpath[0]){ + return false; + } + + // stat path + std::string old_filestat; + std::string new_filestat; + if(0 != CacheFileStat::MakeCacheFileStatPath(oldpath, old_filestat, false) || 0 != CacheFileStat::MakeCacheFileStatPath(newpath, new_filestat, false)){ + return false; + } + + // check new stat path + struct stat st; + if(0 == stat(new_filestat.c_str(), &st)){ + // new stat path is existed, then unlink it. + if(-1 == unlink(new_filestat.c_str())){ + S3FS_PRN_ERR("failed to unlink new cache file stat path(%s) by errno(%d).", new_filestat.c_str(), errno); + return false; + } + } + + // check old stat path + if(0 != stat(old_filestat.c_str(), &st)){ + // old stat path is not existed, then nothing to do any more. 
+ return true; + } + + // link and unlink + if(-1 == link(old_filestat.c_str(), new_filestat.c_str())){ + S3FS_PRN_ERR("failed to link old cache file stat path(%s) to new cache file stat path(%s) by errno(%d).", old_filestat.c_str(), new_filestat.c_str(), errno); + return false; + } + if(-1 == unlink(old_filestat.c_str())){ + S3FS_PRN_ERR("failed to unlink old cache file stat path(%s) by errno(%d).", old_filestat.c_str(), errno); + return false; + } + return true; +} + +//------------------------------------------------ +// CacheFileStat methods +//------------------------------------------------ +CacheFileStat::CacheFileStat(const char* tpath) : fd(-1) +{ + if(tpath && '\0' != tpath[0]){ + SetPath(tpath, true); + } +} + +CacheFileStat::~CacheFileStat() +{ + Release(); +} + +bool CacheFileStat::SetPath(const char* tpath, bool is_open) +{ + if(!tpath || '\0' == tpath[0]){ + return false; + } + if(!Release()){ + // could not close old stat file. + return false; + } + path = tpath; + if(!is_open){ + return true; + } + return Open(); +} + +bool CacheFileStat::RawOpen(bool readonly) +{ + if(path.empty()){ + return false; + } + if(-1 != fd){ + // already opened + return true; + } + // stat path + std::string sfile_path; + if(0 != CacheFileStat::MakeCacheFileStatPath(path.c_str(), sfile_path, true)){ + S3FS_PRN_ERR("failed to create cache stat file path(%s)", path.c_str()); + return false; + } + // open + int tmpfd; + if(readonly){ + if(-1 == (tmpfd = open(sfile_path.c_str(), O_RDONLY))){ + S3FS_PRN_ERR("failed to read only open cache stat file path(%s) - errno(%d)", path.c_str(), errno); + return false; + } + }else{ + if(-1 == (tmpfd = open(sfile_path.c_str(), O_CREAT|O_RDWR, 0600))){ + S3FS_PRN_ERR("failed to open cache stat file path(%s) - errno(%d)", path.c_str(), errno); + return false; + } + } + scope_guard guard([&]() { close(tmpfd); }); + + // lock + if(-1 == flock(tmpfd, LOCK_EX)){ + S3FS_PRN_ERR("failed to lock cache stat file(%s) - errno(%d)", path.c_str(), 
errno); + return false; + } + // seek top + if(0 != lseek(tmpfd, 0, SEEK_SET)){ + S3FS_PRN_ERR("failed to lseek cache stat file(%s) - errno(%d)", path.c_str(), errno); + flock(tmpfd, LOCK_UN); + return false; + } + S3FS_PRN_DBG("file locked(%s - %s)", path.c_str(), sfile_path.c_str()); + + guard.dismiss(); + fd = tmpfd; + return true; +} + +bool CacheFileStat::Open() +{ + return RawOpen(false); +} + +bool CacheFileStat::ReadOnlyOpen() +{ + return RawOpen(true); +} + +bool CacheFileStat::Release() +{ + if(-1 == fd){ + // already release + return true; + } + // unlock + if(-1 == flock(fd, LOCK_UN)){ + S3FS_PRN_ERR("failed to unlock cache stat file(%s) - errno(%d)", path.c_str(), errno); + return false; + } + S3FS_PRN_DBG("file unlocked(%s)", path.c_str()); + + if(-1 == close(fd)){ + S3FS_PRN_ERR("failed to close cache stat file(%s) - errno(%d)", path.c_str(), errno); + return false; + } + fd = -1; + + return true; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/fdcache_stat.h b/s3fs/fdcache_stat.h new file mode 100644 index 0000000..3ad476b --- /dev/null +++ b/s3fs/fdcache_stat.h @@ -0,0 +1,66 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef S3FS_FDCACHE_STAT_H_
+#define S3FS_FDCACHE_STAT_H_
+
+#include <string>
+
+//------------------------------------------------
+// CacheFileStat
+//------------------------------------------------
+class CacheFileStat
+{
+    private:
+        std::string path;
+        int         fd;
+
+    private:
+        static int MakeCacheFileStatPath(const char* path, std::string& sfile_path, bool is_create_dir = true);
+
+        bool RawOpen(bool readonly);
+
+    public:
+        static std::string GetCacheFileStatTopDir();
+        static int DeleteCacheFileStat(const char* path);
+        static bool CheckCacheFileStatTopDir();
+        static bool DeleteCacheFileStatDirectory();
+        static bool RenameCacheFileStat(const char* oldpath, const char* newpath);
+
+        explicit CacheFileStat(const char* tpath = nullptr);
+        ~CacheFileStat();
+
+        bool Open();
+        bool ReadOnlyOpen();
+        bool Release();
+        bool SetPath(const char* tpath, bool is_open = true);
+        int GetFd() const { return fd; }
+};
+
+#endif // S3FS_FDCACHE_STAT_H_
+
+/*
+* Local variables:
+* tab-width: 4
+* c-basic-offset: 4
+* End:
+* vim600: expandtab sw=4 ts=4 fdm=marker
+* vim<600: expandtab sw=4 ts=4
+*/
diff --git a/s3fs/fdcache_untreated.cpp b/s3fs/fdcache_untreated.cpp
new file mode 100644
index 0000000..dcba302
--- /dev/null
+++ b/s3fs/fdcache_untreated.cpp
@@ -0,0 +1,277 @@
+/*
+ * s3fs - FUSE-based file system backed by Amazon S3
+ *
+ * Copyright(C) 2007 Takeshi Nakatani
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include + +#include "s3fs_logger.h" +#include "fdcache_untreated.h" +#include "autolock.h" + +//------------------------------------------------ +// UntreatedParts methods +//------------------------------------------------ +UntreatedParts::UntreatedParts() : last_tag(0) //, is_lock_init(false) +{ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + + int result; + if(0 != (result = pthread_mutex_init(&untreated_list_lock, &attr))){ + S3FS_PRN_CRIT("failed to init untreated_list_lock: %d", result); + abort(); + } + is_lock_init = true; +} + +UntreatedParts::~UntreatedParts() +{ + if(is_lock_init){ + int result; + if(0 != (result = pthread_mutex_destroy(&untreated_list_lock))){ + S3FS_PRN_CRIT("failed to destroy untreated_list_lock: %d", result); + abort(); + } + is_lock_init = false; + } +} + +bool UntreatedParts::empty() +{ + AutoLock auto_lock(&untreated_list_lock); + return untreated_list.empty(); +} + +bool UntreatedParts::AddPart(off_t start, off_t size) +{ + if(start < 0 || size <= 0){ + S3FS_PRN_ERR("Paramter are wrong(start=%lld, size=%lld).", static_cast(start), static_cast(size)); + return false; + } + AutoLock auto_lock(&untreated_list_lock); + + ++last_tag; + + // Check the overlap with the existing part and add the part. 
+ for(untreated_list_t::iterator iter = untreated_list.begin(); iter != untreated_list.end(); ++iter){ + if(iter->stretch(start, size, last_tag)){ + // the part was stretched, thus check if it overlaps with next parts + untreated_list_t::iterator niter = iter; + for(++niter; niter != untreated_list.end(); ){ + if(!iter->stretch(niter->start, niter->size, last_tag)){ + // This next part does not overlap with the current part + break; + } + // Since the parts overlap and the current part is stretched, delete this next part. + niter = untreated_list.erase(niter); + } + // success to stretch and compress existed parts + return true; + + }else if((start + size) < iter->start){ + // The part to add should be inserted before the current part. + untreated_list.insert(iter, untreatedpart(start, size, last_tag)); + // success to stretch and compress existed parts + return true; + } + } + // There are no overlapping parts in the untreated_list, then add the part at end of list + untreated_list.emplace_back(start, size, last_tag); + return true; +} + +bool UntreatedParts::RowGetPart(off_t& start, off_t& size, off_t max_size, off_t min_size, bool lastpart) const +{ + if(max_size <= 0 || min_size < 0 || max_size < min_size){ + S3FS_PRN_ERR("Paramter are wrong(max_size=%lld, min_size=%lld).", static_cast(max_size), static_cast(min_size)); + return false; + } + AutoLock auto_lock(&untreated_list_lock); + + // Check the overlap with the existing part and add the part. 
+ for(untreated_list_t::const_iterator iter = untreated_list.begin(); iter != untreated_list.end(); ++iter){ + if(!lastpart || iter->untreated_tag == last_tag){ + if(min_size <= iter->size){ + if(iter->size <= max_size){ + // whole part( min <= part size <= max ) + start = iter->start; + size = iter->size; + }else{ + // Partially take out part( max < part size ) + start = iter->start; + size = max_size; + } + return true; + }else{ + if(lastpart){ + return false; + } + } + } + } + return false; +} + +// [NOTE] +// If size is specified as 0, all areas(parts) after start will be deleted. +// +bool UntreatedParts::ClearParts(off_t start, off_t size) +{ + if(start < 0 || size < 0){ + S3FS_PRN_ERR("Paramter are wrong(start=%lld, size=%lld).", static_cast(start), static_cast(size)); + return false; + } + AutoLock auto_lock(&untreated_list_lock); + + if(untreated_list.empty()){ + return true; + } + + // Check the overlap with the existing part. + for(untreated_list_t::iterator iter = untreated_list.begin(); iter != untreated_list.end(); ){ + if(0 != size && (start + size) <= iter->start){ + // clear area is in front of iter area, no more to do. 
+ break; + }else if(start <= iter->start){ + if(0 != size && (start + size) <= (iter->start + iter->size)){ + // clear area overlaps with iter area(on the start side) + iter->size = (iter->start + iter->size) - (start + size); + iter->start = start + size; + if(0 == iter->size){ + iter = untreated_list.erase(iter); + } + }else{ + // clear area overlaps with all of iter area + iter = untreated_list.erase(iter); + } + }else if(start < (iter->start + iter->size)){ + // clear area overlaps with iter area(on the end side) + if(0 == size || (iter->start + iter->size) <= (start + size)){ + // start to iter->end is clear + iter->size = start - iter->start; + }else{ + // parse current part + iter->size = start - iter->start; + + // add new part + off_t next_start = start + size; + off_t next_size = (iter->start + iter->size) - (start + size); + long next_tag = iter->untreated_tag; + ++iter; + iter = untreated_list.insert(iter, untreatedpart(next_start, next_size, next_tag)); + ++iter; + } + }else{ + // clear area is in behind of iter area + ++iter; + } + } + return true; +} + +// +// Update the last updated Untreated part +// +bool UntreatedParts::GetLastUpdatePart(off_t& start, off_t& size) const +{ + AutoLock auto_lock(&untreated_list_lock); + + for(untreated_list_t::const_iterator iter = untreated_list.begin(); iter != untreated_list.end(); ++iter){ + if(iter->untreated_tag == last_tag){ + start = iter->start; + size = iter->size; + return true; + } + } + return false; +} + +// +// Replaces the last updated Untreated part. 
+// +// [NOTE] +// If size <= 0, delete that part +// +bool UntreatedParts::ReplaceLastUpdatePart(off_t start, off_t size) +{ + AutoLock auto_lock(&untreated_list_lock); + + for(untreated_list_t::iterator iter = untreated_list.begin(); iter != untreated_list.end(); ++iter){ + if(iter->untreated_tag == last_tag){ + if(0 < size){ + iter->start = start; + iter->size = size; + }else{ + iter = untreated_list.erase(iter); + } + return true; + } + } + return false; +} + +// +// Remove the last updated Untreated part. +// +bool UntreatedParts::RemoveLastUpdatePart() +{ + AutoLock auto_lock(&untreated_list_lock); + + for(untreated_list_t::iterator iter = untreated_list.begin(); iter != untreated_list.end(); ++iter){ + if(iter->untreated_tag == last_tag){ + untreated_list.erase(iter); + return true; + } + } + return false; +} + +// +// Duplicate the internally untreated_list. +// +bool UntreatedParts::Duplicate(untreated_list_t& list) +{ + AutoLock auto_lock(&untreated_list_lock); + + list = untreated_list; + return true; +} + +void UntreatedParts::Dump() +{ + AutoLock auto_lock(&untreated_list_lock); + + S3FS_PRN_DBG("untreated list = ["); + for(untreated_list_t::const_iterator iter = untreated_list.begin(); iter != untreated_list.end(); ++iter){ + S3FS_PRN_DBG(" {%014lld - %014lld : tag=%ld}", static_cast(iter->start), static_cast(iter->size), iter->untreated_tag); + } + S3FS_PRN_DBG("]"); +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/fdcache_untreated.h b/s3fs/fdcache_untreated.h new file mode 100644 index 0000000..8e55afe --- /dev/null +++ b/s3fs/fdcache_untreated.h @@ -0,0 +1,76 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software 
Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_FDCACHE_UNTREATED_H_ +#define S3FS_FDCACHE_UNTREATED_H_ + +#include "common.h" +#include "types.h" + +//------------------------------------------------ +// Class UntreatedParts +//------------------------------------------------ +class UntreatedParts +{ + private: + mutable pthread_mutex_t untreated_list_lock; // protects untreated_list + bool is_lock_init; + + untreated_list_t untreated_list; + long last_tag; // [NOTE] Use this to identify the latest updated part. 
+ + private: + bool RowGetPart(off_t& start, off_t& size, off_t max_size, off_t min_size, bool lastpart) const; + + public: + UntreatedParts(); + ~UntreatedParts(); + UntreatedParts(const UntreatedParts&) = delete; + UntreatedParts(UntreatedParts&&) = delete; + UntreatedParts& operator=(const UntreatedParts&) = delete; + UntreatedParts& operator=(UntreatedParts&&) = delete; + + bool empty(); + + bool AddPart(off_t start, off_t size); + bool GetLastUpdatedPart(off_t& start, off_t& size, off_t max_size, off_t min_size = MIN_MULTIPART_SIZE) const { return RowGetPart(start, size, max_size, min_size, true); } + + bool ClearParts(off_t start, off_t size); + bool ClearAll() { return ClearParts(0, 0); } + + bool GetLastUpdatePart(off_t& start, off_t& size) const; + bool ReplaceLastUpdatePart(off_t start, off_t size); + bool RemoveLastUpdatePart(); + + bool Duplicate(untreated_list_t& list); + + void Dump(); +}; + +#endif // S3FS_FDCACHE_UNTREATED_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/hybridcache_accessor_4_s3fs.cpp b/s3fs/hybridcache_accessor_4_s3fs.cpp new file mode 100644 index 0000000..d947c53 --- /dev/null +++ b/s3fs/hybridcache_accessor_4_s3fs.cpp @@ -0,0 +1,554 @@ +#include "fdcache_entity.h" +#include "fdcache.h" +#include "hybridcache_accessor_4_s3fs.h" +#include "hybridcache_disk_data_adaptor.h" +#include "hybridcache_s3_data_adaptor.h" +#include "s3fs_logger.h" +#include "time.h" + +#include "Common.h" +#include "FileSystemDataAdaptor.h" +#include "GlobalDataAdaptor.h" + +using HybridCache::ByteBuffer; +using HybridCache::WriteCache; +using HybridCache::ReadCache; +using HybridCache::ErrCode::SUCCESS; +using HybridCache::EnableLogging; + +HybridCacheAccessor4S3fs::HybridCacheAccessor4S3fs( + const HybridCache::HybridCacheConfig& cfg) : HybridCacheAccessor(cfg) { + Init(); +} + +HybridCacheAccessor4S3fs::~HybridCacheAccessor4S3fs() { 
+ Stop(); +} + +void HybridCacheAccessor4S3fs::Init() { + InitLog(); + + if (cfg_.UseGlobalCache) { + std::shared_ptr etcd_client = nullptr; + if (cfg_.GlobalCacheCfg.EnableWriteCache) { + GetGlobalConfig().default_policy.write_cache_type = REPLICATION; + GetGlobalConfig().default_policy.write_replication_factor = 1; + etcd_client = std::make_shared(cfg_.GlobalCacheCfg.EtcdAddress); + } + if (!cfg_.GlobalCacheCfg.GflagFile.empty()) { + HybridCache::ParseFlagFromFile(cfg_.GlobalCacheCfg.GflagFile); + } + dataAdaptor_ = std::make_shared( + std::make_shared(std::make_shared()), + cfg_.GlobalCacheCfg.GlobalServers, etcd_client); + } else { + dataAdaptor_ = std::make_shared( + std::make_shared()); + } + + executor_ = std::make_shared(cfg_.ThreadNum); + dataAdaptor_->SetExecutor(executor_); + writeCache_ = std::make_shared(cfg_.WriteCacheCfg); + readCache_ = std::make_shared(cfg_.ReadCacheCfg, dataAdaptor_, + executor_); + tokenBucket_ = std::make_shared( + cfg_.UploadNormalFlowLimit, cfg_.UploadBurstFlowLimit); + toStop_.store(false, std::memory_order_release); + bgFlushThread_ = std::thread(&HybridCacheAccessor4S3fs::BackGroundFlush, this); + LOG(WARNING) << "[Accessor]Init, useGlobalCache:" << cfg_.UseGlobalCache; +} + +void HybridCacheAccessor4S3fs::Stop() { + toStop_.store(true, std::memory_order_release); + if (bgFlushThread_.joinable()) { + bgFlushThread_.join(); + } + executor_->stop(); + writeCache_.reset(); + readCache_.reset(); + LOG(WARNING) << "[Accessor]Stop"; +} + +int HybridCacheAccessor4S3fs::Put(const std::string &key, size_t start, + size_t len, const char* buf) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + // When the write cache is full, + // block waiting for asynchronous flush to release the write cache space. 
+ while(IsWriteCacheFull(len)) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + + // shared lock + auto fileLock = fileLock_.find(key); + while(true) { + if (fileLock_.end() != fileLock) break; + auto res = fileLock_.insert(key, std::make_shared>(0)); + if (res.second) { + fileLock = std::move(res.first); + break; + } + fileLock = fileLock_.find(key); + } + while(true) { + int lock = fileLock->second->load(); + if (lock >= 0 && fileLock->second->compare_exchange_weak(lock, lock + 1)) + break; + } + + int res = writeCache_->Put(key, start, len, ByteBuffer(const_cast(buf), len)); + + int fd = -1; + FdEntity* ent = nullptr; + if (SUCCESS == res && nullptr == (ent = FdManager::get()->GetFdEntity( + key.c_str(), fd, false, AutoLock::ALREADY_LOCKED))) { + res = -EIO; + LOG(ERROR) << "[Accessor]Put, can't find opened path, file:" << key; + } + if (SUCCESS == res) { + ent->UpdateRealsize(start + len); // TODO: size如何获取?并发情况下的一致性? + } + + fileLock->second->fetch_sub(1); // release shared lock + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[Accessor]Put, key:" << key << ", start:" << start + << ", len:" << len << ", res:" << res + << ", time:" << totalTime << "ms"; + } + return res; +} + +int HybridCacheAccessor4S3fs::Get(const std::string &key, size_t start, + size_t len, char* buf) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = SUCCESS; + ByteBuffer buffer(buf, len); + std::vector> dataBoundary; + res = writeCache_->Get(key, start, len, buffer, dataBoundary); + + size_t remainLen = len; + for (auto it : dataBoundary) { + remainLen -= it.second; + } + + // handle cache misses + size_t readLen = 0; + size_t stepStart = 0; + size_t fileStartOff = 0; + std::vector> fs; + auto it = dataBoundary.begin(); + while (remainLen > 0 && SUCCESS == res) { + ByteBuffer buffer(buf + 
stepStart); + fileStartOff = start + stepStart; + if (it != dataBoundary.end()) { + readLen = it->first - stepStart; + if (!readLen) { + stepStart = it->first + it->second; + ++it; + continue; + } + stepStart = it->first + it->second; + ++it; + } else { + readLen = remainLen; + } + buffer.len = readLen; + remainLen -= readLen; + fs.emplace_back(std::move(readCache_->Get(key, fileStartOff, readLen, buffer))); + } + + if (!fs.empty()) { + auto collectRes = folly::collectAll(fs).get(); + for (auto& entry: collectRes) { + int tmpRes = entry.value(); + if (SUCCESS != tmpRes && -ENOENT != tmpRes) + res = tmpRes; + } + } + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[Accessor]Get, key:" << key << ", start:" << start + << ", len:" << len << ", res:" << res + << ", time:" << totalTime << "ms"; + } + return res; +} + +int HybridCacheAccessor4S3fs::Flush(const std::string &key) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) { + startTime = std::chrono::steady_clock::now(); + LOG(INFO) << "[Accessor]Flush start, key:" << key; + } + + // exclusive lock + auto fileLock = fileLock_.find(key); + while(true) { + if (fileLock_.end() != fileLock) break; + auto res = fileLock_.insert(key, std::make_shared>(0)); + if (res.second) { + fileLock = std::move(res.first); + break; + } + fileLock = fileLock_.find(key); + } + while(true) { + int expected = 0; + if (fileLock->second->compare_exchange_weak(expected, -1)) + break; + } + + int res = SUCCESS; + int fd = -1; + FdEntity* ent = nullptr; + if (nullptr == (ent = FdManager::get()->GetFdEntity( + key.c_str(), fd, false, AutoLock::ALREADY_LOCKED))) { + res = -EIO; + LOG(ERROR) << "[Accessor]Flush, can't find opened path, file:" << key; + } + size_t realSize = 0; + std::map realHeaders; + if (SUCCESS == res) { + realSize = ent->GetRealsize(); + for (auto &it : ent->GetOriginalHeaders()) { + realHeaders[it.first] = 
it.second; + } + } + + if (SUCCESS == res && cfg_.UseGlobalCache) { + // first head S3,upload a empty file when the file does not exist + size_t size; + std::map headers; + S3DataAdaptor s3Adaptor; + res = s3Adaptor.Head(key, size, headers).get(); + if (-ENOENT == res) { + res = s3Adaptor.UpLoad(key, 0, ByteBuffer(nullptr, 0), realHeaders).get(); + if (SUCCESS != res) { + LOG(ERROR) << "[Accessor]Flush, upload empty file error, file:" << key + << ", res:" << res; + } + } else if (SUCCESS != res) { + LOG(ERROR) << "[Accessor]Flush, head error, file:" << key + << ", res:" << res; + } + } + + char *buf = nullptr; + while(0 != posix_memalign((void **) &buf, 4096, realSize)); + ByteBuffer buffer(buf, realSize); + if (SUCCESS == res) { + const size_t chunkSize = GetGlobalConfig().write_chunk_size * 2; + const uint64_t chunkNum = realSize / chunkSize + (realSize % chunkSize == 0 ? 0 : 1); + std::vector jsonRoots(chunkNum); + std::vector> fs; + uint64_t cur = 0; + for (size_t offset = 0; offset < realSize; offset += chunkSize) { + size_t len = std::min(chunkSize, realSize - offset); + fs.emplace_back(folly::via(executor_.get(), [this, key, offset, len, buf, &realHeaders, &jsonRoots, cur]() { + int getRes = Get(key, offset, len, buf + offset); + if (!cfg_.UseGlobalCache || SUCCESS != getRes) return getRes; + while(!tokenBucket_->consume(len)); // upload flow control + ByteBuffer buffer(buf + offset, len); + GlobalDataAdaptor* adaptor = dynamic_cast(dataAdaptor_.get()); + return adaptor->UpLoadPart(key, offset, len, buffer, realHeaders, jsonRoots[cur]).get(); + })); + ++cur; + } + auto collectRes = folly::collectAll(fs).get(); + for (auto& entry: collectRes) { + int tmpRes = entry.value(); + if (SUCCESS != tmpRes) res = tmpRes; + } + if (cfg_.UseGlobalCache && SUCCESS == res) { + GlobalDataAdaptor* adaptor = dynamic_cast(dataAdaptor_.get()); + res = adaptor->Completed(key, jsonRoots, realSize).get(); + } + } + + if (SUCCESS == res && !cfg_.UseGlobalCache) { // Get success + 
while(!tokenBucket_->consume(realSize)); // upload flow control + res = dataAdaptor_->UpLoad(key, realSize, buffer, realHeaders).get(); + if (SUCCESS != res){ + LOG(ERROR) << "[Accessor]Flush, upload error, file:" << key + << ", res:" << res; + } + } + + // folly via is not executed immediately, so use separate thread + std::thread t([this, key, res]() { + if (SUCCESS == res) // upload success + writeCache_->Delete(key); + auto fileLock = fileLock_.find(key); + if (fileLock_.end() != fileLock) { + fileLock->second->store(0); + fileLock_.erase(fileLock); // release exclusive lock + } + }); + t.detach(); + + if (SUCCESS == res && cfg_.FlushToRead) { // upload success + // TODO: 为提升性能,解锁可能会先于put readCache,可能导致并发flush时写脏数据 + std::vector> fs; + const size_t chunkSize = 32 * 1024 * 1024; + for (size_t offset = 0; offset < realSize; offset += chunkSize) { + size_t len = std::min(chunkSize, realSize - offset); + fs.emplace_back(folly::via(executor_.get(), [this, key, offset, len, buf]() { + return readCache_->Put(key, offset, len, ByteBuffer(buf + offset, len)); + })); + } + folly::collectAll(fs).via(executor_.get()).thenValue([this, buf]( + std::vector, std::allocator>>&& tups) { + if (buf) free(buf); + return 0; + }); + } else { + folly::via(executor_.get(), [this, buf]() { + if (buf) free(buf); + }); + } + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[Accessor]Flush end, key:" << key << ", size:" << realSize + << ", res:" << res << ", time:" << totalTime << "ms"; + } + return res; +} + +int HybridCacheAccessor4S3fs::DeepFlush(const std::string &key) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = SUCCESS; + if (cfg_.UseGlobalCache) { + res = dataAdaptor_->DeepFlush(key).get(); + } + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - 
startTime).count(); + LOG(INFO) << "[Accessor]DeepFlush, key:" << key << ", res:" << res + << ", time:" << totalTime << "ms"; + } + return res; +} + +int HybridCacheAccessor4S3fs::Delete(const std::string &key) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + // exclusive lock + auto fileLock = fileLock_.find(key); + while(true) { + if (fileLock_.end() != fileLock) break; + auto res = fileLock_.insert(key, std::make_shared>(0)); + if (res.second) { + fileLock = std::move(res.first); + break; + } + fileLock = fileLock_.find(key); + } + while(true) { + int expected = 0; + if (fileLock->second->compare_exchange_weak(expected, -1)) + break; + } + + int res = writeCache_->Delete(key); + if (SUCCESS == res) { + res = readCache_->Delete(key); + } + if (SUCCESS == res) { + res = dataAdaptor_->Delete(key).get(); + } + + fileLock->second->store(0); + fileLock_.erase(fileLock); // release exclusive lock + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[Accessor]Delete, key:" << key << ", res:" << res + << ", time:" << totalTime << "ms"; + } + return res; +} + +int HybridCacheAccessor4S3fs::Truncate(const std::string &key, size_t size) { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + // exclusive lock + auto fileLock = fileLock_.find(key); + while(true) { + if (fileLock_.end() != fileLock) break; + auto res = fileLock_.insert(key, std::make_shared>(0)); + if (res.second) { + fileLock = std::move(res.first); + break; + } + fileLock = fileLock_.find(key); + } + while(true) { + int expected = 0; + if (fileLock->second->compare_exchange_weak(expected, -1)) + break; + } + + int res = SUCCESS; + int fd = -1; + FdEntity* ent = nullptr; + if (nullptr == (ent = FdManager::get()->GetFdEntity(key.c_str(), fd, + false, AutoLock::ALREADY_LOCKED))) { + 
res = -EIO;
+        // BUGFIX: this message said "[Accessor]Flush" (copy-paste from Flush()).
+        LOG(ERROR) << "[Accessor]Truncate, can't find opened path, file:" << key;
+    }
+    size_t realSize = 0;
+    if (SUCCESS == res) {
+        realSize = ent->GetRealsize();
+        if (size < realSize) {
+            // Shrink: drop the now out-of-range bytes from the write cache.
+            res = writeCache_->Truncate(key, size);
+        } else if (size > realSize) {
+            // Grow: pad the gap [realSize, size) in the write cache.
+            // make_unique<char[]> value-initializes, so the padding is all zero bytes.
+            size_t fillSize = size - realSize;
+            std::unique_ptr<char[]> buf = std::make_unique<char[]>(fillSize);
+            res = writeCache_->Put(key, realSize, fillSize,
+                                   ByteBuffer(buf.get(), fillSize));
+        }
+    }
+
+    if (SUCCESS == res && size != realSize) {
+        ent->TruncateRealsize(size);
+    }
+
+    // release exclusive lock
+    fileLock->second->store(0);
+
+    if (EnableLogging) {
+        double totalTime = std::chrono::duration<double, std::milli>(
+            std::chrono::steady_clock::now() - startTime).count();
+        LOG(INFO) << "[Accessor]Truncate, key:" << key << ", size:" << size
+                  << ", res:" << res << ", time:" << totalTime << "ms";
+    }
+    return res;
+}
+
+// Drop any read-cache data for key so the next read refetches fresh content.
+// No-op (returns SUCCESS) unless CleanCacheByOpen is configured.
+int HybridCacheAccessor4S3fs::Invalidate(const std::string &key) {
+    std::chrono::steady_clock::time_point startTime;
+    if (EnableLogging) startTime = std::chrono::steady_clock::now();
+    int res = SUCCESS;
+    if (cfg_.CleanCacheByOpen) {
+        res = readCache_->Delete(key);
+        if (EnableLogging) {
+            double totalTime = std::chrono::duration<double, std::milli>(
+                std::chrono::steady_clock::now() - startTime).count();
+            LOG(INFO) << "[Accessor]Invalidate, key:" << key
+                      << ", res:" << res << ", time:" << totalTime << "ms";
+        }
+    }
+    return res;
+}
+
+// Synchronous HEAD via the data adaptor: fills size and headers for key.
+// Blocks on the adaptor future and returns its error code.
+int HybridCacheAccessor4S3fs::Head(const std::string &key, size_t& size,
+                                   std::map<std::string, std::string>& headers) {
+    std::chrono::steady_clock::time_point startTime;
+    if (EnableLogging) startTime = std::chrono::steady_clock::now();
+    int res = dataAdaptor_->Head(key, size, headers).get();
+    if (EnableLogging) {
+        double totalTime = std::chrono::duration<double, std::milli>(
+            std::chrono::steady_clock::now() - startTime).count();
+        LOG(INFO) << "[Accessor]Head, key:" << key << ", res:" << res
+                  << ", size:" << size << ", headerCnt:" << headers.size()
+                  << ", time:" << totalTime << "ms";
+    }
+    return res;
+}
+
+// Flush every file currently tracked by the write cache.
+// Single-flight: concurrent callers (including BackGroundFlush) serialize on
+// the backFlushRunning_ flag below.
+// NOTE(review): per-file Flush results are discarded after collectAll — FsSync
+// always returns SUCCESS even if individual flushes failed; callers cannot
+// observe errors except via the log.
+int HybridCacheAccessor4S3fs::FsSync() {
+    std::chrono::steady_clock::time_point startTime;
+    if (EnableLogging) startTime = std::chrono::steady_clock::now();
+    if (EnableLogging) {
+        LOG(WARNING) << "[Accessor]FsSync start";
+    }
+    // Busy-wait until we own the flush-running flag (spin CAS, no sleep).
+    while(true) {
+        bool expected = false;
+        if (backFlushRunning_.compare_exchange_weak(expected, true))
+            break;
+    }
+
+    std::map files;
+    writeCache_->GetAllKeys(files);
+    // Sort by the map's value so files are flushed in ascending order of that
+    // value — presumably a last-update timestamp from GetAllKeys; TODO confirm.
+    std::vector> filesVec(files.begin(), files.end());
+    std::sort(filesVec.begin(), filesVec.end(),
+        [](std::pair lhs, std::pair rhs) {
+        return lhs.second < rhs.second;
+    });
+
+    // Dispatch one Flush task per file onto the shared executor.
+    std::vector> fs;
+    for (auto& file : filesVec) {
+        std::string key = file.first;
+        fs.emplace_back(folly::via(executor_.get(), [this, key]() {
+            int res = this->Flush(key);
+            if (res) {
+                LOG(ERROR) << "[Accessor]FsSync, flush error in FsSync, file:" << key
+                           << ", res:" << res;
+            }
+            return res;
+        }));
+    }
+    // Block until every per-file flush completes; failures were logged above.
+    if (fs.size()) {
+        collectAll(fs).get();
+    }
+    backFlushRunning_.store(false);
+    if (EnableLogging) {
+        double totalTime = std::chrono::duration(
+            std::chrono::steady_clock::now() - startTime).count();
+        LOG(WARNING) << "[Accessor]FsSync end, fileCnt:" << filesVec.size()
+                     << ", time:" << totalTime << "ms";
+    }
+    return SUCCESS;
+}
+
+// Whether this accessor was configured to route through the global cache.
+bool HybridCacheAccessor4S3fs::UseGlobalCache() {
+    return cfg_.UseGlobalCache;
+}
+
+// Background-flush loop, run on bgFlushThread_ until Stop() sets toStop_.
+// Polls the write-cache fill ratio every 1ms and triggers a full FsSync once
+// it reaches cfg_.BackFlushCacheRatio. After stop is requested, performs one
+// final FsSync to drain any remaining dirty data before exiting.
+void HybridCacheAccessor4S3fs::BackGroundFlush() {
+    LOG(WARNING) << "[Accessor]BackGroundFlush start";
+    while (!toStop_.load(std::memory_order_acquire)) {
+        if (WriteCacheRatio() < cfg_.BackFlushCacheRatio) {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+            continue;
+        }
+        LOG(WARNING) << "[Accessor]BackGroundFlush radically, write cache ratio:"
+                     << WriteCacheRatio();
+        FsSync();
+    }
+    // Drain whatever is still buffered after stop was requested.
+    if (0 < writeCache_->GetCacheSize()) {
+        FsSync();
+    }
+    LOG(WARNING) << "[Accessor]BackGroundFlush end";
+}
+
+// Configure glog from the accessor config (definition continues past this hunk).
+void HybridCacheAccessor4S3fs::InitLog() {
+    FLAGS_log_dir = cfg_.LogPath;
+    FLAGS_minloglevel = cfg_.LogLevel;
+    EnableLogging = cfg_.EnableLog;
+
google::InitGoogleLogging("hybridcache"); +} + +uint32_t HybridCacheAccessor4S3fs::WriteCacheRatio() { + return writeCache_->GetCacheSize() * 100 / writeCache_->GetCacheMaxSize(); +} + +bool HybridCacheAccessor4S3fs::IsWriteCacheFull(size_t len) { + return writeCache_->GetCacheSize() + len >= + (writeCache_->GetCacheMaxSize() * cfg_.WriteCacheCfg.CacheSafeRatio / 100); +} diff --git a/s3fs/hybridcache_accessor_4_s3fs.h b/s3fs/hybridcache_accessor_4_s3fs.h new file mode 100644 index 0000000..6cdfe8f --- /dev/null +++ b/s3fs/hybridcache_accessor_4_s3fs.h @@ -0,0 +1,65 @@ +/* + * Project: HybridCache + * Created Date: 24-3-25 + * Author: lshb + */ + +#ifndef HYBRIDCACHE_ACCESSOR_4_S3FS_H_ +#define HYBRIDCACHE_ACCESSOR_4_S3FS_H_ + +#include + +#include "accessor.h" + +using atomic_ptr_t = std::shared_ptr>; + +class HybridCacheAccessor4S3fs : public HybridCache::HybridCacheAccessor { + public: + HybridCacheAccessor4S3fs(const HybridCache::HybridCacheConfig& cfg); + ~HybridCacheAccessor4S3fs(); + + void Init(); + void Stop(); + + int Put(const std::string &key, size_t start, size_t len, const char* buf); + + int Get(const std::string &key, size_t start, size_t len, char* buf); + + int Flush(const std::string &key); + + int DeepFlush(const std::string &key); + + int Delete(const std::string &key); + + int Truncate(const std::string &key, size_t size); + + int Invalidate(const std::string &key); + + int Head(const std::string &key, size_t& size, + std::map& headers); + + // async full files flush in background + int FsSync(); + + bool UseGlobalCache(); + + HybridCache::ThreadPool* GetExecutor() { + return executor_.get(); + } + + private: + void InitLog(); + bool IsWriteCacheFull(size_t len); + uint32_t WriteCacheRatio(); + void BackGroundFlush(); + + private: + folly::ConcurrentHashMap fileLock_; // rwlock. 
write and flush are exclusive + std::shared_ptr executor_; + std::shared_ptr tokenBucket_; // upload flow limit + std::atomic toStop_{false}; + std::atomic backFlushRunning_{false}; + std::thread bgFlushThread_; +}; + +#endif // HYBRIDCACHE_ACCESSOR_4_S3FS_H_ diff --git a/s3fs/hybridcache_disk_data_adaptor.cpp b/s3fs/hybridcache_disk_data_adaptor.cpp new file mode 100644 index 0000000..08de2b9 --- /dev/null +++ b/s3fs/hybridcache_disk_data_adaptor.cpp @@ -0,0 +1,89 @@ +#include "fdcache_entity.h" +#include "fdcache.h" +#include "hybridcache_disk_data_adaptor.h" + +using HybridCache::ErrCode::SUCCESS; +using HybridCache::EnableLogging; + +const size_t SINGLE_WRITE_SIZE = 1024 * 1024 * 1024; + +folly::Future DiskDataAdaptor::DownLoad(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer) { + assert(executor_); + return folly::via(executor_.get(), [this, key, start, size, buffer]() -> int { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = SUCCESS; + int fd = -1; + FdEntity* ent = FdManager::get()->GetFdEntity( + key.c_str(), fd, false, AutoLock::ALREADY_LOCKED); + if (nullptr == ent) { + LOG(ERROR) << "[DataAdaptor]DownLoad, can't find opened path, file:" << key; + res = -EIO; + } + if (SUCCESS == res) { + res = ent->ReadByAdaptor(fd, buffer.data, start, size, false, dataAdaptor_); + } + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[DataAdaptor]DownLoad, file:" << key + << ", start:" << start << ", size:" << size + << ", res:" << res << ", time:" << totalTime << "ms"; + } + return 0 < res ? 
SUCCESS : res; + }); +} + +folly::Future DiskDataAdaptor::UpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map& headers) { + return dataAdaptor_->UpLoad(key, size, buffer, headers).thenValue( + [this, key, buffer, size](int upRes) { + if (SUCCESS != upRes) + return upRes; + int fd = -1; + FdEntity* ent = FdManager::get()->GetFdEntity( + key.c_str(), fd, false, AutoLock::ALREADY_LOCKED); + if (nullptr == ent) { + LOG(ERROR) << "[DataAdaptor]UpLoad, can't find opened path, file:" << key; + return upRes; + } + size_t remainLen = size; + size_t totalWriteLen = 0; + while (0 < remainLen) { + size_t stepLen = SINGLE_WRITE_SIZE < remainLen ? SINGLE_WRITE_SIZE : remainLen; + totalWriteLen += ent->WriteCache(buffer.data + size - remainLen, + size - remainLen, stepLen); + remainLen -= stepLen; + } + if (EnableLogging) { + LOG(INFO) << "[DataAdaptor]UpLoad, write disk cache, file:" << key + << ", size:" << size << ", wsize:" << totalWriteLen; + } + return upRes; + }); +} + +folly::Future DiskDataAdaptor::Delete(const std::string &key) { + return dataAdaptor_->Delete(key).thenValue([this, key](int delRes) { + if (SUCCESS == delRes) { + int tmpRes = FdManager::DeleteCacheFile(key.c_str()); + if (EnableLogging) { + LOG(INFO) << "[DataAdaptor]Delete, delete disk cache, file:" << key + << ", res:" << tmpRes; + } + } + return delRes; + }); +} + +folly::Future DiskDataAdaptor::Head(const std::string &key, + size_t& size, + std::map& headers) { + return dataAdaptor_->Head(key, size, headers); +} diff --git a/s3fs/hybridcache_disk_data_adaptor.h b/s3fs/hybridcache_disk_data_adaptor.h new file mode 100644 index 0000000..41d7d44 --- /dev/null +++ b/s3fs/hybridcache_disk_data_adaptor.h @@ -0,0 +1,40 @@ +/* + * Project: HybridCache + * Created Date: 24-6-7 + * Author: lshb + */ + +#ifndef DISK_DATA_ADAPTOR_H_ +#define DISK_DATA_ADAPTOR_H_ + +#include "data_adaptor.h" + +using HybridCache::ByteBuffer; + +class DiskDataAdaptor : public 
HybridCache::DataAdaptor { + public: + DiskDataAdaptor(std::shared_ptr dataAdaptor) : dataAdaptor_(dataAdaptor) {} + DiskDataAdaptor() = default; + ~DiskDataAdaptor() {} + + folly::Future DownLoad(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer); + + folly::Future UpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map& headers); + + folly::Future Delete(const std::string &key); + + folly::Future Head(const std::string &key, + size_t& size, + std::map& headers); + + private: + std::shared_ptr dataAdaptor_; +}; + +#endif // DISK_DATA_ADAPTOR_H_ diff --git a/s3fs/hybridcache_s3_data_adaptor.cpp b/s3fs/hybridcache_s3_data_adaptor.cpp new file mode 100644 index 0000000..1025e30 --- /dev/null +++ b/s3fs/hybridcache_s3_data_adaptor.cpp @@ -0,0 +1,136 @@ +#include "common.h" +#include "curl.h" +#include "curl_multi.h" +#include "fdcache_entity.h" +#include "hybridcache_s3_data_adaptor.h" +#include "s3fs_logger.h" +#include "string_util.h" + +using HybridCache::EnableLogging; + +folly::Future S3DataAdaptor::DownLoad(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer) { + assert(executor_); + return folly::via(executor_.get(), [key, start, size, buffer]() -> int { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = 0; + // parallel request + if (S3fsCurl::GetMultipartSize() <= size && !nomultipart) { + res = S3fsCurl::ParallelGetObjectRequest(key.c_str(), + NEW_CACHE_FAKE_FD, start, size, buffer.data); + } else if (0 < size) { // single request + S3fsCurl s3fscurl; + res = s3fscurl.GetObjectRequest(key.c_str(), + NEW_CACHE_FAKE_FD, start, size, buffer.data); + } + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[DataAdaptor]DownLoad, file:" << key + << ", start:" << start << ", size:" << size + << ", res:" << res << ", 
time:" << totalTime << "ms"; + } + return res; + }); +} + +folly::Future S3DataAdaptor::UpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map& headers) { + assert(executor_); + + // check size + if (size > MAX_MULTIPART_CNT * S3fsCurl::GetMultipartSize()) { + int res = -EFBIG; + LOG(ERROR) << "[DataAdaptor]UpLoad, file size too large, " + << "increase multipart size and try again. Part count exceeds:" + << MAX_MULTIPART_CNT << ", file:" << key << ", size:" << size; + if (EnableLogging) { + LOG(INFO) << "[DataAdaptor]UpLoad, file:" << key << ", size:" << size + << ", headerSize:" << headers.size() << ", res:" << res; + } + return res; + } + + return folly::via(executor_.get(), [key, size, buffer, headers]() -> int { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + int res = 0; + headers_t s3fsHeaders; + for (auto it : headers) { + s3fsHeaders[it.first] = it.second; + } + + if (nomultipart || size < S3fsCurl::GetMultipartSize()) { // normal uploading + S3fsCurl s3fscurl(true); + res = s3fscurl.PutRequest(key.c_str(), s3fsHeaders, + NEW_CACHE_FAKE_FD, size, buffer.data); + } else { // Multi part Upload + res = S3fsCurl::ParallelMultipartUploadRequest(key.c_str(), s3fsHeaders, + NEW_CACHE_FAKE_FD, size, buffer.data); + } + + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[DataAdaptor]UpLoad, file:" << key << ", size:" << size + << ", headerSize:" << headers.size() << ", res:" << res + << ", time:" << totalTime << "ms"; + } + return res; + }); +} + +folly::Future S3DataAdaptor::Delete(const std::string &key) { + assert(executor_); + return folly::via(executor_.get(), [key]() -> int { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + S3fsCurl s3fscurl; + int res = s3fscurl.DeleteRequest(key.c_str()); + 
if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[DataAdaptor]Delete, file:" << key << ", res:" << res + << ", time:" << totalTime << "ms"; + } + return res; + }); +} + +folly::Future S3DataAdaptor::Head(const std::string &key, + size_t& size, + std::map& headers) { + assert(executor_); + return folly::via(executor_.get(), [key, &size, &headers]() -> int { + std::chrono::steady_clock::time_point startTime; + if (EnableLogging) startTime = std::chrono::steady_clock::now(); + + headers_t s3fsHeaders; + S3fsCurl s3fscurl; + int res = s3fscurl.HeadRequest(key.c_str(), s3fsHeaders); + for (auto it : s3fsHeaders) { + headers[it.first] = it.second; + if (lower(it.first) == "content-length") { + std::stringstream sstream(it.second); + sstream >> size; + } + } + if (EnableLogging) { + double totalTime = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + LOG(INFO) << "[DataAdaptor]Head, file:" << key << ", res:" << res + << ", size:" << size << ", headerSize:" << headers.size() + << ", time:" << totalTime << "ms"; + } + return res; + }); +} diff --git a/s3fs/hybridcache_s3_data_adaptor.h b/s3fs/hybridcache_s3_data_adaptor.h new file mode 100644 index 0000000..c0e62a3 --- /dev/null +++ b/s3fs/hybridcache_s3_data_adaptor.h @@ -0,0 +1,33 @@ +/* + * Project: HybridCache + * Created Date: 24-3-11 + * Author: lshb + */ + +#ifndef S3_DATA_ADAPTOR_H_ +#define S3_DATA_ADAPTOR_H_ + +#include "data_adaptor.h" + +using HybridCache::ByteBuffer; + +class S3DataAdaptor : public HybridCache::DataAdaptor { + public: + folly::Future DownLoad(const std::string &key, + size_t start, + size_t size, + ByteBuffer &buffer); + + folly::Future UpLoad(const std::string &key, + size_t size, + const ByteBuffer &buffer, + const std::map& headers); + + folly::Future Delete(const std::string &key); + + folly::Future Head(const std::string &key, + size_t& size, + std::map& headers); +}; + 
+#endif // S3_DATA_ADAPTOR_H_ diff --git a/s3fs/metaheader.cpp b/s3fs/metaheader.cpp new file mode 100644 index 0000000..69e97e8 --- /dev/null +++ b/s3fs/metaheader.cpp @@ -0,0 +1,341 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include + +#include "common.h" +#include "metaheader.h" +#include "string_util.h" + +static constexpr struct timespec DEFAULT_TIMESPEC = {-1, 0}; + +//------------------------------------------------------------------- +// Utility functions for convert +//------------------------------------------------------------------- +static struct timespec cvt_string_to_time(const char *str) +{ + // [NOTE] + // In rclone, there are cases where ns is set to x-amz-meta-mtime + // with floating point number. s3fs uses x-amz-meta-mtime by + // truncating the floating point or less (in seconds or less) to + // correspond to this. 
+ // + std::string strmtime; + long nsec = 0; + if(str && '\0' != *str){ + strmtime = str; + std::string::size_type pos = strmtime.find('.', 0); + if(std::string::npos != pos){ + nsec = cvt_strtoofft(strmtime.substr(pos + 1).c_str(), /*base=*/ 10); + strmtime.erase(pos); + } + } + struct timespec ts = {static_cast(cvt_strtoofft(strmtime.c_str(), /*base=*/ 10)), nsec}; + return ts; +} + +static struct timespec get_time(const headers_t& meta, const char *header) +{ + headers_t::const_iterator iter; + if(meta.end() == (iter = meta.find(header))){ + return DEFAULT_TIMESPEC; + } + return cvt_string_to_time((*iter).second.c_str()); +} + +struct timespec get_mtime(const headers_t& meta, bool overcheck) +{ + struct timespec t = get_time(meta, "x-amz-meta-mtime"); + if(0 < t.tv_sec){ + return t; + } + t = get_time(meta, "x-amz-meta-goog-reserved-file-mtime"); + if(0 < t.tv_sec){ + return t; + } + if(overcheck){ + struct timespec ts = {get_lastmodified(meta), 0}; + return ts; + } + return DEFAULT_TIMESPEC; +} + +struct timespec get_ctime(const headers_t& meta, bool overcheck) +{ + struct timespec t = get_time(meta, "x-amz-meta-ctime"); + if(0 < t.tv_sec){ + return t; + } + if(overcheck){ + struct timespec ts = {get_lastmodified(meta), 0}; + return ts; + } + return DEFAULT_TIMESPEC; +} + +struct timespec get_atime(const headers_t& meta, bool overcheck) +{ + struct timespec t = get_time(meta, "x-amz-meta-atime"); + if(0 < t.tv_sec){ + return t; + } + if(overcheck){ + struct timespec ts = {get_lastmodified(meta), 0}; + return ts; + } + return DEFAULT_TIMESPEC; +} + +off_t get_size(const char *s) +{ + return cvt_strtoofft(s, /*base=*/ 10); +} + +off_t get_size(const headers_t& meta) +{ + headers_t::const_iterator iter = meta.find("Content-Length"); + if(meta.end() == iter){ + return 0; + } + return get_size((*iter).second.c_str()); +} + +mode_t get_mode(const char *s, int base) +{ + return static_cast(cvt_strtoofft(s, base)); +} + +mode_t get_mode(const headers_t& meta, const 
std::string& strpath, bool checkdir, bool forcedir) +{ + mode_t mode = 0; + bool isS3sync = false; + headers_t::const_iterator iter; + + if(meta.end() != (iter = meta.find("x-amz-meta-mode"))){ + mode = get_mode((*iter).second.c_str()); + }else if(meta.end() != (iter = meta.find("x-amz-meta-permissions"))){ // for s3sync + mode = get_mode((*iter).second.c_str()); + isS3sync = true; + }else if(meta.end() != (iter = meta.find("x-amz-meta-goog-reserved-posix-mode"))){ // for GCS + mode = get_mode((*iter).second.c_str(), 8); + }else{ + // If another tool creates an object without permissions, default to owner + // read-write and group readable. + mode = (!strpath.empty() && '/' == *strpath.rbegin()) ? 0750 : 0640; + } + + // Checking the bitmask, if the last 3 bits are all zero then process as a regular + // file type (S_IFDIR or S_IFREG), otherwise return mode unmodified so that S_IFIFO, + // S_IFSOCK, S_IFCHR, S_IFLNK and S_IFBLK devices can be processed properly by fuse. + if(!(mode & S_IFMT)){ + if(!isS3sync){ + if(checkdir){ + if(forcedir){ + mode |= S_IFDIR; + }else{ + if(meta.end() != (iter = meta.find("Content-Type"))){ + std::string strConType = (*iter).second; + // Leave just the mime type, remove any optional parameters (eg charset) + std::string::size_type pos = strConType.find(';'); + if(std::string::npos != pos){ + strConType.erase(pos); + } + if(strConType == "application/x-directory" || strConType == "httpd/unix-directory"){ + // Nextcloud uses this MIME type for directory objects when mounting bucket as external Storage + mode |= S_IFDIR; + }else if(!strpath.empty() && '/' == *strpath.rbegin()){ + if(strConType == "binary/octet-stream" || strConType == "application/octet-stream"){ + mode |= S_IFDIR; + }else{ + if(complement_stat){ + // If complement lack stat mode, when the object has '/' character at end of name + // and content type is text/plain and the object's size is 0 or 1, it should be + // directory. 
+ off_t size = get_size(meta); + if(strConType == "text/plain" && (0 == size || 1 == size)){ + mode |= S_IFDIR; + }else{ + mode |= S_IFREG; + } + }else{ + mode |= S_IFREG; + } + } + }else{ + mode |= S_IFREG; + } + }else{ + mode |= S_IFREG; + } + } + } + // If complement lack stat mode, when it's mode is not set any permission, + // the object is added minimal mode only for read permission. + if(complement_stat && 0 == (mode & (S_IRWXU | S_IRWXG | S_IRWXO))){ + mode |= (S_IRUSR | (0 == (mode & S_IFDIR) ? 0 : S_IXUSR)); + } + }else{ + if(!checkdir){ + // cut dir/reg flag. + mode &= ~S_IFDIR; + mode &= ~S_IFREG; + } + } + } + return mode; +} + +uid_t get_uid(const char *s) +{ + return static_cast(cvt_strtoofft(s, /*base=*/ 0)); +} + +uid_t get_uid(const headers_t& meta) +{ + headers_t::const_iterator iter; + if(meta.end() != (iter = meta.find("x-amz-meta-uid"))){ + return get_uid((*iter).second.c_str()); + }else if(meta.end() != (iter = meta.find("x-amz-meta-owner"))){ // for s3sync + return get_uid((*iter).second.c_str()); + }else if(meta.end() != (iter = meta.find("x-amz-meta-goog-reserved-posix-uid"))){ // for GCS + return get_uid((*iter).second.c_str()); + }else{ + return geteuid(); + } +} + +gid_t get_gid(const char *s) +{ + return static_cast(cvt_strtoofft(s, /*base=*/ 0)); +} + +gid_t get_gid(const headers_t& meta) +{ + headers_t::const_iterator iter; + if(meta.end() != (iter = meta.find("x-amz-meta-gid"))){ + return get_gid((*iter).second.c_str()); + }else if(meta.end() != (iter = meta.find("x-amz-meta-group"))){ // for s3sync + return get_gid((*iter).second.c_str()); + }else if(meta.end() != (iter = meta.find("x-amz-meta-goog-reserved-posix-gid"))){ // for GCS + return get_gid((*iter).second.c_str()); + }else{ + return getegid(); + } +} + +blkcnt_t get_blocks(off_t size) +{ + return (size / 512) + (0 == (size % 512) ? 
0 : 1);
+}
+
+// Converts an IAM credential expiration string ("YYYY-MM-DDThh:mm:ss")
+// to unix time(GMT). Returns 0 if s is null or cannot be parsed.
+time_t cvtIAMExpireStringToTime(const char* s)
+{
+    struct tm tm;
+    if(!s){
+        return 0L;
+    }
+    memset(&tm, 0, sizeof(struct tm));
+    // [FIX] strptime returns null on parse failure; previously the zeroed
+    // struct tm was fed to timegm, yielding a bogus time(around year 1900).
+    if(nullptr == strptime(s, "%Y-%m-%dT%H:%M:%S", &tm)){
+        return 0L;
+    }
+    return timegm(&tm); // GMT
+}
+
+// Parses an HTTP-date ("Last-Modified" style) string to unix time(GMT).
+// Returns -1 if s is null or cannot be parsed.
+time_t get_lastmodified(const char* s)
+{
+    struct tm tm;
+    if(!s){
+        return -1;
+    }
+    memset(&tm, 0, sizeof(struct tm));
+    // [FIX] check the strptime result instead of converting a zeroed tm.
+    if(nullptr == strptime(s, "%a, %d %b %Y %H:%M:%S %Z", &tm)){
+        return -1;
+    }
+    return timegm(&tm); // GMT
+}
+
+// Returns the unix time of the "Last-Modified" response header, or -1
+// if the header is not present or could not be parsed.
+time_t get_lastmodified(const headers_t& meta)
+{
+    headers_t::const_iterator iter = meta.find("Last-Modified");
+    if(meta.end() == iter){
+        return -1;
+    }
+    return get_lastmodified((*iter).second.c_str());
+}
+
+//
+// Returns it whether it is an object with need checking in detail.
+// If this function returns true, the object is possible to be directory
+// and is needed checking detail(searching sub object).
+//
+bool is_need_check_obj_detail(const headers_t& meta)
+{
+    headers_t::const_iterator iter;
+
+    // directory object is Content-Length as 0.
+    if(0 != get_size(meta)){
+        return false;
+    }
+    // if the object has x-amz-meta information, checking is no more.
+    if(meta.end() != meta.find("x-amz-meta-mode") ||
+       meta.end() != meta.find("x-amz-meta-mtime") ||
+       meta.end() != meta.find("x-amz-meta-ctime") ||
+       meta.end() != meta.find("x-amz-meta-atime") ||
+       meta.end() != meta.find("x-amz-meta-uid") ||
+       meta.end() != meta.find("x-amz-meta-gid") ||
+       meta.end() != meta.find("x-amz-meta-owner") ||
+       meta.end() != meta.find("x-amz-meta-group") ||
+       meta.end() != meta.find("x-amz-meta-permissions") )
+    {
+        return false;
+    }
+    // if there is not Content-Type, or Content-Type is "x-directory",
+    // checking is no more.
+    if(meta.end() == (iter = meta.find("Content-Type"))){
+        return false;
+    }
+    if("application/x-directory" == (*iter).second){
+        return false;
+    }
+    return true;
+}
+
+// [NOTE]
+// If add_noexist is false and the key does not exist, it will not be added.
+// +bool merge_headers(headers_t& base, const headers_t& additional, bool add_noexist) +{ + bool added = false; + for(headers_t::const_iterator iter = additional.begin(); iter != additional.end(); ++iter){ + if(add_noexist || base.find(iter->first) != base.end()){ + base[iter->first] = iter->second; + added = true; + } + } + return added; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/metaheader.h b/s3fs/metaheader.h new file mode 100644 index 0000000..44dd8a5 --- /dev/null +++ b/s3fs/metaheader.h @@ -0,0 +1,71 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */
+
+#ifndef S3FS_METAHEADER_H_
+#define S3FS_METAHEADER_H_
+
+// [FIX] the include targets were lost when this patch was generated
+// (angle-bracketed tokens stripped); restored per the usage below.
+#include <strings.h>
+#include <sys/stat.h>
+#include <map>
+#include <string>
+
+//-------------------------------------------------------------------
+// headers_t
+//-------------------------------------------------------------------
+// Case-insensitive key comparator so that HTTP header names compare
+// equal regardless of case (e.g. "Content-Type" vs "content-type").
+struct header_nocase_cmp
+{
+    bool operator()(const std::string &strleft, const std::string &strright) const
+    {
+        return (strcasecmp(strleft.c_str(), strright.c_str()) < 0);
+    }
+};
+// [FIX] restore the stripped template arguments of the map type.
+typedef std::map<std::string, std::string, header_nocase_cmp> headers_t;
+
+//-------------------------------------------------------------------
+// Functions
+//-------------------------------------------------------------------
+struct timespec get_mtime(const headers_t& meta, bool overcheck = true);
+struct timespec get_ctime(const headers_t& meta, bool overcheck = true);
+struct timespec get_atime(const headers_t& meta, bool overcheck = true);
+off_t get_size(const char *s);
+off_t get_size(const headers_t& meta);
+mode_t get_mode(const char *s, int base = 0);
+mode_t get_mode(const headers_t& meta, const std::string& strpath, bool checkdir = false, bool forcedir = false);
+uid_t get_uid(const char *s);
+uid_t get_uid(const headers_t& meta);
+gid_t get_gid(const char *s);
+gid_t get_gid(const headers_t& meta);
+blkcnt_t get_blocks(off_t size);
+time_t cvtIAMExpireStringToTime(const char* s);
+time_t get_lastmodified(const char* s);
+time_t get_lastmodified(const headers_t& meta);
+bool is_need_check_obj_detail(const headers_t& meta);
+bool merge_headers(headers_t& base, const headers_t& additional, bool add_noexist);
+bool simple_parse_xml(const char* data, size_t len, const char* key, std::string& value);
+
+#endif // S3FS_METAHEADER_H_
+
+/*
+* Local variables:
+* tab-width: 4
+* c-basic-offset: 4
+* End:
+* vim600: expandtab sw=4 ts=4 fdm=marker
+* vim<600: expandtab sw=4 ts=4
+*/
diff --git a/s3fs/mpu_util.cpp b/s3fs/mpu_util.cpp
new file mode 100644
index 0000000..1421905
--- /dev/null
+++ b/s3fs/mpu_util.cpp
@@ -0,0 +1,159 @@
+/*
+ * s3fs - FUSE-based file system
backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include + +#include "s3fs.h" +#include "s3fs_logger.h" +#include "mpu_util.h" +#include "curl.h" +#include "s3fs_xml.h" +#include "s3fs_auth.h" +#include "string_util.h" + +//------------------------------------------------------------------- +// Global variables +//------------------------------------------------------------------- +utility_incomp_type utility_mode = utility_incomp_type::NO_UTILITY_MODE; + +//------------------------------------------------------------------- +// Functions +//------------------------------------------------------------------- +static void print_incomp_mpu_list(const incomp_mpu_list_t& list) +{ + printf("\n"); + printf("Lists the parts that have been uploaded for a specific multipart upload.\n"); + printf("\n"); + + if(!list.empty()){ + printf("---------------------------------------------------------------\n"); + + int cnt = 0; + for(incomp_mpu_list_t::const_iterator iter = list.begin(); iter != list.end(); ++iter, ++cnt){ + printf(" Path : %s\n", (*iter).key.c_str()); + printf(" UploadId : %s\n", (*iter).id.c_str()); + printf(" Date : %s\n", (*iter).date.c_str()); + printf("\n"); + } + 
printf("---------------------------------------------------------------\n"); + + }else{ + printf("There is no list.\n"); + } +} + +static bool abort_incomp_mpu_list(const incomp_mpu_list_t& list, time_t abort_time) +{ + if(list.empty()){ + return true; + } + time_t now_time = time(nullptr); + + // do removing. + S3fsCurl s3fscurl; + bool result = true; + for(incomp_mpu_list_t::const_iterator iter = list.begin(); iter != list.end(); ++iter){ + const char* tpath = (*iter).key.c_str(); + std::string upload_id = (*iter).id; + + if(0 != abort_time){ // abort_time is 0, it means all. + time_t date = 0; + if(!get_unixtime_from_iso8601((*iter).date.c_str(), date)){ + S3FS_PRN_DBG("date format is not ISO 8601 for %s multipart uploading object, skip this.", tpath); + continue; + } + if(now_time <= (date + abort_time)){ + continue; + } + } + + if(0 != s3fscurl.AbortMultipartUpload(tpath, upload_id)){ + S3FS_PRN_EXIT("Failed to remove %s multipart uploading object.", tpath); + result = false; + }else{ + printf("Succeed to remove %s multipart uploading object.\n", tpath); + } + + // reset(initialize) curl object + s3fscurl.DestroyCurlHandle(); + } + return result; +} + +int s3fs_utility_processing(time_t abort_time) +{ + if(utility_incomp_type::NO_UTILITY_MODE == utility_mode){ + return EXIT_FAILURE; + } + printf("\n*** s3fs run as utility mode.\n\n"); + + S3fsCurl s3fscurl; + std::string body; + int result = EXIT_SUCCESS; + if(0 != s3fscurl.MultipartListRequest(body)){ + S3FS_PRN_EXIT("Could not get list multipart upload.\nThere is no incomplete multipart uploaded object in bucket.\n"); + result = EXIT_FAILURE; + }else{ + // parse result(incomplete multipart upload information) + S3FS_PRN_DBG("response body = {\n%s\n}", body.c_str()); + + xmlDocPtr doc; + if(nullptr == (doc = xmlReadMemory(body.c_str(), static_cast(body.size()), "", nullptr, 0))){ + S3FS_PRN_DBG("xmlReadMemory exited with error."); + result = EXIT_FAILURE; + + }else{ + // make incomplete uploads list + 
incomp_mpu_list_t list; + if(!get_incomp_mpu_list(doc, list)){ + S3FS_PRN_DBG("get_incomp_mpu_list exited with error."); + result = EXIT_FAILURE; + + }else{ + if(utility_incomp_type::INCOMP_TYPE_LIST == utility_mode){ + // print list + print_incomp_mpu_list(list); + }else if(utility_incomp_type::INCOMP_TYPE_ABORT == utility_mode){ + // remove + if(!abort_incomp_mpu_list(list, abort_time)){ + S3FS_PRN_DBG("an error occurred during removal process."); + result = EXIT_FAILURE; + } + } + } + S3FS_XMLFREEDOC(doc); + } + } + + // ssl + s3fs_destroy_global_ssl(); + + return result; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/mpu_util.h b/s3fs/mpu_util.h new file mode 100644 index 0000000..ca60659 --- /dev/null +++ b/s3fs/mpu_util.h @@ -0,0 +1,64 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */
+
+#ifndef S3FS_MPU_UTIL_H_
+#define S3FS_MPU_UTIL_H_
+
+// [FIX] the include targets were lost when this patch was generated
+// (angle-bracketed tokens stripped); restored per the usage below.
+#include <ctime>
+#include <string>
+#include <vector>
+
+//-------------------------------------------------------------------
+// Structure / Typedef
+//-------------------------------------------------------------------
+// Describes one incomplete multipart upload reported by the bucket:
+// object key, upload id, and the initiation date string.
+typedef struct incomplete_multipart_upload_info
+{
+    std::string key;
+    std::string id;
+    std::string date;
+}INCOMP_MPU_INFO;
+
+// [FIX] restore the stripped template argument of the vector type.
+typedef std::vector<INCOMP_MPU_INFO> incomp_mpu_list_t;
+
+//-------------------------------------------------------------------
+// enum for utility process mode
+//-------------------------------------------------------------------
+enum class utility_incomp_type{
+    NO_UTILITY_MODE = 0,    // not utility mode
+    INCOMP_TYPE_LIST,       // list of incomplete mpu
+    INCOMP_TYPE_ABORT       // delete incomplete mpu
+};
+
+extern utility_incomp_type utility_mode;
+
+//-------------------------------------------------------------------
+// Functions
+//-------------------------------------------------------------------
+int s3fs_utility_processing(time_t abort_time);
+
+#endif // S3FS_MPU_UTIL_H_
+
+/*
+* Local variables:
+* tab-width: 4
+* c-basic-offset: 4
+* End:
+* vim600: expandtab sw=4 ts=4 fdm=marker
+* vim<600: expandtab sw=4 ts=4
+*/
diff --git a/s3fs/openssl_auth.cpp b/s3fs/openssl_auth.cpp
new file mode 100644
index 0000000..5340a97
--- /dev/null
+++ b/s3fs/openssl_auth.cpp
@@ -0,0 +1,444 @@
+/*
+ * s3fs - FUSE-based file system backed by Amazon S3
+ *
+ * Copyright(C) 2007 Randy Rizun
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wdeprecated-declarations" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "s3fs_auth.h" +#include "s3fs_logger.h" + +//------------------------------------------------------------------- +// Utility Function for version +//------------------------------------------------------------------- +const char* s3fs_crypt_lib_name() +{ + static constexpr char version[] = "OpenSSL"; + + return version; +} + +//------------------------------------------------------------------- +// Utility Function for global init +//------------------------------------------------------------------- +bool s3fs_init_global_ssl() +{ + ERR_load_crypto_strings(); + + // [NOTE] + // OpenSSL 3.0 loads error strings automatically so these functions are not needed. 
+ // + #ifndef USE_OPENSSL_30 + ERR_load_BIO_strings(); + #endif + + OpenSSL_add_all_algorithms(); + return true; +} + +bool s3fs_destroy_global_ssl() +{ + EVP_cleanup(); + ERR_free_strings(); + return true; +} + +//------------------------------------------------------------------- +// Utility Function for crypt lock +//------------------------------------------------------------------- +// internal use struct for openssl +struct CRYPTO_dynlock_value +{ + pthread_mutex_t dyn_mutex; +}; + +static pthread_mutex_t* s3fs_crypt_mutex = nullptr; + +static void s3fs_crypt_mutex_lock(int mode, int pos, const char* file, int line) __attribute__ ((unused)); +static void s3fs_crypt_mutex_lock(int mode, int pos, const char* file, int line) +{ + if(s3fs_crypt_mutex){ + int result; + if(mode & CRYPTO_LOCK){ + if(0 != (result = pthread_mutex_lock(&s3fs_crypt_mutex[pos]))){ + S3FS_PRN_CRIT("pthread_mutex_lock returned: %d", result); + abort(); + } + }else{ + if(0 != (result = pthread_mutex_unlock(&s3fs_crypt_mutex[pos]))){ + S3FS_PRN_CRIT("pthread_mutex_unlock returned: %d", result); + abort(); + } + } + } +} + +static unsigned long s3fs_crypt_get_threadid() __attribute__ ((unused)); +static unsigned long s3fs_crypt_get_threadid() +{ + // For FreeBSD etc, some system's pthread_t is structure pointer. + // Then we use cast like C style(not C++) instead of ifdef. 
+ return (unsigned long)(pthread_self()); +} + +static struct CRYPTO_dynlock_value* s3fs_dyn_crypt_mutex(const char* file, int line) __attribute__ ((unused)); +static struct CRYPTO_dynlock_value* s3fs_dyn_crypt_mutex(const char* file, int line) +{ + struct CRYPTO_dynlock_value* dyndata = new CRYPTO_dynlock_value(); + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + int result; + if(0 != (result = pthread_mutex_init(&(dyndata->dyn_mutex), &attr))){ + S3FS_PRN_CRIT("pthread_mutex_init returned: %d", result); + return nullptr; + } + return dyndata; +} + +static void s3fs_dyn_crypt_mutex_lock(int mode, struct CRYPTO_dynlock_value* dyndata, const char* file, int line) __attribute__ ((unused)); +static void s3fs_dyn_crypt_mutex_lock(int mode, struct CRYPTO_dynlock_value* dyndata, const char* file, int line) +{ + if(dyndata){ + int result; + if(mode & CRYPTO_LOCK){ + if(0 != (result = pthread_mutex_lock(&(dyndata->dyn_mutex)))){ + S3FS_PRN_CRIT("pthread_mutex_lock returned: %d", result); + abort(); + } + }else{ + if(0 != (result = pthread_mutex_unlock(&(dyndata->dyn_mutex)))){ + S3FS_PRN_CRIT("pthread_mutex_unlock returned: %d", result); + abort(); + } + } + } +} + +static void s3fs_destroy_dyn_crypt_mutex(struct CRYPTO_dynlock_value* dyndata, const char* file, int line) __attribute__ ((unused)); +static void s3fs_destroy_dyn_crypt_mutex(struct CRYPTO_dynlock_value* dyndata, const char* file, int line) +{ + if(dyndata){ + int result = pthread_mutex_destroy(&(dyndata->dyn_mutex)); + if(result != 0){ + S3FS_PRN_CRIT("failed to destroy dyn_mutex"); + abort(); + } + delete dyndata; + } +} + +bool s3fs_init_crypt_mutex() +{ + if(s3fs_crypt_mutex){ + S3FS_PRN_DBG("s3fs_crypt_mutex is not nullptr, destroy it."); + + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(!s3fs_destroy_crypt_mutex()){ + S3FS_PRN_ERR("Failed to 
s3fs_crypt_mutex"); + return false; + } + } + s3fs_crypt_mutex = new pthread_mutex_t[CRYPTO_num_locks()]; + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + for(int cnt = 0; cnt < CRYPTO_num_locks(); cnt++){ + int result = pthread_mutex_init(&s3fs_crypt_mutex[cnt], &attr); + if(result != 0){ + S3FS_PRN_CRIT("pthread_mutex_init returned: %d", result); + return false; + } + } + // static lock + CRYPTO_set_locking_callback(s3fs_crypt_mutex_lock); + CRYPTO_set_id_callback(s3fs_crypt_get_threadid); + // dynamic lock + CRYPTO_set_dynlock_create_callback(s3fs_dyn_crypt_mutex); + CRYPTO_set_dynlock_lock_callback(s3fs_dyn_crypt_mutex_lock); + CRYPTO_set_dynlock_destroy_callback(s3fs_destroy_dyn_crypt_mutex); + + return true; +} + +bool s3fs_destroy_crypt_mutex() +{ + if(!s3fs_crypt_mutex){ + return true; + } + + CRYPTO_set_dynlock_destroy_callback(nullptr); + CRYPTO_set_dynlock_lock_callback(nullptr); + CRYPTO_set_dynlock_create_callback(nullptr); + CRYPTO_set_id_callback(nullptr); + CRYPTO_set_locking_callback(nullptr); + + for(int cnt = 0; cnt < CRYPTO_num_locks(); cnt++){ + int result = pthread_mutex_destroy(&s3fs_crypt_mutex[cnt]); + if(result != 0){ + S3FS_PRN_CRIT("failed to destroy s3fs_crypt_mutex[%d]", cnt); + abort(); + } + } + CRYPTO_cleanup_all_ex_data(); + delete[] s3fs_crypt_mutex; + s3fs_crypt_mutex = nullptr; + + return true; +} + +//------------------------------------------------------------------- +// Utility Function for HMAC +//------------------------------------------------------------------- +static std::unique_ptr s3fs_HMAC_RAW(const void* key, size_t keylen, const unsigned char* data, size_t datalen, unsigned int* digestlen, bool is_sha256) +{ + if(!key || !data || !digestlen){ + return nullptr; + } + (*digestlen) = EVP_MAX_MD_SIZE * sizeof(unsigned char); + std::unique_ptr digest(new unsigned char[*digestlen]); + if(is_sha256){ + 
HMAC(EVP_sha256(), key, static_cast(keylen), data, datalen, digest.get(), digestlen); + }else{ + HMAC(EVP_sha1(), key, static_cast(keylen), data, datalen, digest.get(), digestlen); + } + + return digest; +} + +std::unique_ptr s3fs_HMAC(const void* key, size_t keylen, const unsigned char* data, size_t datalen, unsigned int* digestlen) +{ + return s3fs_HMAC_RAW(key, keylen, data, datalen, digestlen, false); +} + +std::unique_ptr s3fs_HMAC256(const void* key, size_t keylen, const unsigned char* data, size_t datalen, unsigned int* digestlen) +{ + return s3fs_HMAC_RAW(key, keylen, data, datalen, digestlen, true); +} + +#ifdef USE_OPENSSL_30 +//------------------------------------------------------------------- +// Utility Function for MD5 (OpenSSL >= 3.0) +//------------------------------------------------------------------- +// [NOTE] +// OpenSSL 3.0 deprecated the MD5_*** low-level encryption functions, +// so we should use the high-level EVP API instead. +// + +bool s3fs_md5(const unsigned char* data, size_t datalen, md5_t* digest) +{ + unsigned int digestlen = static_cast(digest->size()); + + const EVP_MD* md = EVP_get_digestbyname("md5"); + EVP_MD_CTX* mdctx = EVP_MD_CTX_create(); + EVP_DigestInit_ex(mdctx, md, nullptr); + EVP_DigestUpdate(mdctx, data, datalen); + EVP_DigestFinal_ex(mdctx, digest->data(), &digestlen); + EVP_MD_CTX_destroy(mdctx); + + return true; +} + +bool s3fs_md5_fd(int fd, off_t start, off_t size, md5_t* result) +{ + EVP_MD_CTX* mdctx; + unsigned int md5_digest_len = static_cast(result->size()); + off_t bytes; + + if(-1 == size){ + struct stat st; + if(-1 == fstat(fd, &st)){ + return false; + } + size = st.st_size; + } + + // instead of MD5_Init + mdctx = EVP_MD_CTX_new(); + EVP_DigestInit_ex(mdctx, EVP_md5(), nullptr); + + for(off_t total = 0; total < size; total += bytes){ + const off_t len = 512; + char buf[len]; + bytes = len < (size - total) ? 
len : (size - total); + bytes = pread(fd, buf, bytes, start + total); + if(0 == bytes){ + // end of file + break; + }else if(-1 == bytes){ + // error + S3FS_PRN_ERR("file read error(%d)", errno); + EVP_MD_CTX_free(mdctx); + return false; + } + // instead of MD5_Update + EVP_DigestUpdate(mdctx, buf, bytes); + } + + // instead of MD5_Final + EVP_DigestFinal_ex(mdctx, result->data(), &md5_digest_len); + EVP_MD_CTX_free(mdctx); + + return true; +} + +#else +//------------------------------------------------------------------- +// Utility Function for MD5 (OpenSSL < 3.0) +//------------------------------------------------------------------- + +// TODO: Does this fail on OpenSSL < 3.0 and we need to use MD5_CTX functions? +bool s3fs_md5(const unsigned char* data, size_t datalen, md5_t* digest) +{ + unsigned int digestlen = digest->size(); + + const EVP_MD* md = EVP_get_digestbyname("md5"); + EVP_MD_CTX* mdctx = EVP_MD_CTX_create(); + EVP_DigestInit_ex(mdctx, md, nullptr); + EVP_DigestUpdate(mdctx, data, datalen); + EVP_DigestFinal_ex(mdctx, digest->data(), &digestlen); + EVP_MD_CTX_destroy(mdctx); + + return true; +} + +bool s3fs_md5_fd(int fd, off_t start, off_t size, md5_t* result) +{ + MD5_CTX md5ctx; + off_t bytes; + + if(-1 == size){ + struct stat st; + if(-1 == fstat(fd, &st)){ + return false; + } + size = st.st_size; + } + + MD5_Init(&md5ctx); + + for(off_t total = 0; total < size; total += bytes){ + const off_t len = 512; + char buf[len]; + bytes = len < (size - total) ? 
len : (size - total); + bytes = pread(fd, buf, bytes, start + total); + if(0 == bytes){ + // end of file + break; + }else if(-1 == bytes){ + // error + S3FS_PRN_ERR("file read error(%d)", errno); + return false; + } + MD5_Update(&md5ctx, buf, bytes); + } + + MD5_Final(result->data(), &md5ctx); + + return true; +} +#endif + +//------------------------------------------------------------------- +// Utility Function for SHA256 +//------------------------------------------------------------------- +bool s3fs_sha256(const unsigned char* data, size_t datalen, sha256_t* digest) +{ + const EVP_MD* md = EVP_get_digestbyname("sha256"); + EVP_MD_CTX* mdctx = EVP_MD_CTX_create(); + EVP_DigestInit_ex(mdctx, md, nullptr); + EVP_DigestUpdate(mdctx, data, datalen); + unsigned int digestlen = static_cast(digest->size()); + EVP_DigestFinal_ex(mdctx, digest->data(), &digestlen); + EVP_MD_CTX_destroy(mdctx); + + return true; +} + +bool s3fs_sha256_fd(int fd, off_t start, off_t size, sha256_t* result) +{ + const EVP_MD* md = EVP_get_digestbyname("sha256"); + EVP_MD_CTX* sha256ctx; + off_t bytes; + + if(-1 == fd){ + return false; + } + if(-1 == size){ + struct stat st; + if(-1 == fstat(fd, &st)){ + S3FS_PRN_ERR("fstat error(%d)", errno); + return false; + } + size = st.st_size; + } + + sha256ctx = EVP_MD_CTX_create(); + EVP_DigestInit_ex(sha256ctx, md, nullptr); + + for(off_t total = 0; total < size; total += bytes){ + const off_t len = 512; + char buf[len]; + bytes = len < (size - total) ? 
len : (size - total); + bytes = pread(fd, buf, bytes, start + total); + if(0 == bytes){ + // end of file + break; + }else if(-1 == bytes){ + // error + S3FS_PRN_ERR("file read error(%d)", errno); + EVP_MD_CTX_destroy(sha256ctx); + return false; + } + EVP_DigestUpdate(sha256ctx, buf, bytes); + } + EVP_DigestFinal_ex(sha256ctx, result->data(), nullptr); + EVP_MD_CTX_destroy(sha256ctx); + + return true; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/psemaphore.h b/s3fs/psemaphore.h new file mode 100644 index 0000000..9de2596 --- /dev/null +++ b/s3fs/psemaphore.h @@ -0,0 +1,111 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef S3FS_SEMAPHORE_H_ +#define S3FS_SEMAPHORE_H_ + +//------------------------------------------------------------------- +// Class Semaphore +//------------------------------------------------------------------- +// portability wrapper for sem_t since macOS does not implement it +#ifdef __APPLE__ + +#include + +class Semaphore +{ + public: + explicit Semaphore(int value) : value(value), sem(dispatch_semaphore_create(value)) {} + ~Semaphore() + { + // macOS cannot destroy a semaphore with posts less than the initializer + for(int i = 0; i < get_value(); ++i){ + post(); + } + dispatch_release(sem); + } + Semaphore(const Semaphore&) = delete; + Semaphore(Semaphore&&) = delete; + Semaphore& operator=(const Semaphore&) = delete; + Semaphore& operator=(Semaphore&&) = delete; + + void wait() { dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); } + bool try_wait() + { + if(0 == dispatch_semaphore_wait(sem, DISPATCH_TIME_NOW)){ + return true; + }else{ + return false; + } + } + void post() { dispatch_semaphore_signal(sem); } + int get_value() const { return value; } + + private: + const int value; + dispatch_semaphore_t sem; +}; + +#else + +#include +#include + +class Semaphore +{ + public: + explicit Semaphore(int value) : value(value) { sem_init(&mutex, 0, value); } + ~Semaphore() { sem_destroy(&mutex); } + void wait() + { + int r; + do { + r = sem_wait(&mutex); + } while (r == -1 && errno == EINTR); + } + bool try_wait() + { + int result; + do{ + result = sem_trywait(&mutex); + }while(result == -1 && errno == EINTR); + + return (0 == result); + } + void post() { sem_post(&mutex); } + int get_value() const { return value; } + + private: + const int value; + sem_t mutex; +}; + +#endif + +#endif // S3FS_SEMAPHORE_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs.cpp b/s3fs/s3fs.cpp new file mode 100644 index 0000000..0cc895c --- 
/dev/null +++ b/s3fs/s3fs.cpp @@ -0,0 +1,6181 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "s3fs.h" +#include "s3fs_logger.h" +#include "metaheader.h" +#include "fdcache.h" +#include "fdcache_auto.h" +#include "fdcache_stat.h" +#include "curl.h" +#include "curl_multi.h" +#include "s3objlist.h" +#include "cache.h" +#include "addhead.h" +#include "sighandlers.h" +#include "s3fs_xml.h" +#include "string_util.h" +#include "s3fs_auth.h" +#include "s3fs_cred.h" +#include "s3fs_help.h" +#include "s3fs_util.h" +#include "mpu_util.h" +#include "threadpoolman.h" +#include "autolock.h" + +//------------------------------------------------------------------- +// Symbols +//------------------------------------------------------------------- +#if !defined(ENOATTR) +#define ENOATTR ENODATA +#endif + +enum class dirtype { + UNKNOWN = -1, + NEW = 0, + OLD = 1, + FOLDER = 2, + NOOBJ = 3, +}; + +//------------------------------------------------------------------- +// Static variables +//------------------------------------------------------------------- 
+static uid_t mp_uid = 0; // owner of mount point(only not specified uid opt)
+static gid_t mp_gid = 0; // group of mount point(only not specified gid opt)
+static mode_t mp_mode = 0; // mode of mount point
+static mode_t mp_umask = 0; // umask for mount point
+static bool is_mp_umask = false;// default does not set.
+static std::string mountpoint;
+// [FIX] restore the stripped template argument(S3fsCred is declared in
+// s3fs_cred.h, included above).
+static std::unique_ptr<S3fsCred> ps3fscred; // using only in this file
+static std::string mimetype_file;
+static bool nocopyapi = false;
+static bool norenameapi = false;
+static bool nonempty = false;
+static bool allow_other = false;
+static uid_t s3fs_uid = 0;
+static gid_t s3fs_gid = 0;
+static mode_t s3fs_umask = 0;
+static bool is_s3fs_uid = false;// default does not set.
+static bool is_s3fs_gid = false;// default does not set.
+static bool is_s3fs_umask = false;// default does not set.
+static bool is_remove_cache = false;
+static bool is_use_xattr = false;
+static off_t multipart_threshold = 25 * 1024 * 1024;
+static int64_t singlepart_copy_limit = 512 * 1024 * 1024;
+static bool is_specified_endpoint = false;
+static int s3fs_init_deferred_exit_status = 0;
+static bool support_compat_dir = false;// default does not support compatibility directory type
+static int max_keys_list_object = 1000;// default is 1000
+static off_t max_dirty_data = 5LL * 1024LL * 1024LL * 1024LL;
+static bool use_wtf8 = false;
+static off_t fake_diskfree_size = -1; // default is not set(-1)
+static int max_thread_count = 5; // default is 5
+static bool update_parent_dir_stat = false; // default not updating parent directory stats
+static fsblkcnt_t bucket_block_count; // advertised block count of the bucket
+static unsigned long s3fs_block_size = 16 * 1024 * 1024; // s3fs block size is 16MB
+std::string newcache_conf;
+
+//-------------------------------------------------------------------
+// Global functions : prototype
+//-------------------------------------------------------------------
+int put_headers(const char* path, headers_t& meta,
bool is_copy, bool use_st_size = true); // [NOTE] global function because this is called from FdEntity class + +//------------------------------------------------------------------- +// Static functions : prototype +//------------------------------------------------------------------- +static bool is_special_name_folder_object(const char* path); +static int chk_dir_object_type(const char* path, std::string& newpath, std::string& nowpath, std::string& nowcache, headers_t* pmeta = nullptr, dirtype* pDirType = nullptr); +static int remove_old_type_dir(const std::string& path, dirtype type); +static int get_object_attribute(const char* path, struct stat* pstbuf, headers_t* pmeta = nullptr, bool overcheck = true, bool* pisforce = nullptr, bool add_no_truncate_cache = false); +static int check_object_access(const char* path, int mask, struct stat* pstbuf); +static int check_object_owner(const char* path, struct stat* pstbuf); +static int check_parent_object_access(const char* path, int mask); +static int get_local_fent(AutoFdEntity& autoent, FdEntity **entity, const char* path, int flags = O_RDONLY, bool is_load = false); +static bool multi_head_callback(S3fsCurl* s3fscurl, void* param); +static std::unique_ptr multi_head_retry_callback(S3fsCurl* s3fscurl); +static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf, fuse_fill_dir_t filler); +static int readdir_multi_head_4_newcache(const char* path, const S3ObjList& head, void* buf, fuse_fill_dir_t filler); +static int list_bucket(const char* path, S3ObjList& head, const char* delimiter, bool check_content_only = false); +static int directory_empty(const char* path); +static int rename_large_object(const char* from, const char* to); +static int create_file_object(const char* path, mode_t mode, uid_t uid, gid_t gid); +static int create_directory_object(const char* path, mode_t mode, const struct timespec& ts_atime, const struct timespec& ts_mtime, const struct timespec& ts_ctime, uid_t uid, gid_t 
gid, const char* pxattrvalue); +static int rename_object(const char* from, const char* to, bool update_ctime); +static int rename_object_nocopy(const char* from, const char* to, bool update_ctime); +static int clone_directory_object(const char* from, const char* to, bool update_ctime, const char* pxattrvalue); +static int rename_directory(const char* from, const char* to); +static int update_mctime_parent_directory(const char* _path); +static int remote_mountpath_exists(const char* path, bool compat_dir); +static bool get_meta_xattr_value(const char* path, std::string& rawvalue); +static bool get_parent_meta_xattr_value(const char* path, std::string& rawvalue); +static bool get_xattr_posix_key_value(const char* path, std::string& xattrvalue, bool default_key); +static bool build_inherited_xattr_value(const char* path, std::string& xattrvalue); +static bool parse_xattr_keyval(const std::string& xattrpair, std::string& key, std::string* pval); +static size_t parse_xattrs(const std::string& strxattrs, xattrs_t& xattrs); +static std::string raw_build_xattrs(const xattrs_t& xattrs); +static std::string build_xattrs(const xattrs_t& xattrs); +static int s3fs_check_service(); +static bool set_mountpoint_attribute(struct stat& mpst); +static int set_bucket(const char* arg); +static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_args* outargs); +static fsblkcnt_t parse_bucket_size(char* value); +static bool is_cmd_exists(const std::string& command); +static int print_umount_message(const std::string& mp, bool force); + +//------------------------------------------------------------------- +// fuse interface functions +//------------------------------------------------------------------- +static int s3fs_getattr(const char* path, struct stat* stbuf); +static int s3fs_readlink(const char* path, char* buf, size_t size); +static int s3fs_mknod(const char* path, mode_t mode, dev_t rdev); +static int s3fs_mkdir(const char* path, mode_t mode); +static int 
s3fs_unlink(const char* path); +static int s3fs_rmdir(const char* path); +static int s3fs_symlink(const char* from, const char* to); +static int s3fs_rename(const char* from, const char* to); +static int s3fs_link(const char* from, const char* to); +static int s3fs_chmod(const char* path, mode_t mode); +static int s3fs_chmod_nocopy(const char* path, mode_t mode); +static int s3fs_chown(const char* path, uid_t uid, gid_t gid); +static int s3fs_chown_nocopy(const char* path, uid_t uid, gid_t gid); +static int s3fs_utimens(const char* path, const struct timespec ts[2]); +static int s3fs_utimens_nocopy(const char* path, const struct timespec ts[2]); +static int s3fs_truncate(const char* path, off_t size); +static int s3fs_create(const char* path, mode_t mode, struct fuse_file_info* fi); +static int s3fs_open(const char* path, struct fuse_file_info* fi); +static int s3fs_read(const char* path, char* buf, size_t size, off_t offset, struct fuse_file_info* fi); +static int s3fs_write(const char* path, const char* buf, size_t size, off_t offset, struct fuse_file_info* fi); +static int s3fs_statfs(const char* path, struct statvfs* stbuf); +static int s3fs_flush(const char* path, struct fuse_file_info* fi); +static int s3fs_fsync(const char* path, int datasync, struct fuse_file_info* fi); +static int s3fs_release(const char* path, struct fuse_file_info* fi); +static int s3fs_opendir(const char* path, struct fuse_file_info* fi); +static int s3fs_readdir(const char* path, void* buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info* fi); +static int s3fs_access(const char* path, int mask); +static void* s3fs_init(struct fuse_conn_info* conn); +static void s3fs_destroy(void*); +#if defined(__APPLE__) +static int s3fs_setxattr(const char* path, const char* name, const char* value, size_t size, int flags, uint32_t position); +static int s3fs_getxattr(const char* path, const char* name, char* value, size_t size, uint32_t position); +#else +static int s3fs_setxattr(const 
char* path, const char* name, const char* value, size_t size, int flags); +static int s3fs_getxattr(const char* path, const char* name, char* value, size_t size); +#endif +static int s3fs_listxattr(const char* path, char* list, size_t size); +static int s3fs_removexattr(const char* path, const char* name); + +//------------------------------------------------------------------- +// Classes +//------------------------------------------------------------------- +// +// A flag class indicating whether the mount point has a stat +// +// [NOTE] +// The flag is accessed from child threads, so This class is used for exclusive control of flags. +// This class will be reviewed when we organize the code in the future. +// +class MpStatFlag +{ + private: + std::atomic has_mp_stat; + + public: + MpStatFlag() = default; + MpStatFlag(const MpStatFlag&) = delete; + MpStatFlag(MpStatFlag&&) = delete; + ~MpStatFlag() = default; + MpStatFlag& operator=(const MpStatFlag&) = delete; + MpStatFlag& operator=(MpStatFlag&&) = delete; + + bool Get(); + bool Set(bool flag); +}; + +bool MpStatFlag::Get() +{ + return has_mp_stat; +} + +bool MpStatFlag::Set(bool flag) +{ + return has_mp_stat.exchange(flag); +} + +// whether the stat information file for mount point exists +static MpStatFlag* pHasMpStat = nullptr; + +// +// A synchronous class that calls the fuse_fill_dir_t function that processes the readdir data +// +class SyncFiller +{ + private: + mutable pthread_mutex_t filler_lock; + bool is_lock_init = false; + void* filler_buff; + fuse_fill_dir_t filler_func; + std::set filled; + + public: + explicit SyncFiller(void* buff = nullptr, fuse_fill_dir_t filler = nullptr); + SyncFiller(const SyncFiller&) = delete; + SyncFiller(SyncFiller&&) = delete; + ~SyncFiller(); + SyncFiller& operator=(const SyncFiller&) = delete; + SyncFiller& operator=(SyncFiller&&) = delete; + + int Fill(const char *name, const struct stat *stbuf, off_t off); + int SufficiencyFill(const std::vector& pathlist); +}; + 
+SyncFiller::SyncFiller(void* buff, fuse_fill_dir_t filler) : filler_buff(buff), filler_func(filler) +{ + if(!filler_buff || !filler_func){ + S3FS_PRN_CRIT("Internal error: SyncFiller constructor parameter is critical value."); + abort(); + } + + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + + int result; + if(0 != (result = pthread_mutex_init(&filler_lock, &attr))){ + S3FS_PRN_CRIT("failed to init filler_lock: %d", result); + abort(); + } + is_lock_init = true; +} + +SyncFiller::~SyncFiller() +{ + if(is_lock_init){ + int result; + if(0 != (result = pthread_mutex_destroy(&filler_lock))){ + S3FS_PRN_CRIT("failed to destroy filler_lock: %d", result); + abort(); + } + is_lock_init = false; + } +} + +// +// See. prototype fuse_fill_dir_t in fuse.h +// +int SyncFiller::Fill(const char *name, const struct stat *stbuf, off_t off) +{ + AutoLock auto_lock(&filler_lock); + + int result = 0; + if(filled.insert(name).second){ + result = filler_func(filler_buff, name, stbuf, off); + } + return result; +} + +int SyncFiller::SufficiencyFill(const std::vector& pathlist) +{ + AutoLock auto_lock(&filler_lock); + + int result = 0; + for(std::vector::const_iterator it = pathlist.begin(); it != pathlist.end(); ++it) { + if(filled.insert(*it).second){ + if(0 != filler_func(filler_buff, it->c_str(), nullptr, 0)){ + result = 1; + } + } + } + return result; +} + +//------------------------------------------------------------------- +// Functions +//------------------------------------------------------------------- +static bool IS_REPLACEDIR(dirtype type) +{ + return dirtype::OLD == type || dirtype::FOLDER == type || dirtype::NOOBJ == type; +} + +static bool IS_RMTYPEDIR(dirtype type) +{ + return dirtype::OLD == type || dirtype::FOLDER == type; +} + +static bool IS_CREATE_MP_STAT(const char* path) +{ + // [NOTE] + // pHasMpStat->Get() is set in get_object_attribute() + // + 
return (path && 0 == strcmp(path, "/") && !pHasMpStat->Get()); +} + +static bool is_special_name_folder_object(const char* path) +{ + if(!support_compat_dir){ + // s3fs does not support compatibility directory type("_$folder$" etc) now, + // thus always returns false. + return false; + } + + if(!path || '\0' == path[0]){ + return false; + } + if(0 == strcmp(path, "/") && mount_prefix.empty()){ + // the path is the mount point which is the bucket root + return false; + } + + std::string strpath = path; + headers_t header; + + if(std::string::npos == strpath.find("_$folder$", 0)){ + if('/' == *strpath.rbegin()){ + strpath.erase(strpath.length() - 1); + } + strpath += "_$folder$"; + } + S3fsCurl s3fscurl; + if(0 != s3fscurl.HeadRequest(strpath.c_str(), header)){ + return false; + } + header.clear(); + S3FS_MALLOCTRIM(0); + return true; +} + +// [Detail] +// This function is complicated for checking directory object type. +// Arguments is used for deleting cache/path, and remake directory object. +// Please see the codes which calls this function. +// +// path: target path +// newpath: should be object path for making/putting/getting after checking +// nowpath: now object name for deleting after checking +// nowcache: now cache path for deleting after checking +// pmeta: headers map +// pDirType: directory object type +// +static int chk_dir_object_type(const char* path, std::string& newpath, std::string& nowpath, std::string& nowcache, headers_t* pmeta, dirtype* pDirType) +{ + dirtype TypeTmp = dirtype::UNKNOWN; + int result = -1; + bool isforce = false; + dirtype* pType = pDirType ? pDirType : &TypeTmp; + + // Normalize new path. + newpath = path; + if('/' != *newpath.rbegin()){ + std::string::size_type Pos; + if(std::string::npos != (Pos = newpath.find("_$folder$", 0))){ + newpath.erase(Pos); + } + newpath += "/"; + } + + // Always check "dir/" at first. 
+ if(0 == (result = get_object_attribute(newpath.c_str(), nullptr, pmeta, false, &isforce))){ + // Found "dir/" cache --> Check for "_$folder$", "no dir object" + nowcache = newpath; + if(is_special_name_folder_object(newpath.c_str())){ // check support_compat_dir in this function + // "_$folder$" type. + (*pType) = dirtype::FOLDER; + nowpath.erase(newpath.length() - 1); + nowpath += "_$folder$"; // cut and add + }else if(isforce){ + // "no dir object" type. + (*pType) = dirtype::NOOBJ; + nowpath = ""; + }else{ + nowpath = newpath; + if(!nowpath.empty() && '/' == *nowpath.rbegin()){ + // "dir/" type + (*pType) = dirtype::NEW; + }else{ + // "dir" type + (*pType) = dirtype::OLD; + } + } + }else if(support_compat_dir){ + // Check "dir" when support_compat_dir is enabled + nowpath.erase(newpath.length() - 1); + if(0 == (result = get_object_attribute(nowpath.c_str(), nullptr, pmeta, false, &isforce))){ + // Found "dir" cache --> this case is only "dir" type. + // Because, if object is "_$folder$" or "no dir object", the cache is "dir/" type. + // (But "no dir object" is checked here.) + nowcache = nowpath; + if(isforce){ + (*pType) = dirtype::NOOBJ; + nowpath = ""; + }else{ + (*pType) = dirtype::OLD; + } + }else{ + // Not found cache --> check for "_$folder$" and "no dir object". + // (come here is that support_compat_dir is enabled) + nowcache = ""; // This case is no cache. + nowpath += "_$folder$"; + if(is_special_name_folder_object(nowpath.c_str())){ + // "_$folder$" type. + (*pType) = dirtype::FOLDER; + result = 0; // result is OK. + }else if(-ENOTEMPTY == directory_empty(newpath.c_str())){ + // "no dir object" type. + (*pType) = dirtype::NOOBJ; + nowpath = ""; // now path. + result = 0; // result is OK. + }else{ + // Error: Unknown type. 
+ (*pType) = dirtype::UNKNOWN; + newpath = ""; + nowpath = ""; + } + } + } + return result; +} + +static int remove_old_type_dir(const std::string& path, dirtype type) +{ + if(IS_RMTYPEDIR(type)){ + S3fsCurl s3fscurl; + int result = s3fscurl.DeleteRequest(path.c_str()); + if(0 != result && -ENOENT != result){ + return result; + } + // succeed removing or not found the directory + }else{ + // nothing to do + } + return 0; +} + +// +// Get object attributes with stat cache. +// This function is base for s3fs_getattr(). +// +// [NOTICE] +// Checking order is changed following list because of reducing the number of the requests. +// 1) "dir" +// 2) "dir/" +// 3) "dir_$folder$" +// +// Special two case of the mount point directory: +// [Case 1] the mount point is the root of the bucket: +// 1) "/" +// +// [Case 2] the mount point is a directory path(ex. foo) below the bucket: +// 1) "foo" +// 2) "foo/" +// 3) "foo_$folder$" +// +static int get_object_attribute(const char* path, struct stat* pstbuf, headers_t* pmeta, bool overcheck, bool* pisforce, bool add_no_truncate_cache) +{ + int result = -1; + struct stat tmpstbuf; + struct stat* pstat = pstbuf ? pstbuf : &tmpstbuf; + headers_t tmpHead; + headers_t* pheader = pmeta ? pmeta : &tmpHead; + std::string strpath; + S3fsCurl s3fscurl; + bool forcedir = false; + bool is_mountpoint = false; // path is the mount point + bool is_bucket_mountpoint = false; // path is the mount point which is the bucket root + std::string::size_type Pos; + + S3FS_PRN_DBG("[path=%s]", path); + + if(!path || '\0' == path[0]){ + return -ENOENT; + } + + memset(pstat, 0, sizeof(struct stat)); + + // check mount point + if(0 == strcmp(path, "/") || 0 == strcmp(path, ".")){ + is_mountpoint = true; + if(mount_prefix.empty()){ + is_bucket_mountpoint = true; + } + // default stat for mount point if the directory stat file is not existed. + pstat->st_mode = mp_mode; + pstat->st_uid = is_s3fs_uid ? s3fs_uid : mp_uid; + pstat->st_gid = is_s3fs_gid ? 
s3fs_gid : mp_gid; + } + + // Check cache. + pisforce = (nullptr != pisforce ? pisforce : &forcedir); + (*pisforce) = false; + strpath = path; + if(support_compat_dir && overcheck && std::string::npos != (Pos = strpath.find("_$folder$", 0))){ + strpath.erase(Pos); + strpath += "/"; + } + // [NOTE] + // For mount points("/"), the Stat cache key name is "/". + // + if(StatCache::getStatCacheData()->GetStat(strpath, pstat, pheader, overcheck, pisforce)){ + if(is_mountpoint){ + // if mount point, we need to set this. + pstat->st_nlink = 1; // see fuse faq + } + return 0; + } + if(StatCache::getStatCacheData()->IsNoObjectCache(strpath)){ + // there is the path in the cache for no object, it is no object. + return -ENOENT; + } + + // set query(head request) path + if(is_bucket_mountpoint){ + // [NOTE] + // This is a special process for mount point + // The path is "/" for mount points. + // If the bucket mounted at a mount point, we try to find "/" object under + // the bucket for mount point's stat. + // In this case, we will send the request "HEAD // HTTP /1.1" to S3 server. + // + // If the directory under the bucket is mounted, it will be sent + // "HEAD // HTTP/1.1", so we do not need to change path at + // here. + // + strpath = "//"; // strpath is "//" + }else{ + strpath = path; + } + + if(use_newcache && accessor->UseGlobalCache()){ + size_t realSize = 0; + std::map headers; + result = accessor->Head(strpath, realSize, headers); + if(0 == result){ + headers["Content-Length"] = std::to_string(realSize); + for(auto& it : headers) { + pheader->insert(std::make_pair(it.first, it.second)); + } + } + }else{ + result = s3fscurl.HeadRequest(strpath.c_str(), (*pheader)); + s3fscurl.DestroyCurlHandle(); + } + + // if not found target path object, do over checking + if(-EPERM == result){ + // [NOTE] + // In case of a permission error, it exists in directory + // file list but inaccessible. 
So there is a problem that + // it will send a HEAD request every time, because it is + // not registered in the Stats cache. + // Therefore, even if the file has a permission error, it + // should be registered in the Stats cache. However, if + // the response without modifying is registered in the + // cache, the file permission will be 0644(umask dependent) + // because the meta header does not exist. + // Thus, set the mode of 0000 here in the meta header so + // that s3fs can print a permission error when the file + // is actually accessed. + // It is better not to set meta header other than mode, + // so do not do it. + // + (*pheader)["x-amz-meta-mode"] = "0"; + + }else if(0 != result){ + if(overcheck && !is_bucket_mountpoint){ + // when support_compat_dir is disabled, strpath maybe have "_$folder$". + if('/' != *strpath.rbegin() && std::string::npos == strpath.find("_$folder$", 0)){ + // now path is "object", do check "object/" for over checking + strpath += "/"; + result = s3fscurl.HeadRequest(strpath.c_str(), (*pheader)); + s3fscurl.DestroyCurlHandle(); + } + if(support_compat_dir && 0 != result){ + // now path is "object/", do check "object_$folder$" for over checking + strpath.erase(strpath.length() - 1); + strpath += "_$folder$"; + result = s3fscurl.HeadRequest(strpath.c_str(), (*pheader)); + s3fscurl.DestroyCurlHandle(); + + if(0 != result){ + // cut "_$folder$" for over checking "no dir object" after here + if(std::string::npos != (Pos = strpath.find("_$folder$", 0))){ + strpath.erase(Pos); + } + } + } + } + if(0 != result && std::string::npos == strpath.find("_$folder$", 0)){ + // now path is "object" or "object/", do check "no dir object" which is not object but has only children. + // + // [NOTE] + // If the path is mount point and there is no Stat information file for it, we need this process. 
+ // + if('/' == *strpath.rbegin()){ + strpath.erase(strpath.length() - 1); + } + if(-ENOTEMPTY == directory_empty(strpath.c_str())){ + // found "no dir object". + strpath += "/"; + *pisforce = true; + result = 0; + } + } + }else{ + if('/' != *strpath.rbegin() && std::string::npos == strpath.find("_$folder$", 0) && is_need_check_obj_detail(*pheader)){ + // check a case of that "object" does not have attribute and "object" is possible to be directory. + if(-ENOTEMPTY == directory_empty(strpath.c_str())){ + // found "no dir object". + strpath += "/"; + *pisforce = true; + result = 0; + } + } + } + + // set headers for mount point from default stat + if(is_mountpoint){ + if(0 != result || pheader->empty()){ + pHasMpStat->Set(false); + + // [NOTE] + // If mount point and no stat information file, create header + // information from the default stat. + // + (*pheader)["Content-Type"] = S3fsCurl::LookupMimeType(strpath); + (*pheader)["x-amz-meta-uid"] = std::to_string(pstat->st_uid); + (*pheader)["x-amz-meta-gid"] = std::to_string(pstat->st_gid); + (*pheader)["x-amz-meta-mode"] = std::to_string(pstat->st_mode); + (*pheader)["x-amz-meta-atime"] = std::to_string(pstat->st_atime); + (*pheader)["x-amz-meta-ctime"] = std::to_string(pstat->st_ctime); + (*pheader)["x-amz-meta-mtime"] = std::to_string(pstat->st_mtime); + + result = 0; + }else{ + pHasMpStat->Set(true); + } + } + + // [NOTE] + // If the file is listed but not allowed access, put it in + // the positive cache instead of the negative cache. + // + // When mount points, the following error does not occur. + // + if(0 != result && -EPERM != result){ + // finally, "path" object did not find. Add no object cache. + strpath = path; // reset original + StatCache::getStatCacheData()->AddNoObjectCache(strpath); + return result; + } + + // set cache key + if(is_bucket_mountpoint){ + strpath = "/"; + }else if(std::string::npos != (Pos = strpath.find("_$folder$", 0))){ + // if path has "_$folder$", need to cut it. 
+ strpath.erase(Pos); + strpath += "/"; + } + + // Set into cache + // + // [NOTE] + // When add_no_truncate_cache is true, the stats is always cached. + // This cached stats is only removed by DelStat(). + // This is necessary for the case to access the attribute of opened file. + // (ex. getxattr() is called while writing to the opened file.) + // + if(add_no_truncate_cache || 0 != StatCache::getStatCacheData()->GetCacheSize()){ + // add into stat cache + if(!StatCache::getStatCacheData()->AddStat(strpath, (*pheader), forcedir, add_no_truncate_cache)){ + S3FS_PRN_ERR("failed adding stat cache [path=%s]", strpath.c_str()); + return -ENOENT; + } + if(!StatCache::getStatCacheData()->GetStat(strpath, pstat, pheader, overcheck, pisforce)){ + // There is not in cache.(why?) -> retry to convert. + if(!convert_header_to_stat(strpath.c_str(), (*pheader), pstat, forcedir)){ + S3FS_PRN_ERR("failed convert headers to stat[path=%s]", strpath.c_str()); + return -ENOENT; + } + } + }else{ + // cache size is Zero -> only convert. + if(!convert_header_to_stat(strpath.c_str(), (*pheader), pstat, forcedir)){ + S3FS_PRN_ERR("failed convert headers to stat[path=%s]", strpath.c_str()); + return -ENOENT; + } + } + + if(is_mountpoint){ + // if mount point, we need to set this. + pstat->st_nlink = 1; // see fuse faq + } + + return 0; +} + +// +// Check the object uid and gid for write/read/execute. +// The param "mask" is as same as access() function. +// If there is not a target file, this function returns -ENOENT. +// If the target file can be accessed, the result always is 0. +// +// path: the target object path +// mask: bit field(F_OK, R_OK, W_OK, X_OK) like access(). +// stat: nullptr or the pointer of struct stat. +// +static int check_object_access(const char* path, int mask, struct stat* pstbuf) +{ + int result; + struct stat st; + struct stat* pst = (pstbuf ? 
pstbuf : &st); + struct fuse_context* pcxt; + + S3FS_PRN_DBG("[path=%s]", path); + + if(nullptr == (pcxt = fuse_get_context())){ + return -EIO; + } + S3FS_PRN_DBG("[pid=%u,uid=%u,gid=%u]", (unsigned int)(pcxt->pid), (unsigned int)(pcxt->uid), (unsigned int)(pcxt->gid)); + + if(0 != (result = get_object_attribute(path, pst))){ + // If there is not the target file(object), result is -ENOENT. + return result; + } + if(0 == pcxt->uid){ + // root is allowed all accessing. + return 0; + } + if(is_s3fs_uid && s3fs_uid == pcxt->uid){ + // "uid" user is allowed all accessing. + return 0; + } + if(F_OK == mask){ + // if there is a file, always return allowed. + return 0; + } + + // for "uid", "gid" option + uid_t obj_uid = (is_s3fs_uid ? s3fs_uid : pst->st_uid); + gid_t obj_gid = (is_s3fs_gid ? s3fs_gid : pst->st_gid); + + // compare file mode and uid/gid + mask. + mode_t mode; + mode_t base_mask = S_IRWXO; + if(is_s3fs_umask){ + // If umask is set, all object attributes set ~umask. + mode = ((S_IRWXU | S_IRWXG | S_IRWXO) & ~s3fs_umask); + }else{ + mode = pst->st_mode; + } + if(pcxt->uid == obj_uid){ + base_mask |= S_IRWXU; + } + if(pcxt->gid == obj_gid){ + base_mask |= S_IRWXG; + } else if(1 == is_uid_include_group(pcxt->uid, obj_gid)){ + base_mask |= S_IRWXG; + } + mode &= base_mask; + + if(X_OK == (mask & X_OK)){ + if(0 == (mode & (S_IXUSR | S_IXGRP | S_IXOTH))){ + return -EACCES; + } + } + if(W_OK == (mask & W_OK)){ + if(0 == (mode & (S_IWUSR | S_IWGRP | S_IWOTH))){ + return -EACCES; + } + } + if(R_OK == (mask & R_OK)){ + if(0 == (mode & (S_IRUSR | S_IRGRP | S_IROTH))){ + return -EACCES; + } + } + if(0 == mode){ + return -EACCES; + } + return 0; +} + +static int check_object_owner(const char* path, struct stat* pstbuf) +{ + int result; + struct stat st; + struct stat* pst = (pstbuf ? 
pstbuf : &st); + const struct fuse_context* pcxt; + + S3FS_PRN_DBG("[path=%s]", path); + + if(nullptr == (pcxt = fuse_get_context())){ + return -EIO; + } + if(0 != (result = get_object_attribute(path, pst))){ + // If there is not the target file(object), result is -ENOENT. + return result; + } + // check owner + if(0 == pcxt->uid){ + // root is allowed all accessing. + return 0; + } + if(is_s3fs_uid && s3fs_uid == pcxt->uid){ + // "uid" user is allowed all accessing. + return 0; + } + if(pcxt->uid == pst->st_uid){ + return 0; + } + return -EPERM; +} + +// +// Check accessing the parent directories of the object by uid and gid. +// +static int check_parent_object_access(const char* path, int mask) +{ + std::string parent; + int result; + + S3FS_PRN_DBG("[path=%s]", path); + + if(0 == strcmp(path, "/") || 0 == strcmp(path, ".")){ + // path is mount point. + return 0; + } + if(X_OK == (mask & X_OK)){ + for(parent = mydirname(path); !parent.empty(); parent = mydirname(parent)){ + if(parent == "."){ + parent = "/"; + } + if(0 != (result = check_object_access(parent.c_str(), X_OK, nullptr))){ + return result; + } + if(parent == "/" || parent == "."){ + break; + } + } + } + mask = (mask & ~X_OK); + if(0 != mask){ + parent = mydirname(path); + if(parent == "."){ + parent = "/"; + } + if(0 != (result = check_object_access(parent.c_str(), mask, nullptr))){ + return result; + } + } + return 0; +} + +// +// ssevalue is MD5 for SSE-C type, or KMS id for SSE-KMS +// +bool get_object_sse_type(const char* path, sse_type_t& ssetype, std::string& ssevalue) +{ + if(!path){ + return false; + } + + headers_t meta; + if(0 != get_object_attribute(path, nullptr, &meta)){ + S3FS_PRN_ERR("Failed to get object(%s) headers", path); + return false; + } + + ssetype = sse_type_t::SSE_DISABLE; + ssevalue.erase(); + for(headers_t::iterator iter = meta.begin(); iter != meta.end(); ++iter){ + std::string key = (*iter).first; + if(0 == strcasecmp(key.c_str(), "x-amz-server-side-encryption") && 0 == 
strcasecmp((*iter).second.c_str(), "AES256")){ + ssetype = sse_type_t::SSE_S3; + }else if(0 == strcasecmp(key.c_str(), "x-amz-server-side-encryption-aws-kms-key-id")){ + ssetype = sse_type_t::SSE_KMS; + ssevalue = (*iter).second; + }else if(0 == strcasecmp(key.c_str(), "x-amz-server-side-encryption-customer-key-md5")){ + ssetype = sse_type_t::SSE_C; + ssevalue = (*iter).second; + } + } + return true; +} + +static int get_local_fent(AutoFdEntity& autoent, FdEntity **entity, const char* path, int flags, bool is_load) +{ + int result; + struct stat stobj; + FdEntity* ent; + headers_t meta; + + S3FS_PRN_INFO2("[path=%s]", path); + + if(0 != (result = get_object_attribute(path, &stobj, &meta))){ + return result; + } + + // open + struct timespec st_mctime; + if(!S_ISREG(stobj.st_mode) && !S_ISLNK(stobj.st_mode)){ + st_mctime = S3FS_OMIT_TS; + }else{ + set_stat_to_timespec(stobj, stat_time_type::MTIME, st_mctime); + } + bool force_tmpfile = S_ISREG(stobj.st_mode) ? false : true; + + if(nullptr == (ent = autoent.Open(path, &meta, stobj.st_size, st_mctime, flags, force_tmpfile, true, false, AutoLock::NONE))){ + S3FS_PRN_ERR("Could not open file. errno(%d)", errno); + return -EIO; + } + // load + if(is_load && !ent->LoadAll(autoent.GetPseudoFd(), &meta)){ + S3FS_PRN_ERR("Could not load file. errno(%d)", errno); + autoent.Close(); + return -EIO; + } + *entity = ent; + return 0; +} + +// +// create or update s3 meta +// @return fuse return code +// +int put_headers(const char* path, headers_t& meta, bool is_copy, bool use_st_size) +{ + int result; + S3fsCurl s3fscurl(true); + off_t size; + std::string strpath; + + S3FS_PRN_INFO2("[path=%s]", path); + + if(0 == strcmp(path, "/") && mount_prefix.empty()){ + strpath = "//"; // for the mount point that is bucket root, change "/" to "//". + }else{ + strpath = path; + } + + // files larger than 5GB must be modified via the multipart interface + // call use_st_size as false when the file does not exist(ex. 
rename object) + if(use_st_size && '/' != *strpath.rbegin()){ // directory object("dir/") is always 0(Content-Length = 0) + struct stat buf; + if(0 != (result = get_object_attribute(path, &buf))){ + return result; + } + size = buf.st_size; + }else{ + size = get_size(meta); + } + + if(!nocopyapi && !nomultipart && size >= multipart_threshold){ + if(0 != (result = s3fscurl.MultipartHeadRequest(strpath.c_str(), size, meta, is_copy))){ + return result; + } + }else{ + if(0 != (result = s3fscurl.PutHeadRequest(strpath.c_str(), meta, is_copy))){ + return result; + } + } + return 0; +} + +static int s3fs_getattr(const char* _path, struct stat* stbuf) +{ + WTF8_ENCODE(path) + int result; + +#if defined(__APPLE__) + FUSE_CTX_DBG("[path=%s]", path); +#else + FUSE_CTX_INFO("[path=%s]", path); +#endif + + // check parent directory attribute. + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + if(0 != (result = check_object_access(path, F_OK, stbuf))){ + return result; + } + // If has already opened fd, the st_size should be instead. 
+ // (See: Issue 241) + if(stbuf){ + AutoFdEntity autoent; + const FdEntity* ent; + if(nullptr != (ent = autoent.OpenExistFdEntity(path))){ + struct stat tmpstbuf; + if(ent->GetStats(tmpstbuf)){ + stbuf->st_size = tmpstbuf.st_size; + } + } + if(0 == strcmp(path, "/")){ + stbuf->st_size = 4096; + } + stbuf->st_blksize = 4096; + stbuf->st_blocks = get_blocks(stbuf->st_size); + + S3FS_PRN_DBG("[path=%s] uid=%u, gid=%u, mode=%04o", path, (unsigned int)(stbuf->st_uid), (unsigned int)(stbuf->st_gid), stbuf->st_mode); + } + S3FS_MALLOCTRIM(0); + + return result; +} + +static int s3fs_readlink(const char* _path, char* buf, size_t size) +{ + if(!_path || !buf || 0 == size){ + return 0; + } + WTF8_ENCODE(path) + std::string strValue; + FUSE_CTX_INFO("[path=%s]", path); + + // check symbolic link cache + if(!StatCache::getStatCacheData()->GetSymlink(path, strValue)){ + // not found in cache, then open the path + { // scope for AutoFdEntity + AutoFdEntity autoent; + FdEntity* ent; + int result; + if(0 != (result = get_local_fent(autoent, &ent, path, O_RDONLY))){ + S3FS_PRN_ERR("could not get fent(file=%s)", path); + return result; + } + // Get size + off_t readsize; + if(!ent->GetSize(readsize)){ + S3FS_PRN_ERR("could not get file size(file=%s)", path); + return -EIO; + } + if(static_cast(size) <= readsize){ + readsize = size - 1; + } + // Read + ssize_t ressize; + if(0 > (ressize = ent->Read(autoent.GetPseudoFd(), buf, 0, readsize))){ + S3FS_PRN_ERR("could not read file(file=%s, ressize=%zd)", path, ressize); + return static_cast(ressize); + } + buf[ressize] = '\0'; + } + + // check buf if it has space words. + strValue = trim(buf); + + // decode wtf8. 
This will always be shorter + if(use_wtf8){ + strValue = s3fs_wtf8_decode(strValue); + } + + // add symbolic link cache + if(!StatCache::getStatCacheData()->AddSymlink(path, strValue)){ + S3FS_PRN_ERR("failed to add symbolic link cache for %s", path); + } + } + // copy result + strncpy(buf, strValue.c_str(), size - 1); + buf[size - 1] = '\0'; + + S3FS_MALLOCTRIM(0); + + return 0; +} + +// common function for creation of a plain object +static int create_file_object(const char* path, mode_t mode, uid_t uid, gid_t gid) +{ + S3FS_PRN_INFO2("[path=%s][mode=%04o]", path, mode); + + std::string strnow = s3fs_str_realtime(); + headers_t meta; + meta["Content-Type"] = S3fsCurl::LookupMimeType(path); + meta["x-amz-meta-uid"] = std::to_string(uid); + meta["x-amz-meta-gid"] = std::to_string(gid); + meta["x-amz-meta-mode"] = std::to_string(mode); + meta["x-amz-meta-atime"] = strnow; + meta["x-amz-meta-ctime"] = strnow; + meta["x-amz-meta-mtime"] = strnow; + + S3fsCurl s3fscurl(true); + return s3fscurl.PutRequest(path, meta, -1); // fd=-1 means for creating zero byte object. 
+} + +static int s3fs_mknod(const char *_path, mode_t mode, dev_t rdev) +{ + WTF8_ENCODE(path) + int result; + struct fuse_context* pcxt; + + FUSE_CTX_INFO("[path=%s][mode=%04o][dev=%llu]", path, mode, (unsigned long long)rdev); + + if(nullptr == (pcxt = fuse_get_context())){ + return -EIO; + } + + if(0 != (result = create_file_object(path, mode, pcxt->uid, pcxt->gid))){ + S3FS_PRN_ERR("could not create object for special file(result=%d)", result); + return result; + } + StatCache::getStatCacheData()->DelStat(path); + + // update parent directory timestamp + int update_result; + if(0 != (update_result = update_mctime_parent_directory(path))){ + S3FS_PRN_ERR("succeed to mknod the file(%s), but could not update timestamp of its parent directory(result=%d).", path, update_result); + } + + S3FS_MALLOCTRIM(0); + + return result; +} + +static int s3fs_create(const char* _path, mode_t mode, struct fuse_file_info* fi) +{ + WTF8_ENCODE(path) + int result; + const struct fuse_context* pcxt; + + FUSE_CTX_INFO("[path=%s][mode=%04o][flags=0x%x]", path, mode, fi->flags); + + if(nullptr == (pcxt = fuse_get_context())){ + return -EIO; + } + + // check parent directory attribute. 
+ if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + result = check_object_access(path, W_OK, nullptr); + if(-ENOENT == result){ + if(0 != (result = check_parent_object_access(path, W_OK))){ + return result; + } + }else if(0 != result){ + return result; + } + + std::string strnow = s3fs_str_realtime(); + headers_t meta; + meta["Content-Length"] = "0"; + meta["x-amz-meta-uid"] = std::to_string(pcxt->uid); + meta["x-amz-meta-gid"] = std::to_string(pcxt->gid); + meta["x-amz-meta-mode"] = std::to_string(mode); + meta["x-amz-meta-atime"] = strnow; + meta["x-amz-meta-mtime"] = strnow; + meta["x-amz-meta-ctime"] = strnow; + + std::string xattrvalue; + if(build_inherited_xattr_value(path, xattrvalue)){ + S3FS_PRN_DBG("Set xattrs = %s", urlDecode(xattrvalue).c_str()); + meta["x-amz-meta-xattr"] = xattrvalue; + } + + // [NOTE] set no_truncate flag + // At this point, the file has not been created(uploaded) and + // the data is only present in the Stats cache. + // The Stats cache should not be deleted automatically by + // timeout. If this stats is deleted, s3fs will try to get it + // from the server with a Head request and will get an + // unexpected error because the result object does not exist. 
+ // + if(!StatCache::getStatCacheData()->AddStat(path, meta, false, true)){ + return -EIO; + } + + AutoFdEntity autoent; + FdEntity* ent; + int error = 0; + if(nullptr == (ent = autoent.Open(path, &meta, 0, S3FS_OMIT_TS, fi->flags, false, true, false, AutoLock::NONE, &error))){ + StatCache::getStatCacheData()->DelStat(path); + return error; + } + ent->MarkDirtyNewFile(); + fi->fh = autoent.Detach(); // KEEP fdentity open; + + S3FS_MALLOCTRIM(0); + + return 0; +} + +static int create_directory_object(const char* path, mode_t mode, const struct timespec& ts_atime, const struct timespec& ts_mtime, const struct timespec& ts_ctime, uid_t uid, gid_t gid, const char* pxattrvalue) +{ + S3FS_PRN_INFO1("[path=%s][mode=%04o][atime=%s][mtime=%s][ctime=%s][uid=%u][gid=%u]", path, mode, str(ts_atime).c_str(), str(ts_mtime).c_str(), str(ts_ctime).c_str(), (unsigned int)uid, (unsigned int)gid); + + if(!path || '\0' == path[0]){ + return -EINVAL; + } + std::string tpath = path; + if('/' != *tpath.rbegin()){ + tpath += "/"; + }else if("/" == tpath && mount_prefix.empty()){ + tpath = "//"; // for the mount point that is bucket root, change "/" to "//". + } + + headers_t meta; + meta["x-amz-meta-uid"] = std::to_string(uid); + meta["x-amz-meta-gid"] = std::to_string(gid); + meta["x-amz-meta-mode"] = std::to_string(mode); + meta["x-amz-meta-atime"] = str(ts_atime); + meta["x-amz-meta-mtime"] = str(ts_mtime); + meta["x-amz-meta-ctime"] = str(ts_ctime); + + if(pxattrvalue){ + S3FS_PRN_DBG("Set xattrs = %s", urlDecode(pxattrvalue).c_str()); + meta["x-amz-meta-xattr"] = pxattrvalue; + } + + S3fsCurl s3fscurl; + return s3fscurl.PutRequest(tpath.c_str(), meta, -1); // fd=-1 means for creating zero byte object. 
+} + +static int s3fs_mkdir(const char* _path, mode_t mode) +{ + WTF8_ENCODE(path) + int result; + struct fuse_context* pcxt; + + FUSE_CTX_INFO("[path=%s][mode=%04o]", path, mode); + + if(nullptr == (pcxt = fuse_get_context())){ + return -EIO; + } + + // check parent directory attribute. + if(0 != (result = check_parent_object_access(path, W_OK | X_OK))){ + return result; + } + if(-ENOENT != (result = check_object_access(path, F_OK, nullptr))){ + if(0 == result){ + result = -EEXIST; + } + return result; + } + + std::string xattrvalue; + const char* pxattrvalue; + if(get_parent_meta_xattr_value(path, xattrvalue)){ + pxattrvalue = xattrvalue.c_str(); + }else{ + pxattrvalue = nullptr; + } + + struct timespec now; + s3fs_realtime(now); + result = create_directory_object(path, mode, now, now, now, pcxt->uid, pcxt->gid, pxattrvalue); + + StatCache::getStatCacheData()->DelStat(path); + + // update parent directory timestamp + int update_result; + if(0 != (update_result = update_mctime_parent_directory(path))){ + S3FS_PRN_ERR("succeed to create the directory(%s), but could not update timestamp of its parent directory(result=%d).", path, update_result); + } + + S3FS_MALLOCTRIM(0); + + return result; +} + +static int s3fs_unlink(const char* _path) +{ + WTF8_ENCODE(path) + int result; + + FUSE_CTX_INFO("[path=%s]", path); + + if(0 != (result = check_parent_object_access(path, W_OK | X_OK))){ + return result; + } + + if(use_newcache){ + result = accessor->Delete(path); + }else{ + S3fsCurl s3fscurl; + result = s3fscurl.DeleteRequest(path); + FdManager::DeleteCacheFile(path); + } + + StatCache::getStatCacheData()->DelStat(path); + StatCache::getStatCacheData()->DelSymlink(path); + + // update parent directory timestamp + int update_result; + if(0 != (update_result = update_mctime_parent_directory(path))){ + S3FS_PRN_ERR("succeed to remove the file(%s), but could not update timestamp of its parent directory(result=%d).", path, update_result); + } + + S3FS_MALLOCTRIM(0); + + 
return result; +} + +static int directory_empty(const char* path) +{ + int result; + S3ObjList head; + + if((result = list_bucket(path, head, "/", true)) != 0){ + S3FS_PRN_ERR("list_bucket returns error."); + return result; + } + if(!head.IsEmpty()){ + return -ENOTEMPTY; + } + return 0; +} + +static int s3fs_rmdir(const char* _path) +{ + WTF8_ENCODE(path) + int result; + std::string strpath; + struct stat stbuf; + + FUSE_CTX_INFO("[path=%s]", path); + + if(0 != (result = check_parent_object_access(path, W_OK | X_OK))){ + return result; + } + + // directory must be empty + if(directory_empty(path) != 0){ + return -ENOTEMPTY; + } + + strpath = path; + if('/' != *strpath.rbegin()){ + strpath += "/"; + } + S3fsCurl s3fscurl; + result = s3fscurl.DeleteRequest(strpath.c_str()); + s3fscurl.DestroyCurlHandle(); + StatCache::getStatCacheData()->DelStat(strpath.c_str()); + + // double check for old version(before 1.63) + // The old version makes "dir" object, newer version makes "dir/". + // A case, there is only "dir", the first removing object is "dir/". + // Then "dir/" is not exists, but curl_delete returns 0. + // So need to check "dir" and should be removed it. + if('/' == *strpath.rbegin()){ + strpath.erase(strpath.length() - 1); + } + if(0 == get_object_attribute(strpath.c_str(), &stbuf, nullptr, false)){ + if(S_ISDIR(stbuf.st_mode)){ + // Found "dir" object. + result = s3fscurl.DeleteRequest(strpath.c_str()); + s3fscurl.DestroyCurlHandle(); + StatCache::getStatCacheData()->DelStat(strpath.c_str()); + } + } + // If there is no "dir" and "dir/" object(this case is made by s3cmd/s3sync), + // the cache key is "dir/". So we get error only once(delete "dir/"). + + // check for "_$folder$" object. + // This processing is necessary for other S3 clients compatibility. 
+ if(is_special_name_folder_object(strpath.c_str())){ + strpath += "_$folder$"; + result = s3fscurl.DeleteRequest(strpath.c_str()); + } + + // update parent directory timestamp + int update_result; + if(0 != (update_result = update_mctime_parent_directory(path))){ + S3FS_PRN_ERR("succeed to remove the directory(%s), but could not update timestamp of its parent directory(result=%d).", path, update_result); + } + + S3FS_MALLOCTRIM(0); + + return result; +} + +static int s3fs_symlink(const char* _from, const char* _to) +{ + WTF8_ENCODE(from) + WTF8_ENCODE(to) + int result; + const struct fuse_context* pcxt; + + FUSE_CTX_INFO("[from=%s][to=%s]", from, to); + + if(nullptr == (pcxt = fuse_get_context())){ + return -EIO; + } + if(0 != (result = check_parent_object_access(to, W_OK | X_OK))){ + return result; + } + if(-ENOENT != (result = check_object_access(to, F_OK, nullptr))){ + if(0 == result){ + result = -EEXIST; + } + return result; + } + + std::string strnow = s3fs_str_realtime(); + headers_t headers; + headers["Content-Type"] = "application/octet-stream"; // Static + headers["x-amz-meta-mode"] = std::to_string(S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO); + headers["x-amz-meta-atime"] = strnow; + headers["x-amz-meta-ctime"] = strnow; + headers["x-amz-meta-mtime"] = strnow; + headers["x-amz-meta-uid"] = std::to_string(pcxt->uid); + headers["x-amz-meta-gid"] = std::to_string(pcxt->gid); + + // [NOTE] + // Symbolic links do not set xattrs. 
+ + // open tmpfile + std::string strFrom; + { // scope for AutoFdEntity + AutoFdEntity autoent; + FdEntity* ent; + if(nullptr == (ent = autoent.Open(to, &headers, 0, S3FS_OMIT_TS, O_RDWR, true, true, false, AutoLock::NONE))){ + S3FS_PRN_ERR("could not open tmpfile(errno=%d)", errno); + return -errno; + } + // write(without space words) + strFrom = trim(from); + ssize_t from_size = static_cast(strFrom.length()); + ssize_t ressize; + if(from_size != (ressize = ent->Write(autoent.GetPseudoFd(), strFrom.c_str(), 0, from_size, true))){ + if(ressize < 0){ + S3FS_PRN_ERR("could not write tmpfile(errno=%d)", static_cast(ressize)); + return static_cast(ressize); + }else{ + S3FS_PRN_ERR("could not write tmpfile %zd byte(errno=%d)", ressize, errno); + return (0 == errno ? -EIO : -errno); + } + } + // upload + if(0 != (result = ent->Flush(autoent.GetPseudoFd(), AutoLock::NONE, true, true))){ + S3FS_PRN_WARN("could not upload tmpfile(result=%d)", result); + } + } + + StatCache::getStatCacheData()->DelStat(to); + if(!StatCache::getStatCacheData()->AddSymlink(to, strFrom)){ + S3FS_PRN_ERR("failed to add symbolic link cache for %s", to); + } + + // update parent directory timestamp + int update_result; + if(0 != (update_result = update_mctime_parent_directory(to))){ + S3FS_PRN_ERR("succeed to create symbolic link(%s), but could not update timestamp of its parent directory(result=%d).", to, update_result); + } + + S3FS_MALLOCTRIM(0); + + return result; +} + +static int rename_object(const char* from, const char* to, bool update_ctime) +{ + int result; + headers_t meta; + struct stat buf; + + S3FS_PRN_INFO1("[from=%s][to=%s]", from , to); + + if(0 != (result = check_parent_object_access(to, W_OK | X_OK))){ + // not permit writing "to" object parent dir. + return result; + } + if(0 != (result = check_parent_object_access(from, W_OK | X_OK))){ + // not permit removing "from" object parent dir. 
+ return result; + } + if(0 != (result = get_object_attribute(from, &buf, &meta))){ + return result; + } + + std::string strSourcePath = (mount_prefix.empty() && 0 == strcmp("/", from)) ? "//" : from; + + if(update_ctime){ + meta["x-amz-meta-ctime"] = s3fs_str_realtime(); + } + meta["x-amz-copy-source"] = urlEncodePath(service_path + S3fsCred::GetBucket() + get_realpath(strSourcePath.c_str())); + meta["Content-Type"] = S3fsCurl::LookupMimeType(to); + meta["x-amz-metadata-directive"] = "REPLACE"; + + std::string xattrvalue; + if(get_meta_xattr_value(from, xattrvalue)){ + S3FS_PRN_DBG("Set xattrs = %s", urlDecode(xattrvalue).c_str()); + meta["x-amz-meta-xattr"] = xattrvalue; + } + + // [NOTE] + // If it has a cache, open it first and leave it open until rename. + // The cache is renamed after put_header, because it must be open + // at the time of renaming. + { + // update time + AutoFdEntity autoent; + FdEntity* ent; + if(nullptr == (ent = autoent.OpenExistFdEntity(from))){ + // no opened fd + + // get mtime/ctime/atime from meta + struct timespec mtime = get_mtime(meta); + struct timespec ctime = get_ctime(meta); + struct timespec atime = get_atime(meta); + if(mtime.tv_sec < 0){ + mtime.tv_sec = 0L; + mtime.tv_nsec = 0L; + } + if(ctime.tv_sec < 0){ + ctime.tv_sec = 0L; + ctime.tv_nsec = 0L; + } + if(atime.tv_sec < 0){ + atime.tv_sec = 0L; + atime.tv_nsec = 0L; + } + + if(FdManager::IsCacheDir()){ + // create cache file if be needed + // + // [NOTE] + // Do not specify "S3FS_OMIT_TS" for mctime parameter. + // This is because if the cache file does not exist, the pagelist for it + // will be recreated, but the entire file area indicated by this pagelist + // will be in the "modified" state. + // This does not affect the rename process, but the cache information in + // the "modified" state remains, making it impossible to read the file correctly. 
+ // + ent = autoent.Open(from, &meta, buf.st_size, mtime, O_RDONLY, false, true, false, AutoLock::NONE); + } + if(ent){ + ent->SetMCtime(mtime, ctime); + ent->SetAtime(atime); + } + } + + // copy + if(0 != (result = put_headers(to, meta, true, /* use_st_size= */ false))){ + return result; + } + + // rename + FdManager::get()->Rename(from, to); + } + + // Remove file + result = s3fs_unlink(from); + + StatCache::getStatCacheData()->DelStat(to); + + return result; +} + +static int rename_object_nocopy(const char* from, const char* to, bool update_ctime) +{ + int result; + + FUSE_CTX_INFO1("[from=%s][to=%s]", from , to); + + if(0 != (result = check_parent_object_access(to, W_OK | X_OK))){ + // not permit writing "to" object parent dir. + return result; + } + if(0 != (result = check_parent_object_access(from, W_OK | X_OK))){ + // not permit removing "from" object parent dir. + return result; + } + + // open & load + { // scope for AutoFdEntity + AutoFdEntity autoent; + FdEntity* ent; + if(0 != (result = get_local_fent(autoent, &ent, from, O_RDWR, true))){ + S3FS_PRN_ERR("could not open and read file(%s)", from); + return result; + } + + // Set header + if(!ent->SetContentType(to)){ + S3FS_PRN_ERR("could not set content-type for %s", to); + return -EIO; + } + + // update ctime + if(update_ctime){ + struct timespec ts; + s3fs_realtime(ts); + ent->SetCtime(ts); + } + + // upload + if(0 != (result = ent->RowFlush(autoent.GetPseudoFd(), to, AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", to, result); + return result; + } + FdManager::get()->Rename(from, to); + } + + // Remove file + result = s3fs_unlink(from); + + // Stats + StatCache::getStatCacheData()->DelStat(to); + + return result; +} + +static int rename_large_object(const char* from, const char* to) +{ + int result; + struct stat buf; + headers_t meta; + + S3FS_PRN_INFO1("[from=%s][to=%s]", from , to); + + if(0 != (result = check_parent_object_access(to, W_OK | X_OK))){ + // not permit 
writing "to" object parent dir. + return result; + } + if(0 != (result = check_parent_object_access(from, W_OK | X_OK))){ + // not permit removing "from" object parent dir. + return result; + } + if(0 != (result = get_object_attribute(from, &buf, &meta, false))){ + return result; + } + + S3fsCurl s3fscurl(true); + if(0 != (result = s3fscurl.MultipartRenameRequest(from, to, meta, buf.st_size))){ + return result; + } + s3fscurl.DestroyCurlHandle(); + + // Rename cache file + FdManager::get()->Rename(from, to); + + // Remove file + result = s3fs_unlink(from); + + // Stats + StatCache::getStatCacheData()->DelStat(to); + + return result; +} + +static int clone_directory_object(const char* from, const char* to, bool update_ctime, const char* pxattrvalue) +{ + int result = -1; + struct stat stbuf; + + S3FS_PRN_INFO1("[from=%s][to=%s]", from, to); + + // get target's attributes + if(0 != (result = get_object_attribute(from, &stbuf))){ + return result; + } + + struct timespec ts_atime; + struct timespec ts_mtime; + struct timespec ts_ctime; + set_stat_to_timespec(stbuf, stat_time_type::ATIME, ts_atime); + set_stat_to_timespec(stbuf, stat_time_type::MTIME, ts_mtime); + if(update_ctime){ + s3fs_realtime(ts_ctime); + }else{ + set_stat_to_timespec(stbuf, stat_time_type::CTIME, ts_ctime); + } + result = create_directory_object(to, stbuf.st_mode, ts_atime, ts_mtime, ts_ctime, stbuf.st_uid, stbuf.st_gid, pxattrvalue); + + StatCache::getStatCacheData()->DelStat(to); + + return result; +} + +static int rename_directory(const char* from, const char* to) +{ + S3ObjList head; + s3obj_list_t headlist; + std::string strfrom = from ? from : ""; // from is without "/". + std::string strto = to ? to : ""; // to is without "/" too. 
+ std::string basepath = strfrom + "/"; + std::string newpath; // should be from name(not used) + std::string nowcache; // now cache path(not used) + dirtype DirType; + bool normdir; + std::vector mvnodes; + struct stat stbuf; + int result; + bool is_dir; + + S3FS_PRN_INFO1("[from=%s][to=%s]", from, to); + + // + // Initiate and Add base directory into mvnode struct. + // + strto += "/"; + if(0 == chk_dir_object_type(from, newpath, strfrom, nowcache, nullptr, &DirType) && dirtype::UNKNOWN != DirType){ + if(dirtype::NOOBJ != DirType){ + normdir = false; + }else{ + normdir = true; + strfrom = from; // from directory is not removed, but from directory attr is needed. + } + mvnodes.emplace_back(strfrom, strto, true, normdir); + }else{ + // Something wrong about "from" directory. + } + + // + // get a list of all the objects + // + // No delimiter is specified, the result(head) is all object keys. + // (CommonPrefixes is empty, but all object is listed in Key.) + if(0 != (result = list_bucket(basepath.c_str(), head, nullptr))){ + S3FS_PRN_ERR("list_bucket returns error."); + return result; + } + head.GetNameList(headlist); // get name without "/". + StatCache::getStatCacheData()->GetNotruncateCache(basepath, headlist); // Add notruncate file name from stat cache + S3ObjList::MakeHierarchizedList(headlist, false); // add hierarchized dir. + + s3obj_list_t::const_iterator liter; + for(liter = headlist.begin(); headlist.end() != liter; ++liter){ + // make "from" and "to" object name. + std::string from_name = basepath + (*liter); + std::string to_name = strto + (*liter); + std::string etag = head.GetETag((*liter).c_str()); + + // Check subdirectory. 
+ StatCache::getStatCacheData()->HasStat(from_name, etag.c_str()); // Check ETag + if(0 != get_object_attribute(from_name.c_str(), &stbuf, nullptr)){ + S3FS_PRN_WARN("failed to get %s object attribute.", from_name.c_str()); + continue; + } + if(S_ISDIR(stbuf.st_mode)){ + is_dir = true; + if(0 != chk_dir_object_type(from_name.c_str(), newpath, from_name, nowcache, nullptr, &DirType) || dirtype::UNKNOWN == DirType){ + S3FS_PRN_WARN("failed to get %s%s object directory type.", basepath.c_str(), (*liter).c_str()); + continue; + } + if(dirtype::NOOBJ != DirType){ + normdir = false; + }else{ + normdir = true; + from_name = basepath + (*liter); // from directory is not removed, but from directory attr is needed. + } + }else{ + is_dir = false; + normdir = false; + } + + // push this one onto the stack + mvnodes.emplace_back(from_name, to_name, is_dir, normdir); + } + + std::sort(mvnodes.begin(), mvnodes.end(), [](const mvnode& a, const mvnode& b) { return a.old_path < b.old_path; }); + + // + // rename + // + // rename directory objects. + for(auto mn_cur = mvnodes.cbegin(); mn_cur != mvnodes.cend(); ++mn_cur){ + if(mn_cur->is_dir && !mn_cur->old_path.empty()){ + std::string xattrvalue; + const char* pxattrvalue; + if(get_meta_xattr_value(mn_cur->old_path.c_str(), xattrvalue)){ + pxattrvalue = xattrvalue.c_str(); + }else{ + pxattrvalue = nullptr; + } + + // [NOTE] + // The ctime is updated only for the top (from) directory. + // Other than that, it will not be updated. 
+ // + if(0 != (result = clone_directory_object(mn_cur->old_path.c_str(), mn_cur->new_path.c_str(), (strfrom == mn_cur->old_path), pxattrvalue))){ + S3FS_PRN_ERR("clone_directory_object returned an error(%d)", result); + return result; + } + } + } + + // iterate over the list - copy the files with rename_object + // does a safe copy - copies first and then deletes old + for(auto mn_cur = mvnodes.begin(); mn_cur != mvnodes.end(); ++mn_cur){ + if(!mn_cur->is_dir){ + if(!nocopyapi && !norenameapi){ + result = rename_object(mn_cur->old_path.c_str(), mn_cur->new_path.c_str(), false); // keep ctime + }else{ + result = rename_object_nocopy(mn_cur->old_path.c_str(), mn_cur->new_path.c_str(), false); // keep ctime + } + if(0 != result){ + S3FS_PRN_ERR("rename_object returned an error(%d)", result); + return result; + } + } + } + + // Iterate over old the directories, bottoms up and remove + for(auto mn_cur = mvnodes.rbegin(); mn_cur != mvnodes.rend(); ++mn_cur){ + if(mn_cur->is_dir && !mn_cur->old_path.empty()){ + if(!(mn_cur->is_normdir)){ + if(0 != (result = s3fs_rmdir(mn_cur->old_path.c_str()))){ + S3FS_PRN_ERR("s3fs_rmdir returned an error(%d)", result); + return result; + } + }else{ + // cache clear. + StatCache::getStatCacheData()->DelStat(mn_cur->old_path); + } + } + } + + return 0; +} + +static int s3fs_rename(const char* _from, const char* _to) +{ + WTF8_ENCODE(from) + WTF8_ENCODE(to) + struct stat buf; + int result; + + FUSE_CTX_INFO("[from=%s][to=%s]", from, to); + + if(0 != (result = check_parent_object_access(to, W_OK | X_OK))){ + // not permit writing "to" object parent dir. + return result; + } + if(0 != (result = check_parent_object_access(from, W_OK | X_OK))){ + // not permit removing "from" object parent dir. 
+ return result; + } + if(0 != (result = get_object_attribute(from, &buf, nullptr))){ + return result; + } + if(0 != (result = directory_empty(to))){ + return result; + } + + // flush pending writes if file is open + { // scope for AutoFdEntity + AutoFdEntity autoent; + FdEntity* ent; + if(nullptr != (ent = autoent.OpenExistFdEntity(from, O_RDWR))){ + if(0 != (result = ent->Flush(autoent.GetPseudoFd(), AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", to, result); + return result; + } + StatCache::getStatCacheData()->DelStat(from); + } + } + + // files larger than 5GB must be modified via the multipart interface + if(S_ISDIR(buf.st_mode)){ + result = rename_directory(from, to); + }else if(!nomultipart && buf.st_size >= singlepart_copy_limit){ + result = rename_large_object(from, to); + }else{ + if(!nocopyapi && !norenameapi){ + result = rename_object(from, to, true); // update ctime + }else{ + result = rename_object_nocopy(from, to, true); // update ctime + } + } + + // update parent directory timestamp + // + // [NOTE] + // already updated timestamp for original path in above functions. 
+ // + int update_result; + if(0 != (update_result = update_mctime_parent_directory(to))){ + S3FS_PRN_ERR("succeed to create the file/directory(%s), but could not update timestamp of its parent directory(result=%d).", to, update_result); + } + + S3FS_MALLOCTRIM(0); + + return result; +} + +static int s3fs_link(const char* _from, const char* _to) +{ + WTF8_ENCODE(from) + WTF8_ENCODE(to) + FUSE_CTX_INFO("[from=%s][to=%s]", from, to); + return -ENOTSUP; +} + +static int s3fs_chmod(const char* _path, mode_t mode) +{ + WTF8_ENCODE(path) + int result; + std::string strpath; + std::string newpath; + std::string nowcache; + headers_t meta; + struct stat stbuf; + dirtype nDirType = dirtype::UNKNOWN; + + FUSE_CTX_INFO("[path=%s][mode=%04o]", path, mode); + + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + if(0 != (result = check_object_owner(path, &stbuf))){ + return result; + } + + if(S_ISDIR(stbuf.st_mode)){ + result = chk_dir_object_type(path, newpath, strpath, nowcache, &meta, &nDirType); + }else{ + strpath = path; + nowcache = strpath; + result = get_object_attribute(strpath.c_str(), nullptr, &meta); + } + if(0 != result){ + return result; + } + + if(S_ISDIR(stbuf.st_mode) && (IS_REPLACEDIR(nDirType) || IS_CREATE_MP_STAT(path))){ + std::string xattrvalue; + const char* pxattrvalue; + if(get_meta_xattr_value(path, xattrvalue)){ + pxattrvalue = xattrvalue.c_str(); + }else{ + pxattrvalue = nullptr; + } + if(IS_REPLACEDIR(nDirType)){ + // Should rebuild directory object(except new type) + // Need to remove old dir("dir" etc) and make new dir("dir/") + + // At first, remove directory old object + if(0 != (result = remove_old_type_dir(strpath, nDirType))){ + return result; + } + } + StatCache::getStatCacheData()->DelStat(nowcache); + + // Make new directory object("dir/") + struct timespec ts_atime; + struct timespec ts_mtime; + struct timespec ts_ctime; + set_stat_to_timespec(stbuf, stat_time_type::ATIME, ts_atime); + 
set_stat_to_timespec(stbuf, stat_time_type::MTIME, ts_mtime); + s3fs_realtime(ts_ctime); + + if(0 != (result = create_directory_object(newpath.c_str(), mode, ts_atime, ts_mtime, ts_ctime, stbuf.st_uid, stbuf.st_gid, pxattrvalue))){ + return result; + } + }else{ + // normal object or directory object of newer version + std::string strSourcePath = (mount_prefix.empty() && "/" == strpath) ? "//" : strpath; + headers_t updatemeta; + updatemeta["x-amz-meta-ctime"] = s3fs_str_realtime(); + updatemeta["x-amz-meta-mode"] = std::to_string(mode); + updatemeta["x-amz-copy-source"] = urlEncodePath(service_path + S3fsCred::GetBucket() + get_realpath(strSourcePath.c_str())); + updatemeta["x-amz-metadata-directive"] = "REPLACE"; + + // check opened file handle. + // + // If the file starts uploading by multipart when the disk capacity is insufficient, + // we need to put these header after finishing upload. + // Or if the file is only open, we must update to FdEntity's internal meta. + // + AutoFdEntity autoent; + FdEntity* ent; + bool need_put_header = true; + if(nullptr != (ent = autoent.OpenExistFdEntity(path))){ + if(ent->MergeOrgMeta(updatemeta)){ + // meta is changed, but now uploading. + // then the meta is pending and accumulated to be put after the upload is complete. + S3FS_PRN_INFO("meta pending until upload is complete"); + need_put_header = false; + + // If there is data in the Stats cache, update the Stats cache. + StatCache::getStatCacheData()->UpdateMetaStats(strpath, updatemeta); + + // [NOTE] + // There are cases where this function is called during the process of + // creating a new file (before uploading). + // In this case, a temporary cache exists in the Stat cache. + // So we need to update the cache, if it exists. (see. s3fs_create and s3fs_utimens) + // + if(!StatCache::getStatCacheData()->AddStat(strpath, updatemeta, false, true)){ + return -EIO; + } + } + } + if(need_put_header){ + // not found opened file. 
+ merge_headers(meta, updatemeta, true); + + // upload meta directly. + if(0 != (result = put_headers(strpath.c_str(), meta, true))){ + return result; + } + StatCache::getStatCacheData()->DelStat(nowcache); + } + } + S3FS_MALLOCTRIM(0); + + return 0; +} + +static int s3fs_chmod_nocopy(const char* _path, mode_t mode) +{ + WTF8_ENCODE(path) + int result; + std::string strpath; + std::string newpath; + std::string nowcache; + struct stat stbuf; + dirtype nDirType = dirtype::UNKNOWN; + + FUSE_CTX_INFO1("[path=%s][mode=%04o]", path, mode); + + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + if(0 != (result = check_object_owner(path, &stbuf))){ + return result; + } + + // Get attributes + if(S_ISDIR(stbuf.st_mode)){ + result = chk_dir_object_type(path, newpath, strpath, nowcache, nullptr, &nDirType); + }else{ + strpath = path; + nowcache = strpath; + result = get_object_attribute(strpath.c_str(), nullptr, nullptr); + } + if(0 != result){ + return result; + } + + if(S_ISDIR(stbuf.st_mode)){ + std::string xattrvalue; + const char* pxattrvalue; + if(get_meta_xattr_value(path, xattrvalue)){ + pxattrvalue = xattrvalue.c_str(); + }else{ + pxattrvalue = nullptr; + } + + if(IS_REPLACEDIR(nDirType)){ + // Should rebuild all directory object + // Need to remove old dir("dir" etc) and make new dir("dir/") + + // At first, remove directory old object + if(0 != (result = remove_old_type_dir(strpath, nDirType))){ + return result; + } + } + StatCache::getStatCacheData()->DelStat(nowcache); + + // Make new directory object("dir/") + struct timespec ts_atime; + struct timespec ts_mtime; + struct timespec ts_ctime; + set_stat_to_timespec(stbuf, stat_time_type::ATIME, ts_atime); + set_stat_to_timespec(stbuf, stat_time_type::MTIME, ts_mtime); + s3fs_realtime(ts_ctime); + + if(0 != (result = create_directory_object(newpath.c_str(), mode, ts_atime, ts_mtime, ts_ctime, stbuf.st_uid, stbuf.st_gid, pxattrvalue))){ + return result; + } + }else{ + // normal 
object or directory object of newer version + + // open & load + AutoFdEntity autoent; + FdEntity* ent; + if(0 != (result = get_local_fent(autoent, &ent, strpath.c_str(), O_RDWR, true))){ + S3FS_PRN_ERR("could not open and read file(%s)", strpath.c_str()); + return result; + } + + struct timespec ts; + s3fs_realtime(ts); + ent->SetCtime(ts); + + // Change file mode + ent->SetMode(mode); + + // upload + if(0 != (result = ent->Flush(autoent.GetPseudoFd(), AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", strpath.c_str(), result); + return result; + } + StatCache::getStatCacheData()->DelStat(nowcache); + } + S3FS_MALLOCTRIM(0); + + return result; +} + +static int s3fs_chown(const char* _path, uid_t uid, gid_t gid) +{ + WTF8_ENCODE(path) + int result; + std::string strpath; + std::string newpath; + std::string nowcache; + headers_t meta; + struct stat stbuf; + dirtype nDirType = dirtype::UNKNOWN; + + FUSE_CTX_INFO("[path=%s][uid=%u][gid=%u]", path, (unsigned int)uid, (unsigned int)gid); + + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + if(0 != (result = check_object_owner(path, &stbuf))){ + return result; + } + + if((uid_t)(-1) == uid){ + uid = stbuf.st_uid; + } + if((gid_t)(-1) == gid){ + gid = stbuf.st_gid; + } + if(S_ISDIR(stbuf.st_mode)){ + result = chk_dir_object_type(path, newpath, strpath, nowcache, &meta, &nDirType); + }else{ + strpath = path; + nowcache = strpath; + result = get_object_attribute(strpath.c_str(), nullptr, &meta); + } + if(0 != result){ + return result; + } + + if(S_ISDIR(stbuf.st_mode) && (IS_REPLACEDIR(nDirType) || IS_CREATE_MP_STAT(path))){ + std::string xattrvalue; + const char* pxattrvalue; + if(get_meta_xattr_value(path, xattrvalue)){ + pxattrvalue = xattrvalue.c_str(); + }else{ + pxattrvalue = nullptr; + } + + if(IS_REPLACEDIR(nDirType)){ + // Should rebuild directory object(except new type) + // Need to remove old dir("dir" etc) and make new dir("dir/") + + // At first, 
remove directory old object + if(0 != (result = remove_old_type_dir(strpath, nDirType))){ + return result; + } + } + StatCache::getStatCacheData()->DelStat(nowcache); + + // Make new directory object("dir/") + struct timespec ts_atime; + struct timespec ts_mtime; + struct timespec ts_ctime; + set_stat_to_timespec(stbuf, stat_time_type::ATIME, ts_atime); + set_stat_to_timespec(stbuf, stat_time_type::MTIME, ts_mtime); + s3fs_realtime(ts_ctime); + + if(0 != (result = create_directory_object(newpath.c_str(), stbuf.st_mode, ts_atime, ts_mtime, ts_ctime, uid, gid, pxattrvalue))){ + return result; + } + }else{ + std::string strSourcePath = (mount_prefix.empty() && "/" == strpath) ? "//" : strpath; + headers_t updatemeta; + updatemeta["x-amz-meta-ctime"] = s3fs_str_realtime(); + updatemeta["x-amz-meta-uid"] = std::to_string(uid); + updatemeta["x-amz-meta-gid"] = std::to_string(gid); + updatemeta["x-amz-copy-source"] = urlEncodePath(service_path + S3fsCred::GetBucket() + get_realpath(strSourcePath.c_str())); + updatemeta["x-amz-metadata-directive"] = "REPLACE"; + + // check opened file handle. + // + // If the file starts uploading by multipart when the disk capacity is insufficient, + // we need to put these header after finishing upload. + // Or if the file is only open, we must update to FdEntity's internal meta. + // + AutoFdEntity autoent; + FdEntity* ent; + bool need_put_header = true; + if(nullptr != (ent = autoent.OpenExistFdEntity(path))){ + if(ent->MergeOrgMeta(updatemeta)){ + // meta is changed, but now uploading. + // then the meta is pending and accumulated to be put after the upload is complete. + S3FS_PRN_INFO("meta pending until upload is complete"); + need_put_header = false; + + // If there is data in the Stats cache, update the Stats cache. + StatCache::getStatCacheData()->UpdateMetaStats(strpath, updatemeta); + + // [NOTE] + // There are cases where this function is called during the process of + // creating a new file (before uploading). 
+ // In this case, a temporary cache exists in the Stat cache. + // So we need to update the cache, if it exists. (see. s3fs_create and s3fs_utimens) + // + if(!StatCache::getStatCacheData()->AddStat(strpath, updatemeta, false, true)){ + return -EIO; + } + } + } + if(need_put_header){ + // not found opened file. + merge_headers(meta, updatemeta, true); + + // upload meta directly. + if(0 != (result = put_headers(strpath.c_str(), meta, true))){ + return result; + } + StatCache::getStatCacheData()->DelStat(nowcache); + } + } + S3FS_MALLOCTRIM(0); + + return 0; +} + +static int s3fs_chown_nocopy(const char* _path, uid_t uid, gid_t gid) +{ + WTF8_ENCODE(path) + int result; + std::string strpath; + std::string newpath; + std::string nowcache; + struct stat stbuf; + dirtype nDirType = dirtype::UNKNOWN; + + FUSE_CTX_INFO1("[path=%s][uid=%u][gid=%u]", path, (unsigned int)uid, (unsigned int)gid); + + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + if(0 != (result = check_object_owner(path, &stbuf))){ + return result; + } + + if((uid_t)(-1) == uid){ + uid = stbuf.st_uid; + } + if((gid_t)(-1) == gid){ + gid = stbuf.st_gid; + } + + // Get attributes + if(S_ISDIR(stbuf.st_mode)){ + result = chk_dir_object_type(path, newpath, strpath, nowcache, nullptr, &nDirType); + }else{ + strpath = path; + nowcache = strpath; + result = get_object_attribute(strpath.c_str(), nullptr, nullptr); + } + if(0 != result){ + return result; + } + + if(S_ISDIR(stbuf.st_mode)){ + std::string xattrvalue; + const char* pxattrvalue; + if(get_meta_xattr_value(path, xattrvalue)){ + pxattrvalue = xattrvalue.c_str(); + }else{ + pxattrvalue = nullptr; + } + + if(IS_REPLACEDIR(nDirType)){ + // Should rebuild all directory object + // Need to remove old dir("dir" etc) and make new dir("dir/") + + // At first, remove directory old object + if(0 != (result = remove_old_type_dir(strpath, nDirType))){ + return result; + } + } + StatCache::getStatCacheData()->DelStat(nowcache); + + 
// Make new directory object("dir/") + struct timespec ts_atime; + struct timespec ts_mtime; + struct timespec ts_ctime; + set_stat_to_timespec(stbuf, stat_time_type::ATIME, ts_atime); + set_stat_to_timespec(stbuf, stat_time_type::MTIME, ts_mtime); + s3fs_realtime(ts_ctime); + + if(0 != (result = create_directory_object(newpath.c_str(), stbuf.st_mode, ts_atime, ts_mtime, ts_ctime, uid, gid, pxattrvalue))){ + return result; + } + }else{ + // normal object or directory object of newer version + + // open & load + AutoFdEntity autoent; + FdEntity* ent; + if(0 != (result = get_local_fent(autoent, &ent, strpath.c_str(), O_RDWR, true))){ + S3FS_PRN_ERR("could not open and read file(%s)", strpath.c_str()); + return result; + } + + struct timespec ts; + s3fs_realtime(ts); + ent->SetCtime(ts); + + // Change owner + ent->SetUId(uid); + ent->SetGId(gid); + + // upload + if(0 != (result = ent->Flush(autoent.GetPseudoFd(), AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", strpath.c_str(), result); + return result; + } + StatCache::getStatCacheData()->DelStat(nowcache); + } + S3FS_MALLOCTRIM(0); + + return result; +} + +static timespec handle_utimens_special_values(timespec ts, timespec now, timespec orig) +{ + if(ts.tv_nsec == UTIME_NOW){ + return now; + }else if(ts.tv_nsec == UTIME_OMIT){ + return orig; + }else{ + return ts; + } +} + +static int update_mctime_parent_directory(const char* _path) +{ + if(!update_parent_dir_stat){ + // Disable updating parent directory stat. 
+ S3FS_PRN_DBG("Updating parent directory stats is disabled"); + return 0; + } + + WTF8_ENCODE(path) + int result; + std::string parentpath; // parent directory path + std::string nowpath; // now directory object path("dir" or "dir/" or "xxx_$folder$", etc) + std::string newpath; // directory path for the current version("dir/") + std::string nowcache; + headers_t meta; + struct stat stbuf; + struct timespec mctime; + struct timespec atime; + dirtype nDirType = dirtype::UNKNOWN; + + S3FS_PRN_INFO2("[path=%s]", path); + + // get parent directory path + parentpath = mydirname(path); + + // check & get directory type + if(0 != (result = chk_dir_object_type(parentpath.c_str(), newpath, nowpath, nowcache, &meta, &nDirType))){ + return result; + } + + // get directory stat + // + // [NOTE] + // It is assumed that this function is called after the operation on + // the file is completed, so there is no need to check the permissions + // on the parent directory. + // + if(0 != (result = get_object_attribute(parentpath.c_str(), &stbuf))){ + // If there is not the target file(object), result is -ENOENT. 
+ return result; + } + if(!S_ISDIR(stbuf.st_mode)){ + S3FS_PRN_ERR("path(%s) is not parent directory.", parentpath.c_str()); + return -EIO; + } + + // make atime/mtime/ctime for updating + s3fs_realtime(mctime); + set_stat_to_timespec(stbuf, stat_time_type::ATIME, atime); + + if(0 == atime.tv_sec && 0 == atime.tv_nsec){ + atime = mctime; + } + + if(nocopyapi || IS_REPLACEDIR(nDirType) || IS_CREATE_MP_STAT(parentpath.c_str())){ + // Should rebuild directory object(except new type) + // Need to remove old dir("dir" etc) and make new dir("dir/") + std::string xattrvalue; + const char* pxattrvalue; + if(get_meta_xattr_value(path, xattrvalue)){ + pxattrvalue = xattrvalue.c_str(); + }else{ + pxattrvalue = nullptr; + } + + // At first, remove directory old object + if(!nowpath.empty()){ + if(0 != (result = remove_old_type_dir(nowpath, nDirType))){ + return result; + } + } + if(!nowcache.empty()){ + StatCache::getStatCacheData()->DelStat(nowcache); + } + + // Make new directory object("dir/") + if(0 != (result = create_directory_object(newpath.c_str(), stbuf.st_mode, atime, mctime, mctime, stbuf.st_uid, stbuf.st_gid, pxattrvalue))){ + return result; + } + }else{ + std::string strSourcePath = (mount_prefix.empty() && "/" == nowpath) ? "//" : nowpath; + headers_t updatemeta; + updatemeta["x-amz-meta-mtime"] = str(mctime); + updatemeta["x-amz-meta-ctime"] = str(mctime); + updatemeta["x-amz-meta-atime"] = str(atime); + updatemeta["x-amz-copy-source"] = urlEncodePath(service_path + S3fsCred::GetBucket() + get_realpath(strSourcePath.c_str())); + updatemeta["x-amz-metadata-directive"] = "REPLACE"; + + merge_headers(meta, updatemeta, true); + + // upload meta for parent directory. 
+ if(0 != (result = put_headers(nowpath.c_str(), meta, true))){ + return result; + } + StatCache::getStatCacheData()->DelStat(nowcache); + } + S3FS_MALLOCTRIM(0); + + return 0; +} + +static int s3fs_utimens(const char* _path, const struct timespec ts[2]) +{ + WTF8_ENCODE(path) + int result; + std::string strpath; + std::string newpath; + std::string nowcache; + headers_t meta; + struct stat stbuf; + dirtype nDirType = dirtype::UNKNOWN; + + FUSE_CTX_INFO("[path=%s][mtime=%s][ctime/atime=%s]", path, str(ts[1]).c_str(), str(ts[0]).c_str()); + + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + if(0 != (result = check_object_access(path, W_OK, &stbuf))){ + if(0 != check_object_owner(path, &stbuf)){ + return result; + } + } + + struct timespec now; + struct timespec ts_atime; + struct timespec ts_ctime; + struct timespec ts_mtime; + + s3fs_realtime(now); + set_stat_to_timespec(stbuf, stat_time_type::ATIME, ts_atime); + set_stat_to_timespec(stbuf, stat_time_type::CTIME, ts_ctime); + set_stat_to_timespec(stbuf, stat_time_type::MTIME, ts_mtime); + + struct timespec atime = handle_utimens_special_values(ts[0], now, ts_atime); + struct timespec ctime = handle_utimens_special_values(ts[0], now, ts_ctime); + struct timespec mtime = handle_utimens_special_values(ts[1], now, ts_mtime); + + if(S_ISDIR(stbuf.st_mode)){ + result = chk_dir_object_type(path, newpath, strpath, nowcache, &meta, &nDirType); + }else{ + strpath = path; + nowcache = strpath; + result = get_object_attribute(strpath.c_str(), nullptr, &meta); + } + if(0 != result){ + return result; + } + + if(S_ISDIR(stbuf.st_mode) && (IS_REPLACEDIR(nDirType) || IS_CREATE_MP_STAT(path))){ + std::string xattrvalue; + const char* pxattrvalue; + if(get_meta_xattr_value(path, xattrvalue)){ + pxattrvalue = xattrvalue.c_str(); + }else{ + pxattrvalue = nullptr; + } + + if(IS_REPLACEDIR(nDirType)){ + // Should rebuild directory object(except new type) + // Need to remove old dir("dir" etc) and make 
new dir("dir/") + + // At first, remove directory old object + if(0 != (result = remove_old_type_dir(strpath, nDirType))){ + return result; + } + } + StatCache::getStatCacheData()->DelStat(nowcache); + + // Make new directory object("dir/") + if(0 != (result = create_directory_object(newpath.c_str(), stbuf.st_mode, atime, mtime, ctime, stbuf.st_uid, stbuf.st_gid, pxattrvalue))){ + return result; + } + }else{ + std::string strSourcePath = (mount_prefix.empty() && "/" == strpath) ? "//" : strpath; + headers_t updatemeta; + updatemeta["x-amz-meta-mtime"] = str(mtime); + updatemeta["x-amz-meta-ctime"] = str(ctime); + updatemeta["x-amz-meta-atime"] = str(atime); + updatemeta["x-amz-copy-source"] = urlEncodePath(service_path + S3fsCred::GetBucket() + get_realpath(strSourcePath.c_str())); + updatemeta["x-amz-metadata-directive"] = "REPLACE"; + + // check opened file handle. + // + // If the file starts uploading by multipart when the disk capacity is insufficient, + // we need to put these header after finishing upload. + // Or if the file is only open, we must update to FdEntity's internal meta. + // + AutoFdEntity autoent; + FdEntity* ent; + bool need_put_header = true; + bool keep_mtime = false; + if(nullptr != (ent = autoent.OpenExistFdEntity(path))){ + if(ent->MergeOrgMeta(updatemeta)){ + // meta is changed, but now uploading. + // then the meta is pending and accumulated to be put after the upload is complete. + S3FS_PRN_INFO("meta pending until upload is complete"); + need_put_header = false; + ent->SetHoldingMtime(mtime); + + // If there is data in the Stats cache, update the Stats cache. + StatCache::getStatCacheData()->UpdateMetaStats(strpath, updatemeta); + + // [NOTE] + // There are cases where this function is called during the process of + // creating a new file (before uploading). + // In this case, a temporary cache exists in the Stat cache.(see s3fs_create) + // So we need to update the cache, if it exists. 
+ // + // Previously, the process of creating a new file was to update the + // file content after first uploading the file, but now the file is + // not created until flushing. + // So we need to create a temporary Stat cache for it. + // + if(!StatCache::getStatCacheData()->AddStat(strpath, updatemeta, false, true)){ + return -EIO; + } + + }else{ + S3FS_PRN_INFO("meta is not pending, but need to keep current mtime."); + + // [NOTE] + // Depending on the order in which write/flush and utimens are called, + // the mtime updated here may be overwritten at the time of flush. + // To avoid that, set a special flag. + // + keep_mtime = true; + } + } + if(need_put_header){ + // not found opened file. + merge_headers(meta, updatemeta, true); + + // upload meta directly. + if(0 != (result = put_headers(strpath.c_str(), meta, true))){ + return result; + } + StatCache::getStatCacheData()->DelStat(nowcache); + + if(keep_mtime){ + ent->SetHoldingMtime(mtime); + } + } + } + S3FS_MALLOCTRIM(0); + + return 0; +} + +static int s3fs_utimens_nocopy(const char* _path, const struct timespec ts[2]) +{ + WTF8_ENCODE(path) + int result; + std::string strpath; + std::string newpath; + std::string nowcache; + struct stat stbuf; + dirtype nDirType = dirtype::UNKNOWN; + + FUSE_CTX_INFO1("[path=%s][mtime=%s][atime/ctime=%s]", path, str(ts[1]).c_str(), str(ts[0]).c_str()); + + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + if(0 != (result = check_object_access(path, W_OK, &stbuf))){ + if(0 != check_object_owner(path, &stbuf)){ + return result; + } + } + + struct timespec now; + struct timespec ts_atime; + struct timespec ts_ctime; + struct timespec ts_mtime; + + s3fs_realtime(now); + set_stat_to_timespec(stbuf, stat_time_type::ATIME, ts_atime); + set_stat_to_timespec(stbuf, stat_time_type::CTIME, ts_ctime); + set_stat_to_timespec(stbuf, stat_time_type::MTIME, ts_mtime); + + struct timespec atime = handle_utimens_special_values(ts[0], now, ts_atime); + 
struct timespec ctime = handle_utimens_special_values(ts[0], now, ts_ctime); + struct timespec mtime = handle_utimens_special_values(ts[1], now, ts_mtime); + + // Get attributes + if(S_ISDIR(stbuf.st_mode)){ + result = chk_dir_object_type(path, newpath, strpath, nowcache, nullptr, &nDirType); + }else{ + strpath = path; + nowcache = strpath; + result = get_object_attribute(strpath.c_str(), nullptr, nullptr); + } + if(0 != result){ + return result; + } + + if(S_ISDIR(stbuf.st_mode)){ + std::string xattrvalue; + const char* pxattrvalue; + if(get_meta_xattr_value(path, xattrvalue)){ + pxattrvalue = xattrvalue.c_str(); + }else{ + pxattrvalue = nullptr; + } + + if(IS_REPLACEDIR(nDirType)){ + // Should rebuild all directory object + // Need to remove old dir("dir" etc) and make new dir("dir/") + + // At first, remove directory old object + if(0 != (result = remove_old_type_dir(strpath, nDirType))){ + return result; + } + } + StatCache::getStatCacheData()->DelStat(nowcache); + + // Make new directory object("dir/") + if(0 != (result = create_directory_object(newpath.c_str(), stbuf.st_mode, atime, mtime, ctime, stbuf.st_uid, stbuf.st_gid, pxattrvalue))){ + return result; + } + }else{ + // normal object or directory object of newer version + + // open & load + AutoFdEntity autoent; + FdEntity* ent; + if(0 != (result = get_local_fent(autoent, &ent, strpath.c_str(), O_RDWR, true))){ + S3FS_PRN_ERR("could not open and read file(%s)", strpath.c_str()); + return result; + } + + // set mtime/ctime + if(0 != (result = ent->SetMCtime(mtime, ctime))){ + S3FS_PRN_ERR("could not set mtime and ctime to file(%s): result=%d", strpath.c_str(), result); + return result; + } + + // set atime + if(0 != (result = ent->SetAtime(atime))){ + S3FS_PRN_ERR("could not set atime to file(%s): result=%d", strpath.c_str(), result); + return result; + } + + // upload + if(0 != (result = ent->Flush(autoent.GetPseudoFd(), AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", 
strpath.c_str(), result); + return result; + } + StatCache::getStatCacheData()->DelStat(nowcache); + } + S3FS_MALLOCTRIM(0); + + return result; +} + +static int s3fs_truncate(const char* _path, off_t size) +{ + WTF8_ENCODE(path) + int result; + headers_t meta; + AutoFdEntity autoent; + FdEntity* ent = nullptr; + + FUSE_CTX_INFO("[path=%s][size=%lld]", path, static_cast(size)); + + if(size < 0){ + size = 0; + } + + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + if(0 != (result = check_object_access(path, W_OK, nullptr))){ + return result; + } + + // Get file information + if(0 == (result = get_object_attribute(path, nullptr, &meta))){ + // File exists + + // [NOTE] + // If the file exists, the file has already been opened by FUSE before + // truncate is called. Then the call below will change the file size. + // (When an already open file is changed the file size, FUSE will not + // reopen it.) + // The Flush is called before this file is closed, so there is no need + // to do it here. + // + // [NOTICE] + // FdManager::Open() ignores changes that reduce the file size for the + // file you are editing. However, if user opens only onece, edit it, + // and then shrink the file, it should be done. + // When this function is called, the file is already open by FUSE or + // some other operation. Therefore, if the number of open files is 1, + // no edits other than that fd will be made, and the files will be + // shrunk using ignore_modify flag even while editing. + // See the comments when merging this code for FUSE2 limitations. + // (In FUSE3, it will be possible to handle it reliably using fuse_file_info.) 
+ // + bool ignore_modify; + if(1 < FdManager::GetOpenFdCount(path)){ + ignore_modify = false; + }else{ + ignore_modify = true; + } + + if(nullptr == (ent = autoent.Open(path, &meta, size, S3FS_OMIT_TS, O_RDWR, false, true, ignore_modify, AutoLock::NONE))){ + S3FS_PRN_ERR("could not open file(%s): errno=%d", path, errno); + return -EIO; + } + if(use_newcache){ + int res = accessor->Truncate(path, size); + if(res) return res; + } + ent->UpdateCtime(); + +#if defined(__APPLE__) + // [NOTE] + // Only for macos, this truncate calls to "size=0" do not reflect size. + // The cause is unknown now, but it can be avoided by flushing the file. + // + if(0 == size){ + if(0 != (result = ent->Flush(autoent.GetPseudoFd(), AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", path, result); + return result; + } + StatCache::getStatCacheData()->DelStat(path); + } +#endif + + }else{ + // Not found -> Make tmpfile(with size) + const struct fuse_context* pcxt; + if(nullptr == (pcxt = fuse_get_context())){ + return -EIO; + } + + std::string strnow = s3fs_str_realtime(); + meta["Content-Type"] = "application/octet-stream"; // Static + meta["x-amz-meta-mode"] = std::to_string(S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO); + meta["x-amz-meta-ctime"] = strnow; + meta["x-amz-meta-mtime"] = strnow; + meta["x-amz-meta-uid"] = std::to_string(pcxt->uid); + meta["x-amz-meta-gid"] = std::to_string(pcxt->gid); + + if(nullptr == (ent = autoent.Open(path, &meta, size, S3FS_OMIT_TS, O_RDWR, true, true, false, AutoLock::NONE))){ + S3FS_PRN_ERR("could not open file(%s): errno=%d", path, errno); + return -EIO; + } + if(use_newcache){ + int res = accessor->Truncate(path, size); + if(res) return res; + } + if(0 != (result = ent->Flush(autoent.GetPseudoFd(), AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", path, result); + return result; + } + StatCache::getStatCacheData()->DelStat(path); + } + S3FS_MALLOCTRIM(0); + + return result; +} + +static int 
s3fs_open(const char* _path, struct fuse_file_info* fi) +{ + WTF8_ENCODE(path) + int result; + struct stat st; + bool needs_flush = false; + + FUSE_CTX_INFO("[path=%s][flags=0x%x]", path, fi->flags); + + if ((fi->flags & O_ACCMODE) == O_RDONLY && fi->flags & O_TRUNC) { + return -EACCES; + } + + // [NOTE] + // Delete the Stats cache only if the file is not open. + // If the file is open, the stats cache will not be deleted as + // there are cases where the object does not exist on the server + // and only the Stats cache exists. + // + if(StatCache::getStatCacheData()->HasStat(path)){ + if(!FdManager::HasOpenEntityFd(path)){ + StatCache::getStatCacheData()->DelStat(path); + } + } + + int mask = (O_RDONLY != (fi->flags & O_ACCMODE) ? W_OK : R_OK); + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + + result = check_object_access(path, mask, &st); + if(-ENOENT == result){ + if(0 != (result = check_parent_object_access(path, W_OK))){ + return result; + } + }else if(0 != result){ + return result; + } + + AutoFdEntity autoent; + FdEntity* ent; + headers_t meta; + + if((unsigned int)fi->flags & O_TRUNC){ + if(0 != st.st_size){ + st.st_size = 0; + needs_flush = true; + } + }else{ + // [NOTE] + // If the file has already been opened and edited, the file size in + // the edited state is given priority. + // This prevents the file size from being reset to its original size + // if you keep the file open, shrink its size, and then read the file + // from another process while it has not yet been flushed. + // + if(nullptr != (ent = autoent.OpenExistFdEntity(path)) && ent->IsModified()){ + // sets the file size being edited. 
+ ent->GetSize(st.st_size); + } + } + if(!S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)){ + st.st_mtime = -1; + } + + if(0 != (result = get_object_attribute(path, nullptr, &meta, true, nullptr, true))){ // no truncate cache + return result; + } + + struct timespec st_mctime; + set_stat_to_timespec(st, stat_time_type::MTIME, st_mctime); + + if(nullptr == (ent = autoent.Open(path, &meta, st.st_size, st_mctime, fi->flags, false, true, false, AutoLock::NONE))){ + StatCache::getStatCacheData()->DelStat(path); + return -EIO; + } + + if (needs_flush){ + struct timespec ts; + s3fs_realtime(ts); + ent->SetMCtime(ts, ts); + + if(0 != (result = ent->RowFlush(autoent.GetPseudoFd(), path, AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", path, result); + StatCache::getStatCacheData()->DelStat(path); + return result; + } + } + fi->fh = autoent.Detach(); // KEEP fdentity open; + + S3FS_MALLOCTRIM(0); + + return 0; +} + +static int s3fs_read(const char* _path, char* buf, size_t size, off_t offset, struct fuse_file_info* fi) +{ + WTF8_ENCODE(path) + ssize_t res; + + S3FS_PRN_WARN("[path=%s][size=%zu][offset=%lld][pseudo_fd=%llu]", path, size, static_cast(offset), (unsigned long long)(fi->fh)); + + AutoFdEntity autoent; + FdEntity* ent; + if(nullptr == (ent = autoent.GetExistFdEntity(path, static_cast(fi->fh)))){ + S3FS_PRN_ERR("could not find opened pseudo_fd(=%llu) for path(%s)", (unsigned long long)(fi->fh), path); + return -EIO; + } + + // check real file size + off_t realsize = 0; + if(!ent->GetSize(realsize) || 0 == realsize){ + S3FS_PRN_DBG("file size is 0, so break to read."); + return 0; + } + + if(0 > (res = ent->Read(static_cast(fi->fh), buf, offset, size, false))){ + S3FS_PRN_WARN("failed to read file(%s). 
result=%zd", path, res); + } + return static_cast(res); +} + +static int s3fs_write(const char* _path, const char* buf, size_t size, off_t offset, struct fuse_file_info* fi) +{ + WTF8_ENCODE(path) + ssize_t res; + + S3FS_PRN_WARN("[path=%s][size=%zu][offset=%lld][pseudo_fd=%llu]", path, size, static_cast(offset), (unsigned long long)(fi->fh)); + + AutoFdEntity autoent; + FdEntity* ent; + if(nullptr == (ent = autoent.GetExistFdEntity(path, static_cast(fi->fh)))){ + S3FS_PRN_ERR("could not find opened pseudo_fd(%llu) for path(%s)", (unsigned long long)(fi->fh), path); + return -EIO; + } + + if(0 > (res = ent->Write(static_cast(fi->fh), buf, offset, size))){ + S3FS_PRN_WARN("failed to write file(%s). result=%zd", path, res); + } + + if(max_dirty_data != -1 && ent->BytesModified() >= max_dirty_data && !use_newcache){ + int flushres; + if(0 != (flushres = ent->RowFlush(static_cast(fi->fh), path, AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", path, flushres); + StatCache::getStatCacheData()->DelStat(path); + return flushres; + } + // Punch a hole in the file to recover disk space. + if(!ent->PunchHole()){ + S3FS_PRN_WARN("could not punching HOLEs to a cache file, but continue."); + } + } + + return static_cast(res); +} + +static int s3fs_statfs(const char* _path, struct statvfs* stbuf) +{ + // WTF8_ENCODE(path) + stbuf->f_bsize = s3fs_block_size; + stbuf->f_namemax = NAME_MAX; + +#if defined(__MSYS__) + // WinFsp resolves the free space from f_bfree * f_frsize, and the total space from f_blocks * f_frsize (in bytes). 
+ stbuf->f_blocks = bucket_block_count; + stbuf->f_frsize = stbuf->f_bsize; + stbuf->f_bfree = stbuf->f_blocks; +#elif defined(__APPLE__) + stbuf->f_blocks = bucket_block_count; + stbuf->f_frsize = stbuf->f_bsize; + stbuf->f_bfree = stbuf->f_blocks; + stbuf->f_files = UINT32_MAX; + stbuf->f_ffree = UINT32_MAX; + stbuf->f_favail = UINT32_MAX; +#else + stbuf->f_frsize = stbuf->f_bsize; + stbuf->f_blocks = bucket_block_count; + stbuf->f_bfree = stbuf->f_blocks; +#endif + stbuf->f_bavail = stbuf->f_blocks; + + return 0; +} + +static int s3fs_flush(const char* _path, struct fuse_file_info* fi) +{ + WTF8_ENCODE(path) + int result; + + FUSE_CTX_INFO("[path=%s][pseudo_fd=%llu]", path, (unsigned long long)(fi->fh)); + + int mask = (O_RDONLY != (fi->flags & O_ACCMODE) ? W_OK : R_OK); + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + result = check_object_access(path, mask, nullptr); + if(-ENOENT == result){ + if(0 != (result = check_parent_object_access(path, W_OK))){ + return result; + } + }else if(0 != result){ + return result; + } + + AutoFdEntity autoent; + FdEntity* ent; + if(nullptr != (ent = autoent.GetExistFdEntity(path, static_cast(fi->fh)))){ + bool is_new_file = ent->IsDirtyNewFile(); + + ent->UpdateMtime(true); // clear the flag not to update mtime. + ent->UpdateCtime(); + result = ent->Flush(static_cast(fi->fh), AutoLock::NONE, false); + StatCache::getStatCacheData()->DelStat(path); + + if(is_new_file){ + // update parent directory timestamp + int update_result; + if(0 != (update_result = update_mctime_parent_directory(path))){ + S3FS_PRN_ERR("succeed to create the file(%s), but could not update timestamp of its parent directory(result=%d).", path, update_result); + } + } + } + S3FS_MALLOCTRIM(0); + + return result; +} + +// [NOTICE] +// Assumption is a valid fd. 
+// +static int s3fs_fsync(const char* _path, int datasync, struct fuse_file_info* fi) +{ + WTF8_ENCODE(path) + int result = 0; + + FUSE_CTX_INFO("[path=%s][datasync=%d][pseudo_fd=%llu]", path, datasync, (unsigned long long)(fi->fh)); + + AutoFdEntity autoent; + FdEntity* ent; + if(nullptr != (ent = autoent.GetExistFdEntity(path, static_cast(fi->fh)))){ + bool is_new_file = ent->IsDirtyNewFile(); + + if(0 == datasync){ + ent->UpdateMtime(); + ent->UpdateCtime(); + } + result = ent->Flush(static_cast(fi->fh), AutoLock::NONE, false); + + if(0 != datasync){ + // [NOTE] + // The metadata are not updated when fdatasync is called. + // Instead of it, these metadata are pended and set the dirty flag here. + // Setting this flag allows metadata to be updated even if there is no + // content update between the fdatasync call and the flush call. + // + ent->MarkDirtyMetadata(); + } + + if(is_new_file){ + // update parent directory timestamp + int update_result; + if(0 != (update_result = update_mctime_parent_directory(path))){ + S3FS_PRN_ERR("succeed to create the file(%s), but could not update timestamp of its parent directory(result=%d).", path, update_result); + } + } + } + S3FS_MALLOCTRIM(0); + + // Issue 320: Delete stat cache entry because st_size may have changed. + StatCache::getStatCacheData()->DelStat(path); + + return result; +} + +static int s3fs_release(const char* _path, struct fuse_file_info* fi) +{ + WTF8_ENCODE(path) + FUSE_CTX_INFO("[path=%s][pseudo_fd=%llu]", path, (unsigned long long)(fi->fh)); + + { // scope for AutoFdEntity + AutoFdEntity autoent; + + // [NOTE] + // The pseudo fd stored in fi->fh is attached to AutoFdEntry so that it can be + // destroyed here. 
+ // + FdEntity* ent; + if(nullptr == (ent = autoent.Attach(path, static_cast(fi->fh)))){ + S3FS_PRN_ERR("could not find pseudo_fd(%llu) for path(%s)", (unsigned long long)(fi->fh), path); + return -EIO; + } + + // [NOTE] + // There are cases when s3fs_flush is not called and s3fs_release is called. + // (There have been reported cases where it is not called when exported as NFS.) + // Therefore, Flush() is called here to try to upload the data. + // Flush() will only perform an upload if the file has been updated. + // + int result; + if(ent->IsModified()){ + if(0 != (result = ent->Flush(static_cast(fi->fh), AutoLock::NONE, false))){ + S3FS_PRN_ERR("failed to upload file contentsfor pseudo_fd(%llu) / path(%s) by result(%d)", (unsigned long long)(fi->fh), path, result); + return result; + } + } + + // [NOTE] + // All opened file's stats is cached with no truncate flag. + // Thus we unset it here. + StatCache::getStatCacheData()->ChangeNoTruncateFlag(path, false); + + // [NOTICE] + // At first, we remove stats cache. + // Because fuse does not wait for response from "release" function. :-( + // And fuse runs next command before this function returns. + // Thus we call deleting stats function ASAP. 
+ // + if((fi->flags & O_RDWR) || (fi->flags & O_WRONLY)){ + StatCache::getStatCacheData()->DelStat(path); + } + + bool is_new_file = ent->IsDirtyNewFile(); + + if(0 != (result = ent->UploadPending(static_cast(fi->fh), AutoLock::NONE))){ + S3FS_PRN_ERR("could not upload pending data(meta, etc) for pseudo_fd(%llu) / path(%s)", (unsigned long long)(fi->fh), path); + return result; + } + + if(is_new_file){ + // update parent directory timestamp + int update_result; + if(0 != (update_result = update_mctime_parent_directory(path))){ + S3FS_PRN_ERR("succeed to create the file(%s), but could not update timestamp of its parent directory(result=%d).", path, update_result); + } + } + } + + // check - for debug + if(S3fsLog::IsS3fsLogDbg()){ + if(FdManager::HasOpenEntityFd(path)){ + S3FS_PRN_DBG("file(%s) is still opened(another pseudo fd is opend).", path); + } + } + S3FS_MALLOCTRIM(0); + + return 0; +} + +static int s3fs_opendir(const char* _path, struct fuse_file_info* fi) +{ + WTF8_ENCODE(path) + int result; + int mask = (O_RDONLY != (fi->flags & O_ACCMODE) ? 
W_OK : R_OK); + + FUSE_CTX_INFO("[path=%s][flags=0x%x]", path, fi->flags); + + if(0 == (result = check_object_access(path, mask, nullptr))){ + result = check_parent_object_access(path, X_OK); + } + S3FS_MALLOCTRIM(0); + + return result; +} + +// cppcheck-suppress unmatchedSuppression +// cppcheck-suppress constParameterCallback +static bool multi_head_callback(S3fsCurl* s3fscurl, void* param) +{ + if(!s3fscurl){ + return false; + } + + // Add stat cache + std::string saved_path = s3fscurl->GetSpecialSavedPath(); + if(!StatCache::getStatCacheData()->AddStat(saved_path, *(s3fscurl->GetResponseHeaders()))){ + S3FS_PRN_ERR("failed adding stat cache [path=%s]", saved_path.c_str()); + return false; + } + + // Get stats from stats cache(for converting from meta), and fill + std::string bpath = mybasename(saved_path); + if(use_wtf8){ + bpath = s3fs_wtf8_decode(bpath); + } + if(param){ + SyncFiller* pcbparam = reinterpret_cast(param); + struct stat st; + if(StatCache::getStatCacheData()->GetStat(saved_path, &st)){ + pcbparam->Fill(bpath.c_str(), &st, 0); + }else{ + S3FS_PRN_INFO2("Could not find %s file in stat cache.", saved_path.c_str()); + pcbparam->Fill(bpath.c_str(), nullptr, 0); + } + }else{ + S3FS_PRN_WARN("param(multi_head_callback_param*) is nullptr, then can not call filler."); + } + + return true; +} + +struct multi_head_notfound_callback_param +{ + pthread_mutex_t list_lock; + s3obj_list_t notfound_list; +}; + +static bool multi_head_notfound_callback(S3fsCurl* s3fscurl, void* param) +{ + if(!s3fscurl){ + return false; + } + S3FS_PRN_INFO("HEAD returned NotFound(404) for %s object, it maybe only the path exists and the object does not exist.", s3fscurl->GetPath().c_str()); + + if(!param){ + S3FS_PRN_WARN("param(multi_head_notfound_callback_param*) is nullptr, then can not call filler."); + return false; + } + + // set path to not found list + struct multi_head_notfound_callback_param* pcbparam = reinterpret_cast(param); + + AutoLock 
auto_lock(&(pcbparam->list_lock)); + pcbparam->notfound_list.push_back(s3fscurl->GetBasePath()); + + return true; +} + +static std::unique_ptr multi_head_retry_callback(S3fsCurl* s3fscurl) +{ + if(!s3fscurl){ + return nullptr; + } + size_t ssec_key_pos= s3fscurl->GetLastPreHeadSeecKeyPos(); + int retry_count = s3fscurl->GetMultipartRetryCount(); + + // retry next sse key. + // if end of sse key, set retry master count is up. + ssec_key_pos = (ssec_key_pos == static_cast(-1) ? 0 : ssec_key_pos + 1); + if(0 == S3fsCurl::GetSseKeyCount() || S3fsCurl::GetSseKeyCount() <= ssec_key_pos){ + if(s3fscurl->IsOverMultipartRetryCount()){ + S3FS_PRN_ERR("Over retry count(%d) limit(%s).", s3fscurl->GetMultipartRetryCount(), s3fscurl->GetSpecialSavedPath().c_str()); + return nullptr; + } + ssec_key_pos = -1; + retry_count++; + } + + std::unique_ptr newcurl(new S3fsCurl(s3fscurl->IsUseAhbe())); + std::string path = s3fscurl->GetBasePath(); + std::string base_path = s3fscurl->GetBasePath(); + std::string saved_path = s3fscurl->GetSpecialSavedPath(); + + if(!newcurl->PreHeadRequest(path, base_path, saved_path, ssec_key_pos)){ + S3FS_PRN_ERR("Could not duplicate curl object(%s).", saved_path.c_str()); + return nullptr; + } + newcurl->SetMultipartRetryCount(retry_count); + + return newcurl; +} + +static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf, fuse_fill_dir_t filler) +{ + if(use_newcache && accessor->UseGlobalCache()){ + return readdir_multi_head_4_newcache(path, head, buf, filler); + } + + S3fsMultiCurl curlmulti(S3fsCurl::GetMaxMultiRequest(), true); // [NOTE] run all requests to completion even if some requests fail. + s3obj_list_t headlist; + int result = 0; + + S3FS_PRN_INFO1("[path=%s][list=%zu]", path, headlist.size()); + + // Make base path list. + head.GetNameList(headlist, true, false); // get name with "/". 
+ StatCache::getStatCacheData()->GetNotruncateCache(std::string(path), headlist); // Add notruncate file name from stat cache + + // Initialize S3fsMultiCurl + curlmulti.SetSuccessCallback(multi_head_callback); + curlmulti.SetRetryCallback(multi_head_retry_callback); + + // Success Callback function parameter(SyncFiller object) + SyncFiller syncfiller(buf, filler); + curlmulti.SetSuccessCallbackParam(reinterpret_cast(&syncfiller)); + + // Not found Callback function parameter + struct multi_head_notfound_callback_param notfound_param; + if(support_compat_dir){ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + #if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); + #endif + + if(0 != (result = pthread_mutex_init(&(notfound_param.list_lock), &attr))){ + S3FS_PRN_CRIT("failed to init notfound_param.list_lock: %d", result); + abort(); + } + curlmulti.SetNotFoundCallback(multi_head_notfound_callback); + curlmulti.SetNotFoundCallbackParam(reinterpret_cast(¬found_param)); + } + + // Make single head request(with max). + for(s3obj_list_t::iterator iter = headlist.begin(); headlist.end() != iter; ++iter){ + std::string disppath = path + (*iter); + std::string etag = head.GetETag((*iter).c_str()); + struct stat st; + + // [NOTE] + // If there is a cache hit, file stat is filled by filler at here. + // + if(StatCache::getStatCacheData()->HasStat(disppath, &st, etag.c_str())){ + std::string bpath = mybasename(disppath); + if(use_wtf8){ + bpath = s3fs_wtf8_decode(bpath); + } + syncfiller.Fill(bpath.c_str(), &st, 0); + continue; + } + + // First check for directory, start checking "not SSE-C". + // If checking failed, retry to check with "SSE-C" by retry callback func when SSE-C mode. 
+ std::unique_ptr s3fscurl(new S3fsCurl()); + if(!s3fscurl->PreHeadRequest(disppath, disppath, disppath)){ // target path = cache key path.(ex "dir/") + S3FS_PRN_WARN("Could not make curl object for head request(%s).", disppath.c_str()); + continue; + } + + if(!curlmulti.SetS3fsCurlObject(std::move(s3fscurl))){ + S3FS_PRN_WARN("Could not make curl object into multi curl(%s).", disppath.c_str()); + continue; + } + } + headlist.clear(); + + // Multi request + if(0 != (result = curlmulti.Request())){ + // If result is -EIO, it is something error occurred. + // This case includes that the object is encrypting(SSE) and s3fs does not have keys. + // So s3fs set result to 0 in order to continue the process. + if(-EIO == result){ + S3FS_PRN_WARN("error occurred in multi request(errno=%d), but continue...", result); + result = 0; + }else{ + S3FS_PRN_ERR("error occurred in multi request(errno=%d).", result); + return result; + } + } + + // [NOTE] + // Objects that could not be found by HEAD request may exist only + // as a path, so search for objects under that path.(a case of no dir object) + // + if(!support_compat_dir){ + syncfiller.SufficiencyFill(head.common_prefixes); + } + if(support_compat_dir && !notfound_param.notfound_list.empty()){ // [NOTE] not need to lock to access this here. + // dummy header + mode_t dirmask = umask(0); // macos does not have getumask() + umask(dirmask); + + headers_t dummy_header; + dummy_header["Content-Type"] = "application/x-directory"; // directory + dummy_header["x-amz-meta-uid"] = std::to_string(is_s3fs_uid ? s3fs_uid : geteuid()); + dummy_header["x-amz-meta-gid"] = std::to_string(is_s3fs_gid ? 
s3fs_gid : getegid()); + dummy_header["x-amz-meta-mode"] = std::to_string(S_IFDIR | (~dirmask & (S_IRWXU | S_IRWXG | S_IRWXO))); + dummy_header["x-amz-meta-atime"] = "0"; + dummy_header["x-amz-meta-ctime"] = "0"; + dummy_header["x-amz-meta-mtime"] = "0"; + + for(s3obj_list_t::iterator reiter = notfound_param.notfound_list.begin(); reiter != notfound_param.notfound_list.end(); ++reiter){ + int dir_result; + std::string dirpath = *reiter; + if(-ENOTEMPTY == (dir_result = directory_empty(dirpath.c_str()))){ + // Found objects under the path, so the path is directory. + + // Add stat cache + if(StatCache::getStatCacheData()->AddStat(dirpath, dummy_header, true)){ // set forcedir=true + // Get stats from stats cache(for converting from meta), and fill + std::string base_path = mybasename(dirpath); + if(use_wtf8){ + base_path = s3fs_wtf8_decode(base_path); + } + + struct stat st; + if(StatCache::getStatCacheData()->GetStat(dirpath, &st)){ + syncfiller.Fill(base_path.c_str(), &st, 0); + }else{ + S3FS_PRN_INFO2("Could not find %s directory(no dir object) in stat cache.", dirpath.c_str()); + syncfiller.Fill(base_path.c_str(), nullptr, 0); + } + }else{ + S3FS_PRN_ERR("failed adding stat cache [path=%s], but dontinue...", dirpath.c_str()); + } + }else{ + S3FS_PRN_WARN("%s object does not have any object under it(errno=%d),", reiter->c_str(), dir_result); + } + } + } + + return result; +} + +static int readdir_multi_head_4_newcache(const char* path, const S3ObjList& head, void* buf, fuse_fill_dir_t filler) +{ + s3obj_list_t headlist; + int result = 0; + + S3FS_PRN_INFO1("[path=%s][list=%zu]", path, headlist.size()); + + // Make base path list. + head.GetNameList(headlist, true, false); // get name with "/". 
+ StatCache::getStatCacheData()->GetNotruncateCache(std::string(path), headlist); // Add notruncate file name from stat cache + + SyncFiller syncfiller(buf, filler); + + // Not found Callback function parameter + struct multi_head_notfound_callback_param notfound_param; + if(support_compat_dir){ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + #if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); + #endif + + if(0 != (result = pthread_mutex_init(&(notfound_param.list_lock), &attr))){ + S3FS_PRN_CRIT("failed to init notfound_param.list_lock: %d", result); + abort(); + } + } + + std::vector> fs; + for(s3obj_list_t::iterator iter = headlist.begin(); headlist.end() != iter; ++iter){ + std::string disppath = path + (*iter); + std::string etag = head.GetETag((*iter).c_str()); + struct stat st; + + // [NOTE] + // If there is a cache hit, file stat is filled by filler at here. + // + if(StatCache::getStatCacheData()->HasStat(disppath, &st, etag.c_str())){ + std::string bpath = mybasename(disppath); + if(use_wtf8){ + bpath = s3fs_wtf8_decode(bpath); + } + syncfiller.Fill(bpath.c_str(), &st, 0); + continue; + } + + fs.push_back(folly::via(accessor->GetExecutor(), [disppath, &syncfiller, ¬found_param]() { + size_t realSize = 0; + std::map headers; + headers_t meta; + int res = accessor->Head(get_realpath(disppath.c_str()), realSize, headers); + if(0 == res){ + headers["Content-Length"] = std::to_string(realSize); + for(auto& it : headers) { + meta.insert(std::make_pair(it.first, it.second)); + } + if(!StatCache::getStatCacheData()->AddStat(disppath, meta)){ + S3FS_PRN_ERR("failed adding stat cache [path=%s]", disppath.c_str()); + meta.clear(); + return -EIO; + } + meta.clear(); // ps: it must be released here. 
+ + // Get stats from stats cache(for converting from meta), and fill + std::string bpath = mybasename(disppath); + if(use_wtf8){ + bpath = s3fs_wtf8_decode(bpath); + } + struct stat st; + if(StatCache::getStatCacheData()->GetStat(disppath, &st)){ + syncfiller.Fill(bpath.c_str(), &st, 0); + }else{ + S3FS_PRN_INFO2("Could not find %s file in stat cache.", disppath.c_str()); + syncfiller.Fill(bpath.c_str(), nullptr, 0); + } + }else if(-ENOENT == res) { // notfound + notfound_param.notfound_list.push_back(disppath); + res = 0; + }else{ + S3FS_PRN_ERR("File head failed [path=%s][errno=%d]", disppath.c_str(), res); + } + return res; + })); + } + + if(fs.size()){ + auto f = collectAll(fs.begin(), fs.end()).via(accessor->GetExecutor()).thenValue([]( + std::vector, std::allocator>>&& tups) { + for(const auto& t : tups){ + if (0 != t.value()) return t.value(); + } + return 0; + }); + f.wait(); + result = f.value(); + FUSE_CTX_INFO("multi request [path=%s][list=%zu][result=%d]", path, headlist.size(), result); + + // If result is -EIO, it is something error occurred. + // This case includes that the object is encrypting(SSE) and s3fs does not have keys. + // So s3fs set result to 0 in order to continue the process. + if(-EIO == result){ + S3FS_PRN_WARN("error occurred in multi request, but continue... [path=%s][list=%zu][errno=%d]", path, headlist.size(), result); + result = 0; + }else if(0 != result){ + S3FS_PRN_ERR("error occurred in multi request [path=%s][list=%zu][errno=%d]", path, headlist.size(), result); + return result; + } + } + headlist.clear(); + + // [NOTE] + // Objects that could not be found by HEAD request may exist only + // as a path, so search for objects under that path.(a case of no dir object) + // + if(!support_compat_dir){ + syncfiller.SufficiencyFill(head.common_prefixes); + } + if(support_compat_dir && !notfound_param.notfound_list.empty()){ // [NOTE] not need to lock to access this here. 
+ // dummy header + mode_t dirmask = umask(0); // macos does not have getumask() + umask(dirmask); + + headers_t dummy_header; + dummy_header["Content-Type"] = "application/x-directory"; // directory + dummy_header["x-amz-meta-uid"] = std::to_string(is_s3fs_uid ? s3fs_uid : geteuid()); + dummy_header["x-amz-meta-gid"] = std::to_string(is_s3fs_gid ? s3fs_gid : getegid()); + dummy_header["x-amz-meta-mode"] = std::to_string(S_IFDIR | (~dirmask & (S_IRWXU | S_IRWXG | S_IRWXO))); + dummy_header["x-amz-meta-atime"] = "0"; + dummy_header["x-amz-meta-ctime"] = "0"; + dummy_header["x-amz-meta-mtime"] = "0"; + + for(s3obj_list_t::iterator reiter = notfound_param.notfound_list.begin(); reiter != notfound_param.notfound_list.end(); ++reiter){ + int dir_result; + std::string dirpath = *reiter; + if(-ENOTEMPTY == (dir_result = directory_empty(dirpath.c_str()))){ + // Found objects under the path, so the path is directory. + + // Add stat cache + if(StatCache::getStatCacheData()->AddStat(dirpath, dummy_header, true)){ // set forcedir=true + // Get stats from stats cache(for converting from meta), and fill + std::string base_path = mybasename(dirpath); + if(use_wtf8){ + base_path = s3fs_wtf8_decode(base_path); + } + + struct stat st; + if(StatCache::getStatCacheData()->GetStat(dirpath, &st)){ + syncfiller.Fill(base_path.c_str(), &st, 0); + }else{ + S3FS_PRN_INFO2("Could not find %s directory(no dir object) in stat cache.", dirpath.c_str()); + syncfiller.Fill(base_path.c_str(), nullptr, 0); + } + }else{ + S3FS_PRN_ERR("failed adding stat cache [path=%s], but dontinue...", dirpath.c_str()); + } + }else{ + S3FS_PRN_WARN("%s object does not have any object under it(errno=%d),", reiter->c_str(), dir_result); + } + } + } + + return result; +} + +static int s3fs_readdir(const char* _path, void* buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info* fi) +{ + WTF8_ENCODE(path) + S3ObjList head; + int result; + + FUSE_CTX_INFO("[path=%s]", path); + + if(0 != (result = 
check_object_access(path, R_OK, nullptr))){ + return result; + } + + // get a list of all the objects + if((result = list_bucket(path, head, "/")) != 0){ + S3FS_PRN_ERR("list_bucket returns error(%d).", result); + return result; + } + + // force to add "." and ".." name. + filler(buf, ".", nullptr, 0); + filler(buf, "..", nullptr, 0); + if(head.IsEmpty()){ + return 0; + } + + // Send multi head request for stats caching. + std::string strpath = path; + if(strcmp(path, "/") != 0){ + strpath += "/"; + } + if(0 != (result = readdir_multi_head(strpath.c_str(), head, buf, filler))){ + S3FS_PRN_ERR("readdir_multi_head returns error(%d).", result); + } + S3FS_MALLOCTRIM(0); + + return result; +} + +static int list_bucket(const char* path, S3ObjList& head, const char* delimiter, bool check_content_only) +{ + std::string s3_realpath; + std::string query_delimiter; + std::string query_prefix; + std::string query_maxkey; + std::string next_continuation_token; + std::string next_marker; + bool truncated = true; + S3fsCurl s3fscurl; + xmlDocPtr doc; + + S3FS_PRN_INFO1("[path=%s]", path); + + if(delimiter && 0 < strlen(delimiter)){ + query_delimiter += "delimiter="; + query_delimiter += delimiter; + query_delimiter += "&"; + } + + query_prefix += "&prefix="; + s3_realpath = get_realpath(path); + if(s3_realpath.empty() || '/' != *s3_realpath.rbegin()){ + // last word must be "/" + query_prefix += urlEncodePath(s3_realpath.substr(1) + "/"); + }else{ + query_prefix += urlEncodePath(s3_realpath.substr(1)); + } + if (check_content_only){ + // Just need to know if there are child objects in dir + // For dir with children, expect "dir/" and "dir/child" + query_maxkey += "max-keys=2"; + }else{ + query_maxkey += "max-keys=" + std::to_string(max_keys_list_object); + } + + while(truncated){ + // append parameters to query in alphabetical order + std::string each_query; + if(!next_continuation_token.empty()){ + each_query += "continuation-token=" + urlEncodePath(next_continuation_token) + 
"&";
+            next_continuation_token = "";
+        }
+        each_query += query_delimiter;
+        if(S3fsCurl::IsListObjectsV2()){
+            each_query += "list-type=2&";
+        }
+        if(!next_marker.empty()){
+            each_query += "marker=" + urlEncodePath(next_marker) + "&";
+            next_marker = "";
+        }
+        each_query += query_maxkey;
+        each_query += query_prefix;
+
+        // request
+        int result;
+        if(0 != (result = s3fscurl.ListBucketRequest(path, each_query.c_str()))){
+            S3FS_PRN_ERR("ListBucketRequest returns with error.");
+            return result;
+        }
+        const std::string* body = s3fscurl.GetBodyData();
+
+        // [NOTE]
+        // CR code(\r) is replaced with LF(\n) by xmlReadMemory() function.
+        // To prevent that, only CR code is encoded by following function.
+        // The encoded CR code is decoded with append_objects_from_xml(_ex).
+        //
+        std::string encbody = get_encoded_cr_code(body->c_str());
+
+        // xmlDocPtr
+        if(nullptr == (doc = xmlReadMemory(encbody.c_str(), static_cast<int>(encbody.size()), "", nullptr, 0))){
+            S3FS_PRN_ERR("xmlReadMemory returns with error.");
+            return -EIO;
+        }
+        if(0 != append_objects_from_xml(path, doc, head)){
+            S3FS_PRN_ERR("append_objects_from_xml returns with error.");
+            xmlFreeDoc(doc);
+            return -EIO;
+        }
+        if(true == (truncated = is_truncated(doc))){
+            auto tmpch = get_next_continuation_token(doc);
+            if(nullptr != tmpch){
+                next_continuation_token = reinterpret_cast<const char*>(tmpch.get());
+            }else if(nullptr != (tmpch = get_next_marker(doc))){
+                next_marker = reinterpret_cast<const char*>(tmpch.get());
+            }
+
+            if(next_continuation_token.empty() && next_marker.empty()){
+                // If did not specify "delimiter", s3 did not return "NextMarker".
+                // On this case, can use last name for next marker.
+ // + std::string lastname; + if(!head.GetLastName(lastname)){ + S3FS_PRN_WARN("Could not find next marker, thus break loop."); + truncated = false; + }else{ + next_marker = s3_realpath.substr(1); + if(s3_realpath.empty() || '/' != *s3_realpath.rbegin()){ + next_marker += "/"; + } + next_marker += lastname; + } + } + } + S3FS_XMLFREEDOC(doc); + + // reset(initialize) curl object + s3fscurl.DestroyCurlHandle(); + + if(check_content_only){ + break; + } + } + S3FS_MALLOCTRIM(0); + + return 0; +} + +static int remote_mountpath_exists(const char* path, bool compat_dir) +{ + struct stat stbuf; + int result; + + S3FS_PRN_INFO1("[path=%s]", path); + + // getattr will prefix the path with the remote mountpoint + if(0 != (result = get_object_attribute(path, &stbuf, nullptr))){ + return result; + } + + // [NOTE] + // If there is no mount point(directory object) that s3fs can recognize, + // an error will occur. + // A mount point with a directory path(ex. "/...") + // requires that directory object. + // If the directory or object is created by a client other than s3fs, + // s3fs may not be able to recognize it. If you specify such a directory + // as a mount point, you can avoid the error by starting with "compat_dir" + // specified. 
+ // + if(!compat_dir && !pHasMpStat->Get()){ + return -ENOENT; + } + return 0; +} + +static bool get_meta_xattr_value(const char* path, std::string& rawvalue) +{ + if(!path || '\0' == path[0]){ + S3FS_PRN_ERR("path is empty."); + return false; + } + S3FS_PRN_DBG("[path=%s]", path); + + rawvalue.erase(); + + headers_t meta; + if(0 != get_object_attribute(path, nullptr, &meta)){ + S3FS_PRN_ERR("Failed to get object(%s) headers", path); + return false; + } + + headers_t::const_iterator iter; + if(meta.end() == (iter = meta.find("x-amz-meta-xattr"))){ + return false; + } + rawvalue = iter->second; + return true; +} + +static bool get_parent_meta_xattr_value(const char* path, std::string& rawvalue) +{ + if(0 == strcmp(path, "/") || 0 == strcmp(path, ".")){ + // path is mount point, thus does not have parent. + return false; + } + + std::string parent = mydirname(path); + if(parent.empty()){ + S3FS_PRN_ERR("Could not get parent path for %s.", path); + return false; + } + return get_meta_xattr_value(parent.c_str(), rawvalue); +} + +static bool get_xattr_posix_key_value(const char* path, std::string& xattrvalue, bool default_key) +{ + xattrvalue.erase(); + + std::string rawvalue; + if(!get_meta_xattr_value(path, rawvalue)){ + return false; + } + + xattrs_t xattrs; + if(0 == parse_xattrs(rawvalue, xattrs)){ + return false; + } + + std::string targetkey; + if(default_key){ + targetkey = "system.posix_acl_default"; + }else{ + targetkey = "system.posix_acl_access"; + } + + xattrs_t::iterator iter; + if(xattrs.end() == (iter = xattrs.find(targetkey))){ + return false; + } + + // convert value by base64 + xattrvalue = s3fs_base64(reinterpret_cast(iter->second.c_str()), iter->second.length()); + + return true; +} + +// [NOTE] +// Converts and returns the POSIX ACL default(system.posix_acl_default) value of +// the parent directory as a POSIX ACL(system.posix_acl_access) value. +// Returns false if the parent directory has no POSIX ACL defaults. 
+// +static bool build_inherited_xattr_value(const char* path, std::string& xattrvalue) +{ + S3FS_PRN_DBG("[path=%s]", path); + + xattrvalue.erase(); + + if(0 == strcmp(path, "/") || 0 == strcmp(path, ".")){ + // path is mount point, thus does not have parent. + return false; + } + + std::string parent = mydirname(path); + if(parent.empty()){ + S3FS_PRN_ERR("Could not get parent path for %s.", path); + return false; + } + + // get parent's "system.posix_acl_default" value(base64'd). + std::string parent_default_value; + if(!get_xattr_posix_key_value(parent.c_str(), parent_default_value, true)){ + return false; + } + + // build "system.posix_acl_access" from parent's default value + std::string raw_xattr_value; + raw_xattr_value = "{\"system.posix_acl_access\":\""; + raw_xattr_value += parent_default_value; + raw_xattr_value += "\"}"; + + xattrvalue = urlEncodePath(raw_xattr_value); + return true; +} + +static bool parse_xattr_keyval(const std::string& xattrpair, std::string& key, std::string* pval) +{ + // parse key and value + size_t pos; + std::string tmpval; + if(std::string::npos == (pos = xattrpair.find_first_of(':'))){ + S3FS_PRN_ERR("one of xattr pair(%s) is wrong format.", xattrpair.c_str()); + return false; + } + key = xattrpair.substr(0, pos); + tmpval = xattrpair.substr(pos + 1); + + if(!takeout_str_dquart(key) || !takeout_str_dquart(tmpval)){ + S3FS_PRN_ERR("one of xattr pair(%s) is wrong format.", xattrpair.c_str()); + return false; + } + + *pval = s3fs_decode64(tmpval.c_str(), tmpval.size()); + + return true; +} + +static size_t parse_xattrs(const std::string& strxattrs, xattrs_t& xattrs) +{ + xattrs.clear(); + + // decode + std::string jsonxattrs = urlDecode(strxattrs); + + // get from "{" to "}" + std::string restxattrs; + { + size_t startpos; + size_t endpos = std::string::npos; + if(std::string::npos != (startpos = jsonxattrs.find_first_of('{'))){ + endpos = jsonxattrs.find_last_of('}'); + } + if(startpos == std::string::npos || endpos == 
std::string::npos || endpos <= startpos){ + S3FS_PRN_WARN("xattr header(%s) is not json format.", jsonxattrs.c_str()); + return 0; + } + restxattrs = jsonxattrs.substr(startpos + 1, endpos - (startpos + 1)); + } + + // parse each key:val + for(size_t pair_nextpos = restxattrs.find_first_of(','); !restxattrs.empty(); restxattrs = (pair_nextpos != std::string::npos ? restxattrs.substr(pair_nextpos + 1) : ""), pair_nextpos = restxattrs.find_first_of(',')){ + std::string pair = pair_nextpos != std::string::npos ? restxattrs.substr(0, pair_nextpos) : restxattrs; + std::string key; + std::string val; + if(!parse_xattr_keyval(pair, key, &val)){ + // something format error, so skip this. + continue; + } + xattrs[key] = val; + } + return xattrs.size(); +} + +static std::string raw_build_xattrs(const xattrs_t& xattrs) +{ + std::string strxattrs; + bool is_set = false; + for(xattrs_t::const_iterator iter = xattrs.begin(); iter != xattrs.end(); ++iter){ + if(is_set){ + strxattrs += ','; + }else{ + is_set = true; + strxattrs = "{"; + } + strxattrs += '\"'; + strxattrs += iter->first; + strxattrs += "\":\""; + strxattrs += s3fs_base64(reinterpret_cast(iter->second.c_str()), iter->second.length()); + strxattrs += '\"'; + } + if(is_set){ + strxattrs += "}"; + } + return strxattrs; +} + +static std::string build_xattrs(const xattrs_t& xattrs) +{ + std::string strxattrs = raw_build_xattrs(xattrs); + if(strxattrs.empty()){ + strxattrs = "{}"; + } + strxattrs = urlEncodePath(strxattrs); + + return strxattrs; +} + +static int set_xattrs_to_header(headers_t& meta, const char* name, const char* value, size_t size, int flags) +{ + std::string strxattrs; + xattrs_t xattrs; + + headers_t::iterator iter; + if(meta.end() == (iter = meta.find("x-amz-meta-xattr"))){ +#if defined(XATTR_REPLACE) + if(XATTR_REPLACE == (flags & XATTR_REPLACE)){ + // there is no xattr header but flags is replace, so failure. 
+ return -ENOATTR; + } +#endif + }else{ +#if defined(XATTR_CREATE) + if(XATTR_CREATE == (flags & XATTR_CREATE)){ + // found xattr header but flags is only creating, so failure. + return -EEXIST; + } +#endif + strxattrs = iter->second; + } + + // get map as xattrs_t + parse_xattrs(strxattrs, xattrs); + + // add name(do not care overwrite and empty name/value) + xattrs[name] = std::string(value, size); + + // build new strxattrs(not encoded) and set it to headers_t + meta["x-amz-meta-xattr"] = build_xattrs(xattrs); + + S3FS_PRN_DBG("Set xattrs(after adding %s key) = %s", name, raw_build_xattrs(xattrs).c_str()); + + return 0; +} + +#if defined(__APPLE__) +static int s3fs_setxattr(const char* path, const char* name, const char* value, size_t size, int flags, uint32_t position) +#else +static int s3fs_setxattr(const char* path, const char* name, const char* value, size_t size, int flags) +#endif +{ + FUSE_CTX_INFO("[path=%s][name=%s][value=%p][size=%zu][flags=0x%x]", path, name, value, size, flags); + + if(!value && 0 < size){ + S3FS_PRN_ERR("Wrong parameter: value(%p), size(%zu)", value, size); + return 0; + } + +#if defined(__APPLE__) + if (position != 0) { + // No resource fork support + return -EINVAL; + } +#endif + + int result; + std::string strpath; + std::string newpath; + std::string nowcache; + headers_t meta; + struct stat stbuf; + dirtype nDirType = dirtype::UNKNOWN; + + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + if(0 != (result = check_object_owner(path, &stbuf))){ + return result; + } + + if(S_ISDIR(stbuf.st_mode)){ + result = chk_dir_object_type(path, newpath, strpath, nowcache, &meta, &nDirType); + }else{ + strpath = path; + nowcache = strpath; + result = get_object_attribute(strpath.c_str(), nullptr, &meta); + } + if(0 != result){ + return result; + } + + if(S_ISDIR(stbuf.st_mode) && (IS_REPLACEDIR(nDirType) || IS_CREATE_MP_STAT(path))){ + if(IS_REPLACEDIR(nDirType)){ + // Should rebuild directory object(except 
new type) + // Need to remove old dir("dir" etc) and make new dir("dir/") + + // At first, remove directory old object + if(0 != (result = remove_old_type_dir(strpath, nDirType))){ + return result; + } + } + StatCache::getStatCacheData()->DelStat(nowcache); + + // Make new directory object("dir/") + struct timespec ts_atime; + struct timespec ts_mtime; + struct timespec ts_ctime; + set_stat_to_timespec(stbuf, stat_time_type::ATIME, ts_atime); + set_stat_to_timespec(stbuf, stat_time_type::MTIME, ts_mtime); + set_stat_to_timespec(stbuf, stat_time_type::CTIME, ts_ctime); + + if(0 != (result = create_directory_object(newpath.c_str(), stbuf.st_mode, ts_atime, ts_mtime, ts_ctime, stbuf.st_uid, stbuf.st_gid, nullptr))){ + return result; + } + + // need to set xattr header for directory. + strpath = newpath; + nowcache = strpath; + } + + // set xattr all object + std::string strSourcePath = (mount_prefix.empty() && "/" == strpath) ? "//" : strpath; + headers_t updatemeta; + updatemeta["x-amz-meta-ctime"] = s3fs_str_realtime(); + updatemeta["x-amz-copy-source"] = urlEncodePath(service_path + S3fsCred::GetBucket() + get_realpath(strSourcePath.c_str())); + updatemeta["x-amz-metadata-directive"] = "REPLACE"; + + // check opened file handle. + // + // If the file starts uploading by multipart when the disk capacity is insufficient, + // we need to put these header after finishing upload. + // Or if the file is only open, we must update to FdEntity's internal meta. + // + AutoFdEntity autoent; + FdEntity* ent; + bool need_put_header = true; + if(nullptr != (ent = autoent.OpenExistFdEntity(path))){ + // get xattr and make new xattr + std::string strxattr; + if(ent->GetXattr(strxattr)){ + updatemeta["x-amz-meta-xattr"] = strxattr; + }else{ + // [NOTE] + // Set an empty xattr. + // This requires the key to be present in order to add xattr. 
+ ent->SetXattr(strxattr); + } + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(0 != (result = set_xattrs_to_header(updatemeta, name, value, size, flags))){ + return result; + } + + if(ent->MergeOrgMeta(updatemeta)){ + // meta is changed, but now uploading. + // then the meta is pending and accumulated to be put after the upload is complete. + S3FS_PRN_INFO("meta pending until upload is complete"); + need_put_header = false; + + // If there is data in the Stats cache, update the Stats cache. + StatCache::getStatCacheData()->UpdateMetaStats(strpath, updatemeta); + + // [NOTE] + // There are cases where this function is called during the process of + // creating a new file (before uploading). + // In this case, a temporary cache exists in the Stat cache. + // So we need to update the cache, if it exists. (see. s3fs_create and s3fs_utimens) + // + if(!StatCache::getStatCacheData()->AddStat(strpath, updatemeta, false, true)){ + return -EIO; + } + } + } + if(need_put_header){ + // not found opened file. + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(0 != (result = set_xattrs_to_header(meta, name, value, size, flags))){ + return result; + } + merge_headers(meta, updatemeta, true); + + // upload meta directly. 
+ if(0 != (result = put_headers(strpath.c_str(), meta, true))){ + return result; + } + StatCache::getStatCacheData()->DelStat(nowcache); + } + + return 0; +} + +#if defined(__APPLE__) +static int s3fs_getxattr(const char* path, const char* name, char* value, size_t size, uint32_t position) +#else +static int s3fs_getxattr(const char* path, const char* name, char* value, size_t size) +#endif +{ +#if defined(__APPLE__) + FUSE_CTX_DBG("[path=%s][name=%s][value=%p][size=%zu]", path, name, value, size); +#else + FUSE_CTX_INFO("[path=%s][name=%s][value=%p][size=%zu]", path, name, value, size); +#endif + + if(!path || !name){ + return -EIO; + } + +#if defined(__APPLE__) + if (position != 0) { + // No resource fork support + return -EINVAL; + } +#endif + + int result; + headers_t meta; + xattrs_t xattrs; + + // check parent directory attribute. + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + + // get headers + if(0 != (result = get_object_attribute(path, nullptr, &meta))){ + return result; + } + + // get xattrs + headers_t::iterator hiter = meta.find("x-amz-meta-xattr"); + if(meta.end() == hiter){ + // object does not have xattrs + return -ENOATTR; + } + std::string strxattrs = hiter->second; + + parse_xattrs(strxattrs, xattrs); + + S3FS_PRN_DBG("Get xattrs = %s", raw_build_xattrs(xattrs).c_str()); + + // search name + std::string strname = name; + xattrs_t::iterator xiter = xattrs.find(strname); + if(xattrs.end() == xiter){ + // not found name in xattrs + return -ENOATTR; + } + + // decode + size_t length = xiter->second.length(); + const char* pvalue = xiter->second.c_str(); + + if(0 < size){ + if(static_cast(size) < length){ + // over buffer size + return -ERANGE; + } + if(pvalue){ + memcpy(value, pvalue, length); + } + } + + return static_cast(length); +} + +static int s3fs_listxattr(const char* path, char* list, size_t size) +{ + S3FS_PRN_INFO("[path=%s][list=%p][size=%zu]", path, list, size); + + if(!path){ + return -EIO; + } + + 
int result; + headers_t meta; + xattrs_t xattrs; + + // check parent directory attribute. + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + + // get headers + if(0 != (result = get_object_attribute(path, nullptr, &meta))){ + return result; + } + + // get xattrs + headers_t::iterator iter; + if(meta.end() == (iter = meta.find("x-amz-meta-xattr"))){ + // object does not have xattrs + return 0; + } + std::string strxattrs = iter->second; + + parse_xattrs(strxattrs, xattrs); + + S3FS_PRN_DBG("Get xattrs = %s", raw_build_xattrs(xattrs).c_str()); + + // calculate total name length + size_t total = 0; + for(xattrs_t::const_iterator xiter = xattrs.begin(); xiter != xattrs.end(); ++xiter){ + if(!xiter->first.empty()){ + total += xiter->first.length() + 1; + } + } + + if(0 == total){ + return 0; + } + + // check parameters + if(0 == size){ + return static_cast(total); + } + if(!list || size < total){ + return -ERANGE; + } + + // copy to list + char* setpos = list; + for(xattrs_t::const_iterator xiter = xattrs.begin(); xiter != xattrs.end(); ++xiter){ + if(!xiter->first.empty()){ + strcpy(setpos, xiter->first.c_str()); + setpos = &setpos[strlen(setpos) + 1]; + } + } + + return static_cast(total); +} + +static int s3fs_removexattr(const char* path, const char* name) +{ + FUSE_CTX_INFO("[path=%s][name=%s]", path, name); + + if(!path || !name){ + return -EIO; + } + + int result; + std::string strpath; + std::string newpath; + std::string nowcache; + headers_t meta; + xattrs_t xattrs; + struct stat stbuf; + dirtype nDirType = dirtype::UNKNOWN; + + if(0 == strcmp(path, "/")){ + S3FS_PRN_ERR("Could not change mode for mount point."); + return -EIO; + } + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + if(0 != (result = check_object_owner(path, &stbuf))){ + return result; + } + + if(S_ISDIR(stbuf.st_mode)){ + result = chk_dir_object_type(path, newpath, strpath, nowcache, &meta, &nDirType); + }else{ + strpath = 
path; + nowcache = strpath; + result = get_object_attribute(strpath.c_str(), nullptr, &meta); + } + if(0 != result){ + return result; + } + + // get xattrs + headers_t::iterator hiter = meta.find("x-amz-meta-xattr"); + if(meta.end() == hiter){ + // object does not have xattrs + return -ENOATTR; + } + std::string strxattrs = hiter->second; + + parse_xattrs(strxattrs, xattrs); + + // check name xattrs + std::string strname = name; + xattrs_t::iterator xiter = xattrs.find(strname); + if(xattrs.end() == xiter){ + return -ENOATTR; + } + + // make new header_t after deleting name xattr + xattrs.erase(xiter); + + S3FS_PRN_DBG("Reset xattrs(after delete %s key) = %s", name, raw_build_xattrs(xattrs).c_str()); + + if(S_ISDIR(stbuf.st_mode) && IS_REPLACEDIR(nDirType)){ + // Should rebuild directory object(except new type) + // Need to remove old dir("dir" etc) and make new dir("dir/") + + // At first, remove directory old object + if(0 != (result = remove_old_type_dir(strpath, nDirType))){ + return result; + } + StatCache::getStatCacheData()->DelStat(nowcache); + + // Make new directory object("dir/") + struct timespec ts_atime; + struct timespec ts_mtime; + struct timespec ts_ctime; + set_stat_to_timespec(stbuf, stat_time_type::ATIME, ts_atime); + set_stat_to_timespec(stbuf, stat_time_type::MTIME, ts_mtime); + set_stat_to_timespec(stbuf, stat_time_type::CTIME, ts_ctime); + + if(0 != (result = create_directory_object(newpath.c_str(), stbuf.st_mode, ts_atime, ts_mtime, ts_ctime, stbuf.st_uid, stbuf.st_gid, nullptr))){ + return result; + } + + // need to set xattr header for directory. + strpath = newpath; + nowcache = strpath; + } + + // set xattr all object + std::string strSourcePath = (mount_prefix.empty() && "/" == strpath) ? 
"//" : strpath; + headers_t updatemeta; + updatemeta["x-amz-copy-source"] = urlEncodePath(service_path + S3fsCred::GetBucket() + get_realpath(strSourcePath.c_str())); + updatemeta["x-amz-metadata-directive"] = "REPLACE"; + if(!xattrs.empty()){ + updatemeta["x-amz-meta-xattr"] = build_xattrs(xattrs); + }else{ + updatemeta["x-amz-meta-xattr"] = ""; // This is a special case. If empty, this header will eventually be removed. + } + + // check opened file handle. + // + // If the file starts uploading by multipart when the disk capacity is insufficient, + // we need to put these header after finishing upload. + // Or if the file is only open, we must update to FdEntity's internal meta. + // + AutoFdEntity autoent; + FdEntity* ent; + bool need_put_header = true; + if(nullptr != (ent = autoent.OpenExistFdEntity(path))){ + if(ent->MergeOrgMeta(updatemeta)){ + // meta is changed, but now uploading. + // then the meta is pending and accumulated to be put after the upload is complete. + S3FS_PRN_INFO("meta pending until upload is complete"); + need_put_header = false; + + // If there is data in the Stats cache, update the Stats cache. + StatCache::getStatCacheData()->UpdateMetaStats(strpath, updatemeta); + } + } + if(need_put_header){ + // not found opened file. + if(updatemeta["x-amz-meta-xattr"].empty()){ + updatemeta.erase("x-amz-meta-xattr"); + } + + merge_headers(meta, updatemeta, true); + + // upload meta directly. + if(0 != (result = put_headers(strpath.c_str(), meta, true))){ + return result; + } + StatCache::getStatCacheData()->DelStat(nowcache); + } + + return 0; +} + +// s3fs_init calls this function to exit cleanly from the fuse event loop. 
+// +// There's no way to pass an exit status to the high-level event loop API, so +// this function stores the exit value in a global for main() +static void s3fs_exit_fuseloop(int exit_status) +{ + S3FS_PRN_ERR("Exiting FUSE event loop due to errors\n"); + s3fs_init_deferred_exit_status = exit_status; + struct fuse_context *ctx = fuse_get_context(); + if (nullptr != ctx) { + fuse_exit(ctx->fuse); + } +} + +static void* s3fs_init(struct fuse_conn_info* conn) +{ + S3FS_PRN_INIT_INFO("init v%s(commit:%s) with %s, credential-library(%s)", VERSION, COMMIT_HASH_VAL, s3fs_crypt_lib_name(), ps3fscred->GetCredFuncVersion(false)); + + // cache(remove cache dirs at first) + if(is_remove_cache && (!CacheFileStat::DeleteCacheFileStatDirectory() || !FdManager::DeleteCacheDirectory())){ + S3FS_PRN_DBG("Could not initialize cache directory."); + } + + // check loading IAM role name + if(!ps3fscred->LoadIAMRoleFromMetaData()){ + S3FS_PRN_CRIT("could not load IAM role name from meta data."); + s3fs_exit_fuseloop(EXIT_FAILURE); + return nullptr; + } + + // Check Bucket + { + int result; + if(EXIT_SUCCESS != (result = s3fs_check_service())){ + s3fs_exit_fuseloop(result); + return nullptr; + } + } + + // Investigate system capabilities + #ifndef __APPLE__ + if((unsigned int)conn->capable & FUSE_CAP_ATOMIC_O_TRUNC){ + conn->want |= FUSE_CAP_ATOMIC_O_TRUNC; + } + #endif + + if((unsigned int)conn->capable & FUSE_CAP_BIG_WRITES){ + conn->want |= FUSE_CAP_BIG_WRITES; + } + + if(!ThreadPoolMan::Initialize(max_thread_count)){ + S3FS_PRN_CRIT("Could not create thread pool(%d)", max_thread_count); + s3fs_exit_fuseloop(EXIT_FAILURE); + } + + // Signal object + if(!S3fsSignals::Initialize()){ + S3FS_PRN_ERR("Failed to initialize signal object, but continue..."); + } + + return nullptr; +} + +static void s3fs_destroy(void*) +{ + S3FS_PRN_INFO("destroy"); + + // Signal object + if(!S3fsSignals::Destroy()){ + S3FS_PRN_WARN("Failed to clean up signal object."); + } + + ThreadPoolMan::Destroy(); + + 
// cache(remove at last) + if(is_remove_cache && (!CacheFileStat::DeleteCacheFileStatDirectory() || !FdManager::DeleteCacheDirectory())){ + S3FS_PRN_WARN("Could not remove cache directory."); + } +} + +static int s3fs_access(const char* path, int mask) +{ + FUSE_CTX_INFO("[path=%s][mask=%s%s%s%s]", path, + ((mask & R_OK) == R_OK) ? "R_OK " : "", + ((mask & W_OK) == W_OK) ? "W_OK " : "", + ((mask & X_OK) == X_OK) ? "X_OK " : "", + (mask == F_OK) ? "F_OK" : ""); + + int result = check_object_access(path, mask, nullptr); + S3FS_MALLOCTRIM(0); + return result; +} + +// +// If calling with wrong region, s3fs gets following error body as 400 error code. +// " +// AuthorizationHeaderMalformed +// The authorization header is malformed; the region 'us-east-1' is wrong; expecting 'ap-northeast-1' +// ap-northeast-1 +// ... +// ... +// " +// +// So this is cheap code but s3fs should get correct region automatically. +// +static bool check_region_error(const char* pbody, size_t len, std::string& expectregion) +{ + if(!pbody){ + return false; + } + + std::string code; + if(!simple_parse_xml(pbody, len, "Code", code) || code != "AuthorizationHeaderMalformed"){ + return false; + } + + if(!simple_parse_xml(pbody, len, "Region", expectregion)){ + return false; + } + + return true; +} + +static bool check_endpoint_error(const char* pbody, size_t len, std::string& expectendpoint) +{ + if(!pbody){ + return false; + } + + std::string code; + if(!simple_parse_xml(pbody, len, "Code", code) || code != "PermanentRedirect"){ + return false; + } + + if(!simple_parse_xml(pbody, len, "Endpoint", expectendpoint)){ + return false; + } + + return true; +} + +static bool check_invalid_sse_arg_error(const char* pbody, size_t len) +{ + if(!pbody){ + return false; + } + + std::string code; + if(!simple_parse_xml(pbody, len, "Code", code) || code != "InvalidArgument"){ + return false; + } + std::string argname; + if(!simple_parse_xml(pbody, len, "ArgumentName", argname) || argname != 
"x-amz-server-side-encryption"){ + return false; + } + return true; +} + +static bool check_error_message(const char* pbody, size_t len, std::string& message) +{ + message.clear(); + if(!pbody){ + return false; + } + if(!simple_parse_xml(pbody, len, "Message", message)){ + return false; + } + return true; +} + +// [NOTE] +// This function checks if the bucket is accessible when s3fs starts. +// +// The following patterns for mount points are supported by s3fs: +// (1) Mount the bucket top +// (2) Mount to a directory(folder) under the bucket. In this case: +// (2A) Directories created by clients other than s3fs +// (2B) Directory created by s3fs +// +// Both case of (1) and (2) check access permissions to the mount point +// path(directory). +// In the case of (2A), if the directory(object) for the mount point does +// not exist, the check fails. However, launching s3fs with the "compat_dir" +// option avoids this error and the check succeeds. If you do not specify +// the "compat_dir" option in case (2A), please create a directory(object) +// for the mount point before launching s3fs. +// +static int s3fs_check_service() +{ + S3FS_PRN_INFO("check services."); + + // At first time for access S3, we check IAM role if it sets. 
+ if(!ps3fscred->CheckIAMCredentialUpdate()){ + S3FS_PRN_CRIT("Failed to initialize IAM credential."); + return EXIT_FAILURE; + } + + S3fsCurl s3fscurl; + int res; + bool force_no_sse = false; + + while(0 > (res = s3fscurl.CheckBucket(get_realpath("/").c_str(), support_compat_dir, force_no_sse))){ + // get response code + bool do_retry = false; + long responseCode = s3fscurl.GetLastResponseCode(); + + // check wrong endpoint, and automatically switch endpoint + if(300 <= responseCode && responseCode < 500){ + + // check region error(for putting message or retrying) + const std::string* body = s3fscurl.GetBodyData(); + std::string expectregion; + std::string expectendpoint; + + // Check if any case can be retried + if(check_region_error(body->c_str(), body->size(), expectregion)){ + // [NOTE] + // If endpoint is not specified(using us-east-1 region) and + // an error is encountered accessing a different region, we + // will retry the check on the expected region. + // see) https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html#access-bucket-intro + // + if(s3host != "http://s3.amazonaws.com" && s3host != "https://s3.amazonaws.com"){ + // specified endpoint for specified url is wrong. + if(is_specified_endpoint){ + S3FS_PRN_CRIT("The bucket region is not '%s'(specified) for specified url(%s), it is correctly '%s'. You should specify url(http(s)://s3-%s.amazonaws.com) and endpoint(%s) option.", endpoint.c_str(), s3host.c_str(), expectregion.c_str(), expectregion.c_str(), expectregion.c_str()); + }else{ + S3FS_PRN_CRIT("The bucket region is not '%s'(default) for specified url(%s), it is correctly '%s'. You should specify url(http(s)://s3-%s.amazonaws.com) and endpoint(%s) option.", endpoint.c_str(), s3host.c_str(), expectregion.c_str(), expectregion.c_str(), expectregion.c_str()); + } + + }else if(is_specified_endpoint){ + // specified endpoint is wrong. + S3FS_PRN_CRIT("The bucket region is not '%s'(specified), it is correctly '%s'. 
You should specify endpoint(%s) option.", endpoint.c_str(), expectregion.c_str(), expectregion.c_str()); + + }else if(S3fsCurl::GetSignatureType() == signature_type_t::V4_ONLY || S3fsCurl::GetSignatureType() == signature_type_t::V2_OR_V4){ + // current endpoint and url are default value, so try to connect to expected region. + S3FS_PRN_CRIT("Failed to connect region '%s'(default), so retry to connect region '%s' for url(http(s)://s3-%s.amazonaws.com).", endpoint.c_str(), expectregion.c_str(), expectregion.c_str()); + + // change endpoint + endpoint = expectregion; + + // change url + if(s3host == "http://s3.amazonaws.com"){ + s3host = "http://s3-" + endpoint + ".amazonaws.com"; + }else if(s3host == "https://s3.amazonaws.com"){ + s3host = "https://s3-" + endpoint + ".amazonaws.com"; + } + + // Retry with changed host + s3fscurl.DestroyCurlHandle(); + do_retry = true; + + }else{ + S3FS_PRN_CRIT("The bucket region is not '%s'(default), it is correctly '%s'. You should specify endpoint(%s) option.", endpoint.c_str(), expectregion.c_str(), expectregion.c_str()); + } + + }else if(check_endpoint_error(body->c_str(), body->size(), expectendpoint)){ + // redirect error + if(pathrequeststyle){ + S3FS_PRN_CRIT("S3 service returned PermanentRedirect (current is url(%s) and endpoint(%s)). You need to specify correct url(http(s)://s3-.amazonaws.com) and endpoint option with use_path_request_style option.", s3host.c_str(), endpoint.c_str()); + }else{ + S3FS_PRN_CRIT("S3 service returned PermanentRedirect with %s (current is url(%s) and endpoint(%s)). 
You need to specify correct endpoint option.", expectendpoint.c_str(), s3host.c_str(), endpoint.c_str()); + } + return EXIT_FAILURE; + + }else if(check_invalid_sse_arg_error(body->c_str(), body->size())){ + // SSE argument error, so retry it without SSE + S3FS_PRN_CRIT("S3 service returned InvalidArgument(x-amz-server-side-encryption), so retry without adding x-amz-server-side-encryption."); + + // Retry without sse parameters + s3fscurl.DestroyCurlHandle(); + do_retry = true; + force_no_sse = true; + } + } + + // Try changing signature from v4 to v2 + // + // [NOTE] + // If there is no case to retry with the previous checks, and there + // is a chance to retry with signature v2, prepare to retry with v2. + // + if(!do_retry && (responseCode == 400 || responseCode == 403) && S3fsCurl::GetSignatureType() == signature_type_t::V2_OR_V4){ + // switch sigv2 + S3FS_PRN_CRIT("Failed to connect by sigv4, so retry to connect by signature version 2. But you should to review url and endpoint option."); + + // retry to check with sigv2 + s3fscurl.DestroyCurlHandle(); + do_retry = true; + S3fsCurl::SetSignatureType(signature_type_t::V2_ONLY); + } + + // check errors(after retrying) + if(!do_retry && responseCode != 200 && responseCode != 301){ + // parse error message if existed + std::string errMessage; + const std::string* body = s3fscurl.GetBodyData(); + check_error_message(body->c_str(), body->size(), errMessage); + + if(responseCode == 400){ + S3FS_PRN_CRIT("Failed to check bucket and directory for mount point : Bad Request(host=%s, message=%s)", s3host.c_str(), errMessage.c_str()); + }else if(responseCode == 403){ + S3FS_PRN_CRIT("Failed to check bucket and directory for mount point : Invalid Credentials(host=%s, message=%s)", s3host.c_str(), errMessage.c_str()); + }else if(responseCode == 404){ + if(mount_prefix.empty()){ + S3FS_PRN_CRIT("Failed to check bucket and directory for mount point : Bucket or directory not found(host=%s, message=%s)", s3host.c_str(), 
errMessage.c_str()); + }else{ + S3FS_PRN_CRIT("Failed to check bucket and directory for mount point : Bucket or directory(%s) not found(host=%s, message=%s) - You may need to specify the compat_dir option.", mount_prefix.c_str(), s3host.c_str(), errMessage.c_str()); + } + }else{ + S3FS_PRN_CRIT("Failed to check bucket and directory for mount point : Unable to connect(host=%s, message=%s)", s3host.c_str(), errMessage.c_str()); + } + return EXIT_FAILURE; + } + } + s3fscurl.DestroyCurlHandle(); + + // make sure remote mountpath exists and is a directory + if(!mount_prefix.empty()){ + if(remote_mountpath_exists("/", support_compat_dir) != 0){ + S3FS_PRN_CRIT("Remote mountpath %s not found, this may be resolved with the compat_dir option.", mount_prefix.c_str()); + return EXIT_FAILURE; + } + } + S3FS_MALLOCTRIM(0); + + return EXIT_SUCCESS; +} + +// +// Check & Set attributes for mount point. +// +static bool set_mountpoint_attribute(struct stat& mpst) +{ + mp_uid = geteuid(); + mp_gid = getegid(); + mp_mode = S_IFDIR | (allow_other ? (is_mp_umask ? (~mp_umask & (S_IRWXU | S_IRWXG | S_IRWXO)) : (S_IRWXU | S_IRWXG | S_IRWXO)) : S_IRWXU); + +// In MSYS2 environment with WinFsp, it is not supported to change mode of mount point. +// Doing that forcely will occurs permission problem, so disabling it. +#ifdef __MSYS__ + return true; +#else + S3FS_PRN_INFO2("PROC(uid=%u, gid=%u) - MountPoint(uid=%u, gid=%u, mode=%04o)", + (unsigned int)mp_uid, (unsigned int)mp_gid, (unsigned int)(mpst.st_uid), (unsigned int)(mpst.st_gid), mpst.st_mode); + + // check owner + if(0 == mp_uid || mpst.st_uid == mp_uid){ + return true; + } + // check group permission + if(mpst.st_gid == mp_gid || 1 == is_uid_include_group(mp_uid, mpst.st_gid)){ + if(S_IRWXG == (mpst.st_mode & S_IRWXG)){ + return true; + } + } + // check other permission + if(S_IRWXO == (mpst.st_mode & S_IRWXO)){ + return true; + } + return false; +#endif +} + +// +// Set bucket and mount_prefix based on passed bucket name. 
+// +static int set_bucket(const char* arg) +{ + // TODO: Mutates input. Consider some other tokenization. + char *bucket_name = const_cast(arg); + if(strstr(arg, ":")){ + if(strstr(arg, "://")){ + S3FS_PRN_EXIT("bucket name and path(\"%s\") is wrong, it must be \"bucket[:/path]\".", arg); + return -1; + } + if(!S3fsCred::SetBucket(strtok(bucket_name, ":"))){ + S3FS_PRN_EXIT("bucket name and path(\"%s\") is wrong, it must be \"bucket[:/path]\".", arg); + return -1; + } + char* pmount_prefix = strtok(nullptr, ""); + if(pmount_prefix){ + if(0 == strlen(pmount_prefix) || '/' != pmount_prefix[0]){ + S3FS_PRN_EXIT("path(%s) must be prefix \"/\".", pmount_prefix); + return -1; + } + mount_prefix = pmount_prefix; + // Trim the last consecutive '/' + mount_prefix = trim_right(mount_prefix, "/"); + } + }else{ + if(!S3fsCred::SetBucket(arg)){ + S3FS_PRN_EXIT("bucket name and path(\"%s\") is wrong, it must be \"bucket[:/path]\".", arg); + return -1; + } + } + return 0; +} + +// +// Utility function for parse "--bucket_size" option +// +// max_size: A string like 20000000, 30GiB, 20TB etc +// return: An integer of type fsblkcnt_t corresponding to the number +// of blocks with max_size calculated with the s3fs block size, +// or 0 on error +// +static fsblkcnt_t parse_bucket_size(char* max_size) +{ + const unsigned long long ten00 = 1000L; + const unsigned long long ten24 = 1024L; + unsigned long long scale = 1; + unsigned long long n_bytes = 0; + char *ptr; + + if(nullptr != (ptr = strstr(max_size, "GB"))){ + scale = ten00 * ten00 * ten00; + if(2 < strlen(ptr)){ + return 0; // no trailing garbage + } + *ptr = '\0'; + }else if(nullptr != (ptr = strstr(max_size, "GiB"))){ + scale = ten24 * ten24 * ten24; + if(3 < strlen(ptr)){ + return 0; // no trailing garbage + } + *ptr = '\0'; + }else if(nullptr != (ptr = strstr(max_size, "TB"))){ + scale = ten00 * ten00 * ten00 * ten00; + if(2 < strlen(ptr)){ + return 0; // no trailing garbage + } + *ptr = '\0'; + }else if(nullptr != (ptr = 
strstr(max_size, "TiB"))){ + scale = ten24 * ten24 * ten24 * ten24; + if(3 < strlen(ptr)){ + return 0; // no trailing garbage + } + *ptr = '\0'; + }else if(nullptr != (ptr = strstr(max_size, "PB"))){ + scale = ten00 * ten00 * ten00 * ten00 * ten00; + if(2 < strlen(ptr)){ + return 0; // no trailing garbage + } + *ptr = '\0'; + }else if(nullptr != (ptr = strstr(max_size, "PiB"))){ + scale = ten24 * ten24 * ten24 * ten24 * ten24; + if(3 < strlen(ptr)){ + return 0; // no trailing garbage + } + *ptr = '\0'; + }else if(nullptr != (ptr = strstr(max_size, "EB"))){ + scale = ten00 * ten00 * ten00 * ten00 * ten00 * ten00; + if(2 < strlen(ptr)){ + return 0; // no trailing garbage + } + *ptr = '\0'; + }else if(nullptr != (ptr = strstr(max_size, "EiB"))){ + scale = ten24 * ten24 * ten24 * ten24 * ten24 * ten24; + if(3 < strlen(ptr)){ + return 0; // no trailing garbage + } + *ptr = '\0'; + } + + // extra check + for(ptr = max_size; *ptr != '\0'; ++ptr){ + if(!isdigit(*ptr)){ + return 0; // wrong number + } + n_bytes = static_cast(strtoull(max_size, nullptr, 10)); + if((INT64_MAX / scale) < n_bytes){ + return 0; // overflow + } + n_bytes *= scale; + } + + // [NOTE] + // To round a number by s3fs block size. + // And need to check the result value because fsblkcnt_t is 32bit in macos etc. + // + n_bytes /= s3fs_block_size; + + if(sizeof(fsblkcnt_t) <= 4){ + if(INT32_MAX < n_bytes){ + return 0; // overflow + } + } + return static_cast(n_bytes); // cast to fsblkcnt_t +} + +static bool is_cmd_exists(const std::string& command) +{ + // The `command -v` is a POSIX-compliant method for checking the existence of a program. 
+ std::string cmd = "command -v " + command + " >/dev/null 2>&1"; + int result = system(cmd.c_str()); + return (result !=-1 && WIFEXITED(result) && WEXITSTATUS(result) == 0); +} + +static int print_umount_message(const std::string& mp, bool force) +{ + std::string cmd; + if (is_cmd_exists("fusermount")){ + if (force){ + cmd = "fusermount -uz " + mp; + } else { + cmd = "fusermount -u " + mp; + } + }else{ + if (force){ + cmd = "umount -l " + mp; + } else { + cmd = "umount " + mp; + } + } + + S3FS_PRN_EXIT("MOUNTPOINT %s is stale, you could use this command to fix: %s", mp.c_str(), cmd.c_str()); + + return 0; +} + +// This is repeatedly called by the fuse option parser +// if the key is equal to FUSE_OPT_KEY_OPT, it's an option passed in prefixed by +// '-' or '--' e.g.: -f -d -ousecache=/tmp +// +// if the key is equal to FUSE_OPT_KEY_NONOPT, it's either the bucket name +// or the mountpoint. The bucket name will always come before the mountpoint +// +static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_args* outargs) +{ + int ret; + if(key == FUSE_OPT_KEY_NONOPT){ + // the first NONOPT option is the bucket name + if(S3fsCred::GetBucket().empty()){ + if ((ret = set_bucket(arg))){ + return ret; + } + return 0; + }else if (!strcmp(arg, "s3fs")) { + return 0; + } + + // the second NONOPT option is the mountpoint(not utility mode) + if(mountpoint.empty() && utility_incomp_type::NO_UTILITY_MODE == utility_mode){ + // save the mountpoint and do some basic error checking + mountpoint = arg; + struct stat stbuf; + +// In MSYS2 environment with WinFsp, it is not needed to create the mount point before mounting. +// Also it causes a conflict with WinFsp's validation, so disabling it. 
+#ifdef __MSYS__ + memset(&stbuf, 0, sizeof stbuf); + set_mountpoint_attribute(stbuf); +#else + if(stat(arg, &stbuf) == -1){ + // check stale mountpoint + if(errno == ENOTCONN){ + print_umount_message(mountpoint, true); + } else { + S3FS_PRN_EXIT("unable to access MOUNTPOINT %s: %s", mountpoint.c_str(), strerror(errno)); + } + return -1; + } + if(!(S_ISDIR(stbuf.st_mode))){ + S3FS_PRN_EXIT("MOUNTPOINT: %s is not a directory.", mountpoint.c_str()); + return -1; + } + if(!set_mountpoint_attribute(stbuf)){ + S3FS_PRN_EXIT("MOUNTPOINT: %s permission denied.", mountpoint.c_str()); + return -1; + } + + if(!nonempty){ + const struct dirent *ent; + DIR *dp = opendir(mountpoint.c_str()); + if(dp == nullptr){ + S3FS_PRN_EXIT("failed to open MOUNTPOINT: %s: %s", mountpoint.c_str(), strerror(errno)); + return -1; + } + while((ent = readdir(dp)) != nullptr){ + if(strcmp(ent->d_name, ".") != 0 && strcmp(ent->d_name, "..") != 0){ + closedir(dp); + S3FS_PRN_EXIT("MOUNTPOINT directory %s is not empty. if you are sure this is safe, can use the 'nonempty' mount option.", mountpoint.c_str()); + return -1; + } + } + closedir(dp); + } +#endif + return 1; + } + + // Unknown option + if(utility_incomp_type::NO_UTILITY_MODE == utility_mode){ + S3FS_PRN_EXIT("specified unknown third option(%s).", arg); + }else{ + S3FS_PRN_EXIT("specified unknown second option(%s). 
you don't need to specify second option(mountpoint) for utility mode(-u).", arg); + } + return -1; + + }else if(key == FUSE_OPT_KEY_OPT){ + if(is_prefix(arg, "uid=")){ + s3fs_uid = get_uid(strchr(arg, '=') + sizeof(char)); + if(0 != geteuid() && 0 == s3fs_uid){ + S3FS_PRN_EXIT("root user can only specify uid=0."); + return -1; + } + is_s3fs_uid = true; + return 1; // continue for fuse option + } + else if(is_prefix(arg, "gid=")){ + s3fs_gid = get_gid(strchr(arg, '=') + sizeof(char)); + if(0 != getegid() && 0 == s3fs_gid){ + S3FS_PRN_EXIT("root user can only specify gid=0."); + return -1; + } + is_s3fs_gid = true; + return 1; // continue for fuse option + } + else if(is_prefix(arg, "bucket_size=")){ + bucket_block_count = parse_bucket_size(const_cast(strchr(arg, '=')) + sizeof(char)); + if(0 == bucket_block_count){ + S3FS_PRN_EXIT("invalid bucket_size option."); + return -1; + } + return 0; + } + else if(is_prefix(arg, "umask=")){ + off_t s3fs_umask_tmp = cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 8); + s3fs_umask = s3fs_umask_tmp & (S_IRWXU | S_IRWXG | S_IRWXO); + is_s3fs_umask = true; + return 1; // continue for fuse option + } + else if(0 == strcmp(arg, "allow_other")){ + allow_other = true; + return 1; // continue for fuse option + } + else if(is_prefix(arg, "mp_umask=")){ + off_t mp_umask_tmp = cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 8); + mp_umask = mp_umask_tmp & (S_IRWXU | S_IRWXG | S_IRWXO); + is_mp_umask = true; + return 0; + } + else if(is_prefix(arg, "default_acl=")){ + const char* acl_string = strchr(arg, '=') + sizeof(char); + acl_t acl = to_acl(acl_string); + if(acl == acl_t::UNKNOWN){ + S3FS_PRN_EXIT("unknown value for default_acl: %s", acl_string); + return -1; + } + S3fsCurl::SetDefaultAcl(acl); + return 0; + } + else if(is_prefix(arg, "retries=")){ + off_t retries = cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10); + if(retries == 0){ + S3FS_PRN_EXIT("retries must be greater than zero"); + return -1; + } 
+ S3fsCurl::SetRetries(static_cast(retries)); + return 0; + } + else if(is_prefix(arg, "tmpdir=")){ + FdManager::SetTmpDir(strchr(arg, '=') + sizeof(char)); + return 0; + } + else if(is_prefix(arg, "use_cache=")){ + FdManager::SetCacheDir(strchr(arg, '=') + sizeof(char)); + return 0; + } + else if(0 == strcmp(arg, "check_cache_dir_exist")){ + FdManager::SetCheckCacheDirExist(true); + return 0; + } + else if(0 == strcmp(arg, "del_cache")){ + is_remove_cache = true; + return 0; + } + else if(is_prefix(arg, "multireq_max=")){ + int maxreq = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + S3fsCurl::SetMaxMultiRequest(maxreq); + return 0; + } + else if(0 == strcmp(arg, "nonempty")){ + nonempty = true; + return 1; // need to continue for fuse. + } + else if(0 == strcmp(arg, "nomultipart")){ + nomultipart = true; + return 0; + } + // old format for storage_class + else if(0 == strcmp(arg, "use_rrs") || is_prefix(arg, "use_rrs=")){ + off_t rrs = 1; + // for an old format. 
+ if(is_prefix(arg, "use_rrs=")){ + rrs = cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10); + } + if(0 == rrs){ + S3fsCurl::SetStorageClass("STANDARD"); + }else if(1 == rrs){ + S3fsCurl::SetStorageClass("REDUCED_REDUNDANCY"); + }else{ + S3FS_PRN_EXIT("poorly formed argument to option: use_rrs"); + return -1; + } + return 0; + } + else if(is_prefix(arg, "storage_class=")){ + const char *storage_class = strchr(arg, '=') + sizeof(char); + S3fsCurl::SetStorageClass(storage_class); + return 0; + } + // + // [NOTE] + // use_sse Set Server Side Encrypting type to SSE-S3 + // use_sse=1 + // use_sse=file Set Server Side Encrypting type to Custom key(SSE-C) and load custom keys + // use_sse=custom(c):file + // use_sse=custom(c) Set Server Side Encrypting type to Custom key(SSE-C) + // use_sse=kmsid(k):kms-key-id Set Server Side Encrypting type to AWS Key Management key id(SSE-KMS) and load KMS id + // use_sse=kmsid(k) Set Server Side Encrypting type to AWS Key Management key id(SSE-KMS) + // + // load_sse_c=file Load Server Side Encrypting custom keys + // + // AWSSSECKEYS Loading Environment for Server Side Encrypting custom keys + // AWSSSEKMSID Loading Environment for Server Side Encrypting Key id + // + else if(is_prefix(arg, "use_sse")){ + if(0 == strcmp(arg, "use_sse") || 0 == strcmp(arg, "use_sse=1")){ // use_sse=1 is old type parameter + // sse type is SSE_S3 + if(!S3fsCurl::IsSseDisable() && !S3fsCurl::IsSseS3Type()){ + S3FS_PRN_EXIT("already set SSE another type, so conflict use_sse option or environment."); + return -1; + } + S3fsCurl::SetSseType(sse_type_t::SSE_S3); + + }else if(0 == strcmp(arg, "use_sse=kmsid") || 0 == strcmp(arg, "use_sse=k")){ + // sse type is SSE_KMS with out kmsid(expecting id is loaded by environment) + if(!S3fsCurl::IsSseDisable() && !S3fsCurl::IsSseKmsType()){ + S3FS_PRN_EXIT("already set SSE another type, so conflict use_sse option or environment."); + return -1; + } + if(!S3fsCurl::IsSetSseKmsId()){ + 
S3FS_PRN_EXIT("use_sse=kms but not loaded kms id by environment."); + return -1; + } + S3fsCurl::SetSseType(sse_type_t::SSE_KMS); + + }else if(is_prefix(arg, "use_sse=kmsid:") || is_prefix(arg, "use_sse=k:")){ + // sse type is SSE_KMS with kmsid + if(!S3fsCurl::IsSseDisable() && !S3fsCurl::IsSseKmsType()){ + S3FS_PRN_EXIT("already set SSE another type, so conflict use_sse option or environment."); + return -1; + } + const char* kmsid; + if(is_prefix(arg, "use_sse=kmsid:")){ + kmsid = &arg[strlen("use_sse=kmsid:")]; + }else{ + kmsid = &arg[strlen("use_sse=k:")]; + } + if(!S3fsCurl::SetSseKmsid(kmsid)){ + S3FS_PRN_EXIT("failed to load use_sse kms id."); + return -1; + } + S3fsCurl::SetSseType(sse_type_t::SSE_KMS); + + }else if(0 == strcmp(arg, "use_sse=custom") || 0 == strcmp(arg, "use_sse=c")){ + // sse type is SSE_C with out custom keys(expecting keys are loaded by environment or load_sse_c option) + if(!S3fsCurl::IsSseDisable() && !S3fsCurl::IsSseCType()){ + S3FS_PRN_EXIT("already set SSE another type, so conflict use_sse option or environment."); + return -1; + } + // [NOTE] + // do not check ckeys exists here. + // + S3fsCurl::SetSseType(sse_type_t::SSE_C); + + }else if(is_prefix(arg, "use_sse=custom:") || is_prefix(arg, "use_sse=c:")){ + // sse type is SSE_C with custom keys + if(!S3fsCurl::IsSseDisable() && !S3fsCurl::IsSseCType()){ + S3FS_PRN_EXIT("already set SSE another type, so conflict use_sse option or environment."); + return -1; + } + const char* ssecfile; + if(is_prefix(arg, "use_sse=custom:")){ + ssecfile = &arg[strlen("use_sse=custom:")]; + }else{ + ssecfile = &arg[strlen("use_sse=c:")]; + } + if(!S3fsCurl::SetSseCKeys(ssecfile)){ + S3FS_PRN_EXIT("failed to load use_sse custom key file(%s).", ssecfile); + return -1; + } + S3fsCurl::SetSseType(sse_type_t::SSE_C); + + }else if(0 == strcmp(arg, "use_sse=")){ // this type is old style(parameter is custom key file path) + // SSE_C with custom keys. 
+ const char* ssecfile = &arg[strlen("use_sse=")]; + if(!S3fsCurl::SetSseCKeys(ssecfile)){ + S3FS_PRN_EXIT("failed to load use_sse custom key file(%s).", ssecfile); + return -1; + } + S3fsCurl::SetSseType(sse_type_t::SSE_C); + + }else{ + // never come here. + S3FS_PRN_EXIT("something wrong use_sse option."); + return -1; + } + return 0; + } + // [NOTE] + // Do only load SSE custom keys, care for set without set sse type. + else if(is_prefix(arg, "load_sse_c=")){ + const char* ssecfile = &arg[strlen("load_sse_c=")]; + if(!S3fsCurl::SetSseCKeys(ssecfile)){ + S3FS_PRN_EXIT("failed to load use_sse custom key file(%s).", ssecfile); + return -1; + } + return 0; + } + else if(is_prefix(arg, "ssl_verify_hostname=")){ + long sslvh = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + if(-1 == S3fsCurl::SetSslVerifyHostname(sslvh)){ + S3FS_PRN_EXIT("poorly formed argument to option: ssl_verify_hostname."); + return -1; + } + return 0; + } + // + // Detect options for credential + // + else if(0 >= (ret = ps3fscred->DetectParam(arg))){ + if(0 > ret){ + return -1; + } + return 0; + } + else if(is_prefix(arg, "public_bucket=")){ + off_t pubbucket = cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10); + if(1 == pubbucket){ + S3fsCurl::SetPublicBucket(true); + // [NOTE] + // if bucket is public(without credential), s3 do not allow copy api. + // so s3fs sets nocopyapi mode. 
+ // + nocopyapi = true; + }else if(0 == pubbucket){ + S3fsCurl::SetPublicBucket(false); + }else{ + S3FS_PRN_EXIT("poorly formed argument to option: public_bucket."); + return -1; + } + return 0; + } + else if(is_prefix(arg, "bucket=")){ + std::string bname = strchr(arg, '=') + sizeof(char); + if ((ret = set_bucket(bname.c_str()))){ + return ret; + } + return 0; + } + else if(0 == strcmp(arg, "no_check_certificate")){ + S3fsCurl::SetCheckCertificate(false); + return 0; + } + else if(is_prefix(arg, "connect_timeout=")){ + long contimeout = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + S3fsCurl::SetConnectTimeout(contimeout); + return 0; + } + else if(is_prefix(arg, "readwrite_timeout=")){ + time_t rwtimeout = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + S3fsCurl::SetReadwriteTimeout(rwtimeout); + return 0; + } + else if(is_prefix(arg, "list_object_max_keys=")){ + int max_keys = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + if(max_keys < 1000){ + S3FS_PRN_EXIT("argument should be over 1000: list_object_max_keys"); + return -1; + } + max_keys_list_object = max_keys; + return 0; + } + else if(is_prefix(arg, "max_stat_cache_size=")){ + unsigned long cache_size = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), 10)); + StatCache::getStatCacheData()->SetCacheSize(cache_size); + return 0; + } + else if(is_prefix(arg, "stat_cache_expire=")){ + time_t expr_time = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), 10)); + StatCache::getStatCacheData()->SetExpireTime(expr_time); + return 0; + } + // [NOTE] + // This option is for compatibility old version. 
+ else if(is_prefix(arg, "stat_cache_interval_expire=")){ + time_t expr_time = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + StatCache::getStatCacheData()->SetExpireTime(expr_time, true); + return 0; + } + else if(0 == strcmp(arg, "enable_noobj_cache")){ + S3FS_PRN_WARN("enable_noobj_cache is enabled by default and a future version will remove this option."); + StatCache::getStatCacheData()->EnableCacheNoObject(); + return 0; + } + else if(0 == strcmp(arg, "disable_noobj_cache")){ + StatCache::getStatCacheData()->DisableCacheNoObject(); + return 0; + } + else if(0 == strcmp(arg, "nodnscache")){ + S3fsCurl::SetDnsCache(false); + return 0; + } + else if(0 == strcmp(arg, "nosscache")){ + S3fsCurl::SetSslSessionCache(false); + return 0; + } + else if(is_prefix(arg, "parallel_count=") || is_prefix(arg, "parallel_upload=")){ + int maxpara = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + if(0 >= maxpara){ + S3FS_PRN_EXIT("argument should be over 1: parallel_count"); + return -1; + } + S3fsCurl::SetMaxParallelCount(maxpara); + return 0; + } + else if(is_prefix(arg, "max_thread_count=")){ + int max_thcount = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + if(0 >= max_thcount){ + S3FS_PRN_EXIT("argument should be over 1: max_thread_count"); + return -1; + } + max_thread_count = max_thcount; + S3FS_PRN_WARN("The max_thread_count option is not a formal option. 
Please note that it will change in the future."); + return 0; + } + else if(is_prefix(arg, "fd_page_size=")){ + S3FS_PRN_ERR("option fd_page_size is no longer supported, so skip this option."); + return 0; + } + else if(is_prefix(arg, "multipart_size=")){ + off_t size = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + if(!S3fsCurl::SetMultipartSize(size)){ + S3FS_PRN_EXIT("multipart_size option must be at least 5 MB."); + return -1; + } + return 0; + } + else if(is_prefix(arg, "multipart_copy_size=")){ + off_t size = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + if(!S3fsCurl::SetMultipartCopySize(size)){ + S3FS_PRN_EXIT("multipart_copy_size option must be at least 5 MB."); + return -1; + } + return 0; + } + else if(is_prefix(arg, "max_dirty_data=")){ + off_t size = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + if(size >= 50){ + size *= 1024 * 1024; + }else if(size != -1){ + S3FS_PRN_EXIT("max_dirty_data option must be at least 50 MB."); + return -1; + } + max_dirty_data = size; + return 0; + } + if(is_prefix(arg, "free_space_ratio=")){ + int ratio = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + + if(FdManager::GetEnsureFreeDiskSpace()!=0){ + S3FS_PRN_EXIT("option free_space_ratio conflicts with ensure_diskfree, please set only one of them."); + return -1; + } + + if(ratio < 0 || ratio > 100){ + S3FS_PRN_EXIT("option free_space_ratio must between 0 to 100, which is: %d", ratio); + return -1; + } + + off_t dfsize = FdManager::GetTotalDiskSpaceByRatio(ratio); + S3FS_PRN_INFO("Free space ratio set to %d %%, ensure the available disk space is greater than %.3f MB", ratio, static_cast(dfsize) / 1024 / 1024); + + if(dfsize < S3fsCurl::GetMultipartSize()){ + S3FS_PRN_WARN("specified size to ensure disk free space is smaller than multipart size, so set multipart size to it."); + dfsize = S3fsCurl::GetMultipartSize(); + } + 
FdManager::SetEnsureFreeDiskSpace(dfsize); + return 0; + } + else if(is_prefix(arg, "ensure_diskfree=")){ + off_t dfsize = cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10) * 1024 * 1024; + + if(FdManager::GetEnsureFreeDiskSpace()!=0){ + S3FS_PRN_EXIT("option free_space_ratio conflicts with ensure_diskfree, please set only one of them."); + return -1; + } + + S3FS_PRN_INFO("Set and ensure the available disk space is greater than %.3f MB.", static_cast(dfsize) / 1024 / 1024); + if(dfsize < S3fsCurl::GetMultipartSize()){ + S3FS_PRN_WARN("specified size to ensure disk free space is smaller than multipart size, so set multipart size to it."); + dfsize = S3fsCurl::GetMultipartSize(); + } + FdManager::SetEnsureFreeDiskSpace(dfsize); + return 0; + } + else if(is_prefix(arg, "fake_diskfree=")){ + S3FS_PRN_WARN("The fake_diskfree option was specified. Use this option for testing or debugging."); + + // [NOTE] This value is used for initializing to FdManager after parsing all options. + fake_diskfree_size = cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10) * 1024 * 1024; + return 0; + } + else if(is_prefix(arg, "multipart_threshold=")){ + multipart_threshold = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)) * 1024 * 1024; + if(multipart_threshold <= MIN_MULTIPART_SIZE){ + S3FS_PRN_EXIT("multipart_threshold must be at least %lld, was: %lld", static_cast(MIN_MULTIPART_SIZE), static_cast(multipart_threshold)); + return -1; + } + return 0; + } + else if(is_prefix(arg, "singlepart_copy_limit=")){ + singlepart_copy_limit = static_cast(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)) * 1024 * 1024; + return 0; + } + else if(is_prefix(arg, "ahbe_conf=")){ + std::string ahbe_conf = strchr(arg, '=') + sizeof(char); + if(!AdditionalHeader::get()->Load(ahbe_conf.c_str())){ + S3FS_PRN_EXIT("failed to load ahbe_conf file(%s).", ahbe_conf.c_str()); + return -1; + } + AdditionalHeader::get()->Dump(); + return 0; + } + else if(0 == 
strcmp(arg, "noxmlns")){ + noxmlns = true; + return 0; + } + else if(0 == strcmp(arg, "nomixupload")){ + FdEntity::SetNoMixMultipart(); + return 0; + } + else if(0 == strcmp(arg, "nocopyapi")){ + nocopyapi = true; + return 0; + } + else if(0 == strcmp(arg, "streamupload")){ + FdEntity::SetStreamUpload(true); + S3FS_PRN_WARN("The streamupload option is not a formal option. Please note that it will change in the future."); + return 0; + } + else if(0 == strcmp(arg, "norenameapi")){ + norenameapi = true; + return 0; + } + else if(0 == strcmp(arg, "complement_stat")){ + complement_stat = true; + return 0; + } + else if(0 == strcmp(arg, "notsup_compat_dir")){ + S3FS_PRN_WARN("notsup_compat_dir is enabled by default and a future version will remove this option."); + support_compat_dir = false; + return 0; + } + else if(0 == strcmp(arg, "compat_dir")){ + support_compat_dir = true; + return 0; + } + else if(0 == strcmp(arg, "enable_content_md5")){ + S3fsCurl::SetContentMd5(true); + return 0; + } + else if(0 == strcmp(arg, "enable_unsigned_payload")){ + S3fsCurl::SetUnsignedPayload(true); + return 0; + } + else if(0 == strcmp(arg, "update_parent_dir_stat")){ + update_parent_dir_stat = true; + return 0; + } + else if(is_prefix(arg, "host=")){ + s3host = strchr(arg, '=') + sizeof(char); + return 0; + } + else if(is_prefix(arg, "servicepath=")){ + service_path = strchr(arg, '=') + sizeof(char); + return 0; + } + else if(is_prefix(arg, "url=")){ + s3host = strchr(arg, '=') + sizeof(char); + // strip the trailing '/', if any, off the end of the host + // std::string + size_t found, length; + found = s3host.find_last_of('/'); + length = s3host.length(); + while(found == (length - 1) && length > 0){ + s3host.erase(found); + found = s3host.find_last_of('/'); + length = s3host.length(); + } + // Check url for http / https protocol std::string + if(!is_prefix(s3host.c_str(), "https://") && !is_prefix(s3host.c_str(), "http://")){ + S3FS_PRN_EXIT("option url has invalid format, missing 
http / https protocol"); + return -1; + } + return 0; + } + else if(0 == strcmp(arg, "sigv2")){ + S3fsCurl::SetSignatureType(signature_type_t::V2_ONLY); + return 0; + } + else if(0 == strcmp(arg, "sigv4")){ + S3fsCurl::SetSignatureType(signature_type_t::V4_ONLY); + return 0; + } + else if(is_prefix(arg, "endpoint=")){ + endpoint = strchr(arg, '=') + sizeof(char); + is_specified_endpoint = true; + return 0; + } + else if(0 == strcmp(arg, "use_path_request_style")){ + pathrequeststyle = true; + return 0; + } + else if(0 == strcmp(arg, "noua")){ + S3fsCurl::SetUserAgentFlag(false); + return 0; + } + else if(0 == strcmp(arg, "listobjectsv2")){ + S3fsCurl::SetListObjectsV2(true); + return 0; + } + else if(0 == strcmp(arg, "use_xattr")){ + is_use_xattr = true; + return 0; + }else if(is_prefix(arg, "use_xattr=")){ + const char* strflag = strchr(arg, '=') + sizeof(char); + if(0 == strcmp(strflag, "1")){ + is_use_xattr = true; + }else if(0 == strcmp(strflag, "0")){ + is_use_xattr = false; + }else{ + S3FS_PRN_EXIT("option use_xattr has unknown parameter(%s).", strflag); + return -1; + } + return 0; + } + else if(is_prefix(arg, "cipher_suites=")){ + cipher_suites = strchr(arg, '=') + sizeof(char); + return 0; + } + else if(is_prefix(arg, "instance_name=")){ + instance_name = strchr(arg, '=') + sizeof(char); + instance_name = "[" + instance_name + "]"; + return 0; + } + else if(is_prefix(arg, "mime=")){ + mimetype_file = strchr(arg, '=') + sizeof(char); + return 0; + } + else if(is_prefix(arg, "proxy=")){ + const char* url = &arg[strlen("proxy=")]; + if(!S3fsCurl::SetProxy(url)){ + S3FS_PRN_EXIT("failed to set proxy(%s).", url); + return -1; + } + return 0; + } + else if(is_prefix(arg, "proxy_cred_file=")){ + const char* file = &arg[strlen("proxy_cred_file=")]; + if(!S3fsCurl::SetProxyUserPwd(file)){ + S3FS_PRN_EXIT("failed to set proxy user and passphrase from file(%s).", file); + return -1; + } + return 0; + } + // + // log file option + // + else if(is_prefix(arg, 
"logfile=")){ + const char* strlogfile = strchr(arg, '=') + sizeof(char); + if(!S3fsLog::SetLogfile(strlogfile)){ + S3FS_PRN_EXIT("The file(%s) specified by logfile option could not be opened.", strlogfile); + return -1; + } + return 0; + } + // + // debug level option + // + else if(is_prefix(arg, "dbglevel=")){ + const char* strlevel = strchr(arg, '=') + sizeof(char); + if(0 == strcasecmp(strlevel, "silent") || 0 == strcasecmp(strlevel, "critical") || 0 == strcasecmp(strlevel, "crit")){ + S3fsLog::SetLogLevel(S3fsLog::LEVEL_CRIT); + }else if(0 == strcasecmp(strlevel, "error") || 0 == strcasecmp(strlevel, "err")){ + S3fsLog::SetLogLevel(S3fsLog::LEVEL_ERR); + }else if(0 == strcasecmp(strlevel, "wan") || 0 == strcasecmp(strlevel, "warn") || 0 == strcasecmp(strlevel, "warning")){ + S3fsLog::SetLogLevel(S3fsLog::LEVEL_WARN); + }else if(0 == strcasecmp(strlevel, "inf") || 0 == strcasecmp(strlevel, "info") || 0 == strcasecmp(strlevel, "information")){ + S3fsLog::SetLogLevel(S3fsLog::LEVEL_INFO); + }else if(0 == strcasecmp(strlevel, "dbg") || 0 == strcasecmp(strlevel, "debug")){ + S3fsLog::SetLogLevel(S3fsLog::LEVEL_DBG); + }else{ + S3FS_PRN_EXIT("option dbglevel has unknown parameter(%s).", strlevel); + return -1; + } + return 0; + } + // + // debug option + // + // S3fsLog level is LEVEL_INFO, after second -d is passed to fuse. + // + else if(0 == strcmp(arg, "-d") || 0 == strcmp(arg, "--debug")){ + if(!S3fsLog::IsS3fsLogInfo() && !S3fsLog::IsS3fsLogDbg()){ + S3fsLog::SetLogLevel(S3fsLog::LEVEL_INFO); + return 0; + } + if(0 == strcmp(arg, "--debug")){ + // fuse doesn't understand "--debug", but it understands -d. + // but we can't pass -d back to fuse. + return 0; + } + } + // "f2" is not used no more. 
+ // (set S3fsLog::LEVEL_DBG) + else if(0 == strcmp(arg, "f2")){ + S3fsLog::SetLogLevel(S3fsLog::LEVEL_DBG); + return 0; + } + else if(0 == strcmp(arg, "curldbg")){ + S3fsCurl::SetVerbose(true); + return 0; + }else if(is_prefix(arg, "curldbg=")){ + const char* strlevel = strchr(arg, '=') + sizeof(char); + if(0 == strcasecmp(strlevel, "normal")){ + S3fsCurl::SetVerbose(true); + }else if(0 == strcasecmp(strlevel, "body")){ + S3fsCurl::SetVerbose(true); + S3fsCurl::SetDumpBody(true); + }else{ + S3FS_PRN_EXIT("option curldbg has unknown parameter(%s).", strlevel); + return -1; + } + return 0; + } + // + // no time stamp in debug message + // + else if(0 == strcmp(arg, "no_time_stamp_msg")){ + S3fsLog::SetTimeStamp(false); + return 0; + } + // + // Check cache file, using SIGUSR1 + // + else if(0 == strcmp(arg, "set_check_cache_sigusr1")){ + if(!S3fsSignals::SetUsr1Handler(nullptr)){ + S3FS_PRN_EXIT("could not set sigusr1 for checking cache."); + return -1; + } + return 0; + }else if(is_prefix(arg, "set_check_cache_sigusr1=")){ + const char* strfilepath = strchr(arg, '=') + sizeof(char); + if(!S3fsSignals::SetUsr1Handler(strfilepath)){ + S3FS_PRN_EXIT("could not set sigusr1 for checking cache and output file(%s).", strfilepath); + return -1; + } + return 0; + } + else if(is_prefix(arg, "accessKeyId=")){ + S3FS_PRN_EXIT("option accessKeyId is no longer supported."); + return -1; + } + else if(is_prefix(arg, "secretAccessKey=")){ + S3FS_PRN_EXIT("option secretAccessKey is no longer supported."); + return -1; + } + else if(0 == strcmp(arg, "use_wtf8")){ + use_wtf8 = true; + return 0; + } + else if(0 == strcmp(arg, "requester_pays")){ + S3fsCurl::SetRequesterPays(true); + return 0; + } + // [NOTE] + // following option will be discarding, because these are not for fuse. 
+ // (Referenced sshfs.c) + // + else if(0 == strcmp(arg, "auto") || + 0 == strcmp(arg, "noauto") || + 0 == strcmp(arg, "user") || + 0 == strcmp(arg, "nouser") || + 0 == strcmp(arg, "users") || + 0 == strcmp(arg, "_netdev")) + { + return 0; + } + else if(is_prefix(arg, "newcache_conf=")){ + newcache_conf = std::string(strchr(arg, '=') + sizeof(char)); + if(!newcache_conf.empty()) use_newcache = true; + return 0; + } + } + return 1; +} + +int main(int argc, char* argv[]) +{ + int ch; + int fuse_res; + int option_index = 0; + struct fuse_operations s3fs_oper; + time_t incomp_abort_time = (24 * 60 * 60); + S3fsLog singletonLog; + + static constexpr struct option long_opts[] = { + {"help", no_argument, nullptr, 'h'}, + {"version", no_argument, nullptr, 0}, + {"debug", no_argument, nullptr, 'd'}, + {"incomplete-mpu-list", no_argument, nullptr, 'u'}, + {"incomplete-mpu-abort", optional_argument, nullptr, 'a'}, // 'a' is only identifier and is not option. + {nullptr, 0, nullptr, 0} + }; + + // init bucket_block_size +#if defined(__MSYS__) + bucket_block_count = static_cast(INT32_MAX); +#elif defined(__APPLE__) + bucket_block_count = static_cast(INT32_MAX); +#else + bucket_block_count = ~0U; +#endif + + // init xml2 + xmlInitParser(); + LIBXML_TEST_VERSION + + init_sysconf_vars(); + + // get program name - emulate basename + program_name = argv[0]; + size_t found = program_name.find_last_of('/'); + if(found != std::string::npos){ + program_name.replace(0, found+1, ""); + } + + // set credential object + // + ps3fscred.reset(new S3fsCred()); + if(!S3fsCurl::InitCredentialObject(ps3fscred.get())){ + S3FS_PRN_EXIT("Failed to setup credential object to s3fs curl."); + exit(EXIT_FAILURE); + } + + while((ch = getopt_long(argc, argv, "dho:fsu", long_opts, &option_index)) != -1){ + switch(ch){ + case 0: + if(strcmp(long_opts[option_index].name, "version") == 0){ + show_version(); + exit(EXIT_SUCCESS); + } + break; + case 'h': + show_help(); + exit(EXIT_SUCCESS); + case 'o': + 
break; + case 'd': + break; + case 'f': + foreground = true; + break; + case 's': + break; + case 'u': // --incomplete-mpu-list + if(utility_incomp_type::NO_UTILITY_MODE != utility_mode){ + S3FS_PRN_EXIT("already utility mode option is specified."); + exit(EXIT_FAILURE); + } + utility_mode = utility_incomp_type::INCOMP_TYPE_LIST; + break; + case 'a': // --incomplete-mpu-abort + if(utility_incomp_type::NO_UTILITY_MODE != utility_mode){ + S3FS_PRN_EXIT("already utility mode option is specified."); + exit(EXIT_FAILURE); + } + utility_mode = utility_incomp_type::INCOMP_TYPE_ABORT; + + // check expire argument + if(nullptr != optarg && 0 == strcasecmp(optarg, "all")){ // all is 0s + incomp_abort_time = 0; + }else if(nullptr != optarg){ + if(!convert_unixtime_from_option_arg(optarg, incomp_abort_time)){ + S3FS_PRN_EXIT("--incomplete-mpu-abort option argument is wrong."); + exit(EXIT_FAILURE); + } + } + // if optarg is null, incomp_abort_time is 24H(default) + break; + default: + exit(EXIT_FAILURE); + } + } + // print launch message + print_launch_message(argc, argv); + + // Load SSE environment + if(!S3fsCurl::LoadEnvSse()){ + S3FS_PRN_EXIT("something wrong about SSE environment."); + exit(EXIT_FAILURE); + } + + // ssl init + if(!s3fs_init_global_ssl()){ + S3FS_PRN_EXIT("could not initialize for ssl libraries."); + exit(EXIT_FAILURE); + } + + // mutex for xml + if(!init_parser_xml_lock()){ + S3FS_PRN_EXIT("could not initialize mutex for xml parser."); + s3fs_destroy_global_ssl(); + exit(EXIT_FAILURE); + } + + // mutex for basename/dirname + if(!init_basename_lock()){ + S3FS_PRN_EXIT("could not initialize mutex for basename/dirname."); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + exit(EXIT_FAILURE); + } + + // init curl (without mime types) + // + // [NOTE] + // The curl initialization here does not load mime types. 
+ // The mime types file parameter are dynamic values according + // to the user's environment, and are analyzed by the my_fuse_opt_proc + // function. + // The my_fuse_opt_proc function is executed after this curl + // initialization. Because the curl method is used in the + // my_fuse_opt_proc function, then it must be called here to + // initialize. Fortunately, the processing using mime types + // is only PUT/POST processing, and it is not used until the + // call of my_fuse_opt_proc function is completed. Therefore, + // the mime type is loaded just after calling the my_fuse_opt_proc + // function. + // + if(!S3fsCurl::InitS3fsCurl()){ + S3FS_PRN_EXIT("Could not initiate curl library."); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + // clear this structure + memset(&s3fs_oper, 0, sizeof(s3fs_oper)); + + // This is the fuse-style parser for the arguments + // after which the bucket name and mountpoint names + // should have been set + struct fuse_args custom_args = FUSE_ARGS_INIT(argc, argv); + if(0 != fuse_opt_parse(&custom_args, nullptr, nullptr, my_fuse_opt_proc)){ + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + // init mime types for curl + if(!S3fsCurl::InitMimeType(mimetype_file)){ + S3FS_PRN_WARN("Missing MIME types prevents setting Content-Type on uploaded objects."); + } + + // [NOTE] + // exclusive option check here. 
+ // + if(strcasecmp(S3fsCurl::GetStorageClass().c_str(), "REDUCED_REDUNDANCY") == 0 && !S3fsCurl::IsSseDisable()){ + S3FS_PRN_EXIT("use_sse option could not be specified with storage class reduced_redundancy."); + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + if(!S3fsCurl::FinalCheckSse()){ + S3FS_PRN_EXIT("something wrong about SSE options."); + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + if(S3fsCurl::GetSignatureType() == signature_type_t::V2_ONLY && S3fsCurl::GetUnsignedPayload()){ + S3FS_PRN_WARN("Ignoring enable_unsigned_payload with sigv2"); + } + + if(!FdEntity::GetNoMixMultipart() && max_dirty_data != -1){ + S3FS_PRN_WARN("Setting max_dirty_data to -1 when nomixupload is enabled"); + max_dirty_data = -1; + } + + // + // Check the combination of parameters for credential + // + if(!ps3fscred->CheckAllParams()){ + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + // The second plain argument is the mountpoint + // if the option was given, we all ready checked for a + // readable, non-empty directory, this checks determines + // if the mountpoint option was ever supplied + if(utility_incomp_type::NO_UTILITY_MODE == utility_mode){ + if(mountpoint.empty()){ + S3FS_PRN_EXIT("missing MOUNTPOINT argument."); + show_usage(); + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + } + + // check tmp dir permission + if(!FdManager::CheckTmpDirExist()){ + S3FS_PRN_EXIT("temporary directory doesn't exists."); + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + // check cache dir permission + 
if(!FdManager::CheckCacheDirExist() || !FdManager::CheckCacheTopDir() || !CacheFileStat::CheckCacheFileStatTopDir()){ + S3FS_PRN_EXIT("could not allow cache directory permission, check permission of cache directories."); + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + // set fake free disk space + if(-1 != fake_diskfree_size){ + FdManager::InitFakeUsedDiskSize(fake_diskfree_size); + } + + // Set default value of free_space_ratio to 10% + if(FdManager::GetEnsureFreeDiskSpace()==0){ + int ratio = 10; + off_t dfsize = FdManager::GetTotalDiskSpaceByRatio(ratio); + S3FS_PRN_INFO("Free space ratio default to %d %%, ensure the available disk space is greater than %.3f MB", ratio, static_cast<double>(dfsize) / 1024 / 1024); + + if(dfsize < S3fsCurl::GetMultipartSize()){ + S3FS_PRN_WARN("specified size to ensure disk free space is smaller than multipart size, so set multipart size to it."); + dfsize = S3fsCurl::GetMultipartSize(); + } + FdManager::SetEnsureFreeDiskSpace(dfsize); + } + + // set user agent + S3fsCurl::InitUserAgent(); + + // There's room for more command line error checking + + // Check to see if the bucket name contains periods and https (SSL) is + // being used. This is a known limitation: + // https://docs.amazonwebservices.com/AmazonS3/latest/dev/ + // The Developers Guide suggests that either use HTTP or for us to write + // our own certificate verification logic. + // For now, this will be unsupported unless we get a request for it to + // be supported. 
In that case, we have a couple of options: + // - implement a command line option that bypasses the verify host + // but doesn't bypass verifying the certificate + // - write our own host verification (this might be complex) + // See issue #128strncasecmp + /* + if(1 == S3fsCurl::GetSslVerifyHostname()){ + found = S3fsCred::GetBucket().find_first_of('.'); + if(found != std::string::npos){ + found = s3host.find("https:"); + if(found != std::string::npos){ + S3FS_PRN_EXIT("Using https and a bucket name with periods is unsupported."); + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + exit(EXIT_FAILURE); + } + } + } + */ + + if(utility_incomp_type::NO_UTILITY_MODE != utility_mode){ + int exitcode = s3fs_utility_processing(incomp_abort_time); + + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(exitcode); + } + + // Check multipart / copy api for mix multipart uploading + if(nomultipart || nocopyapi || norenameapi){ + FdEntity::SetNoMixMultipart(); + max_dirty_data = -1; + } + + // check free disk space + if(!FdManager::IsSafeDiskSpace(nullptr, S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){ + // clean cache dir and retry + S3FS_PRN_WARN("No enough disk space for s3fs, try to clean cache dir"); + FdManager::get()->CleanupCacheDir(); + + if(!FdManager::IsSafeDiskSpaceWithLog(nullptr, S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){ + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + } + + // set mp stat flag object + // + pHasMpStat = new MpStatFlag(); + + s3fs_oper.getattr = s3fs_getattr; // stat() + s3fs_oper.readlink = s3fs_readlink; + s3fs_oper.mknod = s3fs_mknod; + s3fs_oper.mkdir = s3fs_mkdir; + s3fs_oper.unlink = s3fs_unlink; + s3fs_oper.rmdir = s3fs_rmdir; + s3fs_oper.symlink = s3fs_symlink; + s3fs_oper.rename = s3fs_rename; + 
s3fs_oper.link = s3fs_link; + if(!nocopyapi){ + s3fs_oper.chmod = s3fs_chmod; + s3fs_oper.chown = s3fs_chown; + s3fs_oper.utimens = s3fs_utimens; + }else{ + s3fs_oper.chmod = s3fs_chmod_nocopy; + s3fs_oper.chown = s3fs_chown_nocopy; + s3fs_oper.utimens = s3fs_utimens_nocopy; + } + s3fs_oper.truncate = s3fs_truncate; + s3fs_oper.open = s3fs_open; + s3fs_oper.read = s3fs_read; + s3fs_oper.write = s3fs_write; + s3fs_oper.statfs = s3fs_statfs; + s3fs_oper.flush = s3fs_flush; + s3fs_oper.fsync = s3fs_fsync; + s3fs_oper.release = s3fs_release; + s3fs_oper.opendir = s3fs_opendir; + s3fs_oper.readdir = s3fs_readdir; // list + s3fs_oper.init = s3fs_init; + s3fs_oper.destroy = s3fs_destroy; + s3fs_oper.access = s3fs_access; + s3fs_oper.create = s3fs_create; + // extended attributes + if(is_use_xattr){ + s3fs_oper.setxattr = s3fs_setxattr; + s3fs_oper.getxattr = s3fs_getxattr; + s3fs_oper.listxattr = s3fs_listxattr; + s3fs_oper.removexattr = s3fs_removexattr; + } + s3fs_oper.flag_utime_omit_ok = true; + + if(use_newcache){ + HybridCache::HybridCacheConfig cfg; + HybridCache::GetHybridCacheConfig(newcache_conf, cfg); + accessor = std::make_shared(cfg); + } + + // now passing things off to fuse, fuse will finish evaluating the command line args + fuse_res = fuse_main(custom_args.argc, custom_args.argv, &s3fs_oper, nullptr); + if(fuse_res == 0){ + fuse_res = s3fs_init_deferred_exit_status; + } + fuse_opt_free_args(&custom_args); + + // Destroy curl + if(!S3fsCurl::DestroyS3fsCurl()){ + S3FS_PRN_WARN("Could not release curl library."); + } + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + delete pHasMpStat; + + // cleanup xml2 + xmlCleanupParser(); + S3FS_MALLOCTRIM(0); + + if(use_newcache){ + accessor.reset(); + } + + exit(fuse_res); +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs.h b/s3fs/s3fs.h new file mode 100644 
index 0000000..29c84f4 --- /dev/null +++ b/s3fs/s3fs.h @@ -0,0 +1,92 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_S3FS_H_ +#define S3FS_S3FS_H_ + +#define FUSE_USE_VERSION 26 + +#include + +#define S3FS_FUSE_EXIT() \ + do{ \ + struct fuse_context* pcxt = fuse_get_context(); \ + if(pcxt){ \ + fuse_exit(pcxt->fuse); \ + } \ + }while(0) + +// [NOTE] +// s3fs use many small allocated chunk in heap area for stats +// cache and parsing xml, etc. The OS may decide that giving +// this little memory back to the kernel will cause too much +// overhead and delay the operation. +// Address of gratitude, this workaround quotes a document of +// libxml2.( http://xmlsoft.org/xmlmem.html ) +// +// When valgrind is used to test memory leak of s3fs, a large +// amount of chunk may be reported. You can check the memory +// release accurately by defining the S3FS_MALLOC_TRIM flag +// and building it. 
Also, when executing s3fs, you can define +// the MMAP_THRESHOLD environment variable and check more +// accurate memory leak.( see, man 3 free ) +// +#ifdef S3FS_MALLOC_TRIM +#ifdef HAVE_MALLOC_TRIM +#include +#define S3FS_MALLOCTRIM(pad) malloc_trim(pad) +#else // HAVE_MALLOC_TRIM +#define S3FS_MALLOCTRIM(pad) +#endif // HAVE_MALLOC_TRIM +#else // S3FS_MALLOC_TRIM +#define S3FS_MALLOCTRIM(pad) +#endif // S3FS_MALLOC_TRIM + +#define S3FS_XMLFREEDOC(doc) \ + do{ \ + xmlFreeDoc(doc); \ + S3FS_MALLOCTRIM(0); \ + }while(0) +#define S3FS_XMLFREE(ptr) \ + do{ \ + xmlFree(ptr); \ + S3FS_MALLOCTRIM(0); \ + }while(0) +#define S3FS_XMLXPATHFREECONTEXT(ctx) \ + do{ \ + xmlXPathFreeContext(ctx); \ + S3FS_MALLOCTRIM(0); \ + }while(0) +#define S3FS_XMLXPATHFREEOBJECT(obj) \ + do{ \ + xmlXPathFreeObject(obj); \ + S3FS_MALLOCTRIM(0); \ + }while(0) + +#endif // S3FS_S3FS_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_auth.h b/s3fs/s3fs_auth.h new file mode 100644 index 0000000..6b373fc --- /dev/null +++ b/s3fs/s3fs_auth.h @@ -0,0 +1,66 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef S3FS_AUTH_H_ +#define S3FS_AUTH_H_ + +#include <array> +#include <memory> +#include <string> +#include <sys/types.h> + +typedef std::array<unsigned char, 16> md5_t; +typedef std::array<unsigned char, 32> sha256_t; + +//------------------------------------------------------------------- +// Utility functions for Authentication +//------------------------------------------------------------------- +// +// in common_auth.cpp +// +std::string s3fs_get_content_md5(int fd); +std::string s3fs_sha256_hex_fd(int fd, off_t start, off_t size); +std::string s3fs_get_content_md5(off_t fsize, char* buf); + +// +// in xxxxxx_auth.cpp +// +const char* s3fs_crypt_lib_name(); +bool s3fs_init_global_ssl(); +bool s3fs_destroy_global_ssl(); +bool s3fs_init_crypt_mutex(); +bool s3fs_destroy_crypt_mutex(); +std::unique_ptr<unsigned char[]> s3fs_HMAC(const void* key, size_t keylen, const unsigned char* data, size_t datalen, unsigned int* digestlen); +std::unique_ptr<unsigned char[]> s3fs_HMAC256(const void* key, size_t keylen, const unsigned char* data, size_t datalen, unsigned int* digestlen); +bool s3fs_md5(const unsigned char* data, size_t datalen, md5_t* result); +bool s3fs_md5_fd(int fd, off_t start, off_t size, md5_t* result); +bool s3fs_sha256(const unsigned char* data, size_t datalen, sha256_t* digest); +bool s3fs_sha256_fd(int fd, off_t start, off_t size, sha256_t* result); + +#endif // S3FS_AUTH_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_cred.cpp b/s3fs/s3fs_cred.cpp new file mode 100644 index 0000000..bce0c95 --- /dev/null +++ b/s3fs/s3fs_cred.cpp @@ -0,0 +1,1628 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "s3fs_cred.h" +#include "s3fs_help.h" +#include "s3fs_logger.h" +#include "curl.h" +#include "string_util.h" +#include "metaheader.h" + +//------------------------------------------------------------------- +// Symbols +//------------------------------------------------------------------- +static constexpr char DEFAULT_AWS_PROFILE_NAME[] = "default"; + +//------------------------------------------------------------------- +// External Credential dummy function +//------------------------------------------------------------------- +// [NOTE] +// This function expects the following values: +// +// detail=false ex. "Custom AWS Credential Library - v1.0.0" +// detail=true ex. "Custom AWS Credential Library - v1.0.0 +// s3fs-fuse credential I/F library for S3 compatible strage X. 
+// Copyright(C) 2022 Foo" +// +const char* VersionS3fsCredential(bool detail) +{ + static constexpr char version[] = "built-in"; + static constexpr char detail_version[] = + "s3fs-fuse built-in Credential I/F Function\n" + "Copyright(C) 2007 s3fs-fuse\n"; + + if(detail){ + return detail_version; + }else{ + return version; + } +} + +bool InitS3fsCredential(const char* popts, char** pperrstr) +{ + if(popts && 0 < strlen(popts)){ + S3FS_PRN_WARN("The external credential library does not have InitS3fsCredential function, but credlib_opts value is not empty(%s)", popts); + } + if(pperrstr){ + *pperrstr = strdup("The external credential library does not have InitS3fsCredential function, so built-in function was called."); + }else{ + S3FS_PRN_INFO("The external credential library does not have InitS3fsCredential function, so built-in function was called."); + } + return true; +} + +bool FreeS3fsCredential(char** pperrstr) +{ + if(pperrstr){ + *pperrstr = strdup("The external credential library does not have FreeS3fsCredential function, so built-in function was called."); + }else{ + S3FS_PRN_INFO("The external credential library does not have FreeS3fsCredential function, so built-in function was called."); + } + return true; +} + +bool UpdateS3fsCredential(char** ppaccess_key_id, char** ppserect_access_key, char** ppaccess_token, long long* ptoken_expire, char** pperrstr) +{ + S3FS_PRN_INFO("Parameters : ppaccess_key_id=%p, ppserect_access_key=%p, ppaccess_token=%p, ptoken_expire=%p", ppaccess_key_id, ppserect_access_key, ppaccess_token, ptoken_expire); + + if(pperrstr){ + *pperrstr = strdup("Check why built-in function was called, the external credential library must have UpdateS3fsCredential function."); + }else{ + S3FS_PRN_CRIT("Check why built-in function was called, the external credential library must have UpdateS3fsCredential function."); + } + + if(ppaccess_key_id){ + *ppaccess_key_id = nullptr; + } + if(ppserect_access_key){ + *ppserect_access_key = nullptr; + } 
+ if(ppaccess_token){ + *ppaccess_token = nullptr; + } + return false; // always false +} + +//------------------------------------------------------------------- +// Class Variables +//------------------------------------------------------------------- +constexpr char S3fsCred::ALLBUCKET_FIELDS_TYPE[]; +constexpr char S3fsCred::KEYVAL_FIELDS_TYPE[]; +constexpr char S3fsCred::AWS_ACCESSKEYID[]; +constexpr char S3fsCred::AWS_SECRETKEY[]; + +constexpr char S3fsCred::ECS_IAM_ENV_VAR[]; +constexpr char S3fsCred::IAMCRED_ACCESSKEYID[]; +constexpr char S3fsCred::IAMCRED_SECRETACCESSKEY[]; +constexpr char S3fsCred::IAMCRED_ROLEARN[]; + +constexpr char S3fsCred::IAMv2_token_url[]; +constexpr char S3fsCred::IAMv2_token_ttl_hdr[]; +constexpr char S3fsCred::IAMv2_token_hdr[]; + +std::string S3fsCred::bucket_name; + +//------------------------------------------------------------------- +// Class Methods +//------------------------------------------------------------------- +bool S3fsCred::SetBucket(const char* bucket) +{ + if(!bucket || strlen(bucket) == 0){ + return false; + } + S3fsCred::bucket_name = bucket; + return true; +} + +const std::string& S3fsCred::GetBucket() +{ + return S3fsCred::bucket_name; +} + +bool S3fsCred::ParseIAMRoleFromMetaDataResponse(const char* response, std::string& rolename) +{ + if(!response){ + return false; + } + // [NOTE] + // expected following strings. 
+ // + // myrolename + // + std::istringstream ssrole(response); + std::string oneline; + if (getline(ssrole, oneline, '\n')){ + rolename = oneline; + return !rolename.empty(); + } + return false; +} + +//------------------------------------------------------------------- +// Methods : Constructor / Destructor +//------------------------------------------------------------------- +S3fsCred::S3fsCred() : + is_lock_init(false), + aws_profile(DEFAULT_AWS_PROFILE_NAME), + load_iamrole(false), + AWSAccessTokenExpire(0), + is_ecs(false), + is_use_session_token(false), + is_ibm_iam_auth(false), + IAM_cred_url("http://169.254.169.254/latest/meta-data/iam/security-credentials/"), + IAM_api_version(2), + IAM_field_count(4), + IAM_token_field("Token"), + IAM_expiry_field("Expiration"), + set_builtin_cred_opts(false), + hExtCredLib(nullptr), + pFuncCredVersion(VersionS3fsCredential), + pFuncCredInit(InitS3fsCredential), + pFuncCredFree(FreeS3fsCredential), + pFuncCredUpdate(UpdateS3fsCredential) +{ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + int result; + if(0 != (result = pthread_mutex_init(&token_lock, &attr))){ + S3FS_PRN_CRIT("failed to init token_lock: %d", result); + abort(); + } + is_lock_init = true; +} + +S3fsCred::~S3fsCred() +{ + UnloadExtCredLib(); + + if(is_lock_init){ + int result; + if(0 != (result = pthread_mutex_destroy(&token_lock))){ + S3FS_PRN_CRIT("failed to destroy token_lock: %d", result); + abort(); + } + is_lock_init = false; + } +} + +//------------------------------------------------------------------- +// Methods : Access member variables +//------------------------------------------------------------------- +bool S3fsCred::SetS3fsPasswdFile(const char* file) +{ + if(!file || strlen(file) == 0){ + return false; + } + passwd_file = file; + + return true; +} + +bool S3fsCred::IsSetPasswdFile() const +{ + return !passwd_file.empty(); +} 
+ +bool S3fsCred::SetAwsProfileName(const char* name) +{ + if(!name || strlen(name) == 0){ + return false; + } + aws_profile = name; + + return true; +} + +bool S3fsCred::SetIAMRoleMetadataType(bool flag) +{ + bool old = load_iamrole; + load_iamrole = flag; + return old; +} + +bool S3fsCred::SetAccessKey(const char* AccessKeyId, const char* SecretAccessKey, AutoLock::Type type) +{ + AutoLock auto_lock(&token_lock, type); + + if((!is_ibm_iam_auth && (!AccessKeyId || '\0' == AccessKeyId[0])) || !SecretAccessKey || '\0' == SecretAccessKey[0]){ + return false; + } + AWSAccessKeyId = AccessKeyId; + AWSSecretAccessKey = SecretAccessKey; + + return true; +} + +bool S3fsCred::SetAccessKeyWithSessionToken(const char* AccessKeyId, const char* SecretAccessKey, const char * SessionToken, AutoLock::Type type) +{ + AutoLock auto_lock(&token_lock, type); + + bool access_key_is_empty = !AccessKeyId || '\0' == AccessKeyId[0]; + bool secret_access_key_is_empty = !SecretAccessKey || '\0' == SecretAccessKey[0]; + bool session_token_is_empty = !SessionToken || '\0' == SessionToken[0]; + + if((!is_ibm_iam_auth && access_key_is_empty) || secret_access_key_is_empty || session_token_is_empty){ + return false; + } + AWSAccessKeyId = AccessKeyId; + AWSSecretAccessKey = SecretAccessKey; + AWSAccessToken = SessionToken; + is_use_session_token= true; + + return true; +} + +bool S3fsCred::IsSetAccessKeys(AutoLock::Type type) const +{ + AutoLock auto_lock(&token_lock, type); + + return IsSetIAMRole(AutoLock::ALREADY_LOCKED) || ((!AWSAccessKeyId.empty() || is_ibm_iam_auth) && !AWSSecretAccessKey.empty()); +} + +bool S3fsCred::SetIsECS(bool flag) +{ + bool old = is_ecs; + is_ecs = flag; + return old; +} + +bool S3fsCred::SetIsUseSessionToken(bool flag) +{ + bool old = is_use_session_token; + is_use_session_token = flag; + return old; +} + +bool S3fsCred::SetIsIBMIAMAuth(bool flag) +{ + bool old = is_ibm_iam_auth; + is_ibm_iam_auth = flag; + return old; +} + +bool S3fsCred::SetIAMRole(const char* 
role, AutoLock::Type type) +{ + AutoLock auto_lock(&token_lock, type); + + IAM_role = role ? role : ""; + return true; +} + +std::string S3fsCred::GetIAMRole(AutoLock::Type type) const +{ + AutoLock auto_lock(&token_lock, type); + + return IAM_role; +} + +bool S3fsCred::IsSetIAMRole(AutoLock::Type type) const +{ + AutoLock auto_lock(&token_lock, type); + + return !IAM_role.empty(); +} + +size_t S3fsCred::SetIAMFieldCount(size_t field_count) +{ + size_t old = IAM_field_count; + IAM_field_count = field_count; + return old; +} + +std::string S3fsCred::SetIAMCredentialsURL(const char* url) +{ + std::string old = IAM_cred_url; + IAM_cred_url = url ? url : ""; + return old; +} + +std::string S3fsCred::SetIAMTokenField(const char* token_field) +{ + std::string old = IAM_token_field; + IAM_token_field = token_field ? token_field : ""; + return old; +} + +std::string S3fsCred::SetIAMExpiryField(const char* expiry_field) +{ + std::string old = IAM_expiry_field; + IAM_expiry_field = expiry_field ? expiry_field : ""; + return old; +} + +bool S3fsCred::GetIAMCredentialsURL(std::string& url, bool check_iam_role, AutoLock::Type type) +{ + // check + if(check_iam_role && !is_ecs && !IsIBMIAMAuth()){ + if(!IsSetIAMRole(type)) { + S3FS_PRN_ERR("IAM role name is empty."); + return false; + } + S3FS_PRN_INFO3("[IAM role=%s]", GetIAMRole(type).c_str()); + } + + if(is_ecs){ + const char *env = std::getenv(S3fsCred::ECS_IAM_ENV_VAR); + if(env == nullptr){ + S3FS_PRN_ERR("%s is not set.", S3fsCred::ECS_IAM_ENV_VAR); + return false; + } + url = IAM_cred_url + env; + + }else if(IsIBMIAMAuth()){ + url = IAM_cred_url; + + }else{ + // [NOTE] + // To avoid deadlocking, do not manipulate the S3fsCred object + // in the S3fsCurl::GetIAMv2ApiToken method (when retrying). 
+ // + AutoLock auto_lock(&token_lock, type); // Lock for IAM_api_version, IAMv2_api_token + + if(GetIMDSVersion(AutoLock::ALREADY_LOCKED) > 1){ + S3fsCurl s3fscurl; + std::string token; + int result = s3fscurl.GetIAMv2ApiToken(S3fsCred::IAMv2_token_url, S3fsCred::IAMv2_token_ttl, S3fsCred::IAMv2_token_ttl_hdr, token); + if(-ENOENT == result){ + // If we get a 404 back when requesting the token service, + // then it's highly likely we're running in an environment + // that doesn't support the AWS IMDSv2 API, so we'll skip + // the token retrieval in the future. + SetIMDSVersion(1, AutoLock::ALREADY_LOCKED); + + }else if(result != 0){ + // If we get an unexpected error when retrieving the API + // token, log it but continue. Requirement for including + // an API token with the metadata request may or may not + // be required, so we should not abort here. + S3FS_PRN_ERR("AWS IMDSv2 token retrieval failed: %d", result); + + }else{ + // Set token + if(!SetIAMv2APIToken(token, AutoLock::ALREADY_LOCKED)){ + S3FS_PRN_ERR("Error storing IMDSv2 API token(%s).", token.c_str()); + } + } + } + if(check_iam_role){ + url = IAM_cred_url + GetIAMRole(AutoLock::ALREADY_LOCKED); + }else{ + url = IAM_cred_url; + } + } + return true; +} + +int S3fsCred::SetIMDSVersion(int version, AutoLock::Type type) +{ + AutoLock auto_lock(&token_lock, type); + + int old = IAM_api_version; + IAM_api_version = version; + return old; +} + +int S3fsCred::GetIMDSVersion(AutoLock::Type type) const +{ + AutoLock auto_lock(&token_lock, type); + + return IAM_api_version; +} + +bool S3fsCred::SetIAMv2APIToken(const std::string& token, AutoLock::Type type) +{ + S3FS_PRN_INFO3("Setting AWS IMDSv2 API token to %s", token.c_str()); + + AutoLock auto_lock(&token_lock, type); + + if(token.empty()){ + return false; + } + IAMv2_api_token = token; + return true; +} + +std::string S3fsCred::GetIAMv2APIToken(AutoLock::Type type) const +{ + AutoLock auto_lock(&token_lock, type); + + return IAMv2_api_token; +} + +// 
[NOTE] +// Currently, token_lock is always locked before calling this method, +// and this method calls the S3fsCurl::GetIAMCredentials method. +// Currently, when the request fails and retries in the process of +// S3fsCurl::GetIAMCredentials, does not use the S3fsCred object in +// retry logic. +// Be careful not to deadlock whenever you change this logic. +// +bool S3fsCred::LoadIAMCredentials(AutoLock::Type type) +{ + // url(check iam role) + std::string url; + + AutoLock auto_lock(&token_lock, type); + + if(!GetIAMCredentialsURL(url, true, AutoLock::ALREADY_LOCKED)){ + return false; + } + + const char* iam_v2_token = nullptr; + std::string str_iam_v2_token; + if(GetIMDSVersion(AutoLock::ALREADY_LOCKED) > 1){ + str_iam_v2_token = GetIAMv2APIToken(AutoLock::ALREADY_LOCKED); + iam_v2_token = str_iam_v2_token.c_str(); + } + + const char* ibm_secret_access_key = nullptr; + std::string str_ibm_secret_access_key; + if(IsIBMIAMAuth()){ + str_ibm_secret_access_key = AWSSecretAccessKey; + ibm_secret_access_key = str_ibm_secret_access_key.c_str(); + } + + S3fsCurl s3fscurl; + std::string response; + if(!s3fscurl.GetIAMCredentials(url.c_str(), iam_v2_token, ibm_secret_access_key, response)){ + return false; + } + + if(!SetIAMCredentials(response.c_str(), AutoLock::ALREADY_LOCKED)){ + S3FS_PRN_ERR("Something error occurred, could not set IAM role name."); + return false; + } + return true; +} + +// +// load IAM role name from http://169.254.169.254/latest/meta-data/iam/security-credentials +// +bool S3fsCred::LoadIAMRoleFromMetaData() +{ + AutoLock auto_lock(&token_lock); + + if(load_iamrole){ + // url(not check iam role) + std::string url; + + if(!GetIAMCredentialsURL(url, false, AutoLock::ALREADY_LOCKED)){ + return false; + } + + const char* iam_v2_token = nullptr; + std::string str_iam_v2_token; + if(GetIMDSVersion(AutoLock::ALREADY_LOCKED) > 1){ + str_iam_v2_token = GetIAMv2APIToken(AutoLock::ALREADY_LOCKED); + iam_v2_token = str_iam_v2_token.c_str(); + } + + S3fsCurl 
s3fscurl; + std::string token; + if(!s3fscurl.GetIAMRoleFromMetaData(url.c_str(), iam_v2_token, token)){ + return false; + } + + if(!SetIAMRoleFromMetaData(token.c_str(), AutoLock::ALREADY_LOCKED)){ + S3FS_PRN_ERR("Something error occurred, could not set IAM role name."); + return false; + } + S3FS_PRN_INFO("loaded IAM role name = %s", GetIAMRole(AutoLock::ALREADY_LOCKED).c_str()); + } + return true; +} + +bool S3fsCred::SetIAMCredentials(const char* response, AutoLock::Type type) +{ + S3FS_PRN_INFO3("IAM credential response = \"%s\"", response); + + iamcredmap_t keyval; + + if(!ParseIAMCredentialResponse(response, keyval)){ + return false; + } + + if(IAM_field_count != keyval.size()){ + return false; + } + + AutoLock auto_lock(&token_lock, type); + + AWSAccessToken = keyval[IAM_token_field]; + + if(is_ibm_iam_auth){ + off_t tmp_expire = 0; + if(!s3fs_strtoofft(&tmp_expire, keyval[IAM_expiry_field].c_str(), /*base=*/ 10)){ + return false; + } + AWSAccessTokenExpire = static_cast(tmp_expire); + }else{ + AWSAccessKeyId = keyval[S3fsCred::IAMCRED_ACCESSKEYID]; + AWSSecretAccessKey = keyval[S3fsCred::IAMCRED_SECRETACCESSKEY]; + AWSAccessTokenExpire = cvtIAMExpireStringToTime(keyval[IAM_expiry_field].c_str()); + } + return true; +} + +bool S3fsCred::SetIAMRoleFromMetaData(const char* response, AutoLock::Type type) +{ + S3FS_PRN_INFO3("IAM role name response = \"%s\"", response ? 
response : "(null)"); + + std::string rolename; + if(!S3fsCred::ParseIAMRoleFromMetaDataResponse(response, rolename)){ + return false; + } + + SetIAMRole(rolename.c_str(), type); + return true; +} + +//------------------------------------------------------------------- +// Methods : for Credentials +//------------------------------------------------------------------- +// +// Check passwd file readable +// +bool S3fsCred::IsReadableS3fsPasswdFile() const +{ + if(passwd_file.empty()){ + return false; + } + + std::ifstream PF(passwd_file.c_str()); + if(!PF.good()){ + return false; + } + PF.close(); + + return true; +} + +// +// S3fsCred::CheckS3fsPasswdFilePerms +// +// expect that global passwd_file variable contains +// a non-empty value and is readable by the current user +// +// Check for too permissive access to the file +// help save users from themselves via a security hole +// +// only two options: return or error out +// +bool S3fsCred::CheckS3fsPasswdFilePerms() +{ + struct stat info; + + // let's get the file info + if(stat(passwd_file.c_str(), &info) != 0){ + S3FS_PRN_EXIT("unexpected error from stat(%s): %s", passwd_file.c_str(), strerror(errno)); + return false; + } + + // Check readable + if(!IsReadableS3fsPasswdFile()){ + S3FS_PRN_EXIT("S3fs passwd file \"%s\" is not readable.", passwd_file.c_str()); + return false; + } + + // return error if any file has others permissions + if( (info.st_mode & S_IROTH) || + (info.st_mode & S_IWOTH) || + (info.st_mode & S_IXOTH)) { + S3FS_PRN_EXIT("credentials file %s should not have others permissions.", passwd_file.c_str()); + return false; + } + + // Any local file should not have any group permissions + // /etc/passwd-s3fs can have group permissions + if(passwd_file != "/etc/passwd-s3fs"){ + if( (info.st_mode & S_IRGRP) || + (info.st_mode & S_IWGRP) || + (info.st_mode & S_IXGRP)) { + S3FS_PRN_EXIT("credentials file %s should not have group permissions.", passwd_file.c_str()); + return false; + } + }else{ + // 
"/etc/passwd-s3fs" does not allow group write. + if((info.st_mode & S_IWGRP)){ + S3FS_PRN_EXIT("credentials file %s should not have group writable permissions.", passwd_file.c_str()); + return false; + } + } + if((info.st_mode & S_IXUSR) || (info.st_mode & S_IXGRP)){ + S3FS_PRN_EXIT("credentials file %s should not have executable permissions.", passwd_file.c_str()); + return false; + } + return true; +} + +// +// Read and Parse passwd file +// +// The line of the password file is one of the following formats: +// (1) "accesskey:secretkey" : AWS format for default(all) access key/secret key +// (2) "bucket:accesskey:secretkey" : AWS format for bucket's access key/secret key +// (3) "key=value" : Content-dependent KeyValue contents +// +// This function sets result into bucketkvmap_t, it bucket name and key&value mapping. +// If bucket name is empty(1 or 3 format), bucket name for mapping is set "\t" or "". +// +// Return: true - Succeed parsing +// false - Should shutdown immediately +// +bool S3fsCred::ParseS3fsPasswdFile(bucketkvmap_t& resmap) +{ + std::string line; + size_t first_pos; + readline_t linelist; + readline_t::iterator iter; + + // open passwd file + std::ifstream PF(passwd_file.c_str()); + if(!PF.good()){ + S3FS_PRN_EXIT("could not open passwd file : %s", passwd_file.c_str()); + return false; + } + + // read each line + while(getline(PF, line)){ + line = trim(line); + if(line.empty()){ + continue; + } + if('#' == line[0]){ + continue; + } + if(std::string::npos != line.find_first_of(" \t")){ + S3FS_PRN_EXIT("invalid line in passwd file, found whitespace character."); + return false; + } + if('[' == line[0]){ + S3FS_PRN_EXIT("invalid line in passwd file, found a bracket \"[\" character."); + return false; + } + linelist.push_back(line); + } + + // read '=' type + kvmap_t kv; + for(iter = linelist.begin(); iter != linelist.end(); ++iter){ + first_pos = iter->find_first_of('='); + if(first_pos == std::string::npos){ + continue; + } + // formatted by 
"key=val" + std::string key = trim(iter->substr(0, first_pos)); + std::string val = trim(iter->substr(first_pos + 1, std::string::npos)); + if(key.empty()){ + continue; + } + if(kv.end() != kv.find(key)){ + S3FS_PRN_WARN("same key name(%s) found in passwd file, skip this.", key.c_str()); + continue; + } + kv[key] = val; + } + // set special key name + resmap[S3fsCred::KEYVAL_FIELDS_TYPE] = kv; + + // read ':' type + for(iter = linelist.begin(); iter != linelist.end(); ++iter){ + first_pos = iter->find_first_of(':'); + size_t last_pos = iter->find_last_of(':'); + if(first_pos == std::string::npos){ + continue; + } + std::string bucketname; + std::string accesskey; + std::string secret; + if(first_pos != last_pos){ + // formatted by "bucket:accesskey:secretkey" + bucketname= trim(iter->substr(0, first_pos)); + accesskey = trim(iter->substr(first_pos + 1, last_pos - first_pos - 1)); + secret = trim(iter->substr(last_pos + 1, std::string::npos)); + }else{ + // formatted by "accesskey:secretkey" + bucketname= S3fsCred::ALLBUCKET_FIELDS_TYPE; + accesskey = trim(iter->substr(0, first_pos)); + secret = trim(iter->substr(first_pos + 1, std::string::npos)); + } + if(resmap.end() != resmap.find(bucketname)){ + S3FS_PRN_EXIT("there are multiple entries for the same bucket(%s) in the passwd file.", (bucketname.empty() ? 
"default" : bucketname.c_str())); + return false; + } + kv.clear(); + kv[S3fsCred::AWS_ACCESSKEYID] = accesskey; + kv[S3fsCred::AWS_SECRETKEY] = secret; + resmap[bucketname] = kv; + } + return true; +} + +// +// ReadS3fsPasswdFile +// +// Support for per bucket credentials +// +// Format for the credentials file: +// [bucket:]AccessKeyId:SecretAccessKey +// +// Lines beginning with # are considered comments +// and ignored, as are empty lines +// +// Uncommented lines without the ":" character are flagged as +// an error, so are lines with spaces or tabs +// +// only one default key pair is allowed, but not required +// +bool S3fsCred::ReadS3fsPasswdFile(AutoLock::Type type) +{ + bucketkvmap_t bucketmap; + kvmap_t keyval; + + // if you got here, the password file + // exists and is readable by the + // current user, check for permissions + if(!CheckS3fsPasswdFilePerms()){ + return false; + } + + // + // parse passwd file + // + if(!ParseS3fsPasswdFile(bucketmap)){ + return false; + } + + // + // check key=value type format. 
+ // + bucketkvmap_t::iterator it = bucketmap.find(S3fsCred::KEYVAL_FIELDS_TYPE); + if(bucketmap.end() != it){ + // aws format + std::string access_key_id; + std::string secret_access_key; + int result = CheckS3fsCredentialAwsFormat(it->second, access_key_id, secret_access_key); + if(-1 == result){ + return false; + }else if(1 == result){ + // found ascess(secret) keys + if(!SetAccessKey(access_key_id.c_str(), secret_access_key.c_str(), type)){ + S3FS_PRN_EXIT("failed to set access key/secret key."); + return false; + } + return true; + } + } + + std::string bucket_key = S3fsCred::ALLBUCKET_FIELDS_TYPE; + if(!S3fsCred::bucket_name.empty() && bucketmap.end() != bucketmap.find(S3fsCred::bucket_name)){ + bucket_key = S3fsCred::bucket_name; + } + + it = bucketmap.find(bucket_key); + if(bucketmap.end() == it){ + S3FS_PRN_EXIT("Not found access key/secret key in passwd file."); + return false; + } + keyval = it->second; + kvmap_t::iterator aws_accesskeyid_it = keyval.find(S3fsCred::AWS_ACCESSKEYID); + kvmap_t::iterator aws_secretkey_it = keyval.find(S3fsCred::AWS_SECRETKEY); + if(keyval.end() == aws_accesskeyid_it || keyval.end() == aws_secretkey_it){ + S3FS_PRN_EXIT("Not found access key/secret key in passwd file."); + return false; + } + + if(!SetAccessKey(aws_accesskeyid_it->second.c_str(), aws_secretkey_it->second.c_str(), type)){ + S3FS_PRN_EXIT("failed to set internal data for access key/secret key from passwd file."); + return false; + } + return true; +} + +// +// Return: 1 - OK(could read and set accesskey etc.) 
+// 0 - NG(could not read) +// -1 - Should shutdown immediately +// +int S3fsCred::CheckS3fsCredentialAwsFormat(const kvmap_t& kvmap, std::string& access_key_id, std::string& secret_access_key) +{ + std::string str1(S3fsCred::AWS_ACCESSKEYID); + std::string str2(S3fsCred::AWS_SECRETKEY); + + if(kvmap.empty()){ + return 0; + } + kvmap_t::const_iterator str1_it = kvmap.find(str1); + kvmap_t::const_iterator str2_it = kvmap.find(str2); + if(kvmap.end() == str1_it && kvmap.end() == str2_it){ + return 0; + } + if(kvmap.end() == str1_it || kvmap.end() == str2_it){ + S3FS_PRN_EXIT("AWSAccesskey or AWSSecretkey is not specified."); + return -1; + } + access_key_id = str1_it->second; + secret_access_key = str2_it->second; + + return 1; +} + +// +// Read Aws Credential File +// +bool S3fsCred::ReadAwsCredentialFile(const std::string &filename, AutoLock::Type type) +{ + // open passwd file + std::ifstream PF(filename.c_str()); + if(!PF.good()){ + return false; + } + + std::string profile; + std::string accesskey; + std::string secret; + std::string session_token; + + // read each line + std::string line; + while(getline(PF, line)){ + line = trim(line); + if(line.empty()){ + continue; + } + if('#' == line[0]){ + continue; + } + + if(line.size() > 2 && line[0] == '[' && line[line.size() - 1] == ']') { + if(profile == aws_profile){ + break; + } + profile = line.substr(1, line.size() - 2); + accesskey.clear(); + secret.clear(); + session_token.clear(); + } + + size_t pos = line.find_first_of('='); + if(pos == std::string::npos){ + continue; + } + std::string key = trim(line.substr(0, pos)); + std::string value = trim(line.substr(pos + 1, std::string::npos)); + if(key == "aws_access_key_id"){ + accesskey = value; + }else if(key == "aws_secret_access_key"){ + secret = value; + }else if(key == "aws_session_token"){ + session_token = value; + } + } + + if(profile != aws_profile){ + return false; + } + if(session_token.empty()){ + if(is_use_session_token){ + S3FS_PRN_EXIT("AWS session 
token was expected but wasn't provided in aws/credentials file for profile: %s.", aws_profile.c_str()); + return false; + } + if(!SetAccessKey(accesskey.c_str(), secret.c_str(), type)){ + S3FS_PRN_EXIT("failed to set internal data for access key/secret key from aws credential file."); + return false; + } + }else{ + if(!SetAccessKeyWithSessionToken(accesskey.c_str(), secret.c_str(), session_token.c_str(), type)){ + S3FS_PRN_EXIT("session token is invalid."); + return false; + } + } + return true; +} + +// +// InitialS3fsCredentials +// +// called only when were are not mounting a +// public bucket +// +// Here is the order precedence for getting the +// keys: +// +// 1 - from the command line (security risk) +// 2 - from a password file specified on the command line +// 3 - from environment variables +// 3a - from the AWS_CREDENTIAL_FILE environment variable +// 3b - from ${HOME}/.aws/credentials +// 4 - from the users ~/.passwd-s3fs +// 5 - from /etc/passwd-s3fs +// +bool S3fsCred::InitialS3fsCredentials() +{ + // should be redundant + if(S3fsCurl::IsPublicBucket()){ + return true; + } + + // access key loading is deferred + if(load_iamrole || IsSetExtCredLib() || is_ecs){ + return true; + } + + // 1 - keys specified on the command line + if(IsSetAccessKeys(AutoLock::NONE)){ + return true; + } + + // 2 - was specified on the command line + if(IsSetPasswdFile()){ + if(!ReadS3fsPasswdFile(AutoLock::NONE)){ + return false; + } + return true; + } + + // 3 - environment variables + const char* AWSACCESSKEYID = getenv("AWS_ACCESS_KEY_ID") ? getenv("AWS_ACCESS_KEY_ID") : getenv("AWSACCESSKEYID"); + const char* AWSSECRETACCESSKEY = getenv("AWS_SECRET_ACCESS_KEY") ? getenv("AWS_SECRET_ACCESS_KEY") : getenv("AWSSECRETACCESSKEY"); + const char* AWSSESSIONTOKEN = getenv("AWS_SESSION_TOKEN") ? 
getenv("AWS_SESSION_TOKEN") : getenv("AWSSESSIONTOKEN"); + + if(AWSACCESSKEYID != nullptr || AWSSECRETACCESSKEY != nullptr){ + if( (AWSACCESSKEYID == nullptr && AWSSECRETACCESSKEY != nullptr) || + (AWSACCESSKEYID != nullptr && AWSSECRETACCESSKEY == nullptr) ){ + S3FS_PRN_EXIT("both environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY must be set together."); + return false; + } + S3FS_PRN_INFO2("access key from env variables"); + if(AWSSESSIONTOKEN != nullptr){ + S3FS_PRN_INFO2("session token is available"); + if(!SetAccessKeyWithSessionToken(AWSACCESSKEYID, AWSSECRETACCESSKEY, AWSSESSIONTOKEN, AutoLock::NONE)){ + S3FS_PRN_EXIT("session token is invalid."); + return false; + } + }else{ + S3FS_PRN_INFO2("session token is not available"); + if(is_use_session_token){ + S3FS_PRN_EXIT("environment variable AWS_SESSION_TOKEN is expected to be set."); + return false; + } + } + if(!SetAccessKey(AWSACCESSKEYID, AWSSECRETACCESSKEY, AutoLock::NONE)){ + S3FS_PRN_EXIT("if one access key is specified, both keys need to be specified."); + return false; + } + return true; + } + + // 3a - from the AWS_CREDENTIAL_FILE environment variable + char* AWS_CREDENTIAL_FILE = getenv("AWS_CREDENTIAL_FILE"); + if(AWS_CREDENTIAL_FILE != nullptr){ + passwd_file = AWS_CREDENTIAL_FILE; + if(IsSetPasswdFile()){ + if(!IsReadableS3fsPasswdFile()){ + S3FS_PRN_EXIT("AWS_CREDENTIAL_FILE: \"%s\" is not readable.", passwd_file.c_str()); + return false; + } + if(!ReadS3fsPasswdFile(AutoLock::NONE)){ + return false; + } + return true; + } + } + + // 3b - check ${HOME}/.aws/credentials + std::string aws_credentials = std::string(getpwuid(getuid())->pw_dir) + "/.aws/credentials"; + if(ReadAwsCredentialFile(aws_credentials, AutoLock::NONE)){ + return true; + }else if(aws_profile != DEFAULT_AWS_PROFILE_NAME){ + S3FS_PRN_EXIT("Could not find profile: %s in file: %s", aws_profile.c_str(), aws_credentials.c_str()); + return false; + } + + // 4 - from the default location in the users home directory + 
char* HOME = getenv("HOME"); + if(HOME != nullptr){ + passwd_file = HOME; + passwd_file += "/.passwd-s3fs"; + if(IsReadableS3fsPasswdFile()){ + if(!ReadS3fsPasswdFile(AutoLock::NONE)){ + return false; + } + + // It is possible that the user's file was there but + // contained no key pairs i.e. commented out + // in that case, go look in the final location + if(IsSetAccessKeys(AutoLock::NONE)){ + return true; + } + } + } + + // 5 - from the system default location + passwd_file = "/etc/passwd-s3fs"; + if(IsReadableS3fsPasswdFile()){ + if(!ReadS3fsPasswdFile(AutoLock::NONE)){ + return false; + } + return true; + } + + S3FS_PRN_EXIT("could not determine how to establish security credentials."); + return false; +} + +//------------------------------------------------------------------- +// Methods : for IAM +//------------------------------------------------------------------- +bool S3fsCred::ParseIAMCredentialResponse(const char* response, iamcredmap_t& keyval) +{ + if(!response){ + return false; + } + std::istringstream sscred(response); + std::string oneline; + keyval.clear(); + while(getline(sscred, oneline, ',')){ + std::string::size_type pos; + std::string key; + std::string val; + if(std::string::npos != (pos = oneline.find(S3fsCred::IAMCRED_ACCESSKEYID))){ + key = S3fsCred::IAMCRED_ACCESSKEYID; + }else if(std::string::npos != (pos = oneline.find(S3fsCred::IAMCRED_SECRETACCESSKEY))){ + key = S3fsCred::IAMCRED_SECRETACCESSKEY; + }else if(std::string::npos != (pos = oneline.find(IAM_token_field))){ + key = IAM_token_field; + }else if(std::string::npos != (pos = oneline.find(IAM_expiry_field))){ + key = IAM_expiry_field; + }else if(std::string::npos != (pos = oneline.find(S3fsCred::IAMCRED_ROLEARN))){ + key = S3fsCred::IAMCRED_ROLEARN; + }else{ + continue; + } + if(std::string::npos == (pos = oneline.find(':', pos + key.length()))){ + continue; + } + + if(is_ibm_iam_auth && key == IAM_expiry_field){ + // parse integer value + if(std::string::npos == (pos = 
oneline.find_first_of("0123456789", pos))){ + continue; + } + oneline.erase(0, pos); + if(std::string::npos == (pos = oneline.find_last_of("0123456789"))){ + continue; + } + val = oneline.substr(0, pos+1); + }else{ + // parse std::string value (starts and ends with quotes) + if(std::string::npos == (pos = oneline.find('\"', pos))){ + continue; + } + oneline.erase(0, pos+1); + if(std::string::npos == (pos = oneline.find('\"'))){ + continue; + } + val = oneline.substr(0, pos); + } + keyval[key] = val; + } + return true; +} + +bool S3fsCred::CheckIAMCredentialUpdate(std::string* access_key_id, std::string* secret_access_key, std::string* access_token) +{ + AutoLock auto_lock(&token_lock); + + if(IsIBMIAMAuth() || IsSetExtCredLib() || is_ecs || IsSetIAMRole(AutoLock::ALREADY_LOCKED)){ + if(AWSAccessTokenExpire < (time(nullptr) + S3fsCred::IAM_EXPIRE_MERGIN)){ + S3FS_PRN_INFO("IAM Access Token refreshing..."); + + // update + if(!IsSetExtCredLib()){ + if(!LoadIAMCredentials(AutoLock::ALREADY_LOCKED)){ + S3FS_PRN_ERR("Access Token refresh by built-in failed"); + return false; + } + }else{ + if(!UpdateExtCredentials(AutoLock::ALREADY_LOCKED)){ + S3FS_PRN_ERR("Access Token refresh by %s(external credential library) failed", credlib.c_str()); + return false; + } + } + S3FS_PRN_INFO("IAM Access Token refreshed"); + } + } + + // set + if(access_key_id){ + *access_key_id = AWSAccessKeyId; + } + if(secret_access_key){ + *secret_access_key = AWSSecretAccessKey; + } + if(access_token){ + if(IsIBMIAMAuth() || IsSetExtCredLib() || is_ecs || is_use_session_token || IsSetIAMRole(AutoLock::ALREADY_LOCKED)){ + *access_token = AWSAccessToken; + }else{ + access_token->erase(); + } + } + + return true; +} + +const char* S3fsCred::GetCredFuncVersion(bool detail) const +{ + static constexpr char errVersion[] = "unknown"; + + if(!pFuncCredVersion){ + return errVersion; + } + return (*pFuncCredVersion)(detail); +} + +//------------------------------------------------------------------- +// 
Methods : External Credential Library +//------------------------------------------------------------------- +bool S3fsCred::SetExtCredLib(const char* arg) +{ + if(!arg || strlen(arg) == 0){ + return false; + } + credlib = arg; + + return true; +} + +bool S3fsCred::IsSetExtCredLib() const +{ + return !credlib.empty(); +} + +bool S3fsCred::SetExtCredLibOpts(const char* args) +{ + if(!args || strlen(args) == 0){ + return false; + } + credlib_opts = args; + + return true; +} + +bool S3fsCred::IsSetExtCredLibOpts() const +{ + return !credlib_opts.empty(); +} + +bool S3fsCred::InitExtCredLib() +{ + if(!LoadExtCredLib()){ + return false; + } + // Initialize library + if(!pFuncCredInit){ + S3FS_PRN_CRIT("\"InitS3fsCredential\" function pointer is nullptr, why?"); + UnloadExtCredLib(); + return false; + } + + const char* popts = credlib_opts.empty() ? nullptr : credlib_opts.c_str(); + char* perrstr = nullptr; + if(!(*pFuncCredInit)(popts, &perrstr)){ + S3FS_PRN_ERR("Could not initialize %s(external credential library) by \"InitS3fsCredential\" function : %s", credlib.c_str(), perrstr ? perrstr : "unknown"); + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(perrstr){ + free(perrstr); + } + UnloadExtCredLib(); + return false; + } + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(perrstr){ + free(perrstr); + } + + return true; +} + +bool S3fsCred::LoadExtCredLib() +{ + if(credlib.empty()){ + return false; + } + UnloadExtCredLib(); + + S3FS_PRN_INFO("Load External Credential Library : %s", credlib.c_str()); + + // Open Library + // + // Search Library: (RPATH ->) LD_LIBRARY_PATH -> (RUNPATH ->) /etc/ld.so.cache -> /lib -> /usr/lib + // + if(nullptr == (hExtCredLib = dlopen(credlib.c_str(), RTLD_LAZY))){ + const char* preason = dlerror(); + S3FS_PRN_ERR("Could not load %s(external credential library) by error : %s", credlib.c_str(), preason ? 
preason : "unknown"); + return false; + } + + // Set function pointers + if(nullptr == (pFuncCredVersion = reinterpret_cast(dlsym(hExtCredLib, "VersionS3fsCredential")))){ + S3FS_PRN_ERR("%s(external credential library) does not have \"VersionS3fsCredential\" function which is required.", credlib.c_str()); + UnloadExtCredLib(); + return false; + } + if(nullptr == (pFuncCredUpdate = reinterpret_cast(dlsym(hExtCredLib, "UpdateS3fsCredential")))){ + S3FS_PRN_ERR("%s(external credential library) does not have \"UpdateS3fsCredential\" function which is required.", credlib.c_str()); + UnloadExtCredLib(); + return false; + } + if(nullptr == (pFuncCredInit = reinterpret_cast(dlsym(hExtCredLib, "InitS3fsCredential")))){ + S3FS_PRN_INFO("%s(external credential library) does not have \"InitS3fsCredential\" function which is optional.", credlib.c_str()); + pFuncCredInit = InitS3fsCredential; // set built-in function + } + if(nullptr == (pFuncCredFree = reinterpret_cast(dlsym(hExtCredLib, "FreeS3fsCredential")))){ + S3FS_PRN_INFO("%s(external credential library) does not have \"FreeS3fsCredential\" function which is optional.", credlib.c_str()); + pFuncCredFree = FreeS3fsCredential; // set built-in function + } + S3FS_PRN_INFO("Succeed loading External Credential Library : %s", credlib.c_str()); + + return true; +} + +bool S3fsCred::UnloadExtCredLib() +{ + if(hExtCredLib){ + S3FS_PRN_INFO("Unload External Credential Library : %s", credlib.c_str()); + + // Uninitialize library + if(!pFuncCredFree){ + S3FS_PRN_CRIT("\"FreeS3fsCredential\" function pointer is nullptr, why?"); + }else{ + char* perrstr = nullptr; + if(!(*pFuncCredFree)(&perrstr)){ + S3FS_PRN_ERR("Could not uninitialize by \"FreeS3fsCredential\" function : %s", perrstr ? 
perrstr : "unknown"); + } + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(perrstr){ + free(perrstr); + } + } + + // reset + pFuncCredVersion = VersionS3fsCredential; + pFuncCredInit = InitS3fsCredential; + pFuncCredFree = FreeS3fsCredential; + pFuncCredUpdate = UpdateS3fsCredential; + + // close + dlclose(hExtCredLib); + hExtCredLib = nullptr; + } + return true; +} + +bool S3fsCred::UpdateExtCredentials(AutoLock::Type type) +{ + if(!hExtCredLib){ + S3FS_PRN_CRIT("External Credential Library is not loaded, why?"); + return false; + } + + AutoLock auto_lock(&token_lock, type); + + char* paccess_key_id = nullptr; + char* pserect_access_key = nullptr; + char* paccess_token = nullptr; + char* perrstr = nullptr; + long long token_expire = 0; + + bool result = (*pFuncCredUpdate)(&paccess_key_id, &pserect_access_key, &paccess_token, &token_expire, &perrstr); + if(!result){ + // error occurred + S3FS_PRN_ERR("Could not update credential by \"UpdateS3fsCredential\" function : %s", perrstr ? 
perrstr : "unknown"); + + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + }else if(!paccess_key_id || !pserect_access_key || !paccess_token || token_expire <= 0){ + // some variables are wrong + S3FS_PRN_ERR("After updating credential by \"UpdateS3fsCredential\" function, but some variables are wrong : paccess_key_id=%p, pserect_access_key=%p, paccess_token=%p, token_expire=%lld", paccess_key_id, pserect_access_key, paccess_token, token_expire); + result = false; + }else{ + // succeed updating + AWSAccessKeyId = paccess_key_id; + AWSSecretAccessKey = pserect_access_key; + AWSAccessToken = paccess_token; + AWSAccessTokenExpire = token_expire; + } + + // clean + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(paccess_key_id){ + free(paccess_key_id); + } + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(pserect_access_key){ + free(pserect_access_key); + } + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(paccess_token){ + free(paccess_token); + } + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(perrstr){ + free(perrstr); + } + + return result; +} + +//------------------------------------------------------------------- +// Methods: Option detection +//------------------------------------------------------------------- +// return value: 1 = Not processed as it is not a option for this class +// 0 = The option was detected and processed appropriately +// -1 = Processing cannot be continued because a fatal error was detected +// +int S3fsCred::DetectParam(const char* arg) +{ + if(!arg){ + S3FS_PRN_EXIT("parameter arg is empty(null)"); + return -1; + } + + if(is_prefix(arg, "passwd_file=")){ + SetS3fsPasswdFile(strchr(arg, '=') + sizeof(char)); + set_builtin_cred_opts = true; + return 0; + } + + if(0 == strcmp(arg, "ibm_iam_auth")){ + 
SetIsIBMIAMAuth(true); + SetIAMCredentialsURL("https://iam.cloud.ibm.com/identity/token"); + SetIAMTokenField("\"access_token\""); + SetIAMExpiryField("\"expiration\""); + SetIAMFieldCount(2); + SetIMDSVersion(1, AutoLock::NONE); + set_builtin_cred_opts = true; + return 0; + } + + if(0 == strcmp(arg, "use_session_token")){ + SetIsUseSessionToken(true); + set_builtin_cred_opts = true; + return 0; + } + + if(is_prefix(arg, "ibm_iam_endpoint=")){ + std::string endpoint_url; + const char* iam_endpoint = strchr(arg, '=') + sizeof(char); + + // Check url for http / https protocol std::string + if(!is_prefix(iam_endpoint, "https://") && !is_prefix(iam_endpoint, "http://")){ + S3FS_PRN_EXIT("option ibm_iam_endpoint has invalid format, missing http / https protocol"); + return -1; + } + endpoint_url = std::string(iam_endpoint) + "/identity/token"; + SetIAMCredentialsURL(endpoint_url.c_str()); + set_builtin_cred_opts = true; + return 0; + } + + if(0 == strcmp(arg, "imdsv1only")){ + SetIMDSVersion(1, AutoLock::NONE); + set_builtin_cred_opts = true; + return 0; + } + + if(0 == strcmp(arg, "ecs")){ + if(IsIBMIAMAuth()){ + S3FS_PRN_EXIT("option ecs cannot be used in conjunction with ibm"); + return -1; + } + SetIsECS(true); + SetIMDSVersion(1, AutoLock::NONE); + SetIAMCredentialsURL("http://169.254.170.2"); + SetIAMFieldCount(5); + set_builtin_cred_opts = true; + return 0; + } + + if(is_prefix(arg, "iam_role")){ + if(is_ecs || IsIBMIAMAuth()){ + S3FS_PRN_EXIT("option iam_role cannot be used in conjunction with ecs or ibm"); + return -1; + } + if(0 == strcmp(arg, "iam_role") || 0 == strcmp(arg, "iam_role=auto")){ + // loading IAM role name in s3fs_init(), because we need to wait initializing curl. 
+ // + SetIAMRoleMetadataType(true); + set_builtin_cred_opts = true; + return 0; + + }else if(is_prefix(arg, "iam_role=")){ + const char* role = strchr(arg, '=') + sizeof(char); + SetIAMRole(role, AutoLock::NONE); + SetIAMRoleMetadataType(false); + set_builtin_cred_opts = true; + return 0; + } + } + + if(is_prefix(arg, "profile=")){ + SetAwsProfileName(strchr(arg, '=') + sizeof(char)); + set_builtin_cred_opts = true; + return 0; + } + + if(is_prefix(arg, "credlib=")){ + if(!SetExtCredLib(strchr(arg, '=') + sizeof(char))){ + S3FS_PRN_EXIT("failed to set credlib option : %s", (strchr(arg, '=') + sizeof(char))); + return -1; + } + return 0; + } + + if(is_prefix(arg, "credlib_opts=")){ + if(!SetExtCredLibOpts(strchr(arg, '=') + sizeof(char))){ + S3FS_PRN_EXIT("failed to set credlib_opts option : %s", (strchr(arg, '=') + sizeof(char))); + return -1; + } + return 0; + } + + return 1; +} + +//------------------------------------------------------------------- +// Methods : check parameters +//------------------------------------------------------------------- +// +// Checking forbidden parameters for bucket +// +bool S3fsCred::CheckForbiddenBucketParams() +{ + // The first plain argument is the bucket + if(bucket_name.empty()){ + S3FS_PRN_EXIT("missing BUCKET argument."); + show_usage(); + return false; + } + + // bucket names cannot contain upper case characters in virtual-hosted style + if(!pathrequeststyle && (lower(bucket_name) != bucket_name)){ + S3FS_PRN_EXIT("BUCKET %s, name not compatible with virtual-hosted style.", bucket_name.c_str()); + return false; + } + + // check bucket name for illegal characters + size_t found = bucket_name.find_first_of("/:\\;!@#$%^&*?|+="); + if(found != std::string::npos){ + S3FS_PRN_EXIT("BUCKET %s -- bucket name contains an illegal character: '%c' at position %zu", bucket_name.c_str(), bucket_name[found], found); + return false; + } + + if(!pathrequeststyle && is_prefix(s3host.c_str(), "https://") && bucket_name.find_first_of('.') 
!= std::string::npos) { + S3FS_PRN_EXIT("BUCKET %s -- cannot mount bucket with . while using HTTPS without use_path_request_style", bucket_name.c_str()); + return false; + } + return true; +} + +// +// Check the combination of parameters +// +bool S3fsCred::CheckAllParams() +{ + // + // Checking forbidden parameters for bucket + // + if(!CheckForbiddenBucketParams()){ + return false; + } + + // error checking of command line arguments for compatibility + if(S3fsCurl::IsPublicBucket() && IsSetAccessKeys(AutoLock::NONE)){ + S3FS_PRN_EXIT("specifying both public_bucket and the access keys options is invalid."); + return false; + } + + if(IsSetPasswdFile() && IsSetAccessKeys(AutoLock::NONE)){ + S3FS_PRN_EXIT("specifying both passwd_file and the access keys options is invalid."); + return false; + } + + if(!S3fsCurl::IsPublicBucket() && !load_iamrole && !is_ecs && !IsSetExtCredLib()){ + if(!InitialS3fsCredentials()){ + return false; + } + if(!IsSetAccessKeys(AutoLock::NONE)){ + S3FS_PRN_EXIT("could not establish security credentials, check documentation."); + return false; + } + // More error checking on the access key pair can be done + // like checking for appropriate lengths and characters + } + + // check IBM IAM requirements + if(IsIBMIAMAuth()){ + // check that default ACL is either public-read or private + acl_t defaultACL = S3fsCurl::GetDefaultAcl(); + if(defaultACL != acl_t::PRIVATE && defaultACL != acl_t::PUBLIC_READ){ + S3FS_PRN_EXIT("can only use 'public-read' or 'private' ACL while using ibm_iam_auth"); + return false; + } + } + + // check External Credential Library + // + // [NOTE] + // If credlib(_opts) option (for External Credential Library) is specified, + // no other Credential related options can be specified. It is exclusive. 
+ // + if(set_builtin_cred_opts && (IsSetExtCredLib() || IsSetExtCredLibOpts())){ + S3FS_PRN_EXIT("The \"credlib\" or \"credlib_opts\" option and other credential-related options(passwd_file, iam_role, profile, use_session_token, ecs, imdsv1only, ibm_iam_auth, ibm_iam_endpoint, etc) cannot be specified together."); + return false; + } + + // Load and Initialize external credential library + if(IsSetExtCredLib() || IsSetExtCredLibOpts()){ + if(!IsSetExtCredLib()){ + S3FS_PRN_EXIT("The \"credlib_opts\"(%s) is specifyed but \"credlib\" option is not specified.", credlib_opts.c_str()); + return false; + } + + if(!InitExtCredLib()){ + S3FS_PRN_EXIT("failed to load the library specified by the option credlib(%s, %s).", credlib.c_str(), credlib_opts.c_str()); + return false; + } + S3FS_PRN_INFO("Loaded External Credential Library:\n%s", GetCredFuncVersion(true)); + } + + return true; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_cred.h b/s3fs/s3fs_cred.h new file mode 100644 index 0000000..845a1f3 --- /dev/null +++ b/s3fs/s3fs_cred.h @@ -0,0 +1,187 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_CRED_H_ +#define S3FS_CRED_H_ + +#include "autolock.h" +#include "s3fs_extcred.h" + +//---------------------------------------------- +// Typedefs +//---------------------------------------------- +typedef std::map iamcredmap_t; + +//------------------------------------------------ +// class S3fsCred +//------------------------------------------------ +// This is a class for operating and managing Credentials(accesskey, +// secret key, tokens, etc.) used by S3fs. +// Operations related to Credentials are aggregated in this class. +// +// cppcheck-suppress ctuOneDefinitionRuleViolation ; for stub in test_curl_util.cpp +class S3fsCred +{ + private: + static constexpr char ALLBUCKET_FIELDS_TYPE[] = ""; // special key for mapping(This name is absolutely not used as a bucket name) + static constexpr char KEYVAL_FIELDS_TYPE[] = "\t"; // special key for mapping(This name is absolutely not used as a bucket name) + static constexpr char AWS_ACCESSKEYID[] = "AWSAccessKeyId"; + static constexpr char AWS_SECRETKEY[] = "AWSSecretKey"; + + static constexpr int IAM_EXPIRE_MERGIN = 20 * 60; // update timing + static constexpr char ECS_IAM_ENV_VAR[] = "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"; + static constexpr char IAMCRED_ACCESSKEYID[] = "AccessKeyId"; + static constexpr char IAMCRED_SECRETACCESSKEY[] = "SecretAccessKey"; + static constexpr char IAMCRED_ROLEARN[] = "RoleArn"; + + static std::string bucket_name; + + mutable pthread_mutex_t token_lock; + bool is_lock_init; + + std::string passwd_file; + std::string aws_profile; + + bool load_iamrole; + + std::string AWSAccessKeyId; // Protect exclusively + std::string AWSSecretAccessKey; // Protect exclusively + std::string AWSAccessToken; // Protect exclusively + time_t AWSAccessTokenExpire; 
// Protect exclusively + + bool is_ecs; + bool is_use_session_token; + bool is_ibm_iam_auth; + + std::string IAM_cred_url; + int IAM_api_version; // Protect exclusively + std::string IAMv2_api_token; // Protect exclusively + size_t IAM_field_count; + std::string IAM_token_field; + std::string IAM_expiry_field; + std::string IAM_role; // Protect exclusively + + bool set_builtin_cred_opts; // true if options other than "credlib" is set + std::string credlib; // credlib(name or path) + std::string credlib_opts; // options for credlib + + void* hExtCredLib; + fp_VersionS3fsCredential pFuncCredVersion; + fp_InitS3fsCredential pFuncCredInit; + fp_FreeS3fsCredential pFuncCredFree; + fp_UpdateS3fsCredential pFuncCredUpdate; + + public: + static constexpr char IAMv2_token_url[] = "http://169.254.169.254/latest/api/token"; + static constexpr int IAMv2_token_ttl = 21600; + static constexpr char IAMv2_token_ttl_hdr[] = "X-aws-ec2-metadata-token-ttl-seconds"; + static constexpr char IAMv2_token_hdr[] = "X-aws-ec2-metadata-token"; + + private: + static bool ParseIAMRoleFromMetaDataResponse(const char* response, std::string& rolename); + + bool SetS3fsPasswdFile(const char* file); + bool IsSetPasswdFile() const; + bool SetAwsProfileName(const char* profile_name); + bool SetIAMRoleMetadataType(bool flag); + + bool SetAccessKey(const char* AccessKeyId, const char* SecretAccessKey, AutoLock::Type type); + bool SetAccessKeyWithSessionToken(const char* AccessKeyId, const char* SecretAccessKey, const char * SessionToken, AutoLock::Type type); + bool IsSetAccessKeys(AutoLock::Type type) const; + + bool SetIsECS(bool flag); + bool SetIsUseSessionToken(bool flag); + + bool SetIsIBMIAMAuth(bool flag); + + int SetIMDSVersion(int version, AutoLock::Type type); + int GetIMDSVersion(AutoLock::Type type) const; + + bool SetIAMv2APIToken(const std::string& token, AutoLock::Type type); + std::string GetIAMv2APIToken(AutoLock::Type type) const; + + bool SetIAMRole(const char* role, AutoLock::Type 
type); + std::string GetIAMRole(AutoLock::Type type) const; + bool IsSetIAMRole(AutoLock::Type type) const; + size_t SetIAMFieldCount(size_t field_count); + std::string SetIAMCredentialsURL(const char* url); + std::string SetIAMTokenField(const char* token_field); + std::string SetIAMExpiryField(const char* expiry_field); + + bool IsReadableS3fsPasswdFile() const; + bool CheckS3fsPasswdFilePerms(); + bool ParseS3fsPasswdFile(bucketkvmap_t& resmap); + bool ReadS3fsPasswdFile(AutoLock::Type type); + + static int CheckS3fsCredentialAwsFormat(const kvmap_t& kvmap, std::string& access_key_id, std::string& secret_access_key); + bool ReadAwsCredentialFile(const std::string &filename, AutoLock::Type type); + + bool InitialS3fsCredentials(); + bool ParseIAMCredentialResponse(const char* response, iamcredmap_t& keyval); + + bool GetIAMCredentialsURL(std::string& url, bool check_iam_role, AutoLock::Type type); + bool LoadIAMCredentials(AutoLock::Type type); + bool SetIAMCredentials(const char* response, AutoLock::Type type); + bool SetIAMRoleFromMetaData(const char* response, AutoLock::Type type); + + bool SetExtCredLib(const char* arg); + bool IsSetExtCredLib() const; + bool SetExtCredLibOpts(const char* args); + bool IsSetExtCredLibOpts() const; + + bool InitExtCredLib(); + bool LoadExtCredLib(); + bool UnloadExtCredLib(); + bool UpdateExtCredentials(AutoLock::Type type); + + static bool CheckForbiddenBucketParams(); + + public: + static bool SetBucket(const char* bucket); + static const std::string& GetBucket(); + + S3fsCred(); + ~S3fsCred(); + S3fsCred(const S3fsCred&) = delete; + S3fsCred(S3fsCred&&) = delete; + S3fsCred& operator=(const S3fsCred&) = delete; + S3fsCred& operator=(S3fsCred&&) = delete; + + bool IsIBMIAMAuth() const { return is_ibm_iam_auth; } + + bool LoadIAMRoleFromMetaData(); + + bool CheckIAMCredentialUpdate(std::string* access_key_id = nullptr, std::string* secret_access_key = nullptr, std::string* access_token = nullptr); + const char* 
GetCredFuncVersion(bool detail) const; + + int DetectParam(const char* arg); + bool CheckAllParams(); +}; + +#endif // S3FS_CRED_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_extcred.h b/s3fs/s3fs_extcred.h new file mode 100644 index 0000000..eb70e80 --- /dev/null +++ b/s3fs/s3fs_extcred.h @@ -0,0 +1,144 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_EXTCRED_H_ +#define S3FS_EXTCRED_H_ + +//------------------------------------------------------------------- +// Attributes(weak) : use only in s3fs-fuse internally +//------------------------------------------------------------------- +// [NOTE] +// This macro is only used inside s3fs-fuse. 
+// External projects that utilize this header file substitute empty +//values as follows: +// +#ifndef S3FS_FUNCATTR_WEAK +#define S3FS_FUNCATTR_WEAK +#endif + +extern "C" { +//------------------------------------------------------------------- +// Prototype for External Credential 4 functions +//------------------------------------------------------------------- +// +// [Required] VersionS3fsCredential +// +// Returns the library name and version as a string. +// +extern const char* VersionS3fsCredential(bool detail) S3FS_FUNCATTR_WEAK; + +// +// [Optional] InitS3fsCredential +// +// A function that does the necessary initialization after the library is +// loaded. This function is called only once immediately after loading the +// library. +// If there is a required initialization inside the library, implement it. +// Implementation of this function is optional and not required. If not +// implemented, it will not be called. +// +// const char* popts : String passed with the credlib_opts option. If the +// credlib_opts option is not specified, nullptr will be +// passed. +// char** pperrstr : pperrstr is used to pass the error message to the +// caller when an error occurs. +// If this pointer is not nullptr, you can allocate memory +// and set an error message to it. The allocated memory +// area is freed by the caller. +// +extern bool InitS3fsCredential(const char* popts, char** pperrstr) S3FS_FUNCATTR_WEAK; + +// +// [Optional] FreeS3fsCredential +// +// A function that is called only once just before the library is unloaded. +// If there is a required discard process in the library, implement it. +// Implementation of this feature is optional and not required. +// If not implemented, it will not be called. +// +// char** pperrstr : pperrstr is used to pass the error message to the +// caller when an error occurs. +// If this pointer is not nullptr, you can allocate memory +// and set an error message to it. 
The allocated memory +// area is freed by the caller. +// +extern bool FreeS3fsCredential(char** pperrstr) S3FS_FUNCATTR_WEAK; + +// +// [Required] UpdateS3fsCredential +// +// A function that updates the token. +// +// char** ppaccess_key_id : Allocate and set "Access Key ID" string +// area to *ppaccess_key_id. +// char** ppserect_access_key : Allocate and set "Access Secret Key ID" +// string area to *ppserect_access_key. +// char** ppaccess_token : Allocate and set "Token" string area to +// *ppaccess_token. +// long long* ptoken_expire : Set token expire time(time_t) value to +// *ptoken_expire. +// This is essentially a time_t* variable. +// To avoid system differences about time_t +// size, long long* is used. +// When setting the value, cast from time_t +// to long long to set the value. +// char** pperrstr : pperrstr is used to pass the error message to the +// caller when an error occurs. +// +// For all argument of the character string pointer(char **) set the +// allocated string area. The allocated area is freed by the caller. 
+// +extern bool UpdateS3fsCredential(char** ppaccess_key_id, char** ppserect_access_key, char** ppaccess_token, long long* ptoken_expire, char** pperrstr) S3FS_FUNCATTR_WEAK; + +//--------------------------------------------------------- +// Typedef Prototype function +//--------------------------------------------------------- +// +// const char* VersionS3fsCredential() +// +typedef const char* (*fp_VersionS3fsCredential)(bool detail); + +// +// bool InitS3fsCredential(char** pperrstr) +// +typedef bool (*fp_InitS3fsCredential)(const char* popts, char** pperrstr); + +// +// bool FreeS3fsCredential(char** pperrstr) +// +typedef bool (*fp_FreeS3fsCredential)(char** pperrstr); + +// +// bool UpdateS3fsCredential(char** ppaccess_key_id, char** ppserect_access_key, char** ppaccess_token, long long* ptoken_expire, char** pperrstr) +// +typedef bool (*fp_UpdateS3fsCredential)(char** ppaccess_key_id, char** ppserect_access_key, char** ppaccess_token, long long* ptoken_expire, char** pperrstr); + +} // extern "C" + +#endif // S3FS_EXTCRED_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_global.cpp b/s3fs/s3fs_global.cpp new file mode 100644 index 0000000..650f39e --- /dev/null +++ b/s3fs/s3fs_global.cpp @@ -0,0 +1,50 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include + +#include "hybridcache_accessor_4_s3fs.h" + +//------------------------------------------------------------------- +// Global variables +//------------------------------------------------------------------- +bool foreground = false; +bool nomultipart = false; +bool pathrequeststyle = false; +bool complement_stat = false; +bool noxmlns = false; +bool use_newcache = false; +std::string program_name; +std::string service_path = "/"; +std::string s3host = "https://s3.amazonaws.com"; +std::string endpoint = "us-east-1"; +std::string cipher_suites; +std::string instance_name; +std::shared_ptr accessor; + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_help.cpp b/s3fs/s3fs_help.cpp new file mode 100644 index 0000000..469d408 --- /dev/null +++ b/s3fs/s3fs_help.cpp @@ -0,0 +1,657 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include + +#include + +#include "common.h" +#include "s3fs_help.h" +#include "s3fs_auth.h" + +//------------------------------------------------------------------- +// Contents +//------------------------------------------------------------------- +static constexpr char help_string[] = + "\n" + "Mount an Amazon S3 bucket as a file system.\n" + "\n" + "Usage:\n" + " mounting\n" + " s3fs bucket[:/path] mountpoint [options]\n" + " s3fs mountpoint [options (must specify bucket= option)]\n" + "\n" + " unmounting\n" + " umount mountpoint\n" + "\n" + " General forms for s3fs and FUSE/mount options:\n" + " -o opt[,opt...]\n" + " -o opt [-o opt] ...\n" + "\n" + " utility mode (remove interrupted multipart uploading objects)\n" + " s3fs --incomplete-mpu-list (-u) bucket\n" + " s3fs --incomplete-mpu-abort[=all | =] bucket\n" + "\n" + "s3fs Options:\n" + "\n" + " Most s3fs options are given in the form where \"opt\" is:\n" + "\n" + " =\n" + "\n" + " bucket\n" + " - if it is not specified bucket name (and path) in command line,\n" + " must specify this option after -o option for bucket name.\n" + "\n" + " default_acl (default=\"private\")\n" + " - the default canned acl to apply to all written s3 objects,\n" + " e.g., private, public-read. 
see\n" + " https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl\n" + " for the full list of canned ACLs\n" + "\n" + " retries (default=\"5\")\n" + " - number of times to retry a failed S3 transaction\n" + "\n" + " tmpdir (default=\"/tmp\")\n" + " - local folder for temporary files.\n" + "\n" + " use_cache (default=\"\" which means disabled)\n" + " - local folder to use for local file cache\n" + "\n" + " check_cache_dir_exist (default is disable)\n" + " - if use_cache is set, check if the cache directory exists.\n" + " If this option is not specified, it will be created at runtime\n" + " when the cache directory does not exist.\n" + "\n" + " del_cache (delete local file cache)\n" + " - delete local file cache when s3fs starts and exits.\n" + "\n" + " storage_class (default=\"standard\")\n" + " - store object with specified storage class. Possible values:\n" + " standard, standard_ia, onezone_ia, reduced_redundancy,\n" + " intelligent_tiering, glacier, glacier_ir, and deep_archive.\n" + "\n" + " use_rrs (default is disable)\n" + " - use Amazon's Reduced Redundancy Storage.\n" + " this option can not be specified with use_sse.\n" + " (can specify use_rrs=1 for old version)\n" + " this option has been replaced by new storage_class option.\n" + "\n" + " use_sse (default is disable)\n" + " - Specify three type Amazon's Server-Site Encryption: SSE-S3,\n" + " SSE-C or SSE-KMS. SSE-S3 uses Amazon S3-managed encryption\n" + " keys, SSE-C uses customer-provided encryption keys, and\n" + " SSE-KMS uses the master key which you manage in AWS KMS.\n" + " You can specify \"use_sse\" or \"use_sse=1\" enables SSE-S3\n" + " type (use_sse=1 is old type parameter).\n" + " Case of setting SSE-C, you can specify \"use_sse=custom\",\n" + " \"use_sse=custom:\" or\n" + " \"use_sse=\" (only \n" + " specified is old type parameter). You can use \"c\" for\n" + " short \"custom\".\n" + " The custom key file must be 600 permission. 
The file can\n" + " have some lines, each line is one SSE-C key. The first line\n" + " in file is used as Customer-Provided Encryption Keys for\n" + " uploading and changing headers etc. If there are some keys\n" + " after first line, those are used downloading object which\n" + " are encrypted by not first key. So that, you can keep all\n" + " SSE-C keys in file, that is SSE-C key history.\n" + " If you specify \"custom\" (\"c\") without file path, you\n" + " need to set custom key by load_sse_c option or AWSSSECKEYS\n" + " environment. (AWSSSECKEYS environment has some SSE-C keys\n" + " with \":\" separator.) This option is used to decide the\n" + " SSE type. So that if you do not want to encrypt a object\n" + " object at uploading, but you need to decrypt encrypted\n" + " object at downloading, you can use load_sse_c option instead\n" + " of this option.\n" + " For setting SSE-KMS, specify \"use_sse=kmsid\" or\n" + " \"use_sse=kmsid:\". You can use \"k\" for short \"kmsid\".\n" + " If you san specify SSE-KMS type with your in AWS\n" + " KMS, you can set it after \"kmsid:\" (or \"k:\"). If you\n" + " specify only \"kmsid\" (\"k\"), you need to set AWSSSEKMSID\n" + " environment which value is . You must be careful\n" + " about that you can not use the KMS id which is not same EC2\n" + " region.\n" + " Additionally, if you specify SSE-KMS, your endpoints must use\n" + " Secure Sockets Layer(SSL) or Transport Layer Security(TLS).\n" + "\n" + " load_sse_c - specify SSE-C keys\n" + " Specify the custom-provided encryption keys file path for decrypting\n" + " at downloading.\n" + " If you use the custom-provided encryption key at uploading, you\n" + " specify with \"use_sse=custom\". The file has many lines, one line\n" + " means one custom key. So that you can keep all SSE-C keys in file,\n" + " that is SSE-C key history. 
AWSSSECKEYS environment is as same as this\n" + " file contents.\n" + "\n" + " public_bucket (default=\"\" which means disabled)\n" + " - anonymously mount a public bucket when set to 1, ignores the \n" + " $HOME/.passwd-s3fs and /etc/passwd-s3fs files.\n" + " S3 does not allow copy object api for anonymous users, then\n" + " s3fs sets nocopyapi option automatically when public_bucket=1\n" + " option is specified.\n" + "\n" + " passwd_file (default=\"\")\n" + " - specify which s3fs password file to use\n" + "\n" + " ahbe_conf (default=\"\" which means disabled)\n" + " - This option specifies the configuration file path which\n" + " file is the additional HTTP header by file (object) extension.\n" + " The configuration file format is below:\n" + " -----------\n" + " line = [file suffix or regex] HTTP-header [HTTP-values]\n" + " file suffix = file (object) suffix, if this field is empty,\n" + " it means \"reg:(.*)\".(=all object).\n" + " regex = regular expression to match the file (object) path.\n" + " this type starts with \"reg:\" prefix.\n" + " HTTP-header = additional HTTP header name\n" + " HTTP-values = additional HTTP header value\n" + " -----------\n" + " Sample:\n" + " -----------\n" + " .gz Content-Encoding gzip\n" + " .Z Content-Encoding compress\n" + " reg:^/MYDIR/(.*)[.]t2$ Content-Encoding text2\n" + " -----------\n" + " A sample configuration file is uploaded in \"test\" directory.\n" + " If you specify this option for set \"Content-Encoding\" HTTP \n" + " header, please take care for RFC 2616.\n" + "\n" + " profile (default=\"default\")\n" + " - Choose a profile from ${HOME}/.aws/credentials to authenticate\n" + " against S3. 
Note that this format matches the AWS CLI format and\n" + " differs from the s3fs passwd format.\n" + "\n" + " connect_timeout (default=\"300\" seconds)\n" + " - time to wait for connection before giving up\n" + "\n" + " readwrite_timeout (default=\"120\" seconds)\n" + " - time to wait between read/write activity before giving up\n" + "\n" + " list_object_max_keys (default=\"1000\")\n" + " - specify the maximum number of keys returned by S3 list object\n" + " API. The default is 1000. you can set this value to 1000 or more.\n" + "\n" + " max_stat_cache_size (default=\"100,000\" entries (about 40MB))\n" + " - maximum number of entries in the stat cache, and this maximum is\n" + " also treated as the number of symbolic link cache.\n" + "\n" + " stat_cache_expire (default is 900))\n" + " - specify expire time (seconds) for entries in the stat cache.\n" + " This expire time indicates the time since stat cached. and this\n" + " is also set to the expire time of the symbolic link cache.\n" + "\n" + " stat_cache_interval_expire (default is 900)\n" + " - specify expire time (seconds) for entries in the stat cache(and\n" + " symbolic link cache).\n" + " This expire time is based on the time from the last access time\n" + " of the stat cache. This option is exclusive with stat_cache_expire,\n" + " and is left for compatibility with older versions.\n" + "\n" + " disable_noobj_cache (default is enable)\n" + " - By default s3fs memorizes when an object does not exist up until\n" + " the stat cache timeout. This caching can cause staleness for\n" + " applications. 
If disabled, s3fs will not memorize objects and may\n" + " cause extra HeadObject requests and reduce performance.\n" + "\n" + " no_check_certificate\n" + " - server certificate won't be checked against the available \n" + " certificate authorities.\n" + "\n" + " ssl_verify_hostname (default=\"2\")\n" + " - When 0, do not verify the SSL certificate against the hostname.\n" + "\n" + " nodnscache (disable DNS cache)\n" + " - s3fs is always using DNS cache, this option make DNS cache disable.\n" + "\n" + " nosscache (disable SSL session cache)\n" + " - s3fs is always using SSL session cache, this option make SSL \n" + " session cache disable.\n" + "\n" + " multireq_max (default=\"20\")\n" + " - maximum number of parallel request for listing objects.\n" + "\n" + " parallel_count (default=\"5\")\n" + " - number of parallel request for uploading big objects.\n" + " s3fs uploads large object (over 20MB) by multipart post request, \n" + " and sends parallel requests.\n" + " This option limits parallel request count which s3fs requests \n" + " at once. It is necessary to set this value depending on a CPU \n" + " and a network band.\n" + "\n" + " multipart_size (default=\"10\")\n" + " - part size, in MB, for each multipart request.\n" + " The minimum value is 5 MB and the maximum value is 5 GB.\n" + "\n" + " multipart_copy_size (default=\"512\")\n" + " - part size, in MB, for each multipart copy request, used for\n" + " renames and mixupload.\n" + " The minimum value is 5 MB and the maximum value is 5 GB.\n" + " Must be at least 512 MB to copy the maximum 5 TB object size\n" + " but lower values may improve performance.\n" + "\n" + " max_dirty_data (default=\"5120\")\n" + " - flush dirty data to S3 after a certain number of MB written.\n" + " The minimum value is 50 MB. 
-1 value means disable.\n" + " Cannot be used with nomixupload.\n" + "\n" + " bucket_size (default=maximum long unsigned integer value)\n" + " - The size of the bucket with which the corresponding\n" + " elements of the statvfs structure will be filled. The option\n" + " argument is an integer optionally followed by a\n" + " multiplicative suffix (GB, GiB, TB, TiB, PB, PiB,\n" + " EB, EiB) (no spaces in between). If no suffix is supplied,\n" + " bytes are assumed; eg: 20000000, 30GB, 45TiB. Note that\n" + " s3fs does not compute the actual volume size (too\n" + " expensive): by default it will assume the maximum possible\n" + " size; however, since this may confuse other software which\n" + " uses s3fs, the advertised bucket size can be set with this\n" + " option.\n" + "\n" + " ensure_diskfree (default 0)\n" + " - sets MB to ensure disk free space. This option means the\n" + " threshold of free space size on disk which is used for the\n" + " cache file by s3fs. s3fs makes file for\n" + " downloading, uploading and caching files. If the disk free\n" + " space is smaller than this value, s3fs do not use disk space\n" + " as possible in exchange for the performance.\n" + "\n" + " free_space_ratio (default=\"10\")\n" + " - sets min free space ratio of the disk.\n" + " The value of this option can be between 0 and 100. It will control\n" + " the size of the cache according to this ratio to ensure that the\n" + " idle ratio of the disk is greater than this value.\n" + " For example, when the disk space is 50GB, the default value will\n" + " ensure that the disk will reserve at least 50GB * 10%% = 5GB of\n" + " remaining space.\n" + "\n" + " multipart_threshold (default=\"25\")\n" + " - threshold, in MB, to use multipart upload instead of\n" + " single-part. 
Must be at least 5 MB.\n" + "\n" + " singlepart_copy_limit (default=\"512\")\n" + " - maximum size, in MB, of a single-part copy before trying \n" + " multipart copy.\n" + "\n" + " host (default=\"https://s3.amazonaws.com\")\n" + " - Set a non-Amazon host, e.g., https://example.com.\n" + "\n" + " servicepath (default=\"/\")\n" + " - Set a service path when the non-Amazon host requires a prefix.\n" + "\n" + " url (default=\"https://s3.amazonaws.com\")\n" + " - sets the url to use to access Amazon S3. If you want to use HTTP,\n" + " then you can set \"url=http://s3.amazonaws.com\".\n" + " If you do not use https, please specify the URL with the url\n" + " option.\n" + "\n" + " endpoint (default=\"us-east-1\")\n" + " - sets the endpoint to use on signature version 4\n" + " If this option is not specified, s3fs uses \"us-east-1\" region as\n" + " the default. If the s3fs could not connect to the region specified\n" + " by this option, s3fs could not run. But if you do not specify this\n" + " option, and if you can not connect with the default region, s3fs\n" + " will retry to automatically connect to the other region. So s3fs\n" + " can know the correct region name, because s3fs can find it in an\n" + " error from the S3 server.\n" + "\n" + " sigv2 (default is signature version 4 falling back to version 2)\n" + " - sets signing AWS requests by using only signature version 2\n" + "\n" + " sigv4 (default is signature version 4 falling back to version 2)\n" + " - sets signing AWS requests by using only signature version 4\n" + "\n" + " mp_umask (default is \"0000\")\n" + " - sets umask for the mount point directory.\n" + " If allow_other option is not set, s3fs allows access to the mount\n" + " point only to the owner. In the opposite case s3fs allows access\n" + " to all users as the default. 
But if you set the allow_other with\n" + " this option, you can control the permissions of the\n" + " mount point by this option like umask.\n" + "\n" + " umask (default is \"0000\")\n" + " - sets umask for files under the mountpoint. This can allow\n" + " users other than the mounting user to read and write to files\n" + " that they did not create.\n" + "\n" + " nomultipart (disable multipart uploads)\n" + "\n" + " streamupload (default is disable)\n" + " - Enable stream upload.\n" + " If this option is enabled, a sequential upload will be performed\n" + " in parallel with the write from the part that has been written\n" + " during a multipart upload.\n" + " This is expected to give better performance than other upload\n" + " functions.\n" + " Note that this option is still experimental and may change in the\n" + " future.\n" + "\n" + " max_thread_count (default is \"5\")\n" + " - Specifies the number of threads waiting for stream uploads.\n" + " Note that this option and Stream Upload are still experimental\n" + " and subject to change in the future.\n" + " This option will be merged with \"parallel_count\" in the future.\n" + "\n" + " enable_content_md5 (default is disable)\n" + " - Allow S3 server to check data integrity of uploads via the\n" + " Content-MD5 header. This can add CPU overhead to transfers.\n" + "\n" + " enable_unsigned_payload (default is disable)\n" + " - Do not calculate Content-SHA256 for PutObject and UploadPart\n" + " payloads. This can reduce CPU overhead to transfers.\n" + "\n" + " ecs (default is disable)\n" + " - This option instructs s3fs to query the ECS container credential\n" + " metadata address instead of the instance metadata address.\n" + "\n" + " iam_role (default is no IAM role)\n" + " - This option requires the IAM role name or \"auto\". If you specify\n" + " \"auto\", s3fs will automatically use the IAM role names that are set\n" + " to an instance. 
If you specify this option without any argument, it\n" + " is the same as that you have specified the \"auto\".\n" + "\n" + " imdsv1only (default is to use IMDSv2 with fallback to v1)\n" + " - AWS instance metadata service, used with IAM role authentication,\n" + " supports the use of an API token. If you're using an IAM role\n" + " in an environment that does not support IMDSv2, setting this flag\n" + " will skip retrieval and usage of the API token when retrieving\n" + " IAM credentials.\n" + "\n" + " ibm_iam_auth (default is not using IBM IAM authentication)\n" + " - This option instructs s3fs to use IBM IAM authentication.\n" + " In this mode, the AWSAccessKey and AWSSecretKey will be used as\n" + " IBM's Service-Instance-ID and APIKey, respectively.\n" + "\n" + " ibm_iam_endpoint (default is https://iam.cloud.ibm.com)\n" + " - sets the URL to use for IBM IAM authentication.\n" + "\n" + " credlib (default=\"\" which means disabled)\n" + " - Specifies the shared library that handles the credentials\n" + " containing the authentication token.\n" + " If this option is specified, the specified credential and token\n" + " processing provided by the shared library will be performed\n" + " instead of the built-in credential processing.\n" + " This option cannot be specified with passwd_file, profile,\n" + " use_session_token, ecs, ibm_iam_auth, ibm_iam_endpoint, imdsv1only\n" + " and iam_role option.\n" + "\n" + " credlib_opts (default=\"\" which means disabled)\n" + " - Specifies the options to pass when the shared library specified\n" + " in credlib is loaded and then initialized.\n" + " For the string specified in this option, specify the string defined\n" + " by the shared library.\n" + "\n" + " use_xattr (default is not handling the extended attribute)\n" + " Enable to handle the extended attribute (xattrs).\n" + " If you set this option, you can use the extended attribute.\n" + " For example, encfs and ecryptfs need to support the extended attribute.\n" + " 
Notice: if s3fs handles the extended attribute, s3fs can not work to\n" + " copy command with preserve=mode.\n" + "\n" + " noxmlns (disable registering xml name space)\n" + " disable registering xml name space for response of \n" + " ListBucketResult and ListVersionsResult etc. Default name \n" + " space is looked up from \"http://s3.amazonaws.com/doc/2006-03-01\".\n" + " This option should not be specified now, because s3fs looks up\n" + " xmlns automatically after v1.66.\n" + "\n" + " nomixupload (disable copy in multipart uploads)\n" + " Disable to use PUT (copy api) when multipart uploading large size objects.\n" + " By default, when doing multipart upload, the range of unchanged data\n" + " will use PUT (copy api) whenever possible.\n" + " When nocopyapi or norenameapi is specified, use of PUT (copy api) is\n" + " invalidated even if this option is not specified.\n" + "\n" + " nocopyapi (for other incomplete compatibility object storage)\n" + " Enable compatibility with S3-like APIs which do not support\n" + " PUT (copy api).\n" + " If you set this option, s3fs do not use PUT with \n" + " \"x-amz-copy-source\" (copy api). Because traffic is increased\n" + " 2-3 times by this option, we do not recommend this.\n" + "\n" + " norenameapi (for other incomplete compatibility object storage)\n" + " Enable compatibility with S3-like APIs which do not support\n" + " PUT (copy api).\n" + " This option is a subset of nocopyapi option. The nocopyapi\n" + " option does not use copy-api for all command (ex. chmod, chown,\n" + " touch, mv, etc), but this option does not use copy-api for\n" + " only rename command (ex. mv). 
If this option is specified with\n" + " nocopyapi, then s3fs ignores it.\n" + "\n" + " use_path_request_style (use legacy API calling style)\n" + " Enable compatibility with S3-like APIs which do not support\n" + " the virtual-host request style, by using the older path request\n" + " style.\n" + "\n" + " listobjectsv2 (use ListObjectsV2)\n" + " Issue ListObjectsV2 instead of ListObjects, useful on object\n" + " stores without ListObjects support.\n" + "\n" + " noua (suppress User-Agent header)\n" + " Usually s3fs outputs of the User-Agent in \"s3fs/ (commit\n" + " hash ; )\" format.\n" + " If this option is specified, s3fs suppresses the output of the\n" + " User-Agent.\n" + "\n" + " cipher_suites\n" + " Customize the list of TLS cipher suites.\n" + " Expects a colon separated list of cipher suite names.\n" + " A list of available cipher suites, depending on your TLS engine,\n" + " can be found on the CURL library documentation:\n" + " https://curl.haxx.se/docs/ssl-ciphers.html\n" + "\n" + " instance_name - The instance name of the current s3fs mountpoint.\n" + " This name will be added to logging messages and user agent headers sent by s3fs.\n" + "\n" + " complement_stat (complement lack of file/directory mode)\n" + " s3fs complements lack of information about file/directory mode\n" + " if a file or a directory object does not have x-amz-meta-mode\n" + " header. As default, s3fs does not complements stat information\n" + " for a object, then the object will not be able to be allowed to\n" + " list/modify.\n" + "\n" + " compat_dir (enable support of alternative directory names)\n" + " s3fs supports two different naming schemas \"dir/\" and\n" + " \"dir\" to map directory names to S3 objects and\n" + " vice versa by default. 
As a third variant, directories can be\n" + " determined indirectly if there is a file object with a path (e.g.\n" + " \"/dir/file\") but without the parent directory.\n" + " This option enables a fourth variant, \"dir_$folder$\", created by\n" + " older applications.\n" + " \n" + " S3fs uses only the first schema \"dir/\" to create S3 objects for\n" + " directories." + " \n" + " The support for these different naming schemas causes an increased\n" + " communication effort.\n" + "\n" + " use_wtf8 - support arbitrary file system encoding.\n" + " S3 requires all object names to be valid UTF-8. But some\n" + " clients, notably Windows NFS clients, use their own encoding.\n" + " This option re-encodes invalid UTF-8 object names into valid\n" + " UTF-8 by mapping offending codes into a 'private' codepage of the\n" + " Unicode set.\n" + " Useful on clients not using UTF-8 as their file system encoding.\n" + "\n" + " use_session_token - indicate that session token should be provided.\n" + " If credentials are provided by environment variables this switch\n" + " forces presence check of AWSSESSIONTOKEN variable.\n" + " Otherwise an error is returned.\n" + "\n" + " requester_pays (default is disable)\n" + " This option instructs s3fs to enable requests involving\n" + " Requester Pays buckets.\n" + " It includes the 'x-amz-request-payer=requester' entry in the\n" + " request header.\n" + "\n" + " mime (default is \"/etc/mime.types\")\n" + " Specify the path of the mime.types file.\n" + " If this option is not specified, the existence of \"/etc/mime.types\"\n" + " is checked, and that file is loaded as mime information.\n" + " If this file does not exist on macOS, then \"/etc/apache2/mime.types\"\n" + " is checked as well.\n" + "\n" + " proxy (default=\"\")\n" + " This option specifies a proxy to S3 server.\n" + " Specify the proxy with '[]' formatted.\n" + " '://' can be omitted, and 'http://' is used when omitted.\n" + " Also, ':' can also be omitted. 
If omitted, port 443 is used for\n" + " HTTPS schema, and port 1080 is used otherwise.\n" + " This option is the same as the curl command's '--proxy(-x)' option and\n" + " libcurl's 'CURLOPT_PROXY' flag.\n" + " This option is equivalent to and takes precedence over the environment\n" + " variables 'http_proxy', 'all_proxy', etc.\n" + "\n" + " proxy_cred_file (default=\"\")\n" + " This option specifies the file that describes the username and\n" + " passphrase for authentication of the proxy when the HTTP schema\n" + " proxy is specified by the 'proxy' option.\n" + " Username and passphrase are valid only for HTTP schema. If the HTTP\n" + " proxy does not require authentication, this option is not required.\n" + " Separate the username and passphrase with a ':' character and\n" + " specify each as a URL-encoded string.\n" + "\n" + " logfile - specify the log output file.\n" + " s3fs outputs the log file to syslog. Alternatively, if s3fs is\n" + " started with the \"-f\" option specified, the log will be output\n" + " to the stdout/stderr.\n" + " You can use this option to specify the log file that s3fs outputs.\n" + " If you specify a log file with this option, it will reopen the log\n" + " file when s3fs receives a SIGHUP signal. You can use the SIGHUP\n" + " signal for log rotation.\n" + "\n" + " dbglevel (default=\"crit\")\n" + " Set the debug message level. set value as crit (critical), err\n" + " (error), warn (warning), info (information) to debug level.\n" + " default debug level is critical. If s3fs run with \"-d\" option,\n" + " the debug level is set information. 
When s3fs catch the signal\n" + " SIGUSR2, the debug level is bump up.\n" + "\n" + " curldbg - put curl debug message\n" + " Put the debug message from libcurl when this option is specified.\n" + " Specify \"normal\" or \"body\" for the parameter.\n" + " If the parameter is omitted, it is the same as \"normal\".\n" + " If \"body\" is specified, some API communication body data will be\n" + " output in addition to the debug message output as \"normal\".\n" + "\n" + " no_time_stamp_msg - no time stamp in debug message\n" + " The time stamp is output to the debug message by default.\n" + " If this option is specified, the time stamp will not be output\n" + " in the debug message.\n" + " It is the same even if the environment variable \"S3FS_MSGTIMESTAMP\"\n" + " is set to \"no\".\n" + "\n" + " set_check_cache_sigusr1 (default is stdout)\n" + " If the cache is enabled, you can check the integrity of the\n" + " cache file and the cache file's stats info file.\n" + " This option is specified and when sending the SIGUSR1 signal\n" + " to the s3fs process checks the cache status at that time.\n" + " This option can take a file path as parameter to output the\n" + " check result to that file. The file path parameter can be omitted.\n" + " If omitted, the result will be output to stdout or syslog.\n" + "\n" + " update_parent_dir_stat (default is disable)\n" + " The parent directory's mtime and ctime are updated when a file or\n" + " directory is created or deleted (when the parent directory's inode is\n" + " updated).\n" + " By default, parent directory statistics are not updated.\n" + "\n" + " newcache_conf (default=\"\" which means disabled)\n" + " - Enable the new cache.\n" + "\n" + "FUSE/mount Options:\n" + "\n" + " Most of the generic mount options described in 'man mount' are\n" + " supported (ro, rw, suid, nosuid, dev, nodev, exec, noexec, atime,\n" + " noatime, sync async, dirsync). 
Filesystems are mounted with\n" + " '-onodev,nosuid' by default, which can only be overridden by a\n" + " privileged user.\n" + " \n" + " There are many FUSE specific mount options that can be specified.\n" + " e.g. allow_other See the FUSE's README for the full set.\n" + "\n" + "Utility mode Options:\n" + "\n" + " -u, --incomplete-mpu-list\n" + " Lists multipart incomplete objects uploaded to the specified\n" + " bucket.\n" + " --incomplete-mpu-abort (=all or =)\n" + " Delete the multipart incomplete object uploaded to the specified\n" + " bucket.\n" + " If \"all\" is specified for this option, all multipart incomplete\n" + " objects will be deleted. If you specify no argument as an option,\n" + " objects older than 24 hours (24H) will be deleted (This is the\n" + " default value). You can specify an optional date format. It can\n" + " be specified as year, month, day, hour, minute, second, and it is\n" + " expressed as \"Y\", \"M\", \"D\", \"h\", \"m\", \"s\" respectively.\n" + " For example, \"1Y6M10D12h30m30s\".\n" + "\n" + "Miscellaneous Options:\n" + "\n" + " -h, --help Output this help.\n" + " --version Output version info.\n" + " -d --debug Turn on DEBUG messages to syslog. 
Specifying -d\n" + " twice turns on FUSE debug messages to STDOUT.\n" + " -f FUSE foreground option - do not run as daemon.\n" + " -s FUSE single-threaded option\n" + " disable multi-threaded operation\n" + "\n" + "\n" + "s3fs home page: \n" + ; + +//------------------------------------------------------------------- +// Functions +//------------------------------------------------------------------- +void show_usage() +{ + printf("Usage: %s BUCKET:[PATH] MOUNTPOINT [OPTION]...\n", program_name.c_str()); +} + +void show_help() +{ + show_usage(); + printf(help_string); +} + +void show_version() +{ + printf( + "Amazon Simple Storage Service File System V%s (commit:%s) with %s\n" + "Copyright (C) 2010 Randy Rizun \n" + "License GPL2: GNU GPL version 2 \n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n", + VERSION, COMMIT_HASH_VAL, s3fs_crypt_lib_name()); +} + +const char* short_version() +{ + static constexpr char short_ver[] = "s3fs version " VERSION "(" COMMIT_HASH_VAL ")"; + return short_ver; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_help.h b/s3fs/s3fs_help.h new file mode 100644 index 0000000..04ce416 --- /dev/null +++ b/s3fs/s3fs_help.h @@ -0,0 +1,41 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_S3FS_HELP_H_ +#define S3FS_S3FS_HELP_H_ + +//------------------------------------------------------------------- +// Functions +//------------------------------------------------------------------- +void show_usage(); +void show_help(); +void show_version(); +const char* short_version(); + +#endif // S3FS_S3FS_HELP_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_lib.cpp b/s3fs/s3fs_lib.cpp new file mode 100644 index 0000000..3dd7eb6 --- /dev/null +++ b/s3fs/s3fs_lib.cpp @@ -0,0 +1,2992 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" +//#include "s3fs.h" +#include "s3fs_lib.h" +#include "s3fs_logger.h" +#include "metaheader.h" +#include "fdcache.h" +#include "fdcache_auto.h" +#include "fdcache_stat.h" +#include "curl.h" +#include "curl_multi.h" +#include "s3objlist.h" +#include "cache.h" +#include "addhead.h" +#include "sighandlers.h" +#include "s3fs_xml.h" +#include "string_util.h" +#include "s3fs_auth.h" +#include "s3fs_cred.h" +#include "s3fs_help.h" +#include "s3fs_util.h" +#include "mpu_util.h" +#include "threadpoolman.h" +#include "autolock.h" + + +//------------------------------------------------------------------- +// Symbols +//------------------------------------------------------------------- +#if !defined(ENOATTR) +#define ENOATTR ENODATA +#endif + +enum class dirtype : int8_t { + UNKNOWN = -1, + NEW = 0, + OLD = 1, + FOLDER = 2, + NOOBJ = 3, +}; + +struct PosixContext { + uid_t uid; + gid_t gid; + 
pid_t pid; +}; + +using Ino = uint64_t; +struct S3DirStream { + Ino ino; + uint64_t fh; + uint64_t offset; +}; + +enum class FileType { + DIR = 1, + FILE = 2, +}; + +struct Fileinfo { + uint64_t fd; + int flags; + Ino ino; + // off64_t read_offset; + // off64_t write_offset; + off64_t offset; +}; + +struct PosixS3Info { + std::string filename; + FileType type; //0 is file, 1 is dir + Fileinfo fileinfo; + S3DirStream dirinfo; +}; + + +//------------------------------------------------------------------- +// Static variables +//------------------------------------------------------------------- +static uid_t mp_uid = 0; // owner of mount point(only not specified uid opt) +static gid_t mp_gid = 0; // group of mount point(only not specified gid opt) +static mode_t mp_mode = 0; // mode of mount point +static mode_t mp_umask = 0; // umask for mount point +static bool is_mp_umask = false;// default does not set. +static std::string mountpoint; +static std::unique_ptr ps3fscred; // using only in this file +static std::string mimetype_file; +static bool nocopyapi = false; +static bool norenameapi = false; +static bool nonempty = false; +static bool allow_other = false; +static uid_t s3fs_uid = 0; +static gid_t s3fs_gid = 0; +static mode_t s3fs_umask = 0; +static bool is_s3fs_uid = false;// default does not set. +static bool is_s3fs_gid = false;// default does not set. +static bool is_s3fs_umask = false;// default does not set. 
+static bool is_remove_cache = false; +static bool is_use_xattr = false; +static off_t multipart_threshold = 25 * 1024 * 1024; +static int64_t singlepart_copy_limit = 512 * 1024 * 1024; +static bool is_specified_endpoint = false; +static int s3fs_init_deferred_exit_status = 0; +static bool support_compat_dir = false;// default does not support compatibility directory type +static int max_keys_list_object = 1000;// default is 1000 +static off_t max_dirty_data = 5LL * 1024LL * 1024LL * 1024LL; +static bool use_wtf8 = false; +static off_t fake_diskfree_size = -1; // default is not set(-1) +static int max_thread_count = 5; // default is 5 +static bool update_parent_dir_stat= false; // default not updating parent directory stats +static fsblkcnt_t bucket_block_count; // advertised block count of the bucket +static unsigned long s3fs_block_size = 16 * 1024 * 1024; // s3fs block size is 16MB +std::string newcache_conf; + +static std::unordered_map fdtofile(1000); +static struct PosixContext posixcontext; +//------------------------------------------------------------------- +// Global functions : prototype +//------------------------------------------------------------------- +int put_headers(const char* path, headers_t& meta, bool is_copy, bool use_st_size = true); // [NOTE] global function because this is called from FdEntity class + + + +//------------------------------------------------------------------- +// Static functions : prototype +//------------------------------------------------------------------- +static int init_config(std::string configpath); + +static bool is_special_name_folder_object(const char* path); +static int chk_dir_object_type(const char* path, std::string& newpath, std::string& nowpath, std::string& nowcache, headers_t* pmeta = nullptr, dirtype* pDirType = nullptr); +static int remove_old_type_dir(const std::string& path, dirtype type); +static int get_object_attribute(const char* path, struct stat* pstbuf, headers_t* pmeta = nullptr, bool 
overcheck = true, bool* pisforce = nullptr, bool add_no_truncate_cache = false); +static int check_object_access(const char* path, int mask, struct stat* pstbuf); +static int check_object_owner(const char* path, struct stat* pstbuf); +static int check_parent_object_access(const char* path, int mask); +static int get_local_fent(AutoFdEntity& autoent, FdEntity **entity, const char* path, int flags = O_RDONLY, bool is_load = false); +static bool multi_head_callback(S3fsCurl* s3fscurl, void* param); +static std::unique_ptr multi_head_retry_callback(S3fsCurl* s3fscurl); +//static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf, fuse_fill_dir_t filler); +static int readdir_multi_head(const char* path, const S3ObjList& head, char* data, int offset, int maxread, ssize_t* realbytes, int* realnum); +static int list_bucket(const char* path, S3ObjList& head, const char* delimiter, bool check_content_only = false); +static int directory_empty(const char* path); +static int rename_large_object(const char* from, const char* to); +static int create_file_object(const char* path, mode_t mode, uid_t uid, gid_t gid); +static int create_directory_object(const char* path, mode_t mode, const struct timespec& ts_atime, const struct timespec& ts_mtime, const struct timespec& ts_ctime, uid_t uid, gid_t gid, const char* pxattrvalue); +static int rename_object(const char* from, const char* to, bool update_ctime); +static int rename_object_nocopy(const char* from, const char* to, bool update_ctime); +static int clone_directory_object(const char* from, const char* to, bool update_ctime, const char* pxattrvalue); +static int rename_directory(const char* from, const char* to); +static int update_mctime_parent_directory(const char* _path); +static int remote_mountpath_exists(const char* path, bool compat_dir); +static bool get_meta_xattr_value(const char* path, std::string& rawvalue); +static bool get_parent_meta_xattr_value(const char* path, std::string& rawvalue); 
+static bool get_xattr_posix_key_value(const char* path, std::string& xattrvalue, bool default_key); +static bool build_inherited_xattr_value(const char* path, std::string& xattrvalue); +static bool parse_xattr_keyval(const std::string& xattrpair, std::string& key, std::string* pval); +static size_t parse_xattrs(const std::string& strxattrs, xattrs_t& xattrs); +static std::string raw_build_xattrs(const xattrs_t& xattrs); +static std::string build_xattrs(const xattrs_t& xattrs); +static int s3fs_check_service(); +static bool set_mountpoint_attribute(struct stat& mpst); +static int set_bucket(const char* arg); +static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_args* outargs); +static fsblkcnt_t parse_bucket_size(char* value); +static bool is_cmd_exists(const std::string& command); +static int print_umount_message(const std::string& mp, bool force); + + + + +//------------------------------------------------------------------- +// Classes +//------------------------------------------------------------------- +// +// A flag class indicating whether the mount point has a stat +// +// [NOTE] +// The flag is accessed from child threads, so This class is used for exclusive control of flags. +// This class will be reviewed when we organize the code in the future. 
+// +class MpStatFlag +{ + private: + std::atomic has_mp_stat; + + public: + MpStatFlag() = default; + MpStatFlag(const MpStatFlag&) = delete; + MpStatFlag(MpStatFlag&&) = delete; + ~MpStatFlag() = default; + MpStatFlag& operator=(const MpStatFlag&) = delete; + MpStatFlag& operator=(MpStatFlag&&) = delete; + + bool Get(); + bool Set(bool flag); +}; + +bool MpStatFlag::Get() +{ + return has_mp_stat; +} + +bool MpStatFlag::Set(bool flag) +{ + return has_mp_stat.exchange(flag); +} + +// whether the stat information file for mount point exists +static MpStatFlag* pHasMpStat = nullptr; + + + +// +// A synchronous class that calls the fuse_fill_dir_t function that processes the readdir data +// + +typedef int (*fill_dir_t) (void *buf, const char *name, + const struct stat *stbuf, off_t off); + +class SyncFiller +{ + private: + mutable pthread_mutex_t filler_lock; + bool is_lock_init = false; + void* filler_buff; + fill_dir_t filler_func; + std::set filled; + + public: + explicit SyncFiller(void* buff = nullptr, fill_dir_t filler = nullptr); + SyncFiller(const SyncFiller&) = delete; + SyncFiller(SyncFiller&&) = delete; + ~SyncFiller(); + SyncFiller& operator=(const SyncFiller&) = delete; + SyncFiller& operator=(SyncFiller&&) = delete; + + int Fill(const char *name, const struct stat *stbuf, off_t off); + int SufficiencyFill(const std::vector& pathlist); +}; + +SyncFiller::SyncFiller(void* buff, fill_dir_t filler) : filler_buff(buff), filler_func(filler) +{ + if(!filler_buff || !filler_func){ + S3FS_PRN_CRIT("Internal error: SyncFiller constructor parameter is critical value."); + abort(); + } + + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + + int result; + if(0 != (result = pthread_mutex_init(&filler_lock, &attr))){ + S3FS_PRN_CRIT("failed to init filler_lock: %d", result); + abort(); + } + is_lock_init = true; +} + +SyncFiller::~SyncFiller() +{ + 
if(is_lock_init){ + int result; + if(0 != (result = pthread_mutex_destroy(&filler_lock))){ + S3FS_PRN_CRIT("failed to destroy filler_lock: %d", result); + abort(); + } + is_lock_init = false; + } +} + +// +// See. prototype fuse_fill_dir_t in fuse.h +// +int SyncFiller::Fill(const char *name, const struct stat *stbuf, off_t off) +{ + AutoLock auto_lock(&filler_lock); + + int result = 0; + if(filled.insert(name).second){ + result = filler_func(filler_buff, name, stbuf, off); + } + return result; +} + +int SyncFiller::SufficiencyFill(const std::vector& pathlist) +{ + AutoLock auto_lock(&filler_lock); + + int result = 0; + for(std::vector::const_iterator it = pathlist.begin(); it != pathlist.end(); ++it) { + if(filled.insert(*it).second){ + if(0 != filler_func(filler_buff, it->c_str(), nullptr, 0)){ + result = 1; + } + } + } + return result; +} + + + +//------------------------------------------------------------------- +// Functions +//------------------------------------------------------------------- +static bool IS_REPLACEDIR(dirtype type) +{ + return dirtype::OLD == type || dirtype::FOLDER == type || dirtype::NOOBJ == type; +} + +static bool IS_RMTYPEDIR(dirtype type) +{ + return dirtype::OLD == type || dirtype::FOLDER == type; +} + +static bool IS_CREATE_MP_STAT(const char* path) +{ + // [NOTE] + // pHasMpStat->Get() is set in get_object_attribute() + // + return (path && 0 == strcmp(path, "/") && !pHasMpStat->Get()); +} + +int put_headers(const char* path, headers_t& meta, bool is_copy, bool use_st_size) +{ + int result; + S3fsCurl s3fscurl(true); + off_t size; + std::string strpath; + + S3FS_PRN_INFO2("[path=%s]", path); + + if(0 == strcmp(path, "/") && mount_prefix.empty()){ + strpath = "//"; // for the mount point that is bucket root, change "/" to "//". + }else{ + strpath = path; + } + + // files larger than 5GB must be modified via the multipart interface + // call use_st_size as false when the file does not exist(ex. 
rename object) + if(use_st_size && '/' != *strpath.rbegin()){ // directory object("dir/") is always 0(Content-Length = 0) + struct stat buf; + if(0 != (result = get_object_attribute(path, &buf))){ + return result; + } + size = buf.st_size; + }else{ + size = get_size(meta); + } + + if(!nocopyapi && !nomultipart && size >= multipart_threshold){ + if(0 != (result = s3fscurl.MultipartHeadRequest(strpath.c_str(), size, meta, is_copy))){ + return result; + } + }else{ + if(0 != (result = s3fscurl.PutHeadRequest(strpath.c_str(), meta, is_copy))){ + return result; + } + } + return 0; +} + + +static int directory_empty(const char* path) +{ + int result; + S3ObjList head; + + if((result = list_bucket(path, head, "/", true)) != 0){ + S3FS_PRN_ERR("list_bucket returns error."); + return result; + } + if(!head.IsEmpty()){ + return -ENOTEMPTY; + } + return 0; +} + +// +// Get object attributes with stat cache. +// This function is base for s3fs_getattr(). +// +// [NOTICE] +// Checking order is changed following list because of reducing the number of the requests. +// 1) "dir" +// 2) "dir/" +// 3) "dir_$folder$" +// +// Special two case of the mount point directory: +// [Case 1] the mount point is the root of the bucket: +// 1) "/" +// +// [Case 2] the mount point is a directory path(ex. foo) below the bucket: +// 1) "foo" +// 2) "foo/" +// 3) "foo_$folder$" +// +static int get_object_attribute(const char* path, struct stat* pstbuf, headers_t* pmeta, bool overcheck, bool* pisforce, bool add_no_truncate_cache) +{ + int result = -1; + struct stat tmpstbuf; + struct stat* pstat = pstbuf ? pstbuf : &tmpstbuf; + headers_t tmpHead; + headers_t* pheader = pmeta ? 
pmeta : &tmpHead; + std::string strpath; + S3fsCurl s3fscurl; + bool forcedir = false; + bool is_mountpoint = false; // path is the mount point + bool is_bucket_mountpoint = false; // path is the mount point which is the bucket root + std::string::size_type Pos; + + S3FS_PRN_DBG("[path=%s]", path); + + if(!path || '\0' == path[0]){ + return -ENOENT; + } + + memset(pstat, 0, sizeof(struct stat)); + + // check mount point + if(0 == strcmp(path, "/") || 0 == strcmp(path, ".")){ + is_mountpoint = true; + if(mount_prefix.empty()){ + is_bucket_mountpoint = true; + } + // default stat for mount point if the directory stat file is not existed. + pstat->st_mode = mp_mode; + pstat->st_uid = is_s3fs_uid ? s3fs_uid : mp_uid; + pstat->st_gid = is_s3fs_gid ? s3fs_gid : mp_gid; + } + + // Check cache. + pisforce = (nullptr != pisforce ? pisforce : &forcedir); + (*pisforce) = false; + strpath = path; + if(support_compat_dir && overcheck && std::string::npos != (Pos = strpath.find("_$folder$", 0))){ + strpath.erase(Pos); + strpath += "/"; + } + // [NOTE] + // For mount points("/"), the Stat cache key name is "/". + // + if(StatCache::getStatCacheData()->GetStat(strpath, pstat, pheader, overcheck, pisforce)){ + if(is_mountpoint){ + // if mount point, we need to set this. + pstat->st_nlink = 1; // see fuse faq + } + return 0; + } + if(StatCache::getStatCacheData()->IsNoObjectCache(strpath)){ + // there is the path in the cache for no object, it is no object. + return -ENOENT; + } + + // set query(head request) path + if(is_bucket_mountpoint){ + // [NOTE] + // This is a special process for mount point + // The path is "/" for mount points. + // If the bucket mounted at a mount point, we try to find "/" object under + // the bucket for mount point's stat. + // In this case, we will send the request "HEAD // HTTP /1.1" to S3 server. + // + // If the directory under the bucket is mounted, it will be sent + // "HEAD // HTTP/1.1", so we do not need to change path at + // here. 
+ // + strpath = "//"; // strpath is "//" + }else{ + strpath = path; + } + + if(use_newcache && accessor->UseGlobalCache()){ + size_t realSize = 0; + std::map headers; + result = accessor->Head(strpath, realSize, headers); + if(0 == result){ + headers["Content-Length"] = std::to_string(realSize); + for(auto& it : headers) { + pheader->insert(std::make_pair(it.first, it.second)); + } + } + } else { + result = s3fscurl.HeadRequest(strpath.c_str(), (*pheader)); + s3fscurl.DestroyCurlHandle(); + } + + + // if not found target path object, do over checking + if(-EPERM == result){ + // [NOTE] + // In case of a permission error, it exists in directory + // file list but inaccessible. So there is a problem that + // it will send a HEAD request every time, because it is + // not registered in the Stats cache. + // Therefore, even if the file has a permission error, it + // should be registered in the Stats cache. However, if + // the response without modifying is registered in the + // cache, the file permission will be 0644(umask dependent) + // because the meta header does not exist. + // Thus, set the mode of 0000 here in the meta header so + // that s3fs can print a permission error when the file + // is actually accessed. + // It is better not to set meta header other than mode, + // so do not do it. + // + (*pheader)["x-amz-meta-mode"] = "0"; + + }else if(0 != result){ + if(overcheck && !is_bucket_mountpoint){ + // when support_compat_dir is disabled, strpath maybe have "_$folder$". 
+ if('/' != *strpath.rbegin() && std::string::npos == strpath.find("_$folder$", 0)){ + // now path is "object", do check "object/" for over checking + strpath += "/"; + result = s3fscurl.HeadRequest(strpath.c_str(), (*pheader)); + s3fscurl.DestroyCurlHandle(); + } + if(support_compat_dir && 0 != result){ + // now path is "object/", do check "object_$folder$" for over checking + strpath.erase(strpath.length() - 1); + strpath += "_$folder$"; + result = s3fscurl.HeadRequest(strpath.c_str(), (*pheader)); + s3fscurl.DestroyCurlHandle(); + + if(0 != result){ + // cut "_$folder$" for over checking "no dir object" after here + if(std::string::npos != (Pos = strpath.find("_$folder$", 0))){ + strpath.erase(Pos); + } + } + } + } + if(0 != result && std::string::npos == strpath.find("_$folder$", 0)){ + // now path is "object" or "object/", do check "no dir object" which is not object but has only children. + // + // [NOTE] + // If the path is mount point and there is no Stat information file for it, we need this process. + // + if('/' == *strpath.rbegin()){ + strpath.erase(strpath.length() - 1); + } + if(-ENOTEMPTY == directory_empty(strpath.c_str())){ + // found "no dir object". + strpath += "/"; + *pisforce = true; + result = 0; + } + } + }else{ + if('/' != *strpath.rbegin() && std::string::npos == strpath.find("_$folder$", 0) && is_need_check_obj_detail(*pheader)){ + // check a case of that "object" does not have attribute and "object" is possible to be directory. + if(-ENOTEMPTY == directory_empty(strpath.c_str())){ + // found "no dir object". + strpath += "/"; + *pisforce = true; + result = 0; + } + } + } + + // set headers for mount point from default stat + if(is_mountpoint){ + if(0 != result || pheader->empty()){ + pHasMpStat->Set(false); + + // [NOTE] + // If mount point and no stat information file, create header + // information from the default stat. 
+ // + (*pheader)["Content-Type"] = S3fsCurl::LookupMimeType(strpath); + (*pheader)["x-amz-meta-uid"] = std::to_string(pstat->st_uid); + (*pheader)["x-amz-meta-gid"] = std::to_string(pstat->st_gid); + (*pheader)["x-amz-meta-mode"] = std::to_string(pstat->st_mode); + (*pheader)["x-amz-meta-atime"] = std::to_string(pstat->st_atime); + (*pheader)["x-amz-meta-ctime"] = std::to_string(pstat->st_ctime); + (*pheader)["x-amz-meta-mtime"] = std::to_string(pstat->st_mtime); + + result = 0; + }else{ + pHasMpStat->Set(true); + } + } + + // [NOTE] + // If the file is listed but not allowed access, put it in + // the positive cache instead of the negative cache. + // + // When mount points, the following error does not occur. + // + if(0 != result && -EPERM != result){ + // finally, "path" object did not find. Add no object cache. + strpath = path; // reset original + StatCache::getStatCacheData()->AddNoObjectCache(strpath); + return result; + } + + // set cache key + if(is_bucket_mountpoint){ + strpath = "/"; + }else if(std::string::npos != (Pos = strpath.find("_$folder$", 0))){ + // if path has "_$folder$", need to cut it. + strpath.erase(Pos); + strpath += "/"; + } + + // Set into cache + // + // [NOTE] + // When add_no_truncate_cache is true, the stats is always cached. + // This cached stats is only removed by DelStat(). + // This is necessary for the case to access the attribute of opened file. + // (ex. getxattr() is called while writing to the opened file.) + // + if(add_no_truncate_cache || 0 != StatCache::getStatCacheData()->GetCacheSize()){ + // add into stat cache + if(!StatCache::getStatCacheData()->AddStat(strpath, (*pheader), forcedir, add_no_truncate_cache)){ + S3FS_PRN_ERR("failed adding stat cache [path=%s]", strpath.c_str()); + return -ENOENT; + } + if(!StatCache::getStatCacheData()->GetStat(strpath, pstat, pheader, overcheck, pisforce)){ + // There is not in cache.(why?) -> retry to convert. 
+ if(!convert_header_to_stat(strpath.c_str(), (*pheader), pstat, forcedir)){ + S3FS_PRN_ERR("failed convert headers to stat[path=%s]", strpath.c_str()); + return -ENOENT; + } + } + }else{ + // cache size is Zero -> only convert. + if(!convert_header_to_stat(strpath.c_str(), (*pheader), pstat, forcedir)){ + S3FS_PRN_ERR("failed convert headers to stat[path=%s]", strpath.c_str()); + return -ENOENT; + } + } + + if(is_mountpoint){ + // if mount point, we need to set this. + pstat->st_nlink = 1; // see fuse faq + } + + return 0; +} + +bool get_object_sse_type(const char* path, sse_type_t& ssetype, std::string& ssevalue) +{ + if(!path){ + return false; + } + + headers_t meta; + if(0 != get_object_attribute(path, nullptr, &meta)){ + S3FS_PRN_ERR("Failed to get object(%s) headers", path); + return false; + } + + ssetype = sse_type_t::SSE_DISABLE; + ssevalue.clear(); + for(headers_t::iterator iter = meta.begin(); iter != meta.end(); ++iter){ + std::string key = (*iter).first; + if(0 == strcasecmp(key.c_str(), "x-amz-server-side-encryption") && 0 == strcasecmp((*iter).second.c_str(), "AES256")){ + ssetype = sse_type_t::SSE_S3; + }else if(0 == strcasecmp(key.c_str(), "x-amz-server-side-encryption-aws-kms-key-id")){ + ssetype = sse_type_t::SSE_KMS; + ssevalue = (*iter).second; + }else if(0 == strcasecmp(key.c_str(), "x-amz-server-side-encryption-customer-key-md5")){ + ssetype = sse_type_t::SSE_C; + ssevalue = (*iter).second; + } + } + return true; +} + + +// +// Check the object uid and gid for write/read/execute. +// The param "mask" is as same as access() function. +// If there is not a target file, this function returns -ENOENT. +// If the target file can be accessed, the result always is 0. +// +// path: the target object path +// mask: bit field(F_OK, R_OK, W_OK, X_OK) like access(). +// stat: nullptr or the pointer of struct stat. 
+// +static int check_object_access(const char* path, int mask, struct stat* pstbuf) +{ + //return 0; + int result; + struct stat st; + struct stat* pst = (pstbuf ? pstbuf : &st); + // struct fuse_context* pcxt; + + // S3FS_PRN_DBG("[path=%s]", path); + + // if(nullptr == (pcxt = fuse_get_context())){ + // return -EIO; + // } + // S3FS_PRN_DBG("[pid=%u,uid=%u,gid=%u]", (unsigned int)(pcxt->pid), (unsigned int)(pcxt->uid), (unsigned int)(pcxt->gid)); + + if(0 != (result = get_object_attribute(path, pst))){ + // If there is not the target file(object), result is -ENOENT. + return result; + } + // if(0 == pcxt->uid){ + // // root is allowed all accessing. + // return 0; + // } + // if(is_s3fs_uid && s3fs_uid == pcxt->uid){ + // // "uid" user is allowed all accessing. + // return 0; + // } + // if(F_OK == mask){ + // // if there is a file, always return allowed. + // return 0; + // } + + // // for "uid", "gid" option + // uid_t obj_uid = (is_s3fs_uid ? s3fs_uid : pst->st_uid); + // gid_t obj_gid = (is_s3fs_gid ? s3fs_gid : pst->st_gid); + + // // compare file mode and uid/gid + mask. + // mode_t mode; + // mode_t base_mask = S_IRWXO; + // if(is_s3fs_umask){ + // // If umask is set, all object attributes set ~umask. 
+    //     mode = ((S_IRWXU | S_IRWXG | S_IRWXO) & ~s3fs_umask);
+    // }else{
+    //     mode = pst->st_mode;
+    // }
+    // if(pcxt->uid == obj_uid){
+    //     base_mask |= S_IRWXU;
+    // }
+    // if(pcxt->gid == obj_gid){
+    //     base_mask |= S_IRWXG;
+    // } else if(1 == is_uid_include_group(pcxt->uid, obj_gid)){
+    //     base_mask |= S_IRWXG;
+    // }
+    // mode &= base_mask;
+
+    // if(X_OK == (mask & X_OK)){
+    //     if(0 == (mode & (S_IXUSR | S_IXGRP | S_IXOTH))){
+    //         return -EACCES;
+    //     }
+    // }
+    // if(W_OK == (mask & W_OK)){
+    //     if(0 == (mode & (S_IWUSR | S_IWGRP | S_IWOTH))){
+    //         return -EACCES;
+    //     }
+    // }
+    // if(R_OK == (mask & R_OK)){
+    //     if(0 == (mode & (S_IRUSR | S_IRGRP | S_IROTH))){
+    //         return -EACCES;
+    //     }
+    // }
+    // if(0 == mode){
+    //     return -EACCES;
+    // }
+    return 0;
+}
+
+// [NOTE]
+// The following check_* helpers all inspect the XML error body returned by
+// the S3 endpoint. They shared an identical "null check + <Code> element
+// equals an expected value" prologue, which is factored out here.
+//
+// Returns true only when pbody is a non-null XML error body whose <Code>
+// element exactly matches expected_code.
+static bool is_error_code(const char* pbody, size_t len, const char* expected_code)
+{
+    if(!pbody){
+        return false;
+    }
+    std::string code;
+    return simple_parse_xml(pbody, len, "Code", code) && code == expected_code;
+}
+
+// Detects an AuthorizationHeaderMalformed error and extracts the region
+// the server expects into expectregion.
+static bool check_region_error(const char* pbody, size_t len, std::string& expectregion)
+{
+    return is_error_code(pbody, len, "AuthorizationHeaderMalformed") && simple_parse_xml(pbody, len, "Region", expectregion);
+}
+
+// Detects a PermanentRedirect error and extracts the endpoint the server
+// expects into expectendpoint.
+static bool check_endpoint_error(const char* pbody, size_t len, std::string& expectendpoint)
+{
+    return is_error_code(pbody, len, "PermanentRedirect") && simple_parse_xml(pbody, len, "Endpoint", expectendpoint);
+}
+
+// Detects an InvalidArgument error complaining specifically about the
+// x-amz-server-side-encryption request header.
+static bool check_invalid_sse_arg_error(const char* pbody, size_t len)
+{
+    if(!is_error_code(pbody, len, "InvalidArgument")){
+        return false;
+    }
+    std::string argname;
+    return simple_parse_xml(pbody, len, "ArgumentName", argname) && argname == "x-amz-server-side-encryption";
+}
+
+// Extracts the human readable <Message> element of an error body into
+// message. message is always cleared first.
+static bool check_error_message(const char* pbody, size_t len, std::string& message)
+{
+    message.clear();
+    if(!pbody){
+        return false;
+    }
+    return simple_parse_xml(pbody, len, "Message", message);
+}
+
+
+
+// [NOTE]
+// This function checks if the bucket is accessible when s3fs starts.
+//
+// The following patterns for mount points are supported by s3fs:
+// (1) Mount the bucket top
+// (2) Mount to a directory(folder) under the bucket. In this case:
+//     (2A) Directories created by clients other than s3fs
+//     (2B) Directory created by s3fs
+//
+// Both case of (1) and (2) check access permissions to the mount point
+// path(directory).
+// In the case of (2A), if the directory(object) for the mount point does
+// not exist, the check fails. However, launching s3fs with the "compat_dir"
+// option avoids this error and the check succeeds. If you do not specify
+// the "compat_dir" option in case (2A), please create a directory(object)
+// for the mount point before launching s3fs.
+//
+static int s3fs_check_service()
+{
+    S3FS_PRN_INFO("check services.");
+
+    // At first time for access S3, we check IAM role if it sets.
+ if(!ps3fscred->CheckIAMCredentialUpdate()){ + S3FS_PRN_CRIT("Failed to initialize IAM credential."); + return EXIT_FAILURE; + } + + S3fsCurl s3fscurl; + int res; + bool force_no_sse = false; + + while(0 > (res = s3fscurl.CheckBucket(get_realpath("/").c_str(), support_compat_dir, force_no_sse))){ + // get response code + bool do_retry = false; + long responseCode = s3fscurl.GetLastResponseCode(); + + // check wrong endpoint, and automatically switch endpoint + if(300 <= responseCode && responseCode < 500){ + + // check region error(for putting message or retrying) + const std::string* body = s3fscurl.GetBodyData(); + std::string expectregion; + std::string expectendpoint; + + // Check if any case can be retried + if(check_region_error(body->c_str(), body->size(), expectregion)){ + // [NOTE] + // If endpoint is not specified(using us-east-1 region) and + // an error is encountered accessing a different region, we + // will retry the check on the expected region. + // see) https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html#access-bucket-intro + // + if(s3host != "http://s3.amazonaws.com" && s3host != "https://s3.amazonaws.com"){ + // specified endpoint for specified url is wrong. + if(is_specified_endpoint){ + S3FS_PRN_CRIT("The bucket region is not '%s'(specified) for specified url(%s), it is correctly '%s'. You should specify url(http(s)://s3-%s.amazonaws.com) and endpoint(%s) option.", endpoint.c_str(), s3host.c_str(), expectregion.c_str(), expectregion.c_str(), expectregion.c_str()); + }else{ + S3FS_PRN_CRIT("The bucket region is not '%s'(default) for specified url(%s), it is correctly '%s'. You should specify url(http(s)://s3-%s.amazonaws.com) and endpoint(%s) option.", endpoint.c_str(), s3host.c_str(), expectregion.c_str(), expectregion.c_str(), expectregion.c_str()); + } + + }else if(is_specified_endpoint){ + // specified endpoint is wrong. + S3FS_PRN_CRIT("The bucket region is not '%s'(specified), it is correctly '%s'. 
You should specify endpoint(%s) option.", endpoint.c_str(), expectregion.c_str(), expectregion.c_str()); + + }else if(S3fsCurl::GetSignatureType() == signature_type_t::V4_ONLY || S3fsCurl::GetSignatureType() == signature_type_t::V2_OR_V4){ + // current endpoint and url are default value, so try to connect to expected region. + S3FS_PRN_CRIT("Failed to connect region '%s'(default), so retry to connect region '%s' for url(http(s)://s3-%s.amazonaws.com).", endpoint.c_str(), expectregion.c_str(), expectregion.c_str()); + + // change endpoint + endpoint = expectregion; + + // change url + if(s3host == "http://s3.amazonaws.com"){ + s3host = "http://s3-" + endpoint + ".amazonaws.com"; + }else if(s3host == "https://s3.amazonaws.com"){ + s3host = "https://s3-" + endpoint + ".amazonaws.com"; + } + + // Retry with changed host + s3fscurl.DestroyCurlHandle(); + do_retry = true; + + }else{ + S3FS_PRN_CRIT("The bucket region is not '%s'(default), it is correctly '%s'. You should specify endpoint(%s) option.", endpoint.c_str(), expectregion.c_str(), expectregion.c_str()); + } + + }else if(check_endpoint_error(body->c_str(), body->size(), expectendpoint)){ + // redirect error + if(pathrequeststyle){ + S3FS_PRN_CRIT("S3 service returned PermanentRedirect (current is url(%s) and endpoint(%s)). You need to specify correct url(http(s)://s3-.amazonaws.com) and endpoint option with use_path_request_style option.", s3host.c_str(), endpoint.c_str()); + }else{ + S3FS_PRN_CRIT("S3 service returned PermanentRedirect with %s (current is url(%s) and endpoint(%s)). 
You need to specify correct endpoint option.", expectendpoint.c_str(), s3host.c_str(), endpoint.c_str()); + } + return EXIT_FAILURE; + + }else if(check_invalid_sse_arg_error(body->c_str(), body->size())){ + // SSE argument error, so retry it without SSE + S3FS_PRN_CRIT("S3 service returned InvalidArgument(x-amz-server-side-encryption), so retry without adding x-amz-server-side-encryption."); + + // Retry without sse parameters + s3fscurl.DestroyCurlHandle(); + do_retry = true; + force_no_sse = true; + } + } + + // Try changing signature from v4 to v2 + // + // [NOTE] + // If there is no case to retry with the previous checks, and there + // is a chance to retry with signature v2, prepare to retry with v2. + // + if(!do_retry && (responseCode == 400 || responseCode == 403) && S3fsCurl::GetSignatureType() == signature_type_t::V2_OR_V4){ + // switch sigv2 + S3FS_PRN_CRIT("Failed to connect by sigv4, so retry to connect by signature version 2. But you should to review url and endpoint option."); + + // retry to check with sigv2 + s3fscurl.DestroyCurlHandle(); + do_retry = true; + S3fsCurl::SetSignatureType(signature_type_t::V2_ONLY); + } + + // check errors(after retrying) + if(!do_retry && responseCode != 200 && responseCode != 301){ + // parse error message if existed + std::string errMessage; + const std::string* body = s3fscurl.GetBodyData(); + check_error_message(body->c_str(), body->size(), errMessage); + + if(responseCode == 400){ + S3FS_PRN_CRIT("Failed to check bucket and directory for mount point : Bad Request(host=%s, message=%s)", s3host.c_str(), errMessage.c_str()); + }else if(responseCode == 403){ + S3FS_PRN_CRIT("Failed to check bucket and directory for mount point : Invalid Credentials(host=%s, message=%s)", s3host.c_str(), errMessage.c_str()); + }else if(responseCode == 404){ + if(mount_prefix.empty()){ + S3FS_PRN_CRIT("Failed to check bucket and directory for mount point : Bucket or directory not found(host=%s, message=%s)", s3host.c_str(), 
errMessage.c_str()); + }else{ + S3FS_PRN_CRIT("Failed to check bucket and directory for mount point : Bucket or directory(%s) not found(host=%s, message=%s) - You may need to specify the compat_dir option.", mount_prefix.c_str(), s3host.c_str(), errMessage.c_str()); + } + }else{ + S3FS_PRN_CRIT("Failed to check bucket and directory for mount point : Unable to connect(host=%s, message=%s)", s3host.c_str(), errMessage.c_str()); + } + return EXIT_FAILURE; + } + } + s3fscurl.DestroyCurlHandle(); + + // make sure remote mountpath exists and is a directory + if(!mount_prefix.empty()){ + if(remote_mountpath_exists("/", support_compat_dir) != 0){ + S3FS_PRN_CRIT("Remote mountpath %s not found, this may be resolved with the compat_dir option.", mount_prefix.c_str()); + return EXIT_FAILURE; + } + } + S3FS_MALLOCTRIM(0); + + return EXIT_SUCCESS; +} + +// +// Check accessing the parent directories of the object by uid and gid. +// +static int check_parent_object_access(const char* path, int mask) +{ + std::string parent; + int result; + + S3FS_PRN_DBG("[path=%s]", path); + + if(0 == strcmp(path, "/") || 0 == strcmp(path, ".")){ + // path is mount point. 
+ return 0; + } + if(X_OK == (mask & X_OK)){ + for(parent = mydirname(path); !parent.empty(); parent = mydirname(parent)){ + if(parent == "."){ + parent = "/"; + } + if(0 != (result = check_object_access(parent.c_str(), X_OK, nullptr))){ + return result; + } + if(parent == "/" || parent == "."){ + break; + } + } + } + mask = (mask & ~X_OK); + if(0 != mask){ + parent = mydirname(path); + if(parent == "."){ + parent = "/"; + } + if(0 != (result = check_object_access(parent.c_str(), mask, nullptr))){ + return result; + } + } + return 0; +} + +static int list_bucket(const char* path, S3ObjList& head, const char* delimiter, bool check_content_only) +{ + std::string s3_realpath; + std::string query_delimiter; + std::string query_prefix; + std::string query_maxkey; + std::string next_continuation_token; + std::string next_marker; + bool truncated = true; + S3fsCurl s3fscurl; + + S3FS_PRN_INFO1("[path=%s]", path); + + if(delimiter && 0 < strlen(delimiter)){ + query_delimiter += "delimiter="; + query_delimiter += delimiter; + query_delimiter += "&"; + } + + query_prefix += "&prefix="; + s3_realpath = get_realpath(path); + if(s3_realpath.empty() || '/' != *s3_realpath.rbegin()){ + // last word must be "/" + query_prefix += urlEncodePath(s3_realpath.substr(1) + "/"); + }else{ + query_prefix += urlEncodePath(s3_realpath.substr(1)); + } + if (check_content_only){ + // Just need to know if there are child objects in dir + // For dir with children, expect "dir/" and "dir/child" + query_maxkey += "max-keys=2"; + }else{ + query_maxkey += "max-keys=" + std::to_string(max_keys_list_object); + } + + while(truncated){ + // append parameters to query in alphabetical order + std::string each_query; + if(!next_continuation_token.empty()){ + each_query += "continuation-token=" + urlEncodePath(next_continuation_token) + "&"; + next_continuation_token = ""; + } + each_query += query_delimiter; + if(S3fsCurl::IsListObjectsV2()){ + each_query += "list-type=2&"; + } + if(!next_marker.empty()){ 
+ each_query += "marker=" + urlEncodePath(next_marker) + "&"; + next_marker = ""; + } + each_query += query_maxkey; + each_query += query_prefix; + + // request + int result; + if(0 != (result = s3fscurl.ListBucketRequest(path, each_query.c_str()))){ + S3FS_PRN_ERR("ListBucketRequest returns with error."); + return result; + } + const std::string* body = s3fscurl.GetBodyData(); + + // [NOTE] + // CR code(\r) is replaced with LF(\n) by xmlReadMemory() function. + // To prevent that, only CR code is encoded by following function. + // The encoded CR code is decoded with append_objects_from_xml(_ex). + // + std::string encbody = get_encoded_cr_code(body->c_str()); + + // xmlDocPtr + std::unique_ptr doc(xmlReadMemory(encbody.c_str(), static_cast(encbody.size()), "", nullptr, 0), xmlFreeDoc); + if(nullptr == doc){ + S3FS_PRN_ERR("xmlReadMemory returns with error."); + return -EIO; + } + if(0 != append_objects_from_xml(path, doc.get(), head)){ + S3FS_PRN_ERR("append_objects_from_xml returns with error."); + return -EIO; + } + if(true == (truncated = is_truncated(doc.get()))){ + auto tmpch = get_next_continuation_token(doc.get()); + if(nullptr != tmpch){ + next_continuation_token = reinterpret_cast(tmpch.get()); + }else if(nullptr != (tmpch = get_next_marker(doc.get()))){ + next_marker = reinterpret_cast(tmpch.get()); + } + + if(next_continuation_token.empty() && next_marker.empty()){ + // If did not specify "delimiter", s3 did not return "NextMarker". + // On this case, can use last name for next marker. 
+ // + std::string lastname; + if(!head.GetLastName(lastname)){ + S3FS_PRN_WARN("Could not find next marker, thus break loop."); + truncated = false; + }else{ + next_marker = s3_realpath.substr(1); + if(s3_realpath.empty() || '/' != *s3_realpath.rbegin()){ + next_marker += "/"; + } + next_marker += lastname; + } + } + } + + // reset(initialize) curl object + s3fscurl.DestroyCurlHandle(); + + if(check_content_only){ + break; + } + } + S3FS_MALLOCTRIM(0); + + return 0; +} + +static int remote_mountpath_exists(const char* path, bool compat_dir) +{ + struct stat stbuf; + int result; + + S3FS_PRN_INFO1("[path=%s]", path); + + // getattr will prefix the path with the remote mountpoint + if(0 != (result = get_object_attribute(path, &stbuf, nullptr))){ + return result; + } + + // [NOTE] + // If there is no mount point(directory object) that s3fs can recognize, + // an error will occur. + // A mount point with a directory path(ex. "/...") + // requires that directory object. + // If the directory or object is created by a client other than s3fs, + // s3fs may not be able to recognize it. If you specify such a directory + // as a mount point, you can avoid the error by starting with "compat_dir" + // specified. + // + if(!compat_dir && !pHasMpStat->Get()){ + return -ENOENT; + } + return 0; +} + +// +// Check & Set attributes for mount point. +// +static bool set_mountpoint_attribute(struct stat& mpst) +{ + mp_uid = geteuid(); + mp_gid = getegid(); + mp_mode = S_IFDIR | (allow_other ? (is_mp_umask ? (~mp_umask & (S_IRWXU | S_IRWXG | S_IRWXO)) : (S_IRWXU | S_IRWXG | S_IRWXO)) : S_IRWXU); + +// In MSYS2 environment with WinFsp, it is not supported to change mode of mount point. +// Doing that forcely will occurs permission problem, so disabling it. 
+#ifdef __MSYS__
+    return true;
+#else
+    S3FS_PRN_INFO2("PROC(uid=%u, gid=%u) - MountPoint(uid=%u, gid=%u, mode=%04o)",
+         (unsigned int)mp_uid, (unsigned int)mp_gid, (unsigned int)(mpst.st_uid), (unsigned int)(mpst.st_gid), mpst.st_mode);
+
+    // check owner
+    if(0 == mp_uid || mpst.st_uid == mp_uid){
+        return true;
+    }
+    // check group permission
+    if(mpst.st_gid == mp_gid || 1 == is_uid_include_group(mp_uid, mpst.st_gid)){
+        if(S_IRWXG == (mpst.st_mode & S_IRWXG)){
+            return true;
+        }
+    }
+    // check other permission
+    if(S_IRWXO == (mpst.st_mode & S_IRWXO)){
+        return true;
+    }
+    return false;
+#endif
+}
+
+//
+// Set bucket and mount_prefix based on passed bucket name.
+//
+// Accepts "bucket" or "bucket:/path". Unlike the former strtok() based
+// implementation, the argument string is no longer modified in place
+// (strtok wrote NUL bytes through a const_cast, which is undefined
+// behavior if the argument ever points at read-only storage).
+//
+static int set_bucket(const char* arg)
+{
+    const char* colon = strchr(arg, ':');
+    if(colon){
+        if(strstr(arg, "://")){
+            S3FS_PRN_EXIT("bucket name and path(\"%s\") is wrong, it must be \"bucket[:/path]\".", arg);
+            return -1;
+        }
+        // bucket name is everything before the first ':'
+        std::string bucket_name(arg, colon - arg);
+        if(!S3fsCred::SetBucket(bucket_name.c_str())){
+            S3FS_PRN_EXIT("bucket name and path(\"%s\") is wrong, it must be \"bucket[:/path]\".", arg);
+            return -1;
+        }
+        // mount prefix is everything after the first ':'. An empty
+        // remainder("bucket:") sets no prefix, as before.
+        const char* pmount_prefix = colon + 1;
+        if('\0' != pmount_prefix[0]){
+            if('/' != pmount_prefix[0]){
+                S3FS_PRN_EXIT("path(%s) must be prefix \"/\".", pmount_prefix);
+                return -1;
+            }
+            mount_prefix = pmount_prefix;
+            // Trim the last consecutive '/'
+            mount_prefix = trim_right(mount_prefix, "/");
+        }
+    }else{
+        if(!S3fsCred::SetBucket(arg)){
+            S3FS_PRN_EXIT("bucket name and path(\"%s\") is wrong, it must be \"bucket[:/path]\".", arg);
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Prints the command a user can run to clean up a stale mount point.
+// Prefers fusermount when available and falls back to plain umount.
+static int print_umount_message(const std::string& mp, bool force)
+{
+    std::string cmd;
+    if (is_cmd_exists("fusermount")){
+        if (force){
+            cmd = "fusermount -uz " + mp;
+        } else {
+            cmd = "fusermount -u " + mp;
+        }
+    }else{
+        if (force){
+            cmd = "umount -l " + mp;
+        } else {
+            cmd = "umount " + mp;
+        }
+    }
+
+    S3FS_PRN_EXIT("MOUNTPOINT %s is stale, you could use this command to fix: %s", mp.c_str(), cmd.c_str());
+
+    return 0;
+}
+
+// Returns true when the given program can be resolved by the shell.
+static bool is_cmd_exists(const std::string& command)
+{
+    // The `command -v` is a POSIX-compliant method for checking the existence of a program.
+    std::string cmd = "command -v " + command + " >/dev/null 2>&1";
+    int result = system(cmd.c_str());
+    return (result != -1 && WIFEXITED(result) && WEXITSTATUS(result) == 0);
+}
+
+static int update_mctime_parent_directory(const char* _path)
+{
+    if(!update_parent_dir_stat){
+        // Disable updating parent directory stat.
+        S3FS_PRN_DBG("Updating parent directory stats is disabled");
+        return 0;
+    }
+
+    WTF8_ENCODE(path)
+    int result;
+    std::string parentpath;     // parent directory path
+    std::string nowpath;        // now directory object path("dir" or "dir/" or "xxx_$folder$", etc)
+    std::string newpath;        // directory path for the current version("dir/")
+    std::string nowcache;
+    headers_t   meta;
+    struct stat stbuf;
+    struct timespec mctime;
+    struct timespec atime;
+    dirtype     nDirType = dirtype::UNKNOWN;
+
+    S3FS_PRN_INFO2("[path=%s]", path);
+
+    // get parent directory path
+    parentpath = mydirname(path);
+
+    // check & get directory type
+    if(0 != (result = chk_dir_object_type(parentpath.c_str(), newpath, nowpath, nowcache, &meta, &nDirType))){
+        return result;
+    }
+
+    // get directory stat
+    //
+    // [NOTE]
+    // It is assumed that this function is called after the operation on
+    // the file is completed, so there is no need to check the permissions
+    // on the parent directory.
+    //
+    if(0 != (result = get_object_attribute(parentpath.c_str(), &stbuf))){
+        // If there is not the target file(object), result is -ENOENT.
+ return result; + } + if(!S_ISDIR(stbuf.st_mode)){ + S3FS_PRN_ERR("path(%s) is not parent directory.", parentpath.c_str()); + return -EIO; + } + + // make atime/mtime/ctime for updating + s3fs_realtime(mctime); + set_stat_to_timespec(stbuf, stat_time_type::ATIME, atime); + + if(0 == atime.tv_sec && 0 == atime.tv_nsec){ + atime = mctime; + } + + if(nocopyapi || IS_REPLACEDIR(nDirType) || IS_CREATE_MP_STAT(parentpath.c_str())){ + // Should rebuild directory object(except new type) + // Need to remove old dir("dir" etc) and make new dir("dir/") + std::string xattrvalue; + const char* pxattrvalue; + if(get_meta_xattr_value(path, xattrvalue)){ + pxattrvalue = xattrvalue.c_str(); + }else{ + pxattrvalue = nullptr; + } + + // At first, remove directory old object + if(!nowpath.empty()){ + if(0 != (result = remove_old_type_dir(nowpath, nDirType))){ + return result; + } + } + if(!nowcache.empty()){ + StatCache::getStatCacheData()->DelStat(nowcache); + } + + // Make new directory object("dir/") + if(0 != (result = create_directory_object(newpath.c_str(), stbuf.st_mode, atime, mctime, mctime, stbuf.st_uid, stbuf.st_gid, pxattrvalue))){ + return result; + } + }else{ + std::string strSourcePath = (mount_prefix.empty() && "/" == nowpath) ? "//" : nowpath; + headers_t updatemeta; + updatemeta["x-amz-meta-mtime"] = str(mctime); + updatemeta["x-amz-meta-ctime"] = str(mctime); + updatemeta["x-amz-meta-atime"] = str(atime); + updatemeta["x-amz-copy-source"] = urlEncodePath(service_path + S3fsCred::GetBucket() + get_realpath(strSourcePath.c_str())); + updatemeta["x-amz-metadata-directive"] = "REPLACE"; + + merge_headers(meta, updatemeta, true); + + // upload meta for parent directory. 
+ if(0 != (result = put_headers(nowpath.c_str(), meta, true))){ + return result; + } + StatCache::getStatCacheData()->DelStat(nowcache); + } + S3FS_MALLOCTRIM(0); + + return 0; +} + +static int create_directory_object(const char* path, mode_t mode, const struct timespec& ts_atime, const struct timespec& ts_mtime, const struct timespec& ts_ctime, uid_t uid, gid_t gid, const char* pxattrvalue) +{ + S3FS_PRN_INFO1("[path=%s][mode=%04o][atime=%s][mtime=%s][ctime=%s][uid=%u][gid=%u]", path, mode, str(ts_atime).c_str(), str(ts_mtime).c_str(), str(ts_ctime).c_str(), (unsigned int)uid, (unsigned int)gid); + + if(!path || '\0' == path[0]){ + return -EINVAL; + } + std::string tpath = path; + if('/' != *tpath.rbegin()){ + tpath += "/"; + }else if("/" == tpath && mount_prefix.empty()){ + tpath = "//"; // for the mount point that is bucket root, change "/" to "//". + } + + headers_t meta; + meta["x-amz-meta-uid"] = std::to_string(uid); + meta["x-amz-meta-gid"] = std::to_string(gid); + meta["x-amz-meta-mode"] = std::to_string(mode); + meta["x-amz-meta-atime"] = str(ts_atime); + meta["x-amz-meta-mtime"] = str(ts_mtime); + meta["x-amz-meta-ctime"] = str(ts_ctime); + + if(pxattrvalue){ + S3FS_PRN_DBG("Set xattrs = %s", urlDecode(pxattrvalue).c_str()); + meta["x-amz-meta-xattr"] = pxattrvalue; + } + + S3fsCurl s3fscurl; + return s3fscurl.PutRequest(tpath.c_str(), meta, -1); // fd=-1 means for creating zero byte object. +} + +// [NOTE] +// Converts and returns the POSIX ACL default(system.posix_acl_default) value of +// the parent directory as a POSIX ACL(system.posix_acl_access) value. +// Returns false if the parent directory has no POSIX ACL defaults. +// +static bool build_inherited_xattr_value(const char* path, std::string& xattrvalue) +{ + S3FS_PRN_DBG("[path=%s]", path); + + xattrvalue.clear(); + + if(0 == strcmp(path, "/") || 0 == strcmp(path, ".")){ + // path is mount point, thus does not have parent. 
+        return false;
+    }
+
+    std::string parent = mydirname(path);
+    if(parent.empty()){
+        S3FS_PRN_ERR("Could not get parent path for %s.", path);
+        return false;
+    }
+
+    // get parent's "system.posix_acl_default" value(base64'd).
+    std::string parent_default_value;
+    if(!get_xattr_posix_key_value(parent.c_str(), parent_default_value, true)){
+        return false;
+    }
+
+    // build "system.posix_acl_access" from parent's default value
+    std::string raw_xattr_value;
+    raw_xattr_value = "{\"system.posix_acl_access\":\"";
+    raw_xattr_value += parent_default_value;
+    raw_xattr_value += "\"}";
+
+    xattrvalue = urlEncodePath(raw_xattr_value);
+    return true;
+}
+
+// Looks up one POSIX ACL key in the object's xattr header and returns its
+// value base64-encoded. default_key selects "system.posix_acl_default"
+// (true) or "system.posix_acl_access" (false).
+static bool get_xattr_posix_key_value(const char* path, std::string& xattrvalue, bool default_key)
+{
+    xattrvalue.clear();
+
+    std::string rawvalue;
+    if(!get_meta_xattr_value(path, rawvalue)){
+        return false;
+    }
+
+    xattrs_t xattrs;
+    if(0 == parse_xattrs(rawvalue, xattrs)){
+        return false;
+    }
+
+    std::string targetkey;
+    if(default_key){
+        targetkey = "system.posix_acl_default";
+    }else{
+        targetkey = "system.posix_acl_access";
+    }
+
+    xattrs_t::iterator iter;
+    if(xattrs.end() == (iter = xattrs.find(targetkey))){
+        return false;
+    }
+
+    // convert value by base64
+    // [FIX] the template argument list of reinterpret_cast had been lost
+    // (bare "reinterpret_cast(...)" does not compile); restore the byte
+    // pointer type expected by s3fs_base64.
+    xattrvalue = s3fs_base64(reinterpret_cast<const unsigned char*>(iter->second.c_str()), iter->second.length());
+
+    return true;
+}
+
+// Fetches the raw (url-encoded JSON) "x-amz-meta-xattr" header value for path.
+// Returns false when the object has no xattr header or cannot be read.
+static bool get_meta_xattr_value(const char* path, std::string& rawvalue)
+{
+    if(!path || '\0' == path[0]){
+        S3FS_PRN_ERR("path is empty.");
+        return false;
+    }
+    S3FS_PRN_DBG("[path=%s]", path);
+
+    rawvalue.clear();
+
+    headers_t meta;
+    if(0 != get_object_attribute(path, nullptr, &meta)){
+        S3FS_PRN_ERR("Failed to get object(%s) headers", path);
+        return false;
+    }
+
+    headers_t::const_iterator iter;
+    if(meta.end() == (iter = meta.find("x-amz-meta-xattr"))){
+        return false;
+    }
+    rawvalue = iter->second;
+    return true;
+}
+
+// Parses the url-encoded xattr header ({"key":"base64val",...}) into the
+// xattrs map. Returns the number of parsed entries; 0 means a format error
+// or an empty header.
+static size_t parse_xattrs(const std::string& strxattrs, xattrs_t& xattrs)
+{
+    xattrs.clear();
+
+    // decode
+    std::string jsonxattrs = urlDecode(strxattrs);
+
+    // get from "{" to "}"
+    std::string restxattrs;
+    {
+        size_t startpos;
+        size_t endpos = std::string::npos;
+        if(std::string::npos != (startpos = jsonxattrs.find_first_of('{'))){
+            endpos = jsonxattrs.find_last_of('}');
+        }
+        if(startpos == std::string::npos || endpos == std::string::npos || endpos <= startpos){
+            S3FS_PRN_WARN("xattr header(%s) is not json format.", jsonxattrs.c_str());
+            return 0;
+        }
+        restxattrs = jsonxattrs.substr(startpos + 1, endpos - (startpos + 1));
+    }
+
+    // parse each key:val
+    // The loop peels one comma-separated pair off the front of restxattrs
+    // per iteration; malformed pairs are skipped rather than aborting.
+    for(size_t pair_nextpos = restxattrs.find_first_of(','); !restxattrs.empty(); restxattrs = (pair_nextpos != std::string::npos ? restxattrs.substr(pair_nextpos + 1) : ""), pair_nextpos = restxattrs.find_first_of(',')){
+        std::string pair = pair_nextpos != std::string::npos ? restxattrs.substr(0, pair_nextpos) : restxattrs;
+        std::string key;
+        std::string val;
+        if(!parse_xattr_keyval(pair, key, &val)){
+            // something format error, so skip this.
+            continue;
+        }
+        xattrs[key] = val;
+    }
+    return xattrs.size();
+}
+
+
+// Splits one "key":"base64val" pair: strips the double quotes from both
+// parts and base64-decodes the value into *pval.
+static bool parse_xattr_keyval(const std::string& xattrpair, std::string& key, std::string* pval)
+{
+    // parse key and value
+    size_t pos;
+    std::string tmpval;
+    if(std::string::npos == (pos = xattrpair.find_first_of(':'))){
+        S3FS_PRN_ERR("one of xattr pair(%s) is wrong format.", xattrpair.c_str());
+        return false;
+    }
+    key = xattrpair.substr(0, pos);
+    tmpval = xattrpair.substr(pos + 1);
+
+    if(!takeout_str_dquart(key) || !takeout_str_dquart(tmpval)){
+        S3FS_PRN_ERR("one of xattr pair(%s) is wrong format.", xattrpair.c_str());
+        return false;
+    }
+
+    *pval = s3fs_decode64(tmpval.c_str(), tmpval.size());
+
+    return true;
+}
+
+// Fetches the parent directory's raw "x-amz-meta-xattr" value (if any).
+static bool get_parent_meta_xattr_value(const char* path, std::string& rawvalue)
+{
+    if(0 == strcmp(path, "/") || 0 == strcmp(path, ".")){
+        // path is mount point, thus does not have parent.
+        return false;
+    }
+
+    std::string parent = mydirname(path);
+    if(parent.empty()){
+        S3FS_PRN_ERR("Could not get parent path for %s.", path);
+        return false;
+    }
+    return get_meta_xattr_value(parent.c_str(), rawvalue);
+}
+
+// Parameter block for the multi-HEAD "not found" callback: paths that came
+// back 404 are collected in notfound_list under list_lock.
+struct multi_head_notfound_callback_param
+{
+    pthread_mutex_t list_lock;
+    s3obj_list_t notfound_list;
+};
+
+// Creates a zero-byte object for _path and registers an open FdEntity for it.
+// Returns the new pseudo fd (> 0) on success, or a negative errno
+// (-EEXIST when a non-empty object already exists).
+int posix_s3fs_create(const char* _path, int flags, mode_t mode) {
+    WTF8_ENCODE(path)
+    int result;
+
+    // [FIX] typo in log message: "craete" -> "create"
+    S3FS_PRN_INFO("create file [path=%s][mode=%04o][flags=0x%x]", path, mode, flags);
+
+    // check parent directory attribute.
+    if(0 != (result = check_parent_object_access(path, X_OK))){
+        return result;
+    }
+    struct stat statbuf;
+    memset(&statbuf, 0, sizeof(struct stat));
+    result = check_object_access(path, W_OK, &statbuf);
+    if (statbuf.st_size > 0) {
+        // The file already exists (non-empty); the caller should open it instead.
+        // NOTE(review): an existing zero-byte object does not take this branch
+        // and will be re-created below - confirm that is intended.
+        return -EEXIST;
+    }
+    if(-ENOENT == result){
+        if(0 != (result = check_parent_object_access(path, W_OK))){
+            return result;
+        }
+    }else if(0 != result){
+        return result;
+    }
+
+    // Seed the metadata of the new empty object; ownership comes from the
+    // posix context, all three timestamps start at "now".
+    std::string strnow = s3fs_str_realtime();
+    headers_t meta;
+    meta["Content-Length"] = "0";
+    meta["x-amz-meta-uid"] = std::to_string(posixcontext.uid);
+    meta["x-amz-meta-gid"] = std::to_string(posixcontext.gid);
+    meta["x-amz-meta-mode"] = std::to_string(mode);
+    meta["x-amz-meta-atime"] = strnow;
+    meta["x-amz-meta-mtime"] = strnow;
+    meta["x-amz-meta-ctime"] = strnow;
+
+    // Inherit the parent's POSIX ACL defaults as this file's access ACL.
+    std::string xattrvalue;
+    if(build_inherited_xattr_value(path, xattrvalue)){
+        S3FS_PRN_DBG("Set xattrs = %s", urlDecode(xattrvalue).c_str());
+        meta["x-amz-meta-xattr"] = xattrvalue;
+    }
+
+    // [NOTE] set no_truncate flag
+    // At this point, the file has not been created(uploaded) and
+    // the data is only present in the Stats cache.
+    // The Stats cache should not be deleted automatically by
+    // timeout. If this stats is deleted, s3fs will try to get it
+    // from the server with a Head request and will get an
+    // unexpected error because the result object does not exist.
+ // + if(!StatCache::getStatCacheData()->AddStat(path, meta, false, true)){ + return -EIO; + } + + AutoFdEntity autoent; + FdEntity* ent; + int error = 0; + if(nullptr == (ent = autoent.Open(path, &meta, 0, S3FS_OMIT_TS, flags, false, true, false, AutoLock::NONE, &error))){ + StatCache::getStatCacheData()->DelStat(path); + return error; + } + ent->MarkDirtyNewFile(); + int fd = autoent.Detach(); // KEEP fdentity open; + + S3FS_MALLOCTRIM(0); + if (fd > 0) { + PosixS3Info info; + info.fileinfo.fd = fd; + info.fileinfo.flags = flags; + //info.fileinfo.read_offset = 0; + //info.fileinfo.write_offset = 0; + info.fileinfo.offset = 0; + info.filename = path; + fdtofile[fd] = info; + } + return fd; +} + +int posix_s3fs_open(const char* _path, int flags, mode_t mode) +{ + if (flags & O_CREAT) { + int ret = posix_s3fs_create(_path, flags, mode); + if (ret != -EEXIST) { + return ret; + } + } + WTF8_ENCODE(path) + int result; + struct stat st; + bool needs_flush = false; + + S3FS_PRN_INFO("[path=%s][flags=0x%x]", path, flags); + + if ((flags & O_ACCMODE) == O_RDONLY && flags & O_TRUNC) { + return -EACCES; + } + + // [NOTE] + // Delete the Stats cache only if the file is not open. + // If the file is open, the stats cache will not be deleted as + // there are cases where the object does not exist on the server + // and only the Stats cache exists. + // + if(StatCache::getStatCacheData()->HasStat(path)){ + if(!FdManager::HasOpenEntityFd(path)){ + StatCache::getStatCacheData()->DelStat(path); + } + } + + int mask = (O_RDONLY != (flags & O_ACCMODE) ? 
W_OK : R_OK); + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + + result = check_object_access(path, mask, &st); + if(-ENOENT == result){ + if(0 != (result = check_parent_object_access(path, W_OK))){ + return result; + } + }else if(0 != result){ + return result; + } + + AutoFdEntity autoent; + FdEntity* ent; + headers_t meta; + + if((unsigned int)flags & O_TRUNC){ + if(0 != st.st_size){ + st.st_size = 0; + needs_flush = true; + } + }else{ + // [NOTE] + // If the file has already been opened and edited, the file size in + // the edited state is given priority. + // This prevents the file size from being reset to its original size + // if you keep the file open, shrink its size, and then read the file + // from another process while it has not yet been flushed. + // + if(nullptr != (ent = autoent.OpenExistFdEntity(path)) && ent->IsModified()){ + // sets the file size being edited. + ent->GetSize(st.st_size); + } + } + if(!S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)){ + st.st_mtime = -1; + } + + if(0 != (result = get_object_attribute(path, nullptr, &meta, true, nullptr, true))){ // no truncate cache + return result; + } + + struct timespec st_mctime; + set_stat_to_timespec(st, stat_time_type::MTIME, st_mctime); + + if(nullptr == (ent = autoent.Open(path, &meta, st.st_size, st_mctime, flags, false, true, false, AutoLock::NONE))){ + StatCache::getStatCacheData()->DelStat(path); + return -EIO; + } + + if (needs_flush){ + struct timespec ts; + s3fs_realtime(ts); + ent->SetMCtime(ts, ts); + + if(0 != (result = ent->RowFlush(autoent.GetPseudoFd(), path, AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", path, result); + StatCache::getStatCacheData()->DelStat(path); + return result; + } + } + int fd = autoent.Detach(); // KEEP fdentity open; + S3FS_MALLOCTRIM(0); + if (fd > 0) { + PosixS3Info info; + info.fileinfo.fd = fd; + info.fileinfo.flags = flags; + //info.fileinfo.read_offset = 0; + 
//info.fileinfo.write_offset = 0; + info.fileinfo.offset = 0; + info.filename = path; + info.type = FileType::FILE; + //info.fileinfo.ino = ent->GetInode(); + info.fileinfo.ino = fd;// 暂时赋值为fd + fdtofile[fd] = info; + } + return fd; +} + +int posix_s3fs_multiread(int fd, void* buf, size_t size, off_t file_offset) { + //WTF8_ENCODE(path) + S3FS_PRN_INFO("read [pseudo_fd=%llu]", (unsigned long long)fd); + if (fdtofile.find(fd) == fdtofile.end()) { + S3FS_PRN_ERR("readop could not find opened pseudo_fd(=%llu) ", (unsigned long long)(fd)); + return -EIO; + } + auto& info = fdtofile[fd]; + const char* path = info.filename.c_str(); + ssize_t res; + + // ! 注意这个偏移 + //off_t offset = info.fileinfo.read_offset; + off_t offset = info.fileinfo.offset + file_offset; + S3FS_PRN_INFO("[path=%s][size=%zu][offset=%lld][pseudo_fd=%llu]", path, size, (long long)offset, (unsigned long long)fd); + + AutoFdEntity autoent; + FdEntity* ent; + if(nullptr == (ent = autoent.GetExistFdEntity(path, fd))){ + S3FS_PRN_ERR("could not find opened pseudo_fd(=%llu) for path(%s)", (unsigned long long)(fd), path); + return -EIO; + } + + // check real file size + off_t realsize = 0; + if(!ent->GetSize(realsize) || 0 == realsize){ + S3FS_PRN_DBG("file size is 0, so break to read."); + return 0; + } + + if(0 > (res = ent->Read(fd, (char*)buf, offset, size, false))){ + S3FS_PRN_WARN("failed to read file(%s). result=%zd", path, res); + } + // 不更新offset 在调用层统一更新 + // if(0 < res){ + // info.fileinfo.offset += res; + // } + return static_cast(res); +} + +int posix_s3fs_read(int fd, void* buf, size_t size) +{ + //WTF8_ENCODE(path) + S3FS_PRN_INFO("read [pseudo_fd=%llu]", (unsigned long long)fd); + if (fdtofile.find(fd) == fdtofile.end()) { + S3FS_PRN_ERR("readop could not find opened pseudo_fd(=%llu) ", (unsigned long long)(fd)); + return -EIO; + } + auto& info = fdtofile[fd]; + const char* path = info.filename.c_str(); + ssize_t res; + + // ! 
注意这个偏移 + //off_t offset = info.fileinfo.read_offset; + off_t offset = info.fileinfo.offset; + S3FS_PRN_INFO("[path=%s][size=%zu][offset=%lld][pseudo_fd=%llu]", path, size, (long long)offset, (unsigned long long)fd); + + AutoFdEntity autoent; + FdEntity* ent; + if(nullptr == (ent = autoent.GetExistFdEntity(path, fd))){ + S3FS_PRN_ERR("could not find opened pseudo_fd(=%llu) for path(%s)", (unsigned long long)(fd), path); + return -EIO; + } + + // check real file size + off_t realsize = 0; + if(!ent->GetSize(realsize) || 0 == realsize){ + S3FS_PRN_DBG("file size is 0, so break to read."); + return 0; + } + + if(0 > (res = ent->Read(fd, (char*)buf, offset, size, false))){ + S3FS_PRN_WARN("failed to read file(%s). result=%zd", path, res); + } + if(0 < res){ + //info.fileinfo.read_offset += res; + info.fileinfo.offset += res; + } + return static_cast(res); +} + +int posix_s3fs_multiwrite(int fd, const void* buf, size_t size, off_t file_offset) { + S3FS_PRN_INFO("multithread write [pseudo_fd=%llu]", (unsigned long long)fd); + if (fdtofile.find(fd) == fdtofile.end()) { + S3FS_PRN_ERR("writeop could not find opened pseudo_fd(=%llu) ", (unsigned long long)(fd)); + return -EIO; + } + auto& info = fdtofile[fd]; + const char* path = info.filename.c_str(); + //uint64_t offset = info.fileinfo.write_offset; + //uint64_t offset = info.fileinfo.offset; + uint64_t offset = info.fileinfo.offset + file_offset; + ssize_t res; + + S3FS_PRN_DBG("multiwrite [path=%s][size=%zu][offset=%lld][pseudo_fd=%llu]", path, size, static_cast(offset), (unsigned long long)(fd)); + + AutoFdEntity autoent; + FdEntity* ent; + if(nullptr == (ent = autoent.GetExistFdEntity(path, static_cast(fd)))){ + S3FS_PRN_ERR("could not find opened pseudo_fd(%llu) for path(%s)", (unsigned long long)(fd), path); + return -EIO; + } + + if(0 > (res = ent->Write(static_cast(fd), (const char*)buf, offset, size))){ + S3FS_PRN_WARN("failed to write file(%s). 
result=%zd", path, res); + } + + if(max_dirty_data != -1 && ent->BytesModified() >= max_dirty_data && !use_newcache){ + int flushres; + if(0 != (flushres = ent->RowFlush(static_cast(fd), path, AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", path, flushres); + StatCache::getStatCacheData()->DelStat(path); + return flushres; + } + // Punch a hole in the file to recover disk space. + if(!ent->PunchHole()){ + S3FS_PRN_WARN("could not punching HOLEs to a cache file, but continue."); + } + } + // 不更新offset 在调用层统一更新 + // if (0 < res) { + // //info.fileinfo.write_offset += res; + // info.fileinfo.offset += res; + // } + return static_cast(res); +} + + +int posix_s3fs_write(int fd, const void* buf, size_t size) { + S3FS_PRN_INFO("write [pseudo_fd=%llu]", (unsigned long long)fd); + if (fdtofile.find(fd) == fdtofile.end()) { + S3FS_PRN_ERR("writeop could not find opened pseudo_fd(=%llu) ", (unsigned long long)(fd)); + return -EIO; + } + auto& info = fdtofile[fd]; + const char* path = info.filename.c_str(); + //uint64_t offset = info.fileinfo.write_offset; + uint64_t offset = info.fileinfo.offset; + ssize_t res; + + S3FS_PRN_DBG("[path=%s][size=%zu][offset=%lld][pseudo_fd=%llu]", path, size, static_cast(offset), (unsigned long long)(fd)); + + AutoFdEntity autoent; + FdEntity* ent; + if(nullptr == (ent = autoent.GetExistFdEntity(path, static_cast(fd)))){ + S3FS_PRN_ERR("could not find opened pseudo_fd(%llu) for path(%s)", (unsigned long long)(fd), path); + return -EIO; + } + + if(0 > (res = ent->Write(static_cast(fd), (const char*)buf, offset, size))){ + S3FS_PRN_WARN("failed to write file(%s). 
result=%zd", path, res); + } + + if(max_dirty_data != -1 && ent->BytesModified() >= max_dirty_data && !use_newcache){ + int flushres; + if(0 != (flushres = ent->RowFlush(static_cast(fd), path, AutoLock::NONE, true))){ + S3FS_PRN_ERR("could not upload file(%s): result=%d", path, flushres); + StatCache::getStatCacheData()->DelStat(path); + return flushres; + } + // Punch a hole in the file to recover disk space. + if(!ent->PunchHole()){ + S3FS_PRN_WARN("could not punching HOLEs to a cache file, but continue."); + } + } + if (0 < res) { + //info.fileinfo.write_offset += res; + info.fileinfo.offset += res; + } + return static_cast(res); +} + +off_t posix_s3fs_lseek(int fd, off_t offset, int whence) { + S3FS_PRN_INFO("lseek [pseudo_fd=%llu, offset=%llu, whence=%d]", (unsigned long long)fd, offset, whence); + if (fdtofile.find(fd) == fdtofile.end()) { + S3FS_PRN_ERR("lseekop could not find opened pseudo_fd(=%llu) ", (unsigned long long)(fd)); + return -EIO; + } + auto& info = fdtofile[fd]; + long new_pos = -1; + + FdEntity* ent = nullptr; + //ent = FdManager::get()->GetFdEntity(info.filename.c_str(), fd, false, AutoLock::ALREADY_LOCKED); + ent = FdManager::get()->GetFdEntity(info.filename.c_str(), fd, false, AutoLock::NONE); + if (ent == nullptr) { + S3FS_PRN_ERR("get stat failed in lseek...."); + return -1; + } + struct stat st; + ent->GetStats(st); + + switch (whence) { + case SEEK_SET: + new_pos = offset; + break; + case SEEK_CUR: + new_pos = info.fileinfo.offset + offset; + break; + case SEEK_END: + new_pos = st.st_size + offset; + break; + default: + errno = EINVAL; + return -1; + } + S3FS_PRN_INFO("lseek , filesize[%d], newpos[%d]", st.st_size, new_pos); + + // if (new_pos < 0 || new_pos > file->size) { + if (new_pos < 0) { + errno = EINVAL; + S3FS_PRN_ERR("lseek wrong new_pos, new_pos[%d]", new_pos); + return -1; + } + info.fileinfo.offset = new_pos; + return new_pos; +} + +int posix_s3fs_close(int fd) { + S3FS_PRN_INFO("close [pseudo_fd=%llu]", (unsigned long 
long)fd); + if (fdtofile.find(fd) == fdtofile.end()) { + S3FS_PRN_ERR("could not find opened pseudo_fd(=%llu) ", (unsigned long long)(fd)); + return -EIO; + } + const auto& info = fdtofile[fd]; + const char* path = info.filename.c_str(); + { // scope for AutoFdEntity + AutoFdEntity autoent; + + // [NOTE] + // The pseudo fd stored in fi->fh is attached to AutoFdEntry so that it can be + // destroyed here. + // + FdEntity* ent; + if(nullptr == (ent = autoent.Attach(path, static_cast(fd)))){ + S3FS_PRN_ERR("could not find pseudo_fd(%llu) for path(%s)", (unsigned long long)(fd), path); + return -EIO; + } + + // [NOTE] + // There are cases when s3fs_flush is not called and s3fs_release is called. + // (There have been reported cases where it is not called when exported as NFS.) + // Therefore, Flush() is called here to try to upload the data. + // Flush() will only perform an upload if the file has been updated. + // + int result; + if(ent->IsModified()){ + if(0 != (result = ent->Flush(static_cast(fd), AutoLock::NONE, false))){ + S3FS_PRN_ERR("failed to upload file contentsfor pseudo_fd(%llu) / path(%s) by result(%d)", (unsigned long long)(fd), path, result); + return result; + } + } + + // [NOTE] + // All opened file's stats is cached with no truncate flag. + // Thus we unset it here. + StatCache::getStatCacheData()->ChangeNoTruncateFlag(path, false); + + // [NOTICE] + // At first, we remove stats cache. + // Because fuse does not wait for response from "release" function. :-( + // And fuse runs next command before this function returns. + // Thus we call deleting stats function ASAP. 
+ // + if((info.fileinfo.flags & O_RDWR) || (info.fileinfo.flags & O_WRONLY)){ + StatCache::getStatCacheData()->DelStat(path); + } + + bool is_new_file = ent->IsDirtyNewFile(); + + if(0 != (result = ent->UploadPending(static_cast(fd), AutoLock::NONE))){ + S3FS_PRN_ERR("could not upload pending data(meta, etc) for pseudo_fd(%llu) / path(%s)", (unsigned long long)(fd), path); + return result; + } + + if(is_new_file){ + // update parent directory timestamp + int update_result; + if(0 != (update_result = update_mctime_parent_directory(path))){ + S3FS_PRN_ERR("succeed to create the file(%s), but could not update timestamp of its parent directory(result=%d).", path, update_result); + } + } + } + + // check - for debug + if(S3fsLog::IsS3fsLogDbg()){ + if(FdManager::HasOpenEntityFd(path)){ + S3FS_PRN_DBG("file(%s) is still opened(another pseudo fd is opend).", path); + } + } + S3FS_MALLOCTRIM(0); + fdtofile.erase(fd); + return 0; +} + +int posix_s3fs_stat(const char* _path, struct stat* stbuf) { + WTF8_ENCODE(path) + int result; + +#if defined(__APPLE__) + S3FS_PRN_INFO("stat [path=%s]", path); +#else + S3FS_PRN_INFO("stat [path=%s]", path); +#endif + + // check parent directory attribute. + if(0 != (result = check_parent_object_access(path, X_OK))){ + return result; + } + if(0 != (result = check_object_access(path, F_OK, stbuf))){ + return result; + } + // If has already opened fd, the st_size should be instead. 
+ // (See: Issue 241) + if(stbuf){ + AutoFdEntity autoent; + const FdEntity* ent; + if(nullptr != (ent = autoent.OpenExistFdEntity(path))){ + struct stat tmpstbuf; + if(ent->GetStats(tmpstbuf)){ + stbuf->st_size = tmpstbuf.st_size; + } + } + if(0 == strcmp(path, "/")){ + stbuf->st_size = 4096; + } + stbuf->st_blksize = 4096; + stbuf->st_blocks = get_blocks(stbuf->st_size); + + S3FS_PRN_DBG("stat [path=%s] uid=%u, gid=%u, mode=%04o", path, (unsigned int)(stbuf->st_uid), (unsigned int)(stbuf->st_gid), stbuf->st_mode); + } + S3FS_MALLOCTRIM(0); + + return result; +} + + +int posix_s3fs_fstat(int fd, struct stat* stbuf) { + // sleep(3); + const char* path = fdtofile[fd].filename.c_str(); + return posix_s3fs_stat(path, stbuf); +} + +int posix_s3fs_mkdir(const char* _path, mode_t mode) +{ + WTF8_ENCODE(path) + int result; + + S3FS_PRN_INFO("mkdir [path=%s][mode=%04o]", path, mode); + + // check parent directory attribute. + if(0 != (result = check_parent_object_access(path, W_OK | X_OK))){ + return result; + } + if(-ENOENT != (result = check_object_access(path, F_OK, nullptr))){ + if(0 == result){ + result = -EEXIST; + } + return result; + } + + std::string xattrvalue; + const char* pxattrvalue; + if(get_parent_meta_xattr_value(path, xattrvalue)){ + pxattrvalue = xattrvalue.c_str(); + }else{ + pxattrvalue = nullptr; + } + + struct timespec now; + s3fs_realtime(now); + result = create_directory_object(path, mode, now, now, now, posixcontext.uid, posixcontext.gid, pxattrvalue); + + StatCache::getStatCacheData()->DelStat(path); + + // update parent directory timestamp + int update_result; + if(0 != (update_result = update_mctime_parent_directory(path))){ + S3FS_PRN_ERR("succeed to create the directory(%s), but could not update timestamp of its parent directory(result=%d).", path, update_result); + } + + S3FS_MALLOCTRIM(0); + + return result; +} + +int posix_s3fs_opendir(const char* _path, S3DirStream* dirstream) { + int flags = O_DIRECTORY; + int mode = 0777; + int ret = 
posix_s3fs_open(_path, flags, mode); + fdtofile[ret].type = FileType::DIR; + fdtofile[ret].dirinfo.fh = ret; + fdtofile[ret].dirinfo.offset = 0; + dirstream->fh = ret; + dirstream->offset = 0; + dirstream->ino = fdtofile[ret].fileinfo.ino; + return ret; +} + +// cppcheck-suppress unmatchedSuppression +// cppcheck-suppress constParameterCallback +static bool multi_head_callback(S3fsCurl* s3fscurl, void* param) +{ + if(!s3fscurl){ + return false; + } + + // Add stat cache + std::string saved_path = s3fscurl->GetSpecialSavedPath(); + if(!StatCache::getStatCacheData()->AddStat(saved_path, *(s3fscurl->GetResponseHeaders()))){ + S3FS_PRN_ERR("failed adding stat cache [path=%s]", saved_path.c_str()); + return false; + } + + // Get stats from stats cache(for converting from meta), and fill + std::string bpath = mybasename(saved_path); + if(use_wtf8){ + bpath = s3fs_wtf8_decode(bpath); + } + if(param){ + SyncFiller* pcbparam = reinterpret_cast(param); + struct stat st; + if(StatCache::getStatCacheData()->GetStat(saved_path, &st)){ + pcbparam->Fill(bpath.c_str(), &st, 0); + }else{ + S3FS_PRN_INFO2("Could not find %s file in stat cache.", saved_path.c_str()); + pcbparam->Fill(bpath.c_str(), nullptr, 0); + } + }else{ + S3FS_PRN_WARN("param(multi_head_callback_param*) is nullptr, then can not call filler."); + } + + return true; +} + + +static bool multi_head_notfound_callback(S3fsCurl* s3fscurl, void* param) +{ + if(!s3fscurl){ + return false; + } + S3FS_PRN_INFO("HEAD returned NotFound(404) for %s object, it maybe only the path exists and the object does not exist.", s3fscurl->GetPath().c_str()); + + if(!param){ + S3FS_PRN_WARN("param(multi_head_notfound_callback_param*) is nullptr, then can not call filler."); + return false; + } + + // set path to not found list + struct multi_head_notfound_callback_param* pcbparam = reinterpret_cast(param); + + AutoLock auto_lock(&(pcbparam->list_lock)); + pcbparam->notfound_list.push_back(s3fscurl->GetBasePath()); + + return true; +} + 
+static std::unique_ptr multi_head_retry_callback(S3fsCurl* s3fscurl) +{ + if(!s3fscurl){ + return nullptr; + } + size_t ssec_key_pos= s3fscurl->GetLastPreHeadSeecKeyPos(); + int retry_count = s3fscurl->GetMultipartRetryCount(); + + // retry next sse key. + // if end of sse key, set retry master count is up. + ssec_key_pos = (ssec_key_pos == static_cast(-1) ? 0 : ssec_key_pos + 1); + if(0 == S3fsCurl::GetSseKeyCount() || S3fsCurl::GetSseKeyCount() <= ssec_key_pos){ + if(s3fscurl->IsOverMultipartRetryCount()){ + S3FS_PRN_ERR("Over retry count(%d) limit(%s).", s3fscurl->GetMultipartRetryCount(), s3fscurl->GetSpecialSavedPath().c_str()); + return nullptr; + } + ssec_key_pos = -1; + retry_count++; + } + + std::unique_ptr newcurl(new S3fsCurl(s3fscurl->IsUseAhbe())); + std::string path = s3fscurl->GetBasePath(); + std::string base_path = s3fscurl->GetBasePath(); + std::string saved_path = s3fscurl->GetSpecialSavedPath(); + + if(!newcurl->PreHeadRequest(path, base_path, saved_path, ssec_key_pos)){ + S3FS_PRN_ERR("Could not duplicate curl object(%s).", saved_path.c_str()); + return nullptr; + } + newcurl->SetMultipartRetryCount(retry_count); + + return newcurl; +} + + +static int readdir_multi_head(const char* path, const S3ObjList& head, char* data, int offset, int maxread, ssize_t* realbytes, int* realnum) +{ //TODO : for newcache + + S3fsMultiCurl curlmulti(S3fsCurl::GetMaxMultiRequest(), true); // [NOTE] run all requests to completion even if some requests fail. + s3obj_list_t headlist; + int result = 0; + *realnum = 0; + + S3FS_PRN_INFO1("readdir_multi_head [path=%s][list=%zu]", path, headlist.size()); + + // Make base path list. + head.GetNameList(headlist, true, false); // get name with "/". 
+ StatCache::getStatCacheData()->GetNotruncateCache(std::string(path), headlist); // Add notruncate file name from stat cache + + // Initialize S3fsMultiCurl + curlmulti.SetSuccessCallback(multi_head_callback); + curlmulti.SetRetryCallback(multi_head_retry_callback); + + // Success Callback function parameter(SyncFiller object) + // SyncFiller syncfiller(buf, filler); + // curlmulti.SetSuccessCallbackParam(reinterpret_cast(&syncfiller)); + + // Not found Callback function parameter + struct multi_head_notfound_callback_param notfound_param; + if(support_compat_dir){ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + #if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); + #endif + + if(0 != (result = pthread_mutex_init(&(notfound_param.list_lock), &attr))){ + S3FS_PRN_CRIT("failed to init notfound_param.list_lock: %d", result); + abort(); + } + curlmulti.SetNotFoundCallback(multi_head_notfound_callback); + curlmulti.SetNotFoundCallbackParam(reinterpret_cast(¬found_param)); + } + + // Make single head request(with max). + int nowPos = 0; + for(s3obj_list_t::iterator iter = headlist.begin() + offset; headlist.end() != iter; ++iter){ + struct dirent64 * dirent = (struct dirent64*) (data + nowPos); + ssize_t entryLen = sizeof(dirent64); + + + std::string disppath = path + (*iter); + std::string etag = head.GetETag((*iter).c_str()); + struct stat st; + + strncpy(dirent->d_name, disppath.c_str(), sizeof(dirent->d_name)); + dirent->d_name[sizeof(dirent->d_name) - 1] = '\0'; + dirent->d_reclen = entryLen; + // TODO: stat的赋值处理 + dirent->d_ino = 999999; // 暂时将d_ino初始化为999999 + if (head.IsDir(disppath.c_str())) { + dirent->d_type = DT_DIR; + } else { + dirent->d_type = DT_REG; + + } + dirent->d_off = nowPos; + nowPos += dirent->d_reclen; + (*realnum)++; + + // [NOTE] + // If there is a cache hit, file stat is filled by filler at here. 
+ // + if(StatCache::getStatCacheData()->HasStat(disppath, &st, etag.c_str())){ + std::string bpath = mybasename(disppath); + if(use_wtf8){ + bpath = s3fs_wtf8_decode(bpath); + } + + //syncfiller.Fill(bpath.c_str(), &st, 0); + //dirent->d_ino = st.st_ino; + continue; + } + + // First check for directory, start checking "not SSE-C". + // If checking failed, retry to check with "SSE-C" by retry callback func when SSE-C mode. + std::unique_ptr s3fscurl(new S3fsCurl()); + if(!s3fscurl->PreHeadRequest(disppath, disppath, disppath)){ // target path = cache key path.(ex "dir/") + S3FS_PRN_WARN("Could not make curl object for head request(%s).", disppath.c_str()); + continue; + } + + if(!curlmulti.SetS3fsCurlObject(std::move(s3fscurl))){ + S3FS_PRN_WARN("Could not make curl object into multi curl(%s).", disppath.c_str()); + continue; + } + } + *realbytes = nowPos; + headlist.clear(); + + // Multi request + if(0 != (result = curlmulti.Request())){ + // If result is -EIO, it is something error occurred. + // This case includes that the object is encrypting(SSE) and s3fs does not have keys. + // So s3fs set result to 0 in order to continue the process. + if(-EIO == result){ + S3FS_PRN_WARN("error occurred in multi request(errno=%d), but continue...", result); + result = 0; + }else{ + S3FS_PRN_ERR("error occurred in multi request(errno=%d).", result); + return result; + } + } + + // [NOTE] + // Objects that could not be found by HEAD request may exist only + // as a path, so search for objects under that path.(a case of no dir object) + // + if(!support_compat_dir){ + //TODO + //syncfiller.SufficiencyFill(head.common_prefixes); + } + if(support_compat_dir && !notfound_param.notfound_list.empty()){ // [NOTE] not need to lock to access this here. 
+ // dummy header + mode_t dirmask = umask(0); // macos does not have getumask() + umask(dirmask); + + headers_t dummy_header; + dummy_header["Content-Type"] = "application/x-directory"; // directory + dummy_header["x-amz-meta-uid"] = std::to_string(is_s3fs_uid ? s3fs_uid : geteuid()); + dummy_header["x-amz-meta-gid"] = std::to_string(is_s3fs_gid ? s3fs_gid : getegid()); + dummy_header["x-amz-meta-mode"] = std::to_string(S_IFDIR | (~dirmask & (S_IRWXU | S_IRWXG | S_IRWXO))); + dummy_header["x-amz-meta-atime"] = "0"; + dummy_header["x-amz-meta-ctime"] = "0"; + dummy_header["x-amz-meta-mtime"] = "0"; + + for(s3obj_list_t::iterator reiter = notfound_param.notfound_list.begin(); reiter != notfound_param.notfound_list.end(); ++reiter){ + int dir_result; + std::string dirpath = *reiter; + if(-ENOTEMPTY == (dir_result = directory_empty(dirpath.c_str()))){ + // Found objects under the path, so the path is directory. + + // Add stat cache + if(StatCache::getStatCacheData()->AddStat(dirpath, dummy_header, true)){ // set forcedir=true + // Get stats from stats cache(for converting from meta), and fill + std::string base_path = mybasename(dirpath); + if(use_wtf8){ + base_path = s3fs_wtf8_decode(base_path); + } + + struct stat st; + if(StatCache::getStatCacheData()->GetStat(dirpath, &st)){ + // TODO + //syncfiller.Fill(base_path.c_str(), &st, 0); + }else{ + S3FS_PRN_INFO2("Could not find %s directory(no dir object) in stat cache.", dirpath.c_str()); + // TODO + //syncfiller.Fill(base_path.c_str(), nullptr, 0); + } + }else{ + S3FS_PRN_ERR("failed adding stat cache [path=%s], but dontinue...", dirpath.c_str()); + } + }else{ + S3FS_PRN_WARN("%s object does not have any object under it(errno=%d),", reiter->c_str(), dir_result); + } + } + } + + return result; +} + +int posix_s3fs_getdents(S3DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes) { + + //WTF8_ENCODE(path) + const char* path = fdtofile[dirstream->fh].filename.c_str(); + S3ObjList head; + int result; 
+ S3FS_PRN_INFO("getdents [path=%s]", path); + + if(0 != (result = check_object_access(path, R_OK, nullptr))){ + return result; + } + + // get a list of all the objects + if((result = list_bucket(path, head, "/")) != 0){ + S3FS_PRN_ERR("list_bucket returns error(%d).", result); + return result; + } + + if(head.IsEmpty()){ + return 0; + } + + // Send multi head request for stats caching. + std::string strpath = path; + if(strcmp(path, "/") != 0){ + strpath += "/"; + } + int readnum = 0; + if(0 != (result = readdir_multi_head(strpath.c_str(), head, contents, dirstream->offset, maxread, realbytes, &readnum))){ + S3FS_PRN_ERR("readdir_multi_head returns error(%d).", result); + } + dirstream->offset += readnum; + S3FS_PRN_DBG("the dirstream offset: %d, realbytes: %d", dirstream->offset, *realbytes); + S3FS_MALLOCTRIM(0); + + return result; +} + +int posix_s3fs_closedir(S3DirStream* dirstream) { + S3FS_PRN_INFO("closedir [pseudo_fd=%llu]", (unsigned long long)dirstream->fh); + return posix_s3fs_close(dirstream->fh); +} + +int posix_s3fs_unlink(const char* _path) +{ + WTF8_ENCODE(path) + int result; + + S3FS_PRN_INFO(" delete [path=%s]", path); + + if(0 != (result = check_parent_object_access(path, W_OK | X_OK))){ + return result; + } + if(use_newcache){ + result = accessor->Delete(path); + }else{ + S3fsCurl s3fscurl; + result = s3fscurl.DeleteRequest(path); + FdManager::DeleteCacheFile(path); + } + + StatCache::getStatCacheData()->DelStat(path); + StatCache::getStatCacheData()->DelSymlink(path); + + // update parent directory timestamp + int update_result; + if(0 != (update_result = update_mctime_parent_directory(path))){ + S3FS_PRN_ERR("succeed to remove the file(%s), but could not update timestamp of its parent directory(result=%d).", path, update_result); + } + S3FS_MALLOCTRIM(0); + return result; +} + +static void* s3fs_init() +{ + S3FS_PRN_INIT_INFO("init v%s(commit:%s) with %s, credential-library(%s)", VERSION, COMMIT_HASH_VAL, s3fs_crypt_lib_name(), 
ps3fscred->GetCredFuncVersion(false)); + + // cache(remove cache dirs at first) + if(is_remove_cache && (!CacheFileStat::DeleteCacheFileStatDirectory() || !FdManager::DeleteCacheDirectory())){ + S3FS_PRN_DBG("Could not initialize cache directory."); + } + + // check loading IAM role name + if(!ps3fscred->LoadIAMRoleFromMetaData()){ + S3FS_PRN_CRIT("could not load IAM role name from meta data."); + return nullptr; + } + + // Check Bucket + { + int result; + if(EXIT_SUCCESS != (result = s3fs_check_service())){ + return nullptr; + } + } + + if(!ThreadPoolMan::Initialize(max_thread_count)){ + S3FS_PRN_CRIT("Could not create thread pool(%d)", max_thread_count); + } + + // Signal object + if(!S3fsSignals::Initialize()){ + S3FS_PRN_ERR("Failed to initialize signal object, but continue..."); + } + + return nullptr; +} +static void s3fs_destroy() +{ + S3FS_PRN_INFO("destroy"); + + // Signal object + if(!S3fsSignals::Destroy()){ + S3FS_PRN_WARN("Failed to clean up signal object."); + } + + ThreadPoolMan::Destroy(); + + // cache(remove at last) + if(is_remove_cache && (!CacheFileStat::DeleteCacheFileStatDirectory() || !FdManager::DeleteCacheDirectory())){ + S3FS_PRN_WARN("Could not remove cache directory."); + } +} + +// 初始化配置 +static int init_config(std::string configpath) { + std::cout << "init_config: " << configpath << std::endl; + std::unordered_map config; + std::ifstream file(configpath); + std::string line = ""; + + if (!file.is_open()) { + std::cerr << "Could not open configuration file" << std::endl; + } + + while (std::getline(file, line)) { + // Ignore comments and empty lines + if (line.empty() || line[0] == '#') { + continue; + } + + std::istringstream iss(line); + std::string key, value; + + // Split line into key and value + if (std::getline(iss, key, '=') && std::getline(iss, value)) { + // Remove whitespace from the key and value + key.erase(key.find_last_not_of(" \t\n\r\f\v") + 1); + key.erase(0, key.find_first_not_of(" \t\n\r\f\v")); + 
value.erase(value.find_last_not_of(" \t\n\r\f\v") + 1); + value.erase(0, value.find_first_not_of(" \t\n\r\f\v")); + + config[key] = value; + } + } + + // log level + if (config.find("log_level") != config.end()) { + std::cout << "set loglevel: " << config["log_level"] << std::endl; + if(config["log_level"] == "debug") { + S3fsLog::SetLogLevel(S3fsLog::LEVEL_DBG); + } else if (config["log_level"] == "info") { + S3fsLog::SetLogLevel(S3fsLog::LEVEL_INFO); + } else if (config["log_level"] == "warning") { + S3fsLog::SetLogLevel(S3fsLog::LEVEL_WARN); + } else if (config["log_level"] == "error") { + S3fsLog::SetLogLevel(S3fsLog::LEVEL_ERR); + } else if (config["log_level"] == "critical") { + S3fsLog::SetLogLevel(S3fsLog::LEVEL_CRIT); + } + } + + // bucket config + if(S3fsCred::GetBucket().empty()) { + int ret = set_bucket(config["bucket"].c_str()); + std::cout << "set_bucket: " << ret << std::endl; + } + + // mountpoint config + // the second NONOPT option is the mountpoint(not utility mode) + if(mountpoint.empty() && utility_incomp_type::NO_UTILITY_MODE == utility_mode){ + // save the mountpoint and do some basic error checking + mountpoint = config["mountpoint"]; + struct stat stbuf; + +// In MSYS2 environment with WinFsp, it is not needed to create the mount point before mounting. +// Also it causes a conflict with WinFsp's validation, so disabling it. 
+#ifdef __MSYS__ + memset(&stbuf, 0, sizeof stbuf); + set_mountpoint_attribute(stbuf); +#else + if(stat(mountpoint.c_str(), &stbuf) == -1){ + // check stale mountpoint + if(errno == ENOTCONN){ + print_umount_message(mountpoint, true); + } else { + S3FS_PRN_EXIT("unable to access MOUNTPOINT %s: %s", mountpoint.c_str(), strerror(errno)); + } + return -1; + } + if(!(S_ISDIR(stbuf.st_mode))){ + S3FS_PRN_EXIT("MOUNTPOINT: %s is not a directory.", mountpoint.c_str()); + return -1; + } + if(!set_mountpoint_attribute(stbuf)){ + S3FS_PRN_EXIT("MOUNTPOINT: %s permission denied.", mountpoint.c_str()); + return -1; + } + + if(!nonempty){ + const struct dirent *ent; + DIR *dp = opendir(mountpoint.c_str()); + if(dp == nullptr){ + S3FS_PRN_EXIT("failed to open MOUNTPOINT: %s: %s", mountpoint.c_str(), strerror(errno)); + return -1; + } + while((ent = readdir(dp)) != nullptr){ + if(strcmp(ent->d_name, ".") != 0 && strcmp(ent->d_name, "..") != 0){ + closedir(dp); + S3FS_PRN_EXIT("MOUNTPOINT directory %s is not empty. 
if you are sure this is safe, can use the 'nonempty' mount option.", mountpoint.c_str()); + return -1; + } + } + closedir(dp); + } +#endif + } + + // passwd_file + std::string passwd_filename = config["passwd_file"]; + passwd_filename = "passwd_file=" + passwd_filename; + int ret = ps3fscred->DetectParam(passwd_filename.c_str()); + if (0 > ret) { + std::cerr << "Failed to parse passwd_file=" << passwd_filename << ": " << strerror(-ret); + return -1; + } + + // url + s3host = config["url"]; + // strip the trailing '/', if any, off the end of the host + // std::string + size_t found, length; + found = s3host.find_last_of('/'); + length = s3host.length(); + while(found == (length - 1) && length > 0){ + s3host.erase(found); + found = s3host.find_last_of('/'); + length = s3host.length(); + } + // Check url for http / https protocol std::string + if(!is_prefix(s3host.c_str(), "https://") && !is_prefix(s3host.c_str(), "http://")){ + S3FS_PRN_EXIT("option url has invalid format, missing http / https protocol"); + return -1; + } + + if (config.find("use_path_request_style") != config.end()) { + pathrequeststyle = true; + std::cout << "use path reqeust style" << std::endl; + } else { + std::cout << "use virtual host style" << std::endl; + } + + // newcache + if(config.find("newcache_conf") != config.end()) { + newcache_conf = config["newcache_conf"]; + if (!newcache_conf.empty()) { + use_newcache = true; + } + } + + return 0; +} + +S3fsLog singletonLog; +void s3fs_global_init() { +//static __attribute__((constructor)) void Init(void) { + static bool is_called = false; + if (is_called) { + std::cout << "global init has called"; + return; + } + int ch; + int option_index = 0; + time_t incomp_abort_time = (24 * 60 * 60); + + + S3fsLog::SetLogLevel(S3fsLog::LEVEL_DBG); + S3fsLog::SetLogfile("./log/posix_s3fs.log"); + //S3fsLog::debug_level = S3fsLog::LEVEL_DBG; + std::string configpath = "./conf/posix_s3fs.conf"; + + posixcontext.uid = geteuid(); + posixcontext.gid = getegid(); 
+ S3FS_PRN_INFO("set the uid:%d , gid:%d", posixcontext.uid, posixcontext.gid); + + + // init bucket_block_size +#if defined(__MSYS__) + bucket_block_count = static_cast(INT32_MAX); +#elif defined(__APPLE__) + bucket_block_count = static_cast(INT32_MAX); +#else + bucket_block_count = ~0U; +#endif + + // init xml2 + xmlInitParser(); + LIBXML_TEST_VERSION + + init_sysconf_vars(); + + // get program name - emulate basename + program_name = "posixs3fs"; + + // set credential object + // + ps3fscred.reset(new S3fsCred()); + if(!S3fsCurl::InitCredentialObject(ps3fscred.get())){ + S3FS_PRN_EXIT("Failed to setup credential object to s3fs curl."); + exit(EXIT_FAILURE); + } + + // Load SSE environment + if(!S3fsCurl::LoadEnvSse()){ + S3FS_PRN_EXIT("something wrong about SSE environment."); + exit(EXIT_FAILURE); + } + + // ssl init + if(!s3fs_init_global_ssl()){ + S3FS_PRN_EXIT("could not initialize for ssl libraries."); + exit(EXIT_FAILURE); + } + + // mutex for xml + if(!init_parser_xml_lock()){ + S3FS_PRN_EXIT("could not initialize mutex for xml parser."); + s3fs_destroy_global_ssl(); + exit(EXIT_FAILURE); + } + + // mutex for basename/dirname + if(!init_basename_lock()){ + S3FS_PRN_EXIT("could not initialize mutex for basename/dirname."); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + exit(EXIT_FAILURE); + } + + // init curl (without mime types) + // + // [NOTE] + // The curl initialization here does not load mime types. + // The mime types file parameter are dynamic values according + // to the user's environment, and are analyzed by the my_fuse_opt_proc + // function. + // The my_fuse_opt_proc function is executed after this curl + // initialization. Because the curl method is used in the + // my_fuse_opt_proc function, then it must be called here to + // initialize. Fortunately, the processing using mime types + // is only PUT/POST processing, and it is not used until the + // call of my_fuse_opt_proc function is completed. 
Therefore, + // the mime type is loaded just after calling the my_fuse_opt_proc + // function. + // + if(!S3fsCurl::InitS3fsCurl()){ + S3FS_PRN_EXIT("Could not initiate curl library."); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + if(0 != init_config(configpath)){ + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + // init newcache + if(use_newcache){ + HybridCache::HybridCacheConfig cfg; + HybridCache::GetHybridCacheConfig(newcache_conf, cfg); + accessor = std::make_shared(cfg); + } + + // init mime types for curl + if(!S3fsCurl::InitMimeType(mimetype_file)){ + S3FS_PRN_WARN("Missing MIME types prevents setting Content-Type on uploaded objects."); + } + + // [NOTE] + // exclusive option check here. + // + if(strcasecmp(S3fsCurl::GetStorageClass().c_str(), "REDUCED_REDUNDANCY") == 0 && !S3fsCurl::IsSseDisable()){ + S3FS_PRN_EXIT("use_sse option could not be specified with storage class reduced_redundancy."); + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + if(!S3fsCurl::FinalCheckSse()){ + S3FS_PRN_EXIT("something wrong about SSE options."); + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + if(S3fsCurl::GetSignatureType() == signature_type_t::V2_ONLY && S3fsCurl::GetUnsignedPayload()){ + S3FS_PRN_WARN("Ignoring enable_unsigned_payload with sigv2"); + } + + if(!FdEntity::GetNoMixMultipart() && max_dirty_data != -1){ + S3FS_PRN_WARN("Setting max_dirty_data to -1 when nomixupload is enabled"); + max_dirty_data = -1; + } + + // + // Check the combination of parameters for credential + // + if(!ps3fscred->CheckAllParams()){ + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + 
destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + // The second plain argument is the mountpoint + // if the option was given, we all ready checked for a + // readable, non-empty directory, this checks determines + // if the mountpoint option was ever supplied + if(utility_incomp_type::NO_UTILITY_MODE == utility_mode){ + if(mountpoint.empty()){ + S3FS_PRN_EXIT("missing MOUNTPOINT argument."); + show_usage(); + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + } + + // check tmp dir permission + if(!FdManager::CheckTmpDirExist()){ + S3FS_PRN_EXIT("temporary directory doesn't exists."); + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + // check cache dir permission + if(!FdManager::CheckCacheDirExist() || !FdManager::CheckCacheTopDir() || !CacheFileStat::CheckCacheFileStatTopDir()){ + S3FS_PRN_EXIT("could not allow cache directory permission, check permission of cache directories."); + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + + // set fake free disk space + if(-1 != fake_diskfree_size){ + FdManager::InitFakeUsedDiskSize(fake_diskfree_size); + } + + // Set default value of free_space_ratio to 10% + if(FdManager::GetEnsureFreeDiskSpace()==0){ + //int ratio = 10; + int ratio = 5; + + off_t dfsize = FdManager::GetTotalDiskSpaceByRatio(ratio); + S3FS_PRN_INFO("Free space ratio default to %d %%, ensure the available disk space is greater than %.3f MB", ratio, static_cast(dfsize) / 1024 / 1024); + + if(dfsize < S3fsCurl::GetMultipartSize()){ + S3FS_PRN_WARN("specified size to ensure disk free space is smaller than multipart size, so set multipart size to it."); + dfsize = S3fsCurl::GetMultipartSize(); + } + FdManager::SetEnsureFreeDiskSpace(dfsize); + } + + // set user agent + 
S3fsCurl::InitUserAgent(); + + if(utility_incomp_type::NO_UTILITY_MODE != utility_mode){ + int exitcode = s3fs_utility_processing(incomp_abort_time); + + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(exitcode); + } + + // Check multipart / copy api for mix multipart uploading + if(nomultipart || nocopyapi || norenameapi){ + FdEntity::SetNoMixMultipart(); + max_dirty_data = -1; + } + + // check free disk space + if(!FdManager::IsSafeDiskSpace(nullptr, S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){ + // clean cache dir and retry + S3FS_PRN_WARN("No enough disk space for s3fs, try to clean cache dir"); + FdManager::get()->CleanupCacheDir(); + + if(!FdManager::IsSafeDiskSpaceWithLog(nullptr, S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){ + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } + } + + // set mp stat flag object + + pHasMpStat = new MpStatFlag(); + s3fs_init(); + is_called = true; + std::cout << "finish s3fs global init" << std::endl; +} + +void s3fs_global_uninit() { +} + +static __attribute__((destructor)) void Clean(void) { + // Destroy curl + s3fs_destroy(); + + if(!S3fsCurl::DestroyS3fsCurl()){ + S3FS_PRN_WARN("Could not release curl library."); + } + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + delete pHasMpStat; + + // cleanup xml2 + xmlCleanupParser(); + S3FS_MALLOCTRIM(0); + + if(use_newcache){ + accessor.reset(); + } +} \ No newline at end of file diff --git a/s3fs/s3fs_lib.h b/s3fs/s3fs_lib.h new file mode 100644 index 0000000..93d777e --- /dev/null +++ b/s3fs/s3fs_lib.h @@ -0,0 +1,69 @@ +#ifndef S3FS_S3FS_LIB_H_ +#define S3FS_S3FS_LIB_H_ + + +#include +#include + + + +#ifdef S3FS_MALLOC_TRIM +#ifdef HAVE_MALLOC_TRIM +#include +#define S3FS_MALLOCTRIM(pad) malloc_trim(pad) +#else // HAVE_MALLOC_TRIM +#define 
S3FS_MALLOCTRIM(pad) +#endif // HAVE_MALLOC_TRIM +#else // S3FS_MALLOC_TRIM +#define S3FS_MALLOCTRIM(pad) +#endif // S3FS_MALLOC_TRIM + + +//------------------------------------------------------------------- +// posix interface functions +//------------------------------------------------------------------- +#ifdef __cplusplus +extern "C" { +#endif + +struct S3DirStream; + +void s3fs_global_init(); + +void s3fs_global_uninit(); + +int posix_s3fs_create(const char* _path, int flags, mode_t mode); + +int posix_s3fs_open(const char* _path, int flags, mode_t mode); + +int posix_s3fs_multiread(int fd, void* buf, size_t size, off_t file_offset); + +int posix_s3fs_read(int fd, void* buf, size_t size); + +int posix_s3fs_multiwrite(int fd, const void* buf, size_t size, off_t file_offset); + +int posix_s3fs_write(int fd, const void* buf, size_t size); + +off_t posix_s3fs_lseek(int fd, off_t offset, int whence); + +int posix_s3fs_close(int fd); + +int posix_s3fs_stat(const char* _path, struct stat* stbuf); + +int posix_s3fs_fstat(int fd, struct stat* stbuf) ; + +int posix_s3fs_mkdir(const char* _path, mode_t mode); + +int posix_s3fs_opendir(const char* _path, S3DirStream* dirstream); + +int posix_s3fs_getdents(S3DirStream* dirstream, char* contents, size_t maxread, ssize_t* realbytes); + +int posix_s3fs_closedir(S3DirStream* dirstream); + +int posix_s3fs_unlink(const char* _path); + +#ifdef __cplusplus +} +#endif + +#endif // S3FS_S3FS_LIB_H_ diff --git a/s3fs/s3fs_logger.cpp b/s3fs/s3fs_logger.cpp new file mode 100644 index 0000000..18e170e --- /dev/null +++ b/s3fs/s3fs_logger.cpp @@ -0,0 +1,306 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include + +#include "common.h" +#include "s3fs_logger.h" + +//------------------------------------------------------------------- +// S3fsLog class : variables +//------------------------------------------------------------------- +constexpr char S3fsLog::LOGFILEENV[]; +constexpr const char* S3fsLog::nest_spaces[]; +constexpr char S3fsLog::MSGTIMESTAMP[]; +S3fsLog* S3fsLog::pSingleton = nullptr; +S3fsLog::s3fs_log_level S3fsLog::debug_level = S3fsLog::LEVEL_CRIT; +FILE* S3fsLog::logfp = nullptr; +std::string S3fsLog::logfile; +bool S3fsLog::time_stamp = true; + +//------------------------------------------------------------------- +// S3fsLog class : class methods +//------------------------------------------------------------------- +bool S3fsLog::IsS3fsLogLevel(s3fs_log_level level) +{ + return (level == (S3fsLog::debug_level & level)); +} + +std::string S3fsLog::GetCurrentTime() +{ + std::ostringstream current_time; + if(time_stamp){ + struct timeval now; + struct timespec tsnow; + struct tm res; + char tmp[32]; + if(-1 == clock_gettime(S3FS_CLOCK_MONOTONIC, &tsnow)){ + now.tv_sec = tsnow.tv_sec; + now.tv_usec = (tsnow.tv_nsec / 1000); + }else{ + gettimeofday(&now, nullptr); + } + strftime(tmp, sizeof(tmp), "%Y-%m-%dT%H:%M:%S", gmtime_r(&now.tv_sec, &res)); + current_time << tmp << "." 
<< std::setfill('0') << std::setw(3) << (now.tv_usec / 1000) << "Z "; + } + return current_time.str(); +} + +bool S3fsLog::SetLogfile(const char* pfile) +{ + if(!S3fsLog::pSingleton){ + S3FS_PRN_CRIT("S3fsLog::pSingleton is nullptr."); + return false; + } + return S3fsLog::pSingleton->LowSetLogfile(pfile); +} + +bool S3fsLog::ReopenLogfile() +{ + if(!S3fsLog::pSingleton){ + S3FS_PRN_CRIT("S3fsLog::pSingleton is nullptr."); + return false; + } + if(!S3fsLog::logfp){ + S3FS_PRN_INFO("Currently the log file is output to stdout/stderr."); + return true; + } + if(!S3fsLog::logfile.empty()){ + S3FS_PRN_ERR("There is a problem with the path to the log file being empty."); + return false; + } + std::string tmp = S3fsLog::logfile; + return S3fsLog::pSingleton->LowSetLogfile(tmp.c_str()); +} + +S3fsLog::s3fs_log_level S3fsLog::SetLogLevel(s3fs_log_level level) +{ + if(!S3fsLog::pSingleton){ + S3FS_PRN_CRIT("S3fsLog::pSingleton is nullptr."); + return S3fsLog::debug_level; // Although it is an error, it returns the current value. + } + return S3fsLog::pSingleton->LowSetLogLevel(level); +} + +S3fsLog::s3fs_log_level S3fsLog::BumpupLogLevel() +{ + if(!S3fsLog::pSingleton){ + S3FS_PRN_CRIT("S3fsLog::pSingleton is nullptr."); + return S3fsLog::debug_level; // Although it is an error, it returns the current value. 
+ } + return S3fsLog::pSingleton->LowBumpupLogLevel(); +} + +bool S3fsLog::SetTimeStamp(bool value) +{ + bool old = S3fsLog::time_stamp; + S3fsLog::time_stamp = value; + return old; +} + +//------------------------------------------------------------------- +// S3fsLog class : methods +//------------------------------------------------------------------- +S3fsLog::S3fsLog() +{ + if(!S3fsLog::pSingleton){ + S3fsLog::pSingleton = this; + + // init syslog(default CRIT) + openlog("s3fs", LOG_PID | LOG_ODELAY | LOG_NOWAIT, LOG_USER); + LowLoadEnv(); + }else{ + S3FS_PRN_ERR("Already set singleton object for S3fsLog."); + } +} + +S3fsLog::~S3fsLog() +{ + if(S3fsLog::pSingleton == this){ + FILE* oldfp = S3fsLog::logfp; + S3fsLog::logfp = nullptr; + if(oldfp && 0 != fclose(oldfp)){ + S3FS_PRN_ERR("Could not close old log file(%s), but continue...", (S3fsLog::logfile.empty() ? S3fsLog::logfile.c_str() : "null")); + } + S3fsLog::logfile.clear(); + S3fsLog::pSingleton = nullptr; + S3fsLog::debug_level = S3fsLog::LEVEL_CRIT; + + closelog(); + }else{ + S3FS_PRN_ERR("This object is not singleton S3fsLog object."); + } +} + +bool S3fsLog::LowLoadEnv() +{ + if(S3fsLog::pSingleton != this){ + S3FS_PRN_ERR("This object is not as same as S3fsLog::pSingleton."); + return false; + } + char* pEnvVal; + if(nullptr != (pEnvVal = getenv(S3fsLog::LOGFILEENV))){ + if(!SetLogfile(pEnvVal)){ + return false; + } + } + if(nullptr != (pEnvVal = getenv(S3fsLog::MSGTIMESTAMP))){ + if(0 == strcasecmp(pEnvVal, "true") || 0 == strcasecmp(pEnvVal, "yes") || 0 == strcasecmp(pEnvVal, "1")){ + S3fsLog::time_stamp = true; + }else if(0 == strcasecmp(pEnvVal, "false") || 0 == strcasecmp(pEnvVal, "no") || 0 == strcasecmp(pEnvVal, "0")){ + S3fsLog::time_stamp = false; + }else{ + S3FS_PRN_WARN("Unknown %s environment value(%s) is specified, skip to set time stamp mode.", S3fsLog::MSGTIMESTAMP, pEnvVal); + } + } + return true; +} + +bool S3fsLog::LowSetLogfile(const char* pfile) +{ + if(S3fsLog::pSingleton != 
this){ + S3FS_PRN_ERR("This object is not as same as S3fsLog::pSingleton."); + return false; + } + + if(!pfile){ + // close log file if it is opened + if(S3fsLog::logfp && 0 != fclose(S3fsLog::logfp)){ + S3FS_PRN_ERR("Could not close log file(%s).", (S3fsLog::logfile.empty() ? S3fsLog::logfile.c_str() : "null")); + return false; + } + S3fsLog::logfp = nullptr; + S3fsLog::logfile.clear(); + }else{ + // open new log file + // + // [NOTE] + // It will reopen even if it is the same file. + // + FILE* newfp; + if(nullptr == (newfp = fopen(pfile, "a+"))){ + S3FS_PRN_ERR("Could not open log file(%s).", pfile); + return false; + } + + // switch new log file and close old log file if it is opened + FILE* oldfp = S3fsLog::logfp; + if(oldfp && 0 != fclose(oldfp)){ + S3FS_PRN_ERR("Could not close old log file(%s).", (!S3fsLog::logfile.empty() ? S3fsLog::logfile.c_str() : "null")); + fclose(newfp); + return false; + } + S3fsLog::logfp = newfp; + S3fsLog::logfile = pfile; + } + return true; +} + +S3fsLog::s3fs_log_level S3fsLog::LowSetLogLevel(s3fs_log_level level) +{ + if(S3fsLog::pSingleton != this){ + S3FS_PRN_ERR("This object is not as same as S3fsLog::pSingleton."); + return S3fsLog::debug_level; // Although it is an error, it returns the current value. + } + if(level == S3fsLog::debug_level){ + return S3fsLog::debug_level; + } + s3fs_log_level old = S3fsLog::debug_level; + S3fsLog::debug_level = level; + setlogmask(LOG_UPTO(GetSyslogLevel(S3fsLog::debug_level))); + S3FS_PRN_CRIT("change debug level from %sto %s", GetLevelString(old), GetLevelString(S3fsLog::debug_level)); + return old; +} + +S3fsLog::s3fs_log_level S3fsLog::LowBumpupLogLevel() +{ + if(S3fsLog::pSingleton != this){ + S3FS_PRN_ERR("This object is not as same as S3fsLog::pSingleton."); + return S3fsLog::debug_level; // Although it is an error, it returns the current value. + } + s3fs_log_level old = S3fsLog::debug_level; + S3fsLog::debug_level = ( LEVEL_CRIT == S3fsLog::debug_level ? 
LEVEL_ERR : + LEVEL_ERR == S3fsLog::debug_level ? LEVEL_WARN : + LEVEL_WARN == S3fsLog::debug_level ? LEVEL_INFO : + LEVEL_INFO == S3fsLog::debug_level ? LEVEL_DBG : LEVEL_CRIT ); + setlogmask(LOG_UPTO(GetSyslogLevel(S3fsLog::debug_level))); + S3FS_PRN_CRIT("change debug level from %sto %s", GetLevelString(old), GetLevelString(S3fsLog::debug_level)); + return old; +} + +void s3fs_low_logprn(S3fsLog::s3fs_log_level level, const char* file, const char *func, int line, const char *fmt, ...) +{ + if(S3fsLog::IsS3fsLogLevel(level)){ + va_list va; + va_start(va, fmt); + size_t len = vsnprintf(nullptr, 0, fmt, va) + 1; + va_end(va); + + std::unique_ptr message(new char[len]); + va_start(va, fmt); + vsnprintf(message.get(), len, fmt, va); + va_end(va); + + if(foreground || S3fsLog::IsSetLogFile()){ + S3fsLog::SeekEnd(); + fprintf(S3fsLog::GetOutputLogFile(), "%s%s%s:%s(%d): %s\n", S3fsLog::GetCurrentTime().c_str(), S3fsLog::GetLevelString(level), file, func, line, message.get()); + S3fsLog::Flush(); + }else{ + // TODO: why does this differ from s3fs_low_logprn2? + syslog(S3fsLog::GetSyslogLevel(level), "%s%s:%s(%d): %s", instance_name.c_str(), file, func, line, message.get()); + } + } +} + +void s3fs_low_logprn2(S3fsLog::s3fs_log_level level, int nest, const char* file, const char *func, int line, const char *fmt, ...) 
+{ + if(S3fsLog::IsS3fsLogLevel(level)){ + va_list va; + va_start(va, fmt); + size_t len = vsnprintf(nullptr, 0, fmt, va) + 1; + va_end(va); + + std::unique_ptr message(new char[len]); + va_start(va, fmt); + vsnprintf(message.get(), len, fmt, va); + va_end(va); + + if(foreground || S3fsLog::IsSetLogFile()){ + S3fsLog::SeekEnd(); + fprintf(S3fsLog::GetOutputLogFile(), "%s%s%s%s:%s(%d): %s\n", S3fsLog::GetCurrentTime().c_str(), S3fsLog::GetLevelString(level), S3fsLog::GetS3fsLogNest(nest), file, func, line, message.get()); + S3fsLog::Flush(); + }else{ + syslog(S3fsLog::GetSyslogLevel(level), "%s%s%s", instance_name.c_str(), S3fsLog::GetS3fsLogNest(nest), message.get()); + } + } +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_logger.h b/s3fs/s3fs_logger.h new file mode 100644 index 0000000..94d9d0c --- /dev/null +++ b/s3fs/s3fs_logger.h @@ -0,0 +1,270 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef S3FS_LOGGER_H_ +#define S3FS_LOGGER_H_ + +#include +#include +#include +#include +#include + +#include "common.h" + +#ifdef CLOCK_MONOTONIC_COARSE +#define S3FS_CLOCK_MONOTONIC CLOCK_MONOTONIC_COARSE +#else +// case of OSX +#define S3FS_CLOCK_MONOTONIC CLOCK_MONOTONIC +#endif + +//------------------------------------------------------------------- +// S3fsLog class +//------------------------------------------------------------------- +class S3fsLog +{ + public: + enum s3fs_log_level{ + LEVEL_CRIT = 0, // LEVEL_CRIT + LEVEL_ERR = 1, // LEVEL_ERR + LEVEL_WARN = 3, // LEVEL_WARNING + LEVEL_INFO = 7, // LEVEL_INFO + LEVEL_DBG = 15 // LEVEL_DEBUG + }; + + protected: + static constexpr int NEST_MAX = 4; + static constexpr const char* nest_spaces[NEST_MAX] = {"", " ", " ", " "}; + static constexpr char LOGFILEENV[] = "S3FS_LOGFILE"; + static constexpr char MSGTIMESTAMP[] = "S3FS_MSGTIMESTAMP"; + + static S3fsLog* pSingleton; + static s3fs_log_level debug_level; + static FILE* logfp; + static std::string logfile; + static bool time_stamp; + + protected: + bool LowLoadEnv(); + bool LowSetLogfile(const char* pfile); + s3fs_log_level LowSetLogLevel(s3fs_log_level level); + s3fs_log_level LowBumpupLogLevel(); + + public: + static bool IsS3fsLogLevel(s3fs_log_level level); + static bool IsS3fsLogCrit() { return IsS3fsLogLevel(LEVEL_CRIT); } + static bool IsS3fsLogErr() { return IsS3fsLogLevel(LEVEL_ERR); } + static bool IsS3fsLogWarn() { return IsS3fsLogLevel(LEVEL_WARN); } + static bool IsS3fsLogInfo() { return IsS3fsLogLevel(LEVEL_INFO); } + static bool IsS3fsLogDbg() { return IsS3fsLogLevel(LEVEL_DBG); } + + static constexpr int GetSyslogLevel(s3fs_log_level level) + { + return ( LEVEL_DBG == (level & LEVEL_DBG) ? LOG_DEBUG : + LEVEL_INFO == (level & LEVEL_DBG) ? LOG_INFO : + LEVEL_WARN == (level & LEVEL_DBG) ? LOG_WARNING : + LEVEL_ERR == (level & LEVEL_DBG) ? 
LOG_ERR : LOG_CRIT ); + } + + static std::string GetCurrentTime(); + + static constexpr const char* GetLevelString(s3fs_log_level level) + { + return ( LEVEL_DBG == (level & LEVEL_DBG) ? "[DBG] " : + LEVEL_INFO == (level & LEVEL_DBG) ? "[INF] " : + LEVEL_WARN == (level & LEVEL_DBG) ? "[WAN] " : + LEVEL_ERR == (level & LEVEL_DBG) ? "[ERR] " : "[CRT] " ); + } + + static constexpr const char* GetS3fsLogNest(int nest) + { + return nest_spaces[nest < NEST_MAX ? nest : NEST_MAX - 1]; + } + + static bool IsSetLogFile() + { + return (nullptr != logfp); + } + + static FILE* GetOutputLogFile() + { + return (logfp ? logfp : stdout); + } + + static FILE* GetErrorLogFile() + { + return (logfp ? logfp : stderr); + } + + static void SeekEnd() + { + if(logfp){ + fseek(logfp, 0, SEEK_END); + } + } + + static void Flush() + { + if(logfp){ + fflush(logfp); + } + } + + static bool SetLogfile(const char* pfile); + static bool ReopenLogfile(); + static s3fs_log_level SetLogLevel(s3fs_log_level level); + static s3fs_log_level BumpupLogLevel(); + static bool SetTimeStamp(bool value); + + explicit S3fsLog(); + ~S3fsLog(); + S3fsLog(const S3fsLog&) = delete; + S3fsLog(S3fsLog&&) = delete; + S3fsLog& operator=(const S3fsLog&) = delete; + S3fsLog& operator=(S3fsLog&&) = delete; +}; + +//------------------------------------------------------------------- +// Debug macros +//------------------------------------------------------------------- +void s3fs_low_logprn(S3fsLog::s3fs_log_level level, const char* file, const char *func, int line, const char *fmt, ...) __attribute__ ((format (printf, 5, 6))); +#define S3FS_LOW_LOGPRN(level, fmt, ...) \ + do{ \ + s3fs_low_logprn(level, __FILE__, __func__, __LINE__, fmt, ##__VA_ARGS__); \ + }while(0) + +void s3fs_low_logprn2(S3fsLog::s3fs_log_level level, int nest, const char* file, const char *func, int line, const char *fmt, ...) __attribute__ ((format (printf, 6, 7))); +#define S3FS_LOW_LOGPRN2(level, nest, fmt, ...) 
\ + do{ \ + s3fs_low_logprn2(level, nest, __FILE__, __func__, __LINE__, fmt, ##__VA_ARGS__); \ + }while(0) + +#define S3FS_LOW_CURLDBG(fmt, ...) \ + do{ \ + if(foreground || S3fsLog::IsSetLogFile()){ \ + S3fsLog::SeekEnd(); \ + fprintf(S3fsLog::GetOutputLogFile(), "%s[CURL DBG] " fmt "%s\n", S3fsLog::GetCurrentTime().c_str(), __VA_ARGS__); \ + S3fsLog::Flush(); \ + }else{ \ + syslog(S3fsLog::GetSyslogLevel(S3fsLog::LEVEL_CRIT), "%s" fmt "%s", instance_name.c_str(), __VA_ARGS__); \ + } \ + }while(0) + +#define S3FS_LOW_LOGPRN_EXIT(fmt, ...) \ + do{ \ + if(foreground || S3fsLog::IsSetLogFile()){ \ + S3fsLog::SeekEnd(); \ + fprintf(S3fsLog::GetErrorLogFile(), "s3fs: " fmt "%s\n", __VA_ARGS__); \ + S3fsLog::Flush(); \ + }else{ \ + fprintf(S3fsLog::GetErrorLogFile(), "s3fs: " fmt "%s\n", __VA_ARGS__); \ + syslog(S3fsLog::GetSyslogLevel(S3fsLog::LEVEL_CRIT), "%ss3fs: " fmt "%s", instance_name.c_str(), __VA_ARGS__); \ + } \ + }while(0) + +// Special macro for init message +#define S3FS_PRN_INIT_INFO(fmt, ...) \ + do{ \ + if(foreground || S3fsLog::IsSetLogFile()){ \ + S3fsLog::SeekEnd(); \ + fprintf(S3fsLog::GetOutputLogFile(), "%s%s%s%s:%s(%d): " fmt "%s\n", S3fsLog::GetCurrentTime().c_str(), S3fsLog::GetLevelString(S3fsLog::LEVEL_INFO), S3fsLog::GetS3fsLogNest(0), __FILE__, __func__, __LINE__, __VA_ARGS__, ""); \ + S3fsLog::Flush(); \ + }else{ \ + syslog(S3fsLog::GetSyslogLevel(S3fsLog::LEVEL_INFO), "%s%s" fmt "%s", instance_name.c_str(), S3fsLog::GetS3fsLogNest(0), __VA_ARGS__, ""); \ + } \ + }while(0) + +#define S3FS_PRN_LAUNCH_INFO(fmt, ...) 
\ + do{ \ + if(foreground || S3fsLog::IsSetLogFile()){ \ + S3fsLog::SeekEnd(); \ + fprintf(S3fsLog::GetOutputLogFile(), "%s%s" fmt "%s\n", S3fsLog::GetCurrentTime().c_str(), S3fsLog::GetLevelString(S3fsLog::LEVEL_INFO), __VA_ARGS__, ""); \ + S3fsLog::Flush(); \ + }else{ \ + syslog(S3fsLog::GetSyslogLevel(S3fsLog::LEVEL_INFO), "%s" fmt "%s", instance_name.c_str(), __VA_ARGS__, ""); \ + } \ + }while(0) + +// Special macro for checking cache files +#define S3FS_LOW_CACHE(fp, fmt, ...) \ + do{ \ + if(foreground || S3fsLog::IsSetLogFile()){ \ + S3fsLog::SeekEnd(); \ + fprintf(fp, fmt "%s\n", __VA_ARGS__); \ + S3fsLog::Flush(); \ + }else{ \ + syslog(S3fsLog::GetSyslogLevel(S3fsLog::LEVEL_INFO), "%s: " fmt "%s", instance_name.c_str(), __VA_ARGS__); \ + } \ + }while(0) + +// [NOTE] +// small trick for VA_ARGS +// +#define S3FS_PRN_EXIT(fmt, ...) S3FS_LOW_LOGPRN_EXIT(fmt, ##__VA_ARGS__, "") +#define S3FS_PRN_CRIT(fmt, ...) S3FS_LOW_LOGPRN(S3fsLog::LEVEL_CRIT, fmt, ##__VA_ARGS__) +#define S3FS_PRN_ERR(fmt, ...) S3FS_LOW_LOGPRN(S3fsLog::LEVEL_ERR, fmt, ##__VA_ARGS__) +#define S3FS_PRN_WARN(fmt, ...) S3FS_LOW_LOGPRN(S3fsLog::LEVEL_WARN, fmt, ##__VA_ARGS__) +#define S3FS_PRN_DBG(fmt, ...) S3FS_LOW_LOGPRN(S3fsLog::LEVEL_DBG, fmt, ##__VA_ARGS__) +#define S3FS_PRN_INFO(fmt, ...) S3FS_LOW_LOGPRN2(S3fsLog::LEVEL_INFO, 0, fmt, ##__VA_ARGS__) +#define S3FS_PRN_INFO1(fmt, ...) S3FS_LOW_LOGPRN2(S3fsLog::LEVEL_INFO, 1, fmt, ##__VA_ARGS__) +#define S3FS_PRN_INFO2(fmt, ...) S3FS_LOW_LOGPRN2(S3fsLog::LEVEL_INFO, 2, fmt, ##__VA_ARGS__) +#define S3FS_PRN_INFO3(fmt, ...) S3FS_LOW_LOGPRN2(S3fsLog::LEVEL_INFO, 3, fmt, ##__VA_ARGS__) +#define S3FS_PRN_CURL(fmt, ...) S3FS_LOW_CURLDBG(fmt, ##__VA_ARGS__, "") +#define S3FS_PRN_CACHE(fp, ...) S3FS_LOW_CACHE(fp, ##__VA_ARGS__, "") + +// Macros to print log with fuse context +#define PRINT_FUSE_CTX(level, indent, fmt, ...) 
do { \ + if(S3fsLog::IsS3fsLogLevel(level)){ \ + struct fuse_context *ctx = fuse_get_context(); \ + if(ctx == NULL){ \ + S3FS_LOW_LOGPRN2(level, indent, fmt, ##__VA_ARGS__); \ + }else{ \ + S3FS_LOW_LOGPRN2(level, indent, fmt"[pid=%u,uid=%u,gid=%u]",\ + ##__VA_ARGS__, \ + (unsigned int)(ctx->pid), \ + (unsigned int)(ctx->uid), \ + (unsigned int)(ctx->gid)); \ + } \ + } \ +} while (0) + +#define FUSE_CTX_INFO(fmt, ...) do { \ + PRINT_FUSE_CTX(S3fsLog::LEVEL_INFO, 0, fmt, ##__VA_ARGS__); \ +} while (0) + +#define FUSE_CTX_INFO1(fmt, ...) do { \ + PRINT_FUSE_CTX(S3fsLog::LEVEL_INFO, 1, fmt, ##__VA_ARGS__); \ +} while (0) + +#define FUSE_CTX_DBG(fmt, ...) do { \ + PRINT_FUSE_CTX(S3fsLog::LEVEL_DBG, 0, fmt, ##__VA_ARGS__); \ +} while (0) + +#endif // S3FS_LOGGER_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_util.cpp b/s3fs/s3fs_util.cpp new file mode 100644 index 0000000..14a645e --- /dev/null +++ b/s3fs/s3fs_util.cpp @@ -0,0 +1,592 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "s3fs_logger.h" +#include "s3fs_util.h" +#include "string_util.h" +#include "s3fs_help.h" +#include "autolock.h" + +//------------------------------------------------------------------- +// Global variables +//------------------------------------------------------------------- +std::string mount_prefix; + +static size_t max_password_size; +static size_t max_group_name_length; + +//------------------------------------------------------------------- +// Utilities +//------------------------------------------------------------------- +std::string get_realpath(const char *path) +{ + std::string realpath = mount_prefix; + realpath += path; + + return realpath; +} + +void init_sysconf_vars() +{ + // SUSv4tc1 says the following about _SC_GETGR_R_SIZE_MAX and + // _SC_GETPW_R_SIZE_MAX: + // Note that sysconf(_SC_GETGR_R_SIZE_MAX) may return -1 if + // there is no hard limit on the size of the buffer needed to + // store all the groups returned. 
+ + long res = sysconf(_SC_GETPW_R_SIZE_MAX); + if(0 > res){ + if (errno != 0){ + S3FS_PRN_WARN("could not get max pw length."); + abort(); + } + res = 1024; // default initial length + } + max_password_size = res; + + res = sysconf(_SC_GETGR_R_SIZE_MAX); + if(0 > res) { + if (errno != 0) { + S3FS_PRN_ERR("could not get max name length."); + abort(); + } + res = 1024; // default initial length + } + max_group_name_length = res; +} + +//------------------------------------------------------------------- +// Utility for UID/GID +//------------------------------------------------------------------- +// get user name from uid +std::string get_username(uid_t uid) +{ + size_t maxlen = max_password_size; + int result; + struct passwd pwinfo; + struct passwd* ppwinfo = nullptr; + + // make buffer + std::unique_ptr<char[]> pbuf(new char[maxlen]); + // get pw information + while(ERANGE == (result = getpwuid_r(uid, &pwinfo, pbuf.get(), maxlen, &ppwinfo))){ + maxlen *= 2; + pbuf.reset(new char[maxlen]); + } + + if(0 != result){ + S3FS_PRN_ERR("could not get pw information(%d).", result); + return ""; + } + + // check pw + if(nullptr == ppwinfo){ + return ""; + } + std::string name = SAFESTRPTR(ppwinfo->pw_name); + return name; +} + +int is_uid_include_group(uid_t uid, gid_t gid) +{ + size_t maxlen = max_group_name_length; + int result; + struct group ginfo; + struct group* pginfo = nullptr; + + // make buffer + std::unique_ptr<char[]> pbuf(new char[maxlen]); + // get group information + while(ERANGE == (result = getgrgid_r(gid, &ginfo, pbuf.get(), maxlen, &pginfo))){ + maxlen *= 2; + pbuf.reset(new char[maxlen]); + } + + if(0 != result){ + S3FS_PRN_ERR("could not get group information(%d).", result); + return -result; + } + + // check group + if(nullptr == pginfo){ + // there is not gid in group.
+ return -EINVAL; + } + + std::string username = get_username(uid); + + char** ppgr_mem; + for(ppgr_mem = pginfo->gr_mem; ppgr_mem && *ppgr_mem; ppgr_mem++){ + if(username == *ppgr_mem){ + // Found username in group. + return 1; + } + } + return 0; +} + +//------------------------------------------------------------------- +// Utility for file and directory +//------------------------------------------------------------------- +// [NOTE] +// basename/dirname returns a static variable pointer as the return value. +// Normally this shouldn't be a problem, but in macos10 we found a case +// where dirname didn't receive its return value correctly due to thread +// conflicts. +// To avoid this, exclusive control is performed by mutex. +// +static pthread_mutex_t* pbasename_lock = nullptr; + +bool init_basename_lock() +{ + if(pbasename_lock){ + S3FS_PRN_ERR("already initialized mutex for posix dirname/basename function."); + return false; + } + pbasename_lock = new pthread_mutex_t; + + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + int result; + if(0 != (result = pthread_mutex_init(pbasename_lock, &attr))){ + S3FS_PRN_ERR("failed to init pbasename_lock: %d.", result); + delete pbasename_lock; + pbasename_lock = nullptr; + return false; + } + return true; +} + +bool destroy_basename_lock() +{ + if(!pbasename_lock){ + S3FS_PRN_ERR("the mutex for posix dirname/basename function is not initialized."); + return false; + } + int result; + if(0 != (result = pthread_mutex_destroy(pbasename_lock))){ + S3FS_PRN_ERR("failed to destroy pbasename_lock: %d", result); + return false; + } + delete pbasename_lock; + pbasename_lock = nullptr; + + return true; +} + +std::string mydirname(const std::string& path) +{ + AutoLock auto_lock(pbasename_lock); + + return mydirname(path.c_str()); +} + +// safe variant of dirname +// dirname clobbers path so let it operate on a tmp copy 
+std::string mydirname(const char* path) +{ + if(!path || '\0' == path[0]){ + return ""; + } + + char *buf = strdup(path); + std::string result = dirname(buf); + free(buf); + return result; +} + +std::string mybasename(const std::string& path) +{ + AutoLock auto_lock(pbasename_lock); + + return mybasename(path.c_str()); +} + +// safe variant of basename +// basename clobbers path so let it operate on a tmp copy +std::string mybasename(const char* path) +{ + if(!path || '\0' == path[0]){ + return ""; + } + + char *buf = strdup(path); + std::string result = basename(buf); + free(buf); + return result; +} + +// mkdir --parents +int mkdirp(const std::string& path, mode_t mode) +{ + std::string base; + std::string component; + std::istringstream ss(path); + while (getline(ss, component, '/')) { + base += component + "/"; + + struct stat st; + if(0 == stat(base.c_str(), &st)){ + if(!S_ISDIR(st.st_mode)){ + return EPERM; + } + }else{ + if(0 != mkdir(base.c_str(), mode) && errno != EEXIST){ + return errno; + } + } + } + return 0; +} + +// get existed directory path +std::string get_exist_directory_path(const std::string& path) +{ + std::string existed("/"); // "/" is existed. 
+ std::string base; + std::string component; + std::istringstream ss(path); + while (getline(ss, component, '/')) { + if(base != "/"){ + base += "/"; + } + base += component; + struct stat st; + if(0 == stat(base.c_str(), &st) && S_ISDIR(st.st_mode)){ + existed = base; + }else{ + break; + } + } + return existed; +} + +bool check_exist_dir_permission(const char* dirpath) +{ + if(!dirpath || '\0' == dirpath[0]){ + return false; + } + + // exists + struct stat st; + if(0 != stat(dirpath, &st)){ + if(ENOENT == errno){ + // dir does not exist + return true; + } + if(EACCES == errno){ + // could not access directory + return false; + } + // something error occurred + return false; + } + + // check type + if(!S_ISDIR(st.st_mode)){ + // path is not directory + return false; + } + + // check permission + uid_t myuid = geteuid(); + if(myuid == st.st_uid){ + if(S_IRWXU != (st.st_mode & S_IRWXU)){ + return false; + } + }else{ + if(1 == is_uid_include_group(myuid, st.st_gid)){ + if(S_IRWXG != (st.st_mode & S_IRWXG)){ + return false; + } + }else{ + if(S_IRWXO != (st.st_mode & S_IRWXO)){ + return false; + } + } + } + return true; +} + +bool delete_files_in_dir(const char* dir, bool is_remove_own) +{ + DIR* dp; + struct dirent* dent; + + if(nullptr == (dp = opendir(dir))){ + S3FS_PRN_ERR("could not open dir(%s) - errno(%d)", dir, errno); + return false; + } + + for(dent = readdir(dp); dent; dent = readdir(dp)){ + if(0 == strcmp(dent->d_name, "..") || 0 == strcmp(dent->d_name, ".")){ + continue; + } + std::string fullpath = dir; + fullpath += "/"; + fullpath += dent->d_name; + struct stat st; + if(0 != lstat(fullpath.c_str(), &st)){ + S3FS_PRN_ERR("could not get stats of file(%s) - errno(%d)", fullpath.c_str(), errno); + closedir(dp); + return false; + } + if(S_ISDIR(st.st_mode)){ + // dir -> Reentrant + if(!delete_files_in_dir(fullpath.c_str(), true)){ + S3FS_PRN_ERR("could not remove sub dir(%s) - errno(%d)", fullpath.c_str(), errno); + closedir(dp); + return false; + } + }else{ 
+ if(0 != unlink(fullpath.c_str())){ + S3FS_PRN_ERR("could not remove file(%s) - errno(%d)", fullpath.c_str(), errno); + closedir(dp); + return false; + } + } + } + closedir(dp); + + if(is_remove_own && 0 != rmdir(dir)){ + S3FS_PRN_ERR("could not remove dir(%s) - errno(%d)", dir, errno); + return false; + } + return true; +} + +//------------------------------------------------------------------- +// Utility for system information +//------------------------------------------------------------------- +bool compare_sysname(const char* target) +{ + // [NOTE] + // The buffer size of sysname member in struct utsname is + // OS dependent, but 512 bytes is sufficient for now. + // + static const char* psysname = nullptr; + static char sysname[512]; + if(!psysname){ + struct utsname sysinfo; + if(0 != uname(&sysinfo)){ + S3FS_PRN_ERR("could not initialize system name to internal buffer(errno:%d), thus use \"Linux\".", errno); + strcpy(sysname, "Linux"); + }else{ + S3FS_PRN_INFO("system name is %s", sysinfo.sysname); + sysname[sizeof(sysname) - 1] = '\0'; + strncpy(sysname, sysinfo.sysname, sizeof(sysname) - 1); + } + psysname = &sysname[0]; + } + + if(!target || 0 != strcmp(psysname, target)){ + return false; + } + return true; +} + +//------------------------------------------------------------------- +// Utility for print message at launching +//------------------------------------------------------------------- +void print_launch_message(int argc, char** argv) +{ + std::string message = short_version(); + + if(argv){ + message += " :"; + for(int cnt = 0; cnt < argc; ++cnt){ + if(argv[cnt]){ + message += " "; + if(0 == cnt){ + message += basename(argv[cnt]); + }else{ + message += argv[cnt]; + } + } + } + } + S3FS_PRN_LAUNCH_INFO("%s", message.c_str()); +} + +int flock_set(int fd, int type) +{ + struct flock lock; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + lock.l_type = type; + lock.l_pid = -1; + + return fcntl(fd, F_SETLKW, &lock); +} + +// +// 
result: -1 ts1 < ts2 +// 0 ts1 == ts2 +// 1 ts1 > ts2 +// +int compare_timespec(const struct timespec& ts1, const struct timespec& ts2) +{ + if(ts1.tv_sec < ts2.tv_sec){ + return -1; + }else if(ts1.tv_sec > ts2.tv_sec){ + return 1; + }else{ + if(ts1.tv_nsec < ts2.tv_nsec){ + return -1; + }else if(ts1.tv_nsec > ts2.tv_nsec){ + return 1; + } + } + return 0; +} + +// +// result: -1 st < ts +// 0 st == ts +// 1 st > ts +// +int compare_timespec(const struct stat& st, stat_time_type type, const struct timespec& ts) +{ + struct timespec st_ts; + set_stat_to_timespec(st, type, st_ts); + + return compare_timespec(st_ts, ts); +} + +void set_timespec_to_stat(struct stat& st, stat_time_type type, const struct timespec& ts) +{ + if(stat_time_type::ATIME == type){ + #if defined(__APPLE__) + st.st_atime = ts.tv_sec; + st.st_atimespec.tv_nsec = ts.tv_nsec; + #else + st.st_atim.tv_sec = ts.tv_sec; + st.st_atim.tv_nsec = ts.tv_nsec; + #endif + }else if(stat_time_type::MTIME == type){ + #if defined(__APPLE__) + st.st_mtime = ts.tv_sec; + st.st_mtimespec.tv_nsec = ts.tv_nsec; + #else + st.st_mtim.tv_sec = ts.tv_sec; + st.st_mtim.tv_nsec = ts.tv_nsec; + #endif + }else if(stat_time_type::CTIME == type){ + #if defined(__APPLE__) + st.st_ctime = ts.tv_sec; + st.st_ctimespec.tv_nsec = ts.tv_nsec; + #else + st.st_ctim.tv_sec = ts.tv_sec; + st.st_ctim.tv_nsec = ts.tv_nsec; + #endif + }else{ + S3FS_PRN_ERR("unknown type(%d), so skip to set value.", static_cast(type)); + } +} + +struct timespec* set_stat_to_timespec(const struct stat& st, stat_time_type type, struct timespec& ts) +{ + if(stat_time_type::ATIME == type){ + #if defined(__APPLE__) + ts.tv_sec = st.st_atime; + ts.tv_nsec = st.st_atimespec.tv_nsec; + #else + ts = st.st_atim; + #endif + }else if(stat_time_type::MTIME == type){ + #if defined(__APPLE__) + ts.tv_sec = st.st_mtime; + ts.tv_nsec = st.st_mtimespec.tv_nsec; + #else + ts = st.st_mtim; + #endif + }else if(stat_time_type::CTIME == type){ + #if defined(__APPLE__) + ts.tv_sec = 
st.st_ctime; + ts.tv_nsec = st.st_ctimespec.tv_nsec; + #else + ts = st.st_ctim; + #endif + }else{ + S3FS_PRN_ERR("unknown type(%d), so use 0 as timespec.", static_cast<int>(type)); + ts.tv_sec = 0; + ts.tv_nsec = 0; + } + return &ts; +} + +std::string str_stat_time(const struct stat& st, stat_time_type type) +{ + struct timespec ts; + return str(*set_stat_to_timespec(st, type, ts)); +} + +struct timespec* s3fs_realtime(struct timespec& ts) +{ + if(-1 == clock_gettime(static_cast<clockid_t>(CLOCK_REALTIME), &ts)){ + S3FS_PRN_WARN("failed to clock_gettime by errno(%d)", errno); + ts.tv_sec = time(nullptr); + ts.tv_nsec = 0; + } + return &ts; +} + +std::string s3fs_str_realtime() +{ + struct timespec ts; + return str(*s3fs_realtime(ts)); +} + +int s3fs_fclose(FILE* fp) +{ + if(fp == nullptr){ + return 0; + } + return fclose(fp); +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_util.h b/s3fs/s3fs_util.h new file mode 100644 index 0000000..09173e0 --- /dev/null +++ b/s3fs/s3fs_util.h @@ -0,0 +1,123 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */ + +#ifndef S3FS_S3FS_UTIL_H_ +#define S3FS_S3FS_UTIL_H_ + +#include + +#ifndef CLOCK_REALTIME +#define CLOCK_REALTIME 0 +#endif +#ifndef CLOCK_MONOTONIC +#define CLOCK_MONOTONIC CLOCK_REALTIME +#endif +#ifndef CLOCK_MONOTONIC_COARSE +#define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC +#endif + +//------------------------------------------------------------------- +// Functions +//------------------------------------------------------------------- +std::string get_realpath(const char *path); + +void init_sysconf_vars(); +std::string get_username(uid_t uid); +int is_uid_include_group(uid_t uid, gid_t gid); + +bool init_basename_lock(); +bool destroy_basename_lock(); +std::string mydirname(const char* path); +std::string mydirname(const std::string& path); +std::string mybasename(const char* path); +std::string mybasename(const std::string& path); + +int mkdirp(const std::string& path, mode_t mode); +std::string get_exist_directory_path(const std::string& path); +bool check_exist_dir_permission(const char* dirpath); +bool delete_files_in_dir(const char* dir, bool is_remove_own); + +bool compare_sysname(const char* target); + +void print_launch_message(int argc, char** argv); + +int flock_set(int fd, int type); + +// +// Utility for nanosecond time(timespec) +// +enum class stat_time_type{ + ATIME, + MTIME, + CTIME +}; + +//------------------------------------------------------------------- +// Utility for nanosecond time(timespec) +//------------------------------------------------------------------- +static constexpr struct timespec S3FS_OMIT_TS = {0, UTIME_OMIT}; + +int compare_timespec(const struct timespec& ts1, const struct timespec& ts2); +int compare_timespec(const struct stat& st, stat_time_type type, const struct timespec& ts); +void set_timespec_to_stat(struct stat& st, stat_time_type type, const struct timespec& ts); +struct timespec* set_stat_to_timespec(const struct stat& st, stat_time_type type, struct timespec& ts); +std::string str_stat_time(const 
struct stat& st, stat_time_type type); +struct timespec* s3fs_realtime(struct timespec& ts); +std::string s3fs_str_realtime(); + +// Wrap fclose since it is illegal to take the address of a stdlib function +int s3fs_fclose(FILE* fp); + +class scope_guard { +public: + template <typename Callable> + explicit scope_guard(Callable&& undo_func) + : func(std::forward<Callable>(undo_func)) + {} + + ~scope_guard() { + if(func != nullptr) { + func(); + } + } + + void dismiss() { + func = nullptr; + } + + scope_guard(const scope_guard&) = delete; + scope_guard(scope_guard&& other) = delete; + scope_guard& operator=(const scope_guard&) = delete; + scope_guard& operator=(scope_guard&&) = delete; + +private: + std::function<void()> func; +}; + +#endif // S3FS_S3FS_UTIL_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_version.md b/s3fs/s3fs_version.md new file mode 100644 index 0000000..eec058c --- /dev/null +++ b/s3fs/s3fs_version.md @@ -0,0 +1,2 @@ +commit 70a30d6e26a5dfd07a00cf79ce1196079e5ab11a (tag: v1.94) +Date: Fri Feb 23 12:56:01 2024 +0900 diff --git a/s3fs/s3fs_xml.cpp b/s3fs/s3fs_xml.cpp new file mode 100644 index 0000000..1b9507a --- /dev/null +++ b/s3fs/s3fs_xml.cpp @@ -0,0 +1,531 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include + +#include "common.h" +#include "s3fs.h" +#include "s3fs_logger.h" +#include "s3fs_xml.h" +#include "s3fs_util.h" +#include "s3objlist.h" +#include "autolock.h" +#include "string_util.h" + +//------------------------------------------------------------------- +// Variables +//------------------------------------------------------------------- +static constexpr char c_strErrorObjectName[] = "FILE or SUBDIR in DIR"; + +// [NOTE] +// mutex for static variables in GetXmlNsUrl +// +static pthread_mutex_t* pxml_parser_mutex = nullptr; + +//------------------------------------------------------------------- +// Functions +//------------------------------------------------------------------- +static bool GetXmlNsUrl(xmlDocPtr doc, std::string& nsurl) +{ + bool result = false; + + if(!pxml_parser_mutex || !doc){ + return result; + } + + std::string tmpNs; + { + static time_t tmLast = 0; // cache for 60 sec. 
+ static std::string strNs; + + AutoLock lock(pxml_parser_mutex); + + if((tmLast + 60) < time(nullptr)){ + // refresh + tmLast = time(nullptr); + strNs = ""; + xmlNodePtr pRootNode = xmlDocGetRootElement(doc); + if(pRootNode){ + xmlNsPtr* nslist = xmlGetNsList(doc, pRootNode); + if(nslist){ + if(nslist[0] && nslist[0]->href){ + int len = xmlStrlen(nslist[0]->href); + if(0 < len){ + strNs = std::string(reinterpret_cast(nslist[0]->href), len); + } + } + S3FS_XMLFREE(nslist); + } + } + } + tmpNs = strNs; + } + if(!tmpNs.empty()){ + nsurl = tmpNs; + result = true; + } + return result; +} + +static unique_ptr_xmlChar get_base_exp(xmlDocPtr doc, const char* exp) +{ + std::string xmlnsurl; + std::string exp_string; + + if(!doc){ + return {nullptr, xmlFree}; + } + unique_ptr_xmlXPathContext ctx(xmlXPathNewContext(doc), xmlXPathFreeContext); + + if(!noxmlns && GetXmlNsUrl(doc, xmlnsurl)){ + xmlXPathRegisterNs(ctx.get(), reinterpret_cast("s3"), reinterpret_cast(xmlnsurl.c_str())); + exp_string = "/s3:ListBucketResult/s3:"; + } else { + exp_string = "/ListBucketResult/"; + } + + exp_string += exp; + + unique_ptr_xmlXPathObject marker_xp(xmlXPathEvalExpression(reinterpret_cast(exp_string.c_str()), ctx.get()), xmlXPathFreeObject); + if(nullptr == marker_xp){ + return {nullptr, xmlFree}; + } + if(xmlXPathNodeSetIsEmpty(marker_xp->nodesetval)){ + S3FS_PRN_INFO("marker_xp->nodesetval is empty."); + return {nullptr, xmlFree}; + } + xmlNodeSetPtr nodes = marker_xp->nodesetval; + + unique_ptr_xmlChar result(xmlNodeListGetString(doc, nodes->nodeTab[0]->xmlChildrenNode, 1), xmlFree); + return result; +} + +static unique_ptr_xmlChar get_prefix(xmlDocPtr doc) +{ + return get_base_exp(doc, "Prefix"); +} + +unique_ptr_xmlChar get_next_continuation_token(xmlDocPtr doc) +{ + return get_base_exp(doc, "NextContinuationToken"); +} + +unique_ptr_xmlChar get_next_marker(xmlDocPtr doc) +{ + return get_base_exp(doc, "NextMarker"); +} + +// return: the pointer to object name on allocated memory. 
+// the pointer to "c_strErrorObjectName".(not allocated) +// nullptr(a case of something error occurred) +static char* get_object_name(xmlDocPtr doc, xmlNodePtr node, const char* path) +{ + // Get full path + unique_ptr_xmlChar fullpath(xmlNodeListGetString(doc, node, 1), xmlFree); + if(!fullpath){ + S3FS_PRN_ERR("could not get object full path name.."); + return nullptr; + } + // basepath(path) is as same as fullpath. + if(0 == strcmp(reinterpret_cast(fullpath.get()), path)){ + return const_cast(c_strErrorObjectName); + } + + // Make dir path and filename + std::string strdirpath = mydirname(reinterpret_cast(fullpath.get())); + std::string strmybpath = mybasename(reinterpret_cast(fullpath.get())); + const char* dirpath = strdirpath.c_str(); + const char* mybname = strmybpath.c_str(); + const char* basepath= (path && '/' == path[0]) ? &path[1] : path; + + if('\0' == mybname[0]){ + return nullptr; + } + + // check subdir & file in subdir + if(0 < strlen(dirpath)){ + // case of "/" + if(0 == strcmp(mybname, "/") && 0 == strcmp(dirpath, "/")){ + return const_cast(c_strErrorObjectName); + } + // case of "." + if(0 == strcmp(mybname, ".") && 0 == strcmp(dirpath, ".")){ + return const_cast(c_strErrorObjectName); + } + // case of ".." 
+ if(0 == strcmp(mybname, "..") && 0 == strcmp(dirpath, ".")){ + return const_cast(c_strErrorObjectName); + } + // case of "name" + if(0 == strcmp(dirpath, ".")){ + // OK + return strdup(mybname); + }else{ + if(basepath && 0 == strcmp(dirpath, basepath)){ + // OK + return strdup(mybname); + }else if(basepath && 0 < strlen(basepath) && '/' == basepath[strlen(basepath) - 1] && 0 == strncmp(dirpath, basepath, strlen(basepath) - 1)){ + std::string withdirname; + if(strlen(dirpath) > strlen(basepath)){ + withdirname = &dirpath[strlen(basepath)]; + } + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(!withdirname.empty() && '/' != *withdirname.rbegin()){ + withdirname += "/"; + } + withdirname += mybname; + return strdup(withdirname.c_str()); + } + } + } + // case of something wrong + return const_cast(c_strErrorObjectName); +} + +static unique_ptr_xmlChar get_exp_value_xml(xmlDocPtr doc, xmlXPathContextPtr ctx, const char* exp_key) +{ + if(!doc || !ctx || !exp_key){ + return {nullptr, xmlFree}; + } + + xmlNodeSetPtr exp_nodes; + + // search exp_key tag + unique_ptr_xmlXPathObject exp(xmlXPathEvalExpression(reinterpret_cast(exp_key), ctx), xmlXPathFreeObject); + if(nullptr == exp){ + S3FS_PRN_ERR("Could not find key(%s).", exp_key); + return {nullptr, xmlFree}; + } + if(xmlXPathNodeSetIsEmpty(exp->nodesetval)){ + S3FS_PRN_ERR("Key(%s) node is empty.", exp_key); + return {nullptr, xmlFree}; + } + // get exp_key value & set in struct + exp_nodes = exp->nodesetval; + unique_ptr_xmlChar exp_value(xmlNodeListGetString(doc, exp_nodes->nodeTab[0]->xmlChildrenNode, 1), xmlFree); + if(nullptr == exp_value){ + S3FS_PRN_ERR("Key(%s) value is empty.", exp_key); + return {nullptr, xmlFree}; + } + + return exp_value; +} + +bool get_incomp_mpu_list(xmlDocPtr doc, incomp_mpu_list_t& list) +{ + if(!doc){ + return false; + } + + unique_ptr_xmlXPathContext ctx(xmlXPathNewContext(doc), xmlXPathFreeContext); + + std::string xmlnsurl; + std::string 
ex_upload = "//"; + std::string ex_key; + std::string ex_id; + std::string ex_date; + + if(!noxmlns && GetXmlNsUrl(doc, xmlnsurl)){ + xmlXPathRegisterNs(ctx.get(), reinterpret_cast("s3"), reinterpret_cast(xmlnsurl.c_str())); + ex_upload += "s3:"; + ex_key += "s3:"; + ex_id += "s3:"; + ex_date += "s3:"; + } + ex_upload += "Upload"; + ex_key += "Key"; + ex_id += "UploadId"; + ex_date += "Initiated"; + + // get "Upload" Tags + unique_ptr_xmlXPathObject upload_xp(xmlXPathEvalExpression(reinterpret_cast(ex_upload.c_str()), ctx.get()), xmlXPathFreeObject); + if(nullptr == upload_xp){ + S3FS_PRN_ERR("xmlXPathEvalExpression returns null."); + return false; + } + if(xmlXPathNodeSetIsEmpty(upload_xp->nodesetval)){ + S3FS_PRN_INFO("upload_xp->nodesetval is empty."); + return true; + } + + // Make list + int cnt; + xmlNodeSetPtr upload_nodes; + list.clear(); + for(cnt = 0, upload_nodes = upload_xp->nodesetval; cnt < upload_nodes->nodeNr; cnt++){ + ctx->node = upload_nodes->nodeTab[cnt]; + + INCOMP_MPU_INFO part; + + // search "Key" tag + unique_ptr_xmlChar ex_value(get_exp_value_xml(doc, ctx.get(), ex_key.c_str())); + if(nullptr == ex_value){ + continue; + } + if('/' != *(reinterpret_cast(ex_value.get()))){ + part.key = "/"; + }else{ + part.key = ""; + } + part.key += reinterpret_cast(ex_value.get()); + + // search "UploadId" tag + if(nullptr == (ex_value = get_exp_value_xml(doc, ctx.get(), ex_id.c_str()))){ + continue; + } + part.id = reinterpret_cast(ex_value.get()); + + // search "Initiated" tag + if(nullptr == (ex_value = get_exp_value_xml(doc, ctx.get(), ex_date.c_str()))){ + continue; + } + part.date = reinterpret_cast(ex_value.get()); + + list.push_back(part); + } + + return true; +} + +bool is_truncated(xmlDocPtr doc) +{ + unique_ptr_xmlChar strTruncate(get_base_exp(doc, "IsTruncated")); + if(!strTruncate){ + return false; + } + return 0 == strcasecmp(reinterpret_cast(strTruncate.get()), "true"); +} + +int append_objects_from_xml_ex(const char* path, xmlDocPtr doc, 
xmlXPathContextPtr ctx, const char* ex_contents, const char* ex_key, const char* ex_etag, int isCPrefix, S3ObjList& head, bool prefix) +{ + xmlNodeSetPtr content_nodes; + + unique_ptr_xmlXPathObject contents_xp(xmlXPathEvalExpression(reinterpret_cast(ex_contents), ctx), xmlXPathFreeObject); + if(nullptr == contents_xp){ + S3FS_PRN_ERR("xmlXPathEvalExpression returns null."); + return -1; + } + if(xmlXPathNodeSetIsEmpty(contents_xp->nodesetval)){ + S3FS_PRN_DBG("contents_xp->nodesetval is empty."); + return 0; + } + content_nodes = contents_xp->nodesetval; + + bool is_dir; + std::string stretag; + int i; + for(i = 0; i < content_nodes->nodeNr; i++){ + ctx->node = content_nodes->nodeTab[i]; + + // object name + unique_ptr_xmlXPathObject key(xmlXPathEvalExpression(reinterpret_cast(ex_key), ctx), xmlXPathFreeObject); + if(nullptr == key){ + S3FS_PRN_WARN("key is null. but continue."); + continue; + } + if(xmlXPathNodeSetIsEmpty(key->nodesetval)){ + S3FS_PRN_WARN("node is empty. but continue."); + continue; + } + xmlNodeSetPtr key_nodes = key->nodesetval; + char* name = get_object_name(doc, key_nodes->nodeTab[0]->xmlChildrenNode, path); + + if(!name){ + S3FS_PRN_WARN("name is something wrong. but continue."); + + }else if(reinterpret_cast(name) != c_strErrorObjectName){ + is_dir = isCPrefix ? true : false; + stretag = ""; + + if(!isCPrefix && ex_etag){ + // Get ETag + unique_ptr_xmlXPathObject ETag(xmlXPathEvalExpression(reinterpret_cast(ex_etag), ctx), xmlXPathFreeObject); + if(nullptr != ETag){ + if(xmlXPathNodeSetIsEmpty(ETag->nodesetval)){ + S3FS_PRN_INFO("ETag->nodesetval is empty."); + }else{ + xmlNodeSetPtr etag_nodes = ETag->nodesetval; + unique_ptr_xmlChar petag(xmlNodeListGetString(doc, etag_nodes->nodeTab[0]->xmlChildrenNode, 1), xmlFree); + if(petag){ + stretag = reinterpret_cast(petag.get()); + } + } + } + } + + // [NOTE] + // The XML data passed to this function is CR code(\r) encoded. + // The function below decodes that encoded CR code. 
+ // + std::string decname = get_decoded_cr_code(name); + free(name); + + if(prefix){ + head.common_prefixes.push_back(decname); + } + if(!head.insert(decname.c_str(), (!stretag.empty() ? stretag.c_str() : nullptr), is_dir)){ + S3FS_PRN_ERR("insert_object returns with error."); + return -1; + } + }else{ + S3FS_PRN_DBG("name is file or subdir in dir. but continue."); + } + } + + return 0; +} + +int append_objects_from_xml(const char* path, xmlDocPtr doc, S3ObjList& head) +{ + std::string xmlnsurl; + std::string ex_contents = "//"; + std::string ex_key; + std::string ex_cprefix = "//"; + std::string ex_prefix; + std::string ex_etag; + + if(!doc){ + return -1; + } + + // If there is not , use path instead of it. + auto pprefix = get_prefix(doc); + std::string prefix = (pprefix ? reinterpret_cast(pprefix.get()) : path ? path : ""); + + unique_ptr_xmlXPathContext ctx(xmlXPathNewContext(doc), xmlXPathFreeContext); + + if(!noxmlns && GetXmlNsUrl(doc, xmlnsurl)){ + xmlXPathRegisterNs(ctx.get(), reinterpret_cast("s3"), reinterpret_cast(xmlnsurl.c_str())); + ex_contents+= "s3:"; + ex_key += "s3:"; + ex_cprefix += "s3:"; + ex_prefix += "s3:"; + ex_etag += "s3:"; + } + ex_contents+= "Contents"; + ex_key += "Key"; + ex_cprefix += "CommonPrefixes"; + ex_prefix += "Prefix"; + ex_etag += "ETag"; + + if(-1 == append_objects_from_xml_ex(prefix.c_str(), doc, ctx.get(), ex_contents.c_str(), ex_key.c_str(), ex_etag.c_str(), 0, head, /*prefix=*/ false) || + -1 == append_objects_from_xml_ex(prefix.c_str(), doc, ctx.get(), ex_cprefix.c_str(), ex_prefix.c_str(), nullptr, 1, head, /*prefix=*/ true) ) + { + S3FS_PRN_ERR("append_objects_from_xml_ex returns with error."); + return -1; + } + + return 0; +} + +//------------------------------------------------------------------- +// Utility functions +//------------------------------------------------------------------- +bool simple_parse_xml(const char* data, size_t len, const char* key, std::string& value) +{ + bool result = false; + + 
if(!data || !key){ + return false; + } + value.clear(); + + std::unique_ptr doc(xmlReadMemory(data, static_cast(len), "", nullptr, 0), xmlFreeDoc); + if(nullptr == doc){ + return false; + } + + if(nullptr == doc->children){ + return false; + } + for(xmlNodePtr cur_node = doc->children->children; nullptr != cur_node; cur_node = cur_node->next){ + // For DEBUG + // std::string cur_node_name(reinterpret_cast(cur_node->name)); + // printf("cur_node_name: %s\n", cur_node_name.c_str()); + + if(XML_ELEMENT_NODE == cur_node->type){ + std::string elementName = reinterpret_cast(cur_node->name); + // For DEBUG + // printf("elementName: %s\n", elementName.c_str()); + + if(cur_node->children){ + if(XML_TEXT_NODE == cur_node->children->type){ + if(elementName == key) { + value = reinterpret_cast(cur_node->children->content); + result = true; + break; + } + } + } + } + } + + return result; +} + +//------------------------------------------------------------------- +// Utility for lock +//------------------------------------------------------------------- +bool init_parser_xml_lock() +{ + if(pxml_parser_mutex){ + return false; + } + pxml_parser_mutex = new pthread_mutex_t; + + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + + if(0 != pthread_mutex_init(pxml_parser_mutex, &attr)){ + delete pxml_parser_mutex; + pxml_parser_mutex = nullptr; + return false; + } + return true; +} + +bool destroy_parser_xml_lock() +{ + if(!pxml_parser_mutex){ + return false; + } + if(0 != pthread_mutex_destroy(pxml_parser_mutex)){ + return false; + } + delete pxml_parser_mutex; + pxml_parser_mutex = nullptr; + + return true; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3fs_xml.h b/s3fs/s3fs_xml.h new file mode 100644 index 0000000..4f514df --- /dev/null +++ 
b/s3fs/s3fs_xml.h @@ -0,0 +1,62 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_S3FS_XML_H_ +#define S3FS_S3FS_XML_H_ + +#include +#include // [NOTE] nessetially include this header in some environments +#include +#include + +#include "mpu_util.h" + +class S3ObjList; + +typedef std::unique_ptr unique_ptr_xmlChar; +typedef std::unique_ptr unique_ptr_xmlXPathObject; +typedef std::unique_ptr unique_ptr_xmlXPathContext; +typedef std::unique_ptr unique_ptr_xmlDoc; + +//------------------------------------------------------------------- +// Functions +//------------------------------------------------------------------- +bool is_truncated(xmlDocPtr doc); +int append_objects_from_xml_ex(const char* path, xmlDocPtr doc, xmlXPathContextPtr ctx, const char* ex_contents, const char* ex_key, const char* ex_etag, int isCPrefix, S3ObjList& head, bool prefix); +int append_objects_from_xml(const char* path, xmlDocPtr doc, S3ObjList& head); +unique_ptr_xmlChar get_next_continuation_token(xmlDocPtr doc); +unique_ptr_xmlChar get_next_marker(xmlDocPtr doc); +bool get_incomp_mpu_list(xmlDocPtr doc, incomp_mpu_list_t& list); + +bool simple_parse_xml(const char* data, size_t len, const char* key, 
std::string& value); + +bool init_parser_xml_lock(); +bool destroy_parser_xml_lock(); + +#endif // S3FS_S3FS_XML_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3objlist.cpp b/s3fs/s3objlist.cpp new file mode 100644 index 0000000..592c99d --- /dev/null +++ b/s3fs/s3objlist.cpp @@ -0,0 +1,282 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include + +#include "s3objlist.h" + +//------------------------------------------------------------------- +// Class S3ObjList +//------------------------------------------------------------------- +// New class S3ObjList is base on old s3_object struct. +// This class is for S3 compatible clients. +// +// If name is terminated by "/", it is forced dir type. +// If name is terminated by "_$folder$", it is forced dir type. +// If is_dir is true and name is not terminated by "/", the name is added "/". 
+// +bool S3ObjList::insert(const char* name, const char* etag, bool is_dir) +{ + if(!name || '\0' == name[0]){ + return false; + } + + s3obj_t::iterator iter; + std::string newname; + std::string orgname = name; + + // Normalization + std::string::size_type pos = orgname.find("_$folder$"); + if(std::string::npos != pos){ + newname = orgname.substr(0, pos); + is_dir = true; + }else{ + newname = orgname; + } + if(is_dir){ + if('/' != *newname.rbegin()){ + newname += "/"; + } + }else{ + if('/' == *newname.rbegin()){ + is_dir = true; + } + } + + // Check derived name object. + if(is_dir){ + std::string chkname = newname.substr(0, newname.length() - 1); + if(objects.end() != (iter = objects.find(chkname))){ + // found "dir" object --> remove it. + objects.erase(iter); + } + }else{ + std::string chkname = newname + "/"; + if(objects.end() != (iter = objects.find(chkname))){ + // found "dir/" object --> not add new object. + // and add normalization + return insert_normalized(orgname.c_str(), chkname.c_str(), true); + } + } + + // Add object + if(objects.end() != (iter = objects.find(newname))){ + // Found same object --> update information. 
+ (*iter).second.normalname.erase(); + (*iter).second.orgname = orgname; + (*iter).second.is_dir = is_dir; + if(etag){ + (*iter).second.etag = etag; // over write + } + }else{ + // add new object + s3obj_entry newobject; + newobject.orgname = orgname; + newobject.is_dir = is_dir; + if(etag){ + newobject.etag = etag; + } + objects[newname] = newobject; + } + + // add normalization + return insert_normalized(orgname.c_str(), newname.c_str(), is_dir); +} + +bool S3ObjList::insert_normalized(const char* name, const char* normalized, bool is_dir) +{ + if(!name || '\0' == name[0] || !normalized || '\0' == normalized[0]){ + return false; + } + if(0 == strcmp(name, normalized)){ + return true; + } + + s3obj_t::iterator iter; + if(objects.end() != (iter = objects.find(name))){ + // found name --> over write + iter->second.orgname.erase(); + iter->second.etag.erase(); + iter->second.normalname = normalized; + iter->second.is_dir = is_dir; + }else{ + // not found --> add new object + s3obj_entry newobject; + newobject.normalname = normalized; + newobject.is_dir = is_dir; + objects[name] = newobject; + } + return true; +} + +const s3obj_entry* S3ObjList::GetS3Obj(const char* name) const +{ + s3obj_t::const_iterator iter; + + if(!name || '\0' == name[0]){ + return nullptr; + } + if(objects.end() == (iter = objects.find(name))){ + return nullptr; + } + return &((*iter).second); +} + +std::string S3ObjList::GetOrgName(const char* name) const +{ + const s3obj_entry* ps3obj; + + if(!name || '\0' == name[0]){ + return ""; + } + if(nullptr == (ps3obj = GetS3Obj(name))){ + return ""; + } + return ps3obj->orgname; +} + +std::string S3ObjList::GetNormalizedName(const char* name) const +{ + const s3obj_entry* ps3obj; + + if(!name || '\0' == name[0]){ + return ""; + } + if(nullptr == (ps3obj = GetS3Obj(name))){ + return ""; + } + if(ps3obj->normalname.empty()){ + return name; + } + return ps3obj->normalname; +} + +std::string S3ObjList::GetETag(const char* name) const +{ + const 
s3obj_entry* ps3obj; + + if(!name || '\0' == name[0]){ + return ""; + } + if(nullptr == (ps3obj = GetS3Obj(name))){ + return ""; + } + return ps3obj->etag; +} + +bool S3ObjList::IsDir(const char* name) const +{ + const s3obj_entry* ps3obj; + + if(nullptr == (ps3obj = GetS3Obj(name))){ + return false; + } + return ps3obj->is_dir; +} + +bool S3ObjList::GetLastName(std::string& lastname) const +{ + bool result = false; + lastname = ""; + for(s3obj_t::const_iterator iter = objects.begin(); iter != objects.end(); ++iter){ + if((*iter).second.orgname.length()){ + if(lastname.compare(iter->second.orgname) < 0){ + lastname = (*iter).second.orgname; + result = true; + } + }else{ + if(lastname.compare(iter->second.normalname) < 0){ + lastname = (*iter).second.normalname; + result = true; + } + } + } + return result; +} + +bool S3ObjList::GetNameList(s3obj_list_t& list, bool OnlyNormalized, bool CutSlash) const +{ + s3obj_t::const_iterator iter; + + for(iter = objects.begin(); objects.end() != iter; ++iter){ + if(OnlyNormalized && !iter->second.normalname.empty()){ + continue; + } + std::string name = (*iter).first; + if(CutSlash && 1 < name.length() && '/' == *name.rbegin()){ + // only "/" std::string is skipped this. 
+ name.erase(name.length() - 1); + } + list.push_back(name); + } + return true; +} + +typedef std::map s3obj_h_t; + +bool S3ObjList::MakeHierarchizedList(s3obj_list_t& list, bool haveSlash) +{ + s3obj_h_t h_map; + s3obj_h_t::iterator hiter; + s3obj_list_t::const_iterator liter; + + for(liter = list.begin(); list.end() != liter; ++liter){ + std::string strtmp = (*liter); + if(1 < strtmp.length() && '/' == *strtmp.rbegin()){ + strtmp.erase(strtmp.length() - 1); + } + h_map[strtmp] = true; + + // check hierarchized directory + for(std::string::size_type pos = strtmp.find_last_of('/'); std::string::npos != pos; pos = strtmp.find_last_of('/')){ + strtmp.erase(pos); + if(strtmp.empty() || "/" == strtmp){ + break; + } + if(h_map.end() == h_map.find(strtmp)){ + // not found + h_map[strtmp] = false; + } + } + } + + // check map and add lost hierarchized directory. + for(hiter = h_map.begin(); hiter != h_map.end(); ++hiter){ + if(false == (*hiter).second){ + // add hierarchized directory. + std::string strtmp = (*hiter).first; + if(haveSlash){ + strtmp += "/"; + } + list.push_back(strtmp); + } + } + return true; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/s3objlist.h b/s3fs/s3objlist.h new file mode 100644 index 0000000..ffd2d9b --- /dev/null +++ b/s3fs/s3objlist.h @@ -0,0 +1,85 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
//-------------------------------------------------------------------
// Structure / Typedef
//-------------------------------------------------------------------
// One entry of an S3 object listing.
struct s3obj_entry{
    std::string normalname; // normalized name: if empty, object is normalized name.
    std::string orgname;    // original name: if empty, object is original name.
    std::string etag;       // ETag reported by the listing (may be empty)
    bool        is_dir;     // true when the entry represents a directory

    s3obj_entry() : is_dir(false) {}
};

typedef std::map<std::string, s3obj_entry> s3obj_t;
typedef std::vector<std::string>           s3obj_list_t;

//-------------------------------------------------------------------
// Class S3ObjList
//-------------------------------------------------------------------
// Holds the result of an S3 listing keyed by (normalized) object name.
// See s3objlist.cpp for the normalization rules ("/" and "_$folder$").
class S3ObjList
{
    private:
        s3obj_t objects;

    public:
        // raw <CommonPrefixes> values collected while listing
        std::vector<std::string> common_prefixes;

    private:
        bool insert_normalized(const char* name, const char* normalized, bool is_dir);
        const s3obj_entry* GetS3Obj(const char* name) const;

        s3obj_t::const_iterator begin() const { return objects.begin(); }
        s3obj_t::const_iterator end() const { return objects.end(); }

    public:
        S3ObjList() {}
        ~S3ObjList() {}

        bool IsEmpty() const { return objects.empty(); }
        bool insert(const char* name, const char* etag = nullptr, bool is_dir = false);
        std::string GetOrgName(const char* name) const;
        std::string GetNormalizedName(const char* name) const;
        std::string GetETag(const char* name) const;
        bool IsDir(const char* name) const;
        bool GetNameList(s3obj_list_t& list, bool OnlyNormalized = true, bool CutSlash = true) const;
        bool GetLastName(std::string& lastname) const;

        static bool MakeHierarchizedList(s3obj_list_t& list, bool haveSlash);
};
+#endif // S3FS_S3OBJLIST_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/sighandlers.cpp b/s3fs/sighandlers.cpp new file mode 100644 index 0000000..81055fc --- /dev/null +++ b/s3fs/sighandlers.cpp @@ -0,0 +1,267 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include + +#include "s3fs_logger.h" +#include "sighandlers.h" +#include "fdcache.h" + +//------------------------------------------------------------------- +// Class S3fsSignals +//------------------------------------------------------------------- +std::unique_ptr S3fsSignals::pSingleton; +bool S3fsSignals::enableUsr1 = false; + +//------------------------------------------------------------------- +// Class methods +//------------------------------------------------------------------- +bool S3fsSignals::Initialize() +{ + if(!S3fsSignals::pSingleton){ + S3fsSignals::pSingleton.reset(new S3fsSignals); + } + return true; +} + +bool S3fsSignals::Destroy() +{ + S3fsSignals::pSingleton.reset(); + return true; +} + +void S3fsSignals::HandlerUSR1(int sig) +{ + if(SIGUSR1 != sig){ + S3FS_PRN_ERR("The handler for SIGUSR1 received signal(%d)", sig); + return; + } + + S3fsSignals* pSigobj = S3fsSignals::get(); + if(!pSigobj){ + S3FS_PRN_ERR("S3fsSignals object is not initialized."); + return; + } + + if(!pSigobj->WakeupUsr1Thread()){ + S3FS_PRN_ERR("Failed to wakeup the thread for SIGUSR1."); + return; + } +} + +bool S3fsSignals::SetUsr1Handler(const char* path) +{ + if(!FdManager::HaveLseekHole()){ + S3FS_PRN_ERR("Could not set SIGUSR1 for checking cache, because this system does not support SEEK_DATA/SEEK_HOLE in lseek function."); + return false; + } + + // set output file + if(!FdManager::SetCacheCheckOutput(path)){ + S3FS_PRN_ERR("Could not set output file(%s) for checking cache.", path ? 
// Worker thread entry point: waits on the semaphore posted by HandlerUSR1 and
// runs a full cache check each time it is woken. Exits when enableUsr1 is
// cleared (DestroyUsr1Handler() clears the flag and posts the semaphore once).
// arg is the Semaphore* owned by the S3fsSignals singleton.
void* S3fsSignals::CheckCacheWorker(void* arg)
{
    Semaphore* pSem = static_cast<Semaphore*>(arg);
    if(!pSem){
        pthread_exit(nullptr);
    }
    if(!S3fsSignals::enableUsr1){
        pthread_exit(nullptr);
    }

    // wait and loop
    while(S3fsSignals::enableUsr1){
        // wait
        pSem->wait();

        // cppcheck-suppress unmatchedSuppression
        // cppcheck-suppress knownConditionTrueFalse
        if(!S3fsSignals::enableUsr1){
            break; // assap
        }

        // check all cache
        if(!FdManager::get()->CheckAllCache()){
            S3FS_PRN_ERR("Processing failed due to some problem.");
        }

        // do not allow request queuing
        // Drain any SIGUSR1 posts that arrived while the check was running,
        // so repeated signals do not queue up redundant cache scans.
        for(int value = pSem->get_value(); 0 < value; value = pSem->get_value()){
            pSem->wait();
        }
    }
    return nullptr;
}
continue..."); + } + } + if(!S3fsSignals::InitUsr2Handler()){ + S3FS_PRN_ERR("failed to initialize SIGUSR2 handler for bumping log level, but continue..."); + } + if(!S3fsSignals::InitHupHandler()){ + S3FS_PRN_ERR("failed to initialize SIGHUP handler for reopen log file, but continue..."); + } +} + +S3fsSignals::~S3fsSignals() +{ + if(S3fsSignals::enableUsr1){ + if(!DestroyUsr1Handler()){ + S3FS_PRN_ERR("failed stopping thread for SIGUSR1 handler, but continue..."); + } + } +} + +bool S3fsSignals::InitUsr1Handler() +{ + if(pThreadUsr1 || pSemUsr1){ + S3FS_PRN_ERR("Already run thread for SIGUSR1"); + return false; + } + + // create thread + int result; + std::unique_ptr pSemUsr1_tmp(new Semaphore(0)); + std::unique_ptr pThreadUsr1_tmp(new pthread_t); + if(0 != (result = pthread_create(pThreadUsr1.get(), nullptr, S3fsSignals::CheckCacheWorker, static_cast(pSemUsr1_tmp.get())))){ + S3FS_PRN_ERR("Could not create thread for SIGUSR1 by %d", result); + return false; + } + pSemUsr1 = std::move(pSemUsr1_tmp); + pThreadUsr1 = std::move(pThreadUsr1_tmp); + + // set handler + struct sigaction sa; + memset(&sa, 0, sizeof(struct sigaction)); + sa.sa_handler = S3fsSignals::HandlerUSR1; + sa.sa_flags = SA_RESTART; + if(0 != sigaction(SIGUSR1, &sa, nullptr)){ + S3FS_PRN_ERR("Could not set signal handler for SIGUSR1"); + DestroyUsr1Handler(); + return false; + } + + return true; +} + +bool S3fsSignals::DestroyUsr1Handler() +{ + if(!pThreadUsr1 || !pSemUsr1){ + return false; + } + // for thread exit + S3fsSignals::enableUsr1 = false; + + // wakeup thread + pSemUsr1->post(); + + // wait for thread exiting + void* retval = nullptr; + int result; + if(0 != (result = pthread_join(*pThreadUsr1, &retval))){ + S3FS_PRN_ERR("Could not stop thread for SIGUSR1 by %d", result); + return false; + } + pSemUsr1.reset(); + pThreadUsr1.reset(); + + return true; +} + +bool S3fsSignals::WakeupUsr1Thread() +{ + if(!pThreadUsr1 || !pSemUsr1){ + S3FS_PRN_ERR("The thread for SIGUSR1 is not setup."); + 
return false; + } + pSemUsr1->post(); + return true; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/sighandlers.h b/s3fs/sighandlers.h new file mode 100644 index 0000000..f4996e6 --- /dev/null +++ b/s3fs/sighandlers.h @@ -0,0 +1,79 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
//----------------------------------------------
// class S3fsSignals
//----------------------------------------------
// Singleton owning the process signal handlers:
//   SIGUSR1 -> wake a worker thread that checks the local cache
//   SIGUSR2 -> bump up the log level
//   SIGHUP  -> reopen the log file
class S3fsSignals
{
    private:
        static std::unique_ptr<S3fsSignals> pSingleton;   // singleton instance
        static bool enableUsr1;                           // SIGUSR1 feature switch / worker run flag

        std::unique_ptr<pthread_t> pThreadUsr1;           // cache-check worker thread
        std::unique_ptr<Semaphore> pSemUsr1;              // wakes the worker from the handler

    protected:
        static S3fsSignals* get() { return pSingleton.get(); }

        static void HandlerUSR1(int sig);
        static void* CheckCacheWorker(void* arg);

        static void HandlerUSR2(int sig);
        static bool InitUsr2Handler();

        static void HandlerHUP(int sig);
        static bool InitHupHandler();

        S3fsSignals();
        S3fsSignals(const S3fsSignals&) = delete;
        S3fsSignals(S3fsSignals&&) = delete;
        S3fsSignals& operator=(const S3fsSignals&) = delete;
        S3fsSignals& operator=(S3fsSignals&&) = delete;

        bool InitUsr1Handler();
        bool DestroyUsr1Handler();
        bool WakeupUsr1Thread();

    public:
        ~S3fsSignals();
        static bool Initialize();
        static bool Destroy();

        // Must be called before Initialize() to enable the SIGUSR1 cache check;
        // path selects the report output file (nullptr means stdout).
        static bool SetUsr1Handler(const char* path);
};
// Render a timespec as "<sec>" or "<sec>.<9-digit nsec>" (no trailing zeros
// are stripped; the fraction is omitted entirely when tv_nsec is zero).
std::string str(const struct timespec value)
{
    if(0 == value.tv_nsec){
        return std::to_string(value.tv_sec);
    }
    std::ostringstream stream;
    stream << value.tv_sec << '.' << std::setfill('0') << std::setw(9) << value.tv_nsec;
    return stream.str();
}
// Strictly parse str as an integer in the given base into *value.
// Returns false on null arguments, when no digits were consumed, when
// trailing characters remain, or when the value is out of range.
bool s3fs_strtoofft(off_t* value, const char* str, int base)
{
    if(nullptr == value || nullptr == str){
        return false;
    }
    errno = 0;
    char* endptr = nullptr;
    const long long parsed = strtoll(str, &endptr, base);

    if(endptr == str || '\0' != *endptr){
        return false;  // no digits, or trailing garbage
    }
    if(ERANGE == errno && (LLONG_MIN == parsed || LLONG_MAX == parsed)){
        return false;  // out of range
    }

    *value = parsed;
    return true;
}
+// urlEncodeQuery : A function that does URL encoding by excluding +// some characters('=', '&' and '%'). +// This function can be used when the target string +// contains already URL encoded strings. It also +// excludes the character () used in query strings. +// Therefore, it is a function to use as URL encoding +// for use in query strings. +// +static constexpr char encode_general_except_chars[] = ".-_~"; // For general URL encode +static constexpr char encode_path_except_chars[] = ".-_~/"; // For fuse(included path) URL encode +static constexpr char encode_query_except_chars[] = ".-_~=&%"; // For query params(and encoded string) + +static std::string rawUrlEncode(const std::string &s, const char* except_chars) +{ + std::string result; + for (size_t i = 0; i < s.length(); ++i) { + unsigned char c = s[i]; + if((except_chars && nullptr != strchr(except_chars, c)) || + (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') ) + { + result += c; + }else{ + result += "%"; + result += s3fs_hex_upper(&c, 1); + } + } + return result; +} + +std::string urlEncodeGeneral(const std::string &s) +{ + return rawUrlEncode(s, encode_general_except_chars); +} + +std::string urlEncodePath(const std::string &s) +{ + return rawUrlEncode(s, encode_path_except_chars); +} + +std::string urlEncodeQuery(const std::string &s) +{ + return rawUrlEncode(s, encode_query_except_chars); +} + +std::string urlDecode(const std::string& s) +{ + std::string result; + for(size_t i = 0; i < s.length(); ++i){ + if(s[i] != '%'){ + result += s[i]; + }else{ + int ch = 0; + if(s.length() <= ++i){ + break; // wrong format. + } + ch += ('0' <= s[i] && s[i] <= '9') ? (s[i] - '0') : ('A' <= s[i] && s[i] <= 'F') ? (s[i] - 'A' + 0x0a) : ('a' <= s[i] && s[i] <= 'f') ? (s[i] - 'a' + 0x0a) : 0x00; + if(s.length() <= ++i){ + break; // wrong format. + } + ch *= 16; + ch += ('0' <= s[i] && s[i] <= '9') ? (s[i] - '0') : ('A' <= s[i] && s[i] <= 'F') ? 
//
// ex. target="http://......?keyword=value&..."
//
// Extract the value following "keyword=" in target, up to the next '&'
// (or the end of the string). Returns false when keyword is null, absent,
// or not immediately followed by '='.
bool get_keyword_value(const std::string& target, const char* keyword, std::string& value)
{
    if(!keyword){
        return false;
    }

    size_t start = target.find(keyword);
    if(std::string::npos == start){
        return false;
    }
    start += strlen(keyword);
    if('=' != target[start]){
        return false;
    }
    ++start;

    const size_t amp = target.find('&', start);
    value = (std::string::npos == amp) ? target.substr(start) : target.substr(start, amp - start);
    return true;
}
+// +std::string get_date_rfc850() +{ + char buf[100]; + time_t t = time(nullptr); + struct tm res; + strftime(buf, sizeof(buf), "%a, %d %b %Y %H:%M:%S GMT", gmtime_r(&t, &res)); + return buf; +} + +void get_date_sigv3(std::string& date, std::string& date8601) +{ + time_t tm = time(nullptr); + date = get_date_string(tm); + date8601 = get_date_iso8601(tm); +} + +std::string get_date_string(time_t tm) +{ + char buf[100]; + struct tm res; + strftime(buf, sizeof(buf), "%Y%m%d", gmtime_r(&tm, &res)); + return buf; +} + +std::string get_date_iso8601(time_t tm) +{ + char buf[100]; + struct tm res; + strftime(buf, sizeof(buf), "%Y%m%dT%H%M%SZ", gmtime_r(&tm, &res)); + return buf; +} + +bool get_unixtime_from_iso8601(const char* pdate, time_t& unixtime) +{ + if(!pdate){ + return false; + } + + struct tm tm; + const char* prest = strptime(pdate, "%Y-%m-%dT%T", &tm); + if(prest == pdate){ + // wrong format + return false; + } + unixtime = mktime(&tm); + return true; +} + +// +// Convert to unixtime from std::string which formatted by following: +// "12Y12M12D12h12m12s", "86400s", "9h30m", etc +// +bool convert_unixtime_from_option_arg(const char* argv, time_t& unixtime) +{ + if(!argv){ + return false; + } + unixtime = 0; + const char* ptmp; + int last_unit_type = 0; // unit flag. 
+ bool is_last_number; + time_t tmptime; + for(ptmp = argv, is_last_number = true, tmptime = 0; ptmp && *ptmp; ++ptmp){ + if('0' <= *ptmp && *ptmp <= '9'){ + tmptime *= 10; + tmptime += static_cast(*ptmp - '0'); + is_last_number = true; + }else if(is_last_number){ + if('Y' == *ptmp && 1 > last_unit_type){ + unixtime += (tmptime * (60 * 60 * 24 * 365)); // average 365 day / year + last_unit_type = 1; + }else if('M' == *ptmp && 2 > last_unit_type){ + unixtime += (tmptime * (60 * 60 * 24 * 30)); // average 30 day / month + last_unit_type = 2; + }else if('D' == *ptmp && 3 > last_unit_type){ + unixtime += (tmptime * (60 * 60 * 24)); + last_unit_type = 3; + }else if('h' == *ptmp && 4 > last_unit_type){ + unixtime += (tmptime * (60 * 60)); + last_unit_type = 4; + }else if('m' == *ptmp && 5 > last_unit_type){ + unixtime += (tmptime * 60); + last_unit_type = 5; + }else if('s' == *ptmp && 6 > last_unit_type){ + unixtime += tmptime; + last_unit_type = 6; + }else{ + return false; + } + tmptime = 0; + is_last_number = false; + }else{ + return false; + } + } + if(is_last_number){ + return false; + } + return true; +} + +static std::string s3fs_hex(const unsigned char* input, size_t length, const char *hexAlphabet) +{ + std::string hex; + for(size_t pos = 0; pos < length; ++pos){ + hex += hexAlphabet[input[pos] / 16]; + hex += hexAlphabet[input[pos] % 16]; + } + return hex; +} + +std::string s3fs_hex_lower(const unsigned char* input, size_t length) +{ + return s3fs_hex(input, length, "0123456789abcdef"); +} + +std::string s3fs_hex_upper(const unsigned char* input, size_t length) +{ + return s3fs_hex(input, length, "0123456789ABCDEF"); +} + +std::string s3fs_base64(const unsigned char* input, size_t length) +{ + static constexpr char base[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; + + std::string result; + result.reserve(((length + 3 - 1) / 3) * 4 + 1); + + unsigned char parts[4]; + size_t rpos; + for(rpos = 0; rpos < length; rpos += 3){ + parts[0] = 
(input[rpos] & 0xfc) >> 2; + parts[1] = ((input[rpos] & 0x03) << 4) | ((((rpos + 1) < length ? input[rpos + 1] : 0x00) & 0xf0) >> 4); + parts[2] = (rpos + 1) < length ? (((input[rpos + 1] & 0x0f) << 2) | ((((rpos + 2) < length ? input[rpos + 2] : 0x00) & 0xc0) >> 6)) : 0x40; + parts[3] = (rpos + 2) < length ? (input[rpos + 2] & 0x3f) : 0x40; + + result += base[parts[0]]; + result += base[parts[1]]; + result += base[parts[2]]; + result += base[parts[3]]; + } + + return result; +} + +inline unsigned char char_decode64(const char ch) +{ + unsigned char by; + if('A' <= ch && ch <= 'Z'){ // A - Z + by = static_cast(ch - 'A'); + }else if('a' <= ch && ch <= 'z'){ // a - z + by = static_cast(ch - 'a' + 26); + }else if('0' <= ch && ch <= '9'){ // 0 - 9 + by = static_cast(ch - '0' + 52); + }else if('+' == ch){ // + + by = 62; + }else if('/' == ch){ // / + by = 63; + }else if('=' == ch){ // = + by = 64; + }else{ // something wrong + by = UCHAR_MAX; + } + return by; +} + +std::string s3fs_decode64(const char* input, size_t input_len) +{ + std::string result; + result.reserve(input_len / 4 * 3); + unsigned char parts[4]; + size_t rpos; + for(rpos = 0; rpos < input_len; rpos += 4){ + parts[0] = char_decode64(input[rpos]); + parts[1] = (rpos + 1) < input_len ? char_decode64(input[rpos + 1]) : 64; + parts[2] = (rpos + 2) < input_len ? char_decode64(input[rpos + 2]) : 64; + parts[3] = (rpos + 3) < input_len ? char_decode64(input[rpos + 3]) : 64; + + result += static_cast(((parts[0] << 2) & 0xfc) | ((parts[1] >> 4) & 0x03)); + if(64 == parts[2]){ + break; + } + result += static_cast(((parts[1] << 4) & 0xf0) | ((parts[2] >> 2) & 0x0f)); + if(64 == parts[3]){ + break; + } + result += static_cast(((parts[2] << 6) & 0xc0) | (parts[3] & 0x3f)); + } + return result; +} + +// +// detect and rewrite invalid utf8. We take invalid bytes +// and encode them into a private region of the unicode +// space. This is sometimes known as wtf8, wobbly transformation format. 
+// it is necessary because S3 validates the utf8 used for identifiers for +// correctness, while some clients may provide invalid utf, notably +// windows using cp1252. +// + +// Base location for transform. The range 0xE000 - 0xF8ff +// is a private range, se use the start of this range. +static constexpr unsigned int escape_base = 0xe000; + +// encode bytes into wobbly utf8. +// 'result' can be null. returns true if transform was needed. +bool s3fs_wtf8_encode(const char *s, std::string *result) +{ + bool invalid = false; + + // Pass valid utf8 code through + for (; *s; s++) { + const unsigned char c = *s; + + // single byte encoding + if (c <= 0x7f) { + if (result) { + *result += c; + } + continue; + } + + // otherwise, it must be one of the valid start bytes + if ( c >= 0xc2 && c <= 0xf5 ) { + // two byte encoding + // don't need bounds check, std::string is zero terminated + if ((c & 0xe0) == 0xc0 && (s[1] & 0xc0) == 0x80) { + // all two byte encodings starting higher than c1 are valid + if (result) { + *result += c; + *result += *(++s); + } + continue; + } + // three byte encoding + if ((c & 0xf0) == 0xe0 && (s[1] & 0xc0) == 0x80 && (s[2] & 0xc0) == 0x80) { + const unsigned code = ((c & 0x0f) << 12) | ((s[1] & 0x3f) << 6) | (s[2] & 0x3f); + if (code >= 0x800 && ! (code >= 0xd800 && code <= 0xd8ff)) { + // not overlong and not a surrogate pair + if (result) { + *result += c; + *result += *(++s); + *result += *(++s); + } + continue; + } + } + // four byte encoding + if ((c & 0xf8) == 0xf0 && (s[1] & 0xc0) == 0x80 && (s[2] & 0xc0) == 0x80 && (s[3] & 0xc0) == 0x80) { + const unsigned code = ((c & 0x07) << 18) | ((s[1] & 0x3f) << 12) | ((s[2] & 0x3f) << 6) | (s[3] & 0x3f); + if (code >= 0x10000 && code <= 0x10ffff) { + // not overlong and in defined unicode space + if (result) { + *result += c; + *result += *(++s); + *result += *(++s); + *result += *(++s); + } + continue; + } + } + } + // printf("invalid %02x at %d\n", c, i); + // Invalid utf8 code. 
Convert it to a private two byte area of unicode + // e.g. the e000 - f8ff area. This will be a three byte encoding + invalid = true; + if (result) { + unsigned escape = escape_base + c; + *result += static_cast(0xe0 | ((escape >> 12) & 0x0f)); + *result += static_cast(0x80 | ((escape >> 06) & 0x3f)); + *result += static_cast(0x80 | ((escape >> 00) & 0x3f)); + } + } + return invalid; +} + +std::string s3fs_wtf8_encode(const std::string &s) +{ + std::string result; + s3fs_wtf8_encode(s.c_str(), &result); + return result; +} + +// The reverse operation, turn encoded bytes back into their original values +// The code assumes that we map to a three-byte code point. +bool s3fs_wtf8_decode(const char *s, std::string *result) +{ + bool encoded = false; + for (; *s; s++) { + unsigned char c = *s; + // look for a three byte tuple matching our encoding code + if ((c & 0xf0) == 0xe0 && (s[1] & 0xc0) == 0x80 && (s[2] & 0xc0) == 0x80) { + unsigned code = (c & 0x0f) << 12; + code |= (s[1] & 0x3f) << 6; + code |= (s[2] & 0x3f) << 0; + if (code >= escape_base && code <= escape_base + 0xff) { + // convert back + encoded = true; + if(result){ + *result += static_cast(code - escape_base); + } + s+=2; + continue; + } + } + if (result) { + *result += c; + } + } + return encoded; +} + +std::string s3fs_wtf8_decode(const std::string &s) +{ + std::string result; + s3fs_wtf8_decode(s.c_str(), &result); + return result; +} + +// +// Encode only CR('\r'=0x0D) and it also encodes the '%' character accordingly. +// +// The xmlReadMemory() function in libxml2 replaces CR code with LF code('\n'=0x0A) +// due to the XML specification. +// s3fs uses libxml2 to parse the S3 response, and this automatic substitution +// of libxml2 may change the object name(file/dir name). Therefore, before passing +// the response to the xmlReadMemory() function, we need the string encoded by +// this function. +// +// [NOTE] +// Normally the quotes included in the XML content data are HTML encoded("""). 
+// Encoding for CR can also be HTML encoded as binary code (ex. "&#13;"), but
+// if the same string content(as file name) as this encoded string exists, the
+// original string cannot be distinguished whichever encoded or not encoded.
+// Therefore, CR is encoded in the same manner as URL encoding("%0D").
+// And it is assumed that there is no CR code in the S3 response tag etc.(actually
+// it shouldn't exist)
+//
+std::string get_encoded_cr_code(const char* pbase)
+{
+    std::string result;
+    if(!pbase){
+        return result;
+    }
+    std::string strbase(pbase);
+    size_t baselength = strbase.length();
+    size_t startpos = 0;
+    size_t foundpos;
+    while(startpos < baselength && std::string::npos != (foundpos = strbase.find_first_of("%\r", startpos))){
+        if(0 < (foundpos - startpos)){
+            result += strbase.substr(startpos, foundpos - startpos);
+        }
+        if('%' == strbase[foundpos]){
+            result += "%45";
+        }else if('\r' == strbase[foundpos]){
+            result += "%0D";
+        }
+        startpos = foundpos + 1;
+    }
+    if(startpos < baselength){
+        result += strbase.substr(startpos);
+    }
+    return result;
+}
+
+//
+// Decode a string encoded with get_encoded_cr_code().
+// +std::string get_decoded_cr_code(const char* pencode) +{ + std::string result; + if(!pencode){ + return result; + } + std::string strencode(pencode); + size_t encodelength = strencode.length(); + size_t startpos = 0; + size_t foundpos; + while(startpos < encodelength && std::string::npos != (foundpos = strencode.find('%', startpos))){ + if(0 < (foundpos - startpos)){ + result += strencode.substr(startpos, foundpos - startpos); + } + if((foundpos + 2) < encodelength && 0 == strencode.compare(foundpos, 3, "%45")){ + result += '%'; + startpos = foundpos + 3; + }else if((foundpos + 2) < encodelength && 0 == strencode.compare(foundpos, 3, "%0D")){ + result += '\r'; + startpos = foundpos + 3; + }else if((foundpos + 1) < encodelength && 0 == strencode.compare(foundpos, 2, "%%")){ + result += '%'; + startpos = foundpos + 2; + }else{ + result += '%'; + startpos = foundpos + 1; + } + } + if(startpos < encodelength){ + result += strencode.substr(startpos); + } + return result; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/string_util.h b/s3fs/string_util.h new file mode 100644 index 0000000..077f5a6 --- /dev/null +++ b/s3fs/string_util.h @@ -0,0 +1,136 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_STRING_UTIL_H_ +#define S3FS_STRING_UTIL_H_ + +#include +#include + +// +// A collection of string utilities for manipulating URLs and HTTP responses. +// +//------------------------------------------------------------------- +// Global variables +//------------------------------------------------------------------- +static constexpr char SPACES[] = " \t\r\n"; + +//------------------------------------------------------------------- +// Inline functions +//------------------------------------------------------------------- +static inline int is_prefix(const char *str, const char *prefix) { return strncmp(str, prefix, strlen(prefix)) == 0; } +static inline const char* SAFESTRPTR(const char *strptr) { return strptr ? strptr : ""; } + +//------------------------------------------------------------------- +// Macros(WTF8) +//------------------------------------------------------------------- +#define WTF8_ENCODE(ARG) \ + std::string ARG##_buf; \ + const char * ARG = _##ARG; \ + if (use_wtf8 && s3fs_wtf8_encode( _##ARG, 0 )) { \ + s3fs_wtf8_encode( _##ARG, &ARG##_buf); \ + ARG = ARG##_buf.c_str(); \ + } + +//------------------------------------------------------------------- +// Utilities +//------------------------------------------------------------------- +// TODO: rename to to_string? +std::string str(const struct timespec value); + +#ifdef __MSYS__ +// +// Polyfill for strptime function. +// +char* strptime(const char* s, const char* f, struct tm* tm); +#endif +// +// Convert string to off_t. Returns false on bad input. +// Replacement for C++11 std::stoll. +// +bool s3fs_strtoofft(off_t* value, const char* str, int base = 0); +// +// This function returns 0 if a value that cannot be converted is specified. 
+// Only call if 0 is considered an error and the operation can continue. +// +off_t cvt_strtoofft(const char* str, int base); + +// +// String Manipulation +// +std::string trim_left(std::string s, const char *t = SPACES); +std::string trim_right(std::string s, const char *t = SPACES); +std::string trim(std::string s, const char *t = SPACES); +std::string lower(std::string s); +std::string peeloff(const std::string& s); + +// +// Date string +// +std::string get_date_rfc850(); +void get_date_sigv3(std::string& date, std::string& date8601); +std::string get_date_string(time_t tm); +std::string get_date_iso8601(time_t tm); +bool get_unixtime_from_iso8601(const char* pdate, time_t& unixtime); +bool convert_unixtime_from_option_arg(const char* argv, time_t& unixtime); + +// +// For encoding +// +std::string urlEncodeGeneral(const std::string &s); +std::string urlEncodePath(const std::string &s); +std::string urlEncodeQuery(const std::string &s); +std::string urlDecode(const std::string& s); + +bool takeout_str_dquart(std::string& str); +bool get_keyword_value(const std::string& target, const char* keyword, std::string& value); + +// +// For binary string +// +std::string s3fs_hex_lower(const unsigned char* input, size_t length); +std::string s3fs_hex_upper(const unsigned char* input, size_t length); +std::string s3fs_base64(const unsigned char* input, size_t length); +std::string s3fs_decode64(const char* input, size_t input_len); + +// +// WTF8 +// +bool s3fs_wtf8_encode(const char *s, std::string *result); +std::string s3fs_wtf8_encode(const std::string &s); +bool s3fs_wtf8_decode(const char *s, std::string *result); +std::string s3fs_wtf8_decode(const std::string &s); + +// +// For CR in XML +// +std::string get_encoded_cr_code(const char* pbase); +std::string get_decoded_cr_code(const char* pencode); + +#endif // S3FS_STRING_UTIL_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: 
expandtab sw=4 ts=4 +*/ diff --git a/s3fs/threadpoolman.cpp b/s3fs/threadpoolman.cpp new file mode 100644 index 0000000..682529d --- /dev/null +++ b/s3fs/threadpoolman.cpp @@ -0,0 +1,264 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Takeshi Nakatani + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include +#include + +#include "s3fs_logger.h" +#include "threadpoolman.h" +#include "autolock.h" + +//------------------------------------------------ +// ThreadPoolMan class variables +//------------------------------------------------ +ThreadPoolMan* ThreadPoolMan::singleton = nullptr; + +//------------------------------------------------ +// ThreadPoolMan class methods +//------------------------------------------------ +bool ThreadPoolMan::Initialize(int count) +{ + if(ThreadPoolMan::singleton){ + S3FS_PRN_WARN("Already singleton for Thread Manager is existed, then re-create it."); + ThreadPoolMan::Destroy(); + } + ThreadPoolMan::singleton = new ThreadPoolMan(count); + return true; +} + +void ThreadPoolMan::Destroy() +{ + if(ThreadPoolMan::singleton){ + delete ThreadPoolMan::singleton; + ThreadPoolMan::singleton = nullptr; + } +} + +bool ThreadPoolMan::Instruct(std::unique_ptr pparam) +{ + if(!ThreadPoolMan::singleton){ + S3FS_PRN_WARN("The singleton object is not initialized yet."); + return false; + } + return ThreadPoolMan::singleton->SetInstruction(std::move(pparam)); +} + +// +// Thread worker +// +void* ThreadPoolMan::Worker(void* arg) +{ + ThreadPoolMan* psingleton = static_cast(arg); + + if(!psingleton){ + S3FS_PRN_ERR("The parameter for worker thread is invalid."); + return reinterpret_cast(-EIO); + } + S3FS_PRN_INFO3("Start worker thread in ThreadPoolMan."); + + while(!psingleton->IsExit()){ + // wait + psingleton->thpoolman_sem.wait(); + + if(psingleton->IsExit()){ + break; + } + + // get instruction + std::unique_ptr pparam; + { + AutoLock auto_lock(&(psingleton->thread_list_lock)); + + if(!psingleton->instruction_list.empty()){ + pparam = std::move(psingleton->instruction_list.front()); + psingleton->instruction_list.pop_front(); + if(!pparam){ + S3FS_PRN_WARN("Got a semaphore, but the instruction is empty."); + } + }else{ + S3FS_PRN_WARN("Got a semaphore, but there is no instruction."); + pparam = nullptr; + } + } + + 
if(pparam){ + void* retval = pparam->pfunc(pparam->args); + if(nullptr != retval){ + S3FS_PRN_WARN("The instruction function returned with somthign error code(%ld).", reinterpret_cast(retval)); + } + if(pparam->psem){ + pparam->psem->post(); + } + } + } + + return nullptr; +} + +//------------------------------------------------ +// ThreadPoolMan methods +//------------------------------------------------ +ThreadPoolMan::ThreadPoolMan(int count) : is_exit(false), thpoolman_sem(0), is_lock_init(false) +{ + if(count < 1){ + S3FS_PRN_CRIT("Failed to creating singleton for Thread Manager, because thread count(%d) is under 1.", count); + abort(); + } + if(ThreadPoolMan::singleton){ + S3FS_PRN_CRIT("Already singleton for Thread Manager is existed."); + abort(); + } + + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif + + int result; + if(0 != (result = pthread_mutex_init(&thread_list_lock, &attr))){ + S3FS_PRN_CRIT("failed to init thread_list_lock: %d", result); + abort(); + } + is_lock_init = true; + + // create threads + if(!StartThreads(count)){ + S3FS_PRN_ERR("Failed starting threads at initializing."); + abort(); + } +} + +ThreadPoolMan::~ThreadPoolMan() +{ + StopThreads(); + + if(is_lock_init){ + int result; + if(0 != (result = pthread_mutex_destroy(&thread_list_lock))){ + S3FS_PRN_CRIT("failed to destroy thread_list_lock: %d", result); + abort(); + } + is_lock_init = false; + } +} + +bool ThreadPoolMan::IsExit() const +{ + return is_exit; +} + +void ThreadPoolMan::SetExitFlag(bool exit_flag) +{ + is_exit = exit_flag; +} + +bool ThreadPoolMan::StopThreads() +{ + if(thread_list.empty()){ + S3FS_PRN_INFO("Any threads are running now, then nothing to do."); + return true; + } + + // all threads to exit + SetExitFlag(true); + for(size_t waitcnt = thread_list.size(); 0 < waitcnt; --waitcnt){ + thpoolman_sem.post(); + } + + // wait for threads exiting + 
for(thread_list_t::const_iterator iter = thread_list.begin(); iter != thread_list.end(); ++iter){ + void* retval = nullptr; + int result = pthread_join(*iter, &retval); + if(result){ + S3FS_PRN_ERR("failed pthread_join - result(%d)", result); + }else{ + S3FS_PRN_DBG("succeed pthread_join - return code(%ld)", reinterpret_cast(retval)); + } + } + thread_list.clear(); + + // reset semaphore(to zero) + while(thpoolman_sem.try_wait()){ + } + + return true; +} + +bool ThreadPoolMan::StartThreads(int count) +{ + if(count < 1){ + S3FS_PRN_ERR("Failed to creating threads, because thread count(%d) is under 1.", count); + return false; + } + + // stop all thread if they are running. + // cppcheck-suppress unmatchedSuppression + // cppcheck-suppress knownConditionTrueFalse + if(!StopThreads()){ + S3FS_PRN_ERR("Failed to stop existed threads."); + return false; + } + + // create all threads + SetExitFlag(false); + for(int cnt = 0; cnt < count; ++cnt){ + // run thread + pthread_t thread; + int result; + if(0 != (result = pthread_create(&thread, nullptr, ThreadPoolMan::Worker, static_cast(this)))){ + S3FS_PRN_ERR("failed pthread_create with return code(%d)", result); + StopThreads(); // if possible, stop all threads + return false; + } + thread_list.push_back(thread); + } + return true; +} + +bool ThreadPoolMan::SetInstruction(std::unique_ptr pparam) +{ + if(!pparam){ + S3FS_PRN_ERR("The parameter value is nullptr."); + return false; + } + + // set parameter to list + { + AutoLock auto_lock(&thread_list_lock); + instruction_list.push_back(std::move(pparam)); + } + + // run thread + thpoolman_sem.post(); + + return true; +} + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/threadpoolman.h b/s3fs/threadpoolman.h new file mode 100644 index 0000000..675f374 --- /dev/null +++ b/s3fs/threadpoolman.h @@ -0,0 +1,109 @@ +/* + * s3fs - FUSE-based file system backed by Amazon 
S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_THREADPOOLMAN_H_ +#define S3FS_THREADPOOLMAN_H_ + +#include +#include +#include +#include + +#include "psemaphore.h" + +//------------------------------------------------ +// Typedefs for functions and structures +//------------------------------------------------ +// +// Prototype function +// +typedef void* (*thpoolman_worker)(void*); // same as start_routine for pthread_create function + +// +// Parameter structure +// +// [NOTE] +// The args member is a value that is an argument of the worker function. +// The psem member is allowed nullptr. If it is not nullptr, the post() method is +// called when finishing the function. 
+// +struct thpoolman_param +{ + void* args; + Semaphore* psem; + thpoolman_worker pfunc; + + thpoolman_param() : args(nullptr), psem(nullptr), pfunc(nullptr) {} +}; + +typedef std::list> thpoolman_params_t; + +typedef std::vector thread_list_t; + +//------------------------------------------------ +// Class ThreadPoolMan +//------------------------------------------------ +class ThreadPoolMan +{ + private: + static ThreadPoolMan* singleton; + + std::atomic is_exit; + Semaphore thpoolman_sem; + + bool is_lock_init; + pthread_mutex_t thread_list_lock; + thread_list_t thread_list; + + thpoolman_params_t instruction_list; + + private: + static void* Worker(void* arg); + + explicit ThreadPoolMan(int count = 1); + ~ThreadPoolMan(); + ThreadPoolMan(const ThreadPoolMan&) = delete; + ThreadPoolMan(ThreadPoolMan&&) = delete; + ThreadPoolMan& operator=(const ThreadPoolMan&) = delete; + ThreadPoolMan& operator=(ThreadPoolMan&&) = delete; + + bool IsExit() const; + void SetExitFlag(bool exit_flag); + + bool StopThreads(); + bool StartThreads(int count); + bool SetInstruction(std::unique_ptr pparam); + + public: + static bool Initialize(int count); + static void Destroy(); + static bool Instruct(std::unique_ptr pparam); +}; + +#endif // S3FS_THREADPOOLMAN_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/s3fs/types.h b/s3fs/types.h new file mode 100644 index 0000000..5d89e37 --- /dev/null +++ b/s3fs/types.h @@ -0,0 +1,365 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2007 Randy Rizun + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef S3FS_TYPES_H_ +#define S3FS_TYPES_H_ + +#include +#include +#include +#include +#include +#include + +// +// For extended attribute +// (HAVE_XXX symbols are defined in config.h) +// +#ifdef HAVE_SYS_EXTATTR_H +#include +#elif HAVE_ATTR_XATTR_H +#include +#elif HAVE_SYS_XATTR_H +#include +#endif + +//------------------------------------------------------------------- +// xattrs_t +//------------------------------------------------------------------- +// +// Header "x-amz-meta-xattr" is for extended attributes. +// This header is url encoded string which is json formatted. 
+// x-amz-meta-xattr:urlencode({"xattr-1":"base64(value-1)","xattr-2":"base64(value-2)","xattr-3":"base64(value-3)"}) +// +typedef std::map xattrs_t; + +//------------------------------------------------------------------- +// acl_t +//------------------------------------------------------------------- +enum class acl_t{ + PRIVATE, + PUBLIC_READ, + PUBLIC_READ_WRITE, + AWS_EXEC_READ, + AUTHENTICATED_READ, + BUCKET_OWNER_READ, + BUCKET_OWNER_FULL_CONTROL, + LOG_DELIVERY_WRITE, + UNKNOWN +}; + +inline const char* str(acl_t value) +{ + switch(value){ + case acl_t::PRIVATE: + return "private"; + case acl_t::PUBLIC_READ: + return "public-read"; + case acl_t::PUBLIC_READ_WRITE: + return "public-read-write"; + case acl_t::AWS_EXEC_READ: + return "aws-exec-read"; + case acl_t::AUTHENTICATED_READ: + return "authenticated-read"; + case acl_t::BUCKET_OWNER_READ: + return "bucket-owner-read"; + case acl_t::BUCKET_OWNER_FULL_CONTROL: + return "bucket-owner-full-control"; + case acl_t::LOG_DELIVERY_WRITE: + return "log-delivery-write"; + case acl_t::UNKNOWN: + return nullptr; + } + abort(); +} + +inline acl_t to_acl(const char *acl) +{ + if(0 == strcmp(acl, "private")){ + return acl_t::PRIVATE; + }else if(0 == strcmp(acl, "public-read")){ + return acl_t::PUBLIC_READ; + }else if(0 == strcmp(acl, "public-read-write")){ + return acl_t::PUBLIC_READ_WRITE; + }else if(0 == strcmp(acl, "aws-exec-read")){ + return acl_t::AWS_EXEC_READ; + }else if(0 == strcmp(acl, "authenticated-read")){ + return acl_t::AUTHENTICATED_READ; + }else if(0 == strcmp(acl, "bucket-owner-read")){ + return acl_t::BUCKET_OWNER_READ; + }else if(0 == strcmp(acl, "bucket-owner-full-control")){ + return acl_t::BUCKET_OWNER_FULL_CONTROL; + }else if(0 == strcmp(acl, "log-delivery-write")){ + return acl_t::LOG_DELIVERY_WRITE; + }else{ + return acl_t::UNKNOWN; + } +} + +//------------------------------------------------------------------- +// sse_type_t 
+//------------------------------------------------------------------- +enum class sse_type_t{ + SSE_DISABLE = 0, // not use server side encrypting + SSE_S3, // server side encrypting by S3 key + SSE_C, // server side encrypting by custom key + SSE_KMS // server side encrypting by kms id +}; + +enum class signature_type_t { + V2_ONLY, + V4_ONLY, + V2_OR_V4 +}; + +//---------------------------------------------- +// etaglist_t / filepart / untreatedpart +//---------------------------------------------- +// +// Etag string and part number pair +// +struct etagpair +{ + std::string etag; // expected etag value + int part_num; // part number + + explicit etagpair(const char* petag = nullptr, int part = -1) : etag(petag ? petag : ""), part_num(part) {} + + ~etagpair() + { + clear(); + } + + void clear() + { + etag.erase(); + part_num = -1; + } +}; + +// Requires pointer stability and thus must be a list not a vector +typedef std::list etaglist_t; + +struct petagpool +{ + // Requires pointer stability and thus must be a list not a vector + std::list petaglist; + + ~petagpool() + { + clear(); + } + + void clear() + { + petaglist.clear(); + } + + etagpair* add(const etagpair& etag_entity) + { + petaglist.push_back(etag_entity); + return &petaglist.back(); + } +}; + +// +// Each part information for Multipart upload +// +struct filepart +{ + bool uploaded; // does finish uploading + std::string etag; // expected etag value + int fd; // base file(temporary full file) descriptor + off_t startpos; // seek fd point for uploading + off_t size; // uploading size + bool is_copy; // whether is copy multipart + etagpair* petag; // use only parallel upload + char* buf; // user buf. 
if this not null, it will not write to the file + + explicit filepart(bool is_uploaded = false, int _fd = -1, off_t part_start = 0, off_t part_size = -1, bool is_copy_part = false, etagpair* petagpair = nullptr, char* userBuf = nullptr) : uploaded(false), fd(_fd), startpos(part_start), size(part_size), is_copy(is_copy_part), petag(petagpair), buf(userBuf) {} + + ~filepart() + { + clear(); + } + + void clear() + { + uploaded = false; + etag = ""; + fd = -1; + startpos = 0; + size = -1; + is_copy = false; + petag = nullptr; + buf = nullptr; + } + + void add_etag_list(etaglist_t& list, int partnum = -1) + { + if(-1 == partnum){ + partnum = static_cast(list.size()) + 1; + } + list.push_back(etagpair(nullptr, partnum)); + petag = &list.back(); + } + + void set_etag(etagpair* petagobj) + { + petag = petagobj; + } + + int get_part_number() const + { + if(!petag){ + return -1; + } + return petag->part_num; + } +}; + +typedef std::vector filepart_list_t; + +// +// Each part information for Untreated parts +// +struct untreatedpart +{ + off_t start; // untreated start position + off_t size; // number of untreated bytes + long untreated_tag; // untreated part tag + + explicit untreatedpart(off_t part_start = 0, off_t part_size = 0, long part_untreated_tag = 0) : start(part_start), size(part_size), untreated_tag(part_untreated_tag) + { + if(part_start < 0 || part_size <= 0){ + clear(); // wrong parameter, so clear value. + } + } + + ~untreatedpart() + { + clear(); + } + + void clear() + { + start = 0; + size = 0; + untreated_tag = 0; + } + + // [NOTE] + // Check if the areas overlap + // However, even if the areas do not overlap, this method returns true if areas are adjacent. 
+ // + bool check_overlap(off_t chk_start, off_t chk_size) + { + if(chk_start < 0 || chk_size <= 0 || start < 0 || size <= 0 || (chk_start + chk_size) < start || (start + size) < chk_start){ + return false; + } + return true; + } + + bool stretch(off_t add_start, off_t add_size, long tag) + { + if(!check_overlap(add_start, add_size)){ + return false; + } + off_t new_start = std::min(start, add_start); + off_t new_next_start = std::max((start + size), (add_start + add_size)); + + start = new_start; + size = new_next_start - new_start; + untreated_tag = tag; + + return true; + } +}; + +typedef std::vector untreated_list_t; + +// +// Information on each part of multipart upload +// +struct mp_part +{ + off_t start; + off_t size; + int part_num; // Set only for information to upload + + explicit mp_part(off_t set_start = 0, off_t set_size = 0, int part = 0) : start(set_start), size(set_size), part_num(part) {} +}; + +typedef std::vector mp_part_list_t; + +inline off_t total_mp_part_list(const mp_part_list_t& mplist) +{ + off_t size = 0; + for(mp_part_list_t::const_iterator iter = mplist.begin(); iter != mplist.end(); ++iter){ + size += iter->size; + } + return size; +} + +// +// Rename directory struct +// +struct mvnode +{ + mvnode(std::string old_path, std::string new_path, bool is_dir, bool is_normdir) + : old_path(std::move(old_path)) + , new_path(std::move(new_path)) + , is_dir(is_dir) + , is_normdir(is_normdir) + {} + std::string old_path; + std::string new_path; + bool is_dir; + bool is_normdir; +}; + +//------------------------------------------------------------------- +// mimes_t +//------------------------------------------------------------------- +struct case_insensitive_compare_func +{ + bool operator()(const std::string& a, const std::string& b) const { + return strcasecmp(a.c_str(), b.c_str()) < 0; + } +}; +typedef std::map mimes_t; + +//------------------------------------------------------------------- +// Typedefs specialized for use 
+//------------------------------------------------------------------- +typedef std::vector readline_t; +typedef std::map kvmap_t; +typedef std::map bucketkvmap_t; + +#endif // S3FS_TYPES_H_ + +/* +* Local variables: +* tab-width: 4 +* c-basic-offset: 4 +* End: +* vim600: expandtab sw=4 ts=4 fdm=marker +* vim<600: expandtab sw=4 ts=4 +*/ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 0000000..6824e4b --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,25 @@ +SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) + +add_executable(test_page_cache test_page_cache.cpp) +target_link_libraries(test_page_cache PUBLIC hybridcache_local ${THIRD_PARTY_LIBRARIES}) + +add_executable(test_future test_future.cpp) +target_link_libraries(test_future PUBLIC ${THIRD_PARTY_LIBRARIES}) + +add_executable(test_read_cache test_read_cache.cpp) +target_link_libraries(test_read_cache PUBLIC hybridcache_local ${THIRD_PARTY_LIBRARIES}) + +add_executable(test_write_cache test_write_cache.cpp) +target_link_libraries(test_write_cache PUBLIC hybridcache_local ${THIRD_PARTY_LIBRARIES}) + +add_executable(test_config test_config.cpp) +target_link_libraries(test_config PUBLIC hybridcache_local ${THIRD_PARTY_LIBRARIES}) + +add_executable(test_global_read_cache test_global_read_cache.cpp) +target_link_libraries(test_global_read_cache PUBLIC madfs_global) + +add_executable(test_global_read_cache_perf test_global_read_cache_perf.cpp) +target_link_libraries(test_global_read_cache_perf PUBLIC madfs_global) + +add_executable(test_global_write_cache_perf test_global_write_cache_perf.cpp) +target_link_libraries(test_global_write_cache_perf PUBLIC madfs_global) diff --git a/test/hybridcache.conf b/test/hybridcache.conf new file mode 100644 index 0000000..9e00fa1 --- /dev/null +++ b/test/hybridcache.conf @@ -0,0 +1,39 @@ +# ReadCache +ReadCacheConfig.CacheConfig.CacheName=Read +ReadCacheConfig.CacheConfig.MaxCacheSize=1073741824 +ReadCacheConfig.CacheConfig.PageBodySize=65536 
+ReadCacheConfig.CacheConfig.PageMetaSize=1024 +ReadCacheConfig.CacheConfig.EnableCAS=1 +ReadCacheConfig.CacheConfig.CacheLibConfig.EnableNvmCache=0 +ReadCacheConfig.CacheConfig.CacheLibConfig.RaidPath= +ReadCacheConfig.CacheConfig.CacheLibConfig.RaidFileNum= +ReadCacheConfig.CacheConfig.CacheLibConfig.RaidFileSize= +ReadCacheConfig.CacheConfig.CacheLibConfig.DataChecksum= +ReadCacheConfig.DownloadNormalFlowLimit=1048576 +ReadCacheConfig.DownloadBurstFlowLimit=10485760 + +# WriteCache +WriteCacheConfig.CacheConfig.CacheName=Write +WriteCacheConfig.CacheConfig.MaxCacheSize=104857600 +WriteCacheConfig.CacheConfig.PageBodySize=65536 +WriteCacheConfig.CacheConfig.PageMetaSize=1024 +WriteCacheConfig.CacheConfig.EnableCAS=1 +WriteCacheConfig.CacheSafeRatio=70 + +# GlobalCache +UseGlobalCache=1 +GlobalCacheConfig.EnableWriteCache=1 +GlobalCacheConfig.EtcdAddress=http://192.168.1.87:2379 +GlobalCacheConfig.GlobalServers=optane07:8000,optane08:8000 +GlobalCacheConfig.GflagFile= + +ThreadNum=16 +BackFlushCacheRatio=40 +UploadNormalFlowLimit=1048576 +UploadBurstFlowLimit=10485760 +LogPath=. 
+# LogLevel: GLOG_INFO=0, GLOG_WARNING=1, GLOG_ERROR=2, GLOG_FATAL=3 +LogLevel=1 +EnableLog=0 +FlushToRead=1 +CleanCacheByOpen=0 diff --git a/test/test_config.cpp b/test/test_config.cpp new file mode 100644 index 0000000..cdce38a --- /dev/null +++ b/test/test_config.cpp @@ -0,0 +1,26 @@ +#include + +#include "gtest/gtest.h" + +#include "config.h" + +using namespace std; +using namespace HybridCache; + +TEST(ConfigRead, Read) { + HybridCacheConfig cfg; + EXPECT_EQ(true, GetHybridCacheConfig("../../test/hybridcache.conf", cfg)); + EXPECT_EQ(1073741824, cfg.ReadCacheCfg.CacheCfg.MaxCacheSize); + EXPECT_EQ(16, cfg.ThreadNum); + + EXPECT_EQ(true, cfg.UseGlobalCache); + EXPECT_EQ("http://192.168.1.87:2379", cfg.GlobalCacheCfg.EtcdAddress); + EXPECT_EQ(2, cfg.GlobalCacheCfg.GlobalServers.size()); + EXPECT_EQ("optane08:8000", cfg.GlobalCacheCfg.GlobalServers[1]); +} + +int main(int argc, char **argv) { + printf("Running ConfigRead test from %s\n", __FILE__); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/test/test_future.cpp b/test/test_future.cpp new file mode 100644 index 0000000..a88b301 --- /dev/null +++ b/test/test_future.cpp @@ -0,0 +1,72 @@ +#include +#include + +#include "folly/futures/Future.h" +#include "gtest/gtest.h" + +#include "common.h" + +using namespace folly; +using namespace std; + +std::shared_ptr executor; + +folly::Future test(int i) { + std::cout << i << " start" << endl; + return folly::via(executor.get(), [i]() -> int { + std::cout << i << " download ..." 
<< endl; + std::this_thread::sleep_for(2000ms); + std::cout << i << " end" << endl; + return 0; + }); +} + +folly::Future testCombine() { + std::cout << "testCombine start" << endl; + + std::vector> fs; + for (int i = 0; i < 3; i++) { + fs.push_back(test(i)); + } + + std::cout << "testCombine mid" << endl; + + auto f = collectAll(fs).via(executor.get()) + .thenValue([](std::vector, std::allocator>>&& tups) { + int res = 0; + for (const auto& t : tups) { + if (t.value() == 0) ++res; + } + std::cout << "testCombine end" << endl; + return res; + }); + + return f; +} + +TEST(FollyFuture, combine) { + auto f = testCombine(); + std::cout << "testCombine running..." << endl; + f.wait(); + std::cout << "testCombine res:" << f.value() << endl; + EXPECT_EQ(3, f.value()); +} + +TEST(FollyFuture, chaining) { + std::cout << "test chaining..." << endl; + auto f = test(1); + auto f2 = move(f).thenValue([](int i){ return i + 100; }); + f2.wait(); + std::cout << "chaining res:" << f2.value() << endl; +} + +int main(int argc, char* argv[]) { + executor = std::make_shared(16); + + printf("Running folly::future test from %s\n", __FILE__); + testing::InitGoogleTest(&argc, argv); + int res = RUN_ALL_TESTS(); + + executor->stop(); + return res; +} diff --git a/test/test_global_read_cache.cpp b/test/test_global_read_cache.cpp new file mode 100644 index 0000000..4bbd186 --- /dev/null +++ b/test/test_global_read_cache.cpp @@ -0,0 +1,176 @@ +#include +#include +#include +#include +#include +#include + +#include "FileSystemDataAdaptor.h" +#include "GlobalDataAdaptor.h" +#include "ReadCacheClient.h" + +DEFINE_string(server, "0.0.0.0:8000", "IP Address of server"); +DEFINE_int32(bench_repeat, 1000, "Repeat count"); +DEFINE_int32(bench_size, 1024 * 16, "Request size in bytes"); +DEFINE_string(filename, "sample.dat", "Test file name"); + +std::string ReadDirectly(const std::string &path, size_t start, size_t length) { + int fd = open(path.c_str(), O_RDONLY); + if (fd < 0) { + PLOG(ERROR) << "Fail 
to open file: " << path; + return ""; + } + + if (lseek(fd, start, SEEK_SET) < 0) { + PLOG(ERROR) << "Fail to seek file: " << path << " at pos " << start; + close(fd); + return ""; + } + + std::string output; + output.resize(length); + ssize_t nbytes = read(fd, &output[0], length); + if (nbytes != length) { + PLOG(ERROR) << "Fail to read file: " << path + << ", expected read " << length + << ", actual read " << nbytes; + close(fd); + return ""; + } + close(fd); + return output; +} + +ssize_t GetSize(const std::string &path) { + struct stat st; + if (stat(path.c_str(), &st)) { + PLOG(ERROR) << "Fail to state file: " << path; + return -1; + } + return st.st_size; +} + +std::vector SplitString(const std::string &input) { + std::vector result; + std::stringstream ss(input); + std::string item; + while (std::getline(ss, item, ',')) { + result.push_back(item); + } + return result; +} + +TEST(read_cache, generate_get_chunk_request) +{ + const size_t chunk_size = GetGlobalConfig().default_policy.read_chunk_size; + ByteBuffer mock_buffer((char *) 0, 10 * chunk_size); + auto get_chunk_request = ReadCacheClient::GenerateGetChunkRequestsV2; + + // 0 ... CS+16========2CS + { + std::vector requests; + get_chunk_request("foo", chunk_size + 16, chunk_size - 16, mock_buffer, requests, chunk_size); + ASSERT_EQ(requests.size(), 1); + ASSERT_EQ(requests[0].chunk_id, 1); + ASSERT_EQ(requests[0].chunk_start, 16); + ASSERT_EQ(requests[0].chunk_len, chunk_size - 16); + ASSERT_EQ(requests[0].buffer.data, (char *) 0); + ASSERT_EQ(requests[0].buffer.len, chunk_size - 16); + ASSERT_EQ(requests[0].user_key, "foo"); + ASSERT_EQ(requests[0].internal_key, "foo-1-" + std::to_string(chunk_size)); + } + + // 0 ... 
CS+16========2CS===2CS+16 + { + std::vector requests; + get_chunk_request("foo", chunk_size + 16, chunk_size, mock_buffer, requests, chunk_size); + ASSERT_EQ(requests.size(), 2); + ASSERT_EQ(requests[0].chunk_id, 1); + ASSERT_EQ(requests[0].chunk_start, 16); + ASSERT_EQ(requests[0].chunk_len, chunk_size - 16); + ASSERT_EQ(requests[0].buffer.data, (char *) 0); + ASSERT_EQ(requests[0].buffer.len, chunk_size - 16); + ASSERT_EQ(requests[0].user_key, "foo"); + ASSERT_EQ(requests[0].internal_key, "foo-1-" + std::to_string(chunk_size)); + ASSERT_EQ(requests[1].chunk_id, 2); + ASSERT_EQ(requests[1].chunk_start, 0); + ASSERT_EQ(requests[1].chunk_len, 16); + ASSERT_EQ(requests[1].buffer.data, (char *) chunk_size - 16); + ASSERT_EQ(requests[1].buffer.len, 16); + ASSERT_EQ(requests[1].user_key, "foo"); + ASSERT_EQ(requests[1].internal_key, "foo-2-" + std::to_string(chunk_size)); + } + + // empty request + { + std::vector requests; + get_chunk_request("foo", chunk_size + 16, 0, mock_buffer, requests, chunk_size); + ASSERT_EQ(requests.size(), 0); + } +} + +TEST(read_cache, get_chunk) +{ + auto etcd_client = std::make_shared("http://127.0.0.1:2379"); + auto base = std::make_shared(); + auto global = std::make_shared(base, SplitString(FLAGS_server), etcd_client); + + const size_t chunk_size = GetGlobalConfig().default_policy.read_chunk_size; + ByteBuffer buffer(new char[size_t(FLAGS_bench_size)], size_t(FLAGS_bench_size)); + size_t file_size = GetSize("sample.dat"); + + for (int i = 0; i < FLAGS_bench_repeat; ++i) { + size_t start_pos = lrand48() % file_size; + size_t length = std::min(size_t(FLAGS_bench_size), file_size - start_pos); + if (length) length = lrand48() % length; + ASSERT_EQ(0, global->DownLoad("sample.dat", start_pos, length, buffer).get()); + buffer.data[length] = '\0'; + std::string buffer_cpp(buffer.data, length); + ASSERT_EQ(buffer_cpp, ReadDirectly("sample.dat", start_pos, length)); + } + + ASSERT_EQ(OK, global->DownLoad("sample.dat", file_size - 2, 0, 
buffer).get()); + ASSERT_EQ(OK, global->DownLoad("sample.dat", file_size - 2, 2, buffer).get()); + ASSERT_EQ(END_OF_FILE, global->DownLoad("sample.dat", file_size - 2, 5, buffer).get()); +} + +TEST(read_cache, mix_read_write) +{ + auto etcd_client = std::make_shared("http://127.0.0.1:2379"); + auto base = std::make_shared(); + auto global = std::make_shared(base, SplitString(FLAGS_server), etcd_client); + + const size_t chunk_size = GetGlobalConfig().default_policy.read_chunk_size; + ByteBuffer buffer(new char[10 * chunk_size], 10 * chunk_size); + std::map headers; + for (size_t i = 0; i < buffer.len; ++i) { + buffer.data[i] = lrand48() % 26 + 'a'; + } + std::string buffer_backup(buffer.data, buffer.len); + ASSERT_EQ(0, global->UpLoad("hello", buffer.len, buffer, headers).get()); + memset(buffer.data, 0, buffer.len); + ASSERT_EQ(0, global->DownLoad("hello", 0, buffer.len, buffer).get()); + ASSERT_EQ(std::string(buffer.data, buffer.len).substr(32), buffer_backup.substr(32)); + + strcpy(buffer.data, "Hello Madfs-----"); + ASSERT_EQ(0, global->UpLoad("hello", 17, buffer, headers).get()); + memset(buffer.data, 0, buffer.len); + ASSERT_EQ(0, global->DownLoad("hello", 6, 5, buffer).get()); + ASSERT_EQ(buffer.data, std::string("Madfs")); + + size_t fsize; + ASSERT_EQ(0, global->Head("hello", fsize, headers).get()); + ASSERT_EQ(fsize, 17); + + ASSERT_EQ(0, global->DeepFlush("hello").get()); + + ASSERT_EQ(0, global->Delete("hello").get()); + ASSERT_EQ(NOT_FOUND, global->Head("hello", fsize, headers).get()); +} + +int main(int argc, char **argv) +{ + gflags::ParseCommandLineFlags(&argc, &argv, true); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/test/test_global_read_cache_perf.cpp b/test/test_global_read_cache_perf.cpp new file mode 100644 index 0000000..7f3f2f6 --- /dev/null +++ b/test/test_global_read_cache_perf.cpp @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include + +#include "S3DataAdaptor.h" +#include 
"FileSystemDataAdaptor.h" +#include "GlobalDataAdaptor.h" +#include "ReadCacheClient.h" + +DEFINE_string(server, "0.0.0.0:8000", "IP Address of server"); +DEFINE_int32(threads, 1, "Thread count in perf test"); +DEFINE_int32(duration, 5, "Test duration in seconds"); +DEFINE_int32(depth, 1, "IO depth"); +DEFINE_bool(use_s3, false, "Use S3 storage"); +DEFINE_string(filename, "sample.dat", "Test file name"); + +std::vector SplitString(const std::string &input) { + std::vector result; + std::stringstream ss(input); + std::string item; + while (std::getline(ss, item, ',')) { + result.push_back(item); + } + return result; +} + +TEST(global_cache_client, perf) +{ + auto etcd_client = std::make_shared("http://127.0.0.1:2379"); + + std::shared_ptr base_adaptor = std::make_shared(); + if (FLAGS_use_s3) { + base_adaptor = std::make_shared(); + } else { + base_adaptor = std::make_shared(); + } + auto global_adaptor = std::make_shared(base_adaptor, SplitString(FLAGS_server), etcd_client); + const size_t chunk_size = GetGlobalConfig().default_policy.read_chunk_size; + + struct stat st_buf; + if (stat(FLAGS_filename.c_str(), &st_buf)) { + PLOG(ERROR) << "Failed to stat file"; + exit(EXIT_FAILURE); + } + auto chunk_count = std::min(1024, (int) (st_buf.st_size / chunk_size)); + + std::vector workers; + std::atomic running(true); + std::atomic operations_total(0); + for (int i = 0; i < FLAGS_threads; ++i) { + workers.emplace_back([&] { + ByteBuffer buffer[FLAGS_depth]; + for (int j = 0; j < FLAGS_depth; ++j) { + buffer[j].data = new char[chunk_size]; + buffer[j].len = chunk_size; + } + uint64_t operations = 0; + std::vector > future_list; + while(running) { + future_list.clear(); + for (int j = 0; j < FLAGS_depth; ++j) { + future_list.emplace_back(global_adaptor->DownLoad(FLAGS_filename.c_str(), chunk_size * (lrand48() % chunk_count), chunk_size, buffer[j])); + } + folly::collectAll(future_list).wait(); + operations += FLAGS_depth; + } + operations_total.fetch_add(operations); + }); 
+ } + sleep(FLAGS_duration); + running = false; + for (int i = 0; i < FLAGS_threads; ++i) { + workers[i].join(); + } + LOG(INFO) << "operation per second: " << operations_total.load() / double(FLAGS_duration) + << "data transfered (MB/s): " << chunk_size * operations_total.load() / double(FLAGS_duration) / 1024.0 / 1024.0; +} + +int main(int argc, char **argv) +{ + gflags::ParseCommandLineFlags(&argc, &argv, true); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/test/test_global_write_cache_perf.cpp b/test/test_global_write_cache_perf.cpp new file mode 100644 index 0000000..267cfa6 --- /dev/null +++ b/test/test_global_write_cache_perf.cpp @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include + +#include "S3DataAdaptor.h" +#include "FileSystemDataAdaptor.h" +#include "GlobalDataAdaptor.h" +#include "ReadCacheClient.h" + +DEFINE_string(server, "0.0.0.0:8000", "IP Address of server"); +DEFINE_string(local_dir, "", "Local S3 dir"); +DEFINE_int32(threads, 1, "Thread count in perf test"); +DEFINE_int32(duration, 5, "Test duration in seconds"); +DEFINE_int64(size, 16, "File size in MB"); +DEFINE_int32(depth, 1, "IO depth"); +DEFINE_bool(use_s3, false, "Use S3 storage"); + +std::vector SplitString(const std::string &input) { + std::vector result; + std::stringstream ss(input); + std::string item; + while (std::getline(ss, item, ',')) { + result.push_back(item); + } + return result; +} + +TEST(global_cache_client, perf) +{ + auto etcd_client = std::make_shared("http://192.168.3.87:2379"); + + std::shared_ptr base_adaptor = std::make_shared(); + if (FLAGS_use_s3) { + base_adaptor = std::make_shared(); + } else { + base_adaptor = std::make_shared(FLAGS_local_dir); + } + auto global_adaptor = std::make_shared(base_adaptor, SplitString(FLAGS_server), etcd_client); + const size_t chunk_size = FLAGS_size * 1024 * 1024; + std::vector workers; + std::atomic running(true); + std::atomic operations_total(0); + butil::Timer t; + 
t.start(); + for (int i = 0; i < FLAGS_threads; ++i) { + workers.emplace_back([&] { + ByteBuffer buffer[FLAGS_depth]; + for (int j = 0; j < FLAGS_depth; ++j) { + int ret = posix_memalign((void **) &buffer[j].data, 4096, chunk_size); + // memset(buffer[j].data, 'x', chunk_size); + ASSERT(!ret); + buffer[j].len = chunk_size; + } + uint64_t operations = 0; + std::vector > future_list; + std::map header; + while(running) { + future_list.clear(); + for (int j = 0; j < FLAGS_depth; ++j) { + future_list.emplace_back(global_adaptor->UpLoad("foo/write-dummy-" + std::to_string(j), chunk_size, buffer[j], header)); + } + folly::collectAll(future_list).wait(); + operations += FLAGS_depth; + } + operations_total.fetch_add(operations); + }); + } + sleep(FLAGS_duration); + running = false; + for (int i = 0; i < FLAGS_threads; ++i) { + workers[i].join(); + } + t.stop(); + + LOG(INFO) << "operation per second: " << operations_total.load() / double(t.s_elapsed()) + << "data transfered (MB/s): " << chunk_size * operations_total.load() / double(t.s_elapsed()) / 1024.0 / 1024.0; +} + +int main(int argc, char **argv) +{ + gflags::ParseCommandLineFlags(&argc, &argv, true); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/test/test_page_cache.cpp b/test/test_page_cache.cpp new file mode 100644 index 0000000..92fa9f7 --- /dev/null +++ b/test/test_page_cache.cpp @@ -0,0 +1,174 @@ +#include + +#include "gtest/gtest.h" + +#include "errorcode.h" +#include "page_cache.h" + +using namespace folly; + +using namespace std; +using namespace HybridCache; + +CacheConfig cfg; +std::shared_ptr page; + +const std::string key1 = "007"; +const std::string key2 = "009"; + +const size_t TEST_LEN = 64 * 1024; +std::unique_ptr bufIn(new char[TEST_LEN]); +std::unique_ptr bufOut(new char[TEST_LEN]); + +TEST(PageCache, Init) { + srand((unsigned)time(NULL)); + for (int i=0; i(cfg); + EXPECT_EQ(0, page->Init()); +} + +TEST(PageCache, Write) { + EXPECT_EQ(0, page->Write(key1, 0, 4, 
bufIn.get())); + EXPECT_EQ(0, page->Write(key1, 5, 4, bufIn.get())); + EXPECT_EQ(0, page->Write(key2, 0, TEST_LEN, bufIn.get())); +} + +TEST(PageCache, Read) { + std::vector> dataBoundary; + EXPECT_EQ(0, page->Read(key1, 0, 10, bufOut.get(), dataBoundary)); + + for (int i=0; i<4; ++i) { + EXPECT_EQ(bufIn[i], bufOut[i]); + } + for (int i=0; i<4; ++i) { + EXPECT_EQ(bufIn[i], bufOut[i+5]); + } + + EXPECT_EQ(2, dataBoundary.size()); + auto it = dataBoundary.begin(); + for (int i=0; i<2; ++i) { + if (i==0) { + EXPECT_EQ(0, it->first); + EXPECT_EQ(4, it->second); + } else { + EXPECT_EQ(5, it->first); + EXPECT_EQ(4, it->second); + } + ++it; + } + + dataBoundary.clear(); + EXPECT_EQ(0, page->Read(key1, 5, 4, bufOut.get(), dataBoundary)); + for (int i=0; i<4; ++i) { + EXPECT_EQ(bufIn[i], bufOut[i]); + } + EXPECT_EQ(1, dataBoundary.size()); + EXPECT_EQ(0, dataBoundary.begin()->first); + EXPECT_EQ(4, dataBoundary.begin()->second); + + dataBoundary.clear(); + EXPECT_EQ(0, page->Read(key2, 0, TEST_LEN, bufOut.get(), dataBoundary)); + for (int i=0; ifirst); + EXPECT_EQ(TEST_LEN, dataBoundary.begin()->second); + + dataBoundary.clear(); + EXPECT_EQ(0, page->Read(key2, 1, TEST_LEN-1, bufOut.get(), dataBoundary)); + for (int i=0; ifirst); + EXPECT_EQ(TEST_LEN-1, dataBoundary.begin()->second); +} + +TEST(PageCache, GetAllCache) { + std::vector> dataSegments; + page->GetAllCache(key1, dataSegments); + EXPECT_EQ(2, dataSegments.size()); + EXPECT_EQ(0, dataSegments[0].second); + EXPECT_EQ(5, dataSegments[1].second); + + for (auto& it : dataSegments) { + EXPECT_EQ(4, it.first.len); + for (int i=0; iGetAllCache(key2, dataSegments); + EXPECT_EQ(1, dataSegments.size()); + EXPECT_EQ(0, dataSegments[0].second); + EXPECT_EQ(TEST_LEN, dataSegments[0].first.len); + for (int i=0; iGetCacheSize()); +} + +TEST(PageCache, GetCacheMaxSize) { + EXPECT_EQ(100 * 1024 * 1024, page->GetCacheMaxSize()); +} + +TEST(PageCache, DeletePart) { + EXPECT_EQ(0, page->DeletePart(key1, 0, 4)); + std::vector> 
dataBoundary; + EXPECT_EQ(0, page->Read(key1, 0, 10, bufOut.get(), dataBoundary)); + for (int i=0; i<4; ++i) { + EXPECT_EQ(bufIn[i], bufOut[i+5]); + } + EXPECT_EQ(1, dataBoundary.size()); + auto it = dataBoundary.begin(); + EXPECT_EQ(5, it->first); + EXPECT_EQ(4, it->second); + + EXPECT_EQ(0, page->DeletePart(key2, 0, 1)); + dataBoundary.clear(); + EXPECT_EQ(0, page->Read(key2, 0, TEST_LEN, bufOut.get(), dataBoundary)); + for (int i=0; ifirst); + EXPECT_EQ(TEST_LEN-1, it->second); +} + +TEST(PageCache, Delete) { + EXPECT_EQ(0, page->Delete(key1)); + std::vector> dataBoundary; + EXPECT_EQ(ErrCode::PAGE_NOT_FOUND, + page->Read(key1, 0, 10, bufOut.get(), dataBoundary)); + + EXPECT_EQ(0, page->Delete(key2)); + dataBoundary.clear(); + EXPECT_EQ(ErrCode::PAGE_NOT_FOUND, + page->Read(key2, 0, TEST_LEN, bufOut.get(), dataBoundary)); +} + +int main(int argc, char **argv) { + printf("Running PageCache test from %s\n", __FILE__); + testing::InitGoogleTest(&argc, argv); + int res = RUN_ALL_TESTS(); + page->Close(); + page.reset(); + return res; +} diff --git a/test/test_read_cache.cpp b/test/test_read_cache.cpp new file mode 100644 index 0000000..ece0b27 --- /dev/null +++ b/test/test_read_cache.cpp @@ -0,0 +1,134 @@ +#include + +#include "gtest/gtest.h" + +#include "read_cache.h" + +using namespace folly; + +using namespace std; +using namespace HybridCache; + +ReadCacheConfig cfg; +std::shared_ptr executor; +std::shared_ptr readCache; + +const std::string file1 = "testfile1"; +const std::string file2 = "testfile2"; +const std::string file3 = "testfile3"; + +const size_t TEST_LEN = 100 * 1024; +std::unique_ptr bufIn(new char[TEST_LEN]); +std::unique_ptr bufOut(new char[TEST_LEN]); + +TEST(ReadCache, Init) { + srand((unsigned)time(NULL)); + for (int i=0; i(16); + auto dataAdaptor = std::make_shared(); + dataAdaptor->SetExecutor(executor); + readCache = std::make_shared(cfg, dataAdaptor, executor); +} + +TEST(ReadCache, Put) { + ByteBuffer stepBuffer(bufIn.get(), TEST_LEN); + 
EXPECT_EQ(0, readCache->Put(file1, 0, 4, stepBuffer)); + EXPECT_EQ(0, readCache->Put(file2, 5, 4, stepBuffer)); + EXPECT_EQ(0, readCache->Put(file3, 0, TEST_LEN, stepBuffer)); +} + +TEST(ReadCache, Get_From_Local) { + ByteBuffer stepBuffer(bufOut.get(), TEST_LEN); + + auto f = readCache->Get(file1, 0, 4, stepBuffer); + f.wait(); + EXPECT_EQ(0, f.value()); + for (int i=0; i<4; ++i) { + EXPECT_EQ(bufIn[i], bufOut[i]); + } + + stepBuffer.data = (bufOut.get() + 5); + f = readCache->Get(file2, 5, 4, stepBuffer); + f.wait(); + EXPECT_EQ(0, f.value()); + for (int i=0; i<4; ++i) { + EXPECT_EQ(bufIn[i], bufOut[i+5]); + } + + stepBuffer.data = (bufOut.get() + 3); + f = readCache->Get(file2, 6, 2, stepBuffer); + f.wait(); + EXPECT_EQ(0, f.value()); + for (int i=0; i<2; ++i) { + EXPECT_EQ(bufIn[i+1], bufOut[i+3]); + } + + stepBuffer.data = bufOut.get(); + f = readCache->Get(file3, 0, TEST_LEN, stepBuffer); + f.wait(); + EXPECT_EQ(0, f.value()); + for (int i=0; iGet(file2, 0, 10, stepBuffer); + cout << "wait download from s3 ..." << endl; + f.wait(); + EXPECT_EQ(REMOTE_FILE_NOT_FOUND, f.value()); + for (int i=0; i<4; ++i) { + EXPECT_EQ(bufIn[i], bufOut[i+5]); + } + + f = readCache->Get(file3, 1, TEST_LEN+1, stepBuffer); + cout << "wait download from s3 ..." 
<< endl; + f.wait(); + EXPECT_EQ(REMOTE_FILE_NOT_FOUND, f.value()); + for (int i=0; i keys; + readCache->GetAllKeys(keys); + EXPECT_EQ(3, keys.size()); + EXPECT_EQ(1, keys.count(file1)); + EXPECT_EQ(1, keys.count(file2)); + EXPECT_EQ(1, keys.count(file3)); +} + +TEST(ReadCache, Delete) { + std::set keys; + readCache->Delete(file1); + readCache->GetAllKeys(keys); + EXPECT_EQ(2, keys.size()); + EXPECT_EQ(0, keys.count(file1)); + EXPECT_EQ(1, keys.count(file2)); + EXPECT_EQ(1, keys.count(file3)); +} + +int main(int argc, char **argv) { + printf("Running ReadCache test from %s\n", __FILE__); + testing::InitGoogleTest(&argc, argv); + int res = RUN_ALL_TESTS(); + executor->stop(); + readCache.reset(); + return res; +} diff --git a/test/test_write_cache.cpp b/test/test_write_cache.cpp new file mode 100644 index 0000000..1b03c59 --- /dev/null +++ b/test/test_write_cache.cpp @@ -0,0 +1,199 @@ +#include + +#include "gtest/gtest.h" + +#include "write_cache.h" + +using namespace folly; + +using namespace std; +using namespace HybridCache; + +WriteCacheConfig cfg; +std::shared_ptr writeCache; + +const std::string file1 = "testfile1"; +const std::string file2 = "testfile2"; +const std::string file3 = "testfile3"; + +const size_t TEST_LEN = 100 * 1024; +std::unique_ptr bufIn(new char[TEST_LEN]); +std::unique_ptr bufOut(new char[TEST_LEN]); + +TEST(WriteCache, Init) { + srand((unsigned)time(NULL)); + for (int i=0; i(cfg); +} + +TEST(WriteCache, Put) { + ByteBuffer stepBuffer(bufIn.get(), TEST_LEN); + EXPECT_EQ(0, writeCache->Put(file1, 0, 4, stepBuffer)); + EXPECT_EQ(0, writeCache->Put(file2, 5, 4, stepBuffer)); + EXPECT_EQ(0, writeCache->Put(file3, 0, TEST_LEN, stepBuffer)); +} + +TEST(WriteCache, Get) { + ByteBuffer stepBuffer(bufOut.get(), TEST_LEN); + std::vector> dataBoundary; + EXPECT_EQ(0, writeCache->Get(file1, 0, 10, stepBuffer, dataBoundary)); + for (int i=0; i<4; ++i) { + EXPECT_EQ(bufIn[i], bufOut[i]); + } + EXPECT_EQ(1, dataBoundary.size()); + EXPECT_EQ(0, 
dataBoundary.begin()->first); + EXPECT_EQ(4, dataBoundary.begin()->second); + + dataBoundary.clear(); + EXPECT_EQ(0, writeCache->Get(file2, 0, 10, stepBuffer, dataBoundary)); + for (int i=0; i<4; ++i) { + EXPECT_EQ(bufIn[i], bufOut[i+5]); + } + EXPECT_EQ(1, dataBoundary.size()); + EXPECT_EQ(5, dataBoundary.begin()->first); + EXPECT_EQ(4, dataBoundary.begin()->second); + + dataBoundary.clear(); + EXPECT_EQ(0, writeCache->Get(file3, 0, TEST_LEN, stepBuffer, dataBoundary)); + for (int i=0; ifirst); + EXPECT_EQ(TEST_LEN, it->second); +} + +TEST(WriteCache, GetAllCacheWithLock) { + std::vector> dataSegments; + EXPECT_EQ(0, writeCache->GetAllCacheWithLock(file1, dataSegments)); + EXPECT_EQ(1, dataSegments.size()); + EXPECT_EQ(0, dataSegments.begin()->second); + EXPECT_EQ(4, dataSegments.begin()->first.len); + for (int i=0; ifirst.len; ++i) { + EXPECT_EQ(bufIn[i], *(dataSegments.begin()->first.data+i)); + } + + dataSegments.clear(); + EXPECT_EQ(0, writeCache->GetAllCacheWithLock(file2, dataSegments)); + EXPECT_EQ(1, dataSegments.size()); + EXPECT_EQ(5, dataSegments.begin()->second); + EXPECT_EQ(4, dataSegments.begin()->first.len); + for (int i=0; ifirst.len; ++i) { + EXPECT_EQ(bufIn[i], *(dataSegments.begin()->first.data+i)); + } + + dataSegments.clear(); + EXPECT_EQ(0, writeCache->GetAllCacheWithLock(file3, dataSegments)); + EXPECT_EQ(2, dataSegments.size()); + auto it = dataSegments.begin(); + EXPECT_EQ(0, it->second); + EXPECT_EQ(64*1024, it->first.len); + for (int i=0; ifirst.len; ++i) { + EXPECT_EQ(bufIn[i], *(it->first.data+i)); + } + ++it; + EXPECT_EQ(64*1024, it->second); + EXPECT_EQ(TEST_LEN-64*1024, it->first.len); + for (int i=0; ifirst.len; ++i) { + EXPECT_EQ(bufIn[i+64*1024], *(it->first.data+i)); + } +} + +TEST(WriteCache, GetAllKeys) { + std::map keys; + EXPECT_EQ(0, writeCache->GetAllKeys(keys)); + EXPECT_EQ(3, keys.size()); + EXPECT_EQ(1, keys.count(file1)); + EXPECT_EQ(1, keys.count(file2)); + EXPECT_EQ(1, keys.count(file3)); + for (auto it : keys) { + 
cout << "key:" << it.first << " create_time:" << it.second << endl; + } +} + +TEST(WriteCache, GetSize) { + uint32_t realPageSize = cfg.CacheCfg.PageMetaSize + + cfg.CacheCfg.PageBodySize/8 + cfg.CacheCfg.PageBodySize; + cout << "CacheSize:" << writeCache->GetCacheSize() << endl; + cout << "CacheMaxSize:" << writeCache->GetCacheMaxSize() << endl; + EXPECT_EQ(realPageSize*4, writeCache->GetCacheSize()); + EXPECT_EQ(cfg.CacheCfg.MaxCacheSize, writeCache->GetCacheMaxSize()); +} + +TEST(WriteCache, UnLock) { + writeCache->UnLock(file1); + writeCache->UnLock(file2); + writeCache->UnLock(file3); +} + +TEST(WriteCache, Truncate) { + uint32_t pageSize = cfg.CacheCfg.PageBodySize; + EXPECT_EQ(0, writeCache->Truncate(file3, pageSize+1)); + + ByteBuffer stepBuffer(bufOut.get(), TEST_LEN); + std::vector> dataBoundary; + EXPECT_EQ(0, writeCache->Get(file3, 0, TEST_LEN, stepBuffer, dataBoundary)); + for (int i=0; ifirst); + EXPECT_EQ(pageSize+1, it->second); + + EXPECT_EQ(0, writeCache->Truncate(file3, pageSize)); + dataBoundary.clear(); + EXPECT_EQ(0, writeCache->Get(file3, 0, TEST_LEN, stepBuffer, dataBoundary)); + for (int i=0; ifirst); + EXPECT_EQ(pageSize, it->second); + + EXPECT_EQ(0, writeCache->Truncate(file3, pageSize-1)); + dataBoundary.clear(); + EXPECT_EQ(0, writeCache->Get(file3, 0, TEST_LEN, stepBuffer, dataBoundary)); + for (int i=0; ifirst); + EXPECT_EQ(pageSize-1, it->second); +} + +TEST(WriteCache, Delete) { + EXPECT_EQ(0, writeCache->Delete(file1)); + std::map keys; + EXPECT_EQ(0, writeCache->GetAllKeys(keys)); + EXPECT_EQ(2, keys.size()); + EXPECT_EQ(0, keys.count(file1)); + EXPECT_EQ(1, keys.count(file2)); + EXPECT_EQ(1, keys.count(file3)); + + ByteBuffer stepBuffer(bufOut.get(), TEST_LEN); + std::vector> dataBoundary; + EXPECT_EQ(0, writeCache->Get(file1, 0, 10, stepBuffer, dataBoundary)); + EXPECT_EQ(0, dataBoundary.size()); +} + +int main(int argc, char **argv) { + printf("Running WriteCache test from %s\n", __FILE__); + testing::InitGoogleTest(&argc, 
argv); + int res = RUN_ALL_TESTS(); + writeCache.reset(); + return res; +}