diff --git a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
index 3888eb168..d09c6e9c9 100644
--- a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
+++ b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
@@ -96,6 +96,8 @@ class TempDevice {
 class CowSnapuserdTest final {
   public:
     bool Setup();
+    bool SetupOrderedOps();
+    bool SetupOrderedOpsInverted();
     bool SetupCopyOverlap_1();
     bool SetupCopyOverlap_2();
     bool Merge();
@@ -103,6 +105,9 @@ class CowSnapuserdTest final {
     void ReadSnapshotDeviceAndValidate();
     void Shutdown();
     void MergeInterrupt();
+    void MergeInterruptFixed(int duration);
+    void MergeInterruptRandomly(int max_duration);
+    void ReadDmUserBlockWithoutDaemon();
 
     std::string snapshot_dev() const { return snapshot_dev_->path(); }
@@ -116,6 +121,8 @@ class CowSnapuserdTest final {
     void StartMerge();
 
     void CreateCowDevice();
+    void CreateCowDeviceOrderedOps();
+    void CreateCowDeviceOrderedOpsInverted();
     void CreateCowDeviceWithCopyOverlap_1();
     void CreateCowDeviceWithCopyOverlap_2();
     bool SetupDaemon();
@@ -196,6 +203,18 @@ bool CowSnapuserdTest::Setup() {
     return setup_ok_;
 }
 
+bool CowSnapuserdTest::SetupOrderedOps() {
+    CreateBaseDevice();
+    CreateCowDeviceOrderedOps();
+    return SetupDaemon();
+}
+
+bool CowSnapuserdTest::SetupOrderedOpsInverted() {
+    CreateBaseDevice();
+    CreateCowDeviceOrderedOpsInverted();
+    return SetupDaemon();
+}
+
 bool CowSnapuserdTest::SetupCopyOverlap_1() {
     CreateBaseDevice();
     CreateCowDeviceWithCopyOverlap_1();
@@ -382,6 +401,112 @@ void CowSnapuserdTest::CreateCowDeviceWithCopyOverlap_1() {
               true);
 }
 
+void CowSnapuserdTest::CreateCowDeviceOrderedOpsInverted() {
+    unique_fd rnd_fd;
+    loff_t offset = 0;
+
+    std::string path = android::base::GetExecutableDirectory();
+    cow_system_ = std::make_unique<TemporaryFile>(path);
+
+    rnd_fd.reset(open("/dev/random", O_RDONLY));
+    ASSERT_TRUE(rnd_fd > 0);
+
+    std::unique_ptr<uint8_t[]> random_buffer_1_ = std::make_unique<uint8_t[]>(size_);
+
+    // Fill random data
+    for (size_t j = 0; j < (size_ / 1_MiB); j++) {
+        ASSERT_EQ(ReadFullyAtOffset(rnd_fd, (char*)random_buffer_1_.get() + offset, 1_MiB, 0),
+                  true);
+
+        offset += 1_MiB;
+    }
+
+    CowOptions options;
+    options.compression = "gz";
+    CowWriter writer(options);
+
+    ASSERT_TRUE(writer.Initialize(cow_system_->fd));
+
+    size_t num_blocks = size_ / options.block_size;
+    size_t blk_end_copy = num_blocks * 2;
+    size_t source_blk = num_blocks - 1;
+    size_t blk_src_copy = blk_end_copy - 1;
+
+    size_t x = num_blocks;
+    while (1) {
+        ASSERT_TRUE(writer.AddCopy(source_blk, blk_src_copy));
+        x -= 1;
+        if (x == 0) {
+            break;
+        }
+        source_blk -= 1;
+        blk_src_copy -= 1;
+    }
+
+    // Flush operations
+    ASSERT_TRUE(writer.Finalize());
+    // Construct the buffer required for validation
+    orig_buffer_ = std::make_unique<uint8_t[]>(total_base_size_);
+    // Read the entire base device
+    ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0),
+              true);
+    // Merged Buffer
+    memmove(orig_buffer_.get(), (char*)orig_buffer_.get() + size_, size_);
+}
+
+void CowSnapuserdTest::CreateCowDeviceOrderedOps() {
+    unique_fd rnd_fd;
+    loff_t offset = 0;
+
+    std::string path = android::base::GetExecutableDirectory();
+    cow_system_ = std::make_unique<TemporaryFile>(path);
+
+    rnd_fd.reset(open("/dev/random", O_RDONLY));
+    ASSERT_TRUE(rnd_fd > 0);
+
+    std::unique_ptr<uint8_t[]> random_buffer_1_ = std::make_unique<uint8_t[]>(size_);
+
+    // Fill random data
+    for (size_t j = 0; j < (size_ / 1_MiB); j++) {
+        ASSERT_EQ(ReadFullyAtOffset(rnd_fd, (char*)random_buffer_1_.get() + offset, 1_MiB, 0),
+                  true);
+
+        offset += 1_MiB;
+    }
+
+    CowOptions options;
+    options.compression = "gz";
+    CowWriter writer(options);
+
+    ASSERT_TRUE(writer.Initialize(cow_system_->fd));
+
+    size_t num_blocks = size_ / options.block_size;
+    size_t x = num_blocks;
+    size_t source_blk = 0;
+    size_t blk_src_copy = num_blocks;
+
+    while (1) {
+        ASSERT_TRUE(writer.AddCopy(source_blk, blk_src_copy));
+
+        x -= 1;
+        if (x == 0) {
+            break;
+        }
+        source_blk += 1;
+        blk_src_copy += 1;
+    }
+
+    // Flush operations
+    ASSERT_TRUE(writer.Finalize());
+    // Construct the buffer required for validation
+    orig_buffer_ = std::make_unique<uint8_t[]>(total_base_size_);
+    // Read the entire base device
+    ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0),
+              true);
+    // Merged Buffer
+    memmove(orig_buffer_.get(), (char*)orig_buffer_.get() + size_, size_);
+}
+
 void CowSnapuserdTest::CreateCowDevice() {
     unique_fd rnd_fd;
     loff_t offset = 0;
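For reference, the two helpers above write the same copy operations in opposite file orders. Below is a minimal standalone sketch (illustration only, not part of the patch; num_blocks is a hypothetical stand-in for size_ / options.block_size) of the argument pairs each helper passes to writer.AddCopy:

#include <cstddef>
#include <cstdio>

int main() {
    const size_t num_blocks = 4;  // hypothetical; the tests derive this from size_

    // CreateCowDeviceOrderedOps: ascending file order
    // AddCopy(0, 4), AddCopy(1, 5), AddCopy(2, 6), AddCopy(3, 7)
    for (size_t i = 0; i < num_blocks; i++) {
        printf("AddCopy(%zu, %zu)\n", i, num_blocks + i);
    }

    // CreateCowDeviceOrderedOpsInverted: descending file order
    // AddCopy(3, 7), AddCopy(2, 6), AddCopy(1, 5), AddCopy(0, 4)
    for (size_t i = 0; i < num_blocks; i++) {
        printf("AddCopy(%zu, %zu)\n", num_blocks - 1 - i, 2 * num_blocks - 1 - i);
    }
    return 0;
}

The crash tests added further below run interrupted merges over both layouts.
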
@@ -481,6 +606,36 @@ void CowSnapuserdTest::CreateDmUserDevice() {
     ASSERT_TRUE(android::fs_mgr::WaitForFile(misc_device, 10s));
 }
 
+void CowSnapuserdTest::ReadDmUserBlockWithoutDaemon() {
+    DmTable dmuser_table;
+    std::string dm_user_name = "dm-test-device";
+    unique_fd fd;
+
+    // Create a dm-user block device
+    ASSERT_TRUE(dmuser_table.AddTarget(std::make_unique<DmTargetUser>(0, 123456, dm_user_name)));
+    ASSERT_TRUE(dmuser_table.valid());
+
+    dmuser_dev_ = std::make_unique<TempDevice>(dm_user_name, dmuser_table);
+    ASSERT_TRUE(dmuser_dev_->valid());
+    ASSERT_FALSE(dmuser_dev_->path().empty());
+
+    fd.reset(open(dmuser_dev_->path().c_str(), O_RDONLY));
+    ASSERT_GE(fd, 0);
+
+    std::unique_ptr<uint8_t[]> buffer = std::make_unique<uint8_t[]>(1_MiB);
+
+    loff_t offset = 0;
+    // Every IO should fail as there is no daemon to process the IO
+    for (size_t j = 0; j < 10; j++) {
+        ASSERT_EQ(ReadFullyAtOffset(fd, (char*)buffer.get() + offset, BLOCK_SZ, offset), false);
+
+        offset += BLOCK_SZ;
+    }
+
+    fd = {};
+    ASSERT_TRUE(dmuser_dev_->Destroy());
+}
+
 void CowSnapuserdTest::InitDaemon() {
     bool ok = client_->AttachDmUser(system_device_ctrl_name_);
     ASSERT_TRUE(ok);
@@ -566,6 +721,7 @@ void CowSnapuserdTest::ValidateMerge() {
 
 void CowSnapuserdTest::SimulateDaemonRestart() {
     Shutdown();
+    std::this_thread::sleep_for(500ms);
     SetDeviceControlName();
     StartSnapuserdDaemon();
     InitCowDevice();
@@ -574,6 +730,34 @@ void CowSnapuserdTest::SimulateDaemonRestart() {
     CreateSnapshotDevice();
 }
 
+void CowSnapuserdTest::MergeInterruptRandomly(int max_duration) {
+    std::srand(std::time(nullptr));
+    StartMerge();
+
+    for (int i = 0; i < 20; i++) {
+        int duration = std::rand() % max_duration;
+        std::this_thread::sleep_for(std::chrono::milliseconds(duration));
+        SimulateDaemonRestart();
+        StartMerge();
+    }
+
+    SimulateDaemonRestart();
+    ASSERT_TRUE(Merge());
+}
+
+void CowSnapuserdTest::MergeInterruptFixed(int duration) {
+    StartMerge();
+
+    for (int i = 0; i < 25; i++) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(duration));
+        SimulateDaemonRestart();
+        StartMerge();
+    }
+
+    SimulateDaemonRestart();
+    ASSERT_TRUE(Merge());
+}
+
 void CowSnapuserdTest::MergeInterrupt() {
     // Interrupt merge at various intervals
     StartMerge();
@@ -638,10 +822,9 @@ void CowSnapuserdMetadataTest::ValidatePartialFilledArea() {
     void* buffer = snapuserd_->GetExceptionBuffer(1);
     loff_t offset = 0;
     struct disk_exception* de;
-    for (int i = 0; i < 12; i++) {
+    for (int i = 11; i >= 0; i--) {
         de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
         ASSERT_EQ(de->old_chunk, i);
-        ASSERT_EQ(de->new_chunk, new_chunk);
         offset += sizeof(struct disk_exception);
         new_chunk += 1;
     }
@@ -780,71 +963,71 @@ void CowSnapuserdMetadataTest::ValidateMetadata() {
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
     ASSERT_EQ(de->old_chunk, 100);
-    ASSERT_EQ(de->new_chunk, 522);
+    ASSERT_EQ(de->new_chunk, 521);
     offset += sizeof(struct disk_exception);
 
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
     ASSERT_EQ(de->old_chunk, 105);
-    ASSERT_EQ(de->new_chunk, 524);
+    ASSERT_EQ(de->new_chunk, 522);
     offset += sizeof(struct disk_exception);
 
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
     ASSERT_EQ(de->old_chunk, 110);
-    ASSERT_EQ(de->new_chunk, 526);
+    ASSERT_EQ(de->new_chunk, 523);
     offset += sizeof(struct disk_exception);
 
     // The next 4 operations are batch merged as
     // both old and new chunk are contiguous
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
-    ASSERT_EQ(de->old_chunk, 50);
-    ASSERT_EQ(de->new_chunk, 528);
-    offset += sizeof(struct disk_exception);
-
-    de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
-    ASSERT_EQ(de->old_chunk, 51);
-    ASSERT_EQ(de->new_chunk, 529);
+    ASSERT_EQ(de->old_chunk, 53);
+    ASSERT_EQ(de->new_chunk, 524);
     offset += sizeof(struct disk_exception);
 
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
     ASSERT_EQ(de->old_chunk, 52);
-    ASSERT_EQ(de->new_chunk, 530);
+    ASSERT_EQ(de->new_chunk, 525);
     offset += sizeof(struct disk_exception);
 
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
-    ASSERT_EQ(de->old_chunk, 53);
-    ASSERT_EQ(de->new_chunk, 531);
+    ASSERT_EQ(de->old_chunk, 51);
+    ASSERT_EQ(de->new_chunk, 526);
+    offset += sizeof(struct disk_exception);
+
+    de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
+    ASSERT_EQ(de->old_chunk, 50);
+    ASSERT_EQ(de->new_chunk, 527);
     offset += sizeof(struct disk_exception);
 
     // This is handling overlap operation with
     // two batch merge operations.
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
     ASSERT_EQ(de->old_chunk, 18);
-    ASSERT_EQ(de->new_chunk, 533);
+    ASSERT_EQ(de->new_chunk, 528);
     offset += sizeof(struct disk_exception);
 
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
     ASSERT_EQ(de->old_chunk, 19);
-    ASSERT_EQ(de->new_chunk, 534);
+    ASSERT_EQ(de->new_chunk, 529);
     offset += sizeof(struct disk_exception);
 
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
     ASSERT_EQ(de->old_chunk, 20);
-    ASSERT_EQ(de->new_chunk, 535);
+    ASSERT_EQ(de->new_chunk, 530);
     offset += sizeof(struct disk_exception);
 
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
     ASSERT_EQ(de->old_chunk, 21);
-    ASSERT_EQ(de->new_chunk, 537);
+    ASSERT_EQ(de->new_chunk, 532);
     offset += sizeof(struct disk_exception);
 
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
     ASSERT_EQ(de->old_chunk, 22);
-    ASSERT_EQ(de->new_chunk, 538);
+    ASSERT_EQ(de->new_chunk, 533);
     offset += sizeof(struct disk_exception);
 
     de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
     ASSERT_EQ(de->old_chunk, 23);
-    ASSERT_EQ(de->new_chunk, 539);
+    ASSERT_EQ(de->new_chunk, 534);
     offset += sizeof(struct disk_exception);
 
     // End of metadata
@@ -909,6 +1092,43 @@ TEST(Snapuserd_Test, Snapshot_COPY_Overlap_Merge_Resume_TEST) {
     harness.Shutdown();
 }
 
+TEST(Snapuserd_Test, ReadDmUserBlockWithoutDaemon) {
+    CowSnapuserdTest harness;
+    harness.ReadDmUserBlockWithoutDaemon();
+}
+
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Fixed_Ordered) {
+    CowSnapuserdTest harness;
+    ASSERT_TRUE(harness.SetupOrderedOps());
+    harness.MergeInterruptFixed(300);
+    harness.ValidateMerge();
+    harness.Shutdown();
+}
+
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Random_Ordered) {
+    CowSnapuserdTest harness;
+    ASSERT_TRUE(harness.SetupOrderedOps());
+    harness.MergeInterruptRandomly(500);
+    harness.ValidateMerge();
+    harness.Shutdown();
+}
+
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Fixed_Inverted) {
+    CowSnapuserdTest harness;
+    ASSERT_TRUE(harness.SetupOrderedOpsInverted());
+    harness.MergeInterruptFixed(50);
+    harness.ValidateMerge();
+    harness.Shutdown();
+}
+
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Random_Inverted) {
+    CowSnapuserdTest harness;
+    ASSERT_TRUE(harness.SetupOrderedOpsInverted());
+    harness.MergeInterruptRandomly(50);
+    harness.ValidateMerge();
+    harness.Shutdown();
+}
+
 }  // namespace snapshot
 }  // namespace android
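The renumbered expectations above encode the new file-order layout: copy exceptions take new_chunk values in the order the ops appear in the COW file, and the 50-53 group is laid out with old_chunk descending so the kernel can still batch-merge it. A minimal sketch of that contiguity rule (standalone, simplified types, my own formalization of the "both old and new chunk are contiguous" comment; the real exceptions are built in snapuserd.cpp below):

#include <cstddef>
#include <cstdint>
#include <vector>

struct DiskException {
    uint64_t old_chunk;
    uint64_t new_chunk;
};

// True when the run can merge as one batch: walking the metadata forward,
// old_chunk steps down by one while new_chunk steps up by one, e.g. the
// {53, 52, 51, 50} -> {524, 525, 526, 527} group validated above.
bool IsBatchMergeable(const std::vector<DiskException>& run) {
    for (size_t i = 1; i < run.size(); i++) {
        if (run[i].old_chunk + 1 != run[i - 1].old_chunk ||
            run[i].new_chunk != run[i - 1].new_chunk + 1) {
            return false;
        }
    }
    return true;
}
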
diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp
index 03c2ef6c7..57a61a791 100644
--- a/fs_mgr/libsnapshot/snapuserd.cpp
+++ b/fs_mgr/libsnapshot/snapuserd.cpp
@@ -442,8 +442,9 @@ bool Snapuserd::ReadMetadata() {
     int num_ra_ops_per_iter = ((GetBufferDataSize()) / BLOCK_SZ);
     std::optional<chunk_t> prev_id = {};
-    std::map<uint64_t, const CowOperation*> map;
+    std::vector<const CowOperation*> vec;
     std::set<uint64_t> dest_blocks;
+    std::set<uint64_t> source_blocks;
     size_t pending_copy_ops = exceptions_per_area_ - num_ops;
     uint64_t total_copy_ops = reader_->total_copy_ops();
@@ -500,99 +501,45 @@ bool Snapuserd::ReadMetadata() {
             // scratch space and re-construct it thereby there
             // is no loss of data.
             //
+            // Note that we will follow the same order of COW operations
+            // as present in the COW file. This will make sure that
+            // the merge of operations is done based on the ops present
+            // in the file.
             //===========================================================
-            //
-            // Case 2:
-            //
-            // Let's say we have three copy operations written to COW file
-            // in the following order:
-            //
-            // op-1: 15 -> 18
-            // op-2: 16 -> 19
-            // op-3: 17 -> 20
-            //
-            // As aforementioned, kernel will initiate merge in reverse order.
-            // Hence, we will read these ops in reverse order so that all these
-            // ops are exectued in the same order as requested. Thus, we will
-            // read the metadata in reverse order and for the kernel it will
-            // look like:
-            //
-            // op-3: 17 -> 20
-            // op-2: 16 -> 19
-            // op-1: 15 -> 18   <-- Merge starts here in the kernel
-            //
-            // Now, this is problematic as kernel cannot batch merge them.
-            //
-            // Merge sequence will look like:
-            //
-            // Merge-1: op-1: 15 -> 18
-            // Merge-2: op-2: 16 -> 19
-            // Merge-3: op-3: 17 -> 20
-            //
-            // We have three merge operations.
-            //
-            // Even though the blocks are contiguous, kernel can batch merge
-            // them if the blocks are in descending order. Update engine
-            // addresses this issue partially for overlapping operations as
-            // we see that op-1 to op-3 and op-4 to op-6 operatiosn are in
-            // descending order. However, if the copy operations are not
-            // overlapping, update engine cannot write these blocks
-            // in descending order. Hence, we will try to address it.
-            // Thus, we will send these blocks to the kernel and it will
-            // look like:
-            //
-            // op-3: 15 -> 18
-            // op-2: 16 -> 19
-            // op-1: 17 -> 20  <-- Merge starts here in the kernel
-            //
-            // Now with this change, we can batch merge all these three
-            // operations. Merge sequence will look like:
-            //
-            // Merge-1: {op-1: 17 -> 20, op-2: 16 -> 19, op-3: 15 -> 18}
-            //
-            // Note that we have changed the ordering of merge; However, this
-            // is ok as each of these copy operations are independent and there
-            // is no overlap.
-            //
-            //===================================================================
             if (prev_id.has_value()) {
-                chunk_t diff = (cow_op->new_block > prev_id.value())
-                                       ? (cow_op->new_block - prev_id.value())
-                                       : (prev_id.value() - cow_op->new_block);
-                if (diff != 1) {
-                    break;
-                }
-
-                if (dest_blocks.count(cow_op->new_block) || map.count(cow_op->source) > 0) {
+                if (dest_blocks.count(cow_op->new_block) || source_blocks.count(cow_op->source)) {
                     break;
                 }
             }
             metadata_found = true;
             pending_copy_ops -= 1;
-            map[cow_op->new_block] = cow_op;
+            vec.push_back(cow_op);
             dest_blocks.insert(cow_op->source);
+            source_blocks.insert(cow_op->new_block);
             prev_id = cow_op->new_block;
             cowop_riter_->Next();
         } while (!cowop_riter_->Done() && pending_copy_ops);
 
         data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
-        SNAP_LOG(DEBUG) << "Batch Merge copy-ops of size: " << map.size()
+        SNAP_LOG(DEBUG) << "Batch Merge copy-ops of size: " << vec.size()
                         << " Area: " << vec_.size() << " Area offset: " << offset
                         << " Pending-copy-ops in this area: " << pending_copy_ops;
 
-        for (auto it = map.begin(); it != map.end(); it++) {
+        for (size_t i = 0; i < vec.size(); i++) {
             struct disk_exception* de =
                     reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);
-            de->old_chunk = it->first;
+            const CowOperation* cow_op = vec[i];
+
+            de->old_chunk = cow_op->new_block;
             de->new_chunk = data_chunk_id;
 
             // Store operation pointer.
-            chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), it->second));
+            chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op));
             offset += sizeof(struct disk_exception);
             num_ops += 1;
             copy_ops++;
             if (read_ahead_feature_) {
-                read_ahead_ops_.push_back(it->second);
+                read_ahead_ops_.push_back(cow_op);
             }
 
             SNAP_LOG(DEBUG) << num_ops << ":"
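In short, the hunk above replaces the sorted std::map with a std::vector so ops keep COW-file order, and uses the two sets to cut a batch at the first overlap. A simplified standalone rendering of that loop (bookkeeping such as pending_copy_ops and read-ahead trimmed; CowOperation reduced to the two fields used here):

#include <cstdint>
#include <set>
#include <vector>

struct CowOperation {
    uint64_t source;     // block the op copies from
    uint64_t new_block;  // block the op copies to
};

// Returns the longest batchable prefix of `ops`, preserving COW-file order.
// The batch is cut when an op would write a block an earlier op in the
// batch reads (dest_blocks) or read a block an earlier op writes
// (source_blocks) -- the same rule as the loop in Snapuserd::ReadMetadata.
std::vector<const CowOperation*> BatchableOps(const std::vector<CowOperation>& ops) {
    std::vector<const CowOperation*> vec;
    std::set<uint64_t> dest_blocks;    // source blocks of accepted ops
    std::set<uint64_t> source_blocks;  // new_block values of accepted ops
    for (const auto& op : ops) {
        if (!vec.empty() &&
            (dest_blocks.count(op.new_block) || source_blocks.count(op.source))) {
            break;
        }
        vec.push_back(&op);
        dest_blocks.insert(op.source);
        source_blocks.insert(op.new_block);
    }
    return vec;
}

Keyed on new_block, the old map silently re-sorted each batch by destination block; preserving file order is what the Ordered/Inverted crash-resume tests above exercise.
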
@@ -635,8 +582,9 @@ bool Snapuserd::ReadMetadata() {
                 data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
             }
         }
-        map.clear();
+        vec.clear();
         dest_blocks.clear();
+        source_blocks.clear();
         prev_id.reset();
     }