libsnapshot: No transition of snapuserd during second stage init

When there is a transition of daemon from selinux stage, we observe
intermittent hangs during OTA. This is a workaround wherein
we don't do the transition and allow the daemon to continue which
was spawned during selinux stage.

Bug: 179331261
Test: Incremental OTA, full OTA on pixel
Signed-off-by: Akilesh Kailash <akailash@google.com>
Change-Id: I622a0ed8afcd404bac4919b1de00728de2c12eaf
This commit is contained in:
Akilesh Kailash 2021-02-09 07:42:48 +00:00
parent cb9e1decf0
commit 03e803455e
5 changed files with 38 additions and 50 deletions

View File

@ -1234,6 +1234,25 @@ bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::strin
return true;
}
static bool DeleteDmDevice(const std::string& name, const std::chrono::milliseconds& timeout_ms) {
auto start = std::chrono::steady_clock::now();
auto& dm = DeviceMapper::Instance();
while (true) {
if (dm.DeleteDeviceIfExists(name)) {
break;
}
auto now = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
if (elapsed >= timeout_ms) {
LOG(ERROR) << "DeleteDevice timeout: " << name;
return false;
}
std::this_thread::sleep_for(250ms);
}
return true;
}
bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
const SnapshotStatus& status) {
auto& dm = DeviceMapper::Instance();
@ -1292,10 +1311,11 @@ bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
if (!dm.DeleteDeviceIfExists(base_name)) {
LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
}
auto source_name = GetSourceDeviceName(name);
if (!dm.DeleteDeviceIfExists(source_name)) {
LOG(ERROR) << "Unable to delete source device for snapshot: " << source_name;
if (!DeleteDmDevice(GetSourceDeviceName(name), 4000ms)) {
LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
}
return true;
}
@ -1387,9 +1407,6 @@ bool SnapshotManager::PerformInitTransition(InitTransition transition,
}
auto misc_name = user_cow_name;
if (transition == InitTransition::SELINUX_DETACH) {
misc_name += "-selinux";
}
DmTable table;
table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
@ -2122,15 +2139,12 @@ bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name)
CHECK(lock);
if (!EnsureImageManager()) return false;
auto& dm = DeviceMapper::Instance();
if (UpdateUsesCompression(lock) && !UnmapDmUserDevice(name)) {
return false;
}
auto cow_name = GetCowName(name);
if (!dm.DeleteDeviceIfExists(cow_name)) {
LOG(ERROR) << "Cannot unmap " << cow_name;
if (!DeleteDmDevice(GetCowName(name), 4000ms)) {
LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
return false;
}
@ -2155,12 +2169,11 @@ bool SnapshotManager::UnmapDmUserDevice(const std::string& snapshot_name) {
return false;
}
if (!EnsureSnapuserdConnected()) {
return false;
}
if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
return false;
if (EnsureSnapuserdConnected()) {
if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
return false;
}
}
// Ensure the control device is gone so we don't run into ABA problems.

View File

@ -534,7 +534,7 @@ bool Snapuserd::ReadMetadata() {
bool prev_copy_op = false;
bool metadata_found = false;
SNAP_LOG(DEBUG) << "ReadMetadata Start...";
SNAP_LOG(DEBUG) << "ReadMetadata: Parsing cow file";
if (!reader_->Parse(cow_fd_)) {
SNAP_LOG(ERROR) << "Failed to parse";

View File

@ -70,6 +70,11 @@ class Snapuserd final {
const std::string& GetMiscName() { return misc_name_; }
uint64_t GetNumSectors() { return num_sectors_; }
bool IsAttached() const { return ctrl_fd_ >= 0; }
void CloseFds() {
ctrl_fd_ = {};
cow_fd_ = {};
backing_store_fd_ = {};
}
private:
bool DmuserReadRequest();

View File

@ -210,6 +210,8 @@ void SnapuserdServer::RunThread(std::shared_ptr<DmUserHandler> handler) {
}
}
handler->snapuserd()->CloseFds();
auto misc_name = handler->misc_name();
LOG(INFO) << "Handler thread about to exit: " << misc_name;

View File

@ -723,37 +723,6 @@ void SendLoadPersistentPropertiesMessage() {
}
}
static Result<void> TransitionSnapuserdAction(const BuiltinArguments&) {
if (!SnapshotManager::IsSnapshotManagerNeeded() ||
!android::base::GetBoolProperty(android::snapshot::kVirtualAbCompressionProp, false)) {
return {};
}
auto sm = SnapshotManager::New();
if (!sm) {
LOG(FATAL) << "Failed to create SnapshotManager, will not transition snapuserd";
return {};
}
ServiceList& service_list = ServiceList::GetInstance();
auto svc = service_list.FindService("snapuserd");
if (!svc) {
LOG(FATAL) << "Failed to find snapuserd service, aborting transition";
return {};
}
svc->Start();
svc->SetShutdownCritical();
if (!sm->PerformSecondStageInitTransition()) {
LOG(FATAL) << "Failed to transition snapuserd to second-stage";
}
if (auto pid = GetSnapuserdFirstStagePid()) {
KillFirstStageSnapuserd(pid.value());
}
return {};
}
int SecondStageMain(int argc, char** argv) {
if (REBOOT_BOOTLOADER_ON_PANIC) {
InstallRebootSignalHandlers();
@ -900,7 +869,6 @@ int SecondStageMain(int argc, char** argv) {
// Queue an action that waits for coldboot done so we know ueventd has set up all of /dev...
am.QueueBuiltinAction(wait_for_coldboot_done_action, "wait_for_coldboot_done");
am.QueueBuiltinAction(TransitionSnapuserdAction, "TransitionSnapuserd");
// ... so that we can start queuing up actions that require stuff from /dev.
am.QueueBuiltinAction(SetMmapRndBitsAction, "SetMmapRndBits");
Keychords keychords;