# Source: project1/datasets/data_prefetcher_multi.py
# (82 lines, 3.7 KiB, Python)

# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
import torch
# def to_cuda(samples, targets, device):
# samples = samples.to(device, non_blocking=True)
# targets = [{k: v.to(device, non_blocking=True) for k, v in t.items()} for t in targets]
# return samples, targets
def to_cuda(samples, ref_samples, targets, device):
    """Move one batch to ``device`` using non-blocking copies.

    Args:
        samples: Object exposing ``.to(device, non_blocking=...)``.
        ref_samples: Iterable of objects exposing the same ``.to`` method,
            or ``None`` when the batch carries no reference samples.
        targets: Iterable of dicts whose values expose the same ``.to``
            method.
        device: Destination passed straight through to every ``.to`` call.

    Returns:
        Tuple ``(samples, ref_samples, targets)`` with every element moved.
        ``ref_samples`` comes back as a list, or as ``None`` if ``None`` was
        passed in; ``targets`` comes back as a list of new dicts.
    """
    # A batch without reference samples is passed through untouched instead
    # of failing on iteration over None.
    if ref_samples is not None:
        ref_samples = [
            ref_sample.to(device, non_blocking=True) for ref_sample in ref_samples
        ]
    samples = samples.to(device, non_blocking=True)
    targets = [
        {k: v.to(device, non_blocking=True) for k, v in t.items()}
        for t in targets
    ]
    return samples, ref_samples, targets
class data_prefetcher():
    """Pull batches from a loader and move them to ``device``.

    The wrapped loader must yield ``(samples, ref_samples, targets)``
    triples in the form accepted by ``to_cuda``.

    With ``prefetch=True`` the following batch is copied on a dedicated CUDA
    stream while the caller is still working on the current one. With
    ``prefetch=False`` each batch is fetched and moved synchronously inside
    ``next()``.

    NOTE(review): the reference samples are transferred together with the
    rest of the batch but ``next()`` returns only ``(samples, targets)``;
    confirm with the callers whether they are expected to be exposed.
    """

    def __init__(self, loader, device, prefetch=True):
        """Wrap ``loader`` and, in prefetch mode, start loading the first batch.

        Args:
            loader: Iterable yielding ``(samples, ref_samples, targets)``.
            device: Destination device handed to ``to_cuda``.
            prefetch: When true, create a side CUDA stream and preload the
                first batch immediately.
        """
        self.loader = iter(loader)
        self.prefetch = prefetch
        self.device = device
        if prefetch:
            self.stream = torch.cuda.Stream()
            self.preload()

    def preload(self):
        """Fetch the next batch and start its transfer on the side stream.

        When the loader is exhausted, ``next_samples``, ``next_ref_samples``
        and ``next_targets`` are all set to ``None`` and nothing is copied.
        """
        try:
            self.next_samples, self.next_ref_samples, self.next_targets = next(self.loader)
        except StopIteration:
            self.next_samples = None
            self.next_ref_samples = None
            self.next_targets = None
            return
        # If record_stream() doesn't work, another option is to create the
        # device tensors on the main stream (torch.empty_like(..., device='cuda')),
        # make the side stream wait for the main stream
        # (self.stream.wait_stream(torch.cuda.current_stream())) so the
        # destination memory is no longer in use, and then copy_() into them
        # inside the side stream.
        with torch.cuda.stream(self.stream):
            self.next_samples, self.next_ref_samples, self.next_targets = to_cuda(
                self.next_samples,
                self.next_ref_samples,
                self.next_targets,
                self.device,
            )
        # With Amp, it isn't necessary to manually convert data to half.

    def next(self):
        """Return the next ``(samples, targets)`` pair.

        Returns:
            ``(samples, targets)`` moved to the target device, or
            ``(None, None)`` once the underlying loader is exhausted.
        """
        if self.prefetch:
            current_stream = torch.cuda.current_stream()
            # Do not let the caller's stream run ahead of the pending copy.
            current_stream.wait_stream(self.stream)
            samples = self.next_samples
            targets = self.next_targets
            # The tensors were allocated on the side stream; record their use
            # on the current stream so their memory is not reused too early.
            if samples is not None:
                samples.record_stream(current_stream)
            if targets is not None:
                for target in targets:
                    for value in target.values():
                        value.record_stream(current_stream)
            self.preload()
            return samples, targets

        try:
            # nested tensor, list[tensor]
            samples, ref_samples, targets = next(self.loader)
        except StopIteration:
            return None, None
        # to_cuda iterates over the reference samples, so substitute an empty
        # list when the batch carries none.
        refs = [] if ref_samples is None else ref_samples
        samples, _, targets = to_cuda(samples, refs, targets, self.device)
        return samples, targets