doxygen-c/html/dataloader_2base_8h_source.html

 #pragma once

 #include <torch/data/dataloader_options.h>
 #include <torch/data/detail/data_shuttle.h>
 #include <torch/data/detail/sequencers.h>
 #include <torch/data/iterator.h>
 #include <torch/data/samplers/random.h>
 #include <torch/data/worker_exception.h>
 #include <torch/types.h>

 #include <torch/csrc/utils/memory.h>
 #include <torch/csrc/utils/variadic.h>

 #include <c10/util/Exception.h>

 #include <cstddef>
 #include <exception>
 #include <memory>
 #include <thread>
 #include <type_traits>
 #include <utility>
 #include <vector>

 namespace torch {
 namespace data {
 /// Common base for DataLoader implementations.
 ///
 /// Owns the job/result `DataShuttle`, the result `Sequencer` and the worker
 /// thread handles, and implements the iteration protocol (`begin()`, `end()`,
 /// `next()`) on top of them. Subclasses supply the batch requests through
 /// `get_batch_request()`.
 ///
 /// \tparam Dataset The dataset type. It is only used through
 /// `get_batch(BatchRequest)`, whose result must be usable as an
 /// `optional<Batch>`.
 /// \tparam Batch The type of a batch yielded while iterating.
 /// \tparam BatchRequest The type handed to `Dataset::get_batch()` to describe
 /// which batch to produce.
 template <typename Dataset, typename Batch, typename BatchRequest>
 class DataLoaderBase {
  public:
   using BatchType = Batch;
   using BatchRequestType = BatchRequest;

   /// Constructs a new DataLoader from the `options` to configure it with and,
   /// optionally, the dataset that `next()` reads from directly when no worker
   /// threads are configured.
   ///
   /// NOTE(review): no worker threads are started here; `workers_` is
   /// presumably populated by subclasses -- confirm against the derived
   /// classes.
   DataLoaderBase(
       DataLoaderOptions options,
       std::unique_ptr<Dataset> main_thread_dataset = nullptr)
       : options_(std::move(options)),
         main_thread_dataset_(std::move(main_thread_dataset)),
         sequencer_(new_sequencer()) {}

   /// Joins the worker threads (if that has not happened yet) before the
   /// members they use are destroyed.
   virtual ~DataLoaderBase() {
     join();
   }

   /// Returns an iterator into the DataLoader.
   ///
   /// Calling this resets the DataLoader (see `reset()`), so it is checked via
   /// `AT_CHECK` that no jobs from a previous, not yet exhausted iterator are
   /// still in flight.
   Iterator<Batch> begin() {
     AT_CHECK(
         shuttle_.in_flight_jobs() == 0,
         "Attempted to get a new DataLoader iterator "
         "while another iterator is not yet exhausted");
     reset();
     // The iterator pulls its batches lazily through `next()`.
     return Iterator<Batch>(torch::make_unique<detail::ValidIterator<Batch>>(
         [this] { return this->next(); }));
   }

   /// Returns a special "sentinel" iterator to compare iterators obtained from
   /// `begin()` against.
   Iterator<Batch> end() {
     return Iterator<Batch>(
         torch::make_unique<detail::SentinelIterator<Batch>>());
   }

   /// Joins the DataLoader's worker threads and drains internal queues.
   ///
   /// Calling this more than once is a no-op after the first successful call.
   void join() {
     if (joined_) {
       return;
     }
     shuttle_.drain();
     // Send one 'quit' message per worker. Since a worker dies (exits its
     // thread) after receiving this message, each `QuitWorker()` message will be
     // read by exactly one worker.
     for (size_t w = 0; w < options_.workers; ++w) {
       push_job(QuitWorker());
     }
     for (auto& worker : workers_) {
       // `std::thread::join()` throws `std::system_error` for a thread that is
       // not joinable (default-constructed, moved-from or already joined).
       // `join()` also runs from the destructor, where such an exception would
       // terminate the program, so skip those handles.
       if (worker.joinable()) {
         worker.join();
       }
     }
     joined_ = true;
   }

   /// Returns the options with which the DataLoader was configured.
   const FullDataLoaderOptions& options() const noexcept {
     return options_;
   }

  protected:
   /// Simple mix-in to give something a sequence number.
   struct Sequenced {
     Sequenced() = default;
     Sequenced(size_t sqn) : sequence_number(sqn) {}
     // Zero-initialized so that a default-constructed `Job` or `Result` never
     // carries an indeterminate sequence number.
     size_t sequence_number = 0;
   };

   /// Tag type for the message that tells a worker thread to exit.
   struct QuitWorker {};

   /// A `Job` is either a `BatchRequest` (new indices to fetch data at) or a
   /// `QuitWorker` object, to indicate the worker should shut down.
   struct Job : Sequenced {
     Job() = default;
     Job(QuitWorker q, size_t sqn) : Sequenced(sqn), quit(q) {}
     Job(BatchRequest&& i, size_t sqn)
         : Sequenced(sqn), batch_request(std::move(i)) {}
     optional<QuitWorker> quit;
     optional<BatchRequest> batch_request;
   };

   /// The finished result of a job: either the produced batch or the exception
   /// that was thrown while producing it.
   struct Result : Sequenced {
     Result() = default;
     Result(optional<Batch>&& b, size_t sqn)
         : Sequenced(sqn), batch(std::move(b)) {}
     Result(std::exception_ptr exception, size_t sqn)
         : Sequenced(sqn), exception(std::move(exception)) {}
     optional<Batch> batch;
     std::exception_ptr exception;
   };

   /// Subclass hook for getting the next batch request. An empty optional
   /// means there is nothing left to request.
   virtual optional<BatchRequestType> get_batch_request() = 0;

   /// Resets the internal state of the DataLoader and pre-fetches new jobs:
   /// drains the shuttle, restarts the sequence numbering, installs a fresh
   /// sequencer and schedules up to `max_jobs` new jobs.
   virtual void reset() {
     shuttle_.drain();
     sequence_number_ = 0;
     sequencer_ = new_sequencer();
     prefetch();
   }

   /// Schedules `requested_jobs` many new batches to be fetched. Stops early
   /// as soon as `get_batch_request()` returns an empty optional.
   void prefetch(size_t requested_jobs) {
     for (size_t r = 0; r < requested_jobs; ++r) {
       if (auto batch_request = get_batch_request()) {
         this->push_job(std::move(*batch_request));
       } else {
         break;
       }
     }
   }

   /// Schedules the maximum number of jobs (based on the `max_jobs` option).
   void prefetch() {
     prefetch(options_.max_jobs);
   }

   /// Returns the next batch of data, or an empty optional if the DataLoader is
   /// exhausted.
   ///
   /// With worker threads configured, results are taken from the sequencer and
   /// an exception recorded by a worker is rethrown wrapped in a
   /// `WorkerException`. Without worker threads, the batch is fetched
   /// synchronously from `main_thread_dataset_`.
   optional<BatchType> next() {
     if (options_.workers > 0) {
       while (optional<Result> result = this->pop_result()) {
         if (result->exception) {
           throw WorkerException(result->exception);
         } else if (result->batch) {
           // One job just finished, so schedule one replacement before
           // handing the batch out.
           prefetch(1);
           return std::move(result->batch);
         }
         // A result with neither an exception nor a batch is skipped.
       }
     } else if (auto batch_request = get_batch_request()) {
       return this->main_thread_dataset_->get_batch(std::move(*batch_request));
     }
     return nullopt;
   }

   /// The function that worker threads run: pops jobs from the shuttle until a
   /// quit job arrives, and pushes either the batch or the exception thrown
   /// while fetching it back as the result, tagged with the job's sequence
   /// number.
   void worker_thread(Dataset& dataset) {
     while (true) {
       auto job = shuttle_.pop_job();
       if (job.quit) {
         break;
       }
       try {
         auto batch = dataset.get_batch(std::move(*job.batch_request));
         shuttle_.push_result({std::move(batch), job.sequence_number});
       } catch (...) {
         // Capture the failure instead of letting it escape the thread;
         // `next()` rethrows it to the consumer.
         shuttle_.push_result({std::current_exception(), job.sequence_number});
       }
     }
   }

   /// Convenience method that calls `shuttle_.push_job()` with the next
   /// sequence number.
   template <typename T>
   void push_job(T value) {
     shuttle_.push_job({std::move(value), sequence_number_++});
   }

   /// Convenience method that gets the next result from the sequencer, which
   /// in turn pulls from the shuttle using the configured `timeout` option.
   optional<Result> pop_result() {
     return sequencer_->next(
         [this] { return this->shuttle_.pop_result(this->options_.timeout); });
   }

   /// Convenience method that creates a new sequencer based on the
   /// `enforce_ordering` option.
   std::unique_ptr<detail::sequencers::Sequencer<Result>> new_sequencer() {
     if (options_.enforce_ordering) {
       return torch::make_unique<detail::sequencers::OrderedSequencer<Result>>(
           options_.max_jobs);
     }
     return torch::make_unique<detail::sequencers::NoSequencer<Result>>();
   }

   /// The options the DataLoader was configured with.
   const FullDataLoaderOptions options_;

   /// The dataset for the main thread; `next()` reads from it when the number
   /// of worker threads is zero.
   std::unique_ptr<Dataset> main_thread_dataset_;

   /// The sequence number for the next batch to be retrieved from the dataset.
   size_t sequence_number_ = 0;

   /// The worker threads, running the `worker_thread()` method.
   std::vector<std::thread> workers_;

   /// The `DataShuttle` which takes care of the life cycle of a job.
   detail::DataShuttle<Job, Result> shuttle_;

   /// The `Sequencer`, which handles optional ordering of batches.
   std::unique_ptr<detail::sequencers::Sequencer<Result>> sequencer_;

   /// True if the DataLoader has joined its worker threads.
   bool joined_ = false;
 };
 } // namespace data
 } // namespace torch
torch::data::DataLoaderBase::prefetch
void prefetch()
Schedules the maximum number of jobs (based on the max_jobs option).
Definition: base.h:158

torch::data::DataLoaderBase::join
void join()
Joins the DataLoader's worker threads and drains internal queues.
Definition: base.h:77

torch::data::DataLoaderBase::sequence_number_
size_t sequence_number_
The sequence number for the next batch to be retrieved from the dataset.
Definition: base.h:231

torch::data::FullDataLoaderOptions
Like DataLoaderOptions, but without any unconfigured state.
Definition: dataloader_options.h:48

torch::data::DataLoaderBase::options
const FullDataLoaderOptions & options() const noexcept
Returns the options with which the DataLoader was configured.
Definition: base.h:95

T
Definition: dataloader.cpp:482

std
Definition: interned_strings.h:312

torch::data::DataLoaderBase::joined_
bool joined_
True if the DataLoader has joined its worker threads.
Definition: base.h:243

torch::data::DataLoaderBase::options_
const FullDataLoaderOptions options_
The options the DataLoader was configured with.
Definition: base.h:221

torch::data::Iterator
Definition: iterator.h:132

torch::data::DataLoaderBase::Sequenced
Simple mix-in to give something a sequence number.
Definition: base.h:101

torch::data::DataLoaderBase::Result
The finished result of a job.
Definition: base.h:121

torch::data::DataLoaderBase::get_batch_request
virtual optional< BatchRequestType > get_batch_request()=0
Subclass hook for getting the next batch request.

torch::data::DataLoaderOptions
Options to configure a DataLoader.
Definition: dataloader_options.h:13

torch::data::DataLoaderBase::end
Iterator< Batch > end()
Returns a special "sentinel" iterator that compares equal with a non-sentinel iterator once the DataL...
Definition: base.h:69

torch::data::DataLoaderBase::DataLoaderBase
DataLoaderBase(DataLoaderOptions options, std::unique_ptr< Dataset > main_thread_dataset=nullptr)
Constructs a new DataLoader from a dataset to sample from, options to configure the DataLoader with...
Definition: base.h:35

torch::data::detail::DataShuttle
Encapsulates the full life cycle of DataLoader jobs.
Definition: data_shuttle.h:26

c10::optional
Definition: Optional.h:118

torch::data::DataLoaderBase::reset
virtual void reset()
Resets the internal state of the DataLoader, optionally pre-fetching new jobs.
Definition: base.h:138

torch
Definition: jit_type.h:17

torch::data::DataLoaderBase::prefetch
void prefetch(size_t requested_jobs)
Schedules requested_jobs many new batches to be fetched.
Definition: base.h:147

torch::data::DataLoaderBase::pop_result
optional< Result > pop_result()
Convenience method that gets the next result from the sequencer.
Definition: base.h:205

torch::data::WorkerException
An exception thrown when a DataLoader's worker thread throws an exception, which is caught...
Definition: worker_exception.h:13

torch::data::DataLoaderBase::shuttle_
detail::DataShuttle< Job, Result > shuttle_
The DataShuttle which takes care of the life cycle of a job.
Definition: base.h:237

torch::data::DataLoaderBase::workers_
std::vector< std::thread > workers_
The worker threads, running the worker_thread() method.
Definition: base.h:234

torch::data::DataLoaderBase::sequencer_
std::unique_ptr< detail::sequencers::Sequencer< Result > > sequencer_
The Sequencer, which handles optional ordering of batches.
Definition: base.h:240

torch::data::DataLoaderBase
Definition: base.h:27

torch::data::DataLoaderBase::begin
Iterator< Batch > begin()
Returns an iterator into the DataLoader.
Definition: base.h:57

torch::data::DataLoaderBase::push_job
void push_job(T value)
Convenience method that calls shuttle_.push_job() with the next sequence number.
Definition: base.h:200

torch::data::DataLoaderBase::next
optional< BatchType > next()
Returns the next batch of data, or an empty optional if the DataLoader is exhausted.
Definition: base.h:165

torch::data::DataLoaderBase::QuitWorker
Definition: base.h:107

torch::data::DataLoaderBase::new_sequencer
std::unique_ptr< detail::sequencers::Sequencer< Result > > new_sequencer()
Convenience method that creates a new sequencer based on the enforce_ordering option.
Definition: base.h:212

torch::data::DataLoaderBase::Job
A Job is either a BatchRequest (new indices to fetch data at) or a QuitWorker object, to indicate the worker should shut down.
Definition: base.h:111

torch::data::DataLoaderBase::main_thread_dataset_
std::unique_ptr< Dataset > main_thread_dataset_
The dataset for the main thread, only has a value if the number of worker threads was configured as zero.
Definition: base.h:227

torch::data::DataLoaderBase::worker_thread
void worker_thread(Dataset &dataset)
The function that worker threads run.
Definition: base.h:182