# Source code for ray.rllib.offline.input_reader

from abc import ABCMeta, abstractmethod
import logging
import numpy as np
import threading

from ray.rllib.policy.sample_batch import MultiAgentBatch
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.framework import try_import_tf
from typing import Dict, List
from ray.rllib.utils.typing import TensorType, SampleBatchType

tf1, tf, tfv = try_import_tf()

logger = logging.getLogger(__name__)


@PublicAPI
class InputReader(metaclass=ABCMeta):
    """API for collecting and returning experiences during policy evaluation."""

    @abstractmethod
    @PublicAPI
    def next(self) -> SampleBatchType:
        """Returns the next batch of read experiences.

        Returns:
            The experience read (SampleBatch or MultiAgentBatch).
        """
        raise NotImplementedError

    @PublicAPI
    def tf_input_ops(self, queue_size: int = 1) -> Dict[str, TensorType]:
        """Returns TensorFlow queue ops for reading inputs from this reader.

        The main use of these ops is for integration into custom model losses.
        For example, you can use tf_input_ops() to read from files of external
        experiences to add an imitation learning loss to your model.

        This method creates a queue runner thread that will call next() on this
        reader repeatedly to feed the TensorFlow queue.

        Args:
            queue_size: Max elements to allow in the TF queue.

        .. testcode::
            :skipif: True

            from ray.rllib.models.modelv2 import ModelV2
            from ray.rllib.offline.json_reader import JsonReader
            imitation_loss = ...
            class MyModel(ModelV2):
                def custom_loss(self, policy_loss, loss_inputs):
                    reader = JsonReader(...)
                    input_ops = reader.tf_input_ops()
                    logits, _ = self._build_layers_v2(
                        {"obs": input_ops["obs"]},
                        self.num_outputs, self.options)
                    il_loss = imitation_loss(logits,
                                             input_ops["action"])
                    return policy_loss + il_loss

        You can find a runnable version of this in examples/custom_loss.py.

        Returns:
            Dict of Tensors, one for each column of the read SampleBatch.

        Raises:
            ValueError: If tf_input_ops() was already called on this reader
                (only one queue runner per reader is supported).
            NotImplementedError: If the reader produces MultiAgentBatches.
        """
        if hasattr(self, "_queue_runner"):
            raise ValueError(
                "A queue runner already exists for this input reader. "
                "You can only call tf_input_ops() once per reader."
            )

        logger.info("Reading initial batch of data from input reader.")
        batch = self.next()
        if isinstance(batch, MultiAgentBatch):
            raise NotImplementedError(
                "tf_input_ops() is not implemented for multi agent batches"
            )

        # Note on casting to `np.asarray(batch[k])`: In order to get all keys
        # that are numbers, we need to convert to numpy everything that is not
        # a numpy array. This is because SampleBatches used to only hold numpy
        # arrays, but since our RNN efforts under RLModules, we also allow
        # lists.
        # Convert each column exactly once and reuse the converted arrays
        # below. Previously only the key filter used the numpy conversion,
        # while `dtypes`/`shapes` accessed `.dtype`/`.shape` on the raw
        # column, which raised AttributeError for list-valued columns.
        np_cols = {k: np.asarray(batch[k]) for k in sorted(batch.keys())}
        keys = [
            k for k, col in np_cols.items() if np.issubdtype(col.dtype, np.number)
        ]
        dtypes = [np_cols[k].dtype for k in keys]
        # -1 in the leading dim: batches of arbitrary length can be dequeued.
        shapes = {k: (-1,) + np_cols[k].shape[1:] for k in keys}
        queue = tf1.FIFOQueue(capacity=queue_size, dtypes=dtypes, names=keys)
        tensors = queue.dequeue()

        logger.info("Creating TF queue runner for {}".format(self))
        self._queue_runner = _QueueRunner(self, queue, keys, dtypes)
        # Pre-feed the batch we already read so it is not lost.
        self._queue_runner.enqueue(batch)
        self._queue_runner.start()

        out = {k: tf.reshape(t, shapes[k]) for k, t in tensors.items()}
        return out
class _QueueRunner(threading.Thread):
    """Daemon thread feeding a TensorFlow FIFO queue from an InputReader.

    Runs forever: each iteration pulls the next sample batch from the reader
    and pushes its (numeric) columns into the TF queue via a feed_dict.
    """

    def __init__(
        self,
        input_reader: InputReader,
        queue: "tf1.FIFOQueue",
        keys: List[str],
        dtypes: "tf.dtypes.DType",
    ):
        threading.Thread.__init__(self)
        # Capture the session active at construction time; enqueue ops are
        # always run against it.
        self.sess = tf1.get_default_session()
        self.daemon = True
        self.input_reader = input_reader
        self.keys = keys
        self.queue = queue
        # One placeholder per column; the enqueue op maps column name ->
        # placeholder so feed order does not matter.
        self.placeholders = [tf1.placeholder(dt) for dt in dtypes]
        self.enqueue_op = queue.enqueue(dict(zip(keys, self.placeholders)))

    def enqueue(self, batch: SampleBatchType):
        """Pushes one batch into the TF queue (blocking if the queue is full)."""
        feed = dict(zip(self.placeholders, (batch[k] for k in self.keys)))
        self.sess.run(self.enqueue_op, feed_dict=feed)

    def run(self):
        # Best-effort loop: a failed read is logged and the thread keeps going.
        while True:
            try:
                self.enqueue(self.input_reader.next())
            except Exception:
                logger.exception("Error reading from input")