Source code for ray.rllib.offline.input_reader
from abc import ABCMeta, abstractmethod
import logging
import threading
from typing import Dict, List

import numpy as np

from ray.rllib.policy.sample_batch import MultiAgentBatch
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.typing import SampleBatchType, TensorType

tf1, tf, tfv = try_import_tf()

logger = logging.getLogger(__name__)
@PublicAPI
class InputReader(metaclass=ABCMeta):
"""API for collecting and returning experiences during policy evaluation."""
@abstractmethod
@PublicAPI
def next(self) -> SampleBatchType:
"""Returns the next batch of read experiences.
Returns:
The experience read (SampleBatch or MultiAgentBatch).
"""
raise NotImplementedError
@PublicAPI
def tf_input_ops(self, queue_size: int = 1) -> Dict[str, TensorType]:
"""Returns TensorFlow queue ops for reading inputs from this reader.
The main use of these ops is for integration into custom model losses.
For example, you can use tf_input_ops() to read from files of external
experiences to add an imitation learning loss to your model.
This method creates a queue runner thread that will call next() on this
reader repeatedly to feed the TensorFlow queue.
Args:
queue_size: Max elements to allow in the TF queue.
.. testcode::
:skipif: True
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.offline.json_reader import JsonReader
imitation_loss = ...
class MyModel(ModelV2):
def custom_loss(self, policy_loss, loss_inputs):
reader = JsonReader(...)
input_ops = reader.tf_input_ops()
logits, _ = self._build_layers_v2(
{"obs": input_ops["obs"]},
self.num_outputs, self.options)
il_loss = imitation_loss(logits, input_ops["action"])
return policy_loss + il_loss
You can find a runnable version of this in examples/custom_loss.py.
Returns:
Dict of Tensors, one for each column of the read SampleBatch.
"""
if hasattr(self, "_queue_runner"):
raise ValueError(
"A queue runner already exists for this input reader. "
"You can only call tf_input_ops() once per reader."
)
logger.info("Reading initial batch of data from input reader.")
batch = self.next()
        if isinstance(batch, MultiAgentBatch):
            raise NotImplementedError(
                "tf_input_ops() is not implemented for multi-agent batches."
            )
        # Note on the `np.array(batch[k])` cast: To find all columns with a
        # numeric dtype, we first convert anything that is not already a numpy
        # array. SampleBatches used to hold only numpy arrays, but since the
        # RNN work under RLModules they may also contain lists.
        keys = [
            k
            for k in sorted(batch.keys())
            if np.issubdtype(np.array(batch[k]).dtype, np.number)
        ]
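        # Illustrative aside (an assumption, not from the original source):
        # np.issubdtype(np.array([1, 2]).dtype, np.number) is True, while
        # np.issubdtype(np.array(["a"]).dtype, np.number) is False, so
        # non-numeric columns (e.g. an object-dtype "infos" column) are skipped.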
        dtypes = [batch[k].dtype for k in keys]
        shapes = {k: (-1,) + batch[k].shape[1:] for k in keys}
queue = tf1.FIFOQueue(capacity=queue_size, dtypes=dtypes, names=keys)
tensors = queue.dequeue()
logger.info("Creating TF queue runner for {}".format(self))
self._queue_runner = _QueueRunner(self, queue, keys, dtypes)
self._queue_runner.enqueue(batch)
self._queue_runner.start()
out = {k: tf.reshape(t, shapes[k]) for k, t in tensors.items()}
return out
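
# A minimal usage sketch (added for illustration; not part of the original
# module). It shows one way tf_input_ops() might be driven in TF1 graph mode:
# _QueueRunner grabs the default session, so the call must happen inside a
# `with tf1.Session()` block. JsonReader is a real RLlib reader, but the
# "offline_data.json" path and the "obs" column are assumptions.
def _example_tf_input_ops_usage():
    from ray.rllib.offline.json_reader import JsonReader

    with tf1.Session() as sess:
        reader = JsonReader("offline_data.json")  # Hypothetical data path.
        input_ops = reader.tf_input_ops(queue_size=4)
        # Evaluating a returned tensor dequeues the next batch from the queue.
        obs_tensor = input_ops["obs"]
        print(sess.run(obs_tensor).shape)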


class _QueueRunner(threading.Thread):
    """Thread that feeds a TF queue from an InputReader."""
    def __init__(
        self,
        input_reader: InputReader,
        queue: "tf1.FIFOQueue",
        keys: List[str],
        dtypes: List["tf.dtypes.DType"],
    ):
threading.Thread.__init__(self)
self.sess = tf1.get_default_session()
self.daemon = True
self.input_reader = input_reader
self.keys = keys
self.queue = queue
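        # Descriptive note (added): one feed placeholder per column; enqueue()
        # pushes a {column name: placeholder} dict into the named FIFOQueue.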
self.placeholders = [tf1.placeholder(dtype) for dtype in dtypes]
self.enqueue_op = queue.enqueue(dict(zip(keys, self.placeholders)))

    def enqueue(self, batch: SampleBatchType):
data = {self.placeholders[i]: batch[key] for i, key in enumerate(self.keys)}
self.sess.run(self.enqueue_op, feed_dict=data)

    def run(self):
while True:
try:
batch = self.input_reader.next()
self.enqueue(batch)
except Exception:
logger.exception("Error reading from input")