Adds observation term history support to Observation Manager (#1439)

jtigue-bdai · fyu-bdai · kellyguo11 · web-flow · commit f7b59b315530 · 2024-12-15T22:30:24.000-05:00
# Description  This PR adds observation history by adding configuration parameters to ObservationTerms and having the ObservationManager handle the collection and storage of the histories via CircularBuffers. Fixes #1208  ## Type of change  - New feature (non-breaking change which adds functionality) ## Checklist - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format` - [x] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file - [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there  --------- Signed-off-by: Kelly Guo <kellyg@nvidia.com> Co-authored-by: Fangzhou Yu <156015326+fyu-bdai@users.noreply.github.com> Co-authored-by: Kelly Guo <kellyg@nvidia.com>
diff --git a/source/extensions/omni.isaac.lab/config/extension.toml b/source/extensions/omni.isaac.lab/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.27.29"
+version = "0.28.0"
 
 # Description
 title = "Isaac Lab framework for Robot Learning"
diff --git a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
@@ -1,6 +1,18 @@
 Changelog
 ---------
 
+0.28.0 (2024-12-15)
+~~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added observation history computation to :class:`omni.isaac.lab.managers.observation_manager.ObservationManager`.
+* Added ``history_length`` and ``flatten_history_dim`` configuration parameters to :class:`omni.isaac.lab.managers.manager_term_cfg.ObservationTermCfg`.
+* Added ``history_length`` and ``flatten_history_dim`` configuration parameters to :class:`omni.isaac.lab.managers.manager_term_cfg.ObservationGroupCfg`.
+* Added full buffer property to :class:`omni.isaac.lab.utils.buffers.circular_buffer.CircularBuffer`.
+
+
 0.27.29 (2024-12-15)
 ~~~~~~~~~~~~~~~~~~~~
 
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/managers/manager_term_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/managers/manager_term_cfg.py
@@ -180,6 +180,19 @@ class ObservationTermCfg(ManagerTermBaseCfg):
     please make sure the length of the tuple matches the dimensions of the tensor outputted from the term.
     """
 
+    history_length: int = 0
+    """Number of past observations to store in the observation buffers. Defaults to 0, meaning no history.
+
+    Observation history initializes to empty, but is filled with the first append after reset or initialization.
+    Subsequent appends only add a single entry to the history buffer. If flatten_history_dim is set to True, the
+    source data of shape (N, H, D, ...), where N is the batch dimension and H is the history length, will be reshaped
+    to a 2D tensor of shape (N, H*D*...). Otherwise, the data will be returned as is.
+    """
+
+    flatten_history_dim: bool = True
+    """Whether or not the observation manager should flatten history-based observation terms to a 2D (N, D) tensor.
+    Defaults to True."""
+
 
 @configclass
 class ObservationGroupCfg:
@@ -201,6 +214,22 @@ class ObservationGroupCfg:
     Otherwise, no corruption is applied.
     """
 
+    history_length: int | None = None
+    """Number of past observations to store in the observation buffers for all observation terms in the group.
+
+    This parameter will override :attr:`ObservationTermCfg.history_length` if set. Defaults to None. If None, each
+    term's history will be controlled on a per-term basis. See :class:`ObservationTermCfg` for details on the
+    history_length implementation.
+    """
+
+    flatten_history_dim: bool = True
+    """Flag to flatten history-based observation terms to a 2D (num_env, D) tensor for all observation terms in group.
+    Defaults to True.
+
+    This parameter will override all :attr:`ObservationTermCfg.flatten_history_dim` in the group if
+    ObservationGroupCfg.history_length is set.
+    """
+
 
 ##
 # Event manager
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/managers/observation_manager.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/managers/observation_manager.py
@@ -8,12 +8,14 @@
 from __future__ import annotations
 
 import inspect
+import numpy as np
 import torch
 from collections.abc import Sequence
 from prettytable import PrettyTable
 from typing import TYPE_CHECKING
 
 from omni.isaac.lab.utils import modifiers
+from omni.isaac.lab.utils.buffers import CircularBuffer
 
 from .manager_base import ManagerBase, ManagerTermBase
 from .manager_term_cfg import ObservationGroupCfg, ObservationTermCfg
@@ -45,6 +47,11 @@ class ObservationManager(ManagerBase):
         concatenated. In this case, please set the :attr:`ObservationGroupCfg.concatenate_terms` attribute in the
         group configuration to False.
 
+    Observations can also have history. This means a running history is updated per sim step. History can be controlled
+    per :class:`ObservationTermCfg` (see :attr:`ObservationTermCfg.history_length` and
+    :attr:`ObservationTermCfg.flatten_history_dim`). History can also be controlled via :class:`ObservationGroupCfg`,
+    where the group configuration overrides the per-term configuration if set. History is ordered oldest to newest.
+
     The observation manager can be used to compute observations for all the groups or for a specific group. The
     observations are computed by calling the registered functions for each term in the group. The functions are
     called in the order of the terms in the group. The functions are expected to return a tensor with shape
@@ -174,12 +181,17 @@ def group_obs_concatenate(self) -> dict[str, bool]:
 
     def reset(self, env_ids: Sequence[int] | None = None) -> dict[str, float]:
         # call all terms that are classes
-        for group_cfg in self._group_obs_class_term_cfgs.values():
+        for group_name, group_cfg in self._group_obs_class_term_cfgs.items():
             for term_cfg in group_cfg:
                 term_cfg.func.reset(env_ids=env_ids)
+            # reset terms with history
+            for term_name in self._group_obs_term_names[group_name]:
+                if term_name in self._group_obs_term_history_buffer[group_name]:
+                    self._group_obs_term_history_buffer[group_name][term_name].reset(batch_ids=env_ids)
         # call all modifiers that are classes
         for mod in self._group_obs_class_modifiers:
             mod.reset(env_ids=env_ids)
+
         # nothing to log here
         return {}
 
@@ -248,7 +260,7 @@ def compute_group(self, group_name: str) -> torch.Tensor | dict[str, torch.Tenso
         obs_terms = zip(group_term_names, self._group_obs_term_cfgs[group_name])
 
         # evaluate terms: compute, add noise, clip, scale, custom modifiers
-        for name, term_cfg in obs_terms:
+        for term_name, term_cfg in obs_terms:
             # compute term's value
             obs: torch.Tensor = term_cfg.func(self._env, **term_cfg.params).clone()
             # apply post-processing
@@ -261,8 +273,17 @@ def compute_group(self, group_name: str) -> torch.Tensor | dict[str, torch.Tenso
                 obs = obs.clip_(min=term_cfg.clip[0], max=term_cfg.clip[1])
             if term_cfg.scale is not None:
                 obs = obs.mul_(term_cfg.scale)
-            # add value to list
-            group_obs[name] = obs
+            # Update the history buffer if observation term has history enabled
+            if term_cfg.history_length > 0:
+                self._group_obs_term_history_buffer[group_name][term_name].append(obs)
+                if term_cfg.flatten_history_dim:
+                    group_obs[term_name] = self._group_obs_term_history_buffer[group_name][term_name].buffer.reshape(
+                        self._env.num_envs, -1
+                    )
+                else:
+                    group_obs[term_name] = self._group_obs_term_history_buffer[group_name][term_name].buffer
+            else:
+                group_obs[term_name] = obs
 
         # concatenate all observations in the group together
         if self._group_obs_concatenate[group_name]:
@@ -283,7 +304,7 @@ def _prepare_terms(self):
         self._group_obs_term_cfgs: dict[str, list[ObservationTermCfg]] = dict()
         self._group_obs_class_term_cfgs: dict[str, list[ObservationTermCfg]] = dict()
         self._group_obs_concatenate: dict[str, bool] = dict()
-
+        self._group_obs_term_history_buffer: dict[str, dict] = dict()
         # create a list to store modifiers that are classes
         # we store it as a separate list to only call reset on them and prevent unnecessary calls
         self._group_obs_class_modifiers: list[modifiers.ModifierBase] = list()
@@ -309,6 +330,7 @@ def _prepare_terms(self):
             self._group_obs_term_dim[group_name] = list()
             self._group_obs_term_cfgs[group_name] = list()
             self._group_obs_class_term_cfgs[group_name] = list()
+            group_entry_history_buffer: dict[str, CircularBuffer] = dict()
             # read common config for the group
             self._group_obs_concatenate[group_name] = group_cfg.concatenate_terms
             # check if config is dict already
@@ -319,7 +341,7 @@ def _prepare_terms(self):
             # iterate over all the terms in each group
             for term_name, term_cfg in group_cfg_items:
                 # skip non-obs settings
-                if term_name in ["enable_corruption", "concatenate_terms"]:
+                if term_name in ["enable_corruption", "concatenate_terms", "history_length", "flatten_history_dim"]:
                     continue
                 # check for non config
                 if term_cfg is None:
@@ -335,12 +357,26 @@ def _prepare_terms(self):
                 # check noise settings
                 if not group_cfg.enable_corruption:
                     term_cfg.noise = None
+                # check group history params and override terms
+                if group_cfg.history_length is not None:
+                    term_cfg.history_length = group_cfg.history_length
+                    term_cfg.flatten_history_dim = group_cfg.flatten_history_dim
                 # add term config to list to list
                 self._group_obs_term_names[group_name].append(term_name)
                 self._group_obs_term_cfgs[group_name].append(term_cfg)
-
                 # call function the first time to fill up dimensions
                 obs_dims = tuple(term_cfg.func(self._env, **term_cfg.params).shape)
+                # create history buffers and calculate history term dimensions
+                if term_cfg.history_length > 0:
+                    group_entry_history_buffer[term_name] = CircularBuffer(
+                        max_len=term_cfg.history_length, batch_size=self._env.num_envs, device=self._env.device
+                    )
+                    old_dims = list(obs_dims)
+                    old_dims.insert(1, term_cfg.history_length)
+                    obs_dims = tuple(old_dims)
+                    if term_cfg.flatten_history_dim:
+                        obs_dims = (obs_dims[0], np.prod(obs_dims[1:]))
+
                 self._group_obs_term_dim[group_name].append(obs_dims[1:])
 
                 # if scale is set, check if single float or tuple
@@ -411,3 +447,5 @@ def _prepare_terms(self):
                     self._group_obs_class_term_cfgs[group_name].append(term_cfg)
                     # call reset (in-case above call to get obs dims changed the state)
                     term_cfg.func.reset()
+            # add history buffers for each group
+            self._group_obs_term_history_buffer[group_name] = group_entry_history_buffer
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/utils/buffers/circular_buffer.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/utils/buffers/circular_buffer.py
@@ -75,6 +75,16 @@ def current_length(self) -> torch.Tensor:
         """
         return torch.minimum(self._num_pushes, self._max_len)
 
+    @property
+    def buffer(self) -> torch.Tensor:
+        """Complete circular buffer with most recent entry at the end and oldest entry at the beginning.
+        Returns:
+            Complete circular buffer with most recent entry at the end and oldest entry at the beginning of dimension 1. The shape is [batch_size, max_length, data.shape[1:]].
+        """
+        buf = self._buffer.clone()
+        buf = torch.roll(buf, shifts=self.max_length - self._pointer - 1, dims=0)
+        return torch.transpose(buf, dim0=0, dim1=1)
+
     """
     Operations.
     """
@@ -89,8 +99,10 @@ def reset(self, batch_ids: Sequence[int] | None = None):
         if batch_ids is None:
             batch_ids = slice(None)
         # reset the number of pushes for the specified batch indices
-        # note: we don't need to reset the buffer since it will be overwritten. The pointer handles this.
         self._num_pushes[batch_ids] = 0
+        if self._buffer is not None:
+            # set buffer at batch_id reset indices to 0.0 so that the buffer() getter returns the cleared circular buffer after reset.
+            self._buffer[:, batch_ids, :] = 0.0
 
     def append(self, data: torch.Tensor):
         """Append the data to the circular buffer.
@@ -106,15 +118,20 @@ def append(self, data: torch.Tensor):
         if data.shape[0] != self.batch_size:
             raise ValueError(f"The input data has {data.shape[0]} environments while expecting {self.batch_size}")
 
-        # at the fist call, initialize the buffer
+        # at the first call, initialize the buffer size
         if self._buffer is None:
             self._pointer = -1
             self._buffer = torch.empty((self.max_length, *data.shape), dtype=data.dtype, device=self._device)
         # move the head to the next slot
         self._pointer = (self._pointer + 1) % self.max_length
         # add the new data to the last layer
         self._buffer[self._pointer] = data.to(self._device)
-        # increment number of number of pushes
+        # Check for batches with zero pushes and initialize all values in batch to first append
+        if 0 in self._num_pushes.tolist():
+            fill_ids = [i for i, x in enumerate(self._num_pushes.tolist()) if x == 0]
+            self._num_pushes.tolist().index(0) if 0 in self._num_pushes.tolist() else None
+            self._buffer[:, fill_ids, :] = data.to(self._device)[fill_ids]
+        # increment the number of pushes for all batches
         self._num_pushes += 1
 
     def __getitem__(self, key: torch.Tensor) -> torch.Tensor:
diff --git a/source/extensions/omni.isaac.lab/test/managers/test_observation_manager.py b/source/extensions/omni.isaac.lab/test/managers/test_observation_manager.py
diff --git a/source/extensions/omni.isaac.lab/test/utils/test_circular_buffer.py b/source/extensions/omni.isaac.lab/test/utils/test_circular_buffer.py