diff --git a/examples/configs/locobot_p2p_nav_house.yaml b/examples/configs/locobot_p2p_nav_house.yaml new file mode 100644 index 000000000..97b43a3b1 --- /dev/null +++ b/examples/configs/locobot_p2p_nav_house.yaml @@ -0,0 +1,63 @@ +# scene +scene: building +model_id: Ohoopee +build_graph: true +load_texture: true +trav_map_erosion: 2 + +# robot +robot: Locobot +velocity: 0.1 + +# task, observation and action +task: pointgoal # pointgoal|objectgoal|areagoal|reaching +fisheye: false + +is_discrete: false +additional_states_dim: 3 + +# reward +reward_type: geodesic +success_reward: 10.0 +slack_reward: -0.01 +potential_reward_weight: 1.0 +collision_reward_weight: -0.1 +collision_ignore_link_a_ids: [1, 2, 3, 4] # ignore collision with these agent's link ids + +# discount factor +discount_factor: 0.99 + +# termination condition +death_z_thresh: 0.2 +dist_tol: 0.333 # body width +max_step: 500 + +# sensor +output: [sensor, rgb, depth] +resolution: 256 +fov: 90 + +# display +use_filler: true +display_ui: false +show_diagnostics: false +ui_num: 2 +ui_components: [RGB_FILLED, DEPTH] +random: + random_initial_pose: false + random_target_pose: false + random_init_x_range: [-0.1, 0.1] + random_init_y_range: [-0.1, 0.1] + random_init_z_range: [-0.1, 0.1] + random_init_rot_range: [-0.1, 0.1] + +speed: + timestep: 0.001 + frameskip: 10 + +mode: web_ui #gui|headless +verbose: false +fast_lq_render: true + +# debug +debug: false diff --git a/gibson2/core/physics/interactive_objects.py b/gibson2/core/physics/interactive_objects.py index ef5134132..83ef92770 100644 --- a/gibson2/core/physics/interactive_objects.py +++ b/gibson2/core/physics/interactive_objects.py @@ -208,7 +208,7 @@ class InteractiveObj(object): org_pos, org_orn = p.getBasePositionAndOrientation(self.body_id) p.resetBasePositionAndOrientation(self.body_id, pos, org_orn) - def set_position_rotation(self, pos, orn): + def set_position_orientation(self, pos, orn): p.resetBasePositionAndOrientation(self.body_id, pos, orn) diff --git a/gibson2/core/physics/robot_locomotors.py b/gibson2/core/physics/robot_locomotors.py index 835f28368..a99c566f5 100644 --- a/gibson2/core/physics/robot_locomotors.py +++ b/gibson2/core/physics/robot_locomotors.py @@ -95,6 +95,10 @@ class LocomotorRobot(BaseRobot): def set_orientation(self, orn): self.robot_body.set_orientation(orn) + def set_position_orientation(self, pos, orn): + self.robot_body.reset_position(pos) + self.robot_body.set_orientation(orn) + def move_by(self, delta): new_pos = np.array(delta) + self.get_position() self.robot_body.reset_position(new_pos) @@ -117,6 +121,10 @@ class LocomotorRobot(BaseRobot): new_orn = qmult((euler2quat(delta, 0, 0)), orn) self.robot_body.set_orientation(new_orn) + def keep_still(self): + for n, j in enumerate(self.ordered_joints): + j.set_motor_velocity(0.0) + def get_rpy(self): return self.robot_body.get_rpy() @@ -320,8 +328,8 @@ class Husky(LocomotorRobot): self.config = config self.torque = config.get("torque", 0.03) LocomotorRobot.__init__(self, - "husky.urdf", - "base_link", + "husky.urdf", + "base_link", action_dim=4, sensor_dim=17, power=2.5, @@ -382,8 +390,8 @@ class Quadrotor(LocomotorRobot): self.config = config self.torque = config.get("torque", 0.02) LocomotorRobot.__init__(self, - "quadrotor.urdf", - "base_link", + "quadrotor.urdf", + "base_link", action_dim=6, sensor_dim=6, power=2.5, @@ -436,8 +444,8 @@ class Turtlebot(LocomotorRobot): self.action_high = config.get("action_high", None) self.action_low = config.get("action_low", None) 
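Review note on the new LocomotorRobot.keep_still() helper above: it zeroes every joint motor velocity so the base does not creep while the environment runs collision-checking steps during reset. A minimal sketch of the equivalent raw pybullet calls (hypothetical standalone function; the real method goes through the robot's ordered_joints wrappers):

import pybullet as p

def keep_still(body_id):
    # command every motorized joint to a zero target velocity
    for joint_index in range(p.getNumJoints(body_id)):
        p.setJointMotorControl2(body_id, joint_index, p.VELOCITY_CONTROL, targetVelocity=0.0)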
LocomotorRobot.__init__(self, - "turtlebot/turtlebot.urdf", - "base_link", + "turtlebot/turtlebot.urdf", + "base_link", action_dim=2, sensor_dim=16, power=2.5, @@ -490,8 +498,8 @@ class Freight(LocomotorRobot): self.config = config self.velocity = config.get("velocity", 1.0) LocomotorRobot.__init__(self, - "fetch/freight.urdf", - "base_link", + "fetch/freight.urdf", + "base_link", action_dim=2, sensor_dim=16, power=2.5, @@ -542,8 +550,8 @@ class Fetch(LocomotorRobot): self.action_high = config.get("action_high", None) self.action_low = config.get("action_low", None) LocomotorRobot.__init__(self, - "fetch/fetch.urdf", - "base_link", + "fetch/fetch.urdf", + "base_link", action_dim=6, sensor_dim=55, power=2.5, @@ -595,8 +603,8 @@ class JR2(LocomotorRobot): self.config = config self.velocity = config.get('velocity', 0.1) LocomotorRobot.__init__(self, - "jr2_urdf/jr2.urdf", - "base_link", + "jr2_urdf/jr2.urdf", + "base_link", action_dim=4, sensor_dim=17, power=2.5, @@ -646,8 +654,8 @@ class JR2_Kinova(LocomotorRobot): self.arm_dim = 5 LocomotorRobot.__init__(self, - "jr2_urdf/jr2_kinova.urdf", - "base_link", + "jr2_urdf/jr2_kinova.urdf", + "base_link", action_dim=10, sensor_dim=46, power=2.5, @@ -709,3 +717,70 @@ class JR2_Kinova(LocomotorRobot): for j in range(16, 28): p.setCollisionFilterPair(robot_id, robot_id, joint, j, 0) return ids + + +class Locobot(LocomotorRobot): + mjcf_scaling = 1 + model_type = "URDF" + default_scale = 1 + + def __init__(self, config): + self.config = config + self.velocity = config.get('velocity', 0.1) + self.wheel_dim = 2 + self.action_high = config.get("action_high", None) + self.action_low = config.get("action_low", None) + LocomotorRobot.__init__(self, + "locobot/locobot.urdf", + "base_link", + action_dim=self.wheel_dim, + sensor_dim=55, # TODO: what is sensor_dim? 
+ power=2.5, + scale=config.get("robot_scale", self.default_scale), + resolution=config.get("resolution", 64), + is_discrete=config.get("is_discrete", True), + control="velocity", + self_collision=False) + + def set_up_continuous_action_space(self): + if self.action_high is not None and self.action_low is not None: + self.action_high = np.full(shape=self.wheel_dim, fill_value=self.action_high) + self.action_low = np.full(shape=self.wheel_dim, fill_value=self.action_low) + else: + self.action_high = np.array([self.velocity] * self.wheel_dim) + self.action_low = -self.action_high + + self.action_space = gym.spaces.Box(shape=(self.action_dim,), + low=-1.0, + high=1.0, + dtype=np.float32) + + def set_up_discrete_action_space(self): + assert False, "Locobot does not support discrete actions" + + def apply_action(self, action): + real_action = self.action_to_real_action(action) + self.apply_real_action(real_action) + + def calc_state(self): + base_state = LocomotorRobot.calc_state(self) + angular_velocity = self.robot_body.angular_velocity() + print(len(base_state), len(angular_velocity)) + return np.concatenate((base_state, np.array(angular_velocity))) + + def get_end_effector_position(self): + return self.parts['gripper_link'].get_position() + + def load(self): + ids = self._load_model() + self.eyes = self.parts["eyes"] + + robot_id = ids[0] + + # disable collision for immediate parent + for joint in range(p.getNumJoints(robot_id)): + info = p.getJointInfo(robot_id, joint) + parent_id = info[-1] + p.setCollisionFilterPair(robot_id, robot_id, joint, parent_id, 0) + + return ids diff --git a/gibson2/core/physics/scene.py b/gibson2/core/physics/scene.py index 68f352c55..f5f3b9a0e 100644 --- a/gibson2/core/physics/scene.py +++ b/gibson2/core/physics/scene.py @@ -6,15 +6,18 @@ parentdir = os.path.dirname(currentdir) os.sys.path.insert(0, parentdir) import pybullet_data from gibson2.data.datasets import get_model_path +from gibson2.utils.utils import l2_distance + import numpy as np from PIL import Image import cv2 import networkx as nx from IPython import embed +import pickle class Scene: def load(self): - raise (NotImplementedError()) + raise NotImplementedError() class EmptyScene(Scene): """ @@ -23,6 +26,7 @@ class EmptyScene(Scene): def load(self): planeName = os.path.join(pybullet_data.getDataPath(), "mjcf/ground_plane.xml") self.ground_plane_mjcf = p.loadMJCF(planeName) + p.changeDynamics(self.ground_plane_mjcf[0], -1, lateralFriction=1) return [item for item in self.ground_plane_mjcf] class StadiumScene(Scene): @@ -43,6 +47,9 @@ class StadiumScene(Scene): return [item for item in self.stadium] + [item for item in self.ground_plane_mjcf] + def get_random_floor(self): + return 0 + def get_random_point(self, random_height=False): return self.get_random_point_floor(0, random_height) @@ -54,6 +61,17 @@ class StadiumScene(Scene): np.random.uniform(0.4, 0.8) if random_height else 0.0 ]) + def get_floor_height(self, floor): + del floor + return 0.0 + + def reset_floor(self, floor=0, additional_elevation=0.05, height=None): + return + + def get_shortest_path(self, floor, source_world, target_world, entire_path=False): + assert False, 'cannot compute shortest path in StadiumScene' + + class BuildingScene(Scene): """ Gibson Environment building scenes @@ -90,9 +108,6 @@ class BuildingScene(Scene): self.num_waypoints = num_waypoints self.waypoint_interval = int(waypoint_resolution / trav_map_resolution) - def l2_distance(self, a, b): - return np.linalg.norm(np.array(a) - np.array(b)) - def load(self): 
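The Locobot's continuous action space above is normalized to [-1, 1] while the physical wheel-velocity bounds are kept in action_high/action_low, so apply_action relies on a rescaling step (action_to_real_action, defined in LocomotorRobot and not shown in this diff). A sketch of the assumed linear mapping:

import numpy as np

def action_to_real_action(action, action_low, action_high):
    # assumed linear rescaling from the normalized policy output in [-1, 1]
    # to the physical wheel-velocity bounds
    action = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)
    return action_low + (action + 1.0) * 0.5 * (action_high - action_low)

low = np.array([-0.1, -0.1])   # matches velocity: 0.1 in the new locobot config
high = np.array([0.1, 0.1])
print(action_to_real_action([1.0, -0.5], low, high))  # [ 0.1  -0.05]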
""" Load the mesh into pybullet @@ -141,39 +156,56 @@ class BuildingScene(Scene): self.floors = sorted(list(map(float, f.readlines()))) print('floors', self.floors) for f in range(len(self.floors)): - trav_map = Image.open(os.path.join(get_model_path(self.model_id), 'floor_trav_{}.png'.format(f))) - obstacle_map = Image.open(os.path.join(get_model_path(self.model_id), 'floor_{}.png'.format(f))) + trav_map = np.array(Image.open( + os.path.join(get_model_path(self.model_id), 'floor_trav_{}.png'.format(f)) + )) + obstacle_map = np.array(Image.open( + os.path.join(get_model_path(self.model_id), 'floor_{}.png'.format(f)) + )) if self.trav_map_original_size is None: - width, height = trav_map.size - assert width == height, 'trav map is not a square' + height, width = trav_map.shape + assert height == width, 'trav map is not a square' self.trav_map_original_size = height - self.trav_map_size = int(self.trav_map_original_size * self.trav_map_default_resolution / self.trav_map_resolution) - trav_map = np.array(trav_map.resize((self.trav_map_size, self.trav_map_size))) - obstacle_map = np.array(obstacle_map.resize((self.trav_map_size, self.trav_map_size))) + self.trav_map_size = int(self.trav_map_original_size * + self.trav_map_default_resolution / + self.trav_map_resolution) + trav_map[obstacle_map == 0] = 0 + trav_map = cv2.resize(trav_map, (self.trav_map_size, self.trav_map_size)) trav_map = cv2.erode(trav_map, np.ones((self.trav_map_erosion, self.trav_map_erosion))) + trav_map[trav_map < 255] = 0 if self.build_graph: - g = nx.Graph() - for i in range(self.trav_map_size): - for j in range(self.trav_map_size): - if trav_map[i, j] > 0: - g.add_node((i, j)) - # 8-connected graph - neighbors = [(i - 1, j - 1), (i, j - 1), (i + 1, j - 1), (i - 1, j)] - for n in neighbors: - if 0 <= n[0] < self.trav_map_size and 0 <= n[1] < self.trav_map_size and \ - trav_map[n[0], n[1]] > 0: - g.add_edge(n, (i, j), weight=self.l2_distance(n, (i, j))) + graph_file = os.path.join(get_model_path(self.model_id), 'floor_trav_{}.p'.format(f)) + + if os.path.isfile(graph_file): + print("load traversable graph") + with open(graph_file, 'rb') as pfile: + g = pickle.load(pfile) + else: + print("build traversable graph") + g = nx.Graph() + for i in range(self.trav_map_size): + for j in range(self.trav_map_size): + if trav_map[i, j] > 0: + g.add_node((i, j)) + # 8-connected graph + neighbors = [(i - 1, j - 1), (i, j - 1), (i + 1, j - 1), (i - 1, j)] + for n in neighbors: + if 0 <= n[0] < self.trav_map_size and 0 <= n[1] < self.trav_map_size and \ + trav_map[n[0], n[1]] > 0: + g.add_edge(n, (i, j), weight=l2_distance(n, (i, j))) + + # only take the largest connected component + largest_cc = max(nx.connected_components(g), key=len) + g = g.subgraph(largest_cc).copy() + with open(graph_file, 'wb') as pfile: + pickle.dump(g, pfile, protocol=pickle.HIGHEST_PROTOCOL) - # only take the largest connected component - largest_cc = max(nx.connected_components(g), key=len) - g = g.subgraph(largest_cc).copy() self.floor_graph.append(g) - # update trav_map accordingly trav_map[:, :] = 0 - for node in largest_cc: + for node in g.nodes: trav_map[node[0], node[1]] = 255 self.floor_map.append(trav_map) @@ -205,6 +237,11 @@ class BuildingScene(Scene): def world_to_map(self, xy): return np.flip((xy / self.trav_map_resolution + self.trav_map_size / 2.0)).astype(np.int) + def has_node(self, floor, world_xy): + map_xy = tuple(self.world_to_map(world_xy)) + g = self.floor_graph[floor] + return g.has_node(map_xy) + def get_shortest_path(self, floor, 
source_world, target_world, entire_path=False): # print("called shortest path", source_world, target_world) assert self.build_graph, 'cannot get shortest path without building the graph' @@ -216,20 +253,21 @@ class BuildingScene(Scene): if not g.has_node(target_map): nodes = np.array(g.nodes) closest_node = tuple(nodes[np.argmin(np.linalg.norm(nodes - target_map, axis=1))]) - g.add_edge(closest_node, target_map, weight=self.l2_distance(closest_node, target_map)) + g.add_edge(closest_node, target_map, weight=l2_distance(closest_node, target_map)) if not g.has_node(source_map): nodes = np.array(g.nodes) closest_node = tuple(nodes[np.argmin(np.linalg.norm(nodes - source_map, axis=1))]) - g.add_edge(closest_node, source_map, weight=self.l2_distance(closest_node, source_map)) + g.add_edge(closest_node, source_map, weight=l2_distance(closest_node, source_map)) - path_map = np.array(nx.astar_path(g, source_map, target_map, heuristic=self.l2_distance)) + path_map = np.array(nx.astar_path(g, source_map, target_map, heuristic=l2_distance)) path_world = self.map_to_world(path_map) geodesic_distance = np.sum(np.linalg.norm(path_world[1:] - path_world[:-1], axis=1)) + path_world = path_world[::self.waypoint_interval] if not entire_path: - path_world = path_world[::self.waypoint_interval][:self.num_waypoints] + path_world = path_world[:self.num_waypoints] num_remaining_waypoints = self.num_waypoints - path_world.shape[0] if num_remaining_waypoints > 0: remaining_waypoints = np.tile(target_world, (num_remaining_waypoints, 1)) diff --git a/gibson2/core/simulator.py b/gibson2/core/simulator.py index dc7d266ff..e3c0007b6 100644 --- a/gibson2/core/simulator.py +++ b/gibson2/core/simulator.py @@ -36,6 +36,7 @@ class Simulator: self.gravity = gravity self.timestep = timestep self.mode = mode + # renderer self.resolution = resolution self.fov = fov @@ -63,8 +64,7 @@ class Simulator: """ Destroy the MeshRenderer and physics simulator and start again. """ - self.renderer.release() - p.disconnect(self.cid) + self.disconnect() self.load() def load(self): @@ -91,6 +91,7 @@ class Simulator: self.cid = p.connect(p.DIRECT) p.setTimeStep(self.timestep) p.setGravity(0, 0, -self.gravity) + p.setPhysicsEngineParameter(enableFileCaching=0) if self.mode == 'gui' and not self.render_to_tensor: self.add_viewer() @@ -101,14 +102,13 @@ class Simulator: self.objects = [] def import_scene(self, scene, texture_scale=1.0, load_texture=True, class_id=0): - """ Import a scene. A scene could be a synthetic one or a realistic Gibson Environment. :param scene: Scene object :param texture_scale: Option to scale down the texture for rendering :param load_texture: If you don't need rgb output, texture loading could be skipped to make rendering faster - :param class_id: The class_id for background for rendering semantics. 
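get_shortest_path above snaps the source and target to their nearest graph nodes, runs A* with the Euclidean heuristic, and subsamples the result every waypoint_interval nodes. A toy, self-contained illustration of the networkx call on a small grid (4-connected here for brevity; the real graph is 8-connected and built from the eroded traversability map):

import networkx as nx
import numpy as np

def l2_distance(a, b):
    return np.linalg.norm(np.array(a) - np.array(b))

g = nx.grid_2d_graph(3, 3)
for u, v in g.edges:
    g[u][v]['weight'] = l2_distance(u, v)

path = nx.astar_path(g, (0, 0), (2, 2), heuristic=l2_distance)
geodesic = sum(l2_distance(a, b) for a, b in zip(path[:-1], path[1:]))
print(path, geodesic)  # e.g. [(0, 0), (1, 0), (2, 0), (2, 1), (2, 2)] with length 4.0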
+ :param class_id: Class id for rendering semantic segmentation """ new_objects = scene.load() @@ -153,7 +153,7 @@ class Simulator: def import_object(self, object, class_id=0): """ :param object: Object to load - :param class_id: class_id to show for semantic segmentation mask + :param class_id: Class id for rendering semantic segmentation """ new_object = object.load() self.objects.append(new_object) @@ -215,7 +215,7 @@ class Simulator: Import a robot into Simulator :param robot: Robot - :param class_id: class_id to show for semantic segmentation mask + :param class_id: Class id for rendering semantic segmentation :return: id for robot in pybullet """ @@ -298,7 +298,7 @@ class Simulator: Import articulated objects into simulator :param obj: - :param class_id: class_id to show for semantic segmentation mask + :param class_id: Class id for rendering semantic segmentation :return: pybulet id """ ids = obj.load() @@ -382,6 +382,12 @@ class Simulator: """ p.stepSimulation() + self.sync() + + def sync(self): + """ + Update positions in renderer without stepping the simulation. Usually used in the reset() function + """ for instance in self.renderer.instances: if instance.dynamic: self.update_position(instance) @@ -427,5 +433,6 @@ class Simulator: clean up the simulator """ if self.isconnected(): + p.resetSimulation(physicsClientId=self.cid) p.disconnect(self.cid) self.renderer.release() diff --git a/gibson2/envs/base_env.py b/gibson2/envs/base_env.py index 3c33e7a2b..cff77c11c 100644 --- a/gibson2/envs/base_env.py +++ b/gibson2/envs/base_env.py @@ -1,4 +1,5 @@ -from gibson2.core.physics.robot_locomotors import Turtlebot, Husky, Ant, Humanoid, JR2, JR2_Kinova, Freight, Fetch +from gibson2.core.physics.robot_locomotors \ + import Turtlebot, Husky, Ant, Humanoid, JR2, JR2_Kinova, Freight, Fetch, Locobot from gibson2.core.simulator import Simulator from gibson2.core.physics.scene import BuildingScene, StadiumScene import gibson2 @@ -16,21 +17,31 @@ class BaseEnv(gym.Env): config_file, model_id=None, mode='headless', + action_timestep=1 / 10.0, + physics_timestep=1 / 240.0, device_idx=0): """ :param config_file: config_file path :param model_id: override model_id in config file :param mode: headless or gui mode - :param device_idx: which GPU to run the simulation and rendering on + :param action_timestep: environment executes action per action_timestep second + :param physics_timestep: physics timestep for pybullet + :param device_idx: device_idx: which GPU to run the simulation and rendering on """ self.config = parse_config(config_file) if model_id is not None: self.config['model_id'] = model_id + + self.mode = mode + self.action_timestep = action_timestep + self.physics_timestep = physics_timestep self.simulator = Simulator(mode=mode, + timestep=physics_timestep, use_fisheye=self.config.get('fisheye', False), resolution=self.config.get('resolution', 64), fov=self.config.get('fov', 90), device_idx=device_idx) + self.simulator_loop = int(self.action_timestep / self.simulator.timestep) self.load() def reload(self, config_file): @@ -77,6 +88,8 @@ class BaseEnv(gym.Env): robot = Freight(self.config) elif self.config['robot'] == 'Fetch': robot = Fetch(self.config) + elif self.config['robot'] == 'Locobot': + robot = Locobot(self.config) else: raise Exception('unknown robot type: {}'.format(self.config['robot'])) @@ -99,10 +112,16 @@ class BaseEnv(gym.Env): self.simulator.step() def step(self, action): - return NotImplementedError + """ + Overwritten by subclasses + """ + return NotImplementedError() def 
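With the new action_timestep/physics_timestep arguments in BaseEnv above, each environment step runs a fixed number of physics sub-steps. A quick worked example with the defaults:

action_timestep = 1 / 10.0    # simulated seconds per agent action
physics_timestep = 1 / 240.0  # pybullet integration step
simulator_loop = int(action_timestep / physics_timestep)
print(simulator_loop)  # 24 physics sub-steps per env.step()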
reset(self): - return NotImplementedError + """ + Overwritten by subclasses + """ + return NotImplementedError() def set_mode(self, mode): - self.simulator.mode = mode \ No newline at end of file + self.simulator.mode = mode diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py index 4f594c8e3..b74bea965 100644 --- a/gibson2/envs/locomotor_env.py +++ b/gibson2/envs/locomotor_env.py @@ -1,5 +1,6 @@ -from gibson2.core.physics.interactive_objects import VisualMarker, InteractiveObj, BoxShape import gibson2 +from gibson2.core.physics.interactive_objects import VisualMarker, InteractiveObj, BoxShape +from gibson2.core.physics.robot_locomotors import Turtlebot from gibson2.utils.utils import parse_config, rotate_vector_3d, l2_distance, quatToXYZW from gibson2.envs.base_env import BaseEnv from transforms3d.euler import euler2quat @@ -20,27 +21,6 @@ import time import collections - - -Episode = collections.namedtuple('Episode', - ['env', - 'agent', - 'initial_pos', - 'target_pos', - 'geodesic_distance', - 'shortest_path', - 'agent_trajectory', - 'object_files', - 'object_trajectory', - 'success', - 'path_efficiency', - 'kinematic_disturbance', - 'dynamic_disturbance_a', - 'dynamic_disturbance_b', - 'collision_step', - ]) - - class NavigateEnv(BaseEnv): """ We define navigation environments following Anderson, Peter, et al. 'On evaluation of embodied navigation agents.' @@ -57,52 +37,43 @@ class NavigateEnv(BaseEnv): automatic_reset=False, device_idx=0, ): + """ + :param config_file: config_file path + :param model_id: override model_id in config file + :param mode: headless or gui mode + :param action_timestep: environment executes action per action_timestep second + :param physics_timestep: physics timestep for pybullet + :param automatic_reset: whether to automatic reset after an episode finishes + :param device_idx: device_idx: which GPU to run the simulation and rendering on + """ super(NavigateEnv, self).__init__(config_file=config_file, model_id=model_id, mode=mode, + action_timestep=action_timestep, + physics_timestep=physics_timestep, device_idx=device_idx) self.automatic_reset = automatic_reset - # simulation - self.mode = mode - self.action_timestep = action_timestep - self.physics_timestep = physics_timestep - self.simulator.set_timestep(physics_timestep) - self.simulator_loop = int(self.action_timestep / self.simulator.timestep) - self.current_step = 0 - self.path_length = 0.0 - self.agent_trajectory = [] - self.stage = None - self.current_episode = 0 - self.floor_num = None - self.num_object_classes = None - self.stored_episodes = collections.deque(maxlen=100) - self.interactive_objects = [] - # self.reward_stats = [] - # self.state_stats = {'sensor': [], 'auxiliary_sensor': []} - - def load(self): - super(NavigateEnv, self).load() + def load_task_setup(self): + """ + Load task setup, including initialization, termination conditino, reward, collision checking, discount factor + """ + # initial and target pose self.initial_pos = np.array(self.config.get('initial_pos', [0, 0, 0])) self.initial_orn = np.array(self.config.get('initial_orn', [0, 0, 0])) - self.target_pos = np.array(self.config.get('target_pos', [5, 5, 0])) self.target_orn = np.array(self.config.get('target_orn', [0, 0, 0])) self.additional_states_dim = self.config.get('additional_states_dim', 0) self.auxiliary_sensor_dim = self.config.get('auxiliary_sensor_dim', 0) - self.normalize_observation = self.config.get('normalize_observation', False) - self.observation_normalizer = 
self.config.get('observation_normalizer', {}) - for key in self.observation_normalizer: - self.observation_normalizer[key] = np.array(self.observation_normalizer[key]) # termination condition self.dist_tol = self.config.get('dist_tol', 0.2) - self.max_step = self.config.get('max_step', float('inf')) + self.max_step = self.config.get('max_step', 500) # reward - self.reward_type = self.config.get('reward_type', 'dense') - assert self.reward_type in ['dense', 'sparse', 'l2', 'stage_sparse'] + self.reward_type = self.config.get('reward_type', 'geodesic') + assert self.reward_type in ['geodesic', 'l2', 'sparse'] self.success_reward = self.config.get('success_reward', 10.0) self.slack_reward = self.config.get('slack_reward', -0.01) @@ -110,27 +81,24 @@ class NavigateEnv(BaseEnv): # reward weight self.potential_reward_weight = self.config.get('potential_reward_weight', 10.0) - self.electricity_reward_weight = self.config.get('electricity_reward_weight', 0.0) - self.stall_torque_reward_weight = self.config.get('stall_torque_reward_weight', 0.0) self.collision_reward_weight = self.config.get('collision_reward_weight', 0.0) - # ignore the agent's collision with these body ids, typically ids of the ground + + # ignore the agent's collision with these body ids self.collision_ignore_body_b_ids = set(self.config.get('collision_ignore_body_b_ids', [])) + # ignore the agent's collision with these link ids of itself self.collision_ignore_link_a_ids = set(self.config.get('collision_ignore_link_a_ids', [])) # discount factor self.discount_factor = self.config.get('discount_factor', 1.0) + + def load_observation_space(self): + """ + Load observation space + """ self.output = self.config['output'] - self.n_horizontal_rays = self.config.get('n_horizontal_rays', 128) - self.n_vertical_beams = self.config.get('n_vertical_beams', 9) - - # TODO: sensor: observations that are passed as network input, e.g. target position in local frame - # TODO: auxiliary sensor: observations that are not passed as network input, but used to maintain the same - # subgoals for the next T time steps, e.g. 
agent pose in global frame - self.sensor_dim = self.additional_states_dim - self.action_dim = self.robots[0].action_dim - observation_space = OrderedDict() if 'sensor' in self.output: + self.sensor_dim = self.additional_states_dim self.sensor_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.sensor_dim,), @@ -142,23 +110,33 @@ class NavigateEnv(BaseEnv): shape=(self.auxiliary_sensor_dim,), dtype=np.float32) observation_space['auxiliary_sensor'] = self.auxiliary_sensor_space - if 'pointgoal' in self.output: - self.pointgoal_space = gym.spaces.Box(low=-np.inf, - high=np.inf, - shape=(2,), - dtype=np.float32) - observation_space['pointgoal'] = self.pointgoal_space if 'rgb' in self.output: - self.rgb_space = gym.spaces.Box(low=-np.inf, - high=np.inf, + self.rgb_space = gym.spaces.Box(low=0.0, + high=1.0, shape=(self.config.get('resolution', 64), self.config.get('resolution', 64), 3), dtype=np.float32) observation_space['rgb'] = self.rgb_space if 'depth' in self.output: - self.depth_space = gym.spaces.Box(low=-np.inf, - high=np.inf, + self.depth_noise_rate = self.config.get('depth_noise_rate', 0.0) + if self.config['robot'] == 'Turtlebot': + # ASUS Xtion PRO LIVE + self.depth_low = 0.8 + self.depth_high = 3.5 + elif self.config['robot'] == 'Fetch': + # Primesense Carmine 1.09 short-range RGBD sensor + self.depth_low = 0.35 + self.depth_high = 3.0 # http://xtionprolive.com/primesense-carmine-1.09 + # self.depth_high = 1.4 # https://www.i3du.gr/pdf/primesense.pdf + elif self.config['robot'] == 'Locobot': + # https://store.intelrealsense.com/buy-intel-realsense-depth-camera-d435.html + self.depth_low = 0.1 + self.depth_high = 10.0 + else: + assert False, 'unknown robot for depth observation' + self.depth_space = gym.spaces.Box(low=0.0, + high=1.0, shape=(self.config.get('resolution', 64), self.config.get('resolution', 64), 1), @@ -172,18 +150,29 @@ class NavigateEnv(BaseEnv): 1), dtype=np.float32) observation_space['seg'] = self.seg_space - if 'depth_seg' in self.output: - self.depth_seg_space = gym.spaces.Box(low=0.0, - high=1.0, - shape=(self.config.get('resolution', 64), - self.config.get('resolution', 64), - 2), - dtype=np.float32) - observation_space['depth_seg'] = self.depth_seg_space if 'scan' in self.output: - self.scan_space = gym.spaces.Box(low=-np.inf, - high=np.inf, - shape=(self.n_horizontal_rays * self.n_vertical_beams, 3), + self.scan_noise_rate = self.config.get('scan_noise_rate', 0.0) + self.n_horizontal_rays = self.config.get('n_horizontal_rays', 128) + self.n_vertical_beams = self.config.get('n_vertical_beams', 1) + assert self.n_vertical_beams == 1, 'scan can only handle one vertical beam for now' + if self.config['robot'] == 'Turtlebot': + # Hokuyo URG-04LX-UG01 + self.laser_linear_range = 5.6 + self.laser_angular_range = 240.0 + self.min_laser_dist = 0.05 + self.laser_link_name = 'scan_link' + elif self.config['robot'] == 'Fetch': + # SICK TiM571-2050101 Laser Range Finder + self.laser_linear_range = 25.0 + self.laser_angular_range = 220.0 + self.min_laser_dist = 0.0 + self.laser_link_name = 'laser_link' + else: + assert False, 'unknown robot for LiDAR observation' + + self.scan_space = gym.spaces.Box(low=0.0, + high=1.0, + shape=(self.n_horizontal_rays * self.n_vertical_beams, 1), dtype=np.float32) observation_space['scan'] = self.scan_space if 'rgb_filled' in self.output: # use filler @@ -194,130 +183,209 @@ class NavigateEnv(BaseEnv): self.comp.eval() self.observation_space = gym.spaces.Dict(observation_space) + + def load_action_space(self): + """ + Load action 
space + """ self.action_space = self.robots[0].action_space - # add visual objects - self.visual_object_at_initial_target_pos = self.config.get('visual_object_at_initial_target_pos', False) + def load_visualization(self): + """ + Load visualization, such as initial and target position, shortest path, etc + """ + if self.mode != 'gui': + return - if self.visual_object_at_initial_target_pos: - cyl_length = 0.2 - self.initial_pos_vis_obj = VisualMarker(visual_shape=p.GEOM_CYLINDER, - rgba_color=[1, 0, 0, 0.3], - radius=self.dist_tol, - length=cyl_length, - initial_offset=[0, 0, cyl_length / 2.0]) - self.target_pos_vis_obj = VisualMarker(visual_shape=p.GEOM_CYLINDER, - rgba_color=[0, 0, 1, 0.3], - radius=self.dist_tol, - length=cyl_length, - initial_offset=[0, 0, cyl_length / 2.0]) - self.initial_pos_vis_obj.load() - if self.config.get('target_visual_object_visible_to_agent', False): - self.simulator.import_object(self.target_pos_vis_obj, class_id=255) - else: - self.target_pos_vis_obj.load() + cyl_length = 0.2 + self.initial_pos_vis_obj = VisualMarker(visual_shape=p.GEOM_CYLINDER, + rgba_color=[1, 0, 0, 0.3], + radius=self.dist_tol, + length=cyl_length, + initial_offset=[0, 0, cyl_length / 2.0]) + self.target_pos_vis_obj = VisualMarker(visual_shape=p.GEOM_CYLINDER, + rgba_color=[0, 0, 1, 0.3], + radius=self.dist_tol, + length=cyl_length, + initial_offset=[0, 0, cyl_length / 2.0]) + self.initial_pos_vis_obj.load() + self.target_pos_vis_obj.load() + + if self.scene.build_graph: + self.num_waypoints_vis = 250 + self.waypoints_vis = [VisualMarker(visual_shape=p.GEOM_CYLINDER, + rgba_color=[0, 1, 0, 0.3], + radius=0.1, + length=cyl_length, + initial_offset=[0, 0, cyl_length / 2.0]) + for _ in range(self.num_waypoints_vis)] + for waypoint in self.waypoints_vis: + waypoint.load() + + def load_miscellaneous_variables(self): + """ + Load miscellaneous variables for book keeping + """ + self.current_step = 0 + self.collision_step = 0 + self.current_episode = 0 + self.floor_num = None + # self.path_length = 0.0 + # self.agent_trajectory = [] + # self.stage = None + # self.floor_num = None + # self.num_object_classes = None + # self.interactive_objects = [] + + def load(self): + """ + Load navigation environment + """ + super(NavigateEnv, self).load() + self.load_task_setup() + self.load_observation_space() + self.load_action_space() + self.load_visualization() + self.load_miscellaneous_variables() + + def global_to_local(self, pos): + """ + Convert a 3D point in global frame to agent's local frame + :param pos: a 3D point in global frame + :return: the same 3D point in agent's local frame + """ + return rotate_vector_3d(pos - self.robots[0].get_position(), *self.robots[0].get_rpy()) def get_additional_states(self): - relative_position = self.target_pos - self.robots[0].get_position() - # rotate relative position back to body point of view - additional_states = rotate_vector_3d(relative_position, *self.robots[0].get_rpy()) - + """ + :return: non-perception observation, such as goal location + """ + additional_states = self.global_to_local(self.target_pos) if self.config['task'] == 'reaching': - end_effector_pos = self.robots[0].get_end_effector_position() - self.robots[0].get_position() - end_effector_pos = rotate_vector_3d(end_effector_pos, *self.robots[0].get_rpy()) - additional_states = np.concatenate((additional_states, end_effector_pos)) + end_effector_pos_local = self.global_to_local(self.robots[0].get_end_effector_position()) + additional_states = np.concatenate((additional_states, 
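global_to_local above expresses a world-frame point in the robot's base frame by subtracting the base position and undoing the base rotation. A standalone worked example, assuming rotate_vector_3d applies the inverse of the base's roll/pitch/yaw rotation:

import numpy as np
from transforms3d.euler import euler2mat

def global_to_local(pos, robot_pos, roll, pitch, yaw):
    # inverse (transpose) of the base rotation applied to the world-frame offset
    return euler2mat(roll, pitch, yaw).T.dot(np.asarray(pos) - np.asarray(robot_pos))

# robot at the origin facing +y (yaw = 90 deg), target 1 m ahead of it in the world frame
local = global_to_local([0.0, 1.0, 0.0], [0.0, 0.0, 0.0], 0.0, 0.0, np.pi / 2)
print(np.round(local, 3))  # ~[1, 0, 0]: the goal is straight ahead in the robot frame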
end_effector_pos_local)) assert len(additional_states) == self.additional_states_dim, 'additional states dimension mismatch' - return additional_states - def get_auxiliary_sensor(self, collision_links=[]): - return np.array([]) + def add_naive_noise_to_sensor(self, sensor_reading, noise_rate, noise_value=1.0): + """ + Add naive sensor dropout to perceptual sensor, such as RGBD and LiDAR scan + :param sensor_reading: raw sensor reading, range must be between [0.0, 1.0] + :param noise_rate: how much noise to inject, 0.05 means 5% of the data will be replaced with noise_value + :param noise_value: noise_value to overwrite raw sensor reading + :return: sensor reading corrupted with noise + """ + if noise_rate <= 0.0: + return sensor_reading + + assert len(sensor_reading[(sensor_reading < 0.0) | (sensor_reading > 1.0)]) == 0,\ + 'sensor reading has to be between [0.0, 1.0]' + + valid_mask = np.random.choice(2, sensor_reading.shape, p=[noise_rate, 1.0 - noise_rate]) + sensor_reading[valid_mask == 0] = noise_value + return sensor_reading + + def get_depth(self): + """ + :return: depth sensor reading, normalized to [0.0, 1.0] + """ + depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3] + # 0.0 is a special value for invalid entries + depth[depth < self.depth_low] = 0.0 + depth[depth > self.depth_high] = 0.0 + + # re-scale depth to [0.0, 1.0] + depth /= self.depth_high + depth = self.add_naive_noise_to_sensor(depth, self.depth_noise_rate, noise_value=0.0) + + return depth + + def get_rgb(self): + """ + :return: RGB sensor reading, normalized to [0.0, 1.0] + """ + return self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3] + + def get_pc(self): + """ + :return: pointcloud sensor reading + """ + return self.simulator.renderer.render_robot_cameras(modes=('3d'))[0] + + def get_normal(self): + """ + :return: surface normal reading + """ + return self.simulator.renderer.render_robot_cameras(modes='normal') + + def get_seg(self): + """ + :return: semantic segmentation mask, normalized to [0.0, 1.0] + """ + seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1] + if self.num_object_classes is not None: + seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0) + return seg + + def get_scan(self): + """ + :return: LiDAR sensor reading, normalized to [0.0, 1.0] + """ + laser_angular_half_range = self.laser_angular_range / 2.0 + laser_pose = self.robots[0].parts[self.laser_link_name].get_pose() + angle = np.arange(-laser_angular_half_range / 180 * np.pi, + laser_angular_half_range / 180 * np.pi, + self.laser_angular_range / 180.0 * np.pi / self.n_horizontal_rays) + unit_vector_local = np.array([[np.cos(ang), np.sin(ang), 0.0] for ang in angle]) + transform_matrix = quat2mat([laser_pose[6], laser_pose[3], laser_pose[4], laser_pose[5]]) # [x, y, z, w] + unit_vector_world = transform_matrix.dot(unit_vector_local.T).T + + start_pose = np.tile(laser_pose[:3], (self.n_horizontal_rays, 1)) + start_pose += unit_vector_world * self.min_laser_dist + end_pose = laser_pose[:3] + unit_vector_world * self.laser_linear_range + results = p.rayTestBatch(start_pose, end_pose, 6) # numThreads = 6 + + hit_fraction = np.array([item[2] for item in results]) # hit fraction = [0.0, 1.0] of self.laser_linear_range + hit_fraction = self.add_naive_noise_to_sensor(hit_fraction, self.scan_noise_rate) + scan = np.expand_dims(hit_fraction, 1) + return scan def get_state(self, collision_links=[]): - # calculate state - sensor_state = self.get_additional_states() - 
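The new depth pipeline above (get_depth plus add_naive_noise_to_sensor) zeroes out readings outside the sensor's valid range, rescales by depth_high, then drops a fraction noise_rate of pixels to the invalid value 0.0. A self-contained numpy sketch using the Locobot/RealSense D435 range from this hunk:

import numpy as np

def process_depth(raw_depth_m, depth_low, depth_high, noise_rate):
    depth = raw_depth_m.copy()
    depth[depth < depth_low] = 0.0    # 0.0 marks invalid entries
    depth[depth > depth_high] = 0.0
    depth /= depth_high               # rescale valid readings to [0.0, 1.0]
    if noise_rate > 0.0:
        valid_mask = np.random.choice(2, depth.shape, p=[noise_rate, 1.0 - noise_rate])
        depth[valid_mask == 0] = 0.0  # naive sensor dropout
    return depth

raw = np.random.uniform(0.0, 12.0, size=(64, 64, 1)).astype(np.float32)
obs = process_depth(raw, depth_low=0.1, depth_high=10.0, noise_rate=0.05)
print(obs.min(), obs.max())  # always within [0.0, 1.0]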
auxiliary_sensor = self.get_auxiliary_sensor(collision_links) - - # rgb = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3] - # rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB) - # depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3] - # depth = np.clip(depth / 5.0, 0.0, 1.0) - # depth = 1.0 - depth # flip black/white - # seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1] - # if self.num_object_classes is not None: - # seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0) - # cv2.imshow('rgb', rgb) - # cv2.imshow('depth', depth) - # cv2.imshow('seg', seg) - + """ + :param collision_links: collisions from last time step + :return: observation as a dictionary + """ state = OrderedDict() if 'sensor' in self.output: - state['sensor'] = sensor_state - if 'auxiliary_sensor' in self.output: - state['auxiliary_sensor'] = auxiliary_sensor - if 'pointgoal' in self.output: - state['pointgoal'] = sensor_state[:2] + state['sensor'] = self.get_additional_states() if 'rgb' in self.output: - state['rgb'] = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3] + state['rgb'] = self.get_rgb() if 'depth' in self.output: - depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3] - state['depth'] = depth + state['depth'] = self.get_depth() + if 'pc' in self.output: + state['pc'] = self.get_pc() + if 'rgbd' in self.output: + rgb = self.get_rgb() + depth = self.get_depth() + state['rgbd'] = np.concatenate((rgb, depth), axis=2) if 'normal' in self.output: - state['normal'] = self.simulator.renderer.render_robot_cameras(modes='normal') + state['normal'] = self.get_normal() if 'seg' in self.output: - seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1] - if self.num_object_classes is not None: - seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0) - state['seg'] = seg - if 'depth_seg' in self.output: - depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3] - depth = np.clip(depth / 5.0, 0.0, 1.0) - seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1] - if self.num_object_classes is not None: - seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0) - depth_seg = np.concatenate((depth, seg), axis=2) - state['depth_seg'] = depth_seg + state['seg'] = self.get_seg() if 'rgb_filled' in self.output: with torch.no_grad(): tensor = transforms.ToTensor()((state['rgb'] * 255).astype(np.uint8)).cuda() rgb_filled = self.comp(tensor[None, :, :, :])[0].permute(1, 2, 0).cpu().numpy() state['rgb_filled'] = rgb_filled - - if 'pointgoal' in self.output: - state['pointgoal'] = sensor_state[:2] - - # TODO: figure out why 'scan' consumes so much cpu if 'scan' in self.output: - assert 'scan_link' in self.robots[0].parts, "Requested scan but no scan_link" - pose_camera = self.robots[0].parts['scan_link'].get_pose() - angle = np.arange(0, 2 * np.pi, 2 * np.pi / float(self.n_horizontal_rays)) - elev_bottom_angle = -30. * np.pi / 180. - elev_top_angle = 10. * np.pi / 180. 
- elev_angle = np.arange(elev_bottom_angle, elev_top_angle, - (elev_top_angle - elev_bottom_angle) / float(self.n_vertical_beams)) - orig_offset = np.vstack([ - np.vstack([np.cos(angle), - np.sin(angle), - np.repeat(np.tan(elev_ang), angle.shape)]).T for elev_ang in elev_angle - ]) - transform_matrix = quat2mat([pose_camera[-1], pose_camera[3], pose_camera[4], pose_camera[5]]) - offset = orig_offset.dot(np.linalg.inv(transform_matrix)) - pose_camera = pose_camera[None, :3].repeat(self.n_horizontal_rays * self.n_vertical_beams, axis=0) - - results = p.rayTestBatch(pose_camera, pose_camera + offset * 30) - hit = np.array([item[0] for item in results]) - dist = np.array([item[2] for item in results]) - - valid_pts = (dist < 1. - 1e-5) & (dist > 0.1 / 30) & (hit != self.robots[0].robot_ids[0]) & (hit != -1) - dist[~valid_pts] = 0.0 # zero out invalid pts - dist *= 30 - - xyz = np.expand_dims(dist, 1) * orig_offset - state['scan'] = xyz - + state['scan'] = self.get_scan() return state def run_simulation(self): + """ + Run simulation for one action timestep (simulator_loop physics timestep) + :return: collisions from this simulation + """ collision_links = [] for _ in range(self.simulator_loop): self.simulator_step() @@ -325,46 +393,87 @@ class NavigateEnv(BaseEnv): return self.filter_collision_links(collision_links) def filter_collision_links(self, collision_links): - return [[item for item in collision_per_sim_step - if item[2] not in self.collision_ignore_body_b_ids and - item[3] not in self.collision_ignore_link_a_ids] - for collision_per_sim_step in collision_links] + """ + Filter out collisions that should be ignored + :param collision_links: original collisions, a list of lists of collisions + :return: filtered collisions + """ + new_collision_links = [] + for collision_per_sim_step in collision_links: + new_collision_per_sim_step = [] + for item in collision_per_sim_step: + # ignore collision with body b + if item[2] in self.collision_ignore_body_b_ids: + continue + + # ignore collision with robot link a + if item[3] in self.collision_ignore_link_a_ids: + continue + + # ignore self collision with robot link a (body b is also robot itself) + if item[2] == self.robots[0].robot_ids[0] and item[4] in self.collision_ignore_link_a_ids: + continue + + new_collision_per_sim_step.append(item) + new_collision_links.append(new_collision_per_sim_step) + return new_collision_links def get_position_of_interest(self): + """ + Get position of interest. + :return: If pointgoal task, return base position. If reaching task, return end effector position. 
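filter_collision_links above indexes into pybullet contact-point tuples: item[2] is bodyUniqueIdB, item[3] is linkIndexA, item[4] is linkIndexB (item[9] would be the normal force). A standalone restatement of the filtering rule for reference:

def filter_contacts(contacts, robot_id, ignore_body_b_ids, ignore_link_a_ids):
    kept = []
    for item in contacts:                                          # tuples from p.getContactPoints()
        if item[2] in ignore_body_b_ids:                           # collision with ignored body B
            continue
        if item[3] in ignore_link_a_ids:                           # collision on ignored robot link A
            continue
        if item[2] == robot_id and item[4] in ignore_link_a_ids:   # self-collision on ignored link
            continue
        kept.append(item)
    return kept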
+ """ if self.config['task'] == 'pointgoal': return self.robots[0].get_position() elif self.config['task'] == 'reaching': return self.robots[0].get_end_effector_position() - def get_potential(self): - return l2_distance(self.target_pos, self.get_position_of_interest()) + def get_shortest_path(self, from_initial_pos=False, entire_path=False): + """ + :param from_initial_pos: whether source is initial position rather than current position + :param entire_path: whether to return the entire shortest path + :return: shortest path and geodesic distance to the target position + """ + if from_initial_pos: + source = self.initial_pos[:2] + else: + source = self.robots[0].get_position()[:2] + target = self.target_pos[:2] + return self.scene.get_shortest_path(self.floor_num, source, target, entire_path=entire_path) + + def get_geodesic_potential(self): + """ + :return: geodesic distance to the target position + """ + _, geodesic_dist = self.get_shortest_path() + return geodesic_dist def get_l2_potential(self): + """ + :return: L2 distance to the target position + """ return l2_distance(self.target_pos, self.get_position_of_interest()) def get_reward(self, collision_links=[], action=None, info={}): + """ + :param collision_links: collisions from last time step + :param action: last action + :param info: a dictionary to store additional info + :return: reward, info + """ collision_links_flatten = [item for sublist in collision_links for item in sublist] reward = self.slack_reward # |slack_reward| = 0.01 per step if self.reward_type == 'l2': new_potential = self.get_l2_potential() - elif self.reward_type == 'dense': - new_potential = self.get_potential() + elif self.reward_type == 'geodesic': + new_potential = self.get_geodesic_potential() potential_reward = self.potential - new_potential reward += potential_reward * self.potential_reward_weight # |potential_reward| ~= 0.1 per step self.potential = new_potential - # electricity_reward = np.abs(self.robots[0].joint_speeds * self.robots[0].joint_torque).mean().item() - electricity_reward = 0.0 - reward += electricity_reward * self.electricity_reward_weight # |electricity_reward| ~= 0.05 per step - - # stall_torque_reward = np.square(self.robots[0].joint_torque).mean() - stall_torque_reward = 0.0 - reward += stall_torque_reward * self.stall_torque_reward_weight # |stall_torque_reward| ~= 0.05 per step - collision_reward = float(len(collision_links_flatten) > 0) - self.collision_step += int(len(collision_links_flatten) > 0) - info['collision_reward'] = collision_reward * self.collision_reward_weight # expose collision reward to info + self.collision_step += int(collision_reward) reward += collision_reward * self.collision_reward_weight # |collision_reward| ~= 1.0 per step if collision # goal reached @@ -374,15 +483,12 @@ class NavigateEnv(BaseEnv): return reward, info def get_termination(self, collision_links=[], info={}): + """ + :param collision_links: collisions from last time step + :param info: a dictionary to store additional info + :return: done, info + """ done = False - # for item in collision_links_flatten: - # if item[9] > 500: - # print("collision between " + self.id_to_name[self.robots[0].robot_ids[0]]["links"][item[3]] - # + " and " + self.id_to_name[item[2]]["links"][item[4]]) - - # door_angle = p.getJointState(self.door.body_id, self.door_axis_link_id)[0] - # max_force = max([item[9] for item in collision_links_flatten]) if len(collision_links_flatten) > 0 else 0 - floor_height = 0.0 if self.floor_num is None else 
self.scene.get_floor_height(self.floor_num) if l2_distance(self.target_pos, self.get_position_of_interest()) < self.dist_tol: print("GOAL") @@ -390,7 +496,7 @@ class NavigateEnv(BaseEnv): info['success'] = True elif self.robots[0].get_position()[2] > floor_height + self.death_z_thresh: - print("DEATH") + print("FLIP OVER") done = True info['success'] = False @@ -398,83 +504,49 @@ class NavigateEnv(BaseEnv): done = True info['success'] = False - # elif door_angle < (-10.0 / 180.0 * np.pi): - # print("WRONG PUSH") - - # elif max_force > 500: - # print("TOO MUCH FORCE") - # done = True - # info['success'] = False - if done: info['episode_length'] = self.current_step info['path_length'] = self.path_length info['collision_step'] = self.collision_step - info['energy_cost'] = self.energy_cost - info['stage'] = self.stage - - # Episode = collections.namedtuple('Episode', - # ['initial_pos', - # 'target_pos', - # 'geodesic_distance', - # 'shortest_path', - # 'trajectory', - # 'success', - # 'path_efficiency', - # 'kinematic_disturbance', - # 'dynamic_disturbance_a', - # 'dynamic_disturbance_b', - # 'collision_step', - # ]) - - # shortest_path, geodesic_distance = self.scene.get_shortest_path(self.floor_num, - # self.initial_pos[:2], - # self.target_pos[:2], - # entire_path=True) - # floor_height = self.scene.get_floor_height(self.floor_num) - # shortest_path = np.array([np.array([path[0], path[1], floor_height]) for path in shortest_path]) - # min_kin_dist = self.path_length * self.robots[0].robot_mass - # kinematic_disturbance = min_kin_dist / (min_kin_dist + self.kinematic_disturbance) - # min_dyn_dist = self.current_step * self.robots[0].robot_mass * 9.8 - # dynamic_disturbance_a = min_dyn_dist / (min_dyn_dist + self.dynamic_disturbance_a) - # dynamic_disturbance_b = self.current_step / float(self.current_step + self.dynamic_disturbance_b) - # object_files = [obj.filename for obj in self.interactive_objects] - # episode = Episode( - # env=self.scene.model_id, - # agent=self.robots[0].model_file, - # initial_pos=self.initial_pos, - # target_pos=self.target_pos, - # geodesic_distance=geodesic_distance, - # shortest_path=shortest_path, - # agent_trajectory=np.array(self.agent_trajectory), - # object_files=object_files, - # object_trajectory=np.array(self.object_trajectory), - # success=float(info['success']), - # path_efficiency=min(1.0, geodesic_distance / self.path_length), - # kinematic_disturbance=kinematic_disturbance, - # dynamic_disturbance_a=dynamic_disturbance_a, - # dynamic_disturbance_b=dynamic_disturbance_b, - # collision_step=self.collision_step, - # ) - # self.stored_episodes.append(episode) return done, info - def get_stored_episodes(self): - return self.stored_episodes - def before_simulation(self): + """ + Additional steps before physics simulation. Overwritten by subclasses. 
+ :return: cache + """ return None def after_simulation(self, cache, collision_links): + """ + :param cache: cache returned from before_simulation + :param collision_links: collisions from last time step + """ return + def step_visualization(self): + if self.mode != 'gui': + return + + self.initial_pos_vis_obj.set_position(self.initial_pos) + self.target_pos_vis_obj.set_position(self.target_pos) + + shortest_path, _ = self.get_shortest_path(entire_path=True) + floor_height = 0.0 if self.floor_num is None else self.scene.get_floor_height(self.floor_num) + num_nodes = min(self.num_waypoints_vis, shortest_path.shape[0]) + for i in range(num_nodes): + self.waypoints_vis[i].set_position(pos=np.array([shortest_path[i][0], + shortest_path[i][1], + floor_height])) + for i in range(num_nodes, self.num_waypoints_vis): + self.waypoints_vis[i].set_position(pos=np.array([0.0, 0.0, 100.0])) + def step(self, action): """ - apply robot's action and get state, reward, done and info, following openAI gym's convention - + apply robot's action and get state, reward, done and info, following PpenAI gym's convention :param action: a list of control signals - :return: state: state, reward, done, info + :return: state, reward, done, info """ self.current_step += 1 self.robots[0].apply_action(action) @@ -486,6 +558,7 @@ class NavigateEnv(BaseEnv): info = {} reward, info = self.get_reward(collision_links, action, info) done, info = self.get_termination(collision_links, info) + self.step_visualization() if done and self.automatic_reset: info['last_observation'] = state @@ -493,52 +566,55 @@ class NavigateEnv(BaseEnv): return state, reward, done, info def reset_agent(self): + """ + Reset the robot's joint configuration and base pose until no collision + """ max_trials = 100 for _ in range(max_trials): self.robots[0].robot_specific_reset() self.reset_initial_and_target_pos() if self.test_valid_position(): return - raise Exception("Failed to reset robot without collision") + print("WARNING: Failed to reset robot without collision") def reset_initial_and_target_pos(self): + """ + Reset the robot's base pose + """ self.robots[0].set_position(pos=self.initial_pos) self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(*self.initial_orn), 'wxyz')) def test_valid_position(self): + """ + :return: if initialization is valid (collision-free) + """ + self.robots[0].keep_still() collision_links = self.run_simulation() collision_links_flatten = [item for sublist in collision_links for item in sublist] return len(collision_links_flatten) == 0 - def reset(self): + def reset_variables(self): """ - Reset the agent to a collision-free start point + Reset bookkeeping variables for the next new episode """ - self.current_episode += 1 - self.reset_agent() - state = self.get_state() - - if self.reward_type == 'l2': - self.potential = self.get_l2_potential() - elif self.reward_type == 'dense': - self.potential = self.get_potential() - self.current_step = 0 self.collision_step = 0 - self.kinematic_disturbance = 0.0 - self.dynamic_disturbance_a = 0.0 - self.dynamic_disturbance_b = 0.0 self.path_length = 0.0 - self.agent_trajectory = [] - self.object_trajectory = [] - self.interactive_objects_collided = set() - self.energy_cost = 0.0 - # set position for visual objects - if self.visual_object_at_initial_target_pos: - self.initial_pos_vis_obj.set_position(self.initial_pos) - self.target_pos_vis_obj.set_position(self.target_pos) + def reset(self): + """ + Reset episode + """ + self.reset_agent() + self.simulator.sync() + state = 
self.get_state() + if self.reward_type == 'l2': + self.potential = self.get_l2_potential() + elif self.reward_type == 'geodesic': + self.potential = self.get_geodesic_potential() + self.reset_variables() + self.step_visualization() return state @@ -555,6 +631,16 @@ class NavigateRandomEnv(NavigateEnv): random_height=False, device_idx=0, ): + """ + :param config_file: config_file path + :param model_id: override model_id in config file + :param mode: headless or gui mode + :param action_timestep: environment executes action per action_timestep second + :param physics_timestep: physics timestep for pybullet + :param automatic_reset: whether to automatic reset after an episode finishes + :param random_height: whether to randomize height for target position (for reaching task) + :param device_idx: device_idx: which GPU to run the simulation and rendering on + """ super(NavigateRandomEnv, self).__init__(config_file, model_id=model_id, mode=mode, @@ -563,175 +649,122 @@ class NavigateRandomEnv(NavigateEnv): automatic_reset=automatic_reset, device_idx=device_idx) self.random_height = random_height - self.random_init_z_offset = self.config.get('random_init_z_offset', 0.1) + + self.target_dist_min = self.config.get('target_dist_min', 1.0) + self.target_dist_max = self.config.get('target_dist_max', 10.0) + + self.initial_pos_z_offset = self.config.get('initial_pos_z_offset', 0.1) + falling_distance_in_one_action_step = 0.5 * 9.8 * action_timestep * action_timestep + assert self.initial_pos_z_offset > falling_distance_in_one_action_step * 1.5,\ + 'initial_pos_z_offset for robot and objects should be larger than the falling distance in one action step' def reset_initial_and_target_pos(self): - floor, self.initial_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height) + """ + Reset the robot's base to a random empty position on a random floor with a random orientation + The geodesic distance (or L2 distance if traversable map graph is not built) + has to be between [self.target_dist_min, self.target_dist_max] + """ + _, self.initial_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height) max_trials = 100 dist = 0.0 for _ in range(max_trials): # if initial and target positions are < 1 meter away from each other, reinitialize _, self.target_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height) - dist = l2_distance(self.initial_pos, self.target_pos) - if dist > 1.0: + if self.scene.build_graph: + _, dist = self.get_shortest_path(from_initial_pos=True) + else: + dist = l2_distance(self.initial_pos, self.target_pos) + if self.target_dist_min < dist < self.target_dist_max: break - if dist < 1.0: - raise Exception("Failed to find initial and target pos that are >1m apart") + if not (self.target_dist_min < dist < self.target_dist_max): + print("WARNING: Failed to sample initial and target positions") + self.robots[0].set_position(pos=[self.initial_pos[0], self.initial_pos[1], - self.initial_pos[2] + self.random_init_z_offset]) + self.initial_pos[2] + self.initial_pos_z_offset]) self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, np.random.uniform(0, np.pi * 2)), 'wxyz')) def reset(self): + """ + Reset episode + """ self.floor_num = self.scene.get_random_floor() + # reset "virtual floor" to the correct height self.scene.reset_floor(floor=self.floor_num, additional_elevation=0.02) state = super(NavigateRandomEnv, self).reset() return state -class InteractiveGibsonNavigateEnv(NavigateRandomEnv): +class 
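A minimal usage sketch of the random-navigation environment defined above (the config path is the one added in this PR; it assumes the Gibson assets and the Ohoopee model are available locally):

from gibson2.envs.locomotor_env import NavigateRandomEnv

env = NavigateRandomEnv(config_file='examples/configs/locobot_p2p_nav_house.yaml',
                        mode='headless',
                        action_timestep=1 / 10.0,
                        physics_timestep=1 / 240.0)
state = env.reset()
for _ in range(100):
    state, reward, done, info = env.step(env.action_space.sample())
    if done:
        state = env.reset()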
NavigateRandomEnvSim2Real(NavigateRandomEnv): def __init__(self, config_file, model_id=None, - collision_reward_weight=0.0, mode='headless', action_timestep=1 / 10.0, physics_timestep=1 / 240.0, device_idx=0, automatic_reset=False, + collision_reward_weight=0.0, + track='static' ): - super(InteractiveGibsonNavigateEnv, self).__init__(config_file, - model_id=model_id, - mode=mode, - action_timestep=action_timestep, - physics_timestep=physics_timestep, - automatic_reset=automatic_reset, - random_height=False, - device_idx=device_idx) + super(NavigateRandomEnvSim2Real, self).__init__(config_file, + model_id=model_id, + mode=mode, + action_timestep=action_timestep, + physics_timestep=physics_timestep, + automatic_reset=automatic_reset, + random_height=False, + device_idx=device_idx) self.collision_reward_weight = collision_reward_weight + assert track in ['static', 'interactive', 'dynamic'], 'unknown track' + self.track = track - self.replaced_objects = [] - self.replaced_objects_pos = [] - self.additional_objects = [] - self.class_map = {} + if self.track == 'interactive': + self.interactive_objects_num_dups = 2 + self.interactive_objects = self.load_interactive_objects() + elif self.track == 'dynamic': + self.num_dynamic_objects = 1 + self.dynamic_objects = [] + self.dynamic_objects_last_actions = [] + for _ in range(self.num_dynamic_objects): + robot = Turtlebot(self.config) + self.simulator.import_robot(robot, class_id=1) + self.dynamic_objects.append(robot) + self.dynamic_objects_last_actions.append(robot.action_space.sample()) - self.should_load_replaced_objects = self.config.get('should_load_replaced_objects', False) - self.should_load_additional_objects = self.config.get('should_load_additional_objects', False) + # dynamic objects will repeat their actions for 10 action timesteps + self.dynamic_objects_action_repeat = 10 - self.build_class_map() - self.load_replaced_objects() - self.load_additional_objects() - self.interactive_objects = self.replaced_objects + self.additional_objects - self.new_potential = None + def load_interactive_objects(self): + """ + Load interactive objects + :return: a list of interactive objects + """ + interactive_objects = [] + interactive_objects_path = [ + 'object_2eZY2JqYPQE.urdf', + 'object_lGzQi2Pk5uC.urdf', + 'object_ZU6u5fvE8Z1.urdf', + 'object_H3ygj6efM8V.urdf', + 'object_RcqC01G24pR.urdf' + ] - self.visualize_waypoints = True - if self.visualize_waypoints and self.mode == 'gui': - cyl_length = 0.2 - self.waypoints_vis = [VisualMarker(visual_shape=p.GEOM_CYLINDER, - rgba_color=[0, 1, 0, 0.3], - radius=0.1, - length=cyl_length, - initial_offset=[0, 0, cyl_length / 2.0]) for _ in range(10)] - for waypoint in self.waypoints_vis: - waypoint.load() - - def build_class_map(self): - # 0 and 1 are reserved for mesh and robot - init_class_id = 2 - - if self.should_load_replaced_objects: - self.replaced_object_classes = [ - '03001627', # chair - '04256520', # couch - '04379243', # table / desk - '00000001', # door - ] - for item in self.replaced_object_classes: - self.class_map[item] = init_class_id - init_class_id += 1 - - if self.should_load_additional_objects: - self.additional_objects_path = [ - 'object_2eZY2JqYPQE.urdf', - 'object_lGzQi2Pk5uC.urdf', - 'object_ZU6u5fvE8Z1.urdf', - 'object_H3ygj6efM8V.urdf', - 'object_RcqC01G24pR.urdf' - ] - - for item in self.additional_objects_path: - self.class_map[item] = init_class_id - init_class_id += 1 - - self.num_object_classes = init_class_id - - def load_replaced_objects(self): - if not 
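For the 'dynamic' track above, each Turtlebot obstacle keeps its last sampled action and dynamic_objects_action_repeat sets how many agent steps that action is held before resampling; the step-time handling itself is not part of this hunk. A toy standalone illustration of the hold-and-resample pattern:

import numpy as np

action_repeat = 10                      # mirrors dynamic_objects_action_repeat
rng = np.random.default_rng(0)
last_action = None
for step in range(30):
    if step % action_repeat == 0:
        last_action = rng.uniform(-1.0, 1.0, size=2)   # resample every 10 steps
    # apply last_action to the obstacle robot here (hypothetical driver loop)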
self.should_load_replaced_objects: - return - scene_path = os.path.join(gibson2.assets_path, 'dataset', self.scene.model_id) - urdf_files = [item for item in os.listdir(scene_path) if item[-4:] == 'urdf'] - position_files = [item[:-4].replace('alignment_centered', 'pos') + 'txt' for item in urdf_files] - - for urdf_file, position_file in zip(urdf_files, position_files): - with open(os.path.join(scene_path, position_file)) as f: - pos = np.array([float(item) for item in f.readlines()[0].strip().split()]) - # filter out duplicate annotations for the same object - if len(self.replaced_objects_pos) == 0 or \ - np.min(np.linalg.norm(np.array(self.replaced_objects_pos) - pos, axis=1)) > 0.5: - class_id = urdf_file.split('.')[0].split('_')[-1] - obj = InteractiveObj(os.path.join(scene_path, urdf_file)) - self.simulator.import_object(obj, class_id=self.class_map[class_id]) - self.replaced_objects.append(obj) - self.replaced_objects_pos.append(pos) - - def load_additional_objects(self): - if not self.should_load_additional_objects: - return - num_dupicates = 2 - for _ in range(num_dupicates): - for urdf_model in self.additional_objects_path: + for _ in range(self.interactive_objects_num_dups): + for urdf_model in interactive_objects_path: obj = InteractiveObj(os.path.join(gibson2.assets_path, 'models/sample_urdfs', urdf_model)) - self.simulator.import_object(obj, class_id=self.class_map[urdf_model]) - self.additional_objects.append(obj) + self.simulator.import_object(obj, class_id=2) + interactive_objects.append(obj) + return interactive_objects - def global_to_local(self, pos): - return rotate_vector_3d(pos - self.robots[0].get_position(), *self.robots[0].get_rpy()) - - def get_additional_states(self): - target_pos_local = self.global_to_local(self.target_pos) - linear_velocity_local = rotate_vector_3d(self.robots[0].robot_body.velocity(), - *self.robots[0].get_rpy()) - angular_velocity_local = rotate_vector_3d(self.robots[0].robot_body.angular_velocity(), - *self.robots[0].get_rpy()) - - source = self.robots[0].get_position()[:2] - target = self.target_pos[:2] - shortest_path, geodesic_dist = self.scene.get_shortest_path(self.floor_num, source, target) - - robot_z = self.robots[0].get_position()[2] - if self.visualize_waypoints and self.mode == 'gui': - for i in range(10): - self.waypoints_vis[i].set_position(pos=np.array([shortest_path[i][0], shortest_path[i][1], robot_z])) - waypoints_local_xy = np.array([self.global_to_local(np.concatenate((waypoint, [robot_z])))[:2] - for waypoint in shortest_path]).flatten() - additional_states = np.concatenate((waypoints_local_xy, - target_pos_local, - linear_velocity_local, - angular_velocity_local)) - # cache results for reward calculation - self.new_potential = geodesic_dist - - assert len(additional_states) == self.additional_states_dim, 'additional states dimension mismatch' - return additional_states - - def get_potential(self): - return self.new_potential - - def reset_additional_objects(self): - for obj in self.additional_objects: - while True: + def reset_interactive_objects(self): + """ + Reset the poses of interactive objects to have no collisions with the scene mesh + """ + max_trials = 100 + for obj in self.interactive_objects: + for _ in range(max_trials): _, pos = self.scene.get_random_point_floor(self.floor_num, self.random_height) - obj.set_position_rotation([pos[0], pos[1], pos[2] + self.random_init_z_offset], - quatToXYZW(euler2quat(0.0, 0.0, 0.0), 'wxyz')) + obj.set_position_orientation([pos[0], pos[1], pos[2] + 
self.initial_pos_z_offset], + quatToXYZW(euler2quat(0.0, 0.0, 0.0), 'wxyz')) has_collision = False for _ in range(self.simulator_loop): self.simulator_step() @@ -741,567 +774,67 @@ class InteractiveGibsonNavigateEnv(NavigateRandomEnv): if not has_collision: break - def reset_replaced_objects(self): - for obj, pos in zip(self.replaced_objects, self.replaced_objects_pos): - obj.set_position_rotation([pos[0], pos[1], pos[2] + self.random_init_z_offset], - quatToXYZW(euler2quat(0.0, 0.0, 0.0), 'wxyz')) + def reset_dynamic_objects(self): + """ + Reset the poses of dynamic objects to have no collisions with the scene mesh + """ + max_trials = 100 + shortest_path, _ = self.get_shortest_path(entire_path=True) + floor_height = 0.0 if self.floor_num is None else self.scene.get_floor_height(self.floor_num) + for obj in self.dynamic_objects: + for _ in range(max_trials): + obj.robot_specific_reset() + pos = shortest_path[np.random.choice(shortest_path.shape[0])] + obj.set_position_orientation([pos[0], pos[1], floor_height + self.initial_pos_z_offset], + quatToXYZW(euler2quat(0.0, 0.0, 0.0), 'wxyz')) + has_collision = False + for _ in range(self.simulator_loop): + self.simulator_step() + if len(p.getContactPoints(bodyA=obj.robot_ids[0])) > 0: + has_collision = True + break + if not has_collision: + break - def reset(self): - self.floor_num = self.scene.get_random_floor() - self.scene.reset_floor(floor=self.floor_num, additional_elevation=0.05) - self.reset_replaced_objects() - self.reset_additional_objects() - - self.new_potential = None - state = NavigateEnv.reset(self) - return state - - def before_simulation(self): - robot_position = self.robots[0].get_position() - object_positions = [obj.get_position() for obj in self.interactive_objects] - return robot_position, object_positions - - def after_simulation(self, cache, collision_links): - robot_position, object_positions = cache - - collision_links_flatten = [item for sublist in collision_links for item in sublist] - if len(collision_links_flatten) > 0: - self.dynamic_disturbance_a += np.mean([ - np.linalg.norm( - np.sum([elem[9] * np.array(elem[7]) for elem in sublist], axis=0) # sum of all forces - ) - for sublist in collision_links]) - collision_objects = set([col[2] for col in collision_links_flatten]) - self.dynamic_disturbance_b += len(collision_objects) - self.interactive_objects_collided |= collision_objects - - self.agent_trajectory.append(np.concatenate((self.robots[0].get_position(), self.robots[0].get_orientation()))) - self.object_trajectory.append([np.concatenate((obj.get_position(), obj.get_orientation())) - for obj in self.interactive_objects]) - self.path_length += np.linalg.norm(self.robots[0].get_position() - robot_position) - self.kinematic_disturbance += np.sum([ - obj.mass * np.linalg.norm(np.array(obj.get_position()) - np.array(prev_pos)) - for obj, prev_pos in zip(self.interactive_objects, object_positions) - if obj.body_id in self.interactive_objects_collided - ]) - - -class InteractiveNavigateEnv(NavigateEnv): - def __init__(self, - config_file, - mode='headless', - action_timestep=1 / 10.0, - physics_timestep=1 / 240.0, - random_position=False, - device_idx=0, - automatic_reset=False, - arena="simple_hl_ll"): - super(InteractiveNavigateEnv, self).__init__(config_file, - mode=mode, - action_timestep=action_timestep, - physics_timestep=physics_timestep, - automatic_reset=automatic_reset, - device_idx=device_idx) - - self.arena = arena - assert self.arena in [ - "only_ll_obstacles", - "only_ll", - "simple_hl_ll", - 
"complex_hl_ll" - ], "Wrong arena" - - self.floor = VisualMarker(visual_shape=p.GEOM_BOX, rgba_color=[0.643, 0.643, 0.788, 0.0], - half_extents=[20, 20, 0.02], initial_offset=[0, 0, -0.03]) - self.floor.load() - self.floor.set_position([0, 0, 0]) - self.simulator.import_object(self.floor, class_id=0) - - self.door = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'), - scale=1.35) - self.simulator.import_interactive_object(self.door, class_id=2) - if self.arena == "only_ll" or self.arena == "only_ll_obstacles": - self.door.set_position_rotation([100.0, 100.0, -0.03], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')) - else: - self.door.set_position_rotation([0.0, 0.0, -0.03], quatToXYZW(euler2quat(0, 0, -np.pi / 2.0), 'wxyz')) - self.door_angle = self.config.get('door_angle', 90) - self.door_angle = (self.door_angle / 180.0) * np.pi - self.door_handle_link_id = 2 - self.door_axis_link_id = 1 - self.jr_end_effector_link_id = 33 # 'm1n6s200_end_effector' - self.random_position = random_position - - if self.arena == "only_ll_obstacles": - self.box_poses = [ - [[np.random.uniform(-4, 4), np.random.uniform(-4, -1), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, 4), np.random.uniform(-4, -1), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, 4), np.random.uniform(-4, -1), 1], [0, 0, 0, 1]], - - [[np.random.uniform(-4, 4), np.random.uniform(1, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, 4), np.random.uniform(1, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, 4), np.random.uniform(1, 4), 1], [0, 0, 0, 1]], - - [[np.random.uniform(-4, -1), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, -1), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, -1), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - - [[np.random.uniform(1, 4), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(1, 4), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(1, 4), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - ] - - self.walls = [] - for box_pose in self.box_poses: - box = BoxShape(pos=box_pose[0], dim=[0.5, 0.5, 1]) - self.simulator.import_object(box, class_id=3) - self.walls += [box] - - elif self.arena == "only_ll": - self.wall_poses = [ - [[0, -3, 1], [0, 0, 0, 1]], - [[0, 3, 1], [0, 0, 0, 1]], - [[-3, 0, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[3, 0, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - ] - - self.walls = [] - for wall_pose in self.wall_poses: - wall = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls.urdf'), - scale=1) - self.simulator.import_object(wall, class_id=3) - wall.set_position_rotation(wall_pose[0], wall_pose[1]) - self.walls += [wall] - - elif self.arena == "simple_hl_ll": - self.wall_poses = [ - [[0, -3, 1], [0, 0, 0, 1]], - [[0, 3, 1], [0, 0, 0, 1]], - [[-3, 0, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[3, 0, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[0, -7.8, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[0, 7.8, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - ] - - self.walls = [] - for wall_pose in self.wall_poses: - wall = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls.urdf'), - scale=1) - self.simulator.import_object(wall, class_id=3) - wall.set_position_rotation(wall_pose[0], wall_pose[1]) - self.walls += [wall] - - elif self.arena == "complex_hl_ll": - self.wall_poses = [ - [[0, -3, 1], [0, 0, 0, 1]], - [[0, 6, 1], [0, 0, 0, 1]], - [[-3, 0, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[3, 0, 1], [0, 0, 
np.sqrt(0.5), np.sqrt(0.5)]] - ] - - self.half_wall_poses = [ - [[1.3, 3, 1], [0, 0, 0, 1]], - ] - - self.quarter_wall_poses = [ - [[0.0, 7.68, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[0.0, 2.45, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - ] - - self.walls = [] - for wall_pose in self.wall_poses: - wall = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls.urdf'), - scale=1) - self.simulator.import_object(wall, class_id=3) - wall.set_position_rotation(wall_pose[0], wall_pose[1]) - self.walls += [wall] - - for wall_pose in self.half_wall_poses: - wall = InteractiveObj( - os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls_half.urdf'), - scale=1) - self.simulator.import_object(wall, class_id=3) - wall.set_position_rotation(wall_pose[0], wall_pose[1]) - self.walls += [wall] - - for wall_pose in self.quarter_wall_poses: - wall = InteractiveObj( - os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls_quarter.urdf'), - scale=1) - self.simulator.import_object(wall, class_id=3) - wall.set_position_rotation(wall_pose[0], wall_pose[1]) - self.walls += [wall] - - # dense reward - self.stage = 0 - self.prev_stage = self.stage - self.stage_get_to_door_handle = 0 - self.stage_open_door = 1 - self.stage_get_to_target_pos = 2 - - # attaching JR's arm to the door handle - self.door_handle_dist_thresh = 0.2 - self.cid = None - - # visualize subgoal - cyl_length = 3.0 - self.subgoal_end_effector = VisualMarker(rgba_color=[0, 0, 0, 0.8], radius=0.06) - self.subgoal_end_effector.load() - self.subgoal_end_effector_base = VisualMarker(visual_shape=p.GEOM_CYLINDER, - rgba_color=[1, 1, 0, 0.8], - radius=0.05, - length=cyl_length, - initial_offset=[0, 0, cyl_length / 2]) - self.subgoal_end_effector_base.load() - - self.door_handle_vis = VisualMarker(rgba_color=[1, 0, 0, 0.0], radius=self.door_handle_dist_thresh) - self.door_handle_vis.load() - - # TODO: move robot joint id and name mapping to robot_locomotors.py - self.id_to_name = { - 0: {"name": "ground", "links": {-1: "base", 0: "ground"}}, - 1: {"name": "ground", "links": {-1: "base", 0: "ground"}}, - 2: {"name": "ground", "links": {-1: "base", 0: "ground"}}, - 3: {"name": "ground", "links": {-1: "base", 0: "ground"}}, - } - self.id_to_name[self.door.body_id] = {"name": "door", - "links": {-1: "world", 0: "base", 1: "door_leaf", 2: "door_knob"}} - for i, wall in enumerate(self.walls): - self.id_to_name[wall.body_id] = {"name": "wall%d" % (i + 1), "links": {-1: "world", 0: "wall"}} - self.id_to_name[self.robots[0].robot_ids[0]] = {"name": "robot", "links": { - -1: "base", - 0: "base_chassis", - 1: "jr2_fixed_body (wrapper)", - 2: "left wheel", - 3: "right wheel", - 4: "front_caster_pivot", - 5: "front_caster_wheel", - 6: "rear_caster_pivot", - 7: "rear_caster_wheel", - 8: "ext_imu_frame", - 9: "rear_laser", - 10: "front_laser", - 11: "lower_velodyne_frame", - 12: "occam", - 13: "occam_omni_optical", - 14: "upper_velodyne_frame", - 15: "scan", - 16: "gps_frame", - 17: "pan", - 18: "tilt", - 19: "camera", - 20: "camera_rgb_frame", - 21: "camera_rgb_optical_frame", - 22: "camera_depth_frame", - 23: "camera_depth_optical_frame", - 24: "eyes", - 25: "right_arm_attach", - 26: "m1n6s200_link_base", - 27: "m1n6s200_link_1 (shoulder)", - 28: "m1n6s200_link_2 (arm)", - 29: "m1n6s200_link_3 (elbow)", - 30: "m1n6s200_link_4 (forearm)", - 31: "m1n6s200_link_5 (wrist)", - 32: "m1n6s200_link_6 (hand)", - 33: "end_effector", - }} - self.num_object_classes = 4 - - def set_subgoal(self, ideal_next_state): - 
obs_avg = (self.observation_normalizer['sensor'][1] + self.observation_normalizer['sensor'][0]) / 2.0 - obs_mag = (self.observation_normalizer['sensor'][1] - self.observation_normalizer['sensor'][0]) / 2.0 - ideal_next_state = (ideal_next_state * obs_mag) + obs_avg - self.subgoal_end_effector.set_position(ideal_next_state) - self.subgoal_end_effector_base.set_position([ideal_next_state[0], ideal_next_state[1], 0]) - - def set_subgoal_type(self, only_base=True): - if only_base: - # Make the marker for the end effector completely transparent - self.subgoal_end_effector.set_color([0, 0, 0, 0.0]) - else: - self.subgoal_end_effector.set_color([0, 0, 0, 0.8]) - - def reset_interactive_objects(self): - # close the door - p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=0.0, targetVelocity=0.0) - if self.cid is not None: - p.removeConstraint(self.cid) - self.cid = None - - def reset_initial_and_target_pos(self): - if self.arena == "only_ll" or self.arena == "only_ll_obstacles": - if self.random_position: - pos = [np.random.uniform(-0.5, 0.5), np.random.uniform(-0.5, 0.5), 0] - else: - pos = [0.0, 0.0, 0.0] - elif self.arena == "simple_hl_ll": - if self.random_position: - pos = [np.random.uniform(1, 2), np.random.uniform(-2, 2), 0] - else: - pos = [1.0, 0.0, 0.0] - elif self.arena == "complex_hl_ll": - if self.random_position: - pos = [np.random.uniform(-2, -1.7), np.random.uniform(4.5, 5), 0] - else: - pos = [-2, 4, 0.0] - - # self.robots[0].set_position(pos=[pos[0], pos[1], pos[2] + 0.1]) - self.robots[0].set_position(pos=[pos[0], pos[1], pos[2]]) - self.initial_pos = pos - - if self.arena == "only_ll" or self.arena == "only_ll_obstacles": - self.robots[0].set_orientation( - orn=quatToXYZW(euler2quat(0, 0, np.random.uniform(0, np.pi * 2)), 'wxyz') - ) - elif self.arena == "simple_hl_ll": - if self.random_position: - self.robots[0].set_orientation( - orn=quatToXYZW(euler2quat(0, 0, np.random.uniform(0, np.pi * 2)), 'wxyz') - ) - else: - self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, np.pi), 'wxyz')) - elif self.arena == "complex_hl_ll": - if self.random_position: - self.robots[0].set_orientation( - orn=quatToXYZW(euler2quat(0, 0, np.random.uniform(0, np.pi * 2)), 'wxyz') - ) - else: - self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, 0), 'wxyz')) - - # # wait for the base to fall down to the ground and for the arm to move to its initial position - # for _ in range(int(0.5 / self.physics_timestep)): - # self.simulator_step() - - if self.arena == "only_ll" or self.arena == "only_ll_obstacles": - self.target_pos = [-100, -100, 0] - elif self.arena == "simple_hl_ll" or self.arena == "complex_hl_ll": - if self.random_position: - self.target_pos = [np.random.uniform(-2, -1), np.random.uniform(-2, 2), 0.0] - else: - self.target_pos = np.array([-1.5, 0.0, 0.0]) - - self.door_handle_vis.set_position(pos=np.array(p.getLinkState(self.door.body_id, self.door_handle_link_id)[0])) - - def reset(self): - self.reset_interactive_objects() - self.stage = 0 - self.prev_stage = self.stage - return super(InteractiveNavigateEnv, self).reset() - - # wrap the specified dimensions of the states to [-pi, pi] - def wrap_to_pi(self, states, indices): - states[indices] = states[indices] - 2.0 * np.pi * np.floor((states[indices] + np.pi) / (np.pi * 2)) - return states - - def get_state(self, collision_links=[]): - state = super(InteractiveNavigateEnv, self).get_state(collision_links) - # self.state_stats['sensor'].append(state['sensor']) - # 
self.state_stats['auxiliary_sensor'].append(state['auxiliary_sensor']) - if self.normalize_observation: - for key in state: - obs_min = self.observation_normalizer[key][0] - obs_max = self.observation_normalizer[key][1] - obs_avg = (self.observation_normalizer[key][1] + self.observation_normalizer[key][0]) / 2.0 - obs_mag = (self.observation_normalizer[key][1] - self.observation_normalizer[key][0]) / 2.0 - # clipped = np.clip(state[key], obs_min, obs_max) - # if np.sum(state[key] == clipped) / float(state[key].shape[0]) < 0.8: - # print("WARNING: more than 20% of the observations are clipped for key: {}".format(key)) - state[key] = (np.clip(state[key], obs_min, obs_max) - obs_avg) / obs_mag # normalize to [-1, 1] - # self.state_stats['rgb'].append(state['rgb']) - # self.state_stats['depth'].append(state['depth']) - return state - - def get_additional_states(self): - additional_states = self.robots[0].get_end_effector_position() - assert len(additional_states) == self.additional_states_dim, 'additional states dimension mismatch' - return additional_states - - def get_auxiliary_sensor(self, collision_links=[]): - auxiliary_sensor = np.zeros(self.auxiliary_sensor_dim) - robot_state = self.robots[0].calc_state() - assert self.auxiliary_sensor_dim == 66 - assert robot_state.shape[0] == 31 - - robot_state = self.wrap_to_pi(robot_state, np.arange(7, 28, 3)) # wrap wheel and arm joint pos to [-pi, pi] - - end_effector_pos = self.robots[0].get_end_effector_position() - self.robots[0].get_position() - end_effector_pos = rotate_vector_3d(end_effector_pos, *self.robots[0].get_rpy()) - auxiliary_sensor[:3] = self.robots[0].get_position() # x, y, z - auxiliary_sensor[3:6] = end_effector_pos # arm_x, arm_y_ arm_z (local) - auxiliary_sensor[6:11] = robot_state[1:6] # vx, vy, vz, roll, pitch - auxiliary_sensor[11:46:5] = robot_state[7:28:3] # pos for wheel 1, 2, arm joint 1, 2, 3, 4, 5 - auxiliary_sensor[12:47:5] = robot_state[8:29:3] # vel for wheel 1, 2, arm joint 1, 2, 3, 4, 5 - auxiliary_sensor[13:48:5] = robot_state[9:30:3] # trq for wheel 1, 2, arm joint 1, 2, 3, 4, 5 - auxiliary_sensor[14:49:5] = np.cos(robot_state[7:28:3]) # cos(pos) for wheel 1, 2, arm joint 1, 2, 3, 4, 5 - auxiliary_sensor[15:50:5] = np.sin(robot_state[7:28:3]) # sin(pos) for wheel 1, 2, arm joint 1, 2, 3, 4, 5 - auxiliary_sensor[46:49] = robot_state[28:31] # v_roll, v_pitch, v_yaw - - roll, pitch, yaw = self.robots[0].get_rpy() - cos_yaw, sin_yaw = np.cos(yaw), np.sin(yaw) - door_angle = p.getJointState(self.door.body_id, self.door_axis_link_id)[0] - cos_door_angle, sin_door_angle = np.cos(door_angle), np.sin(door_angle) - has_door_handle_in_hand = 1.0 if self.stage == self.stage_open_door else -1.0 - door_pos = np.array([0, 0, -0.02]) - target_pos = self.target_pos - robot_pos = self.robots[0].get_position() - door_pos_local = rotate_vector_3d(door_pos - robot_pos, roll, pitch, yaw) - target_pos_local = rotate_vector_3d(target_pos - robot_pos, roll, pitch, yaw) - collision_links_flatten = [item for sublist in collision_links for item in sublist] - has_collision = 1.0 if len(collision_links_flatten) > 0 else -1.0 - - auxiliary_sensor[49:52] = np.array([yaw, cos_yaw, sin_yaw]) - auxiliary_sensor[52:56] = np.array([door_angle, cos_door_angle, sin_door_angle, has_door_handle_in_hand]) - auxiliary_sensor[56:59] = target_pos - auxiliary_sensor[59:62] = door_pos_local - auxiliary_sensor[62:65] = target_pos_local - auxiliary_sensor[65] = has_collision - - return auxiliary_sensor - - def filter_collision_links(self, collision_links): - 
collision_links = super(InteractiveNavigateEnv, self).filter_collision_links(collision_links) - # ignore collision between hand and door - collision_links = [[item for item in sublist - if not (item[2] == self.door.body_id and item[3] in [32, 33])] - for sublist in collision_links] - return collision_links + def step_dynamic_objects(self): + """ + Apply actions to dynamic objects (default: temporally extended random walk) + """ + if self.current_step % self.dynamic_objects_action_repeat == 0: + self.dynamic_objects_last_actions = [robot.action_space.sample() for robot in self.dynamic_objects] + for robot, action in zip(self.dynamic_objects, self.dynamic_objects_last_actions): + robot.apply_action(action) def step(self, action): - self.current_step += 1 + """ + Step dynamic objects as well + """ + if self.track == 'dynamic': + self.step_dynamic_objects() - dist = np.linalg.norm( - np.array(p.getLinkState(self.door.body_id, self.door_handle_link_id)[0]) - - np.array(p.getLinkState(self.robots[0].robot_ids[0], self.jr_end_effector_link_id)[0]) - ) - # print('dist', dist) + return super(NavigateRandomEnvSim2Real, self).step(action) - self.prev_stage = self.stage - if self.stage == self.stage_get_to_door_handle and dist < self.door_handle_dist_thresh: - assert self.cid is None - self.cid = p.createConstraint(self.robots[0].robot_ids[0], self.jr_end_effector_link_id, - self.door.body_id, self.door_handle_link_id, - p.JOINT_POINT2POINT, [0, 0, 0], - [0, 0.0, 0], [0, 0, 0]) - p.changeConstraint(self.cid, maxForce=500) - self.stage = self.stage_open_door - print("stage open_door") + def reset(self): + """ + Reset episode + """ + self.floor_num = self.scene.get_random_floor() + # reset "virtual floor" to the correct height + self.scene.reset_floor(floor=self.floor_num, additional_elevation=0.05) - if self.stage == self.stage_open_door and p.getJointState(self.door.body_id, 1)[0] > self.door_angle: - assert self.cid is not None - p.removeConstraint(self.cid) - self.cid = None - self.stage = self.stage_get_to_target_pos - print("stage get to target pos") + if self.track == 'interactive': + self.reset_interactive_objects() - door_angle = p.getJointState(self.door.body_id, self.door_axis_link_id)[0] + state = NavigateEnv.reset(self) - # door is pushed in the wrong direction, gradually reset it back to the neutral state - if door_angle < -0.01: - max_force = 10000 - p.setJointMotorControl2(bodyUniqueId=self.door.body_id, - jointIndex=self.door_axis_link_id, - controlMode=p.POSITION_CONTROL, - targetPosition=0.0, - positionGain=1, - force=max_force) - # door is pushed in the correct direction - else: - # if the door has not been opened, overwrite the previous position control with a trivial one - if self.stage != self.stage_get_to_target_pos: - max_force = 0 - p.setJointMotorControl2(bodyUniqueId=self.door.body_id, - jointIndex=self.door_axis_link_id, - controlMode=p.POSITION_CONTROL, - targetPosition=door_angle, - positionGain=0, - velocityGain=0, - force=max_force) - - # if the door has already been opened, try to set velocity to 0 so that it's more difficult for - # the agent to move the door on its way to the target position - else: - max_force = 100 - p.setJointMotorControl2(bodyUniqueId=self.door.body_id, - jointIndex=self.door_axis_link_id, - controlMode=p.POSITION_CONTROL, - targetPosition=door_angle, - targetVelocity=0.0, - positionGain=0, - velocityGain=1, - force=max_force) - - return super(InteractiveNavigateEnv, self).step(action) - - def get_potential(self): - door_angle = 
p.getJointState(self.door.body_id, self.door_axis_link_id)[0] - door_handle_pos = p.getLinkState(self.door.body_id, self.door_handle_link_id)[0] - if self.stage == self.stage_get_to_door_handle: - potential = l2_distance(door_handle_pos, self.robots[0].get_end_effector_position()) - elif self.stage == self.stage_open_door: - potential = -door_angle - elif self.stage == self.stage_get_to_target_pos: - potential = l2_distance(self.target_pos, self.get_position_of_interest()) - return potential - - def get_reward(self, collision_links=[], action=None, info={}): - reward = 0.0 - - if self.reward_type == 'dense': - if self.stage != self.prev_stage: - # advance to the next stage - self.potential = self.get_potential() - reward += self.success_reward / 2.0 - else: - new_potential = self.get_potential() - potential_reward = self.potential - new_potential - reward += potential_reward * self.potential_reward_weight # |potential_reward| ~= 0.1 per step - self.potential = new_potential - elif self.reward_type == "l2": - new_potential = self.get_l2_potential() - potential_reward = self.potential - new_potential - reward += potential_reward * self.potential_reward_weight - self.potential = new_potential - elif self.reward_type == 'stage_sparse': - if self.stage != self.prev_stage: - reward += self.success_reward / 2.0 - - base_moving = np.any(np.abs(action[:2]) >= 0.01) - arm_moving = np.any(np.abs(action[2:]) >= 0.01) - electricity_reward = float(base_moving) + float(arm_moving) - self.energy_cost += electricity_reward - reward += electricity_reward * self.electricity_reward_weight - - collision_links_flatten = [item for sublist in collision_links for item in sublist] - collision_reward = float(len(collision_links_flatten) > 0) - self.collision_step += int(len(collision_links_flatten) > 0) - reward += collision_reward * self.collision_reward_weight # |collision_reward| ~= 1.0 per step if collision - info['collision_reward'] = collision_reward * self.collision_reward_weight # expose collision reward to info - # self.reward_stats.append(np.abs(collision_reward * self.collision_reward_weight)) - - # goal reached - if l2_distance(self.target_pos, self.get_position_of_interest()) < self.dist_tol: - reward += self.success_reward # |success_reward| = 10.0 - - # death penalty - floor_height = 0.0 if self.floor_num is None else self.scene.get_floor_height(self.floor_num) - if self.robots[0].get_position()[2] > floor_height + self.death_z_thresh: - reward -= self.success_reward * 1.0 - - # push door the wrong way - # door_angle = p.getJointState(self.door.body_id, self.door_axis_link_id)[0] - # if door_angle > (10.0 / 180.0 * np.pi): - # reward -= self.success_reward * 1.0 - - # print("get_reward (stage %d): %f" % (self.stage, reward)) - return reward, info + if self.track == 'dynamic': + self.reset_dynamic_objects() + state = self.get_state() + return state if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--robot', - '-r', - choices=['turtlebot', 'jr'], - required=True, - help='which robot [turtlebot|jr]') parser.add_argument( '--config', '-c', @@ -1312,50 +845,31 @@ if __name__ == '__main__': default='headless', help='which mode for simulation (default: headless)') parser.add_argument('--env_type', - choices=['deterministic', 'random', 'interactive', 'ig'], + choices=['deterministic', 'random', 'sim2real'], default='deterministic', - help='which environment type (deterministic | random | interactive | ig') + help='which environment type (deterministic | random | sim2real)') 
+ parser.add_argument('--sim2real_track', + choices=['static', 'interactive', 'dynamic'], + default='static', + help='which sim2real track (static | interactive | dynamic)') args = parser.parse_args() - if args.robot == 'turtlebot': - config_filename = os.path.join(os.path.dirname(gibson2.__file__), - '../examples/configs/turtlebot_p2p_nav_discrete.yaml') \ - if args.config is None else args.config - elif args.robot == 'jr': - config_filename = os.path.join(os.path.dirname(gibson2.__file__), - '../examples/configs/jr2_reaching.yaml') \ - if args.config is None else args.config if args.env_type == 'deterministic': - nav_env = NavigateEnv(config_file=config_filename, + nav_env = NavigateEnv(config_file=args.config, mode=args.mode, action_timestep=1.0 / 10.0, physics_timestep=1.0 / 40.0) elif args.env_type == 'random': - nav_env = NavigateRandomEnv(config_file=config_filename, + nav_env = NavigateRandomEnv(config_file=args.config, mode=args.mode, action_timestep=1.0 / 10.0, physics_timestep=1.0 / 40.0) - elif args.env_type == 'ig': - nav_env = InteractiveGibsonNavigateEnv(config_file=config_filename, - mode=args.mode, - action_timestep=1.0 / 10.0, - physics_timestep=1 / 40.0) - else: - nav_env = InteractiveNavigateEnv(config_file=config_filename, - mode=args.mode, - action_timestep=1.0 / 10.0, - random_position=False, - physics_timestep=1.0 / 40.0, - arena='only_ll_obstacles') - - # # Sample code: manually set action using slide bar UI - # debug_params = [ - # p.addUserDebugParameter('link1', -1.0, 1.0, -0.5), - # p.addUserDebugParameter('link2', -1.0, 1.0, 0.5), - # p.addUserDebugParameter('link3', -1.0, 1.0, 0.5), - # p.addUserDebugParameter('link4', -1.0, 1.0, 0.5), - # p.addUserDebugParameter('link5', -1.0, 1.0, 0.0), - # ] + elif args.env_type == 'sim2real': + nav_env = NavigateRandomEnvSim2Real(config_file=args.config, + mode=args.mode, + action_timestep=1.0 / 10.0, + physics_timestep=1.0 / 40.0, + track=args.sim2real_track) for episode in range(100): print('Episode: {}'.format(episode)) @@ -1363,18 +877,6 @@ if __name__ == '__main__': nav_env.reset() for step in range(50): # 500 steps, 50s world time action = nav_env.action_space.sample() - # action[:] = 0.5 - # action[:] = -1.0 - # action[:] = -1.0 / 3 - # if nav_env.stage == 0: - # action[:2] = 0.5 - # elif nav_env.stage == 1: - # action[:2] = -0.5 - - # action = np.zeros(nav_env.action_space.shape) - # debug_param_values = [p.readUserDebugParameter(debug_param) for debug_param in debug_params] - # action[2:] = np.array(debug_param_values) - state, reward, done, _ = nav_env.step(action) # print('reward', reward) if done: diff --git a/gibson2/utils/utils.py b/gibson2/utils/utils.py index eb9f04d6d..5d26be0df 100644 --- a/gibson2/utils/utils.py +++ b/gibson2/utils/utils.py @@ -1,6 +1,6 @@ import numpy as np import yaml - +from scipy.spatial.transform import Rotation as R # File I/O related def parse_config(config): @@ -8,19 +8,29 @@ def parse_config(config): config_data = yaml.load(f, Loader=yaml.FullLoader) return config_data - # Geometry related def rotate_vector_3d(v, r, p, y): - """Rotates vector by roll, pitch and yaw counterclockwise""" - rot_x = np.array([[1, 0, 0], [0, np.cos(-r), -np.sin(-r)], [0, np.sin(-r), np.cos(-r)]]) - rot_y = np.array([[np.cos(-p), 0, np.sin(-p)], [0, 1, 0], [-np.sin(-p), 0, np.cos(-p)]]) - rot_z = np.array([[np.cos(-y), -np.sin(-y), 0], [np.sin(-y), np.cos(-y), 0], [0, 0, 1]]) - return np.dot(rot_x, np.dot(rot_y, np.dot(rot_z, v))) + """Rotates 3d vector by roll, pitch and yaw counterclockwise""" + 
local_to_global = R.from_euler('xyz', [r, p, y]).as_dcm() + global_to_local = local_to_global.T + return np.dot(global_to_local, v) +def rotate_vector_2d(v, yaw): + """Rotates 2d vector by yaw counterclockwise""" + local_to_global = R.from_euler('z', yaw).as_dcm() + global_to_local = local_to_global.T + global_to_local = global_to_local[:2, :2] + if len(v.shape) == 1: + return np.dot(global_to_local, v) + elif len(v.shape) == 2: + return np.dot(global_to_local, v.T).T + else: + print('Incorrect input shape for rotate_vector_2d', v.shape) + return v def l2_distance(v1, v2): """Returns the L2 distance between vector v1 and v2.""" - return np.linalg.norm(v1 - v2) + return np.linalg.norm(np.array(v1) - np.array(v2)) def quatFromXYZW(xyzw, seq): diff --git a/setup.py b/setup.py index 1d27a8115..b549f5d1c 100644 --- a/setup.py +++ b/setup.py @@ -110,7 +110,7 @@ setup( 'gym==0.12', 'Pillow==5.4.1', 'PyYAML==5.3', - 'pybullet==2.4.1', + 'pybullet==2.4.9', 'transforms3d==0.3.1', 'tqdm==4.19.9', 'Pillow==5.4.1',
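For reference, the falling-distance assertion added to NavigateRandomEnv.__init__ can be checked by hand; a minimal sketch, assuming the default action_timestep of 1/10 s used by the __main__ block above and the default initial_pos_z_offset of 0.1 m:

    # worked check of the constructor assertion (sketch, not part of the diff)
    action_timestep = 1.0 / 10.0
    initial_pos_z_offset = 0.1

    # distance a free-falling body covers in one action step: 0.5 * g * t^2
    falling_distance_in_one_action_step = 0.5 * 9.8 * action_timestep ** 2   # = 0.049 m

    # the constructor requires a 1.5x safety margin over that distance
    assert initial_pos_z_offset > falling_distance_in_one_action_step * 1.5  # 0.1 > 0.0735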
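Similarly, the scipy-based helpers added to gibson2/utils/utils.py can be sanity-checked numerically. As implemented, rotate_vector_2d and rotate_vector_3d express a world-frame vector in the rotated (local) frame, i.e. they apply the transpose of the rotation matrix. A minimal sketch, assuming gibson2 is importable and a scipy release that still provides Rotation.as_dcm (renamed to as_matrix in later versions):

    import numpy as np
    from gibson2.utils.utils import rotate_vector_2d, rotate_vector_3d, l2_distance

    # a world-frame +x vector, expressed in a frame yawed by 90 degrees, points along -y
    assert np.allclose(rotate_vector_2d(np.array([1.0, 0.0]), np.pi / 2), [0.0, -1.0])

    # the 3d variant behaves the same way for a pure yaw rotation
    assert np.allclose(rotate_vector_3d(np.array([1.0, 0.0, 0.0]), 0.0, 0.0, np.pi / 2),
                       [0.0, -1.0, 0.0])

    # l2_distance now accepts plain Python lists as well as numpy arrays
    assert np.isclose(l2_distance([0.0, 0.0, 0.0], [1.0, 1.0, 1.0]), np.sqrt(3.0))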