From fa759cd6cac945405d2c43595aeb94de59d39290 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Thu, 24 Oct 2019 20:48:54 -0700
Subject: [PATCH 01/58] s2r p2p turtlebot

---
 .../configs/turtlebot_interactive_nav.yaml    |   2 +-
 gibson2/core/physics/scene.py                 |   6 +-
 gibson2/envs/locomotor_env.py                 | 404 ++++++++++++++----
 gibson2/utils/utils.py                        |  23 +-
 4 files changed, 351 insertions(+), 84 deletions(-)

diff --git a/examples/configs/turtlebot_interactive_nav.yaml b/examples/configs/turtlebot_interactive_nav.yaml
index c3a6c7102..f20625b95 100644
--- a/examples/configs/turtlebot_interactive_nav.yaml
+++ b/examples/configs/turtlebot_interactive_nav.yaml
@@ -47,7 +47,7 @@ dist_tol: 0.333  # body width
 max_step: 500
 
 # sensor
-output: [sensor, depth_seg]
+output: [sensor, depth]
 resolution: 68
 fov: 120
 
diff --git a/gibson2/core/physics/scene.py b/gibson2/core/physics/scene.py
index 654f117d9..df7a7fd88 100644
--- a/gibson2/core/physics/scene.py
+++ b/gibson2/core/physics/scene.py
@@ -140,15 +140,15 @@ class BuildingScene(Scene):
                 print('floors', self.floors)
             for f in range(len(self.floors)):
                 trav_map = Image.open(os.path.join(get_model_path(self.model_id), 'floor_trav_{}.png'.format(f)))
-                obstacle_map = Image.open(os.path.join(get_model_path(self.model_id), 'floor_{}.png'.format(f)))
+                # obstacle_map = Image.open(os.path.join(get_model_path(self.model_id), 'floor_{}.png'.format(f)))
                 if self.trav_map_original_size is None:
                     width, height = trav_map.size
                     assert width == height, 'trav map is not a square'
                     self.trav_map_original_size = height
                     self.trav_map_size = int(self.trav_map_original_size * self.trav_map_default_resolution / self.trav_map_resolution)
                 trav_map = np.array(trav_map.resize((self.trav_map_size, self.trav_map_size)))
-                obstacle_map = np.array(obstacle_map.resize((self.trav_map_size, self.trav_map_size)))
-                trav_map[obstacle_map == 0] = 0
+                # obstacle_map = np.array(obstacle_map.resize((self.trav_map_size, self.trav_map_size)))
+                # trav_map[obstacle_map == 0] = 0
                 trav_map = cv2.erode(trav_map, np.ones((self.trav_map_erosion, self.trav_map_erosion)))
 
                 if self.build_graph:
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 72a41cdab..d1f000284 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -1,6 +1,6 @@
 from gibson2.core.physics.interactive_objects import VisualObject, InteractiveObj, BoxShape
 import gibson2
-from gibson2.utils.utils import parse_config, rotate_vector_3d, l2_distance, quatToXYZW
+from gibson2.utils.utils import parse_config, rotate_vector_3d, rotate_vector_2d, l2_distance, quatToXYZW
 from gibson2.envs.base_env import BaseEnv
 from transforms3d.euler import euler2quat
 from collections import OrderedDict
@@ -18,29 +18,30 @@ from IPython import embed
 import cv2
 import time
 import collections
-
+import matplotlib.pyplot as plt
 
 # define navigation environments following Anderson, Peter, et al. 'On evaluation of embodied navigation agents.'
 # arXiv preprint arXiv:1807.06757 (2018).
 # https://arxiv.org/pdf/1807.06757.pdf
 
 Episode = collections.namedtuple('Episode',
-                                 ['env',
-                                  'agent',
-                                  'initial_pos',
-                                  'target_pos',
-                                  'geodesic_distance',
-                                  'shortest_path',
-                                  'agent_trajectory',
-                                  'object_files',
-                                  'object_trajectory',
-                                  'success',
-                                  'path_efficiency',
-                                  'kinematic_disturbance',
-                                  'dynamic_disturbance_a',
-                                  'dynamic_disturbance_b',
-                                  'collision_step',
-                                  ])
+                                 [
+                                     # 'env',
+                                     # 'agent',
+                                     # 'initial_pos',
+                                     # 'target_pos',
+                                     # 'geodesic_distance',
+                                     # 'shortest_path',
+                                     # 'agent_trajectory',
+                                     # 'object_files',
+                                     # 'object_trajectory',
+                                     'success',
+                                     # 'path_efficiency',
+                                     # 'kinematic_disturbance',
+                                     # 'dynamic_disturbance_a',
+                                     # 'dynamic_disturbance_b',
+                                     # 'collision_step',
+                                 ])
 
 
 class NavigateEnv(BaseEnv):
@@ -424,34 +425,34 @@ class NavigateEnv(BaseEnv):
             #                                   'collision_step',
             #                                   ])
 
-            shortest_path, geodesic_distance = self.scene.get_shortest_path(self.floor_num,
-                                                                            self.initial_pos[:2],
-                                                                            self.target_pos[:2],
-                                                                            entire_path=True)
-            floor_height = self.scene.get_floor_height(self.floor_num)
-            shortest_path = np.array([np.array([path[0], path[1], floor_height]) for path in shortest_path])
-            min_kin_dist = self.path_length * self.robots[0].robot_mass
-            kinematic_disturbance = min_kin_dist / (min_kin_dist + self.kinematic_disturbance)
-            min_dyn_dist = self.current_step * self.robots[0].robot_mass * 9.8
-            dynamic_disturbance_a = min_dyn_dist / (min_dyn_dist + self.dynamic_disturbance_a)
-            dynamic_disturbance_b = self.current_step / float(self.current_step + self.dynamic_disturbance_b)
-            object_files = [obj.filename for obj in self.interactive_objects]
+            # shortest_path, geodesic_distance = self.scene.get_shortest_path(self.floor_num,
+            #                                                                 self.initial_pos[:2],
+            #                                                                 self.target_pos[:2],
+            #                                                                 entire_path=True)
+            # floor_height = self.scene.get_floor_height(self.floor_num)
+            # shortest_path = np.array([np.array([path[0], path[1], floor_height]) for path in shortest_path])
+            # min_kin_dist = self.path_length * self.robots[0].robot_mass
+            # kinematic_disturbance = min_kin_dist / (min_kin_dist + self.kinematic_disturbance)
+            # min_dyn_dist = self.current_step * self.robots[0].robot_mass * 9.8
+            # dynamic_disturbance_a = min_dyn_dist / (min_dyn_dist + self.dynamic_disturbance_a)
+            # dynamic_disturbance_b = self.current_step / float(self.current_step + self.dynamic_disturbance_b)
+            # object_files = [obj.filename for obj in self.interactive_objects]
             episode = Episode(
-                env=self.scene.model_id,
-                agent=self.robots[0].model_file,
-                initial_pos=self.initial_pos,
-                target_pos=self.target_pos,
-                geodesic_distance=geodesic_distance,
-                shortest_path=shortest_path,
-                agent_trajectory=np.array(self.agent_trajectory),
-                object_files=object_files,
-                object_trajectory=np.array(self.object_trajectory),
+                # env=self.scene.model_id,
+                # agent=self.robots[0].model_file,
+                # initial_pos=self.initial_pos,
+                # target_pos=self.target_pos,
+                # geodesic_distance=geodesic_distance,
+                # shortest_path=shortest_path,
+                # agent_trajectory=np.array(self.agent_trajectory),
+                # object_files=object_files,
+                # object_trajectory=np.array(self.object_trajectory),
                 success=float(info['success']),
-                path_efficiency=min(1.0, geodesic_distance / self.path_length),
-                kinematic_disturbance=kinematic_disturbance,
-                dynamic_disturbance_a=dynamic_disturbance_a,
-                dynamic_disturbance_b=dynamic_disturbance_b,
-                collision_step=self.collision_step,
+                # path_efficiency=min(1.0, geodesic_distance / self.path_length),
+                # kinematic_disturbance=kinematic_disturbance,
+                # dynamic_disturbance_a=dynamic_disturbance_a,
+                # dynamic_disturbance_b=dynamic_disturbance_b,
+                # collision_step=self.collision_step,
             )
             self.stored_episodes.append(episode)
 
@@ -501,9 +502,13 @@ class NavigateEnv(BaseEnv):
         collision_links_flatten = [item for sublist in collision_links for item in sublist]
         return len(collision_links_flatten) == 0
 
+    def after_reset_agent(self):
+        return
+
     def reset(self):
         self.current_episode += 1
         self.reset_agent()
+        self.after_reset_agent()
         state = self.get_state()
 
         if self.reward_type == 'l2':
@@ -772,6 +777,268 @@ class InteractiveGibsonNavigateEnv(NavigateRandomEnv):
         ])
 
 
+class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
+    def __init__(self,
+                 config_file,
+                 model_id=None,
+                 collision_reward_weight=0.0,
+                 mode='headless',
+                 action_timestep=1 / 10.0,
+                 physics_timestep=1 / 240.0,
+                 device_idx=0,
+                 automatic_reset=False,
+                 ):
+        super(InteractiveGibsonNavigateSim2RealEnv, self).__init__(config_file,
+                                                                   model_id=model_id,
+                                                                   mode=mode,
+                                                                   action_timestep=action_timestep,
+                                                                   physics_timestep=physics_timestep,
+                                                                   automatic_reset=automatic_reset,
+                                                                   random_height=False,
+                                                                   device_idx=device_idx)
+        self.collision_reward_weight = collision_reward_weight
+
+        self.replaced_objects = []
+        self.replaced_objects_pos = []
+        self.additional_objects = []
+        self.class_map = {}
+
+        self.should_load_replaced_objects = self.config.get('should_load_replaced_objects', False)
+        self.should_load_additional_objects = self.config.get('should_load_additional_objects', False)
+
+        self.build_class_map()
+        self.load_replaced_objects()
+        self.load_additional_objects()
+        self.interactive_objects = self.replaced_objects + self.additional_objects
+        self.new_potential = None
+
+        self.gt_pos = []
+        self.ns_pos = []
+
+        # self.eyes_vis = VisualObject(rgba_color=[1, 0, 0, 1.0], radius=0.03)
+        # self.eyes_vis.load()
+        resolution = self.config.get('resolution', 64)
+        width = resolution
+        height = int(width * (480.0 / 640.0))
+        self.observation_space.spaces['depth'] = gym.spaces.Box(low=-np.inf,
+                                                                high=np.inf,
+                                                                shape=(height, width, 1),
+                                                                dtype=np.float32)
+        self.visualize_waypoints = True
+        if self.visualize_waypoints and self.mode == 'gui':
+            cyl_length = 0.2
+            self.waypoints_vis = [VisualObject(visual_shape=p.GEOM_CYLINDER,
+                                               rgba_color=[0, 1, 0, 0.3],
+                                               radius=0.1,
+                                               length=cyl_length,
+                                               initial_offset=[0, 0, cyl_length / 2.0]) for _ in range(1000)]
+            for waypoint in self.waypoints_vis:
+                waypoint.load()
+
+    def build_class_map(self):
+        # 0 and 1 are reserved for mesh and robot
+        init_class_id = 2
+
+        if self.should_load_replaced_objects:
+            self.replaced_object_classes = [
+                '03001627',  # chair
+                '04256520',  # couch
+                '04379243',  # table / desk
+                '00000001',  # door
+            ]
+            for item in self.replaced_object_classes:
+                self.class_map[item] = init_class_id
+                init_class_id += 1
+
+        if self.should_load_additional_objects:
+            self.additional_objects_path = [
+                'object_2eZY2JqYPQE.urdf',
+                'object_lGzQi2Pk5uC.urdf',
+                'object_ZU6u5fvE8Z1.urdf',
+                'object_H3ygj6efM8V.urdf',
+                'object_RcqC01G24pR.urdf'
+            ]
+
+            for item in self.additional_objects_path:
+                self.class_map[item] = init_class_id
+                init_class_id += 1
+
+        self.num_object_classes = init_class_id
+
+    def load_replaced_objects(self):
+        if not self.should_load_replaced_objects:
+            return
+        scene_path = os.path.join(gibson2.assets_path, 'dataset', self.scene.model_id)
+        urdf_files = [item for item in os.listdir(scene_path) if item[-4:] == 'urdf']
+        position_files = [item[:-4].replace('alignment_centered', 'pos') + 'txt' for item in urdf_files]
+
+        for urdf_file, position_file in zip(urdf_files, position_files):
+            with open(os.path.join(scene_path, position_file)) as f:
+                pos = np.array([float(item) for item in f.readlines()[0].strip().split()])
+                # filter out duplicate annotations for the same object
+                if len(self.replaced_objects_pos) == 0 or \
+                        np.min(np.linalg.norm(np.array(self.replaced_objects_pos) - pos, axis=1)) > 0.5:
+                    class_id = urdf_file.split('.')[0].split('_')[-1]
+                    obj = InteractiveObj(os.path.join(scene_path, urdf_file))
+                    self.simulator.import_object(obj, class_id=self.class_map[class_id])
+                    self.replaced_objects.append(obj)
+                    self.replaced_objects_pos.append(pos)
+
+    def load_additional_objects(self):
+        if not self.should_load_additional_objects:
+            return
+        num_dupicates = 2
+        for _ in range(num_dupicates):
+            for urdf_model in self.additional_objects_path:
+                obj = InteractiveObj(os.path.join(gibson2.assets_path, 'models/sample_urdfs', urdf_model))
+                self.simulator.import_object(obj, class_id=self.class_map[urdf_model])
+                self.additional_objects.append(obj)
+
+    def global_to_local(self, pos, cur_pos, cur_rot):
+        return rotate_vector_3d(pos - cur_pos, *cur_rot)
+
+    def get_additional_states(self):
+        pos_noise = 0.0
+        cur_pos = self.robots[0].get_position()
+        cur_pos[:2] += np.random.normal(0, pos_noise, 2)
+        # self.gt_pos.append(self.robots[0].get_position())
+        # self.ns_pos.append(cur_pos)
+
+        rot_noise = 0.0 / 180.0 * np.pi
+        cur_rot = self.robots[0].get_rpy()
+        cur_rot = (cur_rot[0], cur_rot[1], cur_rot[2] + np.random.normal(0, rot_noise))
+
+        target_pos_local = self.global_to_local(self.target_pos, cur_pos, cur_rot)[:2]
+        linear_velocity_local = rotate_vector_3d(self.robots[0].robot_body.velocity(), *cur_rot)[:2]
+        angular_velocity_local = rotate_vector_3d(self.robots[0].robot_body.angular_velocity(), *cur_rot)[:2]
+
+        gt_pos = self.robots[0].get_position()[:2]
+        source = gt_pos
+        target = self.target_pos[:2]
+        _, geodesic_dist = self.scene.get_shortest_path(self.floor_num, source, target)
+        # geodesic_dist = 0.0
+
+        robot_z = self.robots[0].get_position()[2]
+        if self.visualize_waypoints and self.mode == 'gui':
+            for i in range(1000):
+                self.waypoints_vis[i].set_position(pos=np.array([0.0, 0.0, 0.0]))
+            for i in range(min(1000, self.shortest_path.shape[0])):
+                self.waypoints_vis[i].set_position(pos=np.array([self.shortest_path[i][0],
+                                                                 self.shortest_path[i][1],
+                                                                 robot_z]))
+        # self.eyes_vis.set_position(pos=self.robots[0].eyes.get_position())
+
+        closest_idx = np.argmin(np.linalg.norm(cur_pos[:2] - self.shortest_path, axis=1))
+        shortest_path = self.shortest_path[closest_idx:closest_idx + self.scene.num_waypoints]
+        num_remaining_waypoints = self.scene.num_waypoints - shortest_path.shape[0]
+        if num_remaining_waypoints > 0:
+            remaining_waypoints = np.tile(self.target_pos[:2], (num_remaining_waypoints, 1))
+            shortest_path = np.concatenate((shortest_path, remaining_waypoints), axis=0)
+
+        shortest_path = np.concatenate((shortest_path, robot_z * np.ones((shortest_path.shape[0], 1))), axis=1)
+
+        waypoints_local_xy = np.array([self.global_to_local(waypoint, cur_pos, cur_rot)[:2]
+                                       for waypoint in shortest_path]).flatten()
+        additional_states = np.concatenate((waypoints_local_xy,
+                                            target_pos_local,
+                                            linear_velocity_local,
+                                            angular_velocity_local))
+        # cache results for reward calculation
+        self.new_potential = geodesic_dist
+        assert len(additional_states) == self.additional_states_dim, 'additional states dimension mismatch'
+        return additional_states
+
+    def get_state(self, collision_links=[]):
+        state = super(InteractiveGibsonNavigateSim2RealEnv, self).get_state(collision_links)
+        depth = state['depth']
+        width = depth.shape[0]
+        height = int(width * (480.0 / 640.0))
+        half_diff = int((width - height) / 2)
+        depth = depth[half_diff:half_diff+height, :]
+        # cv2.imshow('depth', depth)
+        depth[depth < 0.6] = -1.0
+        depth[depth > 4.0] = -1.0
+        state['depth'] = depth
+        return state
+
+    def get_potential(self):
+        return self.new_potential
+
+    def reset_additional_objects(self):
+        for obj in self.additional_objects:
+            while True:
+                _, pos = self.scene.get_random_point_floor(self.floor_num, self.random_height)
+                obj.set_position_rotation([pos[0], pos[1], pos[2] + self.random_init_z_offset],
+                                          quatToXYZW(euler2quat(0.0, 0.0, 0.0), 'wxyz'))
+                has_collision = False
+                for _ in range(self.simulator_loop):
+                    self.simulator_step()
+                    if len(p.getContactPoints(bodyA=obj.body_id)) > 0:
+                        has_collision = True
+                        break
+                if not has_collision:
+                    break
+
+    def reset_replaced_objects(self):
+        for obj, pos in zip(self.replaced_objects, self.replaced_objects_pos):
+            obj.set_position_rotation([pos[0], pos[1], pos[2] + self.random_init_z_offset],
+                                      quatToXYZW(euler2quat(0.0, 0.0, 0.0), 'wxyz'))
+
+    def reset(self):
+        self.floor_num = self.scene.get_random_floor()
+        self.scene.reset_floor(floor=self.floor_num, additional_elevation=0.05)
+        self.reset_replaced_objects()
+        self.reset_additional_objects()
+        self.new_potential = None
+        # if len(self.gt_pos) > 0:
+        #     self.gt_pos = np.array(self.gt_pos)
+        #     self.ns_pos = np.array(self.ns_pos)
+        #     plt.figure()
+        #     plt.scatter(self.gt_pos[:, 0], self.gt_pos[:, 1], c='r')
+        #     plt.scatter(self.ns_pos[:, 0], self.ns_pos[:, 1], c='b')
+        #     plt.show()
+        #     self.gt_pos = []
+        #     self.ns_pos = []
+
+        state = NavigateEnv.reset(self)
+        return state
+
+    def after_reset_agent(self):
+        source = self.robots[0].get_position()[:2]
+        target = self.target_pos[:2]
+        shortest_path, _ = self.scene.get_shortest_path(self.floor_num, source, target, entire_path=True)
+        self.shortest_path = shortest_path
+
+    # def before_simulation(self):
+    #     robot_position = self.robots[0].get_position()
+    #     object_positions = [obj.get_position() for obj in self.interactive_objects]
+    #     return robot_position, object_positions
+    #
+    # def after_simulation(self, cache, collision_links):
+    #     robot_position, object_positions = cache
+    #
+    #     collision_links_flatten = [item for sublist in collision_links for item in sublist]
+    #     if len(collision_links_flatten) > 0:
+    #         self.dynamic_disturbance_a += np.mean([
+    #             np.linalg.norm(
+    #                 np.sum([elem[9] * np.array(elem[7]) for elem in sublist], axis=0)  # sum of all forces
+    #             )
+    #             for sublist in collision_links])
+    #         collision_objects = set([col[2] for col in collision_links_flatten])
+    #         self.dynamic_disturbance_b += len(collision_objects)
+    #         self.interactive_objects_collided |= collision_objects
+    #
+    #     self.agent_trajectory.append(np.concatenate((self.robots[0].get_position(), self.robots[0].get_orientation())))
+    #     self.object_trajectory.append([np.concatenate((obj.get_position(), obj.get_orientation()))
+    #                                    for obj in self.interactive_objects])
+    #     self.path_length += np.linalg.norm(self.robots[0].get_position() - robot_position)
+    #     self.kinematic_disturbance += np.sum([
+    #         obj.mass * np.linalg.norm(np.array(obj.get_position()) - np.array(prev_pos))
+    #         for obj, prev_pos in zip(self.interactive_objects, object_positions)
+    #         if obj.body_id in self.interactive_objects_collided
+    #     ])
+
+
 class InteractiveNavigateEnv(NavigateEnv):
     def __init__(self,
                  config_file,
@@ -1283,51 +1550,40 @@ class InteractiveNavigateEnv(NavigateEnv):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--robot',
-                        '-r',
-                        choices=['turtlebot', 'jr'],
-                        required=True,
-                        help='which robot [turtlebot|jr]')
-    parser.add_argument(
-        '--config',
-        '-c',
-        help='which config file to use [default: use yaml files in examples/configs]')
-    parser.add_argument('--mode',
-                        '-m',
+    parser.add_argument('--config', '-c',
+                        help='which config file to use [default: use yaml files in examples/configs]', required=True)
+    parser.add_argument('--env_type',
+                        choices=['deterministic', 'random', 'interactive', 'ig', 'ig_s2r'],
+                        default='deterministic',
+                        help='which environment type (deterministic | random | interactive | ig', required=True)
+    parser.add_argument('--mode', '-m',
                         choices=['headless', 'gui'],
                         default='headless',
                         help='which mode for simulation (default: headless)')
-    parser.add_argument('--env_type',
-                        choices=['deterministic', 'random', 'interactive', 'ig'],
-                        default='deterministic',
-                        help='which environment type (deterministic | random | interactive | ig')
     args = parser.parse_args()
 
-    if args.robot == 'turtlebot':
-        config_filename = os.path.join(os.path.dirname(gibson2.__file__),
-                                       '../examples/configs/turtlebot_p2p_nav_discrete.yaml') \
-            if args.config is None else args.config
-    elif args.robot == 'jr':
-        config_filename = os.path.join(os.path.dirname(gibson2.__file__),
-                                       '../examples/configs/jr2_reaching.yaml') \
-            if args.config is None else args.config
     if args.env_type == 'deterministic':
-        nav_env = NavigateEnv(config_file=config_filename,
+        nav_env = NavigateEnv(config_file=args.config,
                               mode=args.mode,
                               action_timestep=1.0 / 10.0,
                               physics_timestep=1.0 / 40.0)
     elif args.env_type == 'random':
-        nav_env = NavigateRandomEnv(config_file=config_filename,
+        nav_env = NavigateRandomEnv(config_file=args.config,
                                     mode=args.mode,
                                     action_timestep=1.0 / 10.0,
                                     physics_timestep=1.0 / 40.0)
     elif args.env_type == 'ig':
-        nav_env = InteractiveGibsonNavigateEnv(config_file=config_filename,
+        nav_env = InteractiveGibsonNavigateEnv(config_file=args.config,
                                                mode=args.mode,
                                                action_timestep=1.0 / 10.0,
-                                               physics_timestep=1 / 40.0)
-    else:
-        nav_env = InteractiveNavigateEnv(config_file=config_filename,
+                                               physics_timestep=1.0 / 40.0)
+    elif args.env_type == 'ig_s2r':
+        nav_env = InteractiveGibsonNavigateSim2RealEnv(config_file=args.config,
+                                                       mode=args.mode,
+                                                       action_timestep=1.0 / 10.0,
+                                                       physics_timestep=1.0 / 40.0)
+    elif args.env_type == 'interactive':
+        nav_env = InteractiveNavigateEnv(config_file=args.config,
                                          mode=args.mode,
                                          action_timestep=1.0 / 10.0,
                                          random_position=False,
@@ -1347,7 +1603,7 @@ if __name__ == '__main__':
         print('Episode: {}'.format(episode))
         start = time.time()
         nav_env.reset()
-        for step in range(50):  # 500 steps, 50s world time
+        for step in range(500):  # 500 steps, 50s world time
             action = nav_env.action_space.sample()
             # action[:] = 0.5
             # action[:] = -1.0
diff --git a/gibson2/utils/utils.py b/gibson2/utils/utils.py
index 0854bc435..6af774bd1 100644
--- a/gibson2/utils/utils.py
+++ b/gibson2/utils/utils.py
@@ -1,6 +1,6 @@
 import numpy as np
 import yaml
-
+from scipy.spatial.transform import Rotation as R
 
 # File I/O related
 def parse_config(config):
@@ -11,11 +11,22 @@ def parse_config(config):
 
 # Geometry related
 def rotate_vector_3d(v, r, p, y):
-    """Rotates vector by roll, pitch and yaw counterclockwise"""
-    rot_x = np.array([[1, 0, 0], [0, np.cos(-r), -np.sin(-r)], [0, np.sin(-r), np.cos(-r)]])
-    rot_y = np.array([[np.cos(-p), 0, np.sin(-p)], [0, 1, 0], [-np.sin(-p), 0, np.cos(-p)]])
-    rot_z = np.array([[np.cos(-y), -np.sin(-y), 0], [np.sin(-y), np.cos(-y), 0], [0, 0, 1]])
-    return np.dot(rot_x, np.dot(rot_y, np.dot(rot_z, v)))
+    local_to_global = R.from_euler('xyz', [r, p, y]).as_dcm()
+    global_to_local = local_to_global.T
+    return np.dot(global_to_local, v)
+
+
+def rotate_vector_2d(v, yaw):
+    local_to_global = R.from_euler('z', yaw).as_dcm()
+    global_to_local = local_to_global.T
+    global_to_local = global_to_local[:2, :2]
+    if len(v.shape) == 1:
+        return np.dot(global_to_local, v)
+    elif len(v.shape) == 2:
+        return np.dot(global_to_local, v.T).T
+    else:
+        print(v.shape)
+        raise Exception('invalid shape for v')
 
 
 def l2_distance(v1, v2):

From ba1aea09ed5e07f33647752ed5b8807c93b1054d Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Thu, 24 Oct 2019 21:33:55 -0700
Subject: [PATCH 02/58] s2r config file

---
 .../turtlebot_interactive_nav_s2r.yaml        | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 examples/configs/turtlebot_interactive_nav_s2r.yaml

diff --git a/examples/configs/turtlebot_interactive_nav_s2r.yaml b/examples/configs/turtlebot_interactive_nav_s2r.yaml
new file mode 100644
index 000000000..632382ffb
--- /dev/null
+++ b/examples/configs/turtlebot_interactive_nav_s2r.yaml
@@ -0,0 +1,81 @@
+# scene
+scene: building
+#model_id: Kerrtown
+model_id: area1
+#model_id: Albertville
+build_graph: true
+load_texture: false
+should_load_replaced_objects: false
+should_load_additional_objects: false
+trav_map_erosion: 2
+
+# robot
+robot: Turtlebot
+
+action_high: 0.1
+action_low: -0.05
+
+# task, observation and action
+task: pointgoal # pointgoal|objectgoal|areagoal|reaching
+fisheye: false
+
+initial_pos: [0, 0, 0.0]
+initial_orn: [0.0, 0.0, 0.0]
+
+target_pos: [0, 2, 0.0]
+target_orn: [0.0, 0.0, 0.0]
+
+is_discrete: false
+additional_states_dim: 26
+
+# reward
+reward_type: dense
+success_reward: 10.0
+slack_reward: 0.0
+potential_reward_weight: 1.0
+electricity_reward_weight: 0.0
+stall_torque_reward_weight: 0.0
+collision_reward_weight: -0.0
+collision_ignore_link_a_ids: [1, 2, 3, 4]  # ignore collision with these agent's link ids
+
+# discount factor
+discount_factor: 0.99
+
+# termination condition
+death_z_thresh: 0.2
+dist_tol: 0.333  # body width
+max_step: 500
+
+# sensor
+output: [sensor, depth]
+resolution: 80
+fov: 58
+
+# display
+use_filler: true
+display_ui: false
+show_diagnostics: false
+ui_num: 2
+ui_components: [RGB_FILLED, DEPTH]
+random:
+  random_initial_pose: false
+  random_target_pose: false
+  random_init_x_range: [-0.1, 0.1]
+  random_init_y_range: [-0.1, 0.1]
+  random_init_z_range: [-0.1, 0.1]
+  random_init_rot_range: [-0.1, 0.1]
+
+speed:
+  timestep: 0.001
+  frameskip: 10
+
+mode: web_ui #gui|headless
+verbose: false
+fast_lq_render: true
+
+# visual objects
+visual_object_at_initial_target_pos: true
+target_visual_object_visible_to_agent: false
+
+# debug
+debug: false

From 22a5af2b2b865426be155cdc6fd1b2218ad8e88e Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Fri, 29 Nov 2019 15:45:18 -0800
Subject: [PATCH 03/58] stadium scene has consistent API with house scene, stop
 rendering floor for better RGB, locomotor_env rewrite lidar for Fetch,
 angular velocity fix bug (now only z-axis), add approximate geodesic dist to
 speed up training, change depth map clipping, add fetch sim2real config

---
 .../configs/fetch_interactive_nav_s2r.yaml    |  83 +++++++
 gibson2/core/physics/scene.py                 |   9 +
 gibson2/core/simulator.py                     |  22 +-
 gibson2/envs/locomotor_env.py                 | 227 ++++++++++++------
 4 files changed, 255 insertions(+), 86 deletions(-)
 create mode 100644 examples/configs/fetch_interactive_nav_s2r.yaml

diff --git a/examples/configs/fetch_interactive_nav_s2r.yaml b/examples/configs/fetch_interactive_nav_s2r.yaml
new file mode 100644
index 000000000..d63ad8e7d
--- /dev/null
+++ b/examples/configs/fetch_interactive_nav_s2r.yaml
@@ -0,0 +1,83 @@
+# scene
+scene: building
+#model_id: Kerrtown
+model_id: Norvelt
+#model_id: Albertville
+build_graph: true
+load_texture: true
+should_load_replaced_objects: false
+should_load_additional_objects: false
+trav_map_erosion: 3
+
+# robot
+robot: Fetch
+
+action_high: 0.1
+action_low: -0.05
+
+# task, observation and action
+task: pointgoal # pointgoal|objectgoal|areagoal|reaching
+fisheye: false
+
+initial_pos: [0, 0, 0.0]
+initial_orn: [0.0, 0.0, 0.0]
+
+target_pos: [0, 2, 0.0]
+target_orn: [0.0, 0.0, 0.0]
+
+is_discrete: false
+additional_states_dim: 25
+
+# reward
+reward_type: dense
+success_reward: 10.0
+slack_reward: 0.0
+potential_reward_weight: 1.0
+electricity_reward_weight: 0.0
+stall_torque_reward_weight: 0.0
+collision_reward_weight: -0.0
+collision_ignore_link_a_ids: [0, 1, 2]  # ignore collision with these agent's link ids
+
+# discount factor
+discount_factor: 0.99
+
+# termination condition
+death_z_thresh: 0.2
+dist_tol: 0.5  # body width
+max_step: 500
+
+# sensor
+output: [sensor, depth, scan]
+resolution: 256
+fov: 54
+n_horizontal_rays: 220
+n_vertical_beams: 1
+
+# display
+use_filler: true
+display_ui: false
+show_diagnostics: false
+ui_num: 2
+ui_components: [RGB_FILLED, DEPTH]
+random:
+  random_initial_pose: false
+  random_target_pose: false
+  random_init_x_range: [-0.1, 0.1]
+  random_init_y_range: [-0.1, 0.1]
+  random_init_z_range: [-0.1, 0.1]
+  random_init_rot_range: [-0.1, 0.1]
+
+speed:
+  timestep: 0.001
+  frameskip: 10
+
+mode: web_ui #gui|headless
+verbose: false
+fast_lq_render: true
+
+# visual objects
+visual_object_at_initial_target_pos: true
+target_visual_object_visible_to_agent: false
+
+# debug
+debug: false
diff --git a/gibson2/core/physics/scene.py b/gibson2/core/physics/scene.py
index df7a7fd88..6f952c4b1 100644
--- a/gibson2/core/physics/scene.py
+++ b/gibson2/core/physics/scene.py
@@ -40,6 +40,9 @@ class StadiumScene(Scene):
     def get_random_point(self, random_height=False):
         return self.get_random_point_floor(0, random_height)
 
+    def get_random_floor(self):
+        return 0
+
     def get_random_point_floor(self, floor, random_height=False):
         del floor
         return 0, np.array([
@@ -48,6 +51,12 @@ class StadiumScene(Scene):
             np.random.uniform(0.4, 0.8) if random_height else 0.0
         ])
 
+    def reset_floor(self, floor=0, additional_elevation=0.05, height=None):
+        return
+
+    def get_floor_height(self, floor):
+        return 0.0
+
 
 class StadiumSceneInteractive(Scene):
     zero_at_running_strip_start_line = True    # if False, center of coordinates (0,0,0) will be at the middle of the stadium
diff --git a/gibson2/core/simulator.py b/gibson2/core/simulator.py
index 34e6861e0..561184052 100644
--- a/gibson2/core/simulator.py
+++ b/gibson2/core/simulator.py
@@ -93,17 +93,19 @@ class Simulator:
         
 
                 elif type == p.GEOM_PLANE:
-                    filename = os.path.join(gibson2.assets_path, 'models/mjcf_primitives/cube.obj')
-                    self.renderer.load_object(filename,
-                                              transform_orn=rel_orn,
-                                              transform_pos=rel_pos,
-                                              input_kd=color[:3],
-                                              scale=[100, 100, 0.01])
-                    self.renderer.add_instance(len(self.renderer.visual_objects) - 1,
-                                               pybullet_uuid=new_object,
-                                               class_id=class_id,
-                                               dynamic=True)
+                    #filename = os.path.join(gibson2.assets_path, 'models/mjcf_primitives/cube.obj')
+                    #self.renderer.load_object(filename,
+                    #                          transform_orn=rel_orn,
+                    #                          transform_pos=rel_pos,
+                    #                          input_kd=color[:3],
+                    #                          scale=[100, 100, 0.01])
+                    #self.renderer.add_instance(len(self.renderer.visual_objects) - 1,
+                    #                           pybullet_uuid=new_object,
+                    #                           class_id=class_id,
+                    #                           dynamic=True)
 
+                    # Uncomment the above block if you want to render the additionally added floor
+                    pass
         self.scene = scene
         return new_objects
 
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index d1f000284..48509f647 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -36,7 +36,7 @@ Episode = collections.namedtuple('Episode',
                                      # 'object_files',
                                      # 'object_trajectory',
                                      'success',
-                                     # 'path_efficiency',
+                                     'path_efficiency',
                                      # 'kinematic_disturbance',
                                      # 'dynamic_disturbance_a',
                                      # 'dynamic_disturbance_b',
@@ -155,8 +155,14 @@ class NavigateEnv(BaseEnv):
                                             dtype=np.float32)
             observation_space['rgb'] = self.rgb_space
         if 'depth' in self.output:
-            self.depth_space = gym.spaces.Box(low=-np.inf,
-                                              high=np.inf,
+            # self.depth_space = gym.spaces.Box(low=-np.inf,
+            #                                   high=np.inf,
+            #                                   shape=(self.config.get('resolution', 64),
+            #                                          self.config.get('resolution', 64),
+            #                                          1),
+            #                                   dtype=np.float32)
+            self.depth_space = gym.spaces.Box(low=0.0,
+                                              high=1.0,
                                               shape=(self.config.get('resolution', 64),
                                                      self.config.get('resolution', 64),
                                                      1),
@@ -179,9 +185,13 @@ class NavigateEnv(BaseEnv):
                                                   dtype=np.float32)
             observation_space['depth_seg'] = self.depth_seg_space
         if 'scan' in self.output:
-            self.scan_space = gym.spaces.Box(low=-np.inf,
-                                             high=np.inf,
-                                             shape=(self.n_horizontal_rays * self.n_vertical_beams, 3),
+            # self.scan_space = gym.spaces.Box(low=-np.inf,
+            #                                  high=np.inf,
+            #                                  shape=(self.n_horizontal_rays * self.n_vertical_beams, 3),
+            #                                  dtype=np.float32)
+            self.scan_space = gym.spaces.Box(low=0.0,
+                                             high=1.0,
+                                             shape=(self.n_horizontal_rays * self.n_vertical_beams,),
                                              dtype=np.float32)
             observation_space['scan'] = self.scan_space
         if 'rgb_filled' in self.output:  # use filler
@@ -237,7 +247,8 @@ class NavigateEnv(BaseEnv):
         auxiliary_sensor = self.get_auxiliary_sensor(collision_links)
 
         # rgb = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3]
-        # rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
+        # rgb = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
+
         # depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
         # depth = np.clip(depth / 5.0, 0.0, 1.0)
         # depth = 1.0 - depth  # flip black/white
@@ -245,6 +256,8 @@ class NavigateEnv(BaseEnv):
         # if self.num_object_classes is not None:
         #     seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0)
         # cv2.imshow('rgb', rgb)
+        # cv2.imwrite('test/%d_%d.jpg' % (self.current_episode, self.current_step), (rgb * 255).astype(np.uint8))
+
         # cv2.imshow('depth', depth)
         # cv2.imshow('seg', seg)
 
@@ -286,32 +299,52 @@ class NavigateEnv(BaseEnv):
 
         # TODO: figure out why 'scan' consumes so much cpu
         if 'scan' in self.output:
-            assert 'scan_link' in self.robots[0].parts, "Requested scan but no scan_link"
-            pose_camera = self.robots[0].parts['scan_link'].get_pose()
-            angle = np.arange(0, 2 * np.pi, 2 * np.pi / float(self.n_horizontal_rays))
-            elev_bottom_angle = -30. * np.pi / 180.
-            elev_top_angle = 10. * np.pi / 180.
-            elev_angle = np.arange(elev_bottom_angle, elev_top_angle,
-                                   (elev_top_angle - elev_bottom_angle) / float(self.n_vertical_beams))
-            orig_offset = np.vstack([
-                np.vstack([np.cos(angle),
-                           np.sin(angle),
-                           np.repeat(np.tan(elev_ang), angle.shape)]).T for elev_ang in elev_angle
-            ])
-            transform_matrix = quat2mat([pose_camera[-1], pose_camera[3], pose_camera[4], pose_camera[5]])
-            offset = orig_offset.dot(np.linalg.inv(transform_matrix))
-            pose_camera = pose_camera[None, :3].repeat(self.n_horizontal_rays * self.n_vertical_beams, axis=0)
+            laser_linear_range = 25.0
+            laser_angular_range = 220.0
+            laser_angular_half_range = laser_angular_range / 2.0
+            min_laser_dist = 0.1
+            laser_pose = self.robots[0].parts['laser_link'].get_pose()
+            transform_matrix = quat2mat([laser_pose[6], laser_pose[3], laser_pose[4], laser_pose[5]])  # [x, y, z, w]
+            angle = np.arange(-laser_angular_half_range / 180 * np.pi,
+                              laser_angular_half_range / 180 * np.pi,
+                              laser_angular_range / 180.0 * np.pi / self.n_horizontal_rays)
+            unit_vector_local = np.array([[np.cos(ang), np.sin(ang), 0.0] for ang in angle])
+            unit_vector_world = transform_matrix.dot(unit_vector_local.T).T
+            start_pose = np.tile(laser_pose[:3], (self.n_horizontal_rays, 1))
+            start_pose += unit_vector_world * min_laser_dist
+            end_pose = laser_pose[:3] + unit_vector_world * laser_linear_range
+            results = p.rayTestBatch(start_pose, end_pose, 6)  # numThreads = 6
+            # hit_object_id = np.array([item[0] for item in results])
+            # link_id = np.array([item[1] for item in results])
+            hit_fraction = np.array([item[2] for item in results])  # hit fraction = [0.0, 1.0] of laser_linear_range
+            state['scan'] = hit_fraction
 
-            results = p.rayTestBatch(pose_camera, pose_camera + offset * 30)
-            hit = np.array([item[0] for item in results])
-            dist = np.array([item[2] for item in results])
-
-            valid_pts = (dist < 1. - 1e-5) & (dist > 0.1 / 30) & (hit != self.robots[0].robot_ids[0]) & (hit != -1)
-            dist[~valid_pts] = 0.0  # zero out invalid pts
-            dist *= 30
-
-            xyz = np.expand_dims(dist, 1) * orig_offset
-            state['scan'] = xyz
+            # assert 'scan_link' in self.robots[0].parts, "Requested scan but no scan_link"
+            # pose_camera = self.robots[0].parts['scan_link'].get_pose()
+            # angle = np.arange(0, 2 * np.pi, 2 * np.pi / float(self.n_horizontal_rays))
+            # elev_bottom_angle = -30. * np.pi / 180.
+            # elev_top_angle = 10. * np.pi / 180.
+            # elev_angle = np.arange(elev_bottom_angle, elev_top_angle,
+            #                        (elev_top_angle - elev_bottom_angle) / float(self.n_vertical_beams))
+            # orig_offset = np.vstack([
+            #     np.vstack([np.cos(angle),
+            #                np.sin(angle),
+            #                np.repeat(np.tan(elev_ang), angle.shape)]).T for elev_ang in elev_angle
+            # ])
+            # transform_matrix = quat2mat([pose_camera[-1], pose_camera[3], pose_camera[4], pose_camera[5]])
+            # offset = orig_offset.dot(np.linalg.inv(transform_matrix))
+            # pose_camera = pose_camera[None, :3].repeat(self.n_horizontal_rays * self.n_vertical_beams, axis=0)
+            #
+            # results = p.rayTestBatch(pose_camera, pose_camera + offset * 30)
+            # hit = np.array([item[0] for item in results])
+            # dist = np.array([item[2] for item in results])
+            #
+            # valid_pts = (dist < 1. - 1e-5) & (dist > 0.1 / 30) & (hit != self.robots[0].robot_ids[0]) & (hit != -1)
+            # dist[~valid_pts] = 0.0  # zero out invalid pts
+            # dist *= 30
+            #
+            # xyz = np.expand_dims(dist, 1) * orig_offset
+            # state['scan'] = xyz
 
         return state
 
@@ -368,7 +401,6 @@ class NavigateEnv(BaseEnv):
         # goal reached
         if l2_distance(self.target_pos, self.get_position_of_interest()) < self.dist_tol:
             reward += self.success_reward  # |success_reward| = 10.0 per step
-
         return reward, info
 
     def get_termination(self, collision_links=[], info={}):
@@ -425,10 +457,10 @@ class NavigateEnv(BaseEnv):
             #                                   'collision_step',
             #                                   ])
 
-            # shortest_path, geodesic_distance = self.scene.get_shortest_path(self.floor_num,
-            #                                                                 self.initial_pos[:2],
-            #                                                                 self.target_pos[:2],
-            #                                                                 entire_path=True)
+            shortest_path, geodesic_distance = self.scene.get_shortest_path(self.floor_num,
+                                                                            self.initial_pos[:2],
+                                                                            self.target_pos[:2],
+                                                                            entire_path=True)
             # floor_height = self.scene.get_floor_height(self.floor_num)
             # shortest_path = np.array([np.array([path[0], path[1], floor_height]) for path in shortest_path])
             # min_kin_dist = self.path_length * self.robots[0].robot_mass
@@ -448,7 +480,7 @@ class NavigateEnv(BaseEnv):
                 # object_files=object_files,
                 # object_trajectory=np.array(self.object_trajectory),
                 success=float(info['success']),
-                # path_efficiency=min(1.0, geodesic_distance / self.path_length),
+                path_efficiency=min(1.0, geodesic_distance / self.path_length),
                 # kinematic_disturbance=kinematic_disturbance,
                 # dynamic_disturbance_a=dynamic_disturbance_a,
                 # dynamic_disturbance_b=dynamic_disturbance_b,
@@ -814,6 +846,8 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
 
         self.gt_pos = []
         self.ns_pos = []
+        # self.linear_vel = []
+        # self.angular_vel = []
 
         # self.eyes_vis = VisualObject(rgba_color=[1, 0, 0, 1.0], radius=0.03)
         # self.eyes_vis.load()
@@ -835,6 +869,13 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
             for waypoint in self.waypoints_vis:
                 waypoint.load()
 
+        # cv2.namedWindow('depth', cv2.WINDOW_NORMAL)
+        # cv2.resizeWindow('depth', 400, 400)
+        # cv2.namedWindow('scan', cv2.WINDOW_NORMAL)
+        # cv2.resizeWindow('scan', 400, 400)
+        # cv2.namedWindow('collision', cv2.WINDOW_NORMAL)
+        # cv2.resizeWindow('collision', 400, 400)
+
     def build_class_map(self):
         # 0 and 1 are reserved for mesh and robot
         init_class_id = 2
@@ -910,14 +951,22 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
 
         target_pos_local = self.global_to_local(self.target_pos, cur_pos, cur_rot)[:2]
         linear_velocity_local = rotate_vector_3d(self.robots[0].robot_body.velocity(), *cur_rot)[:2]
-        angular_velocity_local = rotate_vector_3d(self.robots[0].robot_body.angular_velocity(), *cur_rot)[:2]
+        angular_velocity_local = rotate_vector_3d(self.robots[0].robot_body.angular_velocity(), *cur_rot)[2:3]
+        # print('linear', self.robots[0].robot_body.velocity())
+        # print('linear_local', linear_velocity_local)
+        # print('angular', self.robots[0].robot_body.angular_velocity())
+        # print('angular_local', angular_velocity_local)
+
+        # linear_vel = np.linalg.norm(self.robots[0].robot_body.velocity())
+        # angular_vel = np.linalg.norm(self.robots[0].robot_body.angular_velocity())
+        # self.linear_vel.append(linear_vel)
+        # self.angular_vel.append(angular_vel)
 
         gt_pos = self.robots[0].get_position()[:2]
         source = gt_pos
         target = self.target_pos[:2]
-        _, geodesic_dist = self.scene.get_shortest_path(self.floor_num, source, target)
-        # geodesic_dist = 0.0
-
+        # _, geodesic_dist = self.scene.get_shortest_path(self.floor_num, source, target)
+        geodesic_dist = 0.0
         robot_z = self.robots[0].get_position()[2]
         if self.visualize_waypoints and self.mode == 'gui':
             for i in range(1000):
@@ -929,6 +978,10 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
         # self.eyes_vis.set_position(pos=self.robots[0].eyes.get_position())
 
         closest_idx = np.argmin(np.linalg.norm(cur_pos[:2] - self.shortest_path, axis=1))
+        # approximate geodesic_dist to speed up training
+        # geodesic_dist = np.sum(
+        #     np.linalg.norm(self.shortest_path[closest_idx:-1] - self.shortest_path[closest_idx + 1:], axis=1)
+        # )
         shortest_path = self.shortest_path[closest_idx:closest_idx + self.scene.num_waypoints]
         num_remaining_waypoints = self.scene.num_waypoints - shortest_path.shape[0]
         if num_remaining_waypoints > 0:
@@ -955,10 +1008,20 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
         height = int(width * (480.0 / 640.0))
         half_diff = int((width - height) / 2)
         depth = depth[half_diff:half_diff+height, :]
-        # cv2.imshow('depth', depth)
-        depth[depth < 0.6] = -1.0
-        depth[depth > 4.0] = -1.0
+
+        # depth[depth < 0.6] = -1.0
+        # depth[depth > 4.0] = -1.0
+
+        invalid = depth == 0.0
+        depth[depth < 0.35] = 0.35
+        depth[depth > 5.0] = 5.0
+        depth[invalid] = 0.0
+        depth /= 5.0
         state['depth'] = depth
+
+        # cv2.imshow('depth', state['depth'])
+        # cv2.imshow('scan', state['scan'] * 10.0)
+
         return state
 
     def get_potential(self):
@@ -990,6 +1053,9 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
         self.reset_replaced_objects()
         self.reset_additional_objects()
         self.new_potential = None
+        # if len(self.linear_vel) > 0:
+        #     print('linear', np.mean(self.linear_vel), np.max(self.linear_vel))
+        #     print('angular', np.mean(self.angular_vel), np.max(self.angular_vel))
         # if len(self.gt_pos) > 0:
         #     self.gt_pos = np.array(self.gt_pos)
         #     self.ns_pos = np.array(self.ns_pos)
@@ -1008,35 +1074,43 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
         target = self.target_pos[:2]
         shortest_path, _ = self.scene.get_shortest_path(self.floor_num, source, target, entire_path=True)
         self.shortest_path = shortest_path
+        # robot_z = self.robots[0].get_position()[2]
+        # if self.visualize_waypoints and self.mode == 'gui':
+        #     for i in range(1000):
+        #         self.waypoints_vis[i].set_position(pos=np.array([0.0, 0.0, 0.0]))
+        #     for i in range(min(1000, self.shortest_path.shape[0])):
+        #         self.waypoints_vis[i].set_position(pos=np.array([self.shortest_path[i][0],
+        #                                                          self.shortest_path[i][1],
+        #                                                          robot_z]))
 
-    # def before_simulation(self):
-    #     robot_position = self.robots[0].get_position()
-    #     object_positions = [obj.get_position() for obj in self.interactive_objects]
-    #     return robot_position, object_positions
-    #
-    # def after_simulation(self, cache, collision_links):
-    #     robot_position, object_positions = cache
-    #
-    #     collision_links_flatten = [item for sublist in collision_links for item in sublist]
-    #     if len(collision_links_flatten) > 0:
-    #         self.dynamic_disturbance_a += np.mean([
-    #             np.linalg.norm(
-    #                 np.sum([elem[9] * np.array(elem[7]) for elem in sublist], axis=0)  # sum of all forces
-    #             )
-    #             for sublist in collision_links])
-    #         collision_objects = set([col[2] for col in collision_links_flatten])
-    #         self.dynamic_disturbance_b += len(collision_objects)
-    #         self.interactive_objects_collided |= collision_objects
-    #
-    #     self.agent_trajectory.append(np.concatenate((self.robots[0].get_position(), self.robots[0].get_orientation())))
-    #     self.object_trajectory.append([np.concatenate((obj.get_position(), obj.get_orientation()))
-    #                                    for obj in self.interactive_objects])
-    #     self.path_length += np.linalg.norm(self.robots[0].get_position() - robot_position)
-    #     self.kinematic_disturbance += np.sum([
-    #         obj.mass * np.linalg.norm(np.array(obj.get_position()) - np.array(prev_pos))
-    #         for obj, prev_pos in zip(self.interactive_objects, object_positions)
-    #         if obj.body_id in self.interactive_objects_collided
-    #     ])
+    def before_simulation(self):
+        robot_position = self.robots[0].get_position()
+        object_positions = [obj.get_position() for obj in self.interactive_objects]
+        return robot_position, object_positions
+
+    def after_simulation(self, cache, collision_links):
+        robot_position, object_positions = cache
+
+        # collision_links_flatten = [item for sublist in collision_links for item in sublist]
+        # if len(collision_links_flatten) > 0:
+        #     self.dynamic_disturbance_a += np.mean([
+        #         np.linalg.norm(
+        #             np.sum([elem[9] * np.array(elem[7]) for elem in sublist], axis=0)  # sum of all forces
+        #         )
+        #         for sublist in collision_links])
+        #     collision_objects = set([col[2] for col in collision_links_flatten])
+        #     self.dynamic_disturbance_b += len(collision_objects)
+        #     self.interactive_objects_collided |= collision_objects
+        #
+        # self.agent_trajectory.append(np.concatenate((self.robots[0].get_position(), self.robots[0].get_orientation())))
+        # self.object_trajectory.append([np.concatenate((obj.get_position(), obj.get_orientation()))
+        #                                for obj in self.interactive_objects])
+        self.path_length += np.linalg.norm(self.robots[0].get_position() - robot_position)
+        # self.kinematic_disturbance += np.sum([
+        #     obj.mass * np.linalg.norm(np.array(obj.get_position()) - np.array(prev_pos))
+        #     for obj, prev_pos in zip(self.interactive_objects, object_positions)
+        #     if obj.body_id in self.interactive_objects_collided
+        # ])
 
 
 class InteractiveNavigateEnv(NavigateEnv):
@@ -1528,6 +1602,7 @@ class InteractiveNavigateEnv(NavigateEnv):
         self.collision_step += int(len(collision_links_flatten) > 0)
         reward += collision_reward * self.collision_reward_weight  # |collision_reward| ~= 1.0 per step if collision
         info['collision_reward'] = collision_reward * self.collision_reward_weight  # expose collision reward to info
+
         # self.reward_stats.append(np.abs(collision_reward * self.collision_reward_weight))
 
         # goal reached
@@ -1605,8 +1680,9 @@ if __name__ == '__main__':
         nav_env.reset()
         for step in range(500):  # 500 steps, 50s world time
             action = nav_env.action_space.sample()
-            # action[:] = 0.5
-            # action[:] = -1.0
+            # action[0] = 2.0 / 3.0
+            # action[1] = -1.0
+            # action[:] = 1.0
             # action[:] = -1.0 / 3
             # if nav_env.stage == 0:
             #     action[:2] = 0.5
@@ -1618,7 +1694,6 @@ if __name__ == '__main__':
             # action[2:] = np.array(debug_param_values)
 
             state, reward, done, _ = nav_env.step(action)
-            # print('reward', reward)
             if done:
                 print('Episode finished after {} timesteps'.format(step + 1))
                 break

From d3475a8840367e725a132ee572f7d5aa4014e63f Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Fri, 6 Dec 2019 15:45:56 -0800
Subject: [PATCH 04/58] cabinet example

---
 examples/demo/cabinets_example.py        |  96 ++++++--
 examples/demo/cabinets_example_fetch.py  | 283 +++++++++++++++++++++++
 examples/demo/ik_example.py              |  78 +++++++
 examples/demo/ik_example_fetch.py        | 102 ++++++++
 gibson2/core/physics/robot_bases.py      |   1 +
 gibson2/core/physics/robot_locomotors.py |   3 +
 gibson2/core/physics/scene.py            |   1 +
 gibson2/external/pybullet_tools/utils.py |  37 ++-
 8 files changed, 576 insertions(+), 25 deletions(-)
 create mode 100644 examples/demo/cabinets_example_fetch.py
 create mode 100644 examples/demo/ik_example.py
 create mode 100644 examples/demo/ik_example_fetch.py

diff --git a/examples/demo/cabinets_example.py b/examples/demo/cabinets_example.py
index 784e571f0..107f59163 100644
--- a/examples/demo/cabinets_example.py
+++ b/examples/demo/cabinets_example.py
@@ -9,35 +9,52 @@ from gibson2.core.render.profiler import Profiler
 import pytest
 import pybullet as p
 import numpy as np
+from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_name, set_joint_position, \
+    set_joint_positions, add_data_path, connect, plan_base_motion, plan_joint_motion, enable_gravity, \
+    joint_controller, dump_body, load_model, joints_from_names, user_input, disconnect, get_joint_positions, \
+    get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, set_point, create_box, stable_z, control_joints
+
+import time
+import numpy as np
 
 config = parse_config('../configs/jr_interactive_nav.yaml')
-s = Simulator(mode='headless', timestep=1 / 240.0)
+s = Simulator(mode='gui', timestep=1 / 240.0)
 scene = EmptyScene()
 s.import_scene(scene)
 jr = JR2_Kinova(config)
 s.import_robot(jr)
 jr.robot_body.reset_position([0,0,0])
 jr.robot_body.reset_orientation([0,0,1,0])
-fetch = Fetch(config)
-s.import_robot(fetch)
-fetch.robot_body.reset_position([0,1,0])
-fetch.robot_body.reset_orientation([0,0,1,0])
+
+obstacles = []
 obj = InteractiveObj(filename='/data4/mdv0/cabinet/0007/part_objs/cabinet_0007.urdf')
-s.import_interactive_object(obj)
+ids = s.import_interactive_object(obj)
+obstacles.append(ids)
+
 obj.set_position([-2,0,0.5])
 obj = InteractiveObj(filename='/data4/mdv0/cabinet/0007/part_objs/cabinet_0007.urdf')
-s.import_interactive_object(obj)
+ids = s.import_interactive_object(obj)
+obstacles.append(ids)
+
 obj.set_position([-2,2,0.5])
 obj = InteractiveObj(filename='/data4/mdv0/cabinet/0004/part_objs/cabinet_0004.urdf')
-s.import_interactive_object(obj)
+ids = s.import_interactive_object(obj)
+obstacles.append(ids)
+
 obj.set_position([-2.1, 1.6, 2])
 obj = InteractiveObj(filename='/data4/mdv0/cabinet/0004/part_objs/cabinet_0004.urdf')
-s.import_interactive_object(obj)
+ids = s.import_interactive_object(obj)
+obstacles.append(ids)
 obj.set_position([-2.1, 0.4, 2])
+
 obj = BoxShape([-2.05,1,0.5], [0.35,0.6,0.5])
-s.import_interactive_object(obj)
+ids = s.import_interactive_object(obj)
+obstacles.append(ids)
+
 obj = BoxShape([-2.45,1,1.5], [0.01,2,1.5])
-s.import_interactive_object(obj)
+ids = s.import_interactive_object(obj)
+obstacles.append(ids)
+
 p.createConstraint(0,-1,obj.body_id, -1, p.JOINT_FIXED, [0,0,1], [-2.55,1,1.5], [0,0,0])
 obj = YCBObject('003_cracker_box')
 s.import_object(obj)
@@ -46,8 +63,59 @@ obj = YCBObject('003_cracker_box')
 s.import_object(obj)
 p.resetBasePositionAndOrientation(obj.body_id, [-2,2,1.2], [0,0,0,1])
 
-for i in range(1000):
-    with Profiler("Simulation: step"):
-        s.step()
+robot_id = jr.robot_ids[0]
+arm_joints = joints_from_names(robot_id, ['m1n6s200_joint_1', 'm1n6s200_joint_2', 'm1n6s200_joint_3', 'm1n6s200_joint_4', 'm1n6s200_joint_5'])
+rest_position = [-1.6214899194372223, 1.4082722179709484, -2.9650918084213216, -1.7071872988002772, 3.0503822148927712e-05]
+finger_joints = joints_from_names(robot_id, ['m1n6s200_joint_finger_1', 'm1n6s200_joint_finger_2'])
+
+path = None
+set_joint_positions(robot_id, arm_joints, rest_position)
+
+while path is None:
+    set_point(robot_id, [-3, -1, 0.0]) # Sets the z position of the robot
+    control_joints(robot_id, arm_joints, rest_position)
+    path = plan_base_motion(robot_id, [-1,1.5,np.pi], ((-6, -6), (6, 6)), obstacles=obstacles, restarts=10, iterations=50, smooth=30)
+
+for bq in path:
+    set_base_values(robot_id, [bq[0], bq[1], bq[2]])
+    control_joints(robot_id, arm_joints, rest_position)
+    time.sleep(0.05)
+    s.step()
+
+x,y,z = jr.get_end_effector_position()
+print(x,y,z)
+visual_marker = p.createVisualShape(p.GEOM_SPHERE, radius = 0.02, rgbaColor=(1,1,1,0.5))
+marker = p.createMultiBody(baseVisualShapeIndex = visual_marker)
+f = 1
+control_joints(robot_id, finger_joints, [f,f])
+
+
+while True:
+    joint_pos = p.calculateInverseKinematics(robot_id, 33, [x, y, z])[2:7]
+    control_joints(robot_id, arm_joints, joint_pos)
+    keys = p.getKeyboardEvents()
+    #set_base_values(robot_id, [-1,1.5,np.pi])
+    control_joints(robot_id, finger_joints, [f, f])
+
+    for k, v in keys.items():
+        if (k == p.B3G_RIGHT_ARROW and (v & p.KEY_IS_DOWN)):
+            x += 0.01
+        if (k == p.B3G_LEFT_ARROW and (v & p.KEY_IS_DOWN)):
+            x -= 0.01
+        if (k == p.B3G_UP_ARROW and (v & p.KEY_IS_DOWN)):
+            y += 0.01
+        if (k == p.B3G_DOWN_ARROW and (v & p.KEY_IS_DOWN)):
+            y -= 0.01
+        if (k == ord('z') and (v & p.KEY_IS_DOWN)):
+            z += 0.01
+        if (k == ord('x') and (v & p.KEY_IS_DOWN)):
+            z -= 0.01
+        if (k == ord('a') and (v & p.KEY_IS_DOWN)):
+            f += 0.01
+        if (k == ord('d') and (v & p.KEY_IS_DOWN)):
+            f -= 0.01
+
+    p.resetBasePositionAndOrientation(marker, [x,y,z], [0,0,0,1])
+    s.step()
 
 s.disconnect()
\ No newline at end of file
diff --git a/examples/demo/cabinets_example_fetch.py b/examples/demo/cabinets_example_fetch.py
new file mode 100644
index 000000000..43d38e887
--- /dev/null
+++ b/examples/demo/cabinets_example_fetch.py
@@ -0,0 +1,283 @@
+import yaml
+from gibson2.core.physics.robot_locomotors import Turtlebot, JR2_Kinova, Fetch
+from gibson2.core.simulator import Simulator
+from gibson2.core.physics.scene import EmptyScene
+from gibson2.core.physics.interactive_objects import InteractiveObj, BoxShape, YCBObject
+from gibson2.utils.utils import parse_config
+from gibson2.core.render.profiler import Profiler
+
+import pytest
+import pybullet as p
+import numpy as np
+from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_name, set_joint_position, \
+    set_joint_positions, add_data_path, connect, plan_base_motion, plan_joint_motion, enable_gravity, \
+    joint_controller, dump_body, load_model, joints_from_names, user_input, disconnect, get_joint_positions, \
+    get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion,\
+    set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits
+
+import time
+import numpy as np
+
+config = parse_config('../configs/jr_interactive_nav.yaml')
+s = Simulator(mode='gui', timestep=1 / 240.0)
+scene = EmptyScene()
+s.import_scene(scene)
+fetch = Fetch(config)
+s.import_robot(fetch)
+fetch.robot_body.reset_position([0,0,0])
+fetch.robot_body.reset_orientation([0,0,1,0])
+
+obstacles = []
+obj = InteractiveObj(filename='/data4/mdv0/cabinet/0007/part_objs/cabinet_0007.urdf')
+ids = s.import_interactive_object(obj)
+for i in range(5):
+    p.changeDynamics(ids,i,lateralFriction=50,mass=0.1, linearDamping=0, angularDamping=0)
+obstacles.append(ids)
+
+obj.set_position([-2,0,0.5])
+obj = InteractiveObj(filename='/data4/mdv0/cabinet/0007/part_objs/cabinet_0007.urdf')
+ids = s.import_interactive_object(obj)
+for i in range(5):
+    p.changeDynamics(ids,i,lateralFriction=50,mass=0.1, linearDamping=0, angularDamping=0)
+obstacles.append(ids)
+
+obj.set_position([-2,2,0.5])
+obj = InteractiveObj(filename='/data4/mdv0/cabinet/0004/part_objs/cabinet_0004.urdf')
+ids = s.import_interactive_object(obj)
+obstacles.append(ids)
+
+obj.set_position([-2.1, 1.6, 2])
+obj = InteractiveObj(filename='/data4/mdv0/cabinet/0004/part_objs/cabinet_0004.urdf')
+ids = s.import_interactive_object(obj)
+obstacles.append(ids)
+obj.set_position([-2.1, 0.4, 2])
+
+obj = BoxShape([-2.05,1,0.5], [0.35,0.6,0.5])
+ids = s.import_interactive_object(obj)
+obstacles.append(ids)
+
+obj = BoxShape([-2.45,1,1.5], [0.01,2,1.5])
+ids = s.import_interactive_object(obj)
+obstacles.append(ids)
+
+p.createConstraint(0,-1,obj.body_id, -1, p.JOINT_FIXED, [0,0,1], [-2.55,1,1.5], [0,0,0])
+obj = YCBObject('003_cracker_box')
+s.import_object(obj)
+p.resetBasePositionAndOrientation(obj.body_id, [-2,1,1.2], [0,0,0,1])
+obj = YCBObject('003_cracker_box')
+s.import_object(obj)
+p.resetBasePositionAndOrientation(obj.body_id, [-2,2,1.2], [0,0,0,1])
+
+robot_id = fetch.robot_ids[0]
+arm_joints = joints_from_names(robot_id, ['torso_lift_joint','shoulder_pan_joint', 'shoulder_lift_joint', 'upperarm_roll_joint',
+                                          'elbow_flex_joint', 'forearm_roll_joint', 'wrist_flex_joint', 'wrist_roll_joint'])
+rest_position = (0.38548146667743244, 1.1522793897208579, 1.2576467971105596, -0.312703569911879,
+                 1.7404867100093226, -0.0962895617312548, -1.4418232619629425, -1.6780152866247762)
+finger_joints = joints_from_names(robot_id, ['l_gripper_finger_joint', 'r_gripper_finger_joint'])
+
+path = None
+set_joint_positions(robot_id, arm_joints, rest_position)
+if 0:
+    while path is None:
+        set_point(robot_id, [-3, -1, 0.0]) # Sets the z position of the robot
+        set_joint_positions(robot_id, arm_joints, rest_position)
+        control_joints(robot_id, finger_joints, [0.04, 0.04])
+        path = plan_base_motion(robot_id, [-1,1.5,np.pi], ((-6, -6), (6, 6)), obstacles=obstacles, restarts=10, iterations=50, smooth=30)
+
+    for bq in path:
+        set_base_values(robot_id, [bq[0], bq[1], bq[2]])
+        set_joint_positions(robot_id, arm_joints, rest_position)
+        control_joints(robot_id, finger_joints, [0.04, 0.04])
+        time.sleep(0.05)
+        s.step()
+
+
+set_base_values(robot_id, [-1,1.5,np.pi])
+
+x,y,z = (-1.5234082112189532, 1.8056819568596753, 0.9170480480678451) #fetch.get_end_effector_position()
+print(x,y,z)
+visual_marker = p.createVisualShape(p.GEOM_SPHERE, radius = 0.02, rgbaColor=(1,1,1,0.5))
+marker = p.createMultiBody(baseVisualShapeIndex = visual_marker)
+f = 1
+#control_joints(robot_id, finger_joints, [f,f])
+
+max_limits = [0,0] + get_max_limits(robot_id, arm_joints) + [0.05,0.05]
+min_limits = [0,0] + get_min_limits(robot_id, arm_joints) + [0,0]
+rest_position_ik = [0,0] + list(get_joint_positions(robot_id, arm_joints)) + [0.04,0.04]
+joint_range = list(np.array(max_limits) - np.array(min_limits))
+joint_range = [item + 1 for item in joint_range]
+jd = [0.1 for item in joint_range]
+print(max_limits)
+print(min_limits)
+
+for i in range(100):
+    set_joint_positions(robot_id, arm_joints, rest_position)
+    control_joints(robot_id, finger_joints, [0.04, 0.04])
+    s.step()
+
+p.changeDynamics(robot_id, fetch.parts['r_gripper_finger_link'].body_part_index, lateralFriction=50)
+p.changeDynamics(robot_id, fetch.parts['l_gripper_finger_link'].body_part_index, lateralFriction=50)
+
+if 1:
+        sid = p.saveState()
+        joint_pos = p.calculateInverseKinematics(robot_id, fetch.parts['gripper_link'].body_part_index, [x, y, z],
+                                                 p.getQuaternionFromEuler([np.pi/2,-np.pi,0]),
+                                                 lowerLimits=min_limits,
+                                                 upperLimits=max_limits,
+                                                 jointRanges=joint_range,
+                                                 restPoses=rest_position_ik,
+                                                 jointDamping=jd,
+                                                 solver=p.IK_DLS,
+                                                 maxNumIterations=100)[2:10]
+
+        arm_path = plan_joint_motion(robot_id, arm_joints, joint_pos, disabled_collisions=set(),
+                                     self_collisions=True)
+        p.restoreState(sid)
+
+        for jp in arm_path:
+            set_base_values(robot_id, [-1, 1.5, np.pi])
+
+            for i in range(10):
+                control_joints(robot_id, finger_joints, [0.04, 0.04])
+                control_joints(robot_id, arm_joints, jp)
+                s.step()
+
+
+        sid = p.saveState()
+        joint_pos = p.calculateInverseKinematics(robot_id, fetch.parts['gripper_link'].body_part_index,
+                                                 [x-0.10, y, z],
+                                                 p.getQuaternionFromEuler([np.pi / 2, -np.pi, 0]),
+                                                 lowerLimits=min_limits,
+                                                 upperLimits=max_limits,
+                                                 jointRanges=joint_range,
+                                                 restPoses=rest_position_ik,
+                                                 jointDamping=jd,
+                                                 solver=p.IK_DLS,
+                                                 maxNumIterations=100)[2:10]
+
+        arm_path = plan_joint_motion(robot_id, arm_joints, joint_pos, disabled_collisions=set(),
+                                     self_collisions=True)
+        p.restoreState(sid)
+
+        for jp in arm_path:
+            set_base_values(robot_id, [-1, 1.5, np.pi])
+
+            for i in range(10):
+                control_joints(robot_id, finger_joints, [0.04, 0.04])
+                control_joints(robot_id, arm_joints, jp)
+                s.step()
+
+        p.setJointMotorControl2(robot_id, joint_from_name(robot_id, 'l_gripper_finger_joint'),  p.VELOCITY_CONTROL, targetVelocity=-0.2, force=500)
+        p.setJointMotorControl2(robot_id, joint_from_name(robot_id, 'r_gripper_finger_joint'),  p.VELOCITY_CONTROL, targetVelocity=-0.2, force=500)
+
+        for i in range(150):
+            set_base_values(robot_id, [-1, 1.5, np.pi])
+            s.step()
+
+        sid = p.saveState()
+        x,y,z = fetch.parts['gripper_link'].get_pose()[:3]
+        joint_pos = p.calculateInverseKinematics(robot_id, fetch.parts['gripper_link'].body_part_index,
+                                                 [x + 0.1, y, z],
+                                                 p.getQuaternionFromEuler([np.pi / 2, -np.pi, 0]),
+                                                 lowerLimits=min_limits,
+                                                 upperLimits=max_limits,
+                                                 jointRanges=joint_range,
+                                                 restPoses=rest_position_ik,
+                                                 jointDamping=jd,
+                                                 solver=p.IK_DLS,
+                                                 maxNumIterations=1000)[2:10]
+
+        arm_path = plan_joint_motion(robot_id, arm_joints, joint_pos, disabled_collisions=set(),
+                                     self_collisions=True)
+
+        p.restoreState(sid)
+
+        for jp in arm_path:
+            set_base_values(robot_id, [-1, 1.5, np.pi])
+
+            for i in range(10):
+                control_joints(robot_id, arm_joints, jp)
+                s.step()
+
+        sid = p.saveState()
+        x, y, z = fetch.parts['gripper_link'].get_pose()[:3]
+        joint_pos = p.calculateInverseKinematics(robot_id, fetch.parts['gripper_link'].body_part_index,
+                                                 [x + 0.1, y, z],
+                                                 p.getQuaternionFromEuler([np.pi / 2, -np.pi, 0]),
+                                                 lowerLimits=min_limits,
+                                                 upperLimits=max_limits,
+                                                 jointRanges=joint_range,
+                                                 restPoses=rest_position_ik,
+                                                 jointDamping=jd,
+                                                 solver=p.IK_DLS,
+                                                 maxNumIterations=100)[2:10]
+
+        arm_path = plan_joint_motion(robot_id, arm_joints, joint_pos, disabled_collisions=set(),
+                                     self_collisions=True)
+
+        p.restoreState(sid)
+
+        for jp in arm_path:
+            set_base_values(robot_id, [-1, 1.5, np.pi])
+
+            for i in range(10):
+                control_joints(robot_id, arm_joints, jp)
+                s.step()
+
+        sid = p.saveState()
+        x, y, z = fetch.parts['gripper_link'].get_pose()[:3]
+        joint_pos = p.calculateInverseKinematics(robot_id, fetch.parts['gripper_link'].body_part_index,
+                                                 [x + 0.1, y, z],
+                                                 p.getQuaternionFromEuler([np.pi / 2, -np.pi, 0]),
+                                                 lowerLimits=min_limits,
+                                                 upperLimits=max_limits,
+                                                 jointRanges=joint_range,
+                                                 restPoses=rest_position_ik,
+                                                 jointDamping=jd,
+                                                 solver=p.IK_DLS,
+                                                 maxNumIterations=100)[2:10]
+
+        arm_path = plan_joint_motion(robot_id, arm_joints, joint_pos, disabled_collisions=set(),
+                                     self_collisions=True)
+
+        p.restoreState(sid)
+
+        for jp in arm_path:
+            set_base_values(robot_id, [-1, 1.5, np.pi])
+
+            for i in range(10):
+                control_joints(robot_id, arm_joints, jp)
+                s.step()
+
+        keys = p.getKeyboardEvents()
+
+        for k, v in keys.items():
+            if (k == p.B3G_RIGHT_ARROW and (v & p.KEY_IS_DOWN)):
+                x += 0.01
+            if (k == p.B3G_LEFT_ARROW and (v & p.KEY_IS_DOWN)):
+                x -= 0.01
+            if (k == p.B3G_UP_ARROW and (v & p.KEY_IS_DOWN)):
+                y += 0.01
+            if (k == p.B3G_DOWN_ARROW and (v & p.KEY_IS_DOWN)):
+                y -= 0.01
+            if (k == ord('z') and (v & p.KEY_IS_DOWN)):
+                z += 0.01
+            if (k == ord('x') and (v & p.KEY_IS_DOWN)):
+                z -= 0.01
+            if (k == ord('a') and (v & p.KEY_IS_DOWN)):
+                f += 0.01
+            if (k == ord('d') and (v & p.KEY_IS_DOWN)):
+                f -= 0.01
+            if (k == ord('p') and (v & p.KEY_IS_DOWN)):
+                print(x,y,z)
+
+        p.resetBasePositionAndOrientation(marker, [x,y,z], [0,0,0,1])
+        p.setJointMotorControl2(robot_id, joint_from_name(robot_id, 'l_wheel_joint'), p.VELOCITY_CONTROL, targetVelocity=0, force=1000)
+        p.setJointMotorControl2(robot_id, joint_from_name(robot_id, 'r_wheel_joint'), p.VELOCITY_CONTROL, targetVelocity=0, force=1000)
+
+        while True:
+            set_base_values(robot_id, [-1, 1.5, np.pi])
+            s.step()
+
+
+s.disconnect()
diff --git a/examples/demo/ik_example.py b/examples/demo/ik_example.py
new file mode 100644
index 000000000..2e41ea5d5
--- /dev/null
+++ b/examples/demo/ik_example.py
@@ -0,0 +1,78 @@
+import yaml
+from gibson2.core.physics.robot_locomotors import Turtlebot, JR2_Kinova, Fetch
+from gibson2.core.simulator import Simulator
+from gibson2.core.physics.scene import EmptyScene
+from gibson2.core.physics.interactive_objects import InteractiveObj, BoxShape, YCBObject
+from gibson2.utils.utils import parse_config
+from gibson2.core.render.profiler import Profiler
+
+import pytest
+import pybullet as p
+import numpy as np
+from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_name, set_joint_position, \
+    set_joint_positions, add_data_path, connect, plan_base_motion, plan_joint_motion, enable_gravity, \
+    joint_controller, dump_body, load_model, joints_from_names, user_input, disconnect, get_joint_positions, \
+    get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, set_point, create_box, stable_z
+
+import time
+import numpy as np
+
+config = parse_config('../configs/jr_interactive_nav.yaml')
+s = Simulator(mode='gui', timestep=1 / 240.0)
+scene = EmptyScene()
+s.import_scene(scene)
+jr = JR2_Kinova(config)
+s.import_robot(jr)
+
+robot_id = jr.robot_ids[0]
+jr_end_effector_link_id = 33
+
+#arm_joints = joints_from_names(robot_id, ['shoulder_pan_joint', 'shoulder_lift_joint', 'elbow_flex_joint', 'wrist_flex_joint'])
+arm_joints = joints_from_names(robot_id, ['m1n6s200_joint_1', 'm1n6s200_joint_2', 'm1n6s200_joint_3', 'm1n6s200_joint_4', 'm1n6s200_joint_5'])
+finger_joints = joints_from_names(robot_id, ['m1n6s200_joint_finger_1', 'm1n6s200_joint_finger_2'])
+jr.robot_body.reset_position([0, 0, 0])
+jr.robot_body.reset_orientation([0, 0, 1, 0])
+rest_position = [-1.6214899194372223, 1.4082722179709484, -2.9650918084213216, -1.7071872988002772, 3.0503822148927712e-05]
+set_joint_positions(robot_id, arm_joints, rest_position)
+x,y,z = jr.get_end_effector_position()
+f = 1
+set_joint_positions(robot_id, finger_joints, [f,f])
+
+print(x,y,z)
+visual_marker = p.createVisualShape(p.GEOM_SPHERE, radius = 0.02)
+marker = p.createMultiBody(baseVisualShapeIndex = visual_marker)
+
+while True:
+    with Profiler("Simulation: step"):
+        jr.robot_body.reset_position([0, 0, 0])
+        jr.robot_body.reset_orientation([0, 0, 1, 0])
+        joint_pos = p.calculateInverseKinematics(robot_id, 33, [x, y, z])[2:7]
+        print(x,y,z)
+        ##z += 0.002
+        set_joint_positions(robot_id, arm_joints, joint_pos)
+        set_joint_positions(robot_id, finger_joints, [f, f])
+        s.step()
+        keys = p.getKeyboardEvents()
+        for k, v in keys.items():
+
+            if (k == p.B3G_RIGHT_ARROW and (v & p.KEY_IS_DOWN)):
+                x += 0.01
+            if (k == p.B3G_LEFT_ARROW and (v & p.KEY_IS_DOWN)):
+                x -= 0.01
+            if (k == p.B3G_UP_ARROW and (v & p.KEY_IS_DOWN)):
+                y += 0.01
+            if (k == p.B3G_DOWN_ARROW and (v & p.KEY_IS_DOWN)):
+                y -= 0.01
+            if (k == ord('z') and (v & p.KEY_IS_DOWN)):
+                z += 0.01
+            if (k == ord('x') and (v & p.KEY_IS_DOWN)):
+                z -= 0.01
+            if (k == ord('a') and (v & p.KEY_IS_DOWN)):
+                f += 0.01
+            if (k == ord('s') and (v & p.KEY_IS_DOWN)):
+                f -= 0.01
+
+        p.resetBasePositionAndOrientation(marker, [x,y,z], [0,0,0,1])
+
+#print(joint_pos)
+s.disconnect()
\ No newline at end of file
diff --git a/examples/demo/ik_example_fetch.py b/examples/demo/ik_example_fetch.py
new file mode 100644
index 000000000..2c177ce44
--- /dev/null
+++ b/examples/demo/ik_example_fetch.py
@@ -0,0 +1,102 @@
+import yaml
+from gibson2.core.physics.robot_locomotors import Turtlebot, JR2_Kinova, Fetch
+from gibson2.core.simulator import Simulator
+from gibson2.core.physics.scene import EmptyScene
+from gibson2.core.physics.interactive_objects import InteractiveObj, BoxShape, YCBObject
+from gibson2.utils.utils import parse_config
+from gibson2.core.render.profiler import Profiler
+
+import pytest
+import pybullet as p
+import numpy as np
+from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_name, set_joint_position, \
+    set_joint_positions, add_data_path, connect, plan_base_motion, plan_joint_motion, enable_gravity, \
+    joint_controller, dump_body, load_model, joints_from_names, user_input, disconnect, get_joint_positions, \
+    get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, \
+    set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits
+
+import time
+import numpy as np
+
+config = parse_config('../configs/jr_interactive_nav.yaml')
+s = Simulator(mode='gui', timestep=1 / 240.0)
+scene = EmptyScene()
+s.import_scene(scene)
+fetch = Fetch(config)
+s.import_robot(fetch)
+
+robot_id = fetch.robot_ids[0]
+
+#arm_joints = joints_from_names(robot_id, ['shoulder_pan_joint', 'shoulder_lift_joint', 'elbow_flex_joint', 'wrist_flex_joint'])
+arm_joints = joints_from_names(robot_id, ['torso_lift_joint','shoulder_pan_joint', 'shoulder_lift_joint', 'upperarm_roll_joint',
+                                          'elbow_flex_joint', 'forearm_roll_joint', 'wrist_flex_joint', 'wrist_roll_joint'])
+finger_joints = joints_from_names(robot_id, ['l_gripper_finger_joint', 'r_gripper_finger_joint'])
+fetch.robot_body.reset_position([0, 0, 0])
+fetch.robot_body.reset_orientation([0, 0, 1, 0])
+x,y,z = fetch.get_end_effector_position()
+set_joint_positions(robot_id, finger_joints, [0.04,0.04])
+
+print(x,y,z)
+visual_marker = p.createVisualShape(p.GEOM_SPHERE, radius = 0.02)
+marker = p.createMultiBody(baseVisualShapeIndex = visual_marker)
+
+max_limits = [0,0] + get_max_limits(robot_id, arm_joints) + [0.05,0.05]
+min_limits = [0,0] + get_min_limits(robot_id, arm_joints) + [0,0]
+rest_position = [0,0] + list(get_joint_positions(robot_id, arm_joints)) + [0.04,0.04]
+joint_range = list(np.array(max_limits) - np.array(min_limits))
+joint_range = [item + 1 for item in joint_range]
+jd = [0.1 for item in joint_range]
+print(max_limits)
+print(min_limits)
+
+
+def accurateCalculateInverseKinematics(robotid, endEffectorId, targetPos, threshold, maxIter):
+  closeEnough = False
+  iter = 0
+  dist2 = 1e30
+  while (not closeEnough and iter < maxIter):
+    jointPoses = p.calculateInverseKinematics(robotid, endEffectorId, targetPos,[0,0.7071067,0,0.7071067],
+                                                     lowerLimits = min_limits,
+                                                     upperLimits = max_limits,
+                                                     jointRanges = joint_range,
+                                                     restPoses = rest_position,
+                                                     jointDamping = jd)
+    set_joint_positions(robotid, arm_joints, jointPoses[2:10])
+    ls = p.getLinkState(robotid, endEffectorId)
+    newPos = ls[4]
+    diff = [targetPos[0] - newPos[0], targetPos[1] - newPos[1], targetPos[2] - newPos[2]]
+    dist2 = (diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2])
+    closeEnough = (dist2 < threshold)
+    iter = iter + 1
+  print ("Num iter: "+str(iter) + " threshold: "+str(dist2))
+  return jointPoses
+
+while True:
+    with Profiler("Simulation: step"):
+        fetch.robot_body.reset_position([0, 0, 0])
+        fetch.robot_body.reset_orientation([0, 0, 1, 0])
+        threshold = 0.001
+        maxIter = 100
+        joint_pos = accurateCalculateInverseKinematics(robot_id, fetch.parts['gripper_link'].body_part_index, [x, y, z], threshold, maxIter)[2:10]
+
+        control_joints(robot_id, finger_joints, [0.04, 0.04])
+        s.step()
+        keys = p.getKeyboardEvents()
+        for k, v in keys.items():
+
+            if (k == p.B3G_RIGHT_ARROW and (v & p.KEY_IS_DOWN)):
+                x += 0.01
+            if (k == p.B3G_LEFT_ARROW and (v & p.KEY_IS_DOWN)):
+                x -= 0.01
+            if (k == p.B3G_UP_ARROW and (v & p.KEY_IS_DOWN)):
+                y += 0.01
+            if (k == p.B3G_DOWN_ARROW and (v & p.KEY_IS_DOWN)):
+                y -= 0.01
+            if (k == ord('z') and (v & p.KEY_IS_DOWN)):
+                z += 0.01
+            if (k == ord('x') and (v & p.KEY_IS_DOWN)):
+                z -= 0.01
+
+        p.resetBasePositionAndOrientation(marker, [x,y,z], [0,0,0,1])
+
+s.disconnect()
\ No newline at end of file
diff --git a/gibson2/core/physics/robot_bases.py b/gibson2/core/physics/robot_bases.py
index bd9489b88..5d9d20141 100644
--- a/gibson2/core/physics/robot_bases.py
+++ b/gibson2/core/physics/robot_bases.py
@@ -106,6 +106,7 @@ class BaseRobot:
                                     force=0)
             _, joint_name, joint_type, _, _, _, _, _, _, _, _, _, part_name, _, _, _, _ = p.getJointInfo(
                 bodies[0], j)
+            print(p.getJointInfo(bodies[0], j))
             joint_name = joint_name.decode("utf8")
             part_name = part_name.decode("utf8")
 
diff --git a/gibson2/core/physics/robot_locomotors.py b/gibson2/core/physics/robot_locomotors.py
index 835f28368..ed157d4c1 100644
--- a/gibson2/core/physics/robot_locomotors.py
+++ b/gibson2/core/physics/robot_locomotors.py
@@ -585,6 +585,9 @@ class Fetch(LocomotorRobot):
         print(len(base_state), len(angular_velocity))
         return np.concatenate((base_state, np.array(angular_velocity)))
 
+    def get_end_effector_position(self):
+        return self.parts['l_gripper_finger_link'].get_position()
+
 
 class JR2(LocomotorRobot):
     mjcf_scaling = 1
diff --git a/gibson2/core/physics/scene.py b/gibson2/core/physics/scene.py
index 68f352c55..1bede1e33 100644
--- a/gibson2/core/physics/scene.py
+++ b/gibson2/core/physics/scene.py
@@ -23,6 +23,7 @@ class EmptyScene(Scene):
     def load(self):
         planeName = os.path.join(pybullet_data.getDataPath(), "mjcf/ground_plane.xml")
         self.ground_plane_mjcf = p.loadMJCF(planeName)
+        p.changeDynamics(self.ground_plane_mjcf[0], -1, lateralFriction=1)
         return [item for item in self.ground_plane_mjcf]
 
 class StadiumScene(Scene):
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index f08839563..cabaed444 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -2461,19 +2461,20 @@ def get_collision_fn(body, joints, obstacles, attachments, self_collisions, disa
     # TODO: test self collision with the holding
     def collision_fn(q):
         if not all_between(lower_limits, q, upper_limits):
-            #print('Joint limits violated')
-            return True
+            print(lower_limits, q, upper_limits)
+            print('Joint limits violated')
+            #return True
         set_joint_positions(body, joints, q)
         for attachment in attachments:
             attachment.assign()
         for link1, link2 in check_link_pairs:
             # Self-collisions should not have the max_distance parameter
             if pairwise_link_collision(body, link1, body, link2): #, **kwargs):
-                #print(get_body_name(body), get_link_name(body, link1), get_link_name(body, link2))
-                return True
+                print(get_body_name(body), get_link_name(body, link1), get_link_name(body, link2))
+                #return True
         for body1, body2 in check_body_pairs:
             if pairwise_collision(body1, body2, **kwargs):
-                #print(get_body_name(body1), get_body_name(body2))
+                print(get_body_name(body1), get_body_name(body2))
                 return True
         return False
     return collision_fn
@@ -2652,13 +2653,27 @@ def plan_base_motion(body, end_conf, base_limits, obstacles=[], direct=False,
     distance_fn = get_base_distance_fn(weights=weights)
 
     def extend_fn(q1, q2):
-        steps = np.abs(np.divide(difference_fn(q2, q1), resolutions))
-        n = int(np.max(steps)) + 1
-        q = q1
-        for i in range(n):
-            q = tuple((1. / (n - i)) * np.array(difference_fn(q2, q)) + q)
+        target_theta = np.arctan2(q2[1]-q1[1], q2[0]-q1[0])
+
+        n1 = int(np.abs(circular_difference(target_theta, q1[2]) / resolutions[2])) + 1
+        n3 = int(np.abs(circular_difference(q2[2], target_theta) / resolutions[2])) + 1
+        steps2 = np.abs(np.divide(difference_fn(q2, q1), resolutions))
+        n2 = int(np.max(steps2)) + 1
+
+        for i in range(n1):
+            q = (i / (n1)) * np.array(difference_fn((q1[0], q1[1], target_theta), q1)) + np.array(q1)
+            q = tuple(q)
+            yield q
+
+        for i in range(n2):
+            q = (i / (n2)) * np.array(difference_fn((q2[0], q2[1], target_theta), (q1[0], q1[1], target_theta))) +  np.array((q1[0], q1[1], target_theta))
+            q = tuple(q)
+            yield q
+
+        for i in range(n3):
+            q = (i / (n3)) * np.array(difference_fn(q2, (q2[0], q2[1], target_theta))) + np.array((q2[0], q2[1], target_theta))
+            q = tuple(q)
             yield q
-            # TODO: should wrap these joints
 
     def collision_fn(q):
         # TODO: update this function

From c8b45b925d84af533ca3a80042f72835debcd04b Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Fri, 13 Dec 2019 16:08:03 -0800
Subject: [PATCH 05/58] turtlebot motion planner

---
 examples/configs/turtlebot_p2p_nav_house.yaml |  2 +-
 gibson2/core/physics/scene.py                 |  4 +-
 gibson2/envs/locomotor_env.py                 |  1 +
 gibson2/envs/motion_planner_env.py            | 97 +++++++++++++++++++
 gibson2/external/pybullet_tools/utils.py      | 37 ++++---
 5 files changed, 128 insertions(+), 13 deletions(-)
 create mode 100644 gibson2/envs/motion_planner_env.py

diff --git a/examples/configs/turtlebot_p2p_nav_house.yaml b/examples/configs/turtlebot_p2p_nav_house.yaml
index fe5a423f6..438e672c1 100644
--- a/examples/configs/turtlebot_p2p_nav_house.yaml
+++ b/examples/configs/turtlebot_p2p_nav_house.yaml
@@ -1,6 +1,6 @@
 # scene
 scene: building
-model_id: Ohoopee
+model_id: Avonia
 
 # robot
 robot: Turtlebot
diff --git a/gibson2/core/physics/scene.py b/gibson2/core/physics/scene.py
index 68f352c55..b115fc877 100644
--- a/gibson2/core/physics/scene.py
+++ b/gibson2/core/physics/scene.py
@@ -89,7 +89,7 @@ class BuildingScene(Scene):
         self.should_load_replaced_objects = should_load_replaced_objects
         self.num_waypoints = num_waypoints
         self.waypoint_interval = int(waypoint_resolution / trav_map_resolution)
-
+        self.mesh_body_id = None
     def l2_distance(self, a, b):
         return np.linalg.norm(np.array(a) - np.array(b))
 
@@ -113,6 +113,8 @@ class BuildingScene(Scene):
         visualId = -1
         boundaryUid = p.createMultiBody(baseCollisionShapeIndex=collisionId,
                                         baseVisualShapeIndex=visualId)
+
+        self.mesh_body_id = boundaryUid
         p.changeDynamics(boundaryUid, -1, lateralFriction=1)
 
         planeName = os.path.join(pybullet_data.getDataPath(), "mjcf/ground_plane.xml")
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 1fa49af7d..378c62899 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -431,6 +431,7 @@ class NavigateEnv(BaseEnv):
             #                                                                 self.initial_pos[:2],
             #                                                                 self.target_pos[:2],
             #                                                                 entire_path=True)
+
             # floor_height = self.scene.get_floor_height(self.floor_num)
             # shortest_path = np.array([np.array([path[0], path[1], floor_height]) for path in shortest_path])
             # min_kin_dist = self.path_length * self.robots[0].robot_mass
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
new file mode 100644
index 000000000..3fda4870b
--- /dev/null
+++ b/gibson2/envs/motion_planner_env.py
@@ -0,0 +1,97 @@
+from gibson2.core.physics.interactive_objects import VisualMarker, InteractiveObj, BoxShape
+import gibson2
+from gibson2.utils.utils import parse_config, rotate_vector_3d, l2_distance, quatToXYZW
+from gibson2.envs.base_env import BaseEnv
+from transforms3d.euler import euler2quat
+from collections import OrderedDict
+import argparse
+from gibson2.learn.completion import CompletionNet, identity_init, Perceptual
+import torch.nn as nn
+import torch
+from torchvision import datasets, transforms
+from transforms3d.quaternions import quat2mat, qmult
+import gym
+import numpy as np
+import os
+import pybullet as p
+from IPython import embed
+import cv2
+import time
+import collections
+from gibson2.envs.locomotor_env import NavigateEnv, NavigateRandomEnv
+from gibson2.external.pybullet_tools.utils import plan_base_motion, set_base_values
+
+class MotionPlanningEnv(NavigateRandomEnv):
+    def __init__(self,
+                 config_file,
+                 model_id=None,
+                 collision_reward_weight=0.0,
+                 mode='headless',
+                 action_timestep=1 / 10.0,
+                 physics_timestep=1 / 240.0,
+                 device_idx=0,
+                 automatic_reset=False,
+                 ):
+        super(MotionPlanningEnv, self).__init__(config_file,
+                                                           model_id=model_id,
+                                                           mode=mode,
+                                                           action_timestep=action_timestep,
+                                                           physics_timestep=physics_timestep,
+                                                           automatic_reset=automatic_reset,
+                                                           random_height=False,
+                                                           device_idx=device_idx)
+
+    def prepare_motion_planner(self):
+        self.robot_id = self.robots[0].robot_ids[0]
+        self.mesh_id = self.scene.mesh_body_id
+        self.map_size = self.scene.trav_map_original_size * self.scene.trav_map_default_resolution
+
+        print(self.robot_id, self.mesh_id, self.map_size)
+
+    def plan_base_motion(self, x,y,theta):
+        half_size = self.map_size / 2.0
+        path = plan_base_motion(self.robot_id, [x,y,theta], ((-half_size, -half_size), (half_size, half_size)), obstacles=[self.mesh_id], restarts=10,
+                                iterations=50, smooth=30)
+        return path
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--config',
+        '-c',
+        help='which config file to use [default: use yaml files in examples/configs]')
+    parser.add_argument('--mode',
+                        '-m',
+                        choices=['headless', 'gui'],
+                        default='headless',
+                        help='which mode for simulation (default: headless)')
+    
+    args = parser.parse_args()
+
+    nav_env = MotionPlanningEnv(config_file=args.config,
+                          mode=args.mode,
+                          action_timestep=1.0 / 10.0,
+                          physics_timestep=1.0 / 40.0)
+
+    nav_env.prepare_motion_planner()
+
+    for episode in range(100):
+        print('Episode: {}'.format(episode))
+        start = time.time()
+        nav_env.reset()
+        path = nav_env.plan_base_motion(nav_env.target_pos[0], nav_env.target_pos[1], 0)
+        if path is not None:
+            for bq in path:
+                set_base_values(nav_env.robot_id, [bq[0], bq[1], bq[2]])
+                nav_env.step([0,0])
+                #time.sleep(0.05)
+            #nav_env.step()
+        #for step in range(50):  # 500 steps, 50s world time
+        #    action = nav_env.action_space.sample()
+        #    state, reward, done, _ = nav_env.step(action)
+        #    # print('reward', reward)
+        #    if done:
+        #        print('Episode finished after {} timesteps'.format(step + 1))
+        #        break
+        print(time.time() - start)
+    nav_env.clean()
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index f08839563..cabaed444 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -2461,19 +2461,20 @@ def get_collision_fn(body, joints, obstacles, attachments, self_collisions, disa
     # TODO: test self collision with the holding
     def collision_fn(q):
         if not all_between(lower_limits, q, upper_limits):
-            #print('Joint limits violated')
-            return True
+            print(lower_limits, q, upper_limits)
+            print('Joint limits violated')
+            #return True
         set_joint_positions(body, joints, q)
         for attachment in attachments:
             attachment.assign()
         for link1, link2 in check_link_pairs:
             # Self-collisions should not have the max_distance parameter
             if pairwise_link_collision(body, link1, body, link2): #, **kwargs):
-                #print(get_body_name(body), get_link_name(body, link1), get_link_name(body, link2))
-                return True
+                print(get_body_name(body), get_link_name(body, link1), get_link_name(body, link2))
+                #return True
         for body1, body2 in check_body_pairs:
             if pairwise_collision(body1, body2, **kwargs):
-                #print(get_body_name(body1), get_body_name(body2))
+                print(get_body_name(body1), get_body_name(body2))
                 return True
         return False
     return collision_fn
@@ -2652,13 +2653,27 @@ def plan_base_motion(body, end_conf, base_limits, obstacles=[], direct=False,
     distance_fn = get_base_distance_fn(weights=weights)
 
     def extend_fn(q1, q2):
-        steps = np.abs(np.divide(difference_fn(q2, q1), resolutions))
-        n = int(np.max(steps)) + 1
-        q = q1
-        for i in range(n):
-            q = tuple((1. / (n - i)) * np.array(difference_fn(q2, q)) + q)
+        target_theta = np.arctan2(q2[1]-q1[1], q2[0]-q1[0])
+
+        n1 = int(np.abs(circular_difference(target_theta, q1[2]) / resolutions[2])) + 1
+        n3 = int(np.abs(circular_difference(q2[2], target_theta) / resolutions[2])) + 1
+        steps2 = np.abs(np.divide(difference_fn(q2, q1), resolutions))
+        n2 = int(np.max(steps2)) + 1
+
+        for i in range(n1):
+            q = (i / (n1)) * np.array(difference_fn((q1[0], q1[1], target_theta), q1)) + np.array(q1)
+            q = tuple(q)
+            yield q
+
+        for i in range(n2):
+            q = (i / (n2)) * np.array(difference_fn((q2[0], q2[1], target_theta), (q1[0], q1[1], target_theta))) +  np.array((q1[0], q1[1], target_theta))
+            q = tuple(q)
+            yield q
+
+        for i in range(n3):
+            q = (i / (n3)) * np.array(difference_fn(q2, (q2[0], q2[1], target_theta))) + np.array((q2[0], q2[1], target_theta))
+            q = tuple(q)
             yield q
-            # TODO: should wrap these joints
 
     def collision_fn(q):
         # TODO: update this function

From f7653249b1ed17cb1f38d70fcc7d5a68eee511ff Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Sun, 5 Jan 2020 22:54:39 -0800
Subject: [PATCH 06/58] env for subgoal learning

---
 examples/configs/turtlebot_p2p_nav_house.yaml |   6 +-
 gibson2/envs/locomotor_env.py                 |  11 ++
 gibson2/envs/motion_planner_env.py            | 127 +++++++++++++++---
 gibson2/external/pybullet_tools/utils.py      |   8 +-
 4 files changed, 128 insertions(+), 24 deletions(-)

diff --git a/examples/configs/turtlebot_p2p_nav_house.yaml b/examples/configs/turtlebot_p2p_nav_house.yaml
index 438e672c1..20e662d01 100644
--- a/examples/configs/turtlebot_p2p_nav_house.yaml
+++ b/examples/configs/turtlebot_p2p_nav_house.yaml
@@ -37,9 +37,9 @@ dist_tol: 0.2
 max_step: 500
 
 # sensor
-output: [sensor, rgb, depth]
-resolution: 256
-fov: 90
+output: [sensor, rgb, depth, pc]
+resolution: 128
+fov: 120
 
 # display
 use_filler: true
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 378c62899..fc5395397 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -164,6 +164,14 @@ class NavigateEnv(BaseEnv):
                                                      1),
                                               dtype=np.float32)
             observation_space['depth'] = self.depth_space
+        # if 'pc' in self.output:
+        #     self.depth_space = gym.spaces.Box(low=-np.inf,
+        #                                       high=np.inf,
+        #                                       shape=(self.config.get('resolution', 64),
+        #                                              self.config.get('resolution', 64),
+        #                                              4),
+        #                                       dtype=np.float32)
+        #     observation_space['pc'] = self.depth_space
         if 'seg' in self.output:
             self.seg_space = gym.spaces.Box(low=0.0,
                                             high=1.0,
@@ -262,6 +270,9 @@ class NavigateEnv(BaseEnv):
         if 'depth' in self.output:
             depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
             state['depth'] = depth
+        if 'pc' in self.output:
+            pc = self.simulator.renderer.render_robot_cameras(modes=('3d'))[0]
+            state['pc'] = pc
         if 'normal' in self.output:
             state['normal'] = self.simulator.renderer.render_robot_cameras(modes='normal')
         if 'seg' in self.output:
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 3fda4870b..5569bb1b2 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -19,7 +19,9 @@ import cv2
 import time
 import collections
 from gibson2.envs.locomotor_env import NavigateEnv, NavigateRandomEnv
-from gibson2.external.pybullet_tools.utils import plan_base_motion, set_base_values
+from gibson2.external.pybullet_tools.utils import plan_base_motion, set_base_values, get_base_values
+from gibson2.core.render.utils import quat_pos_to_mat
+
 
 class MotionPlanningEnv(NavigateRandomEnv):
     def __init__(self,
@@ -41,19 +43,111 @@ class MotionPlanningEnv(NavigateRandomEnv):
                                                            random_height=False,
                                                            device_idx=device_idx)
 
+        self.mp_loaded = False
+        # override some parameters:
+        self.max_step = 20
+        self.planner_step = 0
+        self.action_space = gym.spaces.Box(shape=(3,),
+                                           low = -1.0,
+                                           high=1.0,
+                                           dtype=np.float32)
+
+
     def prepare_motion_planner(self):
         self.robot_id = self.robots[0].robot_ids[0]
         self.mesh_id = self.scene.mesh_body_id
         self.map_size = self.scene.trav_map_original_size * self.scene.trav_map_default_resolution
-
         print(self.robot_id, self.mesh_id, self.map_size)
+        self.marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
+                              rgba_color=[1, 0, 0, 1],
+                              radius=0.1,
+                              length=0.1,
+                              initial_offset=[0, 0, 0.1 / 2.0])
+        self.marker.load()
+        self.mp_loaded = True
 
     def plan_base_motion(self, x,y,theta):
         half_size = self.map_size / 2.0
-        path = plan_base_motion(self.robot_id, [x,y,theta], ((-half_size, -half_size), (half_size, half_size)), obstacles=[self.mesh_id], restarts=10,
-                                iterations=50, smooth=30)
+        path = plan_base_motion(self.robot_id, [x,y,theta], ((-half_size, -half_size), (half_size, half_size)), obstacles=[self.mesh_id])
         return path
 
+    def step(self, pt):
+        # point = [x,y]
+        x = int((pt[0]+1)/2.0 * 150)
+        y = int((pt[1]+1)/2.0 * 128)
+        yaw = self.robots[0].get_rpy()[2]
+        orn = pt[2] * np.pi + yaw
+
+        opos = get_base_values(self.robot_id)
+
+        self.get_additional_states()
+        org_potential = self.get_potential()
+
+        if x < 128:
+            state, reward, done, _ = super(MotionPlanningEnv, self).step([0, 0])
+
+            points = state['pc']
+            point = points[x, y]
+
+            camera_pose = (self.robots[0].parts['eyes'].get_pose())
+            transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
+                                         quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
+
+            projected_point = (transform_mat).dot(np.array([-point[2], -point[0], point[1], 1]))
+
+            subgoal = projected_point[:2]
+        else:
+            subgoal = list(opos)[:2]
+
+        path = self.plan_base_motion(subgoal[0], subgoal[1], orn)
+        if path is not None:
+            self.marker.set_position([subgoal[0], subgoal[1], 0.1])
+            bq = path[-1]
+            #for bq in path:
+            set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
+            state, _, done, info = super(MotionPlanningEnv, self).step([0, 0])
+            #time.sleep(0.05)
+            self.get_additional_states()
+            reward = org_potential - self.get_potential()
+        else:
+            set_base_values(self.robot_id, opos)
+            state, _, done, info = super(MotionPlanningEnv, self).step([0, 0])
+            reward = -0.02
+
+        done = False
+
+        if l2_distance(self.target_pos, self.robots[0].get_position()) < self.dist_tol:
+            reward += self.success_reward  # |success_reward| = 10.0 per step
+            done = True
+        else:
+            done = False
+
+        print('reward', reward)
+
+        self.planner_step += 1
+
+        if self.planner_step > self.max_step:
+            done = True
+        #print(info)
+        #if info['success']:
+        #    done = True
+        info['planner_step'] = self.planner_step
+        del state['pc']
+
+        return state, reward, done, info
+
+    def reset(self):
+        state = super(MotionPlanningEnv, self).reset()
+        if not self.mp_loaded:
+            self.prepare_motion_planner()
+
+        self.planner_step = 0
+
+        del state['pc']
+
+        return state
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -65,33 +159,32 @@ if __name__ == '__main__':
                         choices=['headless', 'gui'],
                         default='headless',
                         help='which mode for simulation (default: headless)')
-    
+
     args = parser.parse_args()
 
     nav_env = MotionPlanningEnv(config_file=args.config,
                           mode=args.mode,
-                          action_timestep=1.0 / 10.0,
-                          physics_timestep=1.0 / 40.0)
+                          action_timestep=1.0 / 1000000.0,
+                          physics_timestep=1.0 / 1000000.0)
+
 
-    nav_env.prepare_motion_planner()
 
     for episode in range(100):
         print('Episode: {}'.format(episode))
         start = time.time()
         nav_env.reset()
-        path = nav_env.plan_base_motion(nav_env.target_pos[0], nav_env.target_pos[1], 0)
-        if path is not None:
-            for bq in path:
-                set_base_values(nav_env.robot_id, [bq[0], bq[1], bq[2]])
-                nav_env.step([0,0])
-                #time.sleep(0.05)
+        for i in range(150):
+            action = nav_env.action_space.sample()
+            state, reward, done, info = nav_env.step(action)
+            print(state, reward, done, info)
+            #time.sleep(0.05)
             #nav_env.step()
         #for step in range(50):  # 500 steps, 50s world time
         #    action = nav_env.action_space.sample()
         #    state, reward, done, _ = nav_env.step(action)
         #    # print('reward', reward)
-        #    if done:
-        #        print('Episode finished after {} timesteps'.format(step + 1))
-        #        break
+            if done:
+                print('Episode finished after {} timesteps'.format(i + 1))
+                break
         print(time.time() - start)
     nav_env.clean()
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index cabaed444..9f17e6580 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -2508,10 +2508,10 @@ def plan_direct_joint_motion(body, joints, end_conf, **kwargs):
 
 def check_initial_end(start_conf, end_conf, collision_fn):
     if collision_fn(start_conf):
-        print("Warning: initial configuration is in collision")
+        #print("Warning: initial configuration is in collision")
         return False
     if collision_fn(end_conf):
-        print("Warning: end configuration is in collision")
+        #print("Warning: end configuration is in collision")
         return False
     return True
 
@@ -2682,10 +2682,10 @@ def plan_base_motion(body, end_conf, base_limits, obstacles=[], direct=False,
 
     start_conf = get_base_values(body)
     if collision_fn(start_conf):
-        print("Warning: initial configuration is in collision")
+        #print("Warning: initial configuration is in collision")
         return None
     if collision_fn(end_conf):
-        print("Warning: end configuration is in collision")
+        #print("Warning: end configuration is in collision")
         return None
     if direct:
         return direct_path(start_conf, end_conf, extend_fn, collision_fn)

From 3266a294bd9e892870aae90330acf5d6c221552d Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Mon, 6 Jan 2020 19:58:56 -0800
Subject: [PATCH 07/58] arm motion (dirty)

---
 .../configs/fetch_interactive_nav_s2r.yaml    |  71 +++++++
 gibson2/core/physics/robot_locomotors.py      |  22 +-
 gibson2/envs/locomotor_env.py                 |   3 +-
 gibson2/envs/motion_planner_env.py            | 191 +++++++++++++++++-
 4 files changed, 278 insertions(+), 9 deletions(-)
 create mode 100644 examples/configs/fetch_interactive_nav_s2r.yaml

diff --git a/examples/configs/fetch_interactive_nav_s2r.yaml b/examples/configs/fetch_interactive_nav_s2r.yaml
new file mode 100644
index 000000000..c5a874a63
--- /dev/null
+++ b/examples/configs/fetch_interactive_nav_s2r.yaml
@@ -0,0 +1,71 @@
+# scene
+scene: building
+model_id: Avonia
+
+# robot
+robot: Fetch
+velocity: 0.1
+
+# task, observation and action
+task: reaching # pointgoal|objectgoal|areagoal|reaching
+fisheye: false
+
+initial_pos: [-1.0, -1.0, 0.0]
+initial_orn: [0.0, 0.0, 0.0]
+
+target_pos: [1.0, 1.0, 0.0]
+target_orn: [0.0, 0.0, 0.0]
+
+is_discrete: false
+additional_states_dim: 6
+
+# reward
+success_reward: 10.0
+slack_reward: -0.01
+potential_reward_weight: 30.0
+#electricity_reward_weight: -0.02
+#stall_torque_reward_weight: -0.0001
+electricity_reward_weight: 0.0
+stall_torque_reward_weight: 0.0
+collision_reward_weight: 0.0
+
+# discount factor
+discount_factor: 0.99
+
+# termination condition
+dist_tol: 0.2
+max_step: 500
+
+# sensor
+output: [sensor, rgb, depth, pc]
+resolution: 128
+fov: 120
+
+# display
+use_filler: true
+display_ui: false
+show_diagnostics: false
+ui_num: 2
+ui_components: [RGB_FILLED, DEPTH]
+random:
+  random_initial_pose: false
+  random_target_pose: false
+  random_init_x_range: [-0.1, 0.1]
+  random_init_y_range: [-0.1, 0.1]
+  random_init_z_range: [-0.1, 0.1]
+  random_init_rot_range: [-0.1, 0.1]
+
+speed:
+  timestep: 0.001
+  frameskip: 10
+
+mode: web_ui #gui|headless
+verbose: false
+fast_lq_render: true
+
+# visual objects
+visual_object_at_initial_target_pos: true
+target_visual_object_visible_to_agent: true
+
+# debug
+debug: false
diff --git a/gibson2/core/physics/robot_locomotors.py b/gibson2/core/physics/robot_locomotors.py
index ed157d4c1..f8f4f3f52 100644
--- a/gibson2/core/physics/robot_locomotors.py
+++ b/gibson2/core/physics/robot_locomotors.py
@@ -8,7 +8,11 @@ from transforms3d.euler import euler2quat, euler2mat
 from transforms3d.quaternions import quat2mat, qmult
 import transforms3d.quaternions as quat
 import sys
-
+from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_name, set_joint_position, \
+    set_joint_positions, add_data_path, connect, plan_base_motion, plan_joint_motion, enable_gravity, \
+    joint_controller, dump_body, load_model, joints_from_names, user_input, disconnect, get_joint_positions, \
+    get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, \
+    set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_base_values
 
 class LocomotorRobot(BaseRobot):
     """ Built on top of BaseRobot
@@ -569,9 +573,19 @@ class Fetch(LocomotorRobot):
 
     def robot_specific_reset(self):
         super(Fetch, self).robot_specific_reset()
+        for j in self.ordered_joints:
+            j.reset_joint_state(0.0, 0.0)
         # roll the arm to its body
-        for i in range(2, 6):
-            self.ordered_joints[i].reset_joint_state(np.pi / 2.0, 0.0)
+        robot_id = self.robot_ids[0]
+        arm_joints = joints_from_names(robot_id, ['torso_lift_joint', 'shoulder_pan_joint', 'shoulder_lift_joint',
+                                                  'upperarm_roll_joint',
+                                                  'elbow_flex_joint', 'forearm_roll_joint', 'wrist_flex_joint',
+                                                  'wrist_roll_joint'])
+        rest_position = (0.38548146667743244, 1.1522793897208579, 1.2576467971105596, -0.312703569911879,
+                         1.7404867100093226, -0.0962895617312548, -1.4418232619629425, -1.6780152866247762)
+
+        set_joint_positions(robot_id, arm_joints, rest_position)
+
 
     def apply_action(self, action):
         denormalized_action = self.action_to_real_action(action)
@@ -586,7 +600,7 @@ class Fetch(LocomotorRobot):
         return np.concatenate((base_state, np.array(angular_velocity)))
 
     def get_end_effector_position(self):
-        return self.parts['l_gripper_finger_link'].get_position()
+        return self.parts['gripper_link'].get_position()
 
 
 class JR2(LocomotorRobot):
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index fc5395397..798633a54 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -489,7 +489,8 @@ class NavigateEnv(BaseEnv):
         :return: state: state, reward, done, info
         """
         self.current_step += 1
-        self.robots[0].apply_action(action)
+        if action is not None:
+            self.robots[0].apply_action(action)
         cache = self.before_simulation()
         collision_links = self.run_simulation()
         self.after_simulation(cache, collision_links)
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 5569bb1b2..00f56a9c5 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -19,9 +19,12 @@ import cv2
 import time
 import collections
 from gibson2.envs.locomotor_env import NavigateEnv, NavigateRandomEnv
-from gibson2.external.pybullet_tools.utils import plan_base_motion, set_base_values, get_base_values
 from gibson2.core.render.utils import quat_pos_to_mat
-
+from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_name, set_joint_position, \
+    set_joint_positions, add_data_path, connect, plan_base_motion, plan_joint_motion, enable_gravity, \
+    joint_controller, dump_body, load_model, joints_from_names, user_input, disconnect, get_joint_positions, \
+    get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, \
+    set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_base_values
 
 class MotionPlanningEnv(NavigateRandomEnv):
     def __init__(self,
@@ -148,6 +151,186 @@ class MotionPlanningEnv(NavigateRandomEnv):
         return state
 
 
+class MotionPlanningBaseArmEnv(NavigateRandomEnv):
+    def __init__(self,
+                 config_file,
+                 model_id=None,
+                 collision_reward_weight=0.0,
+                 mode='headless',
+                 action_timestep=1 / 10.0,
+                 physics_timestep=1 / 240.0,
+                 device_idx=0,
+                 automatic_reset=False,
+                 ):
+        super(MotionPlanningBaseArmEnv, self).__init__(config_file,
+                                                           model_id=model_id,
+                                                           mode=mode,
+                                                           action_timestep=action_timestep,
+                                                           physics_timestep=physics_timestep,
+                                                           automatic_reset=automatic_reset,
+                                                           random_height=False,
+                                                           device_idx=device_idx)
+
+        self.mp_loaded = False
+        # override some parameters:
+        self.max_step = 20
+        self.planner_step = 0
+        self.action_space = gym.spaces.Box(shape=(5,),
+                                           low = -1.0,
+                                           high=1.0,
+                                           dtype=np.float32)
+
+
+    def prepare_motion_planner(self):
+        self.robot_id = self.robots[0].robot_ids[0]
+        self.mesh_id = self.scene.mesh_body_id
+        self.map_size = self.scene.trav_map_original_size * self.scene.trav_map_default_resolution
+        print(self.robot_id, self.mesh_id, self.map_size)
+        self.marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
+                              rgba_color=[1, 0, 0, 1],
+                              radius=0.1,
+                              length=0.1,
+                              initial_offset=[0, 0, 0.1 / 2.0])
+        self.marker.load()
+
+        self.marker2 = VisualMarker(visual_shape=p.GEOM_CYLINDER,
+                                   rgba_color=[1, 1, 0, 1],
+                                   radius=0.1,
+                                   length=0.1,
+                                   initial_offset=[0, 0, 0.1 / 2.0])
+        self.marker2.load()
+
+        self.mp_loaded = True
+
+    def plan_base_motion(self, x,y,theta):
+        half_size = self.map_size / 2.0
+        path = plan_base_motion(self.robot_id, [x,y,theta], ((-half_size, -half_size), (half_size, half_size)), obstacles=[self.mesh_id])
+        return path
+
+    def step(self, pt):
+        # point = [x,y]
+        x = int((pt[0]+1)/2.0 * 150)
+        y = int((pt[1]+1)/2.0 * 128)
+        yaw = self.robots[0].get_rpy()[2]
+        orn = pt[2] * np.pi + yaw
+
+        armx = int((pt[3]+1)/2.0 * 128)
+        army = int((pt[4]+1)/2.0 * 128)
+
+        opos = get_base_values(self.robot_id)
+
+        self.get_additional_states()
+        org_potential = self.get_potential()
+
+        state, _, done, _ = super(MotionPlanningBaseArmEnv, self).step(None)
+        points = state['pc']
+
+        if x < 128:
+
+            point = points[x, y]
+
+            camera_pose = (self.robots[0].parts['eyes'].get_pose())
+            transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
+                                         quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
+
+            projected_point = (transform_mat).dot(np.array([-point[2], -point[0], point[1], 1]))
+
+            subgoal = projected_point[:2]
+        else:
+            subgoal = list(opos)[:2]
+
+        path = self.plan_base_motion(subgoal[0], subgoal[1], orn)
+        if path is not None:
+            self.marker.set_position([subgoal[0], subgoal[1], 0.1])
+            bq = path[-1]
+            set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
+            state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
+            self.get_additional_states()
+
+        else:
+            set_base_values(self.robot_id, opos)
+            state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
+            reward = -0.02
+
+            del state['pc']
+            return state, reward, done, info
+
+        point = points[armx, army]
+        camera_pose = (self.robots[0].parts['eyes'].get_pose())
+        transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
+                                        quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
+        projected_point2 = (transform_mat).dot(np.array([-point[2], -point[0], point[1], 1]))[:3]
+
+        projected_point2[2] += 1 # 1m from floor
+
+        self.marker2.set_position(projected_point2)
+
+        robot_id = self.robot_id
+        arm_joints = joints_from_names(robot_id, ['torso_lift_joint', 'shoulder_pan_joint', 'shoulder_lift_joint',
+                                                  'upperarm_roll_joint',
+                                                  'elbow_flex_joint', 'forearm_roll_joint', 'wrist_flex_joint',
+                                                  'wrist_roll_joint'])
+        max_limits = [0, 0] + get_max_limits(robot_id, arm_joints) + [0.05, 0.05]
+        min_limits = [0, 0] + get_min_limits(robot_id, arm_joints) + [0, 0]
+        rest_position = [0, 0] + list(get_joint_positions(robot_id, arm_joints)) + [0.04, 0.04]
+        joint_range = list(np.array(max_limits) - np.array(min_limits))
+        joint_range = [item + 1 for item in joint_range]
+        jd = [0.1 for item in joint_range]
+        #
+        jp = p.calculateInverseKinematics(self.robot_id, self.robots[0].parts['gripper_link'].body_part_index, projected_point2,
+                                     lowerLimits=min_limits,
+                                     upperLimits=max_limits,
+                                     jointRanges=joint_range,
+                                     restPoses=rest_position,
+                                     jointDamping=jd,
+                                     solver=p.IK_DLS,
+                                     maxNumIterations=100)[2:10]
+
+        print(jp)
+        set_joint_positions(self.robot_id, arm_joints, jp)
+        p1 = self.robots[0].get_end_effector_position()
+        dist = np.linalg.norm(np.array(p1) - np.array(projected_point2))
+
+        print(dist)
+        rp = (0.38548146667743244, 1.1522793897208579, 1.2576467971105596, -0.312703569911879,
+         1.7404867100093226, -0.0962895617312548, -1.4418232619629425, -1.6780152866247762)
+
+        reward = org_potential - self.get_potential()
+
+        if l2_distance(self.target_pos, self.robots[0].get_end_effector_position()) < self.dist_tol:
+            reward += self.success_reward  # |success_reward| = 10.0 per step
+            done = True
+        else:
+            done = False
+
+        set_joint_positions(self.robot_id, arm_joints, rp)
+        state, _, _, info = super(MotionPlanningBaseArmEnv, self).step(None)
+
+        print('reward', reward)
+        self.planner_step += 1
+
+        if self.planner_step > self.max_step:
+            done = True
+
+        info['planner_step'] = self.planner_step
+        del state['pc']
+
+        return state, reward, done, info
+
+    def reset(self):
+        state = super(MotionPlanningBaseArmEnv, self).reset()
+
+        if not self.mp_loaded:
+            self.prepare_motion_planner()
+
+        self.target_pos[2] = 1.0
+        self.target_pos_vis_obj.set_position(self.target_pos)
+
+        self.planner_step = 0
+        del state['pc']
+
+        return state
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -162,7 +345,7 @@ if __name__ == '__main__':
 
     args = parser.parse_args()
 
-    nav_env = MotionPlanningEnv(config_file=args.config,
+    nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
                           mode=args.mode,
                           action_timestep=1.0 / 1000000.0,
                           physics_timestep=1.0 / 1000000.0)
@@ -176,7 +359,7 @@ if __name__ == '__main__':
         for i in range(150):
             action = nav_env.action_space.sample()
             state, reward, done, info = nav_env.step(action)
-            print(state, reward, done, info)
+            #print(state, reward, done, info)
             #time.sleep(0.05)
             #nav_env.step()
         #for step in range(50):  # 500 steps, 50s world time

From c876fcd22b8194cccfd0b8751d5633dcd6f63aa0 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Thu, 9 Jan 2020 14:06:12 -0800
Subject: [PATCH 08/58] update

---
 examples/demo/cabinets_example_fetch.py | 20 ++++++++++++++------
 examples/demo/ik_example_fetch.py       |  9 ++++++---
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/examples/demo/cabinets_example_fetch.py b/examples/demo/cabinets_example_fetch.py
index 43d38e887..b0af1552d 100644
--- a/examples/demo/cabinets_example_fetch.py
+++ b/examples/demo/cabinets_example_fetch.py
@@ -30,15 +30,15 @@ fetch.robot_body.reset_orientation([0,0,1,0])
 obstacles = []
 obj = InteractiveObj(filename='/data4/mdv0/cabinet/0007/part_objs/cabinet_0007.urdf')
 ids = s.import_interactive_object(obj)
-for i in range(5):
-    p.changeDynamics(ids,i,lateralFriction=50,mass=0.1, linearDamping=0, angularDamping=0)
+# for i in range(5):
+#     p.changeDynamics(ids,i,lateralFriction=50,mass=0.1, linearDamping=0, angularDamping=0)
 obstacles.append(ids)
 
 obj.set_position([-2,0,0.5])
 obj = InteractiveObj(filename='/data4/mdv0/cabinet/0007/part_objs/cabinet_0007.urdf')
 ids = s.import_interactive_object(obj)
-for i in range(5):
-    p.changeDynamics(ids,i,lateralFriction=50,mass=0.1, linearDamping=0, angularDamping=0)
+# for i in range(5):
+#     p.changeDynamics(ids,i,lateralFriction=50,mass=0.1, linearDamping=0, angularDamping=0)
 obstacles.append(ids)
 
 obj.set_position([-2,2,0.5])
@@ -52,6 +52,14 @@ ids = s.import_interactive_object(obj)
 obstacles.append(ids)
 obj.set_position([-2.1, 0.4, 2])
 
+for ob in obstacles:
+    #jointPositions = [0.000000, 0.000000, 0.000000, 0.000000, 0.000000]
+    for jointIndex in range(p.getNumJoints(ob)):
+        #p.resetJointState(ob, jointIndex, jointPositions[jointIndex])
+        friction = 1
+        p.setJointMotorControl2(ob, jointIndex, p.VELOCITY_CONTROL, force=friction)
+
+
 obj = BoxShape([-2.05,1,0.5], [0.35,0.6,0.5])
 ids = s.import_interactive_object(obj)
 obstacles.append(ids)
@@ -115,8 +123,8 @@ for i in range(100):
     control_joints(robot_id, finger_joints, [0.04, 0.04])
     s.step()
 
-p.changeDynamics(robot_id, fetch.parts['r_gripper_finger_link'].body_part_index, lateralFriction=50)
-p.changeDynamics(robot_id, fetch.parts['l_gripper_finger_link'].body_part_index, lateralFriction=50)
+p.changeDynamics(robot_id, fetch.parts['r_gripper_finger_link'].body_part_index, lateralFriction=1)
+p.changeDynamics(robot_id, fetch.parts['l_gripper_finger_link'].body_part_index, lateralFriction=1)
 
 if 1:
         sid = p.saveState()
diff --git a/examples/demo/ik_example_fetch.py b/examples/demo/ik_example_fetch.py
index 2c177ce44..d33bc8c24 100644
--- a/examples/demo/ik_example_fetch.py
+++ b/examples/demo/ik_example_fetch.py
@@ -13,7 +13,7 @@ from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_na
     set_joint_positions, add_data_path, connect, plan_base_motion, plan_joint_motion, enable_gravity, \
     joint_controller, dump_body, load_model, joints_from_names, user_input, disconnect, get_joint_positions, \
     get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, \
-    set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits
+    set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_sample_fn
 
 import time
 import numpy as np
@@ -51,11 +51,14 @@ print(min_limits)
 
 
 def accurateCalculateInverseKinematics(robotid, endEffectorId, targetPos, threshold, maxIter):
+
+  sample_fn = get_sample_fn(robotid, arm_joints)
+  set_joint_positions(robotid, arm_joints, sample_fn())
   closeEnough = False
   iter = 0
   dist2 = 1e30
   while (not closeEnough and iter < maxIter):
-    jointPoses = p.calculateInverseKinematics(robotid, endEffectorId, targetPos,[0,0.7071067,0,0.7071067],
+    jointPoses = p.calculateInverseKinematics(robotid, endEffectorId, targetPos,
                                                      lowerLimits = min_limits,
                                                      upperLimits = max_limits,
                                                      jointRanges = joint_range,
@@ -79,7 +82,7 @@ while True:
         maxIter = 100
         joint_pos = accurateCalculateInverseKinematics(robot_id, fetch.parts['gripper_link'].body_part_index, [x, y, z], threshold, maxIter)[2:10]
 
-        control_joints(robot_id, finger_joints, [0.04, 0.04])
+        set_joint_positions(robot_id, finger_joints, [0.04, 0.04])
         s.step()
         keys = p.getKeyboardEvents()
         for k, v in keys.items():

From 8a953be284e00edf52459704f6c2df1b2392750e Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Fri, 10 Jan 2020 11:01:42 -0800
Subject: [PATCH 09/58] turtlebot rgbd+lidar+seg obs, pushing obstacles

---
 .../turtlebot_interactive_nav_s2r.yaml        |  14 +-
 gibson2/core/physics/robot_bases.py           |  24 +-
 gibson2/envs/locomotor_env.py                 | 239 +++++++++++++++---
 3 files changed, 229 insertions(+), 48 deletions(-)

diff --git a/examples/configs/turtlebot_interactive_nav_s2r.yaml b/examples/configs/turtlebot_interactive_nav_s2r.yaml
index 632382ffb..bf7990cdd 100644
--- a/examples/configs/turtlebot_interactive_nav_s2r.yaml
+++ b/examples/configs/turtlebot_interactive_nav_s2r.yaml
@@ -4,9 +4,9 @@ scene: building
 model_id: area1
 #model_id: Albertville
 build_graph: true
-load_texture: false
+load_texture: true
 should_load_replaced_objects: false
-should_load_additional_objects: false
+should_load_additional_objects: true
 trav_map_erosion: 2
 
 # robot
@@ -26,7 +26,7 @@ target_pos: [0, 2, 0.0]
 target_orn: [0.0, 0.0, 0.0]
 
 is_discrete: false
-additional_states_dim: 26
+additional_states_dim: 25
 
 # reward
 reward_type: dense
@@ -47,9 +47,13 @@ dist_tol: 0.333  # body width
 max_step: 500
 
 # sensor
-output: [sensor, depth]
-resolution: 80
+output: [sensor, rgbd, scan, seg_pred]
+resolution: 96
+#output: [sensor, rgbd, seg_pred]
+#resolution: 320
 fov: 58
+n_horizontal_rays: 228
+n_vertical_beams: 1
 
 # display
 use_filler: true
diff --git a/gibson2/core/physics/robot_bases.py b/gibson2/core/physics/robot_bases.py
index 47bdb68df..2fdae52bc 100644
--- a/gibson2/core/physics/robot_bases.py
+++ b/gibson2/core/physics/robot_bases.py
@@ -338,14 +338,6 @@ class Joint:
             vx *= self.scale
         return x, vx, trq
 
-    def set_state(self, x, vx):
-        """Set state of joint
-           x is defined in real world scale """
-        if self.joint_type == p.JOINT_PRISMATIC:
-            x /= self.scale
-            vx /= self.scale
-        p.resetJointState(self.bodies[self.body_index], self.joint_index, x, vx)
-
     def get_relative_state(self):
         pos, vel, trq = self.get_state()
 
@@ -391,7 +383,21 @@ class Joint:
                                 force=torque)    # , positionGain=0.1, velocityGain=0.1)
 
     def reset_state(self, pos, vel):
-        self.set_state(pos, vel)
+        """
+        Reset pos and vel of joint
+        """
+        if self.joint_type == p.JOINT_PRISMATIC:
+            pos /= self.scale
+            vel /= self.scale
+
+        # resetJointState does not reset targetVelocity correctly, so we need to also call setJointMotorControl2.
+        p.resetJointState(self.bodies[self.body_index], self.joint_index, targetValue=pos, targetVelocity=vel)
+        p.setJointMotorControl2(bodyIndex=self.bodies[self.body_index],
+                                jointIndex=self.joint_index,
+                                controlMode=p.POSITION_CONTROL,
+                                targetPosition=pos,
+                                targetVelocity=vel,
+                                force=0)
 
     def disable_motor(self):
         p.setJointMotorControl2(self.bodies[self.body_index],
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 48509f647..09319cbfc 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -1,3 +1,6 @@
+from semantic_segmentation_pytorch.models import ModelBuilder
+from semantic_segmentation_pytorch.utils import colorEncode
+import semantic_segmentation_pytorch
 from gibson2.core.physics.interactive_objects import VisualObject, InteractiveObj, BoxShape
 import gibson2
 from gibson2.utils.utils import parse_config, rotate_vector_3d, rotate_vector_2d, l2_distance, quatToXYZW
@@ -19,6 +22,9 @@ import cv2
 import time
 import collections
 import matplotlib.pyplot as plt
+from scipy.io import loadmat
+from PIL import Image
+
 
 # define navigation environments following Anderson, Peter, et al. 'On evaluation of embodied navigation agents.'
 # arXiv preprint arXiv:1807.06757 (2018).
@@ -168,6 +174,14 @@ class NavigateEnv(BaseEnv):
                                                      1),
                                               dtype=np.float32)
             observation_space['depth'] = self.depth_space
+        if 'rgbd' in self.output:
+            self.rgbd_space = gym.spaces.Box(low=0.0,
+                                             high=1.0,
+                                             shape=(self.config.get('resolution', 64),
+                                                    self.config.get('resolution', 64),
+                                                    4),
+                                             dtype=np.float32)
+            observation_space['rgbd'] = self.rgbd_space
         if 'seg' in self.output:
             self.seg_space = gym.spaces.Box(low=0.0,
                                             high=1.0,
@@ -200,6 +214,46 @@ class NavigateEnv(BaseEnv):
             self.comp.load_state_dict(
                 torch.load(os.path.join(gibson2.assets_path, 'networks', 'model.pth')))
             self.comp.eval()
+        if 'seg_pred' in self.output:
+            # torch.cuda.set_device(1)
+            encoder_weights = os.path.join(os.path.dirname(semantic_segmentation_pytorch.__file__),
+                                           # 'ckpt/ade20k-resnet18dilated-ppm_deepsup/encoder_epoch_20.pth')
+                                           'ckpt/ade20k-mobilenetv2dilated-c1_deepsup/encoder_epoch_20.pth')
+
+            self.seg_encoder = ModelBuilder.build_encoder(
+                arch='mobilenetv2dilated',
+                # arch='resnet18dilated',
+                weights=encoder_weights)
+            self.seg_encoder.cuda()
+            self.seg_encoder.eval()
+
+            decoder_weights = os.path.join(os.path.dirname(semantic_segmentation_pytorch.__file__),
+                                           # 'ckpt/ade20k-resnet18dilated-ppm_deepsup/decoder_epoch_20.pth')
+                                           'ckpt/ade20k-mobilenetv2dilated-c1_deepsup/decoder_epoch_20.pth')
+            self.seg_decoder = ModelBuilder.build_decoder(
+                # arch='ppm_deepsup',
+                # fc_dim=512,
+                arch='c1_deepsup',
+                fc_dim=320,
+                num_class=150,
+                weights=decoder_weights,
+                use_softmax=True)
+            self.seg_decoder.cuda()
+            self.seg_decoder.eval()
+            color_path = os.path.join(os.path.dirname(semantic_segmentation_pytorch.__file__), 'data/color150.mat')
+            self.seg_colors = loadmat(color_path)['colors']
+
+            self.seg_normalizer = transforms.Normalize(
+                mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225])
+
+            self.seg_pred_space = gym.spaces.Box(low=0.0,
+                                                 high=1.0,
+                                                 shape=(self.config.get('resolution', 64) // 8,
+                                                        self.config.get('resolution', 64) // 8,
+                                                        320),
+                                                 dtype=np.float32)
+            observation_space['seg_pred'] = self.seg_pred_space
 
         self.observation_space = gym.spaces.Dict(observation_space)
         self.action_space = self.robots[0].action_space
@@ -256,12 +310,15 @@ class NavigateEnv(BaseEnv):
         # if self.num_object_classes is not None:
         #     seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0)
         # cv2.imshow('rgb', rgb)
-        # cv2.imwrite('test/%d_%d.jpg' % (self.current_episode, self.current_step), (rgb * 255).astype(np.uint8))
-
+        # cv2.imwrite('test_sc/%d_%d.jpg' % (self.current_episode, self.current_step), (rgb * 255).astype(np.uint8))
+        # cv2.imwrite('test_sc/%d_%d_depth.jpg' % (self.current_episode, self.current_step), (depth * 255).astype(np.uint8))
         # cv2.imshow('depth', depth)
         # cv2.imshow('seg', seg)
 
         state = OrderedDict()
+        rgb = None
+        depth = None
+        seg = None
         if 'sensor' in self.output:
             state['sensor'] = sensor_state
         if 'auxiliary_sensor' in self.output:
@@ -269,19 +326,30 @@ class NavigateEnv(BaseEnv):
         if 'pointgoal' in self.output:
             state['pointgoal'] = sensor_state[:2]
         if 'rgb' in self.output:
-            state['rgb'] = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3]
+            if rgb is None:
+                rgb = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3]
+            state['rgb'] = rgb
         if 'depth' in self.output:
-            depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
+            if depth is None:
+                depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
             state['depth'] = depth
+        if 'rgbd' in self.output:
+            if rgb is None:
+                rgb = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3]
+            if depth is None:
+                depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
+            state['rgbd'] = np.concatenate((rgb, depth), axis=2)
         if 'normal' in self.output:
             state['normal'] = self.simulator.renderer.render_robot_cameras(modes='normal')
         if 'seg' in self.output:
-            seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1]
+            if seg is None:
+                seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1]
             if self.num_object_classes is not None:
                 seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0)
             state['seg'] = seg
         if 'depth_seg' in self.output:
-            depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
+            if depth is None:
+                depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
             depth = np.clip(depth / 5.0, 0.0, 1.0)
             seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1]
             if self.num_object_classes is not None:
@@ -293,17 +361,74 @@ class NavigateEnv(BaseEnv):
                 tensor = transforms.ToTensor()((state['rgb'] * 255).astype(np.uint8)).cuda()
                 rgb_filled = self.comp(tensor[None, :, :, :])[0].permute(1, 2, 0).cpu().numpy()
                 state['rgb_filled'] = rgb_filled
+        if 'seg_pred' in self.output:
+            if rgb is None:
+                rgb = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3]
+            with torch.no_grad():
+                width = rgb.shape[0]
+                height = int(width * (480.0 / 640.0))
+                half_diff = int((width - height) / 2)
+                rgb_cropped = rgb[half_diff:half_diff + height, :]
+                rgb_cropped = (rgb_cropped * 255).astype(np.uint8)
+                tensor = transforms.ToTensor()(rgb_cropped)
+                img = self.seg_normalizer(tensor).unsqueeze(0).cuda()
+                seg_pred = self.seg_encoder(img)
+                seg_pred = seg_pred[0][0].permute(1, 2, 0).cpu().numpy()
+                state['seg_pred'] = seg_pred
+
+                scores = self.seg_decoder(self.seg_encoder(img, return_feature_maps=True), segSize=(height, width))
+                _, pred = torch.max(scores, dim=1)
+                pred = pred.squeeze(0).cpu().numpy().astype(np.int32)
+                pred_color = colorEncode(pred, self.seg_colors).astype(np.uint8)
+                pred_color = cv2.cvtColor(pred_color, cv2.COLOR_RGB2BGR)
+
+                depth_cropped = depth[half_diff:half_diff + height, :]
+                low = 0.8
+                high = 3.5
+                invalid = depth_cropped == 0.0
+                depth_cropped[depth_cropped < low] = low
+                depth_cropped[depth_cropped > high] = high
+                depth_cropped[invalid] = 0.0
+                depth_cropped /= high
+                depth_cropped = (depth_cropped * 255).astype(np.uint8)
+                depth_cropped = np.tile(depth_cropped, (1, 1, 3))
+
+                rgb_cropped = cv2.cvtColor(rgb_cropped, cv2.COLOR_RGB2BGR)
+
+                vis = np.concatenate((rgb_cropped, depth_cropped, pred_color), axis=1)
+                cv2.imshow('vis', vis)
+                #
+                # # aggregate images and save
+                # im_vis = np.concatenate((rgb_cropped, pred_color), axis=1)
+                # if self.current_step % 10 == 0:
+                #     img_name = 'seg_results_area1/%d_%d.png' % (self.current_episode, self.current_step)
+                #     Image.fromarray(im_vis).save(img_name)
 
         if 'pointgoal' in self.output:
             state['pointgoal'] = sensor_state[:2]
 
         # TODO: figure out why 'scan' consumes so much cpu
         if 'scan' in self.output:
-            laser_linear_range = 25.0
-            laser_angular_range = 220.0
+            if self.config['robot'] == 'Turtlebot':
+                # Hokuyo URG-04LX-UG01
+                laser_linear_range = 5.6
+                laser_angular_range = 240.0
+                min_laser_dist = 0.05
+                laser_link_name = 'scan_link'
+            elif self.config['robot'] == 'Fetch':
+                # SICK TiM571-2050101 Laser Range Finder
+                laser_linear_range = 25.0
+                laser_angular_range = 220.0
+                min_laser_dist = 0.1
+                laser_link_name = 'laser_link'
+            else:
+                assert False, 'unknown robot for LiDAR observation'
+
             laser_angular_half_range = laser_angular_range / 2.0
-            min_laser_dist = 0.1
-            laser_pose = self.robots[0].parts['laser_link'].get_pose()
+            laser_pose = self.robots[0].parts[laser_link_name].get_pose()
+
+            # self.scan_vis.set_position(pos=laser_pose[:3])
+
             transform_matrix = quat2mat([laser_pose[6], laser_pose[3], laser_pose[4], laser_pose[5]])  # [x, y, z, w]
             angle = np.arange(-laser_angular_half_range / 180 * np.pi,
                               laser_angular_half_range / 180 * np.pi,
@@ -593,12 +718,13 @@ class NavigateRandomEnv(NavigateEnv):
         floor, self.initial_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height)
         max_trials = 100
         dist = 0.0
+        # Need to change to 5 meter if we need to add obstacles
         for _ in range(max_trials):  # if initial and target positions are < 1 meter away from each other, reinitialize
             _, self.target_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height)
             dist = l2_distance(self.initial_pos, self.target_pos)
-            if dist > 1.0:
+            if dist > 5.0:
                 break
-        if dist < 1.0:
+        if dist < 5.0:
             raise Exception("Failed to find initial and target pos that are >1m apart")
         self.robots[0].set_position(pos=[self.initial_pos[0],
                                          self.initial_pos[1],
@@ -854,10 +980,19 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
         resolution = self.config.get('resolution', 64)
         width = resolution
         height = int(width * (480.0 / 640.0))
-        self.observation_space.spaces['depth'] = gym.spaces.Box(low=-np.inf,
-                                                                high=np.inf,
-                                                                shape=(height, width, 1),
-                                                                dtype=np.float32)
+        if 'rgbd' in self.output:
+            self.observation_space.spaces['rgbd'] = gym.spaces.Box(low=0.0,
+                                                                   high=1.0,
+                                                                   shape=(height, width, 4),
+                                                                   dtype=np.float32)
+        if 'seg_pred' in self.output:
+            self.observation_space.spaces['seg_pred'] = gym.spaces.Box(low=-np.inf,
+                                                                       high=np.inf,
+                                                                       shape=(height // 8, width // 8, 320),
+                                                                       dtype=np.float32)
+        # self.scan_vis = VisualObject(rgba_color=[1, 0, 0, 1.0], radius=0.05)
+        # self.scan_vis.load()
+
         self.visualize_waypoints = True
         if self.visualize_waypoints and self.mode == 'gui':
             cyl_length = 0.2
@@ -1003,24 +1138,35 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
 
     def get_state(self, collision_links=[]):
         state = super(InteractiveGibsonNavigateSim2RealEnv, self).get_state(collision_links)
-        depth = state['depth']
-        width = depth.shape[0]
-        height = int(width * (480.0 / 640.0))
-        half_diff = int((width - height) / 2)
-        depth = depth[half_diff:half_diff+height, :]
+        if 'rgbd' in state:
+            rgbd = state['rgbd']
+            width = rgbd.shape[0]
+            height = int(width * (480.0 / 640.0))
+            half_diff = int((width - height) / 2)
+            rgbd = rgbd[half_diff:half_diff+height, :]
 
-        # depth[depth < 0.6] = -1.0
-        # depth[depth > 4.0] = -1.0
+            if self.config['robot'] == 'Turtlebot':
+                # ASUS Xtion PRO LIVE
+                low = 0.8
+                high = 3.5
+            elif self.config['robot'] == 'Fetch':
+                # Primesense Carmine 1.09 short-range RGBD sensor
+                low = 0.35
+                high = 1.4
+            else:
+                assert False, 'unknown robot for RGBD observation'
 
-        invalid = depth == 0.0
-        depth[depth < 0.35] = 0.35
-        depth[depth > 5.0] = 5.0
-        depth[invalid] = 0.0
-        depth /= 5.0
-        state['depth'] = depth
+            depth = rgbd[:, :, 3]
+            invalid = depth == 0.0
+            depth[depth < low] = low
+            depth[depth > high] = high
+            depth[invalid] = 0.0
+            depth /= high
+            rgbd[:, :, 3] = depth
 
+            state['rgbd'] = rgbd
         # cv2.imshow('depth', state['depth'])
-        # cv2.imshow('scan', state['scan'] * 10.0)
+        # cv2.imshow('scan', state['scan'])
 
         return state
 
@@ -1030,7 +1176,13 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
     def reset_additional_objects(self):
         for obj in self.additional_objects:
             while True:
-                _, pos = self.scene.get_random_point_floor(self.floor_num, self.random_height)
+                # _, pos = self.scene.get_random_point_floor(self.floor_num, self.random_height)
+                pos_without_z = self.shortest_path[np.random.randint(self.shortest_path.shape[0])]
+                pos = [
+                    pos_without_z[0] + np.random.uniform(-0.2, 0.2),
+                    pos_without_z[1] + np.random.uniform(-0.2, 0.2),
+                    self.scene.get_floor_height(self.floor_num)
+                ]
                 obj.set_position_rotation([pos[0], pos[1], pos[2] + self.random_init_z_offset],
                                           quatToXYZW(euler2quat(0.0, 0.0, 0.0), 'wxyz'))
                 has_collision = False
@@ -1051,7 +1203,7 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
         self.floor_num = self.scene.get_random_floor()
         self.scene.reset_floor(floor=self.floor_num, additional_elevation=0.05)
         self.reset_replaced_objects()
-        self.reset_additional_objects()
+        # self.reset_additional_objects()
         self.new_potential = None
         # if len(self.linear_vel) > 0:
         #     print('linear', np.mean(self.linear_vel), np.max(self.linear_vel))
@@ -1074,6 +1226,7 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
         target = self.target_pos[:2]
         shortest_path, _ = self.scene.get_shortest_path(self.floor_num, source, target, entire_path=True)
         self.shortest_path = shortest_path
+        self.reset_additional_objects()
         # robot_z = self.robots[0].get_position()[2]
         # if self.visualize_waypoints and self.mode == 'gui':
         #     for i in range(1000):
@@ -1674,12 +1827,16 @@ if __name__ == '__main__':
     #     p.addUserDebugParameter('link5', -1.0, 1.0, 0.0),
     # ]
 
+    step_time_list = []
     for episode in range(100):
         print('Episode: {}'.format(episode))
         start = time.time()
         nav_env.reset()
-        for step in range(500):  # 500 steps, 50s world time
+        for step in range(1000):  # 500 steps, 50s world time
             action = nav_env.action_space.sample()
+            # action[:] = 1.0
+            action[0] = -0.4666666666666666
+            action[1] = -0.2
             # action[0] = 2.0 / 3.0
             # action[1] = -1.0
             # action[:] = 1.0
@@ -1694,8 +1851,22 @@ if __name__ == '__main__':
             # action[2:] = np.array(debug_param_values)
 
             state, reward, done, _ = nav_env.step(action)
+            print(state.keys())
+            print(state['sensor'])
+            # embed()
+            # print('reward', reward)
             if done:
                 print('Episode finished after {} timesteps'.format(step + 1))
                 break
-        print(time.time() - start)
+        episode_time = time.time() - start
+        step_time = episode_time / 100.0
+        fps = 1.0 / step_time
+        print('episode_time', episode_time)
+        print('step_time', step_time)
+        print('fps', fps)
+        if episode != 0:
+            step_time_list.append(step_time)
+
+    print('avg_step_time', np.mean(step_time_list))
+    print('avg_fps', 1.0 / np.mean(step_time_list))
     nav_env.clean()

From f9ba2b6b26dbcf6c456fe96529faa22b745f5fde Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Fri, 10 Jan 2020 15:01:53 -0800
Subject: [PATCH 10/58] update to offset

---
 gibson2/envs/motion_planner_env.py | 79 ++++++++++++++++--------------
 1 file changed, 42 insertions(+), 37 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 00f56a9c5..fb4ec237a 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -208,52 +208,47 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return path
 
     def step(self, pt):
-        # point = [x,y]
-        x = int((pt[0]+1)/2.0 * 150)
-        y = int((pt[1]+1)/2.0 * 128)
+        dx = pt[0] * 0.5
+        dy = pt[1] * 0.5
         yaw = self.robots[0].get_rpy()[2]
         orn = pt[2] * np.pi + yaw
-
         armx = int((pt[3]+1)/2.0 * 128)
         army = int((pt[4]+1)/2.0 * 128)
-
         opos = get_base_values(self.robot_id)
-
         self.get_additional_states()
         org_potential = self.get_potential()
-
         state, _, done, _ = super(MotionPlanningBaseArmEnv, self).step(None)
         points = state['pc']
 
-        if x < 128:
-
-            point = points[x, y]
-
-            camera_pose = (self.robots[0].parts['eyes'].get_pose())
-            transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
-                                         quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
-
-            projected_point = (transform_mat).dot(np.array([-point[2], -point[0], point[1], 1]))
-
-            subgoal = projected_point[:2]
-        else:
-            subgoal = list(opos)[:2]
-
-        path = self.plan_base_motion(subgoal[0], subgoal[1], orn)
-        if path is not None:
-            self.marker.set_position([subgoal[0], subgoal[1], 0.1])
-            bq = path[-1]
-            set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
-            state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
-            self.get_additional_states()
-
-        else:
-            set_base_values(self.robot_id, opos)
-            state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
-            reward = -0.02
-
-            del state['pc']
-            return state, reward, done, info
+        # if x < 128:
+        #
+        #     point = points[x, y]
+        #
+        #     camera_pose = (self.robots[0].parts['eyes'].get_pose())
+        #     transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
+        #                                  quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
+        #
+        #     projected_point = (transform_mat).dot(np.array([-point[2], -point[0], point[1], 1]))
+        #
+        #     subgoal = projected_point[:2]
+        # else:
+        #     subgoal = list(opos)[:2]
+        #
+        # path = self.plan_base_motion(subgoal[0], subgoal[1], orn)
+        # if path is not None:
+        #     self.marker.set_position([subgoal[0], subgoal[1], 0.1])
+        #     bq = path[-1]
+        #     set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
+        #     state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
+        #     self.get_additional_states()
+        #
+        # else:
+        #     set_base_values(self.robot_id, opos)
+        #     state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
+        #     reward = -0.02
+        #
+        #     del state['pc']
+        #     return state, reward, done, info
 
         point = points[armx, army]
         camera_pose = (self.robots[0].parts['eyes'].get_pose())
@@ -261,8 +256,18 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                         quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
         projected_point2 = (transform_mat).dot(np.array([-point[2], -point[0], point[1], 1]))[:3]
 
-        projected_point2[2] += 1 # 1m from floor
+        subgoal = [projected_point2[0] + dx, projected_point2[1] + dy]
+        path = self.plan_base_motion(subgoal[0], subgoal[1], orn)
+        if path is not None:
+            self.marker.set_position([subgoal[0], subgoal[1], 0.1])
+            bq = path[-1]
+            set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
+            state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
+            self.get_additional_states()
+        else:
+            set_base_values(self.robot_id, opos)
 
+        self.marker.set_position([subgoal[0], subgoal[1], 0.1])
         self.marker2.set_position(projected_point2)
 
         robot_id = self.robot_id

From 9fd6552587164937dbed4ad1adb7f1d173168ebc Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Tue, 14 Jan 2020 13:48:55 -0800
Subject: [PATCH 11/58] fetch reaching task with motion planner

---
 .../configs/fetch_interactive_nav_s2r_mp.yaml |  24 +-
 gibson2/core/physics/scene.py                 |  27 +-
 gibson2/envs/locomotor_env.py                 | 111 +++--
 gibson2/envs/motion_planner_env.py            | 444 +++++++++++-------
 4 files changed, 365 insertions(+), 241 deletions(-)

diff --git a/examples/configs/fetch_interactive_nav_s2r_mp.yaml b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
index c5a874a63..fff39dfc8 100644
--- a/examples/configs/fetch_interactive_nav_s2r_mp.yaml
+++ b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
@@ -1,6 +1,11 @@
 # scene
 scene: building
 model_id: Avonia
+build_graph: true
+load_texture: true
+should_load_replaced_objects: false
+should_load_additional_objects: false
+trav_map_erosion: 3
 
 # robot
 robot: Fetch
@@ -17,29 +22,32 @@ target_pos: [1.0, 1.0, 0.0]
 target_orn: [0.0, 0.0, 0.0]
 
 is_discrete: false
-additional_states_dim: 6
+additional_states_dim: 23
 
 # reward
+reward_type: dense
 success_reward: 10.0
-slack_reward: -0.01
-potential_reward_weight: 30.0
-#electricity_reward_weight: -0.02
-#stall_torque_reward_weight: -0.0001
+slack_reward: 0.0
+potential_reward_weight: 1.0
 electricity_reward_weight: 0.0
 stall_torque_reward_weight: 0.0
-collision_reward_weight: 0.0
+collision_reward_weight: -0.0
+collision_ignore_link_a_ids: [0, 1, 2]  # ignore collision with these agent's link ids
 
 # discount factor
 discount_factor: 0.99
 
 # termination condition
-dist_tol: 0.2
-max_step: 500
+death_z_thresh: 0.2
+dist_tol: 0.5  # body width
+max_step: 20
 
 # sensor
 output: [sensor, rgb, depth, pc]
 resolution: 128
 fov: 120
+n_horizontal_rays: 220
+n_vertical_beams: 1
 
 # display
 use_filler: true
diff --git a/gibson2/core/physics/scene.py b/gibson2/core/physics/scene.py
index ca041b7d4..dc9d56ccc 100644
--- a/gibson2/core/physics/scene.py
+++ b/gibson2/core/physics/scene.py
@@ -174,17 +174,23 @@ class BuildingScene(Scene):
                 self.floors = sorted(list(map(float, f.readlines())))
                 print('floors', self.floors)
             for f in range(len(self.floors)):
-                trav_map = Image.open(os.path.join(get_model_path(self.model_id), 'floor_trav_{}.png'.format(f)))
-                # obstacle_map = Image.open(os.path.join(get_model_path(self.model_id), 'floor_{}.png'.format(f)))
+                trav_map = np.array(Image.open(
+                    os.path.join(get_model_path(self.model_id), 'floor_trav_{}.png'.format(f))
+                ))
+                obstacle_map = np.array(Image.open(
+                    os.path.join(get_model_path(self.model_id), 'floor_{}.png'.format(f))
+                ))
                 if self.trav_map_original_size is None:
-                    width, height = trav_map.size
-                    assert width == height, 'trav map is not a square'
+                    height, width = trav_map.shape
+                    assert height == width, 'trav map is not a square'
                     self.trav_map_original_size = height
-                    self.trav_map_size = int(self.trav_map_original_size * self.trav_map_default_resolution / self.trav_map_resolution)
-                trav_map = np.array(trav_map.resize((self.trav_map_size, self.trav_map_size)))
-                # obstacle_map = np.array(obstacle_map.resize((self.trav_map_size, self.trav_map_size)))
-                # trav_map[obstacle_map == 0] = 0
+                    self.trav_map_size = int(self.trav_map_original_size *
+                                             self.trav_map_default_resolution /
+                                             self.trav_map_resolution)
+                trav_map[obstacle_map == 0] = 0
+                trav_map = cv2.resize(trav_map, (self.trav_map_size, self.trav_map_size))
                 trav_map = cv2.erode(trav_map, np.ones((self.trav_map_erosion, self.trav_map_erosion)))
+                trav_map[trav_map < 255] = 0
 
                 if self.build_graph:
                     g = nx.Graph()
@@ -238,6 +244,11 @@ class BuildingScene(Scene):
     def world_to_map(self, xy):
         return np.flip((xy / self.trav_map_resolution + self.trav_map_size / 2.0)).astype(np.int)
 
+    def has_node(self, floor, world_xy):
+        map_xy = tuple(self.world_to_map(xy))
+        g = self.floor_graph[floor]
+        return g.has_node(map_xy)
+
     def get_shortest_path(self, floor, source_world, target_world, entire_path=False):
         # print("called shortest path", source_world, target_world)
         assert self.build_graph, 'cannot get shortest path without building the graph'
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index f85933d9a..8bd926053 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -2,7 +2,6 @@ from gibson2.core.physics.interactive_objects import VisualMarker, InteractiveOb
 from semantic_segmentation_pytorch.models import ModelBuilder
 from semantic_segmentation_pytorch.utils import colorEncode
 import semantic_segmentation_pytorch
-from gibson2.core.physics.interactive_objects import VisualObject, InteractiveObj, BoxShape
 import gibson2
 from gibson2.utils.utils import parse_config, rotate_vector_3d, rotate_vector_2d, l2_distance, quatToXYZW
 from gibson2.envs.base_env import BaseEnv
@@ -568,56 +567,56 @@ class NavigateEnv(BaseEnv):
         #     done = True
         #     info['success'] = False
 
-        if done:
-            info['episode_length'] = self.current_step
-            info['path_length'] = self.path_length
-            info['collision_step'] = self.collision_step
-            info['energy_cost'] = self.energy_cost
-            info['stage'] = self.stage
-
-            # Episode = collections.namedtuple('Episode',
-            #                                  ['initial_pos',
-            #                                   'target_pos',
-            #                                   'geodesic_distance',
-            #                                   'shortest_path',
-            #                                   'trajectory',
-            #                                   'success',
-            #                                   'path_efficiency',
-            #                                   'kinematic_disturbance',
-            #                                   'dynamic_disturbance_a',
-            #                                   'dynamic_disturbance_b',
-            #                                   'collision_step',
-            #                                   ])
-            # shortest_path, geodesic_distance = self.scene.get_shortest_path(self.floor_num,
-            #                                                                 self.initial_pos[:2],
-            #                                                                 self.target_pos[:2],
-            #                                                                 entire_path=True)
-            # floor_height = self.scene.get_floor_height(self.floor_num)
-            # shortest_path = np.array([np.array([path[0], path[1], floor_height]) for path in shortest_path])
-            # min_kin_dist = self.path_length * self.robots[0].robot_mass
-            # kinematic_disturbance = min_kin_dist / (min_kin_dist + self.kinematic_disturbance)
-            # min_dyn_dist = self.current_step * self.robots[0].robot_mass * 9.8
-            # dynamic_disturbance_a = min_dyn_dist / (min_dyn_dist + self.dynamic_disturbance_a)
-            # dynamic_disturbance_b = self.current_step / float(self.current_step + self.dynamic_disturbance_b)
-            # object_files = [obj.filename for obj in self.interactive_objects]
-            # episode = Episode(
-                # env=self.scene.model_id,
-                # agent=self.robots[0].model_file,
-                # initial_pos=self.initial_pos,
-                # target_pos=self.target_pos,
-                # geodesic_distance=geodesic_distance,
-                # shortest_path=shortest_path,
-                # agent_trajectory=np.array(self.agent_trajectory),
-                # object_files=object_files,
-                # object_trajectory=np.array(self.object_trajectory),
-                # success=float(info['success']),
-                # path_efficiency=min(1.0, geodesic_distance / self.path_length),
-                # kinematic_disturbance=kinematic_disturbance,
-                # dynamic_disturbance_a=dynamic_disturbance_a,
-                # dynamic_disturbance_b=dynamic_disturbance_b,
-                # collision_step=self.collision_step,
-            # )
-            # self.stored_episodes.append(episode)
+        # if done:
+        #     info['episode_length'] = self.current_step
+        #     info['path_length'] = self.path_length
+        #     info['collision_step'] = self.collision_step
+        #     info['energy_cost'] = self.energy_cost
+        #     info['stage'] = self.stage
+        #
+        #     Episode = collections.namedtuple('Episode',
+        #                                      ['initial_pos',
+        #                                       'target_pos',
+        #                                       'geodesic_distance',
+        #                                       'shortest_path',
+        #                                       'trajectory',
+        #                                       'success',
+        #                                       'path_efficiency',
+        #                                       'kinematic_disturbance',
+        #                                       'dynamic_disturbance_a',
+        #                                       'dynamic_disturbance_b',
+        #                                       'collision_step',
+        #                                       ])
+        #     shortest_path, geodesic_distance = self.scene.get_shortest_path(self.floor_num,
+        #                                                                     self.initial_pos[:2],
+        #                                                                     self.target_pos[:2],
+        #                                                                     entire_path=True)
+        #     floor_height = self.scene.get_floor_height(self.floor_num)
+        #     shortest_path = np.array([np.array([path[0], path[1], floor_height]) for path in shortest_path])
+        #     min_kin_dist = self.path_length * self.robots[0].robot_mass
+        #     kinematic_disturbance = min_kin_dist / (min_kin_dist + self.kinematic_disturbance)
+        #     min_dyn_dist = self.current_step * self.robots[0].robot_mass * 9.8
+        #     dynamic_disturbance_a = min_dyn_dist / (min_dyn_dist + self.dynamic_disturbance_a)
+        #     dynamic_disturbance_b = self.current_step / float(self.current_step + self.dynamic_disturbance_b)
+        #     object_files = [obj.filename for obj in self.interactive_objects]
+        #     episode = Episode(
+        #         env=self.scene.model_id,
+        #         agent=self.robots[0].model_file,
+        #         initial_pos=self.initial_pos,
+        #         target_pos=self.target_pos,
+        #         geodesic_distance=geodesic_distance,
+        #         shortest_path=shortest_path,
+        #         agent_trajectory=np.array(self.agent_trajectory),
+        #         object_files=object_files,
+        #         object_trajectory=np.array(self.object_trajectory),
+        #         success=float(info['success']),
+        #         path_efficiency=min(1.0, geodesic_distance / self.path_length),
+        #         kinematic_disturbance=kinematic_disturbance,
+        #         dynamic_disturbance_a=dynamic_disturbance_a,
+        #         dynamic_disturbance_b=dynamic_disturbance_b,
+        #         collision_step=self.collision_step,
+        #     )
+        #     self.stored_episodes.append(episode)
 
         return done, info
 
@@ -739,9 +738,9 @@ class NavigateRandomEnv(NavigateEnv):
         for _ in range(max_trials):  # if initial and target positions are < 1 meter away from each other, reinitialize
             _, self.target_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height)
             dist = l2_distance(self.initial_pos, self.target_pos)
-            if dist > 5.0:
+            if dist > 1.0:
                 break
-        if dist < 5.0:
+        if dist < 1.0:
             raise Exception("Failed to find initial and target pos that are >1m apart")
         self.robots[0].set_position(pos=[self.initial_pos[0],
                                          self.initial_pos[1],
@@ -993,7 +992,7 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
         # self.linear_vel = []
         # self.angular_vel = []
 
-        # self.eyes_vis = VisualObject(rgba_color=[1, 0, 0, 1.0], radius=0.03)
+        # self.eyes_vis = VisualMarker(rgba_color=[1, 0, 0, 1.0], radius=0.03)
         # self.eyes_vis.load()
         resolution = self.config.get('resolution', 64)
         width = resolution
@@ -1008,13 +1007,13 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
                                                                        high=np.inf,
                                                                        shape=(height // 8, width // 8, 320),
                                                                        dtype=np.float32)
-        # self.scan_vis = VisualObject(rgba_color=[1, 0, 0, 1.0], radius=0.05)
+        # self.scan_vis = VisualMarker(rgba_color=[1, 0, 0, 1.0], radius=0.05)
         # self.scan_vis.load()
 
         self.visualize_waypoints = True
         if self.visualize_waypoints and self.mode == 'gui':
             cyl_length = 0.2
-            self.waypoints_vis = [VisualObject(visual_shape=p.GEOM_CYLINDER,
+            self.waypoints_vis = [VisualMarker(visual_shape=p.GEOM_CYLINDER,
                                                rgba_color=[0, 1, 0, 0.3],
                                                radius=0.1,
                                                length=cyl_length,
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index fb4ec237a..7251faa99 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -26,6 +26,7 @@ from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_na
     get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, \
     set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_base_values
 
+
 class MotionPlanningEnv(NavigateRandomEnv):
     def __init__(self,
                  config_file,
@@ -38,46 +39,50 @@ class MotionPlanningEnv(NavigateRandomEnv):
                  automatic_reset=False,
                  ):
         super(MotionPlanningEnv, self).__init__(config_file,
-                                                           model_id=model_id,
-                                                           mode=mode,
-                                                           action_timestep=action_timestep,
-                                                           physics_timestep=physics_timestep,
-                                                           automatic_reset=automatic_reset,
-                                                           random_height=False,
-                                                           device_idx=device_idx)
+                                                model_id=model_id,
+                                                mode=mode,
+                                                action_timestep=action_timestep,
+                                                physics_timestep=physics_timestep,
+                                                automatic_reset=automatic_reset,
+                                                random_height=False,
+                                                device_idx=device_idx)
 
         self.mp_loaded = False
         # override some parameters:
         self.max_step = 20
         self.planner_step = 0
         self.action_space = gym.spaces.Box(shape=(3,),
-                                           low = -1.0,
+                                           low=-1.0,
                                            high=1.0,
                                            dtype=np.float32)
 
-
     def prepare_motion_planner(self):
         self.robot_id = self.robots[0].robot_ids[0]
         self.mesh_id = self.scene.mesh_body_id
         self.map_size = self.scene.trav_map_original_size * self.scene.trav_map_default_resolution
         print(self.robot_id, self.mesh_id, self.map_size)
         self.marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
-                              rgba_color=[1, 0, 0, 1],
-                              radius=0.1,
-                              length=0.1,
-                              initial_offset=[0, 0, 0.1 / 2.0])
+                                   rgba_color=[1, 0, 0, 1],
+                                   radius=0.1,
+                                   length=0.1,
+                                   initial_offset=[0, 0, 0.1 / 2.0])
         self.marker.load()
         self.mp_loaded = True
 
-    def plan_base_motion(self, x,y,theta):
+    def plan_base_motion(self, x, y, theta):
         half_size = self.map_size / 2.0
-        path = plan_base_motion(self.robot_id, [x,y,theta], ((-half_size, -half_size), (half_size, half_size)), obstacles=[self.mesh_id])
+        path = plan_base_motion(
+            self.robot_id,
+            [x, y, theta],
+            ((-half_size, -half_size), (half_size, half_size)),
+            obstacles=[self.mesh_id]
+        )
         return path
 
     def step(self, pt):
         # point = [x,y]
-        x = int((pt[0]+1)/2.0 * 150)
-        y = int((pt[1]+1)/2.0 * 128)
+        x = int((pt[0] + 1) / 2.0 * 150)
+        y = int((pt[1] + 1) / 2.0 * 128)
         yaw = self.robots[0].get_rpy()[2]
         orn = pt[2] * np.pi + yaw
 
@@ -94,7 +99,7 @@ class MotionPlanningEnv(NavigateRandomEnv):
 
             camera_pose = (self.robots[0].parts['eyes'].get_pose())
             transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
-                                         quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
+                                            quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
 
             projected_point = (transform_mat).dot(np.array([-point[2], -point[0], point[1], 1]))
 
@@ -106,10 +111,10 @@ class MotionPlanningEnv(NavigateRandomEnv):
         if path is not None:
             self.marker.set_position([subgoal[0], subgoal[1], 0.1])
             bq = path[-1]
-            #for bq in path:
+            # for bq in path:
             set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
             state, _, done, info = super(MotionPlanningEnv, self).step([0, 0])
-            #time.sleep(0.05)
+            # time.sleep(0.05)
             self.get_additional_states()
             reward = org_potential - self.get_potential()
         else:
@@ -131,8 +136,8 @@ class MotionPlanningEnv(NavigateRandomEnv):
 
         if self.planner_step > self.max_step:
             done = True
-        #print(info)
-        #if info['success']:
+        # print(info)
+        # if info['success']:
         #    done = True
         info['planner_step'] = self.planner_step
         del state['pc']
@@ -163,179 +168,281 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                  automatic_reset=False,
                  ):
         super(MotionPlanningBaseArmEnv, self).__init__(config_file,
-                                                           model_id=model_id,
-                                                           mode=mode,
-                                                           action_timestep=action_timestep,
-                                                           physics_timestep=physics_timestep,
-                                                           automatic_reset=automatic_reset,
-                                                           random_height=False,
-                                                           device_idx=device_idx)
+                                                       model_id=model_id,
+                                                       mode=mode,
+                                                       action_timestep=action_timestep,
+                                                       physics_timestep=physics_timestep,
+                                                       automatic_reset=automatic_reset,
+                                                       random_height=False,
+                                                       device_idx=device_idx)
+        resolution = self.config.get('resolution', 64)
+        # width = resolution
+        # height = int(width * (480.0 / 640.0))
+        # if 'rgb' in self.output:
+        #     self.observation_space.spaces['rgb'] = gym.spaces.Box(low=0.0,
+        #                                                           high=1.0,
+        #                                                           shape=(height, width, 3),
+        #                                                           dtype=np.float32)
+        # if 'depth' in self.output:
+        #     self.observation_space.spaces['depth'] = gym.spaces.Box(low=0.0,
+        #                                                             high=1.0,
+        #                                                             shape=(height, width, 1),
+        #                                                             dtype=np.float32)
 
-        self.mp_loaded = False
-        # override some parameters:
-        self.max_step = 20
-        self.planner_step = 0
+        self.visualize_waypoints = True
+        if self.visualize_waypoints and self.mode == 'gui':
+            cyl_length = 0.2
+            self.waypoints_vis = [VisualMarker(visual_shape=p.GEOM_CYLINDER,
+                                               rgba_color=[0, 1, 0, 0.3],
+                                               radius=0.1,
+                                               length=cyl_length,
+                                               initial_offset=[0, 0, cyl_length / 2.0]) for _ in range(1000)]
+            for waypoint in self.waypoints_vis:
+                waypoint.load()
+
+        self.new_potential = None
+        self.collision_reward_weight = collision_reward_weight
+        # action[0] = arm_img_u
+        # action[1] = arm_img_v
+        # action[2] = base_dx
+        # action[3] = base_dy
+        # action[4] = base_dtheta
         self.action_space = gym.spaces.Box(shape=(5,),
-                                           low = -1.0,
+                                           low=-1.0,
                                            high=1.0,
                                            dtype=np.float32)
-
+        self.prepare_motion_planner()
 
     def prepare_motion_planner(self):
         self.robot_id = self.robots[0].robot_ids[0]
         self.mesh_id = self.scene.mesh_body_id
         self.map_size = self.scene.trav_map_original_size * self.scene.trav_map_default_resolution
-        print(self.robot_id, self.mesh_id, self.map_size)
-        self.marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
-                              rgba_color=[1, 0, 0, 1],
-                              radius=0.1,
-                              length=0.1,
-                              initial_offset=[0, 0, 0.1 / 2.0])
-        self.marker.load()
+        # print(self.robot_id, self.mesh_id, self.map_size)
+        self.base_marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
+                                        rgba_color=[1, 0, 0, 1],
+                                        radius=0.1,
+                                        length=0.1,
+                                        initial_offset=[0, 0, 0.1 / 2.0])
+        self.base_marker.load()
 
-        self.marker2 = VisualMarker(visual_shape=p.GEOM_CYLINDER,
-                                   rgba_color=[1, 1, 0, 1],
-                                   radius=0.1,
-                                   length=0.1,
-                                   initial_offset=[0, 0, 0.1 / 2.0])
-        self.marker2.load()
+        self.arm_marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
+                                       rgba_color=[1, 1, 0, 1],
+                                       radius=0.1,
+                                       length=0.1,
+                                       initial_offset=[0, 0, 0.1 / 2.0])
+        self.arm_marker.load()
 
-        self.mp_loaded = True
-
-    def plan_base_motion(self, x,y,theta):
+    def plan_base_motion(self, x, y, theta):
         half_size = self.map_size / 2.0
-        path = plan_base_motion(self.robot_id, [x,y,theta], ((-half_size, -half_size), (half_size, half_size)), obstacles=[self.mesh_id])
+        path = plan_base_motion(self.robot_id, [x, y, theta], ((-half_size, -half_size), (half_size, half_size)),
+                                obstacles=[self.mesh_id])
         return path
 
-    def step(self, pt):
-        dx = pt[0] * 0.5
-        dy = pt[1] * 0.5
-        yaw = self.robots[0].get_rpy()[2]
-        orn = pt[2] * np.pi + yaw
-        armx = int((pt[3]+1)/2.0 * 128)
-        army = int((pt[4]+1)/2.0 * 128)
-        opos = get_base_values(self.robot_id)
-        self.get_additional_states()
-        org_potential = self.get_potential()
-        state, _, done, _ = super(MotionPlanningBaseArmEnv, self).step(None)
+    def global_to_local(self, pos, cur_pos, cur_rot):
+        return rotate_vector_3d(pos - cur_pos, *cur_rot)
+
+    def get_additional_states(self):
+        pos_noise = 0.0
+        cur_pos = self.robots[0].get_position()
+        cur_pos[:2] += np.random.normal(0, pos_noise, 2)
+
+        rot_noise = 0.0 / 180.0 * np.pi
+        cur_rot = self.robots[0].get_rpy()
+        cur_rot = (cur_rot[0], cur_rot[1], cur_rot[2] + np.random.normal(0, rot_noise))
+
+        target_pos_local = self.global_to_local(self.target_pos, cur_pos, cur_rot)
+        # linear_velocity_local = rotate_vector_3d(self.robots[0].robot_body.velocity(), *cur_rot)[:2]
+        # angular_velocity_local = rotate_vector_3d(self.robots[0].robot_body.angular_velocity(), *cur_rot)[2:3]
+
+        gt_pos = self.robots[0].get_position()[:2]
+        source = gt_pos
+        target = self.target_pos[:2]
+        _, geodesic_dist = self.scene.get_shortest_path(self.floor_num, source, target)
+        # geodesic_dist = 0.0
+        robot_z = self.robots[0].get_position()[2]
+        if self.visualize_waypoints and self.mode == 'gui':
+            for i in range(1000):
+                self.waypoints_vis[i].set_position(pos=np.array([0.0, 0.0, 0.0]))
+            for i in range(min(1000, self.shortest_path.shape[0])):
+                self.waypoints_vis[i].set_position(pos=np.array([self.shortest_path[i][0],
+                                                                 self.shortest_path[i][1],
+                                                                 robot_z]))
+
+        closest_idx = np.argmin(np.linalg.norm(cur_pos[:2] - self.shortest_path, axis=1))
+        # approximate geodesic_dist to speed up training
+        # geodesic_dist = np.sum(
+        #     np.linalg.norm(self.shortest_path[closest_idx:-1] - self.shortest_path[closest_idx + 1:], axis=1)
+        # )
+        shortest_path = self.shortest_path[closest_idx:closest_idx + self.scene.num_waypoints]
+        num_remaining_waypoints = self.scene.num_waypoints - shortest_path.shape[0]
+        if num_remaining_waypoints > 0:
+            remaining_waypoints = np.tile(self.target_pos[:2], (num_remaining_waypoints, 1))
+            shortest_path = np.concatenate((shortest_path, remaining_waypoints), axis=0)
+
+        shortest_path = np.concatenate((shortest_path, robot_z * np.ones((shortest_path.shape[0], 1))), axis=1)
+
+        waypoints_local_xy = np.array([self.global_to_local(waypoint, cur_pos, cur_rot)[:2]
+                                       for waypoint in shortest_path]).flatten()
+        additional_states = np.concatenate((waypoints_local_xy,
+                                            target_pos_local))
+                                            # linear_velocity_local,
+                                            # angular_velocity_local))
+        # cache results for reward calculation
+        self.new_potential = geodesic_dist
+        assert len(additional_states) == self.additional_states_dim, 'additional states dimension mismatch'
+        return additional_states
+
+    def get_state(self, collision_links=[]):
+        state = super(MotionPlanningBaseArmEnv, self).get_state(collision_links)
+        for modality in ['depth', 'pc']:
+            if modality in state:
+                img = state[modality]
+                # width = img.shape[0]
+                # height = int(width * (480.0 / 640.0))
+                # half_diff = int((width - height) / 2)
+                # img = img[half_diff:half_diff+height, :]
+                if modality == 'depth':
+                    high = 25.0
+                    img[img > high] = high
+                    img /= high
+                state[modality] = img
+
+        # cv2.imshow('depth', state['depth'])
+        # cv2.imshow('scan', state['scan'])
+
+        return state
+
+    def get_potential(self):
+        return self.new_potential
+
+    def after_reset_agent(self):
+        source = self.robots[0].get_position()[:2]
+        target = self.target_pos[:2]
+        shortest_path, geodesic_dist = self.scene.get_shortest_path(self.floor_num, source, target, entire_path=True)
+        self.shortest_path = shortest_path
+        self.new_potential = geodesic_dist
+
+    def step(self, action):
+        self.current_step += 1
+
+        state = self.get_state()
         points = state['pc']
+        height, width = points.shape[0:2]
 
-        # if x < 128:
-        #
-        #     point = points[x, y]
-        #
-        #     camera_pose = (self.robots[0].parts['eyes'].get_pose())
-        #     transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
-        #                                  quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
-        #
-        #     projected_point = (transform_mat).dot(np.array([-point[2], -point[0], point[1], 1]))
-        #
-        #     subgoal = projected_point[:2]
-        # else:
-        #     subgoal = list(opos)[:2]
-        #
-        # path = self.plan_base_motion(subgoal[0], subgoal[1], orn)
-        # if path is not None:
-        #     self.marker.set_position([subgoal[0], subgoal[1], 0.1])
-        #     bq = path[-1]
-        #     set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
-        #     state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
-        #     self.get_additional_states()
-        #
-        # else:
-        #     set_base_values(self.robot_id, opos)
-        #     state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
-        #     reward = -0.02
-        #
-        #     del state['pc']
-        #     return state, reward, done, info
+        arm_img_u = int((action[0] + 1) / 2.0 * height)
+        arm_img_v = int((action[1] + 1) / 2.0 * width)
 
-        point = points[armx, army]
+        # original_pos = get_base_values(self.robot_id)
+        point = points[arm_img_u, arm_img_v]
         camera_pose = (self.robots[0].parts['eyes'].get_pose())
         transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
                                         quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
-        projected_point2 = (transform_mat).dot(np.array([-point[2], -point[0], point[1], 1]))[:3]
+        arm_subgoal = transform_mat.dot(np.array([-point[2], -point[0], point[1], 1]))[:3]
 
-        subgoal = [projected_point2[0] + dx, projected_point2[1] + dy]
-        path = self.plan_base_motion(subgoal[0], subgoal[1], orn)
-        if path is not None:
-            self.marker.set_position([subgoal[0], subgoal[1], 0.1])
-            bq = path[-1]
-            set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
-            state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
-            self.get_additional_states()
+        dx = action[2] * 0.5
+        dy = action[3] * 0.5
+        yaw = self.robots[0].get_rpy()[2]
+        dtheta = action[4] * np.pi
+        base_subgoal_orn = dtheta + yaw
+
+        base_subgoal_pos = np.array([arm_subgoal[0] + dx, arm_subgoal[1] + dy, self.robots[0].get_position()[2]])
+        self.arm_marker.set_position(arm_subgoal)
+        self.base_marker.set_position(base_subgoal_pos)
+
+        # embed()
+        # print('before plan_base_motion')
+        # path = self.plan_base_motion(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
+        # print('after plan_base_motion')
+        # embed()
+        # print('base mp finished -------------------')
+
+        is_base_subgoal_valid = self.scene.has_node(self.floor_num, base_subgoal_pos[:2])
+        if is_base_subgoal_valid:
+        # if path is not None:
+        #     print('subgoal succeed')
+        #     last_step = path[-1]
+            set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
+            # state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
+            # self.get_additional_states()
         else:
-            set_base_values(self.robot_id, opos)
+            # print('subgoal fail')
+            # set_base_values(self.robot_id, original_pos)
+            reward = -0.02
+            # print('reward', reward)
+            # time.sleep(3)
+            info = {}
+            done, info = self.get_termination([], info)
 
-        self.marker.set_position([subgoal[0], subgoal[1], 0.1])
-        self.marker2.set_position(projected_point2)
+            if done and self.automatic_reset:
+                info['last_observation'] = state
+                state = self.reset()
 
-        robot_id = self.robot_id
-        arm_joints = joints_from_names(robot_id, ['torso_lift_joint', 'shoulder_pan_joint', 'shoulder_lift_joint',
-                                                  'upperarm_roll_joint',
-                                                  'elbow_flex_joint', 'forearm_roll_joint', 'wrist_flex_joint',
-                                                  'wrist_roll_joint'])
-        max_limits = [0, 0] + get_max_limits(robot_id, arm_joints) + [0.05, 0.05]
-        min_limits = [0, 0] + get_min_limits(robot_id, arm_joints) + [0, 0]
-        rest_position = [0, 0] + list(get_joint_positions(robot_id, arm_joints)) + [0.04, 0.04]
+            del state['pc']
+            return state, reward, done, info
+
+        arm_joints = joints_from_names(self.robot_id,
+                                       ['torso_lift_joint',
+                                        'shoulder_pan_joint',
+                                        'shoulder_lift_joint',
+                                        'upperarm_roll_joint',
+                                        'elbow_flex_joint',
+                                        'forearm_roll_joint',
+                                        'wrist_flex_joint',
+                                        'wrist_roll_joint'])
+        max_limits = [0., 0.] + get_max_limits(self.robot_id, arm_joints) + [0.05, 0.05]
+        min_limits = [0., 0.] + get_min_limits(self.robot_id, arm_joints) + [0., 0.]
+        rest_position = [0., 0.] + list(get_joint_positions(self.robot_id, arm_joints)) + [0.04, 0.04]
         joint_range = list(np.array(max_limits) - np.array(min_limits))
         joint_range = [item + 1 for item in joint_range]
-        jd = [0.1 for item in joint_range]
-        #
-        jp = p.calculateInverseKinematics(self.robot_id, self.robots[0].parts['gripper_link'].body_part_index, projected_point2,
-                                     lowerLimits=min_limits,
-                                     upperLimits=max_limits,
-                                     jointRanges=joint_range,
-                                     restPoses=rest_position,
-                                     jointDamping=jd,
-                                     solver=p.IK_DLS,
-                                     maxNumIterations=100)[2:10]
+        joint_damping = [0.1 for _ in joint_range]
+        # print('before arm IK')
+        joint_positions = p.calculateInverseKinematics(self.robot_id,
+                                                       self.robots[0].parts['gripper_link'].body_part_index,
+                                                       arm_subgoal,
+                                                       lowerLimits=min_limits,
+                                                       upperLimits=max_limits,
+                                                       jointRanges=joint_range,
+                                                       restPoses=rest_position,
+                                                       jointDamping=joint_damping,
+                                                       solver=p.IK_DLS,
+                                                       maxNumIterations=100)[2:10]
+        # print('after arm IK')
+        # print(joint_positions)
+        set_joint_positions(self.robot_id, arm_joints, joint_positions)
+        # dist = l2_distance(arm_subgoal, self.get_position_of_interest())
+        # print(dist)
 
-        print(jp)
-        set_joint_positions(self.robot_id, arm_joints, jp)
-        p1 = self.robots[0].get_end_effector_position()
-        dist = np.linalg.norm(np.array(p1) - np.array(projected_point2))
+        # trigger re-computation of geodesic distance for get_reward
+        self.get_state()
 
-        print(dist)
-        rp = (0.38548146667743244, 1.1522793897208579, 1.2576467971105596, -0.312703569911879,
-         1.7404867100093226, -0.0962895617312548, -1.4418232619629425, -1.6780152866247762)
+        info = {}
+        reward, info = self.get_reward([], action, info)
+        done, info = self.get_termination([], info)
 
-        reward = org_potential - self.get_potential()
+        # arm reset
+        arm_default_joint_positions = (0.38548146667743244, 1.1522793897208579,
+                                       1.2576467971105596, -0.312703569911879,
+                                       1.7404867100093226, -0.0962895617312548,
+                                       -1.4418232619629425, -1.6780152866247762)
+        set_joint_positions(self.robot_id, arm_joints, arm_default_joint_positions)
 
-        if l2_distance(self.target_pos, self.robots[0].get_end_effector_position()) < self.dist_tol:
-            reward += self.success_reward  # |success_reward| = 10.0 per step
-            done = True
-        else:
-            done = False
-
-        set_joint_positions(self.robot_id, arm_joints, rp)
-        state, _, _, info = super(MotionPlanningBaseArmEnv, self).step(None)
-
-        print('reward', reward)
-        self.planner_step += 1
-
-        if self.planner_step > self.max_step:
-            done = True
-
-        info['planner_step'] = self.planner_step
+        # need to call get_state again after arm reset (camera height could be different if torso moves)
+        # TODO: should only call get_state once or twice (e.g. disable torso movement, or cache get_state result)
+        state = self.get_state()
+        if done and self.automatic_reset:
+            state = self.reset()
         del state['pc']
+        # print('reward', reward)
+        # time.sleep(3)
 
         return state, reward, done, info
 
     def reset(self):
         state = super(MotionPlanningBaseArmEnv, self).reset()
-
-        if not self.mp_loaded:
-            self.prepare_motion_planner()
-
-        self.target_pos[2] = 1.0
-        self.target_pos_vis_obj.set_position(self.target_pos)
-
-        self.planner_step = 0
         del state['pc']
-
         return state
 
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -351,26 +458,25 @@ if __name__ == '__main__':
     args = parser.parse_args()
 
     nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
-                          mode=args.mode,
-                          action_timestep=1.0 / 1000000.0,
-                          physics_timestep=1.0 / 1000000.0)
-
-
+                                       mode=args.mode,
+                                       action_timestep=1.0 / 1000000.0,
+                                       physics_timestep=1.0 / 1000000.0)
 
     for episode in range(100):
         print('Episode: {}'.format(episode))
         start = time.time()
-        nav_env.reset()
+        state = nav_env.reset()
         for i in range(150):
+            # print('Step: {}'.format(i))
             action = nav_env.action_space.sample()
             state, reward, done, info = nav_env.step(action)
-            #print(state, reward, done, info)
-            #time.sleep(0.05)
-            #nav_env.step()
-        #for step in range(50):  # 500 steps, 50s world time
-        #    action = nav_env.action_space.sample()
-        #    state, reward, done, _ = nav_env.step(action)
-        #    # print('reward', reward)
+            # print('Reward:', reward)
+            # time.sleep(0.05)
+            # nav_env.step()
+            # for step in range(50):  # 500 steps, 50s world time
+            #    action = nav_env.action_space.sample()
+            #    state, reward, done, _ = nav_env.step(action)
+            #    # print('reward', reward)
             if done:
                 print('Episode finished after {} timesteps'.format(i + 1))
                 break

From c2b3243f6a4cb1a8865b3993fb56e3e1b7103039 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Tue, 14 Jan 2020 17:35:48 -0800
Subject: [PATCH 12/58] add visualiation utils

---
 gibson2/core/physics/scene.py      |  2 +-
 gibson2/envs/locomotor_env.py      | 86 +++++++++++++++---------------
 gibson2/envs/motion_planner_env.py | 53 +++++++++++++-----
 3 files changed, 83 insertions(+), 58 deletions(-)

diff --git a/gibson2/core/physics/scene.py b/gibson2/core/physics/scene.py
index dc9d56ccc..c9e584e20 100644
--- a/gibson2/core/physics/scene.py
+++ b/gibson2/core/physics/scene.py
@@ -245,7 +245,7 @@ class BuildingScene(Scene):
         return np.flip((xy / self.trav_map_resolution + self.trav_map_size / 2.0)).astype(np.int)
 
     def has_node(self, floor, world_xy):
-        map_xy = tuple(self.world_to_map(xy))
+        map_xy = tuple(self.world_to_map(world_xy))
         g = self.floor_graph[floor]
         return g.has_node(map_xy)
 
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 8bd926053..572ea4900 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -1,7 +1,7 @@
 from gibson2.core.physics.interactive_objects import VisualMarker, InteractiveObj, BoxShape
-from semantic_segmentation_pytorch.models import ModelBuilder
-from semantic_segmentation_pytorch.utils import colorEncode
-import semantic_segmentation_pytorch
+#from semantic_segmentation_pytorch.models import ModelBuilder
+#from semantic_segmentation_pytorch.utils import colorEncode
+#import semantic_segmentation_pytorch
 import gibson2
 from gibson2.utils.utils import parse_config, rotate_vector_3d, rotate_vector_2d, l2_distance, quatToXYZW
 from gibson2.envs.base_env import BaseEnv
@@ -217,46 +217,46 @@ class NavigateEnv(BaseEnv):
             self.comp.load_state_dict(
                 torch.load(os.path.join(gibson2.assets_path, 'networks', 'model.pth')))
             self.comp.eval()
-        if 'seg_pred' in self.output:
-            # torch.cuda.set_device(1)
-            encoder_weights = os.path.join(os.path.dirname(semantic_segmentation_pytorch.__file__),
-                                           # 'ckpt/ade20k-resnet18dilated-ppm_deepsup/encoder_epoch_20.pth')
-                                           'ckpt/ade20k-mobilenetv2dilated-c1_deepsup/encoder_epoch_20.pth')
-
-            self.seg_encoder = ModelBuilder.build_encoder(
-                arch='mobilenetv2dilated',
-                # arch='resnet18dilated',
-                weights=encoder_weights)
-            self.seg_encoder.cuda()
-            self.seg_encoder.eval()
-
-            decoder_weights = os.path.join(os.path.dirname(semantic_segmentation_pytorch.__file__),
-                                           # 'ckpt/ade20k-resnet18dilated-ppm_deepsup/decoder_epoch_20.pth')
-                                           'ckpt/ade20k-mobilenetv2dilated-c1_deepsup/decoder_epoch_20.pth')
-            self.seg_decoder = ModelBuilder.build_decoder(
-                # arch='ppm_deepsup',
-                # fc_dim=512,
-                arch='c1_deepsup',
-                fc_dim=320,
-                num_class=150,
-                weights=decoder_weights,
-                use_softmax=True)
-            self.seg_decoder.cuda()
-            self.seg_decoder.eval()
-            color_path = os.path.join(os.path.dirname(semantic_segmentation_pytorch.__file__), 'data/color150.mat')
-            self.seg_colors = loadmat(color_path)['colors']
-
-            self.seg_normalizer = transforms.Normalize(
-                mean=[0.485, 0.456, 0.406],
-                std=[0.229, 0.224, 0.225])
-
-            self.seg_pred_space = gym.spaces.Box(low=0.0,
-                                                 high=1.0,
-                                                 shape=(self.config.get('resolution', 64) // 8,
-                                                        self.config.get('resolution', 64) // 8,
-                                                        320),
-                                                 dtype=np.float32)
-            observation_space['seg_pred'] = self.seg_pred_space
+        # if 'seg_pred' in self.output:
+        #     # torch.cuda.set_device(1)
+        #     encoder_weights = os.path.join(os.path.dirname(semantic_segmentation_pytorch.__file__),
+        #                                    # 'ckpt/ade20k-resnet18dilated-ppm_deepsup/encoder_epoch_20.pth')
+        #                                    'ckpt/ade20k-mobilenetv2dilated-c1_deepsup/encoder_epoch_20.pth')
+        #
+        #     self.seg_encoder = ModelBuilder.build_encoder(
+        #         arch='mobilenetv2dilated',
+        #         # arch='resnet18dilated',
+        #         weights=encoder_weights)
+        #     self.seg_encoder.cuda()
+        #     self.seg_encoder.eval()
+        #
+        #     decoder_weights = os.path.join(os.path.dirname(semantic_segmentation_pytorch.__file__),
+        #                                    # 'ckpt/ade20k-resnet18dilated-ppm_deepsup/decoder_epoch_20.pth')
+        #                                    'ckpt/ade20k-mobilenetv2dilated-c1_deepsup/decoder_epoch_20.pth')
+        #     self.seg_decoder = ModelBuilder.build_decoder(
+        #         # arch='ppm_deepsup',
+        #         # fc_dim=512,
+        #         arch='c1_deepsup',
+        #         fc_dim=320,
+        #         num_class=150,
+        #         weights=decoder_weights,
+        #         use_softmax=True)
+        #     self.seg_decoder.cuda()
+        #     self.seg_decoder.eval()
+        #     color_path = os.path.join(os.path.dirname(semantic_segmentation_pytorch.__file__), 'data/color150.mat')
+        #     self.seg_colors = loadmat(color_path)['colors']
+        #
+        #     self.seg_normalizer = transforms.Normalize(
+        #         mean=[0.485, 0.456, 0.406],
+        #         std=[0.229, 0.224, 0.225])
+        #
+        #     self.seg_pred_space = gym.spaces.Box(low=0.0,
+        #                                          high=1.0,
+        #                                          shape=(self.config.get('resolution', 64) // 8,
+        #                                                 self.config.get('resolution', 64) // 8,
+        #                                                 320),
+        #                                          dtype=np.float32)
+        #     observation_space['seg_pred'] = self.seg_pred_space
 
         self.observation_space = gym.spaces.Dict(observation_space)
         self.action_space = self.robots[0].action_space
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 7251faa99..964f515d4 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -26,7 +26,6 @@ from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_na
     get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, \
     set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_base_values
 
-
 class MotionPlanningEnv(NavigateRandomEnv):
     def __init__(self,
                  config_file,
@@ -37,6 +36,7 @@ class MotionPlanningEnv(NavigateRandomEnv):
                  physics_timestep=1 / 240.0,
                  device_idx=0,
                  automatic_reset=False,
+                 eval=False
                  ):
         super(MotionPlanningEnv, self).__init__(config_file,
                                                 model_id=model_id,
@@ -56,6 +56,8 @@ class MotionPlanningEnv(NavigateRandomEnv):
                                            high=1.0,
                                            dtype=np.float32)
 
+        self.eval = eval
+
     def prepare_motion_planner(self):
         self.robot_id = self.robots[0].robot_ids[0]
         self.mesh_id = self.scene.mesh_body_id
@@ -71,12 +73,15 @@ class MotionPlanningEnv(NavigateRandomEnv):
 
     def plan_base_motion(self, x, y, theta):
         half_size = self.map_size / 2.0
+        if self.mode == 'gui':
+            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, False)
         path = plan_base_motion(
             self.robot_id,
             [x, y, theta],
             ((-half_size, -half_size), (half_size, half_size)),
-            obstacles=[self.mesh_id]
-        )
+            obstacles=[self.mesh_id])
+        if self.mode == 'gui':
+            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
         return path
 
     def step(self, pt):
@@ -110,11 +115,14 @@ class MotionPlanningEnv(NavigateRandomEnv):
         path = self.plan_base_motion(subgoal[0], subgoal[1], orn)
         if path is not None:
             self.marker.set_position([subgoal[0], subgoal[1], 0.1])
-            bq = path[-1]
-            # for bq in path:
-            set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
+            if not self.eval:
+                bq = path[-1]
+                set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
+            else:
+                for bq in path:
+                    set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
+                    time.sleep(0.02) # for visualization
             state, _, done, info = super(MotionPlanningEnv, self).step([0, 0])
-            # time.sleep(0.05)
             self.get_additional_states()
             reward = org_potential - self.get_potential()
         else:
@@ -166,6 +174,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                  physics_timestep=1 / 240.0,
                  device_idx=0,
                  automatic_reset=False,
+                 eval = False,
                  ):
         super(MotionPlanningBaseArmEnv, self).__init__(config_file,
                                                        model_id=model_id,
@@ -174,7 +183,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                        physics_timestep=physics_timestep,
                                                        automatic_reset=automatic_reset,
                                                        random_height=False,
-                                                       device_idx=device_idx)
+                                                       device_idx=device_idx,
+                                                       eval=eval)
         resolution = self.config.get('resolution', 64)
         # width = resolution
         # height = int(width * (480.0 / 640.0))
@@ -234,8 +244,12 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
     def plan_base_motion(self, x, y, theta):
         half_size = self.map_size / 2.0
+        if self.mode == 'gui':
+            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, False)
         path = plan_base_motion(self.robot_id, [x, y, theta], ((-half_size, -half_size), (half_size, half_size)),
                                 obstacles=[self.mesh_id])
+        if self.mode == 'gui':
+            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
         return path
 
     def global_to_local(self, pos, cur_pos, cur_rot):
@@ -358,10 +372,18 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         is_base_subgoal_valid = self.scene.has_node(self.floor_num, base_subgoal_pos[:2])
         if is_base_subgoal_valid:
-        # if path is not None:
-        #     print('subgoal succeed')
-        #     last_step = path[-1]
-            set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
+            if self.eval:
+                path = self.plan_base_motion(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
+                if path is not None:
+                    for way_point in path:
+                        set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
+                        time.sleep(0.1)
+            else:
+                set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
+                
+            # print('subgoal succeed')
+            # last_step = path[-1]
+            # set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
             # state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
             # self.get_additional_states()
         else:
@@ -409,6 +431,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # print('after arm IK')
         # print(joint_positions)
         set_joint_positions(self.robot_id, arm_joints, joint_positions)
+        if self.eval:
+            time.sleep(0.5) # for visualization
         # dist = l2_distance(arm_subgoal, self.get_position_of_interest())
         # print(dist)
 
@@ -457,10 +481,11 @@ if __name__ == '__main__':
 
     args = parser.parse_args()
 
-    nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
+    nav_env = MotionPlanningEnv(config_file=args.config,
                                        mode=args.mode,
                                        action_timestep=1.0 / 1000000.0,
-                                       physics_timestep=1.0 / 1000000.0)
+                                       physics_timestep=1.0 / 1000000.0,
+                                eval=True)
 
     for episode in range(100):
         print('Episode: {}'.format(episode))

From ce495a77bb28cae8a4800e558b12da490a491b9e Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Tue, 14 Jan 2020 17:41:01 -0800
Subject: [PATCH 13/58] arm img u off by one bug

---
 gibson2/envs/motion_planner_env.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 7251faa99..a940b3f85 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -329,8 +329,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         points = state['pc']
         height, width = points.shape[0:2]
 
-        arm_img_u = int((action[0] + 1) / 2.0 * height)
-        arm_img_v = int((action[1] + 1) / 2.0 * width)
+        arm_img_u = np.clip(int((action[0] + 1) / 2.0 * height), 0, height - 1)
+        arm_img_v = np.clip(int((action[1] + 1) / 2.0 * width), 0, width - 1)
 
         # original_pos = get_base_values(self.robot_id)
         point = points[arm_img_u, arm_img_v]

From 5284640e967d4dd4487ac8dfdd4de3a4af8ab89f Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Tue, 14 Jan 2020 19:59:33 -0800
Subject: [PATCH 14/58] fix mp visualization bug for eval

---
 gibson2/envs/motion_planner_env.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 266c128e1..167f1083b 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -174,7 +174,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                  physics_timestep=1 / 240.0,
                  device_idx=0,
                  automatic_reset=False,
-                 eval = False,
+                 eval=False,
                  ):
         super(MotionPlanningBaseArmEnv, self).__init__(config_file,
                                                        model_id=model_id,
@@ -183,9 +183,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                        physics_timestep=physics_timestep,
                                                        automatic_reset=automatic_reset,
                                                        random_height=False,
-                                                       device_idx=device_idx,
-                                                       eval=eval)
-        resolution = self.config.get('resolution', 64)
+                                                       device_idx=device_idx)
+        # resolution = self.config.get('resolution', 64)
         # width = resolution
         # height = int(width * (480.0 / 640.0))
         # if 'rgb' in self.output:
@@ -199,6 +198,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         #                                                             shape=(height, width, 1),
         #                                                             dtype=np.float32)
 
+        self.eval = eval
         self.visualize_waypoints = True
         if self.visualize_waypoints and self.mode == 'gui':
             cyl_length = 0.2

From 29047c27a9b54649861c1d3f61622572207d12fd Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Thu, 16 Jan 2020 22:04:40 -0800
Subject: [PATCH 15/58] local occupancy map motion planner

---
 gibson2/envs/motion_planner_env.py       | 76 ++++++++++++++++++++--
 gibson2/external/pybullet_tools/utils.py | 82 ++++++++++++++++++++++++
 2 files changed, 154 insertions(+), 4 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 167f1083b..683d400cf 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -24,7 +24,7 @@ from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_na
     set_joint_positions, add_data_path, connect, plan_base_motion, plan_joint_motion, enable_gravity, \
     joint_controller, dump_body, load_model, joints_from_names, user_input, disconnect, get_joint_positions, \
     get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, \
-    set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_base_values
+    set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_base_values, plan_base_motion_2d
 
 class MotionPlanningEnv(NavigateRandomEnv):
     def __init__(self,
@@ -84,6 +84,8 @@ class MotionPlanningEnv(NavigateRandomEnv):
             p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
         return path
 
+
+
     def step(self, pt):
         # point = [x,y]
         x = int((pt[0] + 1) / 2.0 * 150)
@@ -223,6 +225,11 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                            dtype=np.float32)
         self.prepare_motion_planner()
 
+        self.grid_resolution = 400
+        self.occupancy_range = 8.0  # m
+        self.robot_footprint_radius = 0.2
+        self.robot_footprint_radius_in_map = int(self.robot_footprint_radius / self.occupancy_range * self.grid_resolution)
+
     def prepare_motion_planner(self):
         self.robot_id = self.robots[0].robot_ids[0]
         self.mesh_id = self.scene.mesh_body_id
@@ -252,9 +259,64 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
         return path
 
+    def plan_base_motion_2d(self, x, y, theta):
+        half_size = self.map_size / 2.0
+        if self.mode == 'gui':
+            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, False)
+
+        grid = self.get_local_occupancy_grid()
+        path = plan_base_motion_2d(self.robot_id, [x, y, theta], ((-half_size, -half_size), (half_size, half_size)),
+                                   map_2d=grid, occupancy_range=self.occupancy_range, grid_resolution=self.grid_resolution,
+                                   robot_footprint_radius_in_map = self.robot_footprint_radius_in_map, obstacles=[])
+        if self.mode == 'gui':
+            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
+        return path
+
     def global_to_local(self, pos, cur_pos, cur_rot):
         return rotate_vector_3d(pos - cur_pos, *cur_rot)
 
+    def get_local_occupancy_grid(self):
+        # assumes it has "scan" state
+        # assumes it is either Turtlebot or fetch
+        if self.config['robot'] == 'Turtlebot':
+            # Hokuyo URG-04LX-UG01
+            laser_linear_range = 5.6
+            laser_angular_range = 240.0
+            min_laser_dist = 0.05
+            laser_link_name = 'scan_link'
+        elif self.config['robot'] == 'Fetch':
+            # SICK TiM571-2050101 Laser Range Finder
+            laser_linear_range = 25.0
+            laser_angular_range = 220.0
+            min_laser_dist = 0.1
+            laser_link_name = 'laser_link'
+
+        laser_angular_half_range = laser_angular_range / 2.0
+        laser_pose = self.robots[0].parts[laser_link_name].get_pose()
+        base_pose = self.robots[0].parts['base_link'].get_pose()
+        laser_in_base_frame = laser_pose[:3] - base_pose[:3]
+
+        angle = np.arange(-laser_angular_half_range / 180 * np.pi,
+                          laser_angular_half_range / 180 * np.pi,
+                          laser_angular_range / 180.0 * np.pi / self.n_horizontal_rays)
+        unit_vector_local = np.array([[np.cos(ang), np.sin(ang), 0.0] for ang in angle])
+
+        state = self.get_state()
+        scan = state['scan']
+
+        scan_local = unit_vector_local[:, :2] * scan[:, None] * laser_linear_range - laser_in_base_frame[:2]
+        scan_local = np.concatenate([np.array([[0, 0]]), scan_local, np.array([[0, 0]])], axis=0)
+
+
+        occupancy_grid = np.zeros((self.grid_resolution, self.grid_resolution)).astype(np.uint8)
+        scan_local_in_map = scan_local / (self.occupancy_range / 2) * (self.grid_resolution / 2) + self.grid_resolution / 2
+        scan_local_in_map = scan_local_in_map.reshape((1, -1, 1, 2)).astype(np.int32)
+        cv2.fillPoly(occupancy_grid, scan_local_in_map, True, 1)
+        cv2.circle(occupancy_grid, (self.grid_resolution // 2, self.grid_resolution // 2), int(self.robot_footprint_radius_in_map), 1,
+                   -1)
+
+        return occupancy_grid
+
     def get_additional_states(self):
         pos_noise = 0.0
         cur_pos = self.robots[0].get_position()
@@ -303,7 +365,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                             # angular_velocity_local))
         # cache results for reward calculation
         self.new_potential = geodesic_dist
-        assert len(additional_states) == self.additional_states_dim, 'additional states dimension mismatch'
+        assert len(additional_states) == self.additional_states_dim, \
+            'additional states dimension mismatch, {}, {}'.format(len(additional_states), self.additional_states_dim)
         return additional_states
 
     def get_state(self, collision_links=[]):
@@ -373,7 +436,12 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         is_base_subgoal_valid = self.scene.has_node(self.floor_num, base_subgoal_pos[:2])
         if is_base_subgoal_valid:
             if self.eval:
-                path = self.plan_base_motion(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
+                #path = self.plan_base_motion(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
+                ## Motion planning from global map
+
+                path = self.plan_base_motion_2d(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
+                ## Motion planning from local map
+
                 if path is not None:
                     for way_point in path:
                         set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
@@ -481,7 +549,7 @@ if __name__ == '__main__':
 
     args = parser.parse_args()
 
-    nav_env = MotionPlanningEnv(config_file=args.config,
+    nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
                                        mode=args.mode,
                                        action_timestep=1.0 / 1000000.0,
                                        physics_timestep=1.0 / 1000000.0,
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index 9f17e6580..93357c632 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -21,6 +21,7 @@ directory = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.join(directory, '../motion'))
 from motion_planners.rrt_connect import birrt, direct_path
 #from ..motion.motion_planners.rrt_connect import birrt, direct_path
+import cv2
 
 # from future_builtins import map, filter
 # from builtins import input # TODO - use future
@@ -2692,6 +2693,87 @@ def plan_base_motion(body, end_conf, base_limits, obstacles=[], direct=False,
     return birrt(start_conf, end_conf, distance_fn,
                  sample_fn, extend_fn, collision_fn, **kwargs)
 
+
+def plan_base_motion_2d(body, end_conf, base_limits, map_2d, occupancy_range, grid_resolution, robot_footprint_radius_in_map,
+                        obstacles=[], direct=False, weights=1 * np.ones(3), resolutions=0.05 * np.ones(3),
+                        max_distance=MAX_DISTANCE, **kwargs):
+    def sample_fn():
+        x, y = np.random.uniform(*base_limits)
+        theta = np.random.uniform(*CIRCULAR_LIMITS)
+        return (x, y, theta)
+
+    difference_fn = get_base_difference_fn()
+    distance_fn = get_base_distance_fn(weights=weights)
+
+    def extend_fn(q1, q2):
+        target_theta = np.arctan2(q2[1] - q1[1], q2[0] - q1[0])
+
+        n1 = int(np.abs(circular_difference(target_theta, q1[2]) / resolutions[2])) + 1
+        n3 = int(np.abs(circular_difference(q2[2], target_theta) / resolutions[2])) + 1
+        steps2 = np.abs(np.divide(difference_fn(q2, q1), resolutions))
+        n2 = int(np.max(steps2)) + 1
+
+        for i in range(n1):
+            q = (i / (n1)) * np.array(difference_fn((q1[0], q1[1], target_theta), q1)) + np.array(q1)
+            q = tuple(q)
+            yield q
+
+        for i in range(n2):
+            q = (i / (n2)) * np.array(
+                difference_fn((q2[0], q2[1], target_theta), (q1[0], q1[1], target_theta))) + np.array(
+                (q1[0], q1[1], target_theta))
+            q = tuple(q)
+            yield q
+
+        for i in range(n3):
+            q = (i / (n3)) * np.array(difference_fn(q2, (q2[0], q2[1], target_theta))) + np.array(
+                (q2[0], q2[1], target_theta))
+            q = tuple(q)
+            yield q
+
+    start_conf = get_base_values(body)
+
+    def collision_fn(q):
+        # TODO: update this function
+        # set_base_values(body, q)
+        # return any(pairwise_collision(body, obs, max_distance=max_distance) for obs in obstacles)
+        delta = np.array(q)[:2] - np.array(start_conf)[:2]
+        theta = start_conf[2]
+        x_dir = np.array([np.sin(theta), -np.cos(theta)])
+        y_dir = np.array([np.cos(theta), np.sin(theta)])
+        end_in_start_frame = [x_dir.dot(delta), y_dir.dot(delta)]
+        pts = np.array(end_in_start_frame) / (occupancy_range) * (grid_resolution / 2) + grid_resolution / 2
+        pts = pts.astype(np.int32)
+        #print(pts)
+
+        if pts[0] < robot_footprint_radius_in_map or pts[1] < robot_footprint_radius_in_map \
+                or pts[0] > grid_resolution - robot_footprint_radius_in_map - 1 or pts[
+            1] > grid_resolution - robot_footprint_radius_in_map - 1:
+            return True
+
+        # plt.figure()
+        # plt.imshow(map_2d[pts[0]-1:pts[0]+1, pts[1]-1:pts[1]+1])
+        # plt.colorbar()
+        mask = np.zeros((robot_footprint_radius_in_map * 2 + 1, robot_footprint_radius_in_map * 2 + 1))
+        cv2.circle(mask, (robot_footprint_radius_in_map, robot_footprint_radius_in_map), robot_footprint_radius_in_map,
+                   1, -1)
+        mask = mask.astype(np.bool)
+        return np.any(map_2d[pts[0] - robot_footprint_radius_in_map:pts[0] + robot_footprint_radius_in_map + 1,
+                      pts[1] - robot_footprint_radius_in_map:pts[1] + robot_footprint_radius_in_map + 1][mask] == 0)
+
+    if collision_fn(start_conf):
+        print("Warning: initial configuration is in collision")
+        return None
+    if collision_fn(end_conf):
+        print("Warning: end configuration is in collision")
+        return None
+    if direct:
+        return direct_path(start_conf, end_conf, extend_fn, collision_fn)
+    return birrt(start_conf, end_conf, distance_fn,
+                 sample_fn, extend_fn, collision_fn, **kwargs)
+
+
+
 #####################################
 
 # Placements

From a59a80ab772550e21afa18ee0a4e95055f973fe6 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Fri, 17 Jan 2020 12:45:46 -0800
Subject: [PATCH 16/58] button-door env setup

---
 .../configs/fetch_interactive_nav_s2r_mp.yaml |   6 +-
 gibson2/core/simulator.py                     |   6 +
 gibson2/envs/locomotor_env.py                 | 120 +++----
 gibson2/envs/motion_planner_env.py            | 335 +++++++++++-------
 4 files changed, 275 insertions(+), 192 deletions(-)

diff --git a/examples/configs/fetch_interactive_nav_s2r_mp.yaml b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
index fff39dfc8..d96745970 100644
--- a/examples/configs/fetch_interactive_nav_s2r_mp.yaml
+++ b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
@@ -12,7 +12,7 @@ robot: Fetch
 velocity: 0.1
 
 # task, observation and action
-task: reaching # pointgoal|objectgoal|areagoal|reaching
+task: pointgoal # pointgoal|objectgoal|areagoal|reaching
 fisheye: false
 
 initial_pos: [-1.0, -1.0, 0.0]
@@ -22,7 +22,7 @@ target_pos: [1.0, 1.0, 0.0]
 target_orn: [0.0, 0.0, 0.0]
 
 is_discrete: false
-additional_states_dim: 23
+additional_states_dim: 22
 
 # reward
 reward_type: dense
@@ -43,7 +43,7 @@ dist_tol: 0.5  # body width
 max_step: 20
 
 # sensor
-output: [sensor, rgb, depth, pc]
+output: [sensor, rgb, depth, scan, pc]
 resolution: 128
 fov: 120
 n_horizontal_rays: 220
diff --git a/gibson2/core/simulator.py b/gibson2/core/simulator.py
index 38b581039..61f4a34e6 100644
--- a/gibson2/core/simulator.py
+++ b/gibson2/core/simulator.py
@@ -384,6 +384,12 @@ class Simulator:
         """
 
         p.stepSimulation()
+        self.sync()
+
+    def sync(self):
+        """
+        Update positions in renderer without stepping the simulation. Usually used in the reset() function
+        """
         for instance in self.renderer.instances:
             if instance.dynamic:
                 self.update_position(instance)
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 572ea4900..62ada37ce 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -28,24 +28,24 @@ from PIL import Image
 
 
 
-# Episode = collections.namedtuple('Episode',
-#                                  [
-#                                      'env',
-#                                      'agent',
-#                                      'initial_pos',
-#                                      'target_pos',
-#                                      'geodesic_distance',
-#                                      'shortest_path',
-#                                      'agent_trajectory',
-#                                      'object_files',
-#                                      'object_trajectory',
-#                                      'success',
-#                                      'path_efficiency',
-#                                      'kinematic_disturbance',
-#                                      'dynamic_disturbance_a',
-#                                      'dynamic_disturbance_b',
-#                                      'collision_step',
-#                                  ])
+Episode = collections.namedtuple('Episode',
+                                 [
+                                     # 'env',
+                                     # 'agent',
+                                     # 'initial_pos',
+                                     # 'target_pos',
+                                     # 'geodesic_distance',
+                                     # 'shortest_path',
+                                     # 'agent_trajectory',
+                                     # 'object_files',
+                                     # 'object_trajectory',
+                                     'success',
+                                     # 'path_efficiency',
+                                     # 'kinematic_disturbance',
+                                     # 'dynamic_disturbance_a',
+                                     # 'dynamic_disturbance_b',
+                                     # 'collision_step',
+                                 ])
 
 
 class NavigateEnv(BaseEnv):
@@ -208,7 +208,7 @@ class NavigateEnv(BaseEnv):
             #                                  dtype=np.float32)
             self.scan_space = gym.spaces.Box(low=0.0,
                                              high=1.0,
-                                             shape=(self.n_horizontal_rays * self.n_vertical_beams,),
+                                             shape=(self.n_horizontal_rays * self.n_vertical_beams, 1),
                                              dtype=np.float32)
             observation_space['scan'] = self.scan_space
         if 'rgb_filled' in self.output:  # use filler
@@ -305,16 +305,25 @@ class NavigateEnv(BaseEnv):
 
         # rgb = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3]
         # rgb = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
-
+        #
         # depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
-        # depth = np.clip(depth / 5.0, 0.0, 1.0)
+        # depth = np.clip(depth / 8.0, 0.0, 1.0)
         # depth = 1.0 - depth  # flip black/white
         # seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1]
         # if self.num_object_classes is not None:
         #     seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0)
         # cv2.imshow('rgb', rgb)
-        # cv2.imwrite('test_sc/%d_%d.jpg' % (self.current_episode, self.current_step), (rgb * 255).astype(np.uint8))
-        # cv2.imwrite('test_sc/%d_%d_depth.jpg' % (self.current_episode, self.current_step), (depth * 255).astype(np.uint8))
+        # cv2.imwrite('test.jpg', (rgb * 255).astype(np.uint8))
+        # cv2.imwrite('test_depth.jpg', (depth * 255).astype(np.uint8))
+
+        # print(cv2.imwrite('button/%d_%d.jpg' % (self.current_episode, self.current_step), (rgb * 255).astype(np.uint8)))
+        # print(cv2.imwrite('button/%d_%d_depth.jpg' % (self.current_episode, self.current_step), (depth * 255).astype(np.uint8)))
+        # assert False
+        # cv2.imshow('rgb', rgb)
+        # cv2.imshow('depth', depth)
+
+        # assert False
+
         # cv2.imshow('depth', depth)
         # cv2.imshow('seg', seg)
 
@@ -448,7 +457,7 @@ class NavigateEnv(BaseEnv):
             # hit_object_id = np.array([item[0] for item in results])
             # link_id = np.array([item[1] for item in results])
             hit_fraction = np.array([item[2] for item in results])  # hit fraction = [0.0, 1.0] of laser_linear_range
-            state['scan'] = hit_fraction
+            state['scan'] = np.expand_dims(hit_fraction, 1)
 
             # assert 'scan_link' in self.robots[0].parts, "Requested scan but no scan_link"
             # pose_camera = self.robots[0].parts['scan_link'].get_pose()
@@ -567,26 +576,13 @@ class NavigateEnv(BaseEnv):
         #     done = True
         #     info['success'] = False
 
-        # if done:
+        if done:
         #     info['episode_length'] = self.current_step
         #     info['path_length'] = self.path_length
         #     info['collision_step'] = self.collision_step
         #     info['energy_cost'] = self.energy_cost
         #     info['stage'] = self.stage
         #
-        #     Episode = collections.namedtuple('Episode',
-        #                                      ['initial_pos',
-        #                                       'target_pos',
-        #                                       'geodesic_distance',
-        #                                       'shortest_path',
-        #                                       'trajectory',
-        #                                       'success',
-        #                                       'path_efficiency',
-        #                                       'kinematic_disturbance',
-        #                                       'dynamic_disturbance_a',
-        #                                       'dynamic_disturbance_b',
-        #                                       'collision_step',
-        #                                       ])
         #     shortest_path, geodesic_distance = self.scene.get_shortest_path(self.floor_num,
         #                                                                     self.initial_pos[:2],
         #                                                                     self.target_pos[:2],
@@ -599,24 +595,24 @@ class NavigateEnv(BaseEnv):
         #     dynamic_disturbance_a = min_dyn_dist / (min_dyn_dist + self.dynamic_disturbance_a)
         #     dynamic_disturbance_b = self.current_step / float(self.current_step + self.dynamic_disturbance_b)
         #     object_files = [obj.filename for obj in self.interactive_objects]
-        #     episode = Episode(
-        #         env=self.scene.model_id,
-        #         agent=self.robots[0].model_file,
-        #         initial_pos=self.initial_pos,
-        #         target_pos=self.target_pos,
-        #         geodesic_distance=geodesic_distance,
-        #         shortest_path=shortest_path,
-        #         agent_trajectory=np.array(self.agent_trajectory),
-        #         object_files=object_files,
-        #         object_trajectory=np.array(self.object_trajectory),
-        #         success=float(info['success']),
-        #         path_efficiency=min(1.0, geodesic_distance / self.path_length),
-        #         kinematic_disturbance=kinematic_disturbance,
-        #         dynamic_disturbance_a=dynamic_disturbance_a,
-        #         dynamic_disturbance_b=dynamic_disturbance_b,
-        #         collision_step=self.collision_step,
-        #     )
-        #     self.stored_episodes.append(episode)
+            episode = Episode(
+                # env=self.scene.model_id,
+                # agent=self.robots[0].model_file,
+                # initial_pos=self.initial_pos,
+                # target_pos=self.target_pos,
+                # geodesic_distance=geodesic_distance,
+                # shortest_path=shortest_path,
+                # agent_trajectory=np.array(self.agent_trajectory),
+                # object_files=object_files,
+                # object_trajectory=np.array(self.object_trajectory),
+                success=float(info['success']),
+                # path_efficiency=min(1.0, geodesic_distance / self.path_length),
+                # kinematic_disturbance=kinematic_disturbance,
+                # dynamic_disturbance_a=dynamic_disturbance_a,
+                # dynamic_disturbance_b=dynamic_disturbance_b,
+                # collision_step=self.collision_step,
+            )
+            self.stored_episodes.append(episode)
 
         return done, info
 
@@ -682,6 +678,13 @@ class NavigateEnv(BaseEnv):
         self.current_episode += 1
         self.reset_agent()
         self.after_reset_agent()
+
+        # set position for visual objects
+        if self.visual_object_at_initial_target_pos:
+            self.initial_pos_vis_obj.set_position(self.initial_pos)
+            self.target_pos_vis_obj.set_position(self.target_pos)
+
+        self.simulator.sync()
         state = self.get_state()
 
         if self.reward_type == 'l2':
@@ -700,11 +703,6 @@ class NavigateEnv(BaseEnv):
         self.interactive_objects_collided = set()
         self.energy_cost = 0.0
 
-        # set position for visual objects
-        if self.visual_object_at_initial_target_pos:
-            self.initial_pos_vis_obj.set_position(self.initial_pos)
-            self.target_pos_vis_obj.set_position(self.target_pos)
-
         return state
 
 
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 167f1083b..3311c13b2 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -73,15 +73,15 @@ class MotionPlanningEnv(NavigateRandomEnv):
 
     def plan_base_motion(self, x, y, theta):
         half_size = self.map_size / 2.0
-        if self.mode == 'gui':
-            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, False)
+        # if self.mode == 'gui':
+        #     p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, False)
         path = plan_base_motion(
             self.robot_id,
             [x, y, theta],
             ((-half_size, -half_size), (half_size, half_size)),
             obstacles=[self.mesh_id])
-        if self.mode == 'gui':
-            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
+        # if self.mode == 'gui':
+        #     p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
         return path
 
     def step(self, pt):
@@ -175,6 +175,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                  device_idx=0,
                  automatic_reset=False,
                  eval=False,
+                 arena=None,
                  ):
         super(MotionPlanningBaseArmEnv, self).__init__(config_file,
                                                        model_id=model_id,
@@ -198,6 +199,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         #                                                             shape=(height, width, 1),
         #                                                             dtype=np.float32)
 
+        self.arena = arena
         self.eval = eval
         self.visualize_waypoints = True
         if self.visualize_waypoints and self.mode == 'gui':
@@ -212,27 +214,24 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         self.new_potential = None
         self.collision_reward_weight = collision_reward_weight
-        # action[0] = arm_img_u
-        # action[1] = arm_img_v
-        # action[2] = base_dx
-        # action[3] = base_dy
-        # action[4] = base_dtheta
-        self.action_space = gym.spaces.Box(shape=(5,),
+        # action[0] = base_or_arm
+        # action[1] = base_subgoal_theta
+        # action[2] = base_subgoal_dist
+        # action[3] = base_orn
+        # action[4] = arm_img_u
+        # action[5] = arm_img_v
+        self.action_space = gym.spaces.Box(shape=(6,),
                                            low=-1.0,
                                            high=1.0,
                                            dtype=np.float32)
         self.prepare_motion_planner()
 
-    def prepare_motion_planner(self):
-        self.robot_id = self.robots[0].robot_ids[0]
-        self.mesh_id = self.scene.mesh_body_id
-        self.map_size = self.scene.trav_map_original_size * self.scene.trav_map_default_resolution
-        # print(self.robot_id, self.mesh_id, self.map_size)
+        # visualization
         self.base_marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
                                         rgba_color=[1, 0, 0, 1],
                                         radius=0.1,
-                                        length=0.1,
-                                        initial_offset=[0, 0, 0.1 / 2.0])
+                                        length=2.0,
+                                        initial_offset=[0, 0, 2.0 / 2.0])
         self.base_marker.load()
 
         self.arm_marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
@@ -242,14 +241,42 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                        initial_offset=[0, 0, 0.1 / 2.0])
         self.arm_marker.load()
 
+        if self.arena == 'button':
+            self.button_marker = VisualMarker(visual_shape=p.GEOM_SPHERE,
+                                              rgba_color=[0, 1, 0, 1],
+                                              radius=0.3)
+            self.simulator.import_object(self.button_marker, class_id=255)
+
+            self.door = InteractiveObj(
+                os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'),
+                scale=4.0)
+            self.simulator.import_interactive_object(self.door, class_id=2)
+
+            self.wall_poses = [
+                [[0, -2.0, 1], [0, 0, 0, 1]],
+            ]
+            self.walls = []
+            for wall_pose in self.wall_poses:
+                wall = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls.urdf'),
+                                      scale=1)
+                self.simulator.import_object(wall, class_id=3)
+                wall.set_position_rotation(wall_pose[0], wall_pose[1])
+                self.walls += [wall]
+
+    def prepare_motion_planner(self):
+        self.robot_id = self.robots[0].robot_ids[0]
+        self.mesh_id = self.scene.mesh_body_id
+        self.map_size = self.scene.trav_map_original_size * self.scene.trav_map_default_resolution
+        # print(self.robot_id, self.mesh_id, self.map_size)
+
     def plan_base_motion(self, x, y, theta):
         half_size = self.map_size / 2.0
-        if self.mode == 'gui':
-            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, False)
+        # if self.mode == 'gui':
+        #     p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, False)
         path = plan_base_motion(self.robot_id, [x, y, theta], ((-half_size, -half_size), (half_size, half_size)),
                                 obstacles=[self.mesh_id])
-        if self.mode == 'gui':
-            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
+        # if self.mode == 'gui':
+        #     p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
         return path
 
     def global_to_local(self, pos, cur_pos, cur_rot):
@@ -297,8 +324,26 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         waypoints_local_xy = np.array([self.global_to_local(waypoint, cur_pos, cur_rot)[:2]
                                        for waypoint in shortest_path]).flatten()
+
+        # # convert Cartesian space to radian space START
+        # for i in range(waypoints_local_xy.shape[0] // 2):
+        #     vec = waypoints_local_xy[(i * 2):(i * 2 + 2)]
+        #     norm = np.linalg.norm(vec)
+        #     if norm == 0:
+        #         continue
+        #     dir = np.arctan2(vec[1], vec[0])
+        #     waypoints_local_xy[i * 2] = dir
+        #     waypoints_local_xy[i * 2 + 1] = norm
+        #
+        # norm = np.linalg.norm(target_pos_local[:2])
+        # if norm != 0:
+        #     dir = np.arctan2(target_pos_local[1], target_pos_local[0])
+        #     target_pos_local[0] = dir
+        #     target_pos_local[1] = norm
+        # # convert Cartesian space to radian space END
+
         additional_states = np.concatenate((waypoints_local_xy,
-                                            target_pos_local))
+                                            target_pos_local[:2]))
                                             # linear_velocity_local,
                                             # angular_velocity_local))
         # cache results for reward calculation
@@ -316,7 +361,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 # half_diff = int((width - height) / 2)
                 # img = img[half_diff:half_diff+height, :]
                 if modality == 'depth':
-                    high = 25.0
+                    high = 20.0
                     img[img > high] = high
                     img /= high
                 state[modality] = img
@@ -337,133 +382,164 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.new_potential = geodesic_dist
 
     def step(self, action):
+        # action[0] = base_or_arm
+        # action[1] = base_subgoal_theta
+        # action[2] = base_subgoal_dist
+        # action[3] = base_orn
+        # action[4] = arm_img_u
+        # action[5] = arm_img_v
+
         self.current_step += 1
+        subgoal_success = True
+        use_base = action[0] > 0.0
+        if use_base:
+            # print('base')
+            # use base
+            yaw = self.robots[0].get_rpy()[2]
+            robot_pos = self.robots[0].get_position()
+            base_subgoal_theta = (action[1] * 110.0) / 180.0 * np.pi  # [-110.0, 110.0]
+            base_subgoal_theta += yaw
+            base_subgoal_dist = (action[2] + 1)  # [0.0, 2.0]
+            base_subgoal_pos = np.array([np.cos(base_subgoal_theta), np.sin(base_subgoal_theta)])
+            base_subgoal_pos *= base_subgoal_dist
+            base_subgoal_pos = np.append(base_subgoal_pos, 0.0)
+            base_subgoal_pos += robot_pos
+            self.base_marker.set_position(base_subgoal_pos)
 
-        state = self.get_state()
-        points = state['pc']
-        height, width = points.shape[0:2]
+            base_subgoal_orn = action[3] * np.pi
+            base_subgoal_orn += yaw
 
-        arm_img_u = np.clip(int((action[0] + 1) / 2.0 * height), 0, height - 1)
-        arm_img_v = np.clip(int((action[1] + 1) / 2.0 * width), 0, width - 1)
-
-        # original_pos = get_base_values(self.robot_id)
-        point = points[arm_img_u, arm_img_v]
-        camera_pose = (self.robots[0].parts['eyes'].get_pose())
-        transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
-                                        quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
-        arm_subgoal = transform_mat.dot(np.array([-point[2], -point[0], point[1], 1]))[:3]
-
-        dx = action[2] * 0.5
-        dy = action[3] * 0.5
-        yaw = self.robots[0].get_rpy()[2]
-        dtheta = action[4] * np.pi
-        base_subgoal_orn = dtheta + yaw
-
-        base_subgoal_pos = np.array([arm_subgoal[0] + dx, arm_subgoal[1] + dy, self.robots[0].get_position()[2]])
-        self.arm_marker.set_position(arm_subgoal)
-        self.base_marker.set_position(base_subgoal_pos)
-
-        # embed()
-        # print('before plan_base_motion')
-        # path = self.plan_base_motion(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
-        # print('after plan_base_motion')
-        # embed()
-        # print('base mp finished -------------------')
-
-        is_base_subgoal_valid = self.scene.has_node(self.floor_num, base_subgoal_pos[:2])
-        if is_base_subgoal_valid:
-            if self.eval:
-                path = self.plan_base_motion(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
-                if path is not None:
-                    for way_point in path:
-                        set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
-                        time.sleep(0.1)
+            is_base_subgoal_valid = self.scene.has_node(self.floor_num, base_subgoal_pos[:2])
+            if self.arena == 'button':
+                x_min = -6.0 if self.door_open else -3.0
+                x_max = 2.0
+                y_min, y_max = -2.0, 2.0
+                is_valid = x_min <= base_subgoal_pos[0] <= x_max and y_min <= base_subgoal_pos[1] <= y_max
+                is_base_subgoal_valid = is_base_subgoal_valid and is_valid
+            if is_base_subgoal_valid:
+                if self.eval:
+                    path = self.plan_base_motion(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
+                    if path is not None:
+                        for way_point in path:
+                            set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
+                            time.sleep(0.1)
+                else:
+                    set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
+                # set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
+                # print('subgoal succeed')
             else:
-                set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
-                
-            # print('subgoal succeed')
-            # last_step = path[-1]
-            # set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
-            # state, _, done, info = super(MotionPlanningBaseArmEnv, self).step(None)
-            # self.get_additional_states()
+                # print('subgoal fail')
+                subgoal_success = False
         else:
-            # print('subgoal fail')
-            # set_base_values(self.robot_id, original_pos)
-            reward = -0.02
-            # print('reward', reward)
-            # time.sleep(3)
-            info = {}
-            done, info = self.get_termination([], info)
+            # print('arm')
+            # use arm
+            state = self.get_state()
+            points = state['pc']
+            height, width = points.shape[0:2]
 
-            if done and self.automatic_reset:
-                info['last_observation'] = state
-                state = self.reset()
+            arm_img_u = np.clip(int((action[4] + 1) / 2.0 * height), 0, height - 1)
+            arm_img_v = np.clip(int((action[5] + 1) / 2.0 * width), 0, width - 1)
 
-            del state['pc']
-            return state, reward, done, info
+            point = points[arm_img_u, arm_img_v]
+            camera_pose = (self.robots[0].parts['eyes'].get_pose())
+            transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
+                                            quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
+            arm_subgoal = transform_mat.dot(np.array([-point[2], -point[0], point[1], 1]))[:3]
+            self.arm_marker.set_position(arm_subgoal)
 
-        arm_joints = joints_from_names(self.robot_id,
-                                       ['torso_lift_joint',
-                                        'shoulder_pan_joint',
-                                        'shoulder_lift_joint',
-                                        'upperarm_roll_joint',
-                                        'elbow_flex_joint',
-                                        'forearm_roll_joint',
-                                        'wrist_flex_joint',
-                                        'wrist_roll_joint'])
-        max_limits = [0., 0.] + get_max_limits(self.robot_id, arm_joints) + [0.05, 0.05]
-        min_limits = [0., 0.] + get_min_limits(self.robot_id, arm_joints) + [0., 0.]
-        rest_position = [0., 0.] + list(get_joint_positions(self.robot_id, arm_joints)) + [0.04, 0.04]
-        joint_range = list(np.array(max_limits) - np.array(min_limits))
-        joint_range = [item + 1 for item in joint_range]
-        joint_damping = [0.1 for _ in joint_range]
-        # print('before arm IK')
-        joint_positions = p.calculateInverseKinematics(self.robot_id,
-                                                       self.robots[0].parts['gripper_link'].body_part_index,
-                                                       arm_subgoal,
-                                                       lowerLimits=min_limits,
-                                                       upperLimits=max_limits,
-                                                       jointRanges=joint_range,
-                                                       restPoses=rest_position,
-                                                       jointDamping=joint_damping,
-                                                       solver=p.IK_DLS,
-                                                       maxNumIterations=100)[2:10]
-        # print('after arm IK')
-        # print(joint_positions)
-        set_joint_positions(self.robot_id, arm_joints, joint_positions)
-        if self.eval:
-            time.sleep(0.5) # for visualization
-        # dist = l2_distance(arm_subgoal, self.get_position_of_interest())
-        # print(dist)
+            arm_joints = joints_from_names(self.robot_id,
+                                           ['torso_lift_joint',
+                                            'shoulder_pan_joint',
+                                            'shoulder_lift_joint',
+                                            'upperarm_roll_joint',
+                                            'elbow_flex_joint',
+                                            'forearm_roll_joint',
+                                            'wrist_flex_joint',
+                                            'wrist_roll_joint'])
+            max_limits = [0., 0.] + get_max_limits(self.robot_id, arm_joints) + [0.05, 0.05]
+            min_limits = [0., 0.] + get_min_limits(self.robot_id, arm_joints) + [0., 0.]
+            rest_position = [0., 0.] + list(get_joint_positions(self.robot_id, arm_joints)) + [0.04, 0.04]
+            joint_range = list(np.array(max_limits) - np.array(min_limits))
+            joint_range = [item + 1 for item in joint_range]
+            joint_damping = [0.1 for _ in joint_range]
+            joint_positions = p.calculateInverseKinematics(self.robot_id,
+                                                           self.robots[0].parts['gripper_link'].body_part_index,
+                                                           arm_subgoal,
+                                                           lowerLimits=min_limits,
+                                                           upperLimits=max_limits,
+                                                           jointRanges=joint_range,
+                                                           restPoses=rest_position,
+                                                           jointDamping=joint_damping,
+                                                           solver=p.IK_DLS,
+                                                           maxNumIterations=100)[2:10]
+            set_joint_positions(self.robot_id, arm_joints, joint_positions)
+            if self.eval:
+                time.sleep(0.5)  # for visualization
 
-        # trigger re-computation of geodesic distance for get_reward
-        self.get_state()
+        if use_base:
+            # trigger re-computation of geodesic distance for get_reward
+            state = self.get_state()
 
         info = {}
-        reward, info = self.get_reward([], action, info)
+        if subgoal_success:
+            reward, info = self.get_reward([], action, info)
+        else:
+            reward = -0.0
         done, info = self.get_termination([], info)
 
-        # arm reset
-        arm_default_joint_positions = (0.38548146667743244, 1.1522793897208579,
-                                       1.2576467971105596, -0.312703569911879,
-                                       1.7404867100093226, -0.0962895617312548,
-                                       -1.4418232619629425, -1.6780152866247762)
-        set_joint_positions(self.robot_id, arm_joints, arm_default_joint_positions)
+        if self.arena == 'button':
+            if not self.door_open and \
+                    l2_distance(self.robots[0].get_end_effector_position(), self.button_marker_pos) < 0.5:
+                # touch the button -> remove the button and the door
+                print("OPEN DOOR")
+                self.door_open = True
+                self.button_marker.set_position([100.0, 100.0, 0.0])
+                self.door.set_position([100.0, 100.0, 0.0])
+                reward += 5.0
+
+        if not use_base:
+            # arm reset
+            arm_default_joint_positions = (0.38548146667743244, 1.1522793897208579,
+                                           1.2576467971105596, -0.312703569911879,
+                                           1.7404867100093226, -0.0962895617312548,
+                                           -1.4418232619629425, -1.6780152866247762)
+            set_joint_positions(self.robot_id, arm_joints, arm_default_joint_positions)
+            # need to call get_state again after arm reset (camera height could be different if torso moves)
+            # TODO: should only call get_state once or twice (e.g. disable torso movement, or cache get_state result)
+            state = self.get_state()
 
-        # need to call get_state again after arm reset (camera height could be different if torso moves)
-        # TODO: should only call get_state once or twice (e.g. disable torso movement, or cache get_state result)
-        state = self.get_state()
         if done and self.automatic_reset:
             state = self.reset()
         del state['pc']
         # print('reward', reward)
         # time.sleep(3)
-
         return state, reward, done, info
 
+    def reset_initial_and_target_pos(self):
+        if self.arena == 'button':
+            floor_height = self.scene.get_floor_height(self.floor_num)
+            self.initial_pos = np.array([1.2, 0.0, floor_height])
+            self.target_pos = np.array([-5.0, 0.0, floor_height])
+            self.robots[0].set_position(pos=[self.initial_pos[0],
+                                             self.initial_pos[1],
+                                             self.initial_pos[2] + self.random_init_z_offset])
+            self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, np.pi), 'wxyz'))
+            self.button_marker_pos = [
+                np.random.uniform(-3.0, -0.5),
+                np.random.uniform(-1.0, 1.0),
+                1.5
+            ]
+            self.button_marker.set_position(self.button_marker_pos)
+            self.door.set_position_rotation([-3.5, 0, 0.0], [0, 0, np.sqrt(0.5), np.sqrt(0.5)])
+        else:
+            super(MotionPlanningBaseArmEnv, self).reset_initial_and_target_pos()
+
     def reset(self):
         state = super(MotionPlanningBaseArmEnv, self).reset()
         del state['pc']
+        # embed()
+        if self.arena == 'button':
+            self.door_open = False
         return state
 
 
@@ -481,11 +557,13 @@ if __name__ == '__main__':
 
     args = parser.parse_args()
 
-    nav_env = MotionPlanningEnv(config_file=args.config,
+    nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
                                        mode=args.mode,
                                        action_timestep=1.0 / 1000000.0,
                                        physics_timestep=1.0 / 1000000.0,
-                                eval=True)
+                                       eval=args.mode == 'gui',
+                                       arena='button',
+                                       )
 
     for episode in range(100):
         print('Episode: {}'.format(episode))
@@ -495,6 +573,7 @@ if __name__ == '__main__':
             # print('Step: {}'.format(i))
             action = nav_env.action_space.sample()
             state, reward, done, info = nav_env.step(action)
+            # embed()
             # print('Reward:', reward)
             # time.sleep(0.05)
             # nav_env.step()

From 6b8af49684445167d8473b1976692526c9a09e36 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Fri, 17 Jan 2020 17:50:51 -0800
Subject: [PATCH 17/58] trying to fix motion planner 2d, still sometimes go
 through wall

---
 gibson2/envs/motion_planner_env.py | 83 +++++++++++++++++++++---------
 1 file changed, 59 insertions(+), 24 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 09c66bb9a..e757f2c53 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -316,26 +316,38 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         laser_angular_half_range = laser_angular_range / 2.0
         laser_pose = self.robots[0].parts[laser_link_name].get_pose()
         base_pose = self.robots[0].parts['base_link'].get_pose()
-        laser_in_base_frame = laser_pose[:3] - base_pose[:3]
 
         angle = np.arange(-laser_angular_half_range / 180 * np.pi,
                           laser_angular_half_range / 180 * np.pi,
                           laser_angular_range / 180.0 * np.pi / self.n_horizontal_rays)
-        unit_vector_local = np.array([[np.cos(ang), np.sin(ang), 0.0] for ang in angle])
+        unit_vector_laser = np.array([[np.cos(ang), np.sin(ang), 0.0] for ang in angle])
 
         state = self.get_state()
         scan = state['scan']
 
-        scan_local = unit_vector_local[:, :2] * scan * laser_linear_range - laser_in_base_frame[:2]
+        scan_laser = unit_vector_laser * scan * laser_linear_range
+        # embed()
+
+        laser_translation = laser_pose[:3]
+        laser_rotation = quat2mat([laser_pose[6], laser_pose[3], laser_pose[4], laser_pose[5]])
+        scan_world = laser_rotation.dot(scan_laser.T).T + laser_translation
+
+        base_translation = base_pose[:3]
+        base_rotation = quat2mat([base_pose[6], base_pose[3], base_pose[4], base_pose[5]])
+        scan_local = base_rotation.T.dot((scan_world - base_translation).T).T
+        scan_local = scan_local[:, :2]
         scan_local = np.concatenate([np.array([[0, 0]]), scan_local, np.array([[0, 0]])], axis=0)
 
-
+        # flip y axis
+        scan_local[:, 1] *= -1
         occupancy_grid = np.zeros((self.grid_resolution, self.grid_resolution)).astype(np.uint8)
-        scan_local_in_map = scan_local / (self.occupancy_range / 2) * (self.grid_resolution / 2) + self.grid_resolution / 2
+        scan_local_in_map = scan_local / (self.occupancy_range / 2) * (self.grid_resolution / 2) + (self.grid_resolution / 2)
         scan_local_in_map = scan_local_in_map.reshape((1, -1, 1, 2)).astype(np.int32)
         cv2.fillPoly(occupancy_grid, scan_local_in_map, True, 1)
         cv2.circle(occupancy_grid, (self.grid_resolution // 2, self.grid_resolution // 2), int(self.robot_footprint_radius_in_map), 1, -1)
-
+        # cv2.imwrite('occupancy_grid.png', occupancy_grid)
+        # embed()
+        # assert False
         return occupancy_grid
 
     def get_additional_states(self):
@@ -439,6 +451,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.new_potential = geodesic_dist
 
     def step(self, action):
+        print('-' * 20)
         # action[0] = base_or_arm
         # action[1] = base_subgoal_theta
         # action[2] = base_subgoal_dist
@@ -449,6 +462,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.current_step += 1
         subgoal_success = True
         use_base = action[0] > 0.0
+        use_base = True
         if use_base:
             # print('base')
             # use base
@@ -461,32 +475,53 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             base_subgoal_pos *= base_subgoal_dist
             base_subgoal_pos = np.append(base_subgoal_pos, 0.0)
             base_subgoal_pos += robot_pos
+            print('base_subgoal_pos', base_subgoal_pos)
+
             self.base_marker.set_position(base_subgoal_pos)
 
             base_subgoal_orn = action[3] * np.pi
             base_subgoal_orn += yaw
 
-            is_base_subgoal_valid = self.scene.has_node(self.floor_num, base_subgoal_pos[:2])
-            if self.arena == 'button':
-                x_min = -6.0 if self.door_open else -3.0
-                x_max = 2.0
-                y_min, y_max = -2.0, 2.0
-                is_valid = x_min <= base_subgoal_pos[0] <= x_max and y_min <= base_subgoal_pos[1] <= y_max
-                is_base_subgoal_valid = is_base_subgoal_valid and is_valid
-            if is_base_subgoal_valid:
+            original_pos = get_base_values(self.robot_id)
+
+            path = self.plan_base_motion_2d(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
+            if path is not None:
+                print('base mp success')
                 if self.eval:
-                    path = self.plan_base_motion_2d(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
-                    if path is not None:
-                        for way_point in path:
-                            set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
-                            time.sleep(0.05)
+                    for way_point in path:
+                        set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
+                        time.sleep(0.05)
                 else:
                     set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
-                # set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
-                # print('subgoal succeed')
             else:
-                # print('subgoal fail')
+                print('base mp failure')
                 subgoal_success = False
+                set_base_values(self.robot_id, original_pos)
+
+            # is_base_subgoal_valid = self.scene.has_node(self.floor_num, base_subgoal_pos[:2])
+            # if self.arena == 'button':
+            #     x_min = -6.0 if self.door_open else -3.0
+            #     x_max = 2.0
+            #     y_min, y_max = -2.0, 2.0
+            #     is_valid = x_min <= base_subgoal_pos[0] <= x_max and y_min <= base_subgoal_pos[1] <= y_max
+            #     is_base_subgoal_valid = is_base_subgoal_valid and is_valid
+            # if is_base_subgoal_valid:
+            #     if self.eval:
+            #         path = self.plan_base_motion_2d(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
+            #         if path is not None:
+            #             print('base mp success')
+            #             for way_point in path:
+            #                 set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
+            #                 time.sleep(0.05)
+            #         else:
+            #             print('base mp failure')
+            #     else:
+            #         set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
+            #     # set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
+            #     # print('subgoal succeed')
+            # else:
+            #     # print('subgoal fail')
+            #     subgoal_success = False
         else:
             # print('arm')
             # use arm
@@ -568,8 +603,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         if done and self.automatic_reset:
             state = self.reset()
         del state['pc']
-        # print('reward', reward)
-        # time.sleep(3)
+        print('reward', reward)
+        time.sleep(3)
         return state, reward, done, info
 
     def reset_initial_and_target_pos(self):

From a205d2011ea3cb86d99a3d407a97d0d316b995f4 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Sat, 18 Jan 2020 00:01:38 -0800
Subject: [PATCH 18/58] fix 2d mp bug

---
 gibson2/envs/motion_planner_env.py       | 11 ++++++++---
 gibson2/external/pybullet_tools/utils.py |  2 +-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index e757f2c53..cf5b2e900 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -270,7 +270,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         self.grid_resolution = 400
         self.occupancy_range = 8.0  # m
-        self.robot_footprint_radius = 0.3
+        self.robot_footprint_radius = 0.279
         self.robot_footprint_radius_in_map = int(self.robot_footprint_radius / self.occupancy_range * self.grid_resolution)
 
     def plan_base_motion(self, x, y, theta):
@@ -306,11 +306,13 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             # Hokuyo URG-04LX-UG01
             laser_linear_range = 5.6
             laser_angular_range = 240.0
+            min_laser_dist = 0.05
             laser_link_name = 'scan_link'
         elif self.config['robot'] == 'Fetch':
             # SICK TiM571-2050101 Laser Range Finder
             laser_linear_range = 25.0
             laser_angular_range = 220.0
+            min_laser_dist = 0.1
             laser_link_name = 'laser_link'
 
         laser_angular_half_range = laser_angular_range / 2.0
@@ -325,7 +327,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         state = self.get_state()
         scan = state['scan']
 
-        scan_laser = unit_vector_laser * scan * laser_linear_range
+        scan_laser = unit_vector_laser * (scan * (laser_linear_range - min_laser_dist) + min_laser_dist)
         # embed()
 
         laser_translation = laser_pose[:3]
@@ -603,8 +605,11 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         if done and self.automatic_reset:
             state = self.reset()
         del state['pc']
+
+        info['start_conf'] = original_pos
+        info['path'] = path
+
         print('reward', reward)
-        time.sleep(3)
         return state, reward, done, info
 
     def reset_initial_and_target_pos(self):
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index 93357c632..18068928c 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -2742,7 +2742,7 @@ def plan_base_motion_2d(body, end_conf, base_limits, map_2d, occupancy_range, gr
         x_dir = np.array([np.sin(theta), -np.cos(theta)])
         y_dir = np.array([np.cos(theta), np.sin(theta)])
         end_in_start_frame = [x_dir.dot(delta), y_dir.dot(delta)]
-        pts = np.array(end_in_start_frame) / (occupancy_range) * (grid_resolution / 2) + grid_resolution / 2
+        pts = np.array(end_in_start_frame) / (occupancy_range / 2) * (grid_resolution / 2) + grid_resolution / 2
         pts = pts.astype(np.int32)
         #print(pts)
 

From f8db6ea32dc926a05132c9b4fb74578803975669 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Sat, 18 Jan 2020 22:29:52 -0800
Subject: [PATCH 19/58] part of arm motion planner

---
 .../configs/fetch_interactive_nav_s2r_mp.yaml |   2 +-
 gibson2/core/physics/robot_locomotors.py      |  20 ++-
 gibson2/envs/motion_planner_env.py            | 129 ++++++++++++++----
 3 files changed, 126 insertions(+), 25 deletions(-)

diff --git a/examples/configs/fetch_interactive_nav_s2r_mp.yaml b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
index d96745970..fa87a137a 100644
--- a/examples/configs/fetch_interactive_nav_s2r_mp.yaml
+++ b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
@@ -32,7 +32,7 @@ potential_reward_weight: 1.0
 electricity_reward_weight: 0.0
 stall_torque_reward_weight: 0.0
 collision_reward_weight: -0.0
-collision_ignore_link_a_ids: [0, 1, 2]  # ignore collision with these agent's link ids
+collision_ignore_link_a_ids: [0, 1, 2, 3, 13, 20, 21, 22, 23, 24, 25, 26]  # ignore collision with these agent's link ids
 
 # discount factor
 discount_factor: 0.99
diff --git a/gibson2/core/physics/robot_locomotors.py b/gibson2/core/physics/robot_locomotors.py
index f8f4f3f52..9077bc62f 100644
--- a/gibson2/core/physics/robot_locomotors.py
+++ b/gibson2/core/physics/robot_locomotors.py
@@ -554,7 +554,8 @@ class Fetch(LocomotorRobot):
                                 scale=config.get("robot_scale", self.default_scale),
                                 resolution=config.get("resolution", 64),
                                 is_discrete=config.get("is_discrete", True),
-                                control="velocity")
+                                control="velocity",
+                                self_collision=True)
 
     def set_up_continuous_action_space(self):
         if self.action_high is not None and self.action_low is not None:
@@ -602,6 +603,23 @@ class Fetch(LocomotorRobot):
     def get_end_effector_position(self):
         return self.parts['gripper_link'].get_position()
 
+    def load(self):
+        ids = self._load_model()
+        self.eyes = self.parts["eyes"]
+
+        robot_id = ids[0]
+
+        # disable collision for immediate parent
+        for joint in range(p.getNumJoints(robot_id)):
+            info = p.getJointInfo(robot_id, joint)
+            parent_id = info[-1]
+            p.setCollisionFilterPair(robot_id, robot_id, joint, parent_id, 0)
+
+        # disable collision for torso_lift_joint and shoulder_lift_joint
+        p.setCollisionFilterPair(robot_id, robot_id, 3, 13, 0)
+
+        return ids
+
 
 class JR2(LocomotorRobot):
     mjcf_scaling = 1
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index cf5b2e900..4315fa45f 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -24,7 +24,8 @@ from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_na
     set_joint_positions, add_data_path, connect, plan_base_motion, plan_joint_motion, enable_gravity, \
     joint_controller, dump_body, load_model, joints_from_names, user_input, disconnect, get_joint_positions, \
     get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, \
-    set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_base_values, plan_base_motion_2d
+    set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_base_values, \
+    plan_base_motion_2d, get_sample_fn
 
 class MotionPlanningEnv(NavigateRandomEnv):
     def __init__(self,
@@ -241,6 +242,11 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                        initial_offset=[0, 0, 0.1 / 2.0])
         self.arm_marker.load()
 
+        self.arm_default_joint_positions = (0.38548146667743244, 1.1522793897208579,
+                                           1.2576467971105596, -0.312703569911879,
+                                           1.7404867100093226, -0.0962895617312548,
+                                           -1.4418232619629425, -1.6780152866247762)
+
         if self.arena == 'button':
             self.button_marker = VisualMarker(visual_shape=p.GEOM_SPHERE,
                                               rgba_color=[0, 1, 0, 1],
@@ -464,7 +470,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.current_step += 1
         subgoal_success = True
         use_base = action[0] > 0.0
-        use_base = True
+        #use_base = True
         if use_base:
             # print('base')
             # use base
@@ -492,7 +498,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 if self.eval:
                     for way_point in path:
                         set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
-                        time.sleep(0.05)
+                        time.sleep(0.02)
                 else:
                     set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
             else:
@@ -556,20 +562,99 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             joint_range = list(np.array(max_limits) - np.array(min_limits))
             joint_range = [item + 1 for item in joint_range]
             joint_damping = [0.1 for _ in joint_range]
-            joint_positions = p.calculateInverseKinematics(self.robot_id,
-                                                           self.robots[0].parts['gripper_link'].body_part_index,
-                                                           arm_subgoal,
-                                                           lowerLimits=min_limits,
-                                                           upperLimits=max_limits,
-                                                           jointRanges=joint_range,
-                                                           restPoses=rest_position,
-                                                           jointDamping=joint_damping,
-                                                           solver=p.IK_DLS,
-                                                           maxNumIterations=100)[2:10]
-            set_joint_positions(self.robot_id, arm_joints, joint_positions)
-            if self.eval:
-                time.sleep(0.5)  # for visualization
 
+            n_attempt = 0
+            max_attempt = 200
+            sample_fn = get_sample_fn(nav_env.robot_id, arm_joints)
+
+            while n_attempt < max_attempt: # find collision free ik solution
+
+                set_joint_positions(self.robot_id, arm_joints, sample_fn())
+
+                joint_positions = p.calculateInverseKinematics(self.robot_id,
+                                                               self.robots[0].parts['gripper_link'].body_part_index,
+                                                               arm_subgoal,
+                                                               lowerLimits=min_limits,
+                                                               upperLimits=max_limits,
+                                                               jointRanges=joint_range,
+                                                               restPoses=rest_position,
+                                                               jointDamping=joint_damping,
+                                                               solver=p.IK_DLS,
+                                                               maxNumIterations=100)[2:10]
+                n_attempt += 1
+                #print(n_attempt)
+
+                set_joint_positions(self.robot_id, arm_joints, joint_positions)
+
+                end_effector_link_state = p.getLinkState(self.robot_id, self.robots[0].parts['gripper_link'].body_part_index)
+                end_effector_position = end_effector_link_state[4]
+                diff = [arm_subgoal[0] - end_effector_position[0], arm_subgoal[1] - end_effector_position[1], arm_subgoal[2] - end_effector_position[2]]
+                ik_threshold = 0.05
+                dist = np.linalg.norm(np.array(diff))
+                close_enough = (dist < ik_threshold)
+
+                collision_free = True
+                for arm_link in arm_joints:
+                    for other_link in range(p.getNumJoints(self.robot_id)):
+                        contact_pts = p.getContactPoints(self.robot_id, self.robot_id, arm_link, other_link)
+                        #print(contact_pts)
+                        if len(contact_pts) > 0:
+                            collision_free = False
+                if collision_free and close_enough:
+                    break
+
+            if n_attempt == max_attempt:
+                ik_success = False
+            else:
+                ik_success = True
+
+            if ik_success:
+                set_joint_positions(self.robot_id, arm_joints, self.arm_default_joint_positions)
+                arm_path = plan_joint_motion(self.robot_id, arm_joints, joint_positions, disabled_collisions=set(),
+                                     self_collisions=False)
+
+                if arm_path is not None:
+                    arm_mp_success = True
+                else:
+                    arm_mp_success = False
+            else:
+                arm_mp_success = False
+
+            print('ik_success', ik_success)
+            print('arm_mp_success', arm_mp_success)
+
+            set_joint_positions(self.robot_id, arm_joints, self.arm_default_joint_positions)
+            if arm_mp_success and ik_success:
+                if self.eval:
+                    for joint_way_point in arm_path:
+                        set_joint_positions(self.robot_id, arm_joints, joint_way_point)
+                        time.sleep(0.02) # animation
+                else:
+                    set_joint_positions(self.robot_id, arm_joints, joint_positions) # set to last position in training
+
+            ##push
+            # push_vector = np.array([0,0.2,0])
+            # if arm_mp_success and ik_success:
+            #     for i in range(100):
+            #         push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
+            #
+            #         joint_positions = p.calculateInverseKinematics(self.robot_id,
+            #                                                        self.robots[0].parts['gripper_link'].body_part_index,
+            #                                                        push_goal,
+            #                                                        lowerLimits=min_limits,
+            #                                                        upperLimits=max_limits,
+            #                                                        jointRanges=joint_range,
+            #                                                        restPoses=rest_position,
+            #                                                        jointDamping=joint_damping,
+            #                                                        solver=p.IK_DLS,
+            #                                                        maxNumIterations=100)[2:10]
+            #
+            #         set_joint_positions(self.robot_id, arm_joints, joint_positions)
+            #         if eval:
+            #             time.sleep(0.02) # for visualization
+
+
+        ###### reward computation ######
         if use_base:
             # trigger re-computation of geodesic distance for get_reward
             state = self.get_state()
@@ -593,11 +678,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         if not use_base:
             # arm reset
-            arm_default_joint_positions = (0.38548146667743244, 1.1522793897208579,
-                                           1.2576467971105596, -0.312703569911879,
-                                           1.7404867100093226, -0.0962895617312548,
-                                           -1.4418232619629425, -1.6780152866247762)
-            set_joint_positions(self.robot_id, arm_joints, arm_default_joint_positions)
+
+            set_joint_positions(self.robot_id, arm_joints, self.arm_default_joint_positions)
             # need to call get_state again after arm reset (camera height could be different if torso moves)
             # TODO: should only call get_state once or twice (e.g. disable torso movement, or cache get_state result)
             state = self.get_state()
@@ -606,8 +688,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             state = self.reset()
         del state['pc']
 
-        info['start_conf'] = original_pos
-        info['path'] = path
+        if use_base:
+            info['start_conf'] = original_pos
+            info['path'] = path
 
         print('reward', reward)
         return state, reward, done, info

From 95635f4fc4f7e6e8cd92298e61fb13a001dc9596 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Sun, 19 Jan 2020 14:52:56 -0800
Subject: [PATCH 20/58] minor change

---
 gibson2/envs/motion_planner_env.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index cf5b2e900..3a190c7e2 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -453,7 +453,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.new_potential = geodesic_dist
 
     def step(self, action):
-        print('-' * 20)
+        # print('-' * 20)
         # action[0] = base_or_arm
         # action[1] = base_subgoal_theta
         # action[2] = base_subgoal_dist
@@ -464,7 +464,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.current_step += 1
         subgoal_success = True
         use_base = action[0] > 0.0
-        use_base = True
         if use_base:
             # print('base')
             # use base
@@ -477,7 +476,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             base_subgoal_pos *= base_subgoal_dist
             base_subgoal_pos = np.append(base_subgoal_pos, 0.0)
             base_subgoal_pos += robot_pos
-            print('base_subgoal_pos', base_subgoal_pos)
+            # print('base_subgoal_pos', base_subgoal_pos)
 
             self.base_marker.set_position(base_subgoal_pos)
 
@@ -488,7 +487,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
             path = self.plan_base_motion_2d(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
             if path is not None:
-                print('base mp success')
+                # print('base mp success')
                 if self.eval:
                     for way_point in path:
                         set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
@@ -496,7 +495,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 else:
                     set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
             else:
-                print('base mp failure')
+                # print('base mp failure')
                 subgoal_success = False
                 set_base_values(self.robot_id, original_pos)
 
@@ -578,7 +577,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         if subgoal_success:
             reward, info = self.get_reward([], action, info)
         else:
-            reward = -0.0
+            # failed subgoal penalty
+            reward = 0.0
         done, info = self.get_termination([], info)
 
         if self.arena == 'button':
@@ -589,6 +589,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 self.door_open = True
                 self.button_marker.set_position([100.0, 100.0, 0.0])
                 self.door.set_position([100.0, 100.0, 0.0])
+
+                # button pressed reward
                 reward += 5.0
 
         if not use_base:
@@ -606,10 +608,11 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             state = self.reset()
         del state['pc']
 
-        info['start_conf'] = original_pos
-        info['path'] = path
+        # info['start_conf'] = original_pos
+        # info['path'] = path
 
-        print('reward', reward)
+        # print('reward', reward)
+        # time.sleep(3)
         return state, reward, done, info
 
     def reset_initial_and_target_pos(self):
@@ -623,7 +626,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, np.pi), 'wxyz'))
             self.button_marker_pos = [
                 np.random.uniform(-3.0, -0.5),
-                np.random.uniform(-1.0, 1.0),
+                np.random.uniform(-1.25, 1.25),
                 1.5
             ]
             self.button_marker.set_position(self.button_marker_pos)

From 487e3339b45f04f6ed90dff2b1f57d01bbfeb6bd Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Sun, 19 Jan 2020 15:35:23 -0800
Subject: [PATCH 21/58] test pushing

---
 gibson2/envs/motion_planner_env.py       | 79 +++++++++++++++++-------
 gibson2/external/pybullet_tools/utils.py | 28 +++++++++
 2 files changed, 83 insertions(+), 24 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 4315fa45f..679fe82ff 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -25,7 +25,7 @@ from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_na
     joint_controller, dump_body, load_model, joints_from_names, user_input, disconnect, get_joint_positions, \
     get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, \
     set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_base_values, \
-    plan_base_motion_2d, get_sample_fn
+    plan_base_motion_2d, get_sample_fn, add_p2p_constraint, remove_constraint
 
 class MotionPlanningEnv(NavigateRandomEnv):
     def __init__(self,
@@ -565,8 +565,10 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
             n_attempt = 0
             max_attempt = 200
-            sample_fn = get_sample_fn(nav_env.robot_id, arm_joints)
+            sample_fn = get_sample_fn(self.robot_id, arm_joints)
 
+            if self.mode == 'gui':
+                p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, False)
             while n_attempt < max_attempt: # find collision free ik solution
 
                 set_joint_positions(self.robot_id, arm_joints, sample_fn())
@@ -603,6 +605,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 if collision_free and close_enough:
                     break
 
+            if self.mode == 'gui':
+                p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
+
             if n_attempt == max_attempt:
                 ik_success = False
             else:
@@ -610,8 +615,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
             if ik_success:
                 set_joint_positions(self.robot_id, arm_joints, self.arm_default_joint_positions)
-                arm_path = plan_joint_motion(self.robot_id, arm_joints, joint_positions, disabled_collisions=set(),
-                                     self_collisions=False)
+                arm_path = plan_joint_motion(self.robot_id, arm_joints, joint_positions,
+                                             disabled_collisions=set(), self_collisions=False)
 
                 if arm_path is not None:
                     arm_mp_success = True
@@ -632,27 +637,53 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 else:
                     set_joint_positions(self.robot_id, arm_joints, joint_positions) # set to last position in training
 
-            ##push
-            # push_vector = np.array([0,0.2,0])
-            # if arm_mp_success and ik_success:
-            #     for i in range(100):
-            #         push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
-            #
-            #         joint_positions = p.calculateInverseKinematics(self.robot_id,
-            #                                                        self.robots[0].parts['gripper_link'].body_part_index,
-            #                                                        push_goal,
-            #                                                        lowerLimits=min_limits,
-            #                                                        upperLimits=max_limits,
-            #                                                        jointRanges=joint_range,
-            #                                                        restPoses=rest_position,
-            #                                                        jointDamping=joint_damping,
-            #                                                        solver=p.IK_DLS,
-            #                                                        maxNumIterations=100)[2:10]
-            #
-            #         set_joint_positions(self.robot_id, arm_joints, joint_positions)
-            #         if eval:
-            #             time.sleep(0.02) # for visualization
+            ## push
+            # choose which body to push
+            focus = None
 
+            if arm_mp_success and ik_success:
+                points = []
+                for i in range(p.getNumBodies()):
+                    if i != self.robot_id and i != self.mesh_id:
+                        points.extend(p.getClosestPoints(self.robot_id, i, distance=0.01,
+                                                         linkIndexA=self.robots[0].parts['gripper_link'].body_part_index))
+
+                dist = 1e4
+                for point in points:
+                    if point[8] < dist:
+                        dist = point[9]
+                        #if not focus is None and not (focus[2] == point[2] and focus[4] == point[4]):
+                        # p.changeVisualShape(objectUniqueId=focus[2], linkIndex=focus[4], rgbaColor=[1, 1, 1, 1])
+                        focus = point
+                        #p.changeVisualShape(objectUniqueId=focus[2], linkIndex=focus[4], rgbaColor=[1, 0, 0, 1])
+
+                print(focus)
+
+                if not focus is None:
+                    c = add_p2p_constraint(focus[2], focus[4], self.robot_id, self.robots[0].parts['gripper_link'].body_part_index,
+                                           max_force=50)
+
+                push_vector = np.array([0.3,0.3,0])
+                if arm_mp_success and ik_success:
+                    for i in range(100):
+                        push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
+
+                        joint_positions = p.calculateInverseKinematics(self.robot_id,
+                                                                       self.robots[0].parts['gripper_link'].body_part_index,
+                                                                       push_goal,
+                                                                       lowerLimits=min_limits,
+                                                                       upperLimits=max_limits,
+                                                                       jointRanges=joint_range,
+                                                                       restPoses=rest_position,
+                                                                       jointDamping=joint_damping,
+                                                                       solver=p.IK_DLS,
+                                                                       maxNumIterations=100)[2:10]
+
+                        set_joint_positions(self.robot_id, arm_joints, joint_positions)
+                        if eval:
+                            time.sleep(0.02) # for visualization
+                if not focus is None:
+                    remove_constraint(c)
 
         ###### reward computation ######
         if use_base:
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index 18068928c..e4160b90d 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -2870,6 +2870,34 @@ def get_constraints():
     return [p.getConstraintUniqueId(i, physicsClientId=CLIENT)
             for i in range(p.getNumConstraints(physicsClientId=CLIENT))]
 
+def add_p2p_constraint(body, body_link, robot, robot_link, max_force=None):
+    if body_link == -1:
+        body_pose = get_pose(body)
+    else:
+        body_pose = get_com_pose(body, link=body_link)
+    #end_effector_pose = get_link_pose(robot, robot_link)
+    end_effector_pose = get_com_pose(robot, robot_link)
+    grasp_pose = multiply(invert(end_effector_pose), body_pose)
+    point, quat = grasp_pose
+    # TODO: can I do this when I'm not adjacent?
+    # joint axis in local frame (ignored for JOINT_FIXED)
+    #return p.createConstraint(robot, robot_link, body, body_link,
+    #                          p.JOINT_FIXED, jointAxis=unit_point(),
+    #                          parentFramePosition=unit_point(),
+    #                          childFramePosition=point,
+    #                          parentFrameOrientation=unit_quat(),
+    #                          childFrameOrientation=quat)
+    constraint = p.createConstraint(robot, robot_link, body, body_link,  # Both seem to work
+                                    p.JOINT_POINT2POINT, jointAxis=unit_point(),
+                                    parentFramePosition=point,
+                                    childFramePosition=unit_point(),
+                                    parentFrameOrientation=quat,
+                                    childFrameOrientation=unit_quat(),
+                                    physicsClientId=CLIENT)
+    if max_force is not None:
+        p.changeConstraint(constraint, maxForce=max_force, physicsClientId=CLIENT)
+    return constraint
+
 def remove_constraint(constraint):
     p.removeConstraint(constraint, physicsClientId=CLIENT)
 

From 81f636830af47fdc0c6c242bac98a0532949bf77 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Sun, 19 Jan 2020 16:13:19 -0800
Subject: [PATCH 22/58] when loading stop vis

---
 gibson2/core/simulator.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/gibson2/core/simulator.py b/gibson2/core/simulator.py
index 61f4a34e6..29947f128 100644
--- a/gibson2/core/simulator.py
+++ b/gibson2/core/simulator.py
@@ -100,6 +100,15 @@ class Simulator:
         self.scene = None
         self.objects = []
 
+    def load_without_pybullet_vis(load_func):
+        def wrapped_load_func(*args, **kwargs):
+            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, False)
+            res = load_func(*args, **kwargs)
+            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
+            return res
+        return wrapped_load_func
+
+    @load_without_pybullet_vis
     def import_scene(self, scene, texture_scale=1.0, load_texture=True, class_id=0):
 
         """
@@ -152,6 +161,7 @@ class Simulator:
         self.scene = scene
         return new_objects
 
+    @load_without_pybullet_vis
     def import_object(self, object, class_id=0):
         """
         :param object: Object to load
@@ -212,6 +222,7 @@ class Simulator:
 
         return new_object
 
+    @load_without_pybullet_vis
     def import_robot(self, robot, class_id=0):
         """
         Import a robot into Simulator
@@ -295,6 +306,7 @@ class Simulator:
 
         return ids
 
+    @load_without_pybullet_vis
     def import_interactive_object(self, obj, class_id=0):
         """
         Import articulated objects into simulator

From c72a544fac91239a6c776d5d204fae25e019a667 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Sun, 19 Jan 2020 16:17:20 -0800
Subject: [PATCH 23/58] minor clean up; fix link position

---
 gibson2/core/physics/robot_bases.py      |   2 +-
 gibson2/envs/motion_planner_env.py       | 121 +++++++++++------------
 gibson2/external/pybullet_tools/utils.py |   5 +-
 3 files changed, 63 insertions(+), 65 deletions(-)

diff --git a/gibson2/core/physics/robot_bases.py b/gibson2/core/physics/robot_bases.py
index b4145c6e3..85a68ab26 100644
--- a/gibson2/core/physics/robot_bases.py
+++ b/gibson2/core/physics/robot_bases.py
@@ -253,7 +253,7 @@ class BodyPart:
         if link_id == -1:
             (x, y, z), (a, b, c, d) = p.getBasePositionAndOrientation(body_id)
         else:
-            (x, y, z), (a, b, c, d), _, _, _, _ = p.getLinkState(body_id, link_id)
+            _, _, _, _, (x, y, z), (a, b, c, d) = p.getLinkState(body_id, link_id)
         x, y, z = x * self.scale, y * self.scale, z * self.scale
         return np.array([x, y, z, a, b, c, d])
 
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 261e58e02..8655d5a1c 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -27,6 +27,7 @@ from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_na
     set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_base_values, \
     plan_base_motion_2d, get_sample_fn
 
+
 class MotionPlanningEnv(NavigateRandomEnv):
     def __init__(self,
                  config_file,
@@ -122,7 +123,7 @@ class MotionPlanningEnv(NavigateRandomEnv):
             else:
                 for bq in path:
                     set_base_values(self.robot_id, [bq[0], bq[1], bq[2]])
-                    time.sleep(0.02) # for visualization
+                    time.sleep(0.02)  # for visualization
             state, _, done, info = super(MotionPlanningEnv, self).step([0, 0])
             self.get_additional_states()
             reward = org_potential - self.get_potential()
@@ -243,15 +244,17 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.arm_marker.load()
 
         self.arm_default_joint_positions = (0.38548146667743244, 1.1522793897208579,
-                                           1.2576467971105596, -0.312703569911879,
-                                           1.7404867100093226, -0.0962895617312548,
-                                           -1.4418232619629425, -1.6780152866247762)
+                                            1.2576467971105596, -0.312703569911879,
+                                            1.7404867100093226, -0.0962895617312548,
+                                            -1.4418232619629425, -1.6780152866247762)
+        self.arm_subgoal_threshold = 0.05
 
         if self.arena == 'button':
             self.button_marker = VisualMarker(visual_shape=p.GEOM_SPHERE,
                                               rgba_color=[0, 1, 0, 1],
                                               radius=0.3)
             self.simulator.import_object(self.button_marker, class_id=255)
+            self.button_marker_threshold = 0.5
 
             self.door = InteractiveObj(
                 os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'),
@@ -277,7 +280,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.grid_resolution = 400
         self.occupancy_range = 8.0  # m
         self.robot_footprint_radius = 0.279
-        self.robot_footprint_radius_in_map = int(self.robot_footprint_radius / self.occupancy_range * self.grid_resolution)
+        self.robot_footprint_radius_in_map = int(
+            self.robot_footprint_radius / self.occupancy_range * self.grid_resolution)
 
     def plan_base_motion(self, x, y, theta):
         half_size = self.map_size / 2.0
@@ -296,7 +300,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         grid = self.get_local_occupancy_grid()
         path = plan_base_motion_2d(self.robot_id, [x, y, theta], ((-half_size, -half_size), (half_size, half_size)),
-                                   map_2d=grid, occupancy_range=self.occupancy_range, grid_resolution=self.grid_resolution,
+                                   map_2d=grid, occupancy_range=self.occupancy_range,
+                                   grid_resolution=self.grid_resolution,
                                    robot_footprint_radius_in_map=self.robot_footprint_radius_in_map, obstacles=[])
         # if self.mode == 'gui':
         #     p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True)
@@ -349,10 +354,12 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # flip y axis
         scan_local[:, 1] *= -1
         occupancy_grid = np.zeros((self.grid_resolution, self.grid_resolution)).astype(np.uint8)
-        scan_local_in_map = scan_local / (self.occupancy_range / 2) * (self.grid_resolution / 2) + (self.grid_resolution / 2)
+        scan_local_in_map = scan_local / (self.occupancy_range / 2) * (self.grid_resolution / 2) + (
+                    self.grid_resolution / 2)
         scan_local_in_map = scan_local_in_map.reshape((1, -1, 1, 2)).astype(np.int32)
         cv2.fillPoly(occupancy_grid, scan_local_in_map, True, 1)
-        cv2.circle(occupancy_grid, (self.grid_resolution // 2, self.grid_resolution // 2), int(self.robot_footprint_radius_in_map), 1, -1)
+        cv2.circle(occupancy_grid, (self.grid_resolution // 2, self.grid_resolution // 2),
+                   int(self.robot_footprint_radius_in_map), 1, -1)
         # cv2.imwrite('occupancy_grid.png', occupancy_grid)
         # embed()
         # assert False
@@ -420,8 +427,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         additional_states = np.concatenate((waypoints_local_xy,
                                             target_pos_local[:2]))
-                                            # linear_velocity_local,
-                                            # angular_velocity_local))
+        # linear_velocity_local,
+        # angular_velocity_local))
         # cache results for reward calculation
         self.new_potential = geodesic_dist
         assert len(additional_states) == self.additional_states_dim, \
@@ -459,7 +466,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.new_potential = geodesic_dist
 
     def step(self, action):
-        # print('-' * 20)
+        # print('-' * 30)
         # action[0] = base_or_arm
         # action[1] = base_subgoal_theta
         # action[2] = base_subgoal_dist
@@ -468,7 +475,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # action[5] = arm_img_v
 
         self.current_step += 1
-        subgoal_success = True
         use_base = action[0] > 0.0
         if use_base:
             # print('base')
@@ -492,7 +498,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             original_pos = get_base_values(self.robot_id)
 
             path = self.plan_base_motion_2d(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
-            if path is not None:
+            subgoal_success = path is not None
+            if subgoal_success:
                 # print('base mp success')
                 if self.eval:
                     for way_point in path:
@@ -502,7 +509,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                     set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
             else:
                 # print('base mp failure')
-                subgoal_success = False
                 set_base_values(self.robot_id, original_pos)
 
             # is_base_subgoal_valid = self.scene.has_node(self.floor_num, base_subgoal_pos[:2])
@@ -564,13 +570,11 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
             n_attempt = 0
             max_attempt = 200
-            sample_fn = get_sample_fn(nav_env.robot_id, arm_joints)
-
-            while n_attempt < max_attempt: # find collision free ik solution
+            sample_fn = get_sample_fn(self.robot_id, arm_joints)
 
+            while n_attempt < max_attempt:  # find self-collision-free ik solution
                 set_joint_positions(self.robot_id, arm_joints, sample_fn())
-
-                joint_positions = p.calculateInverseKinematics(self.robot_id,
+                subgoal_joint_positions = p.calculateInverseKinematics(self.robot_id,
                                                                self.robots[0].parts['gripper_link'].body_part_index,
                                                                arm_subgoal,
                                                                lowerLimits=min_limits,
@@ -580,60 +584,53 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                                jointDamping=joint_damping,
                                                                solver=p.IK_DLS,
                                                                maxNumIterations=100)[2:10]
+                set_joint_positions(self.robot_id, arm_joints, subgoal_joint_positions)
+
+                dist = l2_distance(self.robots[0].get_end_effector_position(), arm_subgoal)
+                if dist < self.arm_subgoal_threshold:
+                    # print('arm_subgoal_dist', dist)
+                    collision_free = True
+                    num_joints = p.getNumJoints(self.robot_id)
+                    for arm_link in arm_joints:
+                        for other_link in range(num_joints):
+                            contact_pts = p.getContactPoints(self.robot_id, self.robot_id, arm_link, other_link)
+                            # print(contact_pts)
+                            if len(contact_pts) > 0:
+                                collision_free = False
+                                break
+                        if not collision_free:
+                            break
+                    if collision_free:
+                        break
                 n_attempt += 1
-                #print(n_attempt)
 
-                set_joint_positions(self.robot_id, arm_joints, joint_positions)
-
-                end_effector_link_state = p.getLinkState(self.robot_id, self.robots[0].parts['gripper_link'].body_part_index)
-                end_effector_position = end_effector_link_state[4]
-                diff = [arm_subgoal[0] - end_effector_position[0], arm_subgoal[1] - end_effector_position[1], arm_subgoal[2] - end_effector_position[2]]
-                ik_threshold = 0.05
-                dist = np.linalg.norm(np.array(diff))
-                close_enough = (dist < ik_threshold)
-
-                collision_free = True
-                for arm_link in arm_joints:
-                    for other_link in range(p.getNumJoints(self.robot_id)):
-                        contact_pts = p.getContactPoints(self.robot_id, self.robot_id, arm_link, other_link)
-                        #print(contact_pts)
-                        if len(contact_pts) > 0:
-                            collision_free = False
-                if collision_free and close_enough:
-                    break
-
-            if n_attempt == max_attempt:
-                ik_success = False
-            else:
-                ik_success = True
+            ik_success = n_attempt != max_attempt
 
             if ik_success:
                 set_joint_positions(self.robot_id, arm_joints, self.arm_default_joint_positions)
-                arm_path = plan_joint_motion(self.robot_id, arm_joints, joint_positions, disabled_collisions=set(),
-                                     self_collisions=False)
-
-                if arm_path is not None:
-                    arm_mp_success = True
-                else:
-                    arm_mp_success = False
+                arm_path = plan_joint_motion(self.robot_id, arm_joints, subgoal_joint_positions,
+                                             disabled_collisions=set(),
+                                             self_collisions=False)
+                subgoal_success = arm_path is not None
             else:
-                arm_mp_success = False
-
-            print('ik_success', ik_success)
-            print('arm_mp_success', arm_mp_success)
+                subgoal_success = False
 
             set_joint_positions(self.robot_id, arm_joints, self.arm_default_joint_positions)
-            if arm_mp_success and ik_success:
+
+            # print('ik_success', ik_success)
+            # print('arm_mp_success', subgoal_success)
+
+            if subgoal_success:
                 if self.eval:
                     for joint_way_point in arm_path:
                         set_joint_positions(self.robot_id, arm_joints, joint_way_point)
-                        time.sleep(0.02) # animation
+                        time.sleep(0.02)  # animation
                 else:
-                    set_joint_positions(self.robot_id, arm_joints, joint_positions) # set to last position in training
+                    set_joint_positions(self.robot_id, arm_joints, subgoal_joint_positions)  # set to last position in training
 
             ##push
             # push_vector = np.array([0,0.2,0])
-            # if arm_mp_success and ik_success:
+            # if subgoal_success:
             #     for i in range(100):
             #         push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
             #
@@ -652,7 +649,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             #         if eval:
             #             time.sleep(0.02) # for visualization
 
-
         ###### reward computation ######
         if use_base:
             # trigger re-computation of geodesic distance for get_reward
@@ -667,20 +663,21 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         done, info = self.get_termination([], info)
 
         if self.arena == 'button':
-            if not self.door_open and \
-                    l2_distance(self.robots[0].get_end_effector_position(), self.button_marker_pos) < 0.5:
+            dist = l2_distance(self.robots[0].get_end_effector_position(), self.button_marker_pos)
+            # print('button_marker_dist', dist)
+            if not self.door_open and dist < self.button_marker_threshold:
                 # touch the button -> remove the button and the door
                 print("OPEN DOOR")
                 self.door_open = True
                 self.button_marker.set_position([100.0, 100.0, 0.0])
                 self.door.set_position([100.0, 100.0, 0.0])
+                self.simulator.sync()
 
                 # button pressed reward
                 reward += 5.0
 
         if not use_base:
             # arm reset
-
             set_joint_positions(self.robot_id, arm_joints, self.arm_default_joint_positions)
             # need to call get_state again after arm reset (camera height could be different if torso moves)
             # TODO: should only call get_state once or twice (e.g. disable torso movement, or cache get_state result)
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index a24e5f9eb..c44a575cf 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -2462,8 +2462,9 @@ def get_collision_fn(body, joints, obstacles, attachments, self_collisions, disa
     # TODO: test self collision with the holding
     def collision_fn(q):
         if not all_between(lower_limits, q, upper_limits):
-            print(lower_limits, q, upper_limits)
-            print('Joint limits violated')
+            pass
+            # print(lower_limits, q, upper_limits)
+            # print('Joint limits violated')
             #return True
         set_joint_positions(body, joints, q)
         for attachment in attachments:

From af1c543c085bcf910410b95fba43376a5c01f5d3 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Sun, 19 Jan 2020 19:41:26 -0800
Subject: [PATCH 24/58] door env (wip)

---
 gibson2/envs/motion_planner_env.py | 168 ++++++++++++++++++-----------
 1 file changed, 103 insertions(+), 65 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 077068808..4c093e55e 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -251,12 +251,15 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         self.push_dist_threshold = 0.01
 
+        self.failed_subgoal_penalty = -0.0
+
         if self.arena == 'button':
             self.button_marker = VisualMarker(visual_shape=p.GEOM_SPHERE,
                                               rgba_color=[0, 1, 0, 1],
                                               radius=0.3)
             self.simulator.import_object(self.button_marker, class_id=255)
-            self.button_marker_threshold = 0.5
+            self.button_threshold = 0.5
+            self.button_reward = 5.0
 
             self.door = InteractiveObj(
                 os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'),
@@ -273,6 +276,26 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 self.simulator.import_object(wall, class_id=3)
                 wall.set_position_rotation(wall_pose[0], wall_pose[1])
                 self.walls += [wall]
+        elif self.arena == 'push_door':
+            self.door = InteractiveObj(
+                os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'),
+                scale=1.0)
+            self.simulator.import_interactive_object(self.door, class_id=2)
+            self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
+            self.door_axis_link_id = 1
+
+            # self.wall_poses = [
+            #     [[-3.5, 7.5, 1], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')],
+            #     [[-3.5, -7.5, 1], quatToXYZW(euler2quat(0, 0, -np.pi / 2.0), 'wxyz')],
+            # ]
+            # self.walls = []
+            # for wall_pose in self.wall_poses:
+            #     wall = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls.urdf'),
+            #                           scale=1)
+            #     self.simulator.import_object(wall, class_id=3)
+            #     wall.set_position_rotation(wall_pose[0], wall_pose[1])
+            #     self.walls += [wall]
+
 
     def prepare_motion_planner(self):
         self.robot_id = self.robots[0].robot_ids[0]
@@ -468,6 +491,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.new_potential = geodesic_dist
 
     def step(self, action):
+        embed()
         # print('-' * 30)
         # action[0] = base_or_arm
         # action[1] = base_subgoal_theta
@@ -629,60 +653,68 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                         set_joint_positions(self.robot_id, arm_joints, joint_way_point)
                         time.sleep(0.02)  # animation
                 else:
-                    set_joint_positions(self.robot_id, arm_joints,
-                                        subgoal_joint_positions)  # set to last position in training
+                    set_joint_positions(self.robot_id, arm_joints, subgoal_joint_positions)
+
+                embed()
 
                 ## push
                 # TODO: figure out push dist threshold (i.e. can push object x centimeters away from the gripper)
                 # TODO: whitelist object ids that can be pushed
 
-                # # find the closest object
-                # focus = None
-                # points = []
-                # pushable_obj_ids = [self.door.body_id]
-                # for i in pushable_obj_ids:
-                #         points.extend(
-                #             p.getClosestPoints(self.robot_id, i, distance=self.push_dist_threshold,
-                #                                linkIndexA=self.robots[0].parts['gripper_link'].body_part_index))
-                # dist = 1e4
-                # for point in points:
-                #     if point[8] < dist:
-                #         dist = point[8]
-                #         #if not focus is None and not (focus[2] == point[2] and focus[4] == point[4]):
-                #         # p.changeVisualShape(objectUniqueId=focus[2], linkIndex=focus[4], rgbaColor=[1, 1, 1, 1])
-                #         focus = point
-                #         #p.changeVisualShape(objectUniqueId=focus[2], linkIndex=focus[4], rgbaColor=[1, 0, 0, 1])
-                #
-                # # print(focus)
-                #
-                # if focus is not None:
-                #     c = add_p2p_constraint(focus[2],
-                #                            focus[4],
-                #                            self.robot_id,
-                #                            self.robots[0].parts['gripper_link'].body_part_index,
-                #                            max_force=50)
-                #
-                # push_vector = np.array([0.3, 0.3, 0])
-                #
-                # for i in range(100):
-                #     push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
-                #
-                #     joint_positions = p.calculateInverseKinematics(self.robot_id,
-                #                                                    self.robots[0].parts['gripper_link'].body_part_index,
-                #                                                    push_goal,
-                #                                                    lowerLimits=min_limits,
-                #                                                    upperLimits=max_limits,
-                #                                                    jointRanges=joint_range,
-                #                                                    restPoses=rest_position,
-                #                                                    jointDamping=joint_damping,
-                #                                                    solver=p.IK_DLS,
-                #                                                    maxNumIterations=100)[2:10]
-                #
-                #     set_joint_positions(self.robot_id, arm_joints, joint_positions)
-                #     if eval:
-                #         time.sleep(0.02)  # for visualization
-                # if focus is not None:
-                #     remove_constraint(c)
+                # find the closest object
+                focus = None
+                points = []
+                pushable_obj_ids = [self.door.body_id]
+                for i in pushable_obj_ids:
+                        points.extend(
+                            p.getClosestPoints(self.robot_id, i, distance=self.push_dist_threshold,
+                                               linkIndexA=self.robots[0].parts['gripper_link'].body_part_index))
+                dist = 1e4
+                for point in points:
+                    if point[8] < dist:
+                        dist = point[8]
+                        #if not focus is None and not (focus[2] == point[2] and focus[4] == point[4]):
+                        # p.changeVisualShape(objectUniqueId=focus[2], linkIndex=focus[4], rgbaColor=[1, 1, 1, 1])
+                        focus = point
+                        #p.changeVisualShape(objectUniqueId=focus[2], linkIndex=focus[4], rgbaColor=[1, 0, 0, 1])
+
+                # print(focus)
+
+                if focus is not None:
+                    c = add_p2p_constraint(focus[2],
+                                           focus[4],
+                                           self.robot_id,
+                                           self.robots[0].parts['gripper_link'].body_part_index,
+                                           max_force=5000)
+
+                push_vector = np.array([-0.5, 0.0, 0])
+
+                base_pose = get_base_values(self.robot_id)
+                for i in range(100):
+                    push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
+
+                    joint_positions = p.calculateInverseKinematics(self.robot_id,
+                                                                   self.robots[0].parts['gripper_link'].body_part_index,
+                                                                   push_goal,
+                                                                   lowerLimits=min_limits,
+                                                                   upperLimits=max_limits,
+                                                                   jointRanges=joint_range,
+                                                                   restPoses=rest_position,
+                                                                   jointDamping=joint_damping,
+                                                                   solver=p.IK_DLS,
+                                                                   maxNumIterations=100)[2:10]
+                    set_base_values(self.robot_id, base_pose)
+
+                    set_joint_positions(self.robot_id, arm_joints, joint_positions)
+                    self.simulator.set_timestep(0.002)
+                    self.simulator_step()
+                    self.simulator.set_timestep(1e-8)
+                    if eval:
+                        time.sleep(0.02)  # for visualization
+                if focus is not None:
+                    remove_constraint(c)
+
+                set_base_values(self.robot_id, base_pose)
 
         ###### reward computation ######
         if use_base:
@@ -694,22 +726,20 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             reward, info = self.get_reward([], action, info)
         else:
             # failed subgoal penalty
-            reward = 0.0
+            reward = self.failed_subgoal_penalty
         done, info = self.get_termination([], info)
 
         if self.arena == 'button':
             dist = l2_distance(self.robots[0].get_end_effector_position(), self.button_marker_pos)
             # print('button_marker_dist', dist)
-            if not self.door_open and dist < self.button_marker_threshold:
+            if not self.door_open and dist < self.button_threshold:
                 # touch the button -> remove the button and the door
                 print("OPEN DOOR")
                 self.door_open = True
                 self.button_marker.set_position([100.0, 100.0, 0.0])
                 self.door.set_position([100.0, 100.0, 0.0])
-                self.simulator.sync()
 
-                # button pressed reward
-                reward += 5.0
+                reward += self.button_reward
 
         if not use_base:
             # arm reset
@@ -727,24 +757,32 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             info['path'] = path
         # print('reward', reward)
         # time.sleep(3)
+
+        self.simulator.sync()
+
         return state, reward, done, info
 
     def reset_initial_and_target_pos(self):
-        if self.arena == 'button':
+        if self.arena in ['button', 'push_door']:
             floor_height = self.scene.get_floor_height(self.floor_num)
-            self.initial_pos = np.array([1.2, 0.0, floor_height])
+            self.initial_pos = np.array([-3.0, 0.0, floor_height])
             self.target_pos = np.array([-5.0, 0.0, floor_height])
             self.robots[0].set_position(pos=[self.initial_pos[0],
                                              self.initial_pos[1],
                                              self.initial_pos[2] + self.random_init_z_offset])
             self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, np.pi), 'wxyz'))
-            self.button_marker_pos = [
-                np.random.uniform(-3.0, -0.5),
-                np.random.uniform(-1.25, 1.25),
-                1.5
-            ]
-            self.button_marker.set_position(self.button_marker_pos)
-            self.door.set_position_rotation([-3.5, 0, 0.0], [0, 0, np.sqrt(0.5), np.sqrt(0.5)])
+
+            if self.arena == 'button':
+                self.button_marker_pos = [
+                    np.random.uniform(-3.0, -0.5),
+                    np.random.uniform(-1.25, 1.25),
+                    1.5
+                ]
+                self.button_marker.set_position(self.button_marker_pos)
+                self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
+            elif self.arena == 'push_door':
+                p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=0.0, targetVelocity=0.0)
+
         else:
             super(MotionPlanningBaseArmEnv, self).reset_initial_and_target_pos()
 
@@ -776,7 +814,7 @@ if __name__ == '__main__':
                                        action_timestep=1.0 / 1000000.0,
                                        physics_timestep=1.0 / 1000000.0,
                                        eval=args.mode == 'gui',
-                                       arena='button',
+                                       arena='push_door',
                                        )
 
     for episode in range(100):

From aa6a8b1c1be6dc2059b68f6016ceb444a49ae4f5 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Sun, 19 Jan 2020 21:44:57 -0800
Subject: [PATCH 25/58] door opening hack

---
 gibson2/core/physics/interactive_objects.py | 10 ++--
 gibson2/envs/motion_planner_env.py          | 52 ++++++++++-----------
 2 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/gibson2/core/physics/interactive_objects.py b/gibson2/core/physics/interactive_objects.py
index ef5134132..69c9cd232 100644
--- a/gibson2/core/physics/interactive_objects.py
+++ b/gibson2/core/physics/interactive_objects.py
@@ -158,10 +158,11 @@ class VisualMarker(object):
 
 
 class BoxShape(object):
-    def __init__(self, pos=[1, 2, 3], dim=[1, 2, 3]):
+    def __init__(self, pos=[1, 2, 3], dim=[1, 2, 3], visual_only = False):
         self.basePos = pos
         self.dimension = dim
         self.body_id = None
+        self.visual_only = visual_only
 
     def load(self):
         mass = 1000
@@ -169,8 +170,11 @@ class BoxShape(object):
 
         colBoxId = p.createCollisionShape(p.GEOM_BOX, halfExtents=self.dimension)
         visualShapeId = p.createVisualShape(p.GEOM_BOX, halfExtents=self.dimension)
-
-        self.body_id = p.createMultiBody(baseMass=mass,
+        if self.visual_only:
+            self.body_id = p.createMultiBody(baseCollisionShapeIndex=-1,
+                                             baseVisualShapeIndex=visualShapeId)
+        else:
+            self.body_id = p.createMultiBody(baseMass=mass,
                                          baseCollisionShapeIndex=colBoxId,
                                          baseVisualShapeIndex=visualShapeId)
 
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 4c093e55e..d6d2cdade 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -273,8 +273,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             for wall_pose in self.wall_poses:
                 wall = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls.urdf'),
                                       scale=1)
-                self.simulator.import_object(wall, class_id=3)
+                self.simulator.import_interactive_object(wall, class_id=3)
                 wall.set_position_rotation(wall_pose[0], wall_pose[1])
+                self.simulator.sync()
                 self.walls += [wall]
         elif self.arena == 'push_door':
             self.door = InteractiveObj(
@@ -283,19 +284,20 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.simulator.import_interactive_object(self.door, class_id=2)
             self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
             self.door_axis_link_id = 1
+            # for i in range(p.getNumJoints(self.door.body_id)):
+            #    for j in range(p.getNumJoints(self.robot_id)):
+            #        #if j != self.robots[0].parts['gripper_link'].body_part_index:
+            #        p.setCollisionFilterPair(self.door.body_id, self.robot_id, i, j, 0) # disable collision for robot and door
 
-            # self.wall_poses = [
-            #     [[-3.5, 7.5, 1], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')],
-            #     [[-3.5, -7.5, 1], quatToXYZW(euler2quat(0, 0, -np.pi / 2.0), 'wxyz')],
-            # ]
-            # self.walls = []
-            # for wall_pose in self.wall_poses:
-            #     wall = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls.urdf'),
-            #                           scale=1)
-            #     self.simulator.import_object(wall, class_id=3)
-            #     wall.set_position_rotation(wall_pose[0], wall_pose[1])
-            #     self.walls += [wall]
+            self.walls = []
+            wall = BoxShape([-3.5, 1, 0.7], [0.2, 0.35, 0.5], visual_only=True)
+            self.simulator.import_interactive_object(wall, class_id=3)
+            self.walls += [wall]
+            wall = BoxShape([-3.5, -1, 0.7], [0.2, 0.45, 0.5], visual_only=True)
+            self.simulator.import_interactive_object(wall, class_id=3)
+            self.walls += [wall]
 
+            self.simulator.sync()
 
     def prepare_motion_planner(self):
         self.robot_id = self.robots[0].robot_ids[0]
@@ -491,7 +493,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.new_potential = geodesic_dist
 
     def step(self, action):
-        embed()
         # print('-' * 30)
         # action[0] = base_or_arm
         # action[1] = base_subgoal_theta
@@ -499,7 +500,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # action[3] = base_orn
         # action[4] = arm_img_u
         # action[5] = arm_img_v
-
+        
         self.current_step += 1
         use_base = action[0] > 0.0
         if use_base:
@@ -587,9 +588,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                             'forearm_roll_joint',
                                             'wrist_flex_joint',
                                             'wrist_roll_joint'])
-            max_limits = [0., 0.] + get_max_limits(self.robot_id, arm_joints) + [0.05, 0.05]
-            min_limits = [0., 0.] + get_min_limits(self.robot_id, arm_joints) + [0., 0.]
-            rest_position = [0., 0.] + list(get_joint_positions(self.robot_id, arm_joints)) + [0.04, 0.04]
+            max_limits = [0., 0.] + get_max_limits(self.robot_id, arm_joints)
+            min_limits = [0., 0.] + get_min_limits(self.robot_id, arm_joints)
+            rest_position = [0., 0.] + list(get_joint_positions(self.robot_id, arm_joints))
             joint_range = list(np.array(max_limits) - np.array(min_limits))
             joint_range = [item + 1 for item in joint_range]
             joint_damping = [0.1 for _ in joint_range]
@@ -655,8 +656,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 else:
                     set_joint_positions(self.robot_id, arm_joints, subgoal_joint_positions)
 
-                embed()
-
                 ## push
                 # TODO: figure out push dist threshold (i.e. can push object x centimeters away from the gripper)
                 # TODO: whitelist object ids that can be pushed
@@ -666,17 +665,17 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 points = []
                 pushable_obj_ids = [self.door.body_id]
                 for i in pushable_obj_ids:
-                        points.extend(
-                            p.getClosestPoints(self.robot_id, i, distance=self.push_dist_threshold,
-                                               linkIndexA=self.robots[0].parts['gripper_link'].body_part_index))
+                    points.extend(
+                        p.getClosestPoints(self.robot_id, i, distance=self.push_dist_threshold,
+                                           linkIndexA=self.robots[0].parts['gripper_link'].body_part_index))
                 dist = 1e4
                 for point in points:
                     if point[8] < dist:
                         dist = point[8]
-                        #if not focus is None and not (focus[2] == point[2] and focus[4] == point[4]):
+                        # if not focus is None and not (focus[2] == point[2] and focus[4] == point[4]):
                         # p.changeVisualShape(objectUniqueId=focus[2], linkIndex=focus[4], rgbaColor=[1, 1, 1, 1])
                         focus = point
-                        #p.changeVisualShape(objectUniqueId=focus[2], linkIndex=focus[4], rgbaColor=[1, 0, 0, 1])
+                        # p.changeVisualShape(objectUniqueId=focus[2], linkIndex=focus[4], rgbaColor=[1, 0, 0, 1])
 
                 # print(focus)
 
@@ -685,11 +684,12 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                            focus[4],
                                            self.robot_id,
                                            self.robots[0].parts['gripper_link'].body_part_index,
-                                           max_force=5000)
+                                           max_force=500)
 
                 push_vector = np.array([-0.5, 0.0, 0])
 
                 base_pose = get_base_values(self.robot_id)
+
                 for i in range(100):
                     push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
 
@@ -709,7 +709,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                     self.simulator.set_timestep(0.002)
                     self.simulator_step()
                     self.simulator.set_timestep(1e-8)
-                    if eval:
+                    if self.eval:
                         time.sleep(0.02)  # for visualization
                 if focus is not None:
                     remove_constraint(c)

From 48a31ffbfe802127d29480b37e5b7f66d52a24f4 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Sun, 19 Jan 2020 22:16:06 -0800
Subject: [PATCH 26/58] door opening without constraint

---
 gibson2/envs/motion_planner_env.py | 42 +++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index d6d2cdade..bbc6cd772 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -290,10 +290,10 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             #        p.setCollisionFilterPair(self.door.body_id, self.robot_id, i, j, 0) # disable collision for robot and door
 
             self.walls = []
-            wall = BoxShape([-3.5, 1, 0.7], [0.2, 0.35, 0.5], visual_only=True)
+            wall = BoxShape([-3.5, 1, 0.7], [0.2, 0.35, 0.7], visual_only=True)
             self.simulator.import_interactive_object(wall, class_id=3)
             self.walls += [wall]
-            wall = BoxShape([-3.5, -1, 0.7], [0.2, 0.45, 0.5], visual_only=True)
+            wall = BoxShape([-3.5, -1, 0.7], [0.2, 0.45, 0.7], visual_only=True)
             self.simulator.import_interactive_object(wall, class_id=3)
             self.walls += [wall]
 
@@ -500,7 +500,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # action[3] = base_orn
         # action[4] = arm_img_u
         # action[5] = arm_img_v
-        
+
         self.current_step += 1
         use_base = action[0] > 0.0
         if use_base:
@@ -596,7 +596,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             joint_damping = [0.1 for _ in joint_range]
 
             n_attempt = 0
-            max_attempt = 200
+            max_attempt = 2000
             sample_fn = get_sample_fn(self.robot_id, arm_joints)
 
             while n_attempt < max_attempt:  # find self-collision-free ik solution
@@ -619,6 +619,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                     # print('arm_subgoal_dist', dist)
                     collision_free = True
                     num_joints = p.getNumJoints(self.robot_id)
+                    # self collision
                     for arm_link in arm_joints:
                         for other_link in range(num_joints):
                             contact_pts = p.getContactPoints(self.robot_id, self.robot_id, arm_link, other_link)
@@ -628,6 +629,21 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                 break
                         if not collision_free:
                             break
+
+                    # # arm collision with door
+                    # if self.arena == 'push_door':
+                    #     for arm_link in arm_joints:
+                    #         for door_link in range(p.getNumJoints(self.door.body_id)):
+                    #             if arm_link != self.robots[0].parts['gripper_link'].body_part_index:
+                    #                 contact_pts = p.getContactPoints(self.robot_id, self.door.body_id, arm_link,
+                    #                                                  door_link)
+                    #                 if len(contact_pts) > 0:
+                    #                     print(arm_link, 'in collision with door')
+                    #                     collision_free = False
+                    #                     break
+                    #         if not collision_free:
+                    #             break
+
                     if collision_free:
                         break
                 n_attempt += 1
@@ -679,12 +695,12 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
                 # print(focus)
 
-                if focus is not None:
-                    c = add_p2p_constraint(focus[2],
-                                           focus[4],
-                                           self.robot_id,
-                                           self.robots[0].parts['gripper_link'].body_part_index,
-                                           max_force=500)
+                # if focus is not None:
+                #     c = add_p2p_constraint(focus[2],
+                #                            focus[4],
+                #                            self.robot_id,
+                #                            self.robots[0].parts['gripper_link'].body_part_index,
+                #                            max_force=500)
 
                 push_vector = np.array([-0.5, 0.0, 0])
 
@@ -711,8 +727,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                     self.simulator.set_timestep(1e-8)
                     if self.eval:
                         time.sleep(0.02)  # for visualization
-                if focus is not None:
-                    remove_constraint(c)
+                # if focus is not None:
+                #    remove_constraint(c)
 
                 set_base_values(self.robot_id, base_pose)
 
@@ -792,6 +808,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # embed()
         if self.arena == 'button':
             self.door_open = False
+
+        self.simulator.sync()
         return state
 
 

From 72842a3943cf23b48d98daeb85501ef81b8a20e4 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Sun, 19 Jan 2020 23:45:40 -0800
Subject: [PATCH 27/58] door pushing with physics simulation

---
 gibson2/envs/motion_planner_env.py       | 44 +++++++++++++-----------
 gibson2/external/pybullet_tools/utils.py |  4 +--
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index bbc6cd772..585b59711 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -577,6 +577,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
                                             quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
             arm_subgoal = transform_mat.dot(np.array([-point[2], -point[0], point[1], 1]))[:3]
+            #arm_subgoal[0] += 0.15
             self.arm_marker.set_position(arm_subgoal)
 
             arm_joints = joints_from_names(self.robot_id,
@@ -596,9 +597,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             joint_damping = [0.1 for _ in joint_range]
 
             n_attempt = 0
-            max_attempt = 2000
+            max_attempt = 50
             sample_fn = get_sample_fn(self.robot_id, arm_joints)
-
+            base_pose = get_base_values(self.robot_id)
             while n_attempt < max_attempt:  # find self-collision-free ik solution
                 set_joint_positions(self.robot_id, arm_joints, sample_fn())
                 subgoal_joint_positions = p.calculateInverseKinematics(self.robot_id,
@@ -613,6 +614,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                                        solver=p.IK_DLS,
                                                                        maxNumIterations=100)[2:10]
                 set_joint_positions(self.robot_id, arm_joints, subgoal_joint_positions)
+                self.simulator_step()
+                set_base_values(self.robot_id, base_pose)
 
                 dist = l2_distance(self.robots[0].get_end_effector_position(), arm_subgoal)
                 if dist < self.arm_subgoal_threshold:
@@ -630,19 +633,19 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                         if not collision_free:
                             break
 
-                    # # arm collision with door
-                    # if self.arena == 'push_door':
-                    #     for arm_link in arm_joints:
-                    #         for door_link in range(p.getNumJoints(self.door.body_id)):
-                    #             if arm_link != self.robots[0].parts['gripper_link'].body_part_index:
-                    #                 contact_pts = p.getContactPoints(self.robot_id, self.door.body_id, arm_link,
-                    #                                                  door_link)
-                    #                 if len(contact_pts) > 0:
-                    #                     print(arm_link, 'in collision with door')
-                    #                     collision_free = False
-                    #                     break
-                    #         if not collision_free:
-                    #             break
+                    # arm collision with door
+                    if self.arena == 'push_door':
+                        for arm_link in arm_joints:
+                            for door_link in range(p.getNumJoints(self.door.body_id)):
+                                if arm_link != self.robots[0].parts['gripper_link'].body_part_index:
+                                    contact_pts = p.getContactPoints(self.robot_id, self.door.body_id, arm_link,
+                                                                     door_link)
+                                    if len(contact_pts) > 0:
+                                        print(arm_link, 'in collision with door')
+                                        collision_free = False
+                                        break
+                            if not collision_free:
+                                break
 
                     if collision_free:
                         break
@@ -703,8 +706,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 #                            max_force=500)
 
                 push_vector = np.array([-0.5, 0.0, 0])
-
-                base_pose = get_base_values(self.robot_id)
+                door_pos = p.getJointState(self.door.body_id, self.door_axis_link_id)[0]
+                p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=door_pos, targetVelocity=0.0)
 
                 for i in range(100):
                     push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
@@ -721,16 +724,17 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                                    maxNumIterations=100)[2:10]
                     set_base_values(self.robot_id, base_pose)
 
-                    set_joint_positions(self.robot_id, arm_joints, joint_positions)
+                    #set_joint_positions(self.robot_id, arm_joints, joint_positions)
+                    control_joints(self.robot_id, arm_joints, joint_positions)
                     self.simulator.set_timestep(0.002)
                     self.simulator_step()
-                    self.simulator.set_timestep(1e-8)
                     if self.eval:
                         time.sleep(0.02)  # for visualization
                 # if focus is not None:
                 #    remove_constraint(c)
 
-                set_base_values(self.robot_id, base_pose)
+            self.simulator.set_timestep(1e-8)
+            set_base_values(self.robot_id, base_pose)
 
         ###### reward computation ######
         if use_base:
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index 99e9ed15a..b8cb21cf9 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -3038,13 +3038,13 @@ def control_joints(body, joints, positions):
     # TODO: the whole PR2 seems to jitter
     #kp = 1.0
     #kv = 0.3
-    #forces = [get_max_force(body, joint) for joint in joints]
+    forces = [get_max_force(body, joint)*100 for joint in joints]
     #forces = [5000]*len(joints)
     #forces = [20000]*len(joints)
     return p.setJointMotorControlArray(body, joints, p.POSITION_CONTROL,
                                        targetPositions=positions,
                                        targetVelocities=[0.0] * len(joints),
-                                       physicsClientId=CLIENT) #,
+                                       physicsClientId=CLIENT, forces=forces) #,
                                         #positionGains=[kp] * len(joints),
                                         #velocityGains=[kv] * len(joints),)
                                         #forces=forces)

From fb90958b31bf8d3aaef95e9631d5d0a8255264a1 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Mon, 20 Jan 2020 15:33:48 -0800
Subject: [PATCH 28/58] clean up push_door env, cache traversable graph

---
 gibson2/core/physics/scene.py            |  43 +-
 gibson2/envs/locomotor_env.py            |   4 +
 gibson2/envs/motion_planner_env.py       | 669 +++++++++++++----------
 gibson2/external/pybullet_tools/utils.py |   2 +-
 4 files changed, 409 insertions(+), 309 deletions(-)

diff --git a/gibson2/core/physics/scene.py b/gibson2/core/physics/scene.py
index c9e584e20..873ea21c4 100644
--- a/gibson2/core/physics/scene.py
+++ b/gibson2/core/physics/scene.py
@@ -11,6 +11,7 @@ from PIL import Image
 import cv2
 import networkx as nx
 from IPython import embed
+import pickle
 
 class Scene:
     def load(self):
@@ -121,6 +122,7 @@ class BuildingScene(Scene):
         self.num_waypoints = num_waypoints
         self.waypoint_interval = int(waypoint_resolution / trav_map_resolution)
         self.mesh_body_id = None
+
     def l2_distance(self, a, b):
         return np.linalg.norm(np.array(a) - np.array(b))
 
@@ -193,26 +195,35 @@ class BuildingScene(Scene):
                 trav_map[trav_map < 255] = 0
 
                 if self.build_graph:
-                    g = nx.Graph()
-                    for i in range(self.trav_map_size):
-                        for j in range(self.trav_map_size):
-                            if trav_map[i, j] > 0:
-                                g.add_node((i, j))
-                                # 8-connected graph
-                                neighbors = [(i - 1, j - 1), (i, j - 1), (i + 1, j - 1), (i - 1, j)]
-                                for n in neighbors:
-                                    if 0 <= n[0] < self.trav_map_size and 0 <= n[1] < self.trav_map_size and \
-                                            trav_map[n[0], n[1]] > 0:
-                                        g.add_edge(n, (i, j), weight=self.l2_distance(n, (i, j)))
+                    graph_file = os.path.join(get_model_path(self.model_id), 'floor_trav_{}.p'.format(f))
+                    if os.path.isfile(graph_file):
+                        print("load traversable graph")
+                        with open(graph_file, 'rb') as pfile:
+                            g = pickle.load(pfile)
+                    else:
+                        print("build traversable graph")
+                        g = nx.Graph()
+                        for i in range(self.trav_map_size):
+                            for j in range(self.trav_map_size):
+                                if trav_map[i, j] > 0:
+                                    g.add_node((i, j))
+                                    # 8-connected graph
+                                    neighbors = [(i - 1, j - 1), (i, j - 1), (i + 1, j - 1), (i - 1, j)]
+                                    for n in neighbors:
+                                        if 0 <= n[0] < self.trav_map_size and 0 <= n[1] < self.trav_map_size and \
+                                                trav_map[n[0], n[1]] > 0:
+                                            g.add_edge(n, (i, j), weight=self.l2_distance(n, (i, j)))
+
+                        # only take the largest connected component
+                        largest_cc = max(nx.connected_components(g), key=len)
+                        g = g.subgraph(largest_cc).copy()
+                        with open(graph_file, 'wb') as pfile:
+                            pickle.dump(g, pfile, protocol=pickle.HIGHEST_PROTOCOL)
 
-                    # only take the largest connected component
-                    largest_cc = max(nx.connected_components(g), key=len)
-                    g = g.subgraph(largest_cc).copy()
                     self.floor_graph.append(g)
-
                     # update trav_map accordingly
                     trav_map[:, :] = 0
-                    for node in largest_cc:
+                    for node in g.nodes:
                         trav_map[node[0], node[1]] = 255
 
                 self.floor_map.append(trav_map)
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 62ada37ce..fa82af9ba 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -667,6 +667,9 @@ class NavigateEnv(BaseEnv):
         collision_links_flatten = [item for sublist in collision_links for item in sublist]
         return len(collision_links_flatten) == 0
 
+    def before_reset_agent(self):
+        return
+
     def after_reset_agent(self):
         return
 
@@ -676,6 +679,7 @@ class NavigateEnv(BaseEnv):
         """
 
         self.current_episode += 1
+        self.before_reset_agent()
         self.reset_agent()
         self.after_reset_agent()
 
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 585b59711..f505b3642 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -1,6 +1,6 @@
 from gibson2.core.physics.interactive_objects import VisualMarker, InteractiveObj, BoxShape
 import gibson2
-from gibson2.utils.utils import parse_config, rotate_vector_3d, l2_distance, quatToXYZW
+from gibson2.utils.utils import parse_config, rotate_vector_3d, rotate_vector_2d, l2_distance, quatToXYZW
 from gibson2.envs.base_env import BaseEnv
 from transforms3d.euler import euler2quat
 from collections import OrderedDict
@@ -187,6 +187,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                        automatic_reset=automatic_reset,
                                                        random_height=False,
                                                        device_idx=device_idx)
+
+        # # real sensor spec for Fetch
         # resolution = self.config.get('resolution', 64)
         # width = resolution
         # height = int(width * (480.0 / 640.0))
@@ -216,19 +218,21 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         self.new_potential = None
         self.collision_reward_weight = collision_reward_weight
+
         # action[0] = base_or_arm
         # action[1] = base_subgoal_theta
         # action[2] = base_subgoal_dist
         # action[3] = base_orn
         # action[4] = arm_img_u
         # action[5] = arm_img_v
-        self.action_space = gym.spaces.Box(shape=(6,),
+        # action[6] = arm_push_vector_x
+        # action[7] = arm_push_vector_y
+        self.action_space = gym.spaces.Box(shape=(8,),
                                            low=-1.0,
                                            high=1.0,
                                            dtype=np.float32)
         self.prepare_motion_planner()
 
-        # visualization
         self.base_marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
                                         rgba_color=[1, 0, 0, 1],
                                         radius=0.1,
@@ -242,24 +246,30 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                        length=0.1,
                                        initial_offset=[0, 0, 0.1 / 2.0])
         self.arm_marker.load()
-
         self.arm_default_joint_positions = (0.38548146667743244, 1.1522793897208579,
                                             1.2576467971105596, -0.312703569911879,
                                             1.7404867100093226, -0.0962895617312548,
                                             -1.4418232619629425, -1.6780152866247762)
+        self.arm_joint_ids = joints_from_names(self.robot_id,
+                                               ['torso_lift_joint',
+                                                'shoulder_pan_joint',
+                                                'shoulder_lift_joint',
+                                                'upperarm_roll_joint',
+                                                'elbow_flex_joint',
+                                                'forearm_roll_joint',
+                                                'wrist_flex_joint',
+                                                'wrist_roll_joint'])
         self.arm_subgoal_threshold = 0.05
-
-        self.push_dist_threshold = 0.01
-
         self.failed_subgoal_penalty = -0.0
 
         if self.arena == 'button':
+            self.button_threshold = 0.5
+            self.button_reward = 5.0
+
             self.button_marker = VisualMarker(visual_shape=p.GEOM_SPHERE,
                                               rgba_color=[0, 1, 0, 1],
                                               radius=0.3)
             self.simulator.import_object(self.button_marker, class_id=255)
-            self.button_threshold = 0.5
-            self.button_reward = 5.0
 
             self.door = InteractiveObj(
                 os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'),
@@ -275,8 +285,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                       scale=1)
                 self.simulator.import_interactive_object(wall, class_id=3)
                 wall.set_position_rotation(wall_pose[0], wall_pose[1])
-                self.simulator.sync()
                 self.walls += [wall]
+
         elif self.arena == 'push_door':
             self.door = InteractiveObj(
                 os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'),
@@ -284,20 +294,19 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.simulator.import_interactive_object(self.door, class_id=2)
             self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
             self.door_axis_link_id = 1
-            # for i in range(p.getNumJoints(self.door.body_id)):
-            #    for j in range(p.getNumJoints(self.robot_id)):
-            #        #if j != self.robots[0].parts['gripper_link'].body_part_index:
-            #        p.setCollisionFilterPair(self.door.body_id, self.robot_id, i, j, 0) # disable collision for robot and door
 
+            self.wall_poses = [
+                [[-3.5, 0.45, 0.45], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')],
+                [[-3.5, -0.4, 0.45], quatToXYZW(euler2quat(0, 0, -np.pi / 2.0), 'wxyz')],
+            ]
             self.walls = []
-            wall = BoxShape([-3.5, 1, 0.7], [0.2, 0.35, 0.7], visual_only=True)
-            self.simulator.import_interactive_object(wall, class_id=3)
-            self.walls += [wall]
-            wall = BoxShape([-3.5, -1, 0.7], [0.2, 0.45, 0.7], visual_only=True)
-            self.simulator.import_interactive_object(wall, class_id=3)
-            self.walls += [wall]
-
-            self.simulator.sync()
+            for wall_pose in self.wall_poses:
+                wall = InteractiveObj(
+                    os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls_quarter.urdf'),
+                    scale=0.3)
+                self.simulator.import_interactive_object(wall, class_id=3)
+                wall.set_position_rotation(wall_pose[0], wall_pose[1])
+                self.walls += [wall]
 
     def prepare_motion_planner(self):
         self.robot_id = self.robots[0].robot_ids[0]
@@ -306,9 +315,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         self.grid_resolution = 400
         self.occupancy_range = 8.0  # m
-        self.robot_footprint_radius = 0.279
-        self.robot_footprint_radius_in_map = int(
-            self.robot_footprint_radius / self.occupancy_range * self.grid_resolution)
+        robot_footprint_radius = 0.279
+        self.robot_footprint_radius_in_map = int(robot_footprint_radius / self.occupancy_range * self.grid_resolution)
 
     def plan_base_motion(self, x, y, theta):
         half_size = self.map_size / 2.0
@@ -338,8 +346,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return rotate_vector_3d(pos - cur_pos, *cur_rot)
 
     def get_local_occupancy_grid(self):
-        # assumes it has "scan" state
-        # assumes it is either Turtlebot or fetch
+        assert 'scan' in self.output
+        assert self.config['robot'] in ['Turtlebot', 'Fetch']
+
         if self.config['robot'] == 'Turtlebot':
             # Hokuyo URG-04LX-UG01
             laser_linear_range = 5.6
@@ -366,7 +375,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         scan = state['scan']
 
         scan_laser = unit_vector_laser * (scan * (laser_linear_range - min_laser_dist) + min_laser_dist)
-        # embed()
 
         laser_translation = laser_pose[:3]
         laser_rotation = quat2mat([laser_pose[6], laser_pose[3], laser_pose[4], laser_pose[5]])
@@ -387,9 +395,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         cv2.fillPoly(occupancy_grid, scan_local_in_map, True, 1)
         cv2.circle(occupancy_grid, (self.grid_resolution // 2, self.grid_resolution // 2),
                    int(self.robot_footprint_radius_in_map), 1, -1)
+
         # cv2.imwrite('occupancy_grid.png', occupancy_grid)
-        # embed()
-        # assert False
         return occupancy_grid
 
     def get_additional_states(self):
@@ -435,7 +442,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         waypoints_local_xy = np.array([self.global_to_local(waypoint, cur_pos, cur_rot)[:2]
                                        for waypoint in shortest_path]).flatten()
 
-        # # convert Cartesian space to radian space START
+        # # convert Cartesian space to radian space
         # for i in range(waypoints_local_xy.shape[0] // 2):
         #     vec = waypoints_local_xy[(i * 2):(i * 2 + 2)]
         #     norm = np.linalg.norm(vec)
@@ -450,16 +457,18 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         #     dir = np.arctan2(target_pos_local[1], target_pos_local[0])
         #     target_pos_local[0] = dir
         #     target_pos_local[1] = norm
-        # # convert Cartesian space to radian space END
 
         additional_states = np.concatenate((waypoints_local_xy,
                                             target_pos_local[:2]))
         # linear_velocity_local,
         # angular_velocity_local))
+
         # cache results for reward calculation
         self.new_potential = geodesic_dist
+
         assert len(additional_states) == self.additional_states_dim, \
             'additional states dimension mismatch, {}, {}'.format(len(additional_states), self.additional_states_dim)
+
         return additional_states
 
     def get_state(self, collision_links=[]):
@@ -492,6 +501,310 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.shortest_path = shortest_path
         self.new_potential = geodesic_dist
 
+    def get_base_subgoal(self, action):
+        """
+        Convert action to base_subgoal
+        :param action: policy output
+        :return: base_subgoal_pos [x, y] in the world frame
+        :return: base_subgoal_orn yaw in the world frame
+        """
+        # print('base')
+        yaw = self.robots[0].get_rpy()[2]
+        robot_pos = self.robots[0].get_position()
+        base_subgoal_theta = (action[1] * 110.0) / 180.0 * np.pi  # [-110.0, 110.0]
+        base_subgoal_theta += yaw
+        base_subgoal_dist = (action[2] + 1)  # [0.0, 2.0]
+        base_subgoal_pos = np.array([np.cos(base_subgoal_theta), np.sin(base_subgoal_theta)])
+        base_subgoal_pos *= base_subgoal_dist
+        base_subgoal_pos = np.append(base_subgoal_pos, 0.0)
+        base_subgoal_pos += robot_pos
+        base_subgoal_orn = action[3] * np.pi
+        base_subgoal_orn += yaw
+
+        # print('base_subgoal_pos', base_subgoal_pos)
+        self.base_marker.set_position(base_subgoal_pos)
+
+        return base_subgoal_pos, base_subgoal_orn
+
+    def reach_base_subgoal(self, base_subgoal_pos, base_subgoal_orn):
+        """
+        Attempt to reach base_subgoal and return success / failure
+        If failed, reset the base to its original pose
+        :param base_subgoal_pos: [x, y] in the world frame
+        :param base_subgoal_orn: yaw in the world frame
+        :return: whether base_subgoal is achieved
+        """
+        original_pos = get_base_values(self.robot_id)
+
+        path = self.plan_base_motion_2d(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
+        if path is not None:
+            # print('base mp success')
+            if self.eval:
+                for way_point in path:
+                    set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
+                    time.sleep(0.02)
+            else:
+                set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
+
+            return True
+        else:
+            # print('base mp failure')
+            set_base_values(self.robot_id, original_pos)
+            return False
+
+    def move_base(self, action):
+        """
+        Execute action for base_subgoal
+        :param action: policy output
+        :return: whether base_subgoal is achieved
+        """
+        # print('base')
+        # start = time.time()
+        base_subgoal_pos, base_subgoal_orn = self.get_base_subgoal(action)
+        # print('get_base_subgoal', time.time() - start)
+
+        # start = time.time()
+        subgoal_success = self.reach_base_subgoal(base_subgoal_pos, base_subgoal_orn)
+        # print('reach_base_subgoal', time.time() - start)
+
+        return subgoal_success
+
+    def get_arm_subgoal(self, action):
+        """
+        Convert action to arm_subgoal
+        :param action: policy output
+        :return: arm_subgoal [x, y, z] in the world frame
+        """
+        state = self.get_state()
+        points = state['pc']
+        height, width = points.shape[0:2]
+
+        arm_img_u = np.clip(int((action[4] + 1) / 2.0 * height), 0, height - 1)
+        arm_img_v = np.clip(int((action[5] + 1) / 2.0 * width), 0, width - 1)
+
+        point = points[arm_img_u, arm_img_v]
+        camera_pose = (self.robots[0].parts['eyes'].get_pose())
+        transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
+                                        quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
+        arm_subgoal = transform_mat.dot(np.array([-point[2], -point[0], point[1], 1]))[:3]
+        self.arm_marker.set_position(arm_subgoal)
+        return arm_subgoal
+
+    def is_collision_free(self, body_a, link_a_list, body_b=None, link_b_list=None):
+        """
+        :param body_a: body id of body A
+        :param link_a_list: link ids of body A that that of interest
+        :param body_b: body id of body B (optional)
+        :param link_b_list: link ids of body B that are of interest (optional)
+        :return: whether the bodies and links of interest are collision-free
+        """
+        if body_b is None:
+            for link_a in link_a_list:
+                contact_pts = p.getContactPoints(bodyA=body_a, linkIndexA=link_a)
+                if len(contact_pts) > 0:
+                    return False
+        elif link_b_list is None:
+            for link_a in link_a_list:
+                contact_pts = p.getContactPoints(bodyA=body_a, bodyB=body_b, linkIndexA=link_a)
+                if len(contact_pts) > 0:
+                    return False
+        else:
+            for link_a in link_a_list:
+                for link_b in link_b_list:
+                    contact_pts = p.getContactPoints(bodyA=body_a, bodyB=body_b, linkIndexA=link_a, linkIndexB=link_b)
+                    if len(contact_pts) > 0:
+                        return False
+
+        return True
+
+    def get_arm_joint_positions(self, arm_subgoal):
+        """
+        Attempt to find arm_joint_positions that satisfies arm_subgoal
+        If failed, return None
+        :param arm_subgoal: [x, y, z] in the world frame
+        :return: arm joint positions
+        """
+        max_limits = [0., 0.] + get_max_limits(self.robot_id, self.arm_joint_ids)
+        min_limits = [0., 0.] + get_min_limits(self.robot_id, self.arm_joint_ids)
+        rest_position = [0., 0.] + list(get_joint_positions(self.robot_id, self.arm_joint_ids))
+        joint_range = list(np.array(max_limits) - np.array(min_limits))
+        joint_range = [item + 1 for item in joint_range]
+        joint_damping = [0.1 for _ in joint_range]
+
+        n_attempt = 0
+        max_attempt = 50
+        sample_fn = get_sample_fn(self.robot_id, self.arm_joint_ids)
+        base_pose = get_base_values(self.robot_id)
+
+        # find collision-free IK solution for arm_subgoal
+        while n_attempt < max_attempt:
+            set_joint_positions(self.robot_id, self.arm_joint_ids, sample_fn())
+            arm_joint_positions = p.calculateInverseKinematics(self.robot_id,
+                                                               self.robots[0].parts['gripper_link'].body_part_index,
+                                                               arm_subgoal,
+                                                               lowerLimits=min_limits,
+                                                               upperLimits=max_limits,
+                                                               jointRanges=joint_range,
+                                                               restPoses=rest_position,
+                                                               jointDamping=joint_damping,
+                                                               solver=p.IK_DLS,
+                                                               maxNumIterations=100)[2:10]
+            set_joint_positions(self.robot_id, self.arm_joint_ids, arm_joint_positions)
+
+            dist = l2_distance(self.robots[0].get_end_effector_position(), arm_subgoal)
+            if dist > self.arm_subgoal_threshold:
+                n_attempt += 1
+                continue
+
+            self.simulator_step()
+            set_base_values(self.robot_id, base_pose)
+
+            # arm should not have any collision
+            collision_free = self.is_collision_free(body_a=self.robot_id,
+                                                    link_a_list=self.arm_joint_ids)
+            if not collision_free:
+                n_attempt += 1
+                continue
+
+            # gripper should not have any self-collision
+            collision_free = self.is_collision_free(body_a=self.robot_id,
+                                                    link_a_list=[self.robots[0].parts['gripper_link'].body_part_index],
+                                                    body_b=self.robot_id)
+            if not collision_free:
+                n_attempt += 1
+                continue
+
+            return arm_joint_positions
+
+        return
+
+    def reach_arm_subgoal(self, arm_joint_positions):
+        """
+        Attempt to reach arm arm_joint_positions and return success / failure
+        If failed, reset the arm to its original pose
+        :param arm_joint_positions
+        :return: whether arm_joint_positions is achieved
+        """
+        set_joint_positions(self.robot_id, self.arm_joint_ids, self.arm_default_joint_positions)
+
+        if arm_joint_positions is None:
+            return False
+
+        arm_path = plan_joint_motion(self.robot_id,
+                                     self.arm_joint_ids,
+                                     arm_joint_positions,
+                                     disabled_collisions=set(),
+                                     self_collisions=False)
+        if arm_path is not None:
+            if self.eval:
+                for joint_way_point in arm_path:
+                    set_joint_positions(self.robot_id, self.arm_joint_ids, joint_way_point)
+                    time.sleep(0.02)  # animation
+            else:
+                set_joint_positions(self.robot_id, self.arm_joint_ids, arm_joint_positions)
+            return True
+        else:
+            set_joint_positions(self.robot_id, self.arm_joint_ids, self.arm_default_joint_positions)
+            return False
+
+    def reset_object_velocities(self):
+        """
+        Remove any accumulated velocities or forces of objects resulting from arm motion planner
+        """
+        if self.arena == 'push_door':
+            door_pos = p.getJointState(self.door.body_id, self.door_axis_link_id)[0]
+            p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=door_pos, targetVelocity=0.0)
+            for wall in self.walls:
+                p.resetBaseVelocity(wall.body_id, (0.0, 0.0, 0.0), (0.0, 0.0, 0.0))
+
+    def interact(self, action, arm_subgoal):
+        """
+        Move the arm according to push_vector and physically simulate the interaction
+        :param action: policy output
+        :param arm_subgoal: starting location of the interaction
+        :return: None
+        """
+        push_vector_local = np.array([action[6], action[7]])  # [-1.0, 1.0]
+        push_vector = rotate_vector_2d(push_vector_local, -self.robots[0].get_rpy()[2])
+        push_vector = np.append(push_vector, 0.0)
+
+        # push_vector = np.array([-0.5, 0.0, 0.0])
+
+        max_limits = [0., 0.] + get_max_limits(self.robot_id, self.arm_joint_ids)
+        min_limits = [0., 0.] + get_min_limits(self.robot_id, self.arm_joint_ids)
+        rest_position = [0., 0.] + list(get_joint_positions(self.robot_id, self.arm_joint_ids))
+        joint_range = list(np.array(max_limits) - np.array(min_limits))
+        joint_range = [item + 1 for item in joint_range]
+        joint_damping = [0.1 for _ in joint_range]
+
+        base_pose = get_base_values(self.robot_id)
+
+        self.simulator.set_timestep(0.002)
+        for i in range(100):
+            push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
+
+            joint_positions = p.calculateInverseKinematics(self.robot_id,
+                                                           self.robots[0].parts['gripper_link'].body_part_index,
+                                                           push_goal,
+                                                           lowerLimits=min_limits,
+                                                           upperLimits=max_limits,
+                                                           jointRanges=joint_range,
+                                                           restPoses=rest_position,
+                                                           jointDamping=joint_damping,
+                                                           solver=p.IK_DLS,
+                                                           maxNumIterations=100)[2:10]
+
+            # set_joint_positions(self.robot_id, self.arm_joint_ids, joint_positions)
+            control_joints(self.robot_id, self.arm_joint_ids, joint_positions)
+            self.simulator_step()
+            set_base_values(self.robot_id, base_pose)
+
+            if self.eval:
+                time.sleep(0.02)  # for visualization
+
+        self.simulator.set_timestep(1e-8)
+
+    def move_arm(self, action):
+        """
+        Execute action for arm_subgoal and push_vector
+        :param action: policy output
+        :return: whether arm_subgoal is achieved
+        """
+        # print('arm')
+        # start = time.time()
+        arm_subgoal = self.get_arm_subgoal(action)
+        # print('get_arm_subgoal', time.time() - start)
+
+        # start = time.time()
+        # print(p.getNumBodies())
+        # state_id = p.saveState()
+        # print('saveState', time.time() - start)
+
+        # start = time.time()
+        arm_joint_positions = self.get_arm_joint_positions(arm_subgoal)
+        # print('get_arm_joint_positions', time.time() - start)
+
+        # start = time.time()
+        subgoal_success = self.reach_arm_subgoal(arm_joint_positions)
+        # print('reach_arm_subgoal', time.time() - start)
+
+        # start = time.time()
+        # p.restoreState(stateId=state_id)
+        # print('restoreState', time.time() - start)
+
+        # start = time.time()
+        self.reset_object_velocities()
+        # print('reset_object_velocities', time.time() - start)
+
+        if subgoal_success:
+            # set_joint_positions(self.robot_id, self.arm_joint_ids, arm_joint_positions)
+
+            # start = time.time()
+            self.interact(action, arm_subgoal)
+            # print('interact', time.time() - start)
+
+        return subgoal_success
+
     def step(self, action):
         # print('-' * 30)
         # action[0] = base_or_arm
@@ -500,243 +813,23 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # action[3] = base_orn
         # action[4] = arm_img_u
         # action[5] = arm_img_v
+        # action[6] = arm_push_vector_x
+        # action[7] = arm_push_vector_y
 
         self.current_step += 1
         use_base = action[0] > 0.0
         if use_base:
-            # print('base')
-            # use base
-            yaw = self.robots[0].get_rpy()[2]
-            robot_pos = self.robots[0].get_position()
-            base_subgoal_theta = (action[1] * 110.0) / 180.0 * np.pi  # [-110.0, 110.0]
-            base_subgoal_theta += yaw
-            base_subgoal_dist = (action[2] + 1)  # [0.0, 2.0]
-            base_subgoal_pos = np.array([np.cos(base_subgoal_theta), np.sin(base_subgoal_theta)])
-            base_subgoal_pos *= base_subgoal_dist
-            base_subgoal_pos = np.append(base_subgoal_pos, 0.0)
-            base_subgoal_pos += robot_pos
-            # print('base_subgoal_pos', base_subgoal_pos)
-
-            self.base_marker.set_position(base_subgoal_pos)
-
-            base_subgoal_orn = action[3] * np.pi
-            base_subgoal_orn += yaw
-
-            original_pos = get_base_values(self.robot_id)
-
-            path = self.plan_base_motion_2d(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
-            subgoal_success = path is not None
-            if subgoal_success:
-                # print('base mp success')
-                if self.eval:
-                    for way_point in path:
-                        set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
-                        time.sleep(0.02)
-                else:
-                    set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
-            else:
-                # print('base mp failure')
-                set_base_values(self.robot_id, original_pos)
-
-            # is_base_subgoal_valid = self.scene.has_node(self.floor_num, base_subgoal_pos[:2])
-            # if self.arena == 'button':
-            #     x_min = -6.0 if self.door_open else -3.0
-            #     x_max = 2.0
-            #     y_min, y_max = -2.0, 2.0
-            #     is_valid = x_min <= base_subgoal_pos[0] <= x_max and y_min <= base_subgoal_pos[1] <= y_max
-            #     is_base_subgoal_valid = is_base_subgoal_valid and is_valid
-            # if is_base_subgoal_valid:
-            #     if self.eval:
-            #         path = self.plan_base_motion_2d(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
-            #         if path is not None:
-            #             print('base mp success')
-            #             for way_point in path:
-            #                 set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
-            #                 time.sleep(0.05)
-            #         else:
-            #             print('base mp failure')
-            #     else:
-            #         set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
-            #     # set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
-            #     # print('subgoal succeed')
-            # else:
-            #     # print('subgoal fail')
-            #     subgoal_success = False
+            subgoal_success = self.move_base(action)
         else:
-            # print('arm')
-            # use arm
-            state = self.get_state()
-            points = state['pc']
-            height, width = points.shape[0:2]
+            subgoal_success = self.move_arm(action)
 
-            arm_img_u = np.clip(int((action[4] + 1) / 2.0 * height), 0, height - 1)
-            arm_img_v = np.clip(int((action[5] + 1) / 2.0 * width), 0, width - 1)
+        # print('subgoal success', subgoal_success)
 
-            point = points[arm_img_u, arm_img_v]
-            camera_pose = (self.robots[0].parts['eyes'].get_pose())
-            transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
-                                            quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
-            arm_subgoal = transform_mat.dot(np.array([-point[2], -point[0], point[1], 1]))[:3]
-            #arm_subgoal[0] += 0.15
-            self.arm_marker.set_position(arm_subgoal)
+        return self.compute_next_step(use_base, subgoal_success)
 
-            arm_joints = joints_from_names(self.robot_id,
-                                           ['torso_lift_joint',
-                                            'shoulder_pan_joint',
-                                            'shoulder_lift_joint',
-                                            'upperarm_roll_joint',
-                                            'elbow_flex_joint',
-                                            'forearm_roll_joint',
-                                            'wrist_flex_joint',
-                                            'wrist_roll_joint'])
-            max_limits = [0., 0.] + get_max_limits(self.robot_id, arm_joints)
-            min_limits = [0., 0.] + get_min_limits(self.robot_id, arm_joints)
-            rest_position = [0., 0.] + list(get_joint_positions(self.robot_id, arm_joints))
-            joint_range = list(np.array(max_limits) - np.array(min_limits))
-            joint_range = [item + 1 for item in joint_range]
-            joint_damping = [0.1 for _ in joint_range]
+    def compute_next_step(self, use_base, subgoal_success):
+        self.simulator.sync()
 
-            n_attempt = 0
-            max_attempt = 50
-            sample_fn = get_sample_fn(self.robot_id, arm_joints)
-            base_pose = get_base_values(self.robot_id)
-            while n_attempt < max_attempt:  # find self-collision-free ik solution
-                set_joint_positions(self.robot_id, arm_joints, sample_fn())
-                subgoal_joint_positions = p.calculateInverseKinematics(self.robot_id,
-                                                                       self.robots[0].parts[
-                                                                           'gripper_link'].body_part_index,
-                                                                       arm_subgoal,
-                                                                       lowerLimits=min_limits,
-                                                                       upperLimits=max_limits,
-                                                                       jointRanges=joint_range,
-                                                                       restPoses=rest_position,
-                                                                       jointDamping=joint_damping,
-                                                                       solver=p.IK_DLS,
-                                                                       maxNumIterations=100)[2:10]
-                set_joint_positions(self.robot_id, arm_joints, subgoal_joint_positions)
-                self.simulator_step()
-                set_base_values(self.robot_id, base_pose)
-
-                dist = l2_distance(self.robots[0].get_end_effector_position(), arm_subgoal)
-                if dist < self.arm_subgoal_threshold:
-                    # print('arm_subgoal_dist', dist)
-                    collision_free = True
-                    num_joints = p.getNumJoints(self.robot_id)
-                    # self collision
-                    for arm_link in arm_joints:
-                        for other_link in range(num_joints):
-                            contact_pts = p.getContactPoints(self.robot_id, self.robot_id, arm_link, other_link)
-                            # print(contact_pts)
-                            if len(contact_pts) > 0:
-                                collision_free = False
-                                break
-                        if not collision_free:
-                            break
-
-                    # arm collision with door
-                    if self.arena == 'push_door':
-                        for arm_link in arm_joints:
-                            for door_link in range(p.getNumJoints(self.door.body_id)):
-                                if arm_link != self.robots[0].parts['gripper_link'].body_part_index:
-                                    contact_pts = p.getContactPoints(self.robot_id, self.door.body_id, arm_link,
-                                                                     door_link)
-                                    if len(contact_pts) > 0:
-                                        print(arm_link, 'in collision with door')
-                                        collision_free = False
-                                        break
-                            if not collision_free:
-                                break
-
-                    if collision_free:
-                        break
-                n_attempt += 1
-
-            ik_success = n_attempt != max_attempt
-
-            if ik_success:
-                set_joint_positions(self.robot_id, arm_joints, self.arm_default_joint_positions)
-                arm_path = plan_joint_motion(self.robot_id, arm_joints, subgoal_joint_positions,
-                                             disabled_collisions=set(),
-                                             self_collisions=False)
-                subgoal_success = arm_path is not None
-            else:
-                subgoal_success = False
-
-            set_joint_positions(self.robot_id, arm_joints, self.arm_default_joint_positions)
-
-            # print('ik_success', ik_success)
-            # print('arm_mp_success', subgoal_success)
-
-            if subgoal_success:
-                if self.eval:
-                    for joint_way_point in arm_path:
-                        set_joint_positions(self.robot_id, arm_joints, joint_way_point)
-                        time.sleep(0.02)  # animation
-                else:
-                    set_joint_positions(self.robot_id, arm_joints, subgoal_joint_positions)
-
-                ## push
-                # TODO: figure out push dist threshold (i.e. can push object x centimeters away from the gripper)
-                # TODO: whitelist object ids that can be pushed
-
-                # find the closest object
-                focus = None
-                points = []
-                pushable_obj_ids = [self.door.body_id]
-                for i in pushable_obj_ids:
-                    points.extend(
-                        p.getClosestPoints(self.robot_id, i, distance=self.push_dist_threshold,
-                                           linkIndexA=self.robots[0].parts['gripper_link'].body_part_index))
-                dist = 1e4
-                for point in points:
-                    if point[8] < dist:
-                        dist = point[8]
-                        # if not focus is None and not (focus[2] == point[2] and focus[4] == point[4]):
-                        # p.changeVisualShape(objectUniqueId=focus[2], linkIndex=focus[4], rgbaColor=[1, 1, 1, 1])
-                        focus = point
-                        # p.changeVisualShape(objectUniqueId=focus[2], linkIndex=focus[4], rgbaColor=[1, 0, 0, 1])
-
-                # print(focus)
-
-                # if focus is not None:
-                #     c = add_p2p_constraint(focus[2],
-                #                            focus[4],
-                #                            self.robot_id,
-                #                            self.robots[0].parts['gripper_link'].body_part_index,
-                #                            max_force=500)
-
-                push_vector = np.array([-0.5, 0.0, 0])
-                door_pos = p.getJointState(self.door.body_id, self.door_axis_link_id)[0]
-                p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=door_pos, targetVelocity=0.0)
-
-                for i in range(100):
-                    push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
-
-                    joint_positions = p.calculateInverseKinematics(self.robot_id,
-                                                                   self.robots[0].parts['gripper_link'].body_part_index,
-                                                                   push_goal,
-                                                                   lowerLimits=min_limits,
-                                                                   upperLimits=max_limits,
-                                                                   jointRanges=joint_range,
-                                                                   restPoses=rest_position,
-                                                                   jointDamping=joint_damping,
-                                                                   solver=p.IK_DLS,
-                                                                   maxNumIterations=100)[2:10]
-                    set_base_values(self.robot_id, base_pose)
-
-                    #set_joint_positions(self.robot_id, arm_joints, joint_positions)
-                    control_joints(self.robot_id, arm_joints, joint_positions)
-                    self.simulator.set_timestep(0.002)
-                    self.simulator_step()
-                    if self.eval:
-                        time.sleep(0.02)  # for visualization
-                # if focus is not None:
-                #    remove_constraint(c)
-
-            self.simulator.set_timestep(1e-8)
-            set_base_values(self.robot_id, base_pose)
-
-        ###### reward computation ######
         if use_base:
             # trigger re-computation of geodesic distance for get_reward
             state = self.get_state()
@@ -751,69 +844,61 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         if self.arena == 'button':
             dist = l2_distance(self.robots[0].get_end_effector_position(), self.button_marker_pos)
-            # print('button_marker_dist', dist)
             if not self.door_open and dist < self.button_threshold:
-                # touch the button -> remove the button and the door
                 print("OPEN DOOR")
                 self.door_open = True
                 self.button_marker.set_position([100.0, 100.0, 0.0])
                 self.door.set_position([100.0, 100.0, 0.0])
-
                 reward += self.button_reward
+        elif self.arena == 'push_door':
+            door_angle = p.getJointState(self.door.body_id, self.door_axis_link_id)[0]
+            door_angle_diff = door_angle - self.door_angle
+            reward += door_angle_diff
+            self.door_angle = door_angle
 
         if not use_base:
-            # arm reset
-            set_joint_positions(self.robot_id, arm_joints, self.arm_default_joint_positions)
-            # need to call get_state again after arm reset (camera height could be different if torso moves)
-            # TODO: should only call get_state once or twice (e.g. disable torso movement, or cache get_state result)
+            set_joint_positions(self.robot_id, self.arm_joint_ids, self.arm_default_joint_positions)
             state = self.get_state()
 
         if done and self.automatic_reset:
             state = self.reset()
+
         del state['pc']
 
-        if use_base:
-            info['start_conf'] = original_pos
-            info['path'] = path
         # print('reward', reward)
         # time.sleep(3)
 
-        self.simulator.sync()
-
         return state, reward, done, info
 
     def reset_initial_and_target_pos(self):
         if self.arena in ['button', 'push_door']:
             floor_height = self.scene.get_floor_height(self.floor_num)
-            self.initial_pos = np.array([-3.0, 0.0, floor_height])
+            self.initial_pos = np.array([1.2, 0.0, floor_height])
             self.target_pos = np.array([-5.0, 0.0, floor_height])
             self.robots[0].set_position(pos=[self.initial_pos[0],
                                              self.initial_pos[1],
                                              self.initial_pos[2] + self.random_init_z_offset])
             self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, np.pi), 'wxyz'))
-
-            if self.arena == 'button':
-                self.button_marker_pos = [
-                    np.random.uniform(-3.0, -0.5),
-                    np.random.uniform(-1.25, 1.25),
-                    1.5
-                ]
-                self.button_marker.set_position(self.button_marker_pos)
-                self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
-            elif self.arena == 'push_door':
-                p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=0.0, targetVelocity=0.0)
-
         else:
             super(MotionPlanningBaseArmEnv, self).reset_initial_and_target_pos()
 
+    def before_reset_agent(self):
+        if self.arena == 'button':
+            self.button_marker_pos = [
+                np.random.uniform(-3.0, -0.5),
+                np.random.uniform(-1.25, 1.25),
+                1.5
+            ]
+            self.button_marker.set_position(self.button_marker_pos)
+            self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
+            self.door_open = False
+        elif self.arena == 'push_door':
+            p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=0.0, targetVelocity=0.0)
+            self.door_angle = 0.0
+
     def reset(self):
         state = super(MotionPlanningBaseArmEnv, self).reset()
         del state['pc']
-        # embed()
-        if self.arena == 'button':
-            self.door_open = False
-
-        self.simulator.sync()
         return state
 
 
@@ -833,8 +918,8 @@ if __name__ == '__main__':
 
     nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
                                        mode=args.mode,
-                                       action_timestep=1.0 / 1000000.0,
-                                       physics_timestep=1.0 / 1000000.0,
+                                       action_timestep=1e-8,
+                                       physics_timestep=1e-8,
                                        eval=args.mode == 'gui',
                                        arena='push_door',
                                        )
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index b8cb21cf9..6db8735f4 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -3038,7 +3038,7 @@ def control_joints(body, joints, positions):
     # TODO: the whole PR2 seems to jitter
     #kp = 1.0
     #kv = 0.3
-    forces = [get_max_force(body, joint)*100 for joint in joints]
+    forces = [get_max_force(body, joint) * 100 for joint in joints]
     #forces = [5000]*len(joints)
     #forces = [20000]*len(joints)
     return p.setJointMotorControlArray(body, joints, p.POSITION_CONTROL,

From 264c606d692b165500aaf626cf1a241edfc2bbf1 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Mon, 20 Jan 2020 16:16:35 -0800
Subject: [PATCH 29/58] fix minor issue

---
 gibson2/envs/motion_planner_env.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index f505b3642..5e4bfb3da 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -825,9 +825,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         # print('subgoal success', subgoal_success)
 
-        return self.compute_next_step(use_base, subgoal_success)
+        return self.compute_next_step(action, use_base, subgoal_success)
 
-    def compute_next_step(self, use_base, subgoal_success):
+    def compute_next_step(self, action, use_base, subgoal_success):
         self.simulator.sync()
 
         if use_base:

From fae30bee98d53bfc9a662bb76c029d9c71753b73 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Mon, 20 Jan 2020 21:18:28 -0800
Subject: [PATCH 30/58] same physics step through experiment

---
 gibson2/envs/motion_planner_env.py       | 33 ++++++++++++++----------
 gibson2/external/pybullet_tools/utils.py |  5 ++++
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index f505b3642..27d36d4d5 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -25,7 +25,7 @@ from gibson2.external.pybullet_tools.utils import set_base_values, joint_from_na
     joint_controller, dump_body, load_model, joints_from_names, user_input, disconnect, get_joint_positions, \
     get_link_pose, link_from_name, HideOutput, get_pose, wait_for_user, dump_world, plan_nonholonomic_motion, \
     set_point, create_box, stable_z, control_joints, get_max_limits, get_min_limits, get_base_values, \
-    plan_base_motion_2d, get_sample_fn, add_p2p_constraint, remove_constraint
+    plan_base_motion_2d, get_sample_fn, add_p2p_constraint, remove_constraint, set_base_values_with_z
 
 
 class MotionPlanningEnv(NavigateRandomEnv):
@@ -541,15 +541,17 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             # print('base mp success')
             if self.eval:
                 for way_point in path:
-                    set_base_values(self.robot_id, [way_point[0], way_point[1], way_point[2]])
+                    set_base_values_with_z(self.robot_id, [way_point[0], way_point[1], way_point[2]],
+                                           z=self.initial_pos[2] + self.random_init_z_offset)
                     time.sleep(0.02)
             else:
-                set_base_values(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn])
+                set_base_values_with_z(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn],
+                                       z=self.initial_pos[2] + self.random_init_z_offset)
 
             return True
         else:
             # print('base mp failure')
-            set_base_values(self.robot_id, original_pos)
+            set_base_values_with_z(self.robot_id, original_pos, z=self.initial_pos[2] + self.random_init_z_offset)
             return False
 
     def move_base(self, action):
@@ -657,7 +659,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 continue
 
             self.simulator_step()
-            set_base_values(self.robot_id, base_pose)
+            set_base_values_with_z(self.robot_id, base_pose, z=self.initial_pos[2] + self.random_init_z_offset)
+            self.reset_object_velocities()
 
             # arm should not have any collision
             collision_free = self.is_collision_free(body_a=self.robot_id,
@@ -707,13 +710,16 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             set_joint_positions(self.robot_id, self.arm_joint_ids, self.arm_default_joint_positions)
             return False
 
+    def stash_object_position(self):
+        if self.arena == 'push_door':
+            self.door_pos = p.getJointState(self.door.body_id, self.door_axis_link_id)[0]
+
     def reset_object_velocities(self):
         """
         Remove any accumulated velocities or forces of objects resulting from arm motion planner
         """
         if self.arena == 'push_door':
-            door_pos = p.getJointState(self.door.body_id, self.door_axis_link_id)[0]
-            p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=door_pos, targetVelocity=0.0)
+            p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=self.door_pos, targetVelocity=0.0)
             for wall in self.walls:
                 p.resetBaseVelocity(wall.body_id, (0.0, 0.0, 0.0), (0.0, 0.0, 0.0))
 
@@ -757,13 +763,12 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             # set_joint_positions(self.robot_id, self.arm_joint_ids, joint_positions)
             control_joints(self.robot_id, self.arm_joint_ids, joint_positions)
             self.simulator_step()
-            set_base_values(self.robot_id, base_pose)
+            set_base_values_with_z(self.robot_id, base_pose,
+                                   z=self.initial_pos[2] + self.random_init_z_offset)
 
             if self.eval:
                 time.sleep(0.02)  # for visualization
 
-        self.simulator.set_timestep(1e-8)
-
     def move_arm(self, action):
         """
         Execute action for arm_subgoal and push_vector
@@ -780,6 +785,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # state_id = p.saveState()
         # print('saveState', time.time() - start)
 
+        self.stash_object_position()
+
         # start = time.time()
         arm_joint_positions = self.get_arm_joint_positions(arm_subgoal)
         # print('get_arm_joint_positions', time.time() - start)
@@ -873,7 +880,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
     def reset_initial_and_target_pos(self):
         if self.arena in ['button', 'push_door']:
             floor_height = self.scene.get_floor_height(self.floor_num)
-            self.initial_pos = np.array([1.2, 0.0, floor_height])
+            self.initial_pos = np.array([-3, 0.0, floor_height])
             self.target_pos = np.array([-5.0, 0.0, floor_height])
             self.robots[0].set_position(pos=[self.initial_pos[0],
                                              self.initial_pos[1],
@@ -918,8 +925,8 @@ if __name__ == '__main__':
 
     nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
                                        mode=args.mode,
-                                       action_timestep=1e-8,
-                                       physics_timestep=1e-8,
+                                       action_timestep=1/500.0,
+                                       physics_timestep=1/500.0,
                                        eval=args.mode == 'gui',
                                        arena='push_door',
                                        )
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index 6db8735f4..ddfee0c51 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -1083,6 +1083,11 @@ def set_base_values(body, values):
     set_point(body, (x, y, z))
     set_quat(body, z_rotation(theta))
 
+def set_base_values_with_z(body, values, z):
+    x, y, theta = values
+    set_point(body, (x, y, z))
+    set_quat(body, z_rotation(theta))
+
 def get_velocity(body):
     linear, angular = p.getBaseVelocity(body, physicsClientId=CLIENT)
     return linear, angular # [x,y,z], [wx,wy,wz]

From 7bd7718b791a010f5e55888b1338974485a65477 Mon Sep 17 00:00:00 2001
From: Roberto Martin-Martin <rmartinmar@gmail.com>
Date: Wed, 22 Jan 2020 10:41:07 -0800
Subject: [PATCH 31/58] Changes for button_door environment

---
 gibson2/envs/motion_planner_env.py | 67 ++++++++++++++++++------------
 1 file changed, 40 insertions(+), 27 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 71bde3df9..f9f069c72 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -262,27 +262,34 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.arm_subgoal_threshold = 0.05
         self.failed_subgoal_penalty = -0.0
 
-        if self.arena == 'button':
-            self.button_threshold = 0.5
+        if self.arena == 'button_door':
+            self.button_threshold = -0.05
             self.button_reward = 5.0
 
-            self.button_marker = VisualMarker(visual_shape=p.GEOM_SPHERE,
-                                              rgba_color=[0, 1, 0, 1],
-                                              radius=0.3)
-            self.simulator.import_object(self.button_marker, class_id=255)
+            # self.button_marker = VisualMarker(visual_shape=p.GEOM_SPHERE,
+            #                                   rgba_color=[0, 1, 0, 1],
+            #                                   radius=0.3)
+            self.button = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'eswitch', 'eswitch.urdf'),
+                                      scale=1)
+            self.simulator.import_interactive_object(self.button, class_id=255)
+            self.button_axis_link_id = 1
 
             self.door = InteractiveObj(
-                os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'),
-                scale=4.0)
+                os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor_closed.urdf'),
+                scale=3.0)
             self.simulator.import_interactive_object(self.door, class_id=2)
+            self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
+            self.door_axis_link_id = 1
 
             self.wall_poses = [
-                [[0, -2.0, 1], [0, 0, 0, 1]],
+                [[-3.5, 0.45, 0.45], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')],
+                [[-3.5, -0.4, 0.45], quatToXYZW(euler2quat(0, 0, -np.pi / 2.0), 'wxyz')],
             ]
             self.walls = []
             for wall_pose in self.wall_poses:
-                wall = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls.urdf'),
-                                      scale=1)
+                wall = InteractiveObj(
+                    os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls_quarter.urdf'),
+                    scale=0.3)
                 self.simulator.import_interactive_object(wall, class_id=3)
                 wall.set_position_rotation(wall_pose[0], wall_pose[1])
                 self.walls += [wall]
@@ -711,14 +718,20 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             return False
 
     def stash_object_position(self):
-        if self.arena == 'push_door':
+        if self.arena == 'button_door':
+            self.button_pos = p.getJointState(self.button.body_id, self.button_axis_link_id)[0]
+        elif self.arena == 'push_door':
             self.door_pos = p.getJointState(self.door.body_id, self.door_axis_link_id)[0]
 
     def reset_object_velocities(self):
         """
         Remove any accumulated velocities or forces of objects resulting from arm motion planner
         """
-        if self.arena == 'push_door':
+        if self.arena == 'button_door':
+            p.resetJointState(self.button.body_id, self.button_axis_link_id, targetValue=self.button_pos, targetVelocity=0.0)
+            for wall in self.walls:
+                p.resetBaseVelocity(wall.body_id, (0.0, 0.0, 0.0), (0.0, 0.0, 0.0))
+        elif self.arena == 'push_door':
             p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=self.door_pos, targetVelocity=0.0)
             for wall in self.walls:
                 p.resetBaseVelocity(wall.body_id, (0.0, 0.0, 0.0), (0.0, 0.0, 0.0))
@@ -849,12 +862,11 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             reward = self.failed_subgoal_penalty
         done, info = self.get_termination([], info)
 
-        if self.arena == 'button':
-            dist = l2_distance(self.robots[0].get_end_effector_position(), self.button_marker_pos)
-            if not self.door_open and dist < self.button_threshold:
+        if self.arena == 'button_door':
+            button_pos = p.getJointState(self.button.body_id, self.button_axis_link_id)[0]
+            if not self.button_pressed and button_pos < self.button_threshold:
                 print("OPEN DOOR")
-                self.door_open = True
-                self.button_marker.set_position([100.0, 100.0, 0.0])
+                self.button_pressed = True
                 self.door.set_position([100.0, 100.0, 0.0])
                 reward += self.button_reward
         elif self.arena == 'push_door':
@@ -878,9 +890,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return state, reward, done, info
 
     def reset_initial_and_target_pos(self):
-        if self.arena in ['button', 'push_door']:
+        if self.arena in ['button_door', 'push_door']:
             floor_height = self.scene.get_floor_height(self.floor_num)
-            self.initial_pos = np.array([-3, 0.0, floor_height])
+            self.initial_pos = np.array([0, 0.0, floor_height])
             self.target_pos = np.array([-5.0, 0.0, floor_height])
             self.robots[0].set_position(pos=[self.initial_pos[0],
                                              self.initial_pos[1],
@@ -890,15 +902,16 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             super(MotionPlanningBaseArmEnv, self).reset_initial_and_target_pos()
 
     def before_reset_agent(self):
-        if self.arena == 'button':
-            self.button_marker_pos = [
-                np.random.uniform(-3.0, -0.5),
-                np.random.uniform(-1.25, 1.25),
+        if self.arena == 'button_door':
+            self.button_obj_pos = [
+                np.random.uniform(-3.0, 0),
+                np.random.uniform(-1.25, -1),
                 1.5
             ]
-            self.button_marker.set_position(self.button_marker_pos)
+            self.button.set_position(self.button_obj_pos)
+            p.resetJointState(self.button.body_id, self.button_axis_link_id, targetValue=0.0, targetVelocity=0.0)
             self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
-            self.door_open = False
+            self.button_pressed = False
         elif self.arena == 'push_door':
             p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=0.0, targetVelocity=0.0)
             self.door_angle = 0.0
@@ -928,7 +941,7 @@ if __name__ == '__main__':
                                        action_timestep=1/500.0,
                                        physics_timestep=1/500.0,
                                        eval=args.mode == 'gui',
-                                       arena='push_door',
+                                       arena='button_door',
                                        )
 
     for episode in range(100):

From f617ae438293a7dd07b110bce1d99be266994754 Mon Sep 17 00:00:00 2001
From: Roberto Martin-Martin <rmartinmar@gmail.com>
Date: Wed, 22 Jan 2020 10:44:37 -0800
Subject: [PATCH 32/58] arg for arena, default push door

---
 gibson2/envs/motion_planner_env.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index f9f069c72..9dd299443 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -276,7 +276,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
             self.door = InteractiveObj(
                 os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor_closed.urdf'),
-                scale=3.0)
+                scale=1.0)
             self.simulator.import_interactive_object(self.door, class_id=2)
             self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
             self.door_axis_link_id = 1
@@ -934,6 +934,12 @@ if __name__ == '__main__':
                         default='headless',
                         help='which mode for simulation (default: headless)')
 
+    parser.add_argument('--arena',
+                        '-a',
+                        choices=['button_door', 'push_door'],
+                        default='push_door',
+                        help='which arena to train or test (default: push_door)')
+
     args = parser.parse_args()
 
     nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
@@ -941,7 +947,7 @@ if __name__ == '__main__':
                                        action_timestep=1/500.0,
                                        physics_timestep=1/500.0,
                                        eval=args.mode == 'gui',
-                                       arena='button_door',
+                                       arena=args.arena,
                                        )
 
     for episode in range(100):

From 5584e2b994f57cd7841843bb07c8d0aa0f069660 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Wed, 22 Jan 2020 14:05:39 -0800
Subject: [PATCH 33/58] PushDoorEnv now have two doors, random sample which
 room to enter

---
 gibson2/envs/motion_planner_env.py | 77 +++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 27 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 71bde3df9..dfc1e093d 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -288,12 +288,24 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 self.walls += [wall]
 
         elif self.arena == 'push_door':
-            self.door = InteractiveObj(
-                os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'),
-                scale=1.0)
-            self.simulator.import_interactive_object(self.door, class_id=2)
-            self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
+            self.doors = []
+            door_scales = [1.0, 0.9]
+            door_positions = [[-3.5, 0, 0.0], [-1.2, -2.47, 0.0]]
+            door_rotations = [np.pi / 2.0, -np.pi / 2.0]
+
+            for scale, position, rotation in zip(door_scales, door_positions, door_rotations):
+                door = InteractiveObj(
+                    os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'),
+                    scale=scale)
+                self.simulator.import_interactive_object(door, class_id=2)
+                door.set_position_rotation(position, quatToXYZW(euler2quat(0, 0, rotation), 'wxyz'))
+                self.doors.append(door)
+
             self.door_axis_link_id = 1
+            self.door_target_pos = [
+                [[-5.5, -4.5], [-1.0, 1.0]],
+                [[0.5, 2.0], [-4.5, -3.0]]
+            ]
 
             self.wall_poses = [
                 [[-3.5, 0.45, 0.45], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')],
@@ -542,16 +554,16 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             if self.eval:
                 for way_point in path:
                     set_base_values_with_z(self.robot_id, [way_point[0], way_point[1], way_point[2]],
-                                           z=self.initial_pos[2] + self.random_init_z_offset)
+                                           z=self.initial_height)
                     time.sleep(0.02)
             else:
                 set_base_values_with_z(self.robot_id, [base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn],
-                                       z=self.initial_pos[2] + self.random_init_z_offset)
+                                       z=self.initial_height)
 
             return True
         else:
             # print('base mp failure')
-            set_base_values_with_z(self.robot_id, original_pos, z=self.initial_pos[2] + self.random_init_z_offset)
+            set_base_values_with_z(self.robot_id, original_pos, z=self.initial_height)
             return False
 
     def move_base(self, action):
@@ -659,7 +671,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 continue
 
             self.simulator_step()
-            set_base_values_with_z(self.robot_id, base_pose, z=self.initial_pos[2] + self.random_init_z_offset)
+            set_base_values_with_z(self.robot_id, base_pose, z=self.initial_height)
             self.reset_object_velocities()
 
             # arm should not have any collision
@@ -710,16 +722,18 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             set_joint_positions(self.robot_id, self.arm_joint_ids, self.arm_default_joint_positions)
             return False
 
-    def stash_object_position(self):
+    def stash_object_positions(self):
         if self.arena == 'push_door':
-            self.door_pos = p.getJointState(self.door.body_id, self.door_axis_link_id)[0]
+            for i, door in enumerate(self.doors):
+                self.door_angles[i] = p.getJointState(door.body_id, self.door_axis_link_id)[0]
 
     def reset_object_velocities(self):
         """
         Remove any accumulated velocities or forces of objects resulting from arm motion planner
         """
         if self.arena == 'push_door':
-            p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=self.door_pos, targetVelocity=0.0)
+            for door, door_angle in zip(self.doors, self.door_angles):
+                p.resetJointState(door.body_id, self.door_axis_link_id, targetValue=door_angle, targetVelocity=0.0)
             for wall in self.walls:
                 p.resetBaseVelocity(wall.body_id, (0.0, 0.0, 0.0), (0.0, 0.0, 0.0))
 
@@ -745,7 +759,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         base_pose = get_base_values(self.robot_id)
 
-        self.simulator.set_timestep(0.002)
+        #self.simulator.set_timestep(0.002)
         for i in range(100):
             push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
 
@@ -763,8 +777,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             # set_joint_positions(self.robot_id, self.arm_joint_ids, joint_positions)
             control_joints(self.robot_id, self.arm_joint_ids, joint_positions)
             self.simulator_step()
-            set_base_values_with_z(self.robot_id, base_pose,
-                                   z=self.initial_pos[2] + self.random_init_z_offset)
+            set_base_values_with_z(self.robot_id, base_pose, z=self.initial_height)
 
             if self.eval:
                 time.sleep(0.02)  # for visualization
@@ -785,7 +798,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # state_id = p.saveState()
         # print('saveState', time.time() - start)
 
-        self.stash_object_position()
+        self.stash_object_positions()
 
         # start = time.time()
         arm_joint_positions = self.get_arm_joint_positions(arm_subgoal)
@@ -858,11 +871,10 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 self.door.set_position([100.0, 100.0, 0.0])
                 reward += self.button_reward
         elif self.arena == 'push_door':
-            door_angle = p.getJointState(self.door.body_id, self.door_axis_link_id)[0]
-            door_angle_diff = door_angle - self.door_angle
+            new_door_angle = p.getJointState(self.doors[self.door_idx].body_id, self.door_axis_link_id)[0]
+            door_angle_diff = new_door_angle - self.door_angles[self.door_idx]
             reward += door_angle_diff
-            self.door_angle = door_angle
-
+            self.door_angles[self.door_idx] = new_door_angle
         if not use_base:
             set_joint_positions(self.robot_id, self.arm_joint_ids, self.arm_default_joint_positions)
             state = self.get_state()
@@ -880,28 +892,39 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
     def reset_initial_and_target_pos(self):
         if self.arena in ['button', 'push_door']:
             floor_height = self.scene.get_floor_height(self.floor_num)
-            self.initial_pos = np.array([-3, 0.0, floor_height])
-            self.target_pos = np.array([-5.0, 0.0, floor_height])
+            self.initial_height = floor_height + self.random_init_z_offset
+            self.initial_pos = np.array([1.2, 0.0, floor_height])
             self.robots[0].set_position(pos=[self.initial_pos[0],
                                              self.initial_pos[1],
-                                             self.initial_pos[2] + self.random_init_z_offset])
+                                             self.initial_height])
             self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, np.pi), 'wxyz'))
+            if self.arena == 'button':
+                self.target_pos = np.array([-5.0, 0.0, floor_height])
+            elif self.arena == 'push_door':
+                self.door_idx = np.random.randint(0, len(self.doors))
+                door_target_pos = self.door_target_pos[self.door_idx]
+                self.target_pos = np.array([
+                    np.random.uniform(door_target_pos[0][0], door_target_pos[0][1]),
+                    np.random.uniform(door_target_pos[1][0], door_target_pos[1][1]),
+                    floor_height
+                ])
         else:
             super(MotionPlanningBaseArmEnv, self).reset_initial_and_target_pos()
 
     def before_reset_agent(self):
         if self.arena == 'button':
-            self.button_marker_pos = [
+            self.button_marker_pos = np.array([
                 np.random.uniform(-3.0, -0.5),
                 np.random.uniform(-1.25, 1.25),
                 1.5
-            ]
+            ])
             self.button_marker.set_position(self.button_marker_pos)
             self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
             self.door_open = False
         elif self.arena == 'push_door':
-            p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=0.0, targetVelocity=0.0)
-            self.door_angle = 0.0
+            self.door_angles = np.zeros(len(self.doors))
+            for door, door_angle in zip(self.doors, self.door_angles):
+                p.resetJointState(door.body_id, self.door_axis_link_id, targetValue=door_angle, targetVelocity=0.0)
 
     def reset(self):
         state = super(MotionPlanningBaseArmEnv, self).reset()

From 6d6d181b15182656a2467b496a0a9edccf49c7ce Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Wed, 22 Jan 2020 22:04:22 -0800
Subject: [PATCH 34/58] obstacles_env

---
 gibson2/core/physics/interactive_objects.py | 10 +++---
 gibson2/envs/motion_planner_env.py          | 34 ++++++++++++++++++---
 gibson2/external/pybullet_tools/utils.py    |  4 +--
 3 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/gibson2/core/physics/interactive_objects.py b/gibson2/core/physics/interactive_objects.py
index 69c9cd232..19207db3c 100644
--- a/gibson2/core/physics/interactive_objects.py
+++ b/gibson2/core/physics/interactive_objects.py
@@ -158,23 +158,23 @@ class VisualMarker(object):
 
 
 class BoxShape(object):
-    def __init__(self, pos=[1, 2, 3], dim=[1, 2, 3], visual_only = False):
+    def __init__(self, pos=[1, 2, 3], dim=[1, 2, 3], visual_only=False, mass=1000, color=[1,1,1,1]):
         self.basePos = pos
         self.dimension = dim
         self.body_id = None
         self.visual_only = visual_only
+        self.mass = mass
+        self.color = color
 
     def load(self):
-        mass = 1000
         baseOrientation = [0, 0, 0, 1]
-
         colBoxId = p.createCollisionShape(p.GEOM_BOX, halfExtents=self.dimension)
-        visualShapeId = p.createVisualShape(p.GEOM_BOX, halfExtents=self.dimension)
+        visualShapeId = p.createVisualShape(p.GEOM_BOX, halfExtents=self.dimension, rgbaColor=self.color)
         if self.visual_only:
             self.body_id = p.createMultiBody(baseCollisionShapeIndex=-1,
                                              baseVisualShapeIndex=visualShapeId)
         else:
-            self.body_id = p.createMultiBody(baseMass=mass,
+            self.body_id = p.createMultiBody(baseMass=self.mass,
                                          baseCollisionShapeIndex=colBoxId,
                                          baseVisualShapeIndex=visualShapeId)
 
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 71bde3df9..e45e609f0 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -308,6 +308,17 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 wall.set_position_rotation(wall_pose[0], wall_pose[1])
                 self.walls += [wall]
 
+        elif self.arena == 'obstacles':
+            self.obstacle_poses = [[-3.5, 0.5, 0.6], [-3.5, -0.05, 0.6], [-3.5, -0.6, 0.6], [-3.5, -1.15, 0.6]]
+            self.obstacles = []
+            head_joint = joint_from_name(self.robot_id, 'head_tilt_joint')
+            set_joint_position(self.robot_id, head_joint, 1.2)
+            for obstacle_pose in self.obstacle_poses:
+                obstacle = BoxShape(pos=obstacle_pose, dim=[0.25,0.25,0.5], mass=10, color=[1,0.64,0,1])
+                self.simulator.import_interactive_object(obstacle, class_id=4)
+                p.changeDynamics(obstacle.body_id, -1, lateralFriction=0.5)
+                self.obstacles.append(obstacle)
+
     def prepare_motion_planner(self):
         self.robot_id = self.robots[0].robot_ids[0]
         self.mesh_id = self.scene.mesh_body_id
@@ -661,6 +672,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.simulator_step()
             set_base_values_with_z(self.robot_id, base_pose, z=self.initial_pos[2] + self.random_init_z_offset)
             self.reset_object_velocities()
+            self.reset_obstacles_z()
 
             # arm should not have any collision
             collision_free = self.is_collision_free(body_a=self.robot_id,
@@ -681,6 +693,13 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         return
 
+
+    def reset_obstacles_z(self):
+        for obstacle in self.obstacles:
+            obstacle_pose = get_base_values(obstacle.body_id)
+            set_base_values_with_z(obstacle.body_id, obstacle_pose, 0.6)
+
+
     def reach_arm_subgoal(self, arm_joint_positions):
         """
         Attempt to reach arm arm_joint_positions and return success / failure
@@ -734,7 +753,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         push_vector = rotate_vector_2d(push_vector_local, -self.robots[0].get_rpy()[2])
         push_vector = np.append(push_vector, 0.0)
 
-        # push_vector = np.array([-0.5, 0.0, 0.0])
+        push_vector = np.array([-0.5, 0.0, 0.0])
 
         max_limits = [0., 0.] + get_max_limits(self.robot_id, self.arm_joint_ids)
         min_limits = [0., 0.] + get_min_limits(self.robot_id, self.arm_joint_ids)
@@ -766,6 +785,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             set_base_values_with_z(self.robot_id, base_pose,
                                    z=self.initial_pos[2] + self.random_init_z_offset)
 
+            self.reset_obstacles_z()
+
             if self.eval:
                 time.sleep(0.02)  # for visualization
 
@@ -878,9 +899,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return state, reward, done, info
 
     def reset_initial_and_target_pos(self):
-        if self.arena in ['button', 'push_door']:
+        if self.arena in ['button', 'push_door', 'obstacles']:
             floor_height = self.scene.get_floor_height(self.floor_num)
-            self.initial_pos = np.array([-3, 0.0, floor_height])
+            self.initial_pos = np.array([-2.7, 0.0, floor_height])
             self.target_pos = np.array([-5.0, 0.0, floor_height])
             self.robots[0].set_position(pos=[self.initial_pos[0],
                                              self.initial_pos[1],
@@ -902,7 +923,10 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         elif self.arena == 'push_door':
             p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=0.0, targetVelocity=0.0)
             self.door_angle = 0.0
-
+        elif self.arena == 'obstacles':
+            # reset obstacle poses
+            for i in range(len(self.obstacles)):
+                set_base_values_with_z(self.obstacles[i].body_id, [self.obstacle_poses[i][0], self.obstacle_poses[i][1], 0], 0.6)
     def reset(self):
         state = super(MotionPlanningBaseArmEnv, self).reset()
         del state['pc']
@@ -928,7 +952,7 @@ if __name__ == '__main__':
                                        action_timestep=1/500.0,
                                        physics_timestep=1/500.0,
                                        eval=args.mode == 'gui',
-                                       arena='push_door',
+                                       arena='obstacles',
                                        )
 
     for episode in range(100):
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index ddfee0c51..8dce9635a 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -969,11 +969,11 @@ def wrap_angle(theta, lower=-np.pi): # [-np.pi, np.pi)
 def circular_difference(theta2, theta1):
     return wrap_angle(theta2 - theta1)
 
-def base_values_from_pose(pose, tolerance=1e-3):
+def base_values_from_pose(pose):
     (point, quat) = pose
     x, y, _ = point
     roll, pitch, yaw = euler_from_quat(quat)
-    assert (abs(roll) < tolerance) and (abs(pitch) < tolerance)
+    #assert (abs(roll) < tolerance) and (abs(pitch) < tolerance)
     return (x, y, yaw)
 
 pose2d_from_pose = base_values_from_pose

From 8dd755c49eddcda1f62ac532c695b4ff98294c0e Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Wed, 22 Jan 2020 22:05:18 -0800
Subject: [PATCH 35/58] obstacles env

---
 gibson2/envs/motion_planner_env.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index e45e609f0..ece21ddf2 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -672,7 +672,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.simulator_step()
             set_base_values_with_z(self.robot_id, base_pose, z=self.initial_pos[2] + self.random_init_z_offset)
             self.reset_object_velocities()
-            self.reset_obstacles_z()
+
+            self.arena == 'obstacles':
+                self.reset_obstacles_z()
 
             # arm should not have any collision
             collision_free = self.is_collision_free(body_a=self.robot_id,
@@ -785,7 +787,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             set_base_values_with_z(self.robot_id, base_pose,
                                    z=self.initial_pos[2] + self.random_init_z_offset)
 
-            self.reset_obstacles_z()
+            self.arena == 'obstacles':
+                self.reset_obstacles_z()
 
             if self.eval:
                 time.sleep(0.02)  # for visualization

From 59e2388923e290cb61a06a3bd7de959a074e155a Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Wed, 22 Jan 2020 22:22:57 -0800
Subject: [PATCH 36/58] unify push_door and button_door environment, buttons
 now resides next to doors, rather than floating in the mid air

---
 gibson2/envs/motion_planner_env.py | 160 ++++++++++++++---------------
 1 file changed, 75 insertions(+), 85 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index eca68e1ca..bacea2de5 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -262,46 +262,35 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.arm_subgoal_threshold = 0.05
         self.failed_subgoal_penalty = -0.0
 
-        if self.arena == 'button_door':
-            self.button_threshold = -0.05
-            self.button_reward = 5.0
+        self.prepare_scene()
 
-            # self.button_marker = VisualMarker(visual_shape=p.GEOM_SPHERE,
-            #                                   rgba_color=[0, 1, 0, 1],
-            #                                   radius=0.3)
-            self.button = InteractiveObj(
-                os.path.join(gibson2.assets_path, 'models', 'scene_components', 'eswitch', 'eswitch.urdf'),
-                scale=2.0)
-            self.simulator.import_interactive_object(self.button, class_id=255)
-            self.button_axis_link_id = 1
-
-            self.door = InteractiveObj(
-                os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor_closed.urdf'),
-                scale=1.0)
-            self.simulator.import_interactive_object(self.door, class_id=2)
-            self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
-            self.door_axis_link_id = 1
-
-            self.wall_poses = [
+    def prepare_scene(self):
+        if self.scene.model_id == 'Avonia':
+            door_scales = [1.0, 0.9]
+            self.door_positions = [[-3.5, 0, 0.0], [-1.2, -2.47, 0.0]]
+            self.door_rotations = [np.pi / 2.0, -np.pi / 2.0]
+            wall_poses = [
                 [[-3.5, 0.45, 0.45], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')],
                 [[-3.5, -0.4, 0.45], quatToXYZW(euler2quat(0, 0, -np.pi / 2.0), 'wxyz')],
             ]
-            self.walls = []
-            for wall_pose in self.wall_poses:
-                wall = InteractiveObj(
-                    os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls_quarter.urdf'),
-                    scale=0.3)
-                self.simulator.import_interactive_object(wall, class_id=3)
-                wall.set_position_rotation(wall_pose[0], wall_pose[1])
-                self.walls += [wall]
+            self.door_target_pos = [
+                [[-5.5, -4.5], [-1.0, 1.0]],
+                [[0.5, 2.0], [-4.5, -3.0]]
+            ]
+            button_scales = [2.0, 2.0]
+            self.button_positions = [
+                [[-2.85, -2.85], [1.2, 1.7]],
+                [[-2.1, -1.6], [-2.95, -2.95]]
+            ]
+            self.button_rotations = [-np.pi / 2.0, 0.0]
+        else:
+            # TODO: handcraft environments for more scenes
+            assert False, 'model_id unknown'
 
-        elif self.arena == 'push_door':
+        if self.arena in ['push_door', 'button_door']:
+            self.door_axis_link_id = 1
             self.doors = []
-            door_scales = [1.0, 0.9]
-            door_positions = [[-3.5, 0, 0.0], [-1.2, -2.47, 0.0]]
-            door_rotations = [np.pi / 2.0, -np.pi / 2.0]
-
-            for scale, position, rotation in zip(door_scales, door_positions, door_rotations):
+            for scale, position, rotation in zip(door_scales, self.door_positions, self.door_rotations):
                 door = InteractiveObj(
                     os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'),
                     scale=scale)
@@ -309,24 +298,27 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 door.set_position_rotation(position, quatToXYZW(euler2quat(0, 0, rotation), 'wxyz'))
                 self.doors.append(door)
 
-            self.door_axis_link_id = 1
-            self.door_target_pos = [
-                [[-5.5, -4.5], [-1.0, 1.0]],
-                [[0.5, 2.0], [-4.5, -3.0]]
-            ]
-
-            self.wall_poses = [
-                [[-3.5, 0.45, 0.45], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')],
-                [[-3.5, -0.4, 0.45], quatToXYZW(euler2quat(0, 0, -np.pi / 2.0), 'wxyz')],
-            ]
             self.walls = []
-            for wall_pose in self.wall_poses:
+            for wall_pose in wall_poses:
                 wall = InteractiveObj(
                     os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls_quarter.urdf'),
                     scale=0.3)
                 self.simulator.import_interactive_object(wall, class_id=3)
                 wall.set_position_rotation(wall_pose[0], wall_pose[1])
-                self.walls += [wall]
+                self.walls.append(wall)
+
+        if self.arena == 'button_door':
+            self.button_axis_link_id = 1
+            self.button_threshold = -0.05
+            self.button_reward = 5.0
+
+            self.buttons = []
+            for scale in button_scales:
+                button = InteractiveObj(
+                    os.path.join(gibson2.assets_path, 'models', 'scene_components', 'eswitch', 'eswitch.urdf'),
+                    scale=scale)
+                self.simulator.import_interactive_object(button, class_id=255)
+                self.buttons.append(button)
 
     def prepare_motion_planner(self):
         self.robot_id = self.robots[0].robot_ids[0]
@@ -731,27 +723,27 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             return False
 
     def stash_object_positions(self):
-        if self.arena == 'button_door':
-            self.button_pos = p.getJointState(self.button.body_id, self.button_axis_link_id)[0]
-        elif self.arena == 'push_door':
+        if self.arena in ['push_door', 'button_door']:
             for i, door in enumerate(self.doors):
                 self.door_angles[i] = p.getJointState(door.body_id, self.door_axis_link_id)[0]
+        if self.arena == 'button_door':
+            for i, button in enumerate(self.buttons):
+                self.button_states[i] = p.getJointState(button.body_id, self.button_axis_link_id)[0]
 
     def reset_object_velocities(self):
         """
         Remove any accumulated velocities or forces of objects resulting from arm motion planner
         """
-        if self.arena == 'button_door':
-            p.resetJointState(self.button.body_id, self.button_axis_link_id,
-                              targetValue=self.button_pos, targetVelocity=0.0)
-            for wall in self.walls:
-                p.resetBaseVelocity(wall.body_id, (0.0, 0.0, 0.0), (0.0, 0.0, 0.0))
-        elif self.arena == 'push_door':
+        if self.arena in ['push_door', 'button_door']:
             for door, door_angle in zip(self.doors, self.door_angles):
                 p.resetJointState(door.body_id, self.door_axis_link_id,
                                   targetValue=door_angle, targetVelocity=0.0)
             for wall in self.walls:
                 p.resetBaseVelocity(wall.body_id, (0.0, 0.0, 0.0), (0.0, 0.0, 0.0))
+        if self.arena == 'button_door':
+            for button, button_state in zip(self.buttons, self.button_states):
+                p.resetJointState(button.body_id, self.button_axis_link_id,
+                                  targetValue=button_state, targetVelocity=0.0)
 
     def interact(self, action, arm_subgoal):
         """
@@ -880,11 +872,11 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         done, info = self.get_termination([], info)
 
         if self.arena == 'button_door':
-            button_pos = p.getJointState(self.button.body_id, self.button_axis_link_id)[0]
-            if not self.button_pressed and button_pos < self.button_threshold:
+            button_state = p.getJointState(self.buttons[self.door_idx].body_id, self.button_axis_link_id)[0]
+            if not self.button_pressed and button_state < self.button_threshold:
                 print("OPEN DOOR")
                 self.button_pressed = True
-                self.door.set_position([100.0, 100.0, 0.0])
+                self.doors[self.door_idx].set_position([100.0, 100.0, 0.0])
                 reward += self.button_reward
         elif self.arena == 'push_door':
             new_door_angle = p.getJointState(self.doors[self.door_idx].body_id, self.door_axis_link_id)[0]
@@ -906,7 +898,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return state, reward, done, info
 
     def reset_initial_and_target_pos(self):
-        if self.arena in ['button_door', 'push_door']:
+        if self.arena in ['push_door', 'button_door']:
             floor_height = self.scene.get_floor_height(self.floor_num)
             self.initial_height = floor_height + self.random_init_z_offset
             self.initial_pos = np.array([1.2, 0.0, floor_height])
@@ -914,37 +906,35 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                              self.initial_pos[1],
                                              self.initial_height])
             self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, np.pi), 'wxyz'))
-            if self.arena == 'button_door':
-                self.target_pos = np.array([-5.0, 0.0, floor_height])
-            elif self.arena == 'push_door':
-                self.door_idx = np.random.randint(0, len(self.doors))
-                door_target_pos = self.door_target_pos[self.door_idx]
-                self.target_pos = np.array([
-                    np.random.uniform(door_target_pos[0][0], door_target_pos[0][1]),
-                    np.random.uniform(door_target_pos[1][0], door_target_pos[1][1]),
-                    floor_height
-                ])
+            self.door_idx = np.random.randint(0, len(self.doors))
+            door_target_pos = self.door_target_pos[self.door_idx]
+            self.target_pos = np.array([
+                np.random.uniform(door_target_pos[0][0], door_target_pos[0][1]),
+                np.random.uniform(door_target_pos[1][0], door_target_pos[1][1]),
+                floor_height
+            ])
         else:
             super(MotionPlanningBaseArmEnv, self).reset_initial_and_target_pos()
 
     def before_reset_agent(self):
-        if self.arena == 'button_door':
-            left_button = np.random.random() > 0.5
-            y = np.random.uniform(-1.5, -1) if left_button else np.random.uniform(1, 1.5)
-            self.button_obj_pos = [
-                np.random.uniform(-3.0, 0),
-                y,
-                1.5
-            ]
-            orn = 0.0 if left_button else np.pi
-            self.button.set_position_rotation(self.button_obj_pos, quatToXYZW(euler2quat(0, 0, orn), 'wxyz'))
-            p.resetJointState(self.button.body_id, self.button_axis_link_id, targetValue=0.0, targetVelocity=0.0)
-            self.door.set_position_rotation([-3.5, 0, 0.0], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz'))
-            self.button_pressed = False
-        elif self.arena == 'push_door':
+        if self.arena in ['push_door', 'button_door']:
             self.door_angles = np.zeros(len(self.doors))
-            for door, door_angle in zip(self.doors, self.door_angles):
-                p.resetJointState(door.body_id, self.door_axis_link_id, targetValue=door_angle, targetVelocity=0.0)
+            for door, angle, pos, orn in zip(self.doors, self.door_angles, self.door_positions, self.door_rotations):
+                p.resetJointState(door.body_id, self.door_axis_link_id, targetValue=angle, targetVelocity=0.0)
+                door.set_position_rotation(pos, quatToXYZW(euler2quat(0, 0, orn), 'wxyz'))
+        if self.arena == 'button_door':
+            self.button_pressed = False
+            self.button_states = np.zeros(len(self.buttons))
+            for button, button_pos_range, button_rotation, button_state in \
+                    zip(self.buttons, self.button_positions, self.button_rotations, self.button_states):
+                button_pos = np.array([
+                    np.random.uniform(button_pos_range[0][0], button_pos_range[0][1]),
+                    np.random.uniform(button_pos_range[1][0], button_pos_range[1][1]),
+                    1.5
+                ])
+                button.set_position_rotation(button_pos, quatToXYZW(euler2quat(0, 0, button_rotation), 'wxyz'))
+                p.resetJointState(button.body_id, self.button_axis_link_id,
+                                  targetValue=button_state, targetVelocity=0.0)
 
     def reset(self):
         state = super(MotionPlanningBaseArmEnv, self).reset()

From 9d52c7effaf21f4c7b2142832c0d30c6e7bb8cab Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Thu, 23 Jan 2020 17:24:40 -0800
Subject: [PATCH 37/58] change fetch default position, tilt down camera 15
 degree, lower buttons

---
 .../configs/fetch_interactive_nav_s2r_mp.yaml |  2 +-
 gibson2/core/physics/robot_locomotors.py      | 27 ++++--
 gibson2/envs/motion_planner_env.py            | 92 ++++++++++++-------
 3 files changed, 80 insertions(+), 41 deletions(-)

diff --git a/examples/configs/fetch_interactive_nav_s2r_mp.yaml b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
index fa87a137a..e2fb77d84 100644
--- a/examples/configs/fetch_interactive_nav_s2r_mp.yaml
+++ b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
@@ -32,7 +32,7 @@ potential_reward_weight: 1.0
 electricity_reward_weight: 0.0
 stall_torque_reward_weight: 0.0
 collision_reward_weight: -0.0
-collision_ignore_link_a_ids: [0, 1, 2, 3, 13, 20, 21, 22, 23, 24, 25, 26]  # ignore collision with these agent's link ids
+collision_ignore_link_a_ids: [0, 1, 2, 3, 20, 21, 22]  # ignore collision with these agent's link ids
 
 # discount factor
 discount_factor: 0.99
diff --git a/gibson2/core/physics/robot_locomotors.py b/gibson2/core/physics/robot_locomotors.py
index 9077bc62f..44e4ed357 100644
--- a/gibson2/core/physics/robot_locomotors.py
+++ b/gibson2/core/physics/robot_locomotors.py
@@ -578,12 +578,27 @@ class Fetch(LocomotorRobot):
             j.reset_joint_state(0.0, 0.0)
         # roll the arm to its body
         robot_id = self.robot_ids[0]
-        arm_joints = joints_from_names(robot_id, ['torso_lift_joint', 'shoulder_pan_joint', 'shoulder_lift_joint',
-                                                  'upperarm_roll_joint',
-                                                  'elbow_flex_joint', 'forearm_roll_joint', 'wrist_flex_joint',
-                                                  'wrist_roll_joint'])
-        rest_position = (0.38548146667743244, 1.1522793897208579, 1.2576467971105596, -0.312703569911879,
-                         1.7404867100093226, -0.0962895617312548, -1.4418232619629425, -1.6780152866247762)
+        arm_joints = joints_from_names(robot_id,
+                                       [
+                                           'head_tilt_joint',
+                                           'torso_lift_joint',
+                                           'shoulder_pan_joint',
+                                           'shoulder_lift_joint',
+                                           'upperarm_roll_joint',
+                                           'elbow_flex_joint',
+                                           'forearm_roll_joint',
+                                           'wrist_flex_joint',
+                                           'wrist_roll_joint'
+                                       ])
+        rest_position = (
+            np.pi / 12,
+            0.02, np.pi / 2.0,
+            np.pi / 2.0, 0.0,
+            np.pi / 2.0, 0.0,
+            np.pi / 2.0, 0.0
+        )
+        # rest_position = (0.38548146667743244, 1.1522793897208579, 1.2576467971105596, -0.312703569911879,
+        #                  1.7404867100093226, -0.0962895617312548, -1.4418232619629425, -1.6780152866247762)
 
         set_joint_positions(robot_id, arm_joints, rest_position)
 
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 634588bfe..a81b04907 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -246,19 +246,29 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                        length=0.1,
                                        initial_offset=[0, 0, 0.1 / 2.0])
         self.arm_marker.load()
-        self.arm_default_joint_positions = (0.38548146667743244, 1.1522793897208579,
-                                            1.2576467971105596, -0.312703569911879,
-                                            1.7404867100093226, -0.0962895617312548,
-                                            -1.4418232619629425, -1.6780152866247762)
+        # self.arm_default_joint_positions = (0.38548146667743244, 1.1522793897208579,
+        #                                     1.2576467971105596, -0.312703569911879,
+        #                                     1.7404867100093226, -0.0962895617312548,
+        #                                     -1.4418232619629425, -1.6780152866247762)
+        self.arm_default_joint_positions = (
+            np.pi / 12,
+            0.02, np.pi / 2.0,
+            np.pi / 2.0, 0.0,
+            np.pi / 2.0, 0.0,
+            np.pi / 2.0, 0.0
+        )
         self.arm_joint_ids = joints_from_names(self.robot_id,
-                                               ['torso_lift_joint',
-                                                'shoulder_pan_joint',
-                                                'shoulder_lift_joint',
-                                                'upperarm_roll_joint',
-                                                'elbow_flex_joint',
-                                                'forearm_roll_joint',
-                                                'wrist_flex_joint',
-                                                'wrist_roll_joint'])
+                                               [
+                                                   'head_tilt_joint',
+                                                   'torso_lift_joint',
+                                                   'shoulder_pan_joint',
+                                                   'shoulder_lift_joint',
+                                                   'upperarm_roll_joint',
+                                                   'elbow_flex_joint',
+                                                   'forearm_roll_joint',
+                                                   'wrist_flex_joint',
+                                                   'wrist_roll_joint'
+                                               ])
         self.arm_subgoal_threshold = 0.05
         self.failed_subgoal_penalty = -0.0
 
@@ -266,8 +276,14 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
     def prepare_scene(self):
         if self.scene.model_id == 'Avonia':
-            door_scales = [1.0, 0.9]
-            self.door_positions = [[-3.5, 0, 0.0], [-1.2, -2.47, 0.0]]
+            door_scales = [
+                1.0,
+                0.9,
+            ]
+            self.door_positions = [
+                [-3.5, 0, 0.0],
+                [-1.2, -2.47, 0.0],
+            ]
             self.door_rotations = [np.pi / 2.0, -np.pi / 2.0]
             wall_poses = [
                 [[-3.5, 0.45, 0.45], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')],
@@ -275,17 +291,27 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             ]
             self.door_target_pos = [
                 [[-5.5, -4.5], [-1.0, 1.0]],
-                [[0.5, 2.0], [-4.5, -3.0]]
+                [[0.5, 2.0], [-4.5, -3.0]],
             ]
 
-            button_scales = [2.0, 2.0]
+            button_scales = [
+                2.0,
+                2.0,
+            ]
             self.button_positions = [
                 [[-2.85, -2.85], [1.2, 1.7]],
                 [[-2.1, -1.6], [-2.95, -2.95]]
             ]
-            self.button_rotations = [-np.pi / 2.0, 0.0]
+            self.button_rotations = [
+                -np.pi / 2.0,
+                0.0,
+            ]
 
-            self.obstacle_poses = [[-3.5, 0.5, 0.6], [-3.5, -0.05, 0.6], [-3.5, -0.6, 0.6], [-3.5, -1.15, 0.6]]
+            self.obstacle_poses = [
+                [-3.5, 0.4, 0.6],
+                [-3.5, -0.3, 0.6],
+                [-3.5, -1.0, 0.6],
+            ]
 
             # TODO: initial_pos and target_pos sampling should also be put here (scene-specific)
 
@@ -652,12 +678,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         :param arm_subgoal: [x, y, z] in the world frame
         :return: arm joint positions
         """
-        max_limits = [0., 0.] + get_max_limits(self.robot_id, self.arm_joint_ids)
-        min_limits = [0., 0.] + get_min_limits(self.robot_id, self.arm_joint_ids)
-        rest_position = [0., 0.] + list(get_joint_positions(self.robot_id, self.arm_joint_ids))
-        joint_range = list(np.array(max_limits) - np.array(min_limits))
-        joint_range = [item + 1 for item in joint_range]
-        joint_damping = [0.1 for _ in joint_range]
+        max_limits, min_limits, rest_position, joint_range, joint_damping = self.get_ik_parameters()
 
         n_attempt = 0
         max_attempt = 50
@@ -676,7 +697,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                                restPoses=rest_position,
                                                                jointDamping=joint_damping,
                                                                solver=p.IK_DLS,
-                                                               maxNumIterations=100)[2:10]
+                                                               maxNumIterations=100)[2:11]
             set_joint_positions(self.robot_id, self.arm_joint_ids, arm_joint_positions)
 
             dist = l2_distance(self.robots[0].get_end_effector_position(), arm_subgoal)
@@ -773,6 +794,15 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             for obstacle, obstacle_state in zip(self.obstacles, self.obstacle_states):
                 p.resetBasePositionAndOrientation(obstacle.body_id, *obstacle_state)
 
+    def get_ik_parameters(self):
+        max_limits = [0., 0., np.pi / 12] + get_max_limits(self.robot_id, self.arm_joint_ids)
+        min_limits = [0., 0., np.pi / 12] + get_min_limits(self.robot_id, self.arm_joint_ids)
+        rest_position = [0., 0., np.pi / 12] + list(get_joint_positions(self.robot_id, self.arm_joint_ids))
+        joint_range = list(np.array(max_limits) - np.array(min_limits))
+        joint_range = [item + 1 for item in joint_range]
+        joint_damping = [0.1 for _ in joint_range]
+        return max_limits, min_limits, rest_position, joint_range, joint_damping
+
     def interact(self, action, arm_subgoal):
         """
         Move the arm according to push_vector and physically simulate the interaction
@@ -786,13 +816,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         # push_vector = np.array([-0.5, 0.0, 0.0])
 
-        max_limits = [0., 0.] + get_max_limits(self.robot_id, self.arm_joint_ids)
-        min_limits = [0., 0.] + get_min_limits(self.robot_id, self.arm_joint_ids)
-        rest_position = [0., 0.] + list(get_joint_positions(self.robot_id, self.arm_joint_ids))
-        joint_range = list(np.array(max_limits) - np.array(min_limits))
-        joint_range = [item + 1 for item in joint_range]
-        joint_damping = [0.1 for _ in joint_range]
-
+        max_limits, min_limits, rest_position, joint_range, joint_damping = self.get_ik_parameters()
         base_pose = get_base_values(self.robot_id)
 
         # self.simulator.set_timestep(0.002)
@@ -808,7 +832,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                            restPoses=rest_position,
                                                            jointDamping=joint_damping,
                                                            solver=p.IK_DLS,
-                                                           maxNumIterations=100)[2:10]
+                                                           maxNumIterations=100)[2:11]
 
             # set_joint_positions(self.robot_id, self.arm_joint_ids, joint_positions)
             control_joints(self.robot_id, self.arm_joint_ids, joint_positions)
@@ -966,7 +990,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                     button_pos = np.array([
                         np.random.uniform(button_pos_range[0][0], button_pos_range[0][1]),
                         np.random.uniform(button_pos_range[1][0], button_pos_range[1][1]),
-                        1.5
+                        1.2
                     ])
                     button.set_position_rotation(button_pos, quatToXYZW(euler2quat(0, 0, button_rotation), 'wxyz'))
                     p.resetJointState(button.body_id, self.button_axis_link_id,

From c95db38498611ef8d3814de430daf2a9d4ae3abf Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Thu, 23 Jan 2020 17:50:58 -0800
Subject: [PATCH 38/58] add move obstacle reward: moved_distance * 5.0

---
 gibson2/envs/motion_planner_env.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index a81b04907..712ec3fa3 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -938,6 +938,16 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             door_angle_diff = new_door_angle - self.door_angles[self.door_idx]
             reward += door_angle_diff
             self.door_angles[self.door_idx] = new_door_angle
+        elif self.arena == 'obstacles':
+            new_obstacles_moved_dist = 0.0
+            for obstacle, original_obstacle_pose in zip(self.obstacles, self.obstacle_poses):
+                obstacle_pose = get_base_values(obstacle.body_id)
+                new_obstacles_moved_dist += l2_distance(np.array(obstacle_pose[:2]),
+                                                        original_obstacle_pose[:2])
+            obstacles_moved_dist_diff = new_obstacles_moved_dist - self.obstacles_moved_dist
+            reward += (obstacles_moved_dist_diff * 5.0)
+            # print('obstacles_moved_dist_diff', obstacles_moved_dist_diff)
+            self.obstacles_moved_dist = new_obstacles_moved_dist
 
         if not use_base:
             set_joint_positions(self.robot_id, self.arm_joint_ids, self.arm_default_joint_positions)
@@ -999,6 +1009,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.obstacle_states = [None] * len(self.obstacles)
             for obstacle, obstacle_pose in zip(self.obstacles, self.obstacle_poses):
                 set_base_values_with_z(obstacle.body_id, [obstacle_pose[0], obstacle_pose[1], 0], 0.6)
+            self.obstacles_moved_dist = 0.0
 
     def reset(self):
         state = super(MotionPlanningBaseArmEnv, self).reset()

From 24d7cd19a1180387c3c0db7cec60cf1b29a584d8 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Fri, 24 Jan 2020 18:14:01 -0800
Subject: [PATCH 39/58] add semantic obstacles, fix fetch urdf tile camera bug,
 fix push door second door scratch floor bug

---
 gibson2/core/physics/robot_locomotors.py |  2 -
 gibson2/envs/motion_planner_env.py       | 75 ++++++++++++++++++------
 2 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/gibson2/core/physics/robot_locomotors.py b/gibson2/core/physics/robot_locomotors.py
index 44e4ed357..56c1495d3 100644
--- a/gibson2/core/physics/robot_locomotors.py
+++ b/gibson2/core/physics/robot_locomotors.py
@@ -580,7 +580,6 @@ class Fetch(LocomotorRobot):
         robot_id = self.robot_ids[0]
         arm_joints = joints_from_names(robot_id,
                                        [
-                                           'head_tilt_joint',
                                            'torso_lift_joint',
                                            'shoulder_pan_joint',
                                            'shoulder_lift_joint',
@@ -591,7 +590,6 @@ class Fetch(LocomotorRobot):
                                            'wrist_roll_joint'
                                        ])
         rest_position = (
-            np.pi / 12,
             0.02, np.pi / 2.0,
             np.pi / 2.0, 0.0,
             np.pi / 2.0, 0.0,
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 712ec3fa3..78a48f68d 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -251,7 +251,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         #                                     1.7404867100093226, -0.0962895617312548,
         #                                     -1.4418232619629425, -1.6780152866247762)
         self.arm_default_joint_positions = (
-            np.pi / 12,
             0.02, np.pi / 2.0,
             np.pi / 2.0, 0.0,
             np.pi / 2.0, 0.0,
@@ -259,7 +258,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         )
         self.arm_joint_ids = joints_from_names(self.robot_id,
                                                [
-                                                   'head_tilt_joint',
                                                    'torso_lift_joint',
                                                    'shoulder_pan_joint',
                                                    'shoulder_lift_joint',
@@ -276,13 +274,14 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
     def prepare_scene(self):
         if self.scene.model_id == 'Avonia':
+            # push_door, button_door
             door_scales = [
                 1.0,
                 0.9,
             ]
             self.door_positions = [
-                [-3.5, 0, 0.0],
-                [-1.2, -2.47, 0.0],
+                [-3.5, 0, 0.02],
+                [-1.2, -2.47, 0.02],
             ]
             self.door_rotations = [np.pi / 2.0, -np.pi / 2.0]
             wall_poses = [
@@ -294,6 +293,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 [[0.5, 2.0], [-4.5, -3.0]],
             ]
 
+            # button_door
             button_scales = [
                 2.0,
                 2.0,
@@ -307,12 +307,27 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 0.0,
             ]
 
+            # obstacles
             self.obstacle_poses = [
                 [-3.5, 0.4, 0.6],
                 [-3.5, -0.3, 0.6],
                 [-3.5, -1.0, 0.6],
             ]
 
+            # semantic_obstacles
+            self.semantic_obstacle_poses = [
+                [-3.5, 0.2, 0.6],
+                [-3.5, -1.0, 0.6],
+            ]
+            self.semantic_obstacle_masses = [
+                1.0,
+                10000.0,
+            ]
+            self.semantic_obstacle_colors = [
+                [1.0, 0.0, 0.0, 1],
+                [0.0, 1.0, 0.0, 1],
+            ]
+
             # TODO: initial_pos and target_pos sampling should also be put here (scene-specific)
 
         else:
@@ -360,6 +375,15 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 p.changeDynamics(obstacle.body_id, -1, lateralFriction=0.5)
                 self.obstacles.append(obstacle)
 
+        elif self.arena == 'semantic_obstacles':
+            self.obstacles = []
+            for pose, mass, color in \
+                    zip(self.semantic_obstacle_poses, self.semantic_obstacle_masses, self.semantic_obstacle_colors):
+                obstacle = BoxShape(pos=pose, dim=[0.3, 0.3, 0.5], mass=mass, color=color)
+                self.simulator.import_interactive_object(obstacle, class_id=4)
+                p.changeDynamics(obstacle.body_id, -1, lateralFriction=0.5)
+                self.obstacles.append(obstacle)
+
     def prepare_motion_planner(self):
         self.robot_id = self.robots[0].robot_ids[0]
         self.mesh_id = self.scene.mesh_body_id
@@ -697,7 +721,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                                restPoses=rest_position,
                                                                jointDamping=joint_damping,
                                                                solver=p.IK_DLS,
-                                                               maxNumIterations=100)[2:11]
+                                                               maxNumIterations=100)[2:10]
             set_joint_positions(self.robot_id, self.arm_joint_ids, arm_joint_positions)
 
             dist = l2_distance(self.robots[0].get_end_effector_position(), arm_subgoal)
@@ -772,7 +796,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             if self.arena == 'button_door':
                 for i, button in enumerate(self.buttons):
                     self.button_states[i] = p.getJointState(button.body_id, self.button_axis_link_id)[0]
-        elif self.arena == 'obstacles':
+        elif self.arena in ['obstacles', 'semantic_obstacles']:
             for i, obstacle in enumerate(self.obstacles):
                 self.obstacle_states[i] = p.getBasePositionAndOrientation(obstacle.body_id)
 
@@ -790,14 +814,14 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 for button, button_state in zip(self.buttons, self.button_states):
                     p.resetJointState(button.body_id, self.button_axis_link_id,
                                       targetValue=button_state, targetVelocity=0.0)
-        elif self.arena == 'obstacles':
+        elif self.arena in ['obstacles', 'semantic_obstacles']:
             for obstacle, obstacle_state in zip(self.obstacles, self.obstacle_states):
                 p.resetBasePositionAndOrientation(obstacle.body_id, *obstacle_state)
 
     def get_ik_parameters(self):
-        max_limits = [0., 0., np.pi / 12] + get_max_limits(self.robot_id, self.arm_joint_ids)
-        min_limits = [0., 0., np.pi / 12] + get_min_limits(self.robot_id, self.arm_joint_ids)
-        rest_position = [0., 0., np.pi / 12] + list(get_joint_positions(self.robot_id, self.arm_joint_ids))
+        max_limits = [0., 0.] + get_max_limits(self.robot_id, self.arm_joint_ids)
+        min_limits = [0., 0.] + get_min_limits(self.robot_id, self.arm_joint_ids)
+        rest_position = [0., 0.] + list(get_joint_positions(self.robot_id, self.arm_joint_ids))
         joint_range = list(np.array(max_limits) - np.array(min_limits))
         joint_range = [item + 1 for item in joint_range]
         joint_damping = [0.1 for _ in joint_range]
@@ -832,14 +856,14 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                            restPoses=rest_position,
                                                            jointDamping=joint_damping,
                                                            solver=p.IK_DLS,
-                                                           maxNumIterations=100)[2:11]
+                                                           maxNumIterations=100)[2:10]
 
             # set_joint_positions(self.robot_id, self.arm_joint_ids, joint_positions)
             control_joints(self.robot_id, self.arm_joint_ids, joint_positions)
             self.simulator_step()
             set_base_values_with_z(self.robot_id, base_pose, z=self.initial_height)
 
-            if self.arena == 'obstacles':
+            if self.arena in ['obstacles', 'semantic_obstacles']:
                 self.reset_obstacles_z()
 
             if self.eval:
@@ -948,6 +972,16 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             reward += (obstacles_moved_dist_diff * 5.0)
             # print('obstacles_moved_dist_diff', obstacles_moved_dist_diff)
             self.obstacles_moved_dist = new_obstacles_moved_dist
+        elif self.arena == 'semantic_obstacles':
+            new_obstacles_moved_dist = 0.0
+            for obstacle, original_obstacle_pose in zip(self.obstacles, self.semantic_obstacle_poses):
+                obstacle_pose = get_base_values(obstacle.body_id)
+                new_obstacles_moved_dist += l2_distance(np.array(obstacle_pose[:2]),
+                                                        original_obstacle_pose[:2])
+            obstacles_moved_dist_diff = new_obstacles_moved_dist - self.obstacles_moved_dist
+            reward += (obstacles_moved_dist_diff * 5.0)
+            # print('obstacles_moved_dist_diff', obstacles_moved_dist_diff)
+            self.obstacles_moved_dist = new_obstacles_moved_dist
 
         if not use_base:
             set_joint_positions(self.robot_id, self.arm_joint_ids, self.arm_default_joint_positions)
@@ -964,7 +998,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return state, reward, done, info
 
     def reset_initial_and_target_pos(self):
-        if self.arena in ['button_door', 'push_door', 'obstacles']:
+        if self.arena in ['button_door', 'push_door', 'obstacles', 'semantic_obstacles']:
             floor_height = self.scene.get_floor_height(self.floor_num)
             self.initial_height = floor_height + self.random_init_z_offset
             self.initial_pos = np.array([1.2, 0.0, floor_height])
@@ -1005,12 +1039,19 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                     button.set_position_rotation(button_pos, quatToXYZW(euler2quat(0, 0, button_rotation), 'wxyz'))
                     p.resetJointState(button.body_id, self.button_axis_link_id,
                                       targetValue=button_state, targetVelocity=0.0)
-        elif self.arena == 'obstacles':
+        elif self.arena in ['obstacles', 'semantic_obstacles']:
             self.obstacle_states = [None] * len(self.obstacles)
-            for obstacle, obstacle_pose in zip(self.obstacles, self.obstacle_poses):
-                set_base_values_with_z(obstacle.body_id, [obstacle_pose[0], obstacle_pose[1], 0], 0.6)
             self.obstacles_moved_dist = 0.0
 
+            if self.arena == 'semantic_obstacles':
+                np.random.shuffle(self.semantic_obstacle_poses)
+                obstacle_poses = self.semantic_obstacle_poses
+            else:
+                obstacle_poses = self.obstacle_poses
+
+            for obstacle, obstacle_pose in zip(self.obstacles, obstacle_poses):
+                set_base_values_with_z(obstacle.body_id, [obstacle_pose[0], obstacle_pose[1], 0], 0.6)
+
     def reset(self):
         state = super(MotionPlanningBaseArmEnv, self).reset()
         del state['pc']
@@ -1031,7 +1072,7 @@ if __name__ == '__main__':
 
     parser.add_argument('--arena',
                         '-a',
-                        choices=['button_door', 'push_door', 'obstacles'],
+                        choices=['button_door', 'push_door', 'obstacles', 'semantic_obstacles'],
                         default='push_door',
                         help='which arena to train or test (default: push_door)')
 

From 45c769e387a7dbffe16e8dd022b9fe497914d687 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Sun, 26 Jan 2020 17:08:39 -0800
Subject: [PATCH 40/58] fix semantic obstacle env, the two boxes are too far
 apart before so that the robot can squeeze through

---
 gibson2/envs/motion_planner_env.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 78a48f68d..c305edba8 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -316,8 +316,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
             # semantic_obstacles
             self.semantic_obstacle_poses = [
-                [-3.5, 0.2, 0.6],
-                [-3.5, -1.0, 0.6],
+                [-3.5, 0.15, 0.6],
+                [-3.5, -0.95, 0.6],
             ]
             self.semantic_obstacle_masses = [
                 1.0,
@@ -379,7 +379,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.obstacles = []
             for pose, mass, color in \
                     zip(self.semantic_obstacle_poses, self.semantic_obstacle_masses, self.semantic_obstacle_colors):
-                obstacle = BoxShape(pos=pose, dim=[0.3, 0.3, 0.5], mass=mass, color=color)
+                obstacle = BoxShape(pos=pose, dim=[0.35, 0.35, 0.5], mass=mass, color=color)
                 self.simulator.import_interactive_object(obstacle, class_id=4)
                 p.changeDynamics(obstacle.body_id, -1, lateralFriction=0.5)
                 self.obstacles.append(obstacle)

From bfbe5d8e261b83b46330d1e9b9ee291b4cbed91c Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Sun, 26 Jan 2020 20:51:57 -0800
Subject: [PATCH 41/58] fix a bug that fetch exploits

---
 gibson2/envs/locomotor_env.py            |  2 +-
 gibson2/envs/motion_planner_env.py       | 21 +++++++++++++++------
 gibson2/external/pybullet_tools/utils.py |  6 +++++-
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index fa82af9ba..bef2740a9 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -434,7 +434,7 @@ class NavigateEnv(BaseEnv):
                 # SICK TiM571-2050101 Laser Range Finder
                 laser_linear_range = 25.0
                 laser_angular_range = 220.0
-                min_laser_dist = 0.1
+                min_laser_dist = 0
                 laser_link_name = 'laser_link'
             else:
                 assert False, 'unknown robot for LiDAR observation'
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index c305edba8..6d07b350c 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -270,6 +270,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.arm_subgoal_threshold = 0.05
         self.failed_subgoal_penalty = -0.0
 
+        # self.n_occ_img = 0
         self.prepare_scene()
 
     def prepare_scene(self):
@@ -389,8 +390,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.mesh_id = self.scene.mesh_body_id
         self.map_size = self.scene.trav_map_original_size * self.scene.trav_map_default_resolution
 
-        self.grid_resolution = 400
-        self.occupancy_range = 8.0  # m
+        self.grid_resolution = 800
+        self.occupancy_range = 5.0  # m
         robot_footprint_radius = 0.279
         self.robot_footprint_radius_in_map = int(robot_footprint_radius / self.occupancy_range * self.grid_resolution)
 
@@ -435,7 +436,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             # SICK TiM571-2050101 Laser Range Finder
             laser_linear_range = 25.0
             laser_angular_range = 220.0
-            min_laser_dist = 0.1
+            min_laser_dist = 0
             laser_link_name = 'laser_link'
 
         laser_angular_half_range = laser_angular_range / 2.0
@@ -451,6 +452,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         scan = state['scan']
 
         scan_laser = unit_vector_laser * (scan * (laser_linear_range - min_laser_dist) + min_laser_dist)
+        scan_laser = np.concatenate([np.array([[0, 0 ,0]]), scan_laser, np.array([[0, 0, 0]])], axis=0)
 
         laser_translation = laser_pose[:3]
         laser_rotation = quat2mat([laser_pose[6], laser_pose[3], laser_pose[4], laser_pose[5]])
@@ -460,7 +462,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         base_rotation = quat2mat([base_pose[6], base_pose[3], base_pose[4], base_pose[5]])
         scan_local = base_rotation.T.dot((scan_world - base_translation).T).T
         scan_local = scan_local[:, :2]
-        scan_local = np.concatenate([np.array([[0, 0]]), scan_local, np.array([[0, 0]])], axis=0)
 
         # flip y axis
         scan_local[:, 1] *= -1
@@ -469,10 +470,19 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 self.grid_resolution / 2)
         scan_local_in_map = scan_local_in_map.reshape((1, -1, 1, 2)).astype(np.int32)
         cv2.fillPoly(occupancy_grid, scan_local_in_map, True, 1)
+        #kernel = np.ones((7, 7), np.uint8) # require 6cm of clearance
+        #occupancy_grid = cv2.erode(occupancy_grid, kernel, iterations=1)
+
         cv2.circle(occupancy_grid, (self.grid_resolution // 2, self.grid_resolution // 2),
                    int(self.robot_footprint_radius_in_map), 1, -1)
 
-        # cv2.imwrite('occupancy_grid.png', occupancy_grid)
+        cv2.rectangle(occupancy_grid, (self.grid_resolution // 2, self.grid_resolution // 2 - int(self.robot_footprint_radius_in_map + 2)),
+                      (self.grid_resolution // 2 + int(self.robot_footprint_radius_in_map), self.grid_resolution // 2 + int(self.robot_footprint_radius_in_map + 2)), \
+                      1, -1)
+
+        # self.n_occ_img += 1
+        # cv2.imwrite('occupancy_grid{:04d}.png'.format(self.n_occ_img), (occupancy_grid * 200).astype(np.uint8))
+
         return occupancy_grid
 
     def get_additional_states(self):
@@ -611,7 +621,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         :return: whether base_subgoal is achieved
         """
         original_pos = get_base_values(self.robot_id)
-
         path = self.plan_base_motion_2d(base_subgoal_pos[0], base_subgoal_pos[1], base_subgoal_orn)
         if path is not None:
             # print('base mp success')
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index de0b345b2..1474edd5c 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -2702,7 +2702,7 @@ def plan_base_motion(body, end_conf, base_limits, obstacles=[], direct=False,
 
 def plan_base_motion_2d(body, end_conf, base_limits, map_2d, occupancy_range, grid_resolution, robot_footprint_radius_in_map,
                         obstacles=[], direct=False, weights=1 * np.ones(3), resolutions=0.05 * np.ones(3),
-                        max_distance=MAX_DISTANCE, **kwargs):
+                        max_distance=MAX_DISTANCE, min_goal_dist = 0.02, **kwargs):
     def sample_fn():
         x, y = np.random.uniform(*base_limits)
         theta = np.random.uniform(*CIRCULAR_LIMITS)
@@ -2739,6 +2739,10 @@ def plan_base_motion_2d(body, end_conf, base_limits, map_2d, occupancy_range, gr
 
     start_conf = get_base_values(body)
 
+    if np.abs(start_conf[0] - end_conf[0]) < min_goal_dist and np.abs(start_conf[1] - end_conf[1]) < min_goal_dist: 
+        # do not do plans that is smaller than 30mm
+        return None
+
     def collision_fn(q):
         # TODO: update this function
         # set_base_values(body, q)

From 4c8cdf24a9d43cdf5e5d82522794072587efe39a Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Sun, 26 Jan 2020 23:23:09 -0800
Subject: [PATCH 42/58] refactor locomotor_env, add sensor noise, add action
 noise, improve interaction speed

---
 .../configs/fetch_interactive_nav_s2r_mp.yaml |   2 +
 gibson2/envs/locomotor_env.py                 | 325 +++++++++---------
 gibson2/envs/motion_planner_env.py            |  84 +++--
 3 files changed, 233 insertions(+), 178 deletions(-)

diff --git a/examples/configs/fetch_interactive_nav_s2r_mp.yaml b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
index e2fb77d84..1b11f1651 100644
--- a/examples/configs/fetch_interactive_nav_s2r_mp.yaml
+++ b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
@@ -48,6 +48,8 @@ resolution: 128
 fov: 120
 n_horizontal_rays: 220
 n_vertical_beams: 1
+scan_noise_rate: 0.0
+depth_noise_rate: 0.0
 
 # display
 use_filler: true
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index fa82af9ba..8462955fb 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -126,9 +126,13 @@ class NavigateEnv(BaseEnv):
 
         # discount factor
         self.discount_factor = self.config.get('discount_factor', 1.0)
+
+        # output: sensors
         self.output = self.config['output']
         self.n_horizontal_rays = self.config.get('n_horizontal_rays', 128)
         self.n_vertical_beams = self.config.get('n_vertical_beams', 9)
+        self.scan_noise_rate = self.config.get('scan_noise_rate', 0.0)
+        self.depth_noise_rate = self.config.get('depth_noise_rate', 0.0)
 
         # TODO: sensor: observations that are passed as network input, e.g. target position in local frame
         # TODO: auxiliary sensor: observations that are not passed as network input, but used to maintain the same
@@ -298,6 +302,163 @@ class NavigateEnv(BaseEnv):
     def get_auxiliary_sensor(self, collision_links=[]):
         return np.array([])
 
+    def add_naive_noise_to_sensor(self, sensor_reading, noise_rate):
+        if noise_rate <= 0.0:
+            return sensor_reading
+
+        assert len(sensor_reading[(sensor_reading < 0.0) | (sensor_reading > 1.0)]) == 0,\
+            'sensor reading has to be between [0.0, 1.0]'
+
+        valid_mask = np.random.choice(2, sensor_reading.shape, p=[noise_rate, 1.0 - noise_rate])
+        # set invalid values to be the maximum range (e.g. depth and scan)
+        sensor_reading[valid_mask == 0] = 1.0
+        return sensor_reading
+
+    def get_depth(self):
+        depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
+        if self.config['robot'] == 'Turtlebot':
+            # ASUS Xtion PRO LIVE
+            low = 0.8
+            high = 3.5
+        elif self.config['robot'] == 'Fetch':
+            # Primesense Carmine 1.09 short-range RGBD sensor
+            low = 0.0
+            high = 20.0  # http://xtionprolive.com/primesense-carmine-1.09
+            # high = 1.4  # https://www.i3du.gr/pdf/primesense.pdf
+        else:
+            assert False, 'unknown robot for RGBD observation'
+
+        invalid = depth == 0.0
+        depth[depth < low] = low
+        depth[depth > high] = high
+        # depth[invalid] = high
+
+        # re-scale depth to [0.0, 1.0]
+        depth = (depth - low) / (high - low)
+        depth = self.add_naive_noise_to_sensor(depth, self.depth_noise_rate)
+
+        return depth
+
+    def get_rgb(self):
+        return self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3]
+
+    def get_pc(self):
+        return self.simulator.renderer.render_robot_cameras(modes=('3d'))[0]
+
+    def get_normal(self):
+        return self.simulator.renderer.render_robot_cameras(modes='normal')
+
+    def get_seg(self):
+        seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1]
+        if self.num_object_classes is not None:
+            seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0)
+        return seg
+
+    def get_seg_pred(self):
+        rgb = self.get_rgb()
+        with torch.no_grad():
+            width = rgb.shape[0]
+            height = int(width * (480.0 / 640.0))
+            half_diff = int((width - height) / 2)
+            rgb_cropped = rgb[half_diff:half_diff + height, :]
+            rgb_cropped = (rgb_cropped * 255).astype(np.uint8)
+            tensor = transforms.ToTensor()(rgb_cropped)
+            img = self.seg_normalizer(tensor).unsqueeze(0).cuda()
+            seg_pred = self.seg_encoder(img)
+            seg_pred = seg_pred[0][0].permute(1, 2, 0).cpu().numpy()
+            return seg_pred
+
+            # # visualize predicted segmentation mask
+            # scores = self.seg_decoder(self.seg_encoder(img, return_feature_maps=True), segSize=(height, width))
+            # _, pred = torch.max(scores, dim=1)
+            # pred = pred.squeeze(0).cpu().numpy().astype(np.int32)
+            # pred_color = colorEncode(pred, self.seg_colors).astype(np.uint8)
+            # pred_color = cv2.cvtColor(pred_color, cv2.COLOR_RGB2BGR)
+            #
+            # depth_cropped = depth[half_diff:half_diff + height, :]
+            # low = 0.8
+            # high = 3.5
+            # invalid = depth_cropped == 0.0
+            # depth_cropped[depth_cropped < low] = low
+            # depth_cropped[depth_cropped > high] = high
+            # depth_cropped[invalid] = 0.0
+            # depth_cropped /= high
+            # depth_cropped = (depth_cropped * 255).astype(np.uint8)
+            # depth_cropped = np.tile(depth_cropped, (1, 1, 3))
+            #
+            # rgb_cropped = cv2.cvtColor(rgb_cropped, cv2.COLOR_RGB2BGR)
+            #
+            # vis = np.concatenate((rgb_cropped, depth_cropped, pred_color), axis=1)
+            # cv2.imshow('vis', vis)
+
+    def get_scan(self):
+        if self.config['robot'] == 'Turtlebot':
+            # Hokuyo URG-04LX-UG01
+            laser_linear_range = 5.6
+            laser_angular_range = 240.0
+            min_laser_dist = 0.05
+            laser_link_name = 'scan_link'
+        elif self.config['robot'] == 'Fetch':
+            # SICK TiM571-2050101 Laser Range Finder
+            laser_linear_range = 25.0
+            laser_angular_range = 220.0
+            min_laser_dist = 0.1
+            laser_link_name = 'laser_link'
+        else:
+            assert False, 'unknown robot for LiDAR observation'
+
+        assert self.n_vertical_beams == 1, 'scan can only handle one vertical beam for now.'
+
+        laser_angular_half_range = laser_angular_range / 2.0
+        laser_pose = self.robots[0].parts[laser_link_name].get_pose()
+
+        # self.scan_vis.set_position(pos=laser_pose[:3])
+
+        transform_matrix = quat2mat([laser_pose[6], laser_pose[3], laser_pose[4], laser_pose[5]])  # [x, y, z, w]
+        angle = np.arange(-laser_angular_half_range / 180 * np.pi,
+                          laser_angular_half_range / 180 * np.pi,
+                          laser_angular_range / 180.0 * np.pi / self.n_horizontal_rays)
+        unit_vector_local = np.array([[np.cos(ang), np.sin(ang), 0.0] for ang in angle])
+        unit_vector_world = transform_matrix.dot(unit_vector_local.T).T
+        start_pose = np.tile(laser_pose[:3], (self.n_horizontal_rays, 1))
+        start_pose += unit_vector_world * min_laser_dist
+        end_pose = laser_pose[:3] + unit_vector_world * laser_linear_range
+        results = p.rayTestBatch(start_pose, end_pose, 6)  # numThreads = 6
+        # hit_object_id = np.array([item[0] for item in results])
+        # link_id = np.array([item[1] for item in results])
+        hit_fraction = np.array([item[2] for item in results])  # hit fraction = [0.0, 1.0] of laser_linear_range
+        hit_fraction = self.add_naive_noise_to_sensor(hit_fraction, self.scan_noise_rate)
+
+        scan = np.expand_dims(hit_fraction, 1)
+        return scan
+
+        # assert 'scan_link' in self.robots[0].parts, "Requested scan but no scan_link"
+        # pose_camera = self.robots[0].parts['scan_link'].get_pose()
+        # angle = np.arange(0, 2 * np.pi, 2 * np.pi / float(self.n_horizontal_rays))
+        # elev_bottom_angle = -30. * np.pi / 180.
+        # elev_top_angle = 10. * np.pi / 180.
+        # elev_angle = np.arange(elev_bottom_angle, elev_top_angle,
+        #                        (elev_top_angle - elev_bottom_angle) / float(self.n_vertical_beams))
+        # orig_offset = np.vstack([
+        #     np.vstack([np.cos(angle),
+        #                np.sin(angle),
+        #                np.repeat(np.tan(elev_ang), angle.shape)]).T for elev_ang in elev_angle
+        # ])
+        # transform_matrix = quat2mat([pose_camera[-1], pose_camera[3], pose_camera[4], pose_camera[5]])
+        # offset = orig_offset.dot(np.linalg.inv(transform_matrix))
+        # pose_camera = pose_camera[None, :3].repeat(self.n_horizontal_rays * self.n_vertical_beams, axis=0)
+        #
+        # results = p.rayTestBatch(pose_camera, pose_camera + offset * 30)
+        # hit = np.array([item[0] for item in results])
+        # dist = np.array([item[2] for item in results])
+        #
+        # valid_pts = (dist < 1. - 1e-5) & (dist > 0.1 / 30) & (hit != self.robots[0].robot_ids[0]) & (hit != -1)
+        # dist[~valid_pts] = 0.0  # zero out invalid pts
+        # dist *= 30
+        #
+        # xyz = np.expand_dims(dist, 1) * orig_offset
+        # state['scan'] = xyz
+
     def get_state(self, collision_links=[]):
         # calculate state
         sensor_state = self.get_additional_states()
@@ -328,9 +489,6 @@ class NavigateEnv(BaseEnv):
         # cv2.imshow('seg', seg)
 
         state = OrderedDict()
-        rgb = None
-        depth = None
-        seg = None
         if 'sensor' in self.output:
             state['sensor'] = sensor_state
         if 'auxiliary_sensor' in self.output:
@@ -338,37 +496,22 @@ class NavigateEnv(BaseEnv):
         if 'pointgoal' in self.output:
             state['pointgoal'] = sensor_state[:2]
         if 'rgb' in self.output:
-            if rgb is None:
-                rgb = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3]
-            state['rgb'] = rgb
+            state['rgb'] = self.get_rgb()
         if 'depth' in self.output:
-            if depth is None:
-                depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
-            state['depth'] = depth
+            state['depth'] = self.get_depth()
         if 'pc' in self.output:
-            pc = self.simulator.renderer.render_robot_cameras(modes=('3d'))[0]
-            state['pc'] = pc
+            state['pc'] = self.get_pc()
         if 'rgbd' in self.output:
-            if rgb is None:
-                rgb = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3]
-            if depth is None:
-                depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
+            rgb = self.get_rgb()
+            depth = self.get_depth()
             state['rgbd'] = np.concatenate((rgb, depth), axis=2)
         if 'normal' in self.output:
-            state['normal'] = self.simulator.renderer.render_robot_cameras(modes='normal')
+            state['normal'] = self.get_normal()
         if 'seg' in self.output:
-            if seg is None:
-                seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1]
-            if self.num_object_classes is not None:
-                seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0)
-            state['seg'] = seg
+            state['seg'] = self.get_seg()
         if 'depth_seg' in self.output:
-            if depth is None:
-                depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3]
-            depth = np.clip(depth / 5.0, 0.0, 1.0)
-            seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1]
-            if self.num_object_classes is not None:
-                seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0)
+            depth = self.get_depth()
+            seg = self.get_seg()
             depth_seg = np.concatenate((depth, seg), axis=2)
             state['depth_seg'] = depth_seg
         if 'rgb_filled' in self.output:
@@ -377,115 +520,11 @@ class NavigateEnv(BaseEnv):
                 rgb_filled = self.comp(tensor[None, :, :, :])[0].permute(1, 2, 0).cpu().numpy()
                 state['rgb_filled'] = rgb_filled
         if 'seg_pred' in self.output:
-            if rgb is None:
-                rgb = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3]
-            with torch.no_grad():
-                width = rgb.shape[0]
-                height = int(width * (480.0 / 640.0))
-                half_diff = int((width - height) / 2)
-                rgb_cropped = rgb[half_diff:half_diff + height, :]
-                rgb_cropped = (rgb_cropped * 255).astype(np.uint8)
-                tensor = transforms.ToTensor()(rgb_cropped)
-                img = self.seg_normalizer(tensor).unsqueeze(0).cuda()
-                seg_pred = self.seg_encoder(img)
-                seg_pred = seg_pred[0][0].permute(1, 2, 0).cpu().numpy()
-                state['seg_pred'] = seg_pred
-
-                scores = self.seg_decoder(self.seg_encoder(img, return_feature_maps=True), segSize=(height, width))
-                _, pred = torch.max(scores, dim=1)
-                pred = pred.squeeze(0).cpu().numpy().astype(np.int32)
-                pred_color = colorEncode(pred, self.seg_colors).astype(np.uint8)
-                pred_color = cv2.cvtColor(pred_color, cv2.COLOR_RGB2BGR)
-
-                depth_cropped = depth[half_diff:half_diff + height, :]
-                low = 0.8
-                high = 3.5
-                invalid = depth_cropped == 0.0
-                depth_cropped[depth_cropped < low] = low
-                depth_cropped[depth_cropped > high] = high
-                depth_cropped[invalid] = 0.0
-                depth_cropped /= high
-                depth_cropped = (depth_cropped * 255).astype(np.uint8)
-                depth_cropped = np.tile(depth_cropped, (1, 1, 3))
-
-                rgb_cropped = cv2.cvtColor(rgb_cropped, cv2.COLOR_RGB2BGR)
-
-                vis = np.concatenate((rgb_cropped, depth_cropped, pred_color), axis=1)
-                cv2.imshow('vis', vis)
-                #
-                # # aggregate images and save
-                # im_vis = np.concatenate((rgb_cropped, pred_color), axis=1)
-                # if self.current_step % 10 == 0:
-                #     img_name = 'seg_results_area1/%d_%d.png' % (self.current_episode, self.current_step)
-                #     Image.fromarray(im_vis).save(img_name)
-
+            state['seg_pred'] = self.get_seg_pred()
         if 'pointgoal' in self.output:
             state['pointgoal'] = sensor_state[:2]
-
-        # TODO: figure out why 'scan' consumes so much cpu
         if 'scan' in self.output:
-            if self.config['robot'] == 'Turtlebot':
-                # Hokuyo URG-04LX-UG01
-                laser_linear_range = 5.6
-                laser_angular_range = 240.0
-                min_laser_dist = 0.05
-                laser_link_name = 'scan_link'
-            elif self.config['robot'] == 'Fetch':
-                # SICK TiM571-2050101 Laser Range Finder
-                laser_linear_range = 25.0
-                laser_angular_range = 220.0
-                min_laser_dist = 0.1
-                laser_link_name = 'laser_link'
-            else:
-                assert False, 'unknown robot for LiDAR observation'
-
-            laser_angular_half_range = laser_angular_range / 2.0
-            laser_pose = self.robots[0].parts[laser_link_name].get_pose()
-
-            # self.scan_vis.set_position(pos=laser_pose[:3])
-
-            transform_matrix = quat2mat([laser_pose[6], laser_pose[3], laser_pose[4], laser_pose[5]])  # [x, y, z, w]
-            angle = np.arange(-laser_angular_half_range / 180 * np.pi,
-                              laser_angular_half_range / 180 * np.pi,
-                              laser_angular_range / 180.0 * np.pi / self.n_horizontal_rays)
-            unit_vector_local = np.array([[np.cos(ang), np.sin(ang), 0.0] for ang in angle])
-            unit_vector_world = transform_matrix.dot(unit_vector_local.T).T
-            start_pose = np.tile(laser_pose[:3], (self.n_horizontal_rays, 1))
-            start_pose += unit_vector_world * min_laser_dist
-            end_pose = laser_pose[:3] + unit_vector_world * laser_linear_range
-            results = p.rayTestBatch(start_pose, end_pose, 6)  # numThreads = 6
-            # hit_object_id = np.array([item[0] for item in results])
-            # link_id = np.array([item[1] for item in results])
-            hit_fraction = np.array([item[2] for item in results])  # hit fraction = [0.0, 1.0] of laser_linear_range
-            state['scan'] = np.expand_dims(hit_fraction, 1)
-
-            # assert 'scan_link' in self.robots[0].parts, "Requested scan but no scan_link"
-            # pose_camera = self.robots[0].parts['scan_link'].get_pose()
-            # angle = np.arange(0, 2 * np.pi, 2 * np.pi / float(self.n_horizontal_rays))
-            # elev_bottom_angle = -30. * np.pi / 180.
-            # elev_top_angle = 10. * np.pi / 180.
-            # elev_angle = np.arange(elev_bottom_angle, elev_top_angle,
-            #                        (elev_top_angle - elev_bottom_angle) / float(self.n_vertical_beams))
-            # orig_offset = np.vstack([
-            #     np.vstack([np.cos(angle),
-            #                np.sin(angle),
-            #                np.repeat(np.tan(elev_ang), angle.shape)]).T for elev_ang in elev_angle
-            # ])
-            # transform_matrix = quat2mat([pose_camera[-1], pose_camera[3], pose_camera[4], pose_camera[5]])
-            # offset = orig_offset.dot(np.linalg.inv(transform_matrix))
-            # pose_camera = pose_camera[None, :3].repeat(self.n_horizontal_rays * self.n_vertical_beams, axis=0)
-            #
-            # results = p.rayTestBatch(pose_camera, pose_camera + offset * 30)
-            # hit = np.array([item[0] for item in results])
-            # dist = np.array([item[2] for item in results])
-            #
-            # valid_pts = (dist < 1. - 1e-5) & (dist > 0.1 / 30) & (hit != self.robots[0].robot_ids[0]) & (hit != -1)
-            # dist[~valid_pts] = 0.0  # zero out invalid pts
-            # dist *= 30
-            #
-            # xyz = np.expand_dims(dist, 1) * orig_offset
-            # state['scan'] = xyz
-
+            state['scan'] = self.get_scan()
         return state
 
     def run_simulation(self):
@@ -1163,26 +1202,6 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
             height = int(width * (480.0 / 640.0))
             half_diff = int((width - height) / 2)
             rgbd = rgbd[half_diff:half_diff+height, :]
-
-            if self.config['robot'] == 'Turtlebot':
-                # ASUS Xtion PRO LIVE
-                low = 0.8
-                high = 3.5
-            elif self.config['robot'] == 'Fetch':
-                # Primesense Carmine 1.09 short-range RGBD sensor
-                low = 0.35
-                high = 1.4
-            else:
-                assert False, 'unknown robot for RGBD observation'
-
-            depth = rgbd[:, :, 3]
-            invalid = depth == 0.0
-            depth[depth < low] = low
-            depth[depth > high] = high
-            depth[invalid] = 0.0
-            depth /= high
-            rgbd[:, :, 3] = depth
-
             state['rgbd'] = rgbd
         # cv2.imshow('depth', state['depth'])
         # cv2.imshow('scan', state['scan'])
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index c305edba8..2bf8bbb33 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -246,6 +246,14 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                        length=0.1,
                                        initial_offset=[0, 0, 0.1 / 2.0])
         self.arm_marker.load()
+
+        self.arm_interact_marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
+                                                rgba_color=[1, 0, 1, 1],
+                                                radius=0.1,
+                                                length=0.1,
+                                                initial_offset=[0, 0, 0.1 / 2.0])
+        self.arm_interact_marker.load()
+
         # self.arm_default_joint_positions = (0.38548146667743244, 1.1522793897208579,
         #                                     1.2576467971105596, -0.312703569911879,
         #                                     1.7404867100093226, -0.0962895617312548,
@@ -272,6 +280,11 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         self.prepare_scene()
 
+        self.times = {}
+        for key in ['get_base_subgoal', 'reach_base_subgoal', 'get_arm_subgoal', 'stash_object_states',
+                    'get_arm_joint_positions', 'reach_arm_subgoal', 'reset_object_states', 'interact']:
+            self.times[key] = []
+
     def prepare_scene(self):
         if self.scene.model_id == 'Avonia':
             # push_door, button_door
@@ -549,18 +562,14 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
     def get_state(self, collision_links=[]):
         state = super(MotionPlanningBaseArmEnv, self).get_state(collision_links)
-        for modality in ['depth', 'pc']:
-            if modality in state:
-                img = state[modality]
-                # width = img.shape[0]
-                # height = int(width * (480.0 / 640.0))
-                # half_diff = int((width - height) / 2)
-                # img = img[half_diff:half_diff+height, :]
-                if modality == 'depth':
-                    high = 20.0
-                    img[img > high] = high
-                    img /= high
-                state[modality] = img
+        # for modality in ['depth', 'pc']:
+        #     if modality in state:
+        #         img = state[modality]
+        #         width = img.shape[0]
+        #         height = int(width * (480.0 / 640.0))
+        #         half_diff = int((width - height) / 2)
+        #         img = img[half_diff:half_diff+height, :]
+        #         state[modality] = img
 
         # cv2.imshow('depth', state['depth'])
         # cv2.imshow('scan', state['scan'])
@@ -637,12 +646,14 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         :return: whether base_subgoal is achieved
         """
         # print('base')
-        # start = time.time()
+        start = time.time()
         base_subgoal_pos, base_subgoal_orn = self.get_base_subgoal(action)
+        self.times['get_base_subgoal'].append(time.time() - start)
         # print('get_base_subgoal', time.time() - start)
 
-        # start = time.time()
+        start = time.time()
         subgoal_success = self.reach_base_subgoal(base_subgoal_pos, base_subgoal_orn)
+        self.times['reach_base_subgoal'].append(time.time() - start)
         # print('reach_base_subgoal', time.time() - start)
 
         return subgoal_success
@@ -666,6 +677,12 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                         quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
         arm_subgoal = transform_mat.dot(np.array([-point[2], -point[0], point[1], 1]))[:3]
         self.arm_marker.set_position(arm_subgoal)
+
+        push_vector_local = np.array([action[6], action[7]])  # [-1.0, 1.0]
+        push_vector = rotate_vector_2d(push_vector_local, -self.robots[0].get_rpy()[2])
+        push_vector = np.append(push_vector, 0.0)
+        self.arm_interact_marker.set_position(arm_subgoal + push_vector)
+
         return arm_subgoal
 
     def is_collision_free(self, body_a, link_a_list, body_b=None, link_b_list=None):
@@ -844,8 +861,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         base_pose = get_base_values(self.robot_id)
 
         # self.simulator.set_timestep(0.002)
-        for i in range(100):
-            push_goal = np.array(arm_subgoal) + push_vector * i / 100.0
+        steps = 50
+        for i in range(steps):
+            push_goal = np.array(arm_subgoal) + push_vector * (i + 1) / float(steps)
 
             joint_positions = p.calculateInverseKinematics(self.robot_id,
                                                            self.robots[0].parts['gripper_link'].body_part_index,
@@ -876,8 +894,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         :return: whether arm_subgoal is achieved
         """
         # print('arm')
-        # start = time.time()
+        start = time.time()
         arm_subgoal = self.get_arm_subgoal(action)
+        self.times['get_arm_subgoal'].append(time.time() - start)
         # print('get_arm_subgoal', time.time() - start)
 
         # start = time.time()
@@ -885,29 +904,36 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # state_id = p.saveState()
         # print('saveState', time.time() - start)
 
+        start = time.time()
         self.stash_object_states()
+        self.times['stash_object_states'].append(time.time() - start)
 
-        # start = time.time()
+        start = time.time()
         arm_joint_positions = self.get_arm_joint_positions(arm_subgoal)
+        self.times['get_arm_joint_positions'].append(time.time() - start)
         # print('get_arm_joint_positions', time.time() - start)
 
-        # start = time.time()
+        start = time.time()
         subgoal_success = self.reach_arm_subgoal(arm_joint_positions)
+        self.times['reach_arm_subgoal'].append(time.time() - start)
         # print('reach_arm_subgoal', time.time() - start)
 
         # start = time.time()
         # p.restoreState(stateId=state_id)
         # print('restoreState', time.time() - start)
 
-        # start = time.time()
+        start = time.time()
         self.reset_object_states()
+        self.times['reset_object_states'].append(time.time() - start)
+
         # print('reset_object_velocities', time.time() - start)
 
         if subgoal_success:
             # set_joint_positions(self.robot_id, self.arm_joint_ids, arm_joint_positions)
 
-            # start = time.time()
+            start = time.time()
             self.interact(action, arm_subgoal)
+            self.times['interact'].append(time.time() - start)
             # print('interact', time.time() - start)
 
         return subgoal_success
@@ -923,15 +949,19 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # action[5] = arm_img_v
         # action[6] = arm_push_vector_x
         # action[7] = arm_push_vector_y
-
+        # print('-' * 20)
         self.current_step += 1
         use_base = action[0] > 0.0
+
+        # add action noise before execution
+        # action[1:] = np.clip(action[1:] + np.random.normal(0.0, 0.05, action.shape[0] - 1), -1.0, 1.0)
+
         if use_base:
             subgoal_success = self.move_base(action)
+            print('move_base:', subgoal_success)
         else:
             subgoal_success = self.move_arm(action)
-
-        # print('subgoal success', subgoal_success)
+            print('move_arm:', subgoal_success)
 
         return self.compute_next_step(action, use_base, subgoal_success)
 
@@ -1086,7 +1116,7 @@ if __name__ == '__main__':
                                        arena=args.arena,
                                        )
 
-    for episode in range(100):
+    for episode in range(20):
         print('Episode: {}'.format(episode))
         start = time.time()
         state = nav_env.reset()
@@ -1106,4 +1136,8 @@ if __name__ == '__main__':
                 print('Episode finished after {} timesteps'.format(i + 1))
                 break
         print(time.time() - start)
+
+    for key in nav_env.times:
+        print(key, len(nav_env.times[key]), np.mean(nav_env.times[key]))
+
     nav_env.clean()

From 71a4e516672ac9560eb337131c670925d58ed71d Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Mon, 27 Jan 2020 00:26:45 -0800
Subject: [PATCH 43/58] add sensor noise, correct fov and depth camera range,
 increase obstacle height, revert occupancy grid

---
 .../configs/fetch_interactive_nav_s2r_mp.yaml |  6 +-
 gibson2/envs/locomotor_env.py                 |  6 +-
 gibson2/envs/motion_planner_env.py            | 88 ++++++++++---------
 3 files changed, 54 insertions(+), 46 deletions(-)

diff --git a/examples/configs/fetch_interactive_nav_s2r_mp.yaml b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
index 1b11f1651..5bb9b5eb8 100644
--- a/examples/configs/fetch_interactive_nav_s2r_mp.yaml
+++ b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
@@ -45,11 +45,11 @@ max_step: 20
 # sensor
 output: [sensor, rgb, depth, scan, pc]
 resolution: 128
-fov: 120
+fov: 57.5
 n_horizontal_rays: 220
 n_vertical_beams: 1
-scan_noise_rate: 0.0
-depth_noise_rate: 0.0
+scan_noise_rate: 0.02
+depth_noise_rate: 0.02
 
 # display
 use_filler: true
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 5074434fb..161561469 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -322,8 +322,8 @@ class NavigateEnv(BaseEnv):
             high = 3.5
         elif self.config['robot'] == 'Fetch':
             # Primesense Carmine 1.09 short-range RGBD sensor
-            low = 0.0
-            high = 20.0  # http://xtionprolive.com/primesense-carmine-1.09
+            low = 0.35
+            high = 3.0  # http://xtionprolive.com/primesense-carmine-1.09
             # high = 1.4  # https://www.i3du.gr/pdf/primesense.pdf
         else:
             assert False, 'unknown robot for RGBD observation'
@@ -331,7 +331,7 @@ class NavigateEnv(BaseEnv):
         invalid = depth == 0.0
         depth[depth < low] = low
         depth[depth > high] = high
-        # depth[invalid] = high
+        depth[invalid] = high
 
         # re-scale depth to [0.0, 1.0]
         depth = (depth - low) / (high - low)
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 07803b9b0..c3dfacfe3 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -188,20 +188,20 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                        random_height=False,
                                                        device_idx=device_idx)
 
-        # # real sensor spec for Fetch
-        # resolution = self.config.get('resolution', 64)
-        # width = resolution
-        # height = int(width * (480.0 / 640.0))
-        # if 'rgb' in self.output:
-        #     self.observation_space.spaces['rgb'] = gym.spaces.Box(low=0.0,
-        #                                                           high=1.0,
-        #                                                           shape=(height, width, 3),
-        #                                                           dtype=np.float32)
-        # if 'depth' in self.output:
-        #     self.observation_space.spaces['depth'] = gym.spaces.Box(low=0.0,
-        #                                                             high=1.0,
-        #                                                             shape=(height, width, 1),
-        #                                                             dtype=np.float32)
+        # real sensor spec for Fetch
+        resolution = self.config.get('resolution', 64)
+        width = resolution
+        height = int(width * (480.0 / 640.0))
+        if 'rgb' in self.output:
+            self.observation_space.spaces['rgb'] = gym.spaces.Box(low=0.0,
+                                                                  high=1.0,
+                                                                  shape=(height, width, 3),
+                                                                  dtype=np.float32)
+        if 'depth' in self.output:
+            self.observation_space.spaces['depth'] = gym.spaces.Box(low=0.0,
+                                                                    high=1.0,
+                                                                    shape=(height, width, 1),
+                                                                    dtype=np.float32)
 
         self.arena = arena
         self.eval = eval
@@ -323,15 +323,15 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
             # obstacles
             self.obstacle_poses = [
-                [-3.5, 0.4, 0.6],
-                [-3.5, -0.3, 0.6],
-                [-3.5, -1.0, 0.6],
+                [-3.5, 0.4, 0.7],
+                [-3.5, -0.3, 0.7],
+                [-3.5, -1.0, 0.7],
             ]
 
             # semantic_obstacles
             self.semantic_obstacle_poses = [
-                [-3.5, 0.15, 0.6],
-                [-3.5, -0.95, 0.6],
+                [-3.5, 0.15, 0.7],
+                [-3.5, -0.95, 0.7],
             ]
             self.semantic_obstacle_masses = [
                 1.0,
@@ -384,7 +384,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         elif self.arena == 'obstacles':
             self.obstacles = []
             for obstacle_pose in self.obstacle_poses:
-                obstacle = BoxShape(pos=obstacle_pose, dim=[0.25, 0.25, 0.5], mass=10, color=[1, 0.64, 0, 1])
+                obstacle = BoxShape(pos=obstacle_pose, dim=[0.25, 0.25, 0.6], mass=10, color=[1, 0.64, 0, 1])
                 self.simulator.import_interactive_object(obstacle, class_id=4)
                 p.changeDynamics(obstacle.body_id, -1, lateralFriction=0.5)
                 self.obstacles.append(obstacle)
@@ -393,7 +393,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.obstacles = []
             for pose, mass, color in \
                     zip(self.semantic_obstacle_poses, self.semantic_obstacle_masses, self.semantic_obstacle_colors):
-                obstacle = BoxShape(pos=pose, dim=[0.35, 0.35, 0.5], mass=mass, color=color)
+                obstacle = BoxShape(pos=pose, dim=[0.35, 0.35, 0.6], mass=mass, color=color)
                 self.simulator.import_interactive_object(obstacle, class_id=4)
                 p.changeDynamics(obstacle.body_id, -1, lateralFriction=0.5)
                 self.obstacles.append(obstacle)
@@ -403,7 +403,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.mesh_id = self.scene.mesh_body_id
         self.map_size = self.scene.trav_map_original_size * self.scene.trav_map_default_resolution
 
-        self.grid_resolution = 800
+        self.grid_resolution = 500
         self.occupancy_range = 5.0  # m
         robot_footprint_radius = 0.279
         self.robot_footprint_radius_in_map = int(robot_footprint_radius / self.occupancy_range * self.grid_resolution)
@@ -465,7 +465,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         scan = state['scan']
 
         scan_laser = unit_vector_laser * (scan * (laser_linear_range - min_laser_dist) + min_laser_dist)
-        scan_laser = np.concatenate([np.array([[0, 0 ,0]]), scan_laser, np.array([[0, 0, 0]])], axis=0)
+        # scan_laser = np.concatenate([np.array([[0, 0 ,0]]), scan_laser, np.array([[0, 0, 0]])], axis=0)
 
         laser_translation = laser_pose[:3]
         laser_rotation = quat2mat([laser_pose[6], laser_pose[3], laser_pose[4], laser_pose[5]])
@@ -475,6 +475,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         base_rotation = quat2mat([base_pose[6], base_pose[3], base_pose[4], base_pose[5]])
         scan_local = base_rotation.T.dot((scan_world - base_translation).T).T
         scan_local = scan_local[:, :2]
+        scan_local = np.concatenate([np.array([[0, 0]]), scan_local, np.array([[0, 0]])], axis=0)
 
         # flip y axis
         scan_local[:, 1] *= -1
@@ -489,9 +490,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         cv2.circle(occupancy_grid, (self.grid_resolution // 2, self.grid_resolution // 2),
                    int(self.robot_footprint_radius_in_map), 1, -1)
 
-        cv2.rectangle(occupancy_grid, (self.grid_resolution // 2, self.grid_resolution // 2 - int(self.robot_footprint_radius_in_map + 2)),
-                      (self.grid_resolution // 2 + int(self.robot_footprint_radius_in_map), self.grid_resolution // 2 + int(self.robot_footprint_radius_in_map + 2)), \
-                      1, -1)
+        # cv2.rectangle(occupancy_grid, (self.grid_resolution // 2, self.grid_resolution // 2 - int(self.robot_footprint_radius_in_map + 2)),
+        #               (self.grid_resolution // 2 + int(self.robot_footprint_radius_in_map), self.grid_resolution // 2 + int(self.robot_footprint_radius_in_map + 2)), \
+        #               1, -1)
 
         # self.n_occ_img += 1
         # cv2.imwrite('occupancy_grid{:04d}.png'.format(self.n_occ_img), (occupancy_grid * 200).astype(np.uint8))
@@ -572,14 +573,14 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
     def get_state(self, collision_links=[]):
         state = super(MotionPlanningBaseArmEnv, self).get_state(collision_links)
-        # for modality in ['depth', 'pc']:
-        #     if modality in state:
-        #         img = state[modality]
-        #         width = img.shape[0]
-        #         height = int(width * (480.0 / 640.0))
-        #         half_diff = int((width - height) / 2)
-        #         img = img[half_diff:half_diff+height, :]
-        #         state[modality] = img
+        for modality in ['depth', 'pc']:
+            if modality in state:
+                img = state[modality]
+                width = img.shape[0]
+                height = int(width * (480.0 / 640.0))
+                half_diff = int((width - height) / 2)
+                img = img[half_diff:half_diff+height, :]
+                state[modality] = img
 
         # cv2.imshow('depth', state['depth'])
         # cv2.imshow('scan', state['scan'])
@@ -687,7 +688,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         arm_subgoal = transform_mat.dot(np.array([-point[2], -point[0], point[1], 1]))[:3]
         self.arm_marker.set_position(arm_subgoal)
 
-        push_vector_local = np.array([action[6], action[7]])  # [-1.0, 1.0]
+        push_vector_local = np.array([action[6], action[7]]) * 0.5  # [-0.5, 0.5]
         push_vector = rotate_vector_2d(push_vector_local, -self.robots[0].get_rpy()[2])
         push_vector = np.append(push_vector, 0.0)
         self.arm_interact_marker.set_position(arm_subgoal + push_vector)
@@ -782,9 +783,16 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         """
         Make all obstacles perpendicular to the ground
         """
-        for obstacle in self.obstacles:
+        if self.arena == 'semantic_obstacles':
+            obstacle_poses = self.semantic_obstacle_poses
+        elif self.arena == 'obstacles':
+            obstacle_poses = self.obstacle_poses
+        else:
+            assert False
+
+        for obstacle, obstacle_original_pose in zip(self.obstacles, obstacle_poses):
             obstacle_pose = get_base_values(obstacle.body_id)
-            set_base_values_with_z(obstacle.body_id, obstacle_pose, 0.6)
+            set_base_values_with_z(obstacle.body_id, obstacle_pose, obstacle_original_pose[2])
 
     def reach_arm_subgoal(self, arm_joint_positions):
         """
@@ -860,7 +868,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         :param arm_subgoal: starting location of the interaction
         :return: None
         """
-        push_vector_local = np.array([action[6], action[7]])  # [-1.0, 1.0]
+        push_vector_local = np.array([action[6], action[7]]) * 0.5  # [-0.5, 0.5]
         push_vector = rotate_vector_2d(push_vector_local, -self.robots[0].get_rpy()[2])
         push_vector = np.append(push_vector, 0.0)
 
@@ -963,7 +971,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         use_base = action[0] > 0.0
 
         # add action noise before execution
-        # action[1:] = np.clip(action[1:] + np.random.normal(0.0, 0.05, action.shape[0] - 1), -1.0, 1.0)
+        action[1:] = np.clip(action[1:] + np.random.normal(0.0, 0.05, action.shape[0] - 1), -1.0, 1.0)
 
         if use_base:
             subgoal_success = self.move_base(action)
@@ -1089,7 +1097,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 obstacle_poses = self.obstacle_poses
 
             for obstacle, obstacle_pose in zip(self.obstacles, obstacle_poses):
-                set_base_values_with_z(obstacle.body_id, [obstacle_pose[0], obstacle_pose[1], 0], 0.6)
+                set_base_values_with_z(obstacle.body_id, [obstacle_pose[0], obstacle_pose[1], 0], obstacle_pose[2])
 
     def reset(self):
         state = super(MotionPlanningBaseArmEnv, self).reset()

From 85a4b339fcb70f8017fea576d9f773ea206f57b9 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Tue, 28 Jan 2020 01:42:58 -0800
Subject: [PATCH 44/58] allow obs occupancy grid (replace scan) -> base subgoal
 and waypoints / goal in the occupancy grid, improve training efficiency, fix
 shortest path entire path bug, TODO: reset agent crash bug, saveWorld

---
 .../configs/fetch_interactive_nav_s2r_mp.yaml |   2 +-
 gibson2/core/physics/scene.py                 |   4 +-
 gibson2/envs/base_env.py                      |   2 +
 gibson2/envs/locomotor_env.py                 |  21 +-
 gibson2/envs/motion_planner_env.py            | 201 +++++++++++-------
 5 files changed, 145 insertions(+), 85 deletions(-)

diff --git a/examples/configs/fetch_interactive_nav_s2r_mp.yaml b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
index 5bb9b5eb8..14a0f8822 100644
--- a/examples/configs/fetch_interactive_nav_s2r_mp.yaml
+++ b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
@@ -43,7 +43,7 @@ dist_tol: 0.5  # body width
 max_step: 20
 
 # sensor
-output: [sensor, rgb, depth, scan, pc]
+output: [sensor, rgb, depth, scan]
 resolution: 128
 fov: 57.5
 n_horizontal_rays: 220
diff --git a/gibson2/core/physics/scene.py b/gibson2/core/physics/scene.py
index 873ea21c4..dff7cbf31 100644
--- a/gibson2/core/physics/scene.py
+++ b/gibson2/core/physics/scene.py
@@ -282,13 +282,15 @@ class BuildingScene(Scene):
 
         path_world = self.map_to_world(path_map)
         geodesic_distance = np.sum(np.linalg.norm(path_world[1:] - path_world[:-1], axis=1))
+        path_world = path_world[::self.waypoint_interval]
 
         if not entire_path:
-            path_world = path_world[::self.waypoint_interval][:self.num_waypoints]
+            path_world = path_world[:self.num_waypoints]
             num_remaining_waypoints = self.num_waypoints - path_world.shape[0]
             if num_remaining_waypoints > 0:
                 remaining_waypoints = np.tile(target_world, (num_remaining_waypoints, 1))
                 path_world = np.concatenate((path_world, remaining_waypoints), axis=0)
+
         return path_world, geodesic_distance
 
     def reset_floor(self, floor=0, additional_elevation=0.05, height=None):
diff --git a/gibson2/envs/base_env.py b/gibson2/envs/base_env.py
index 3c33e7a2b..d9d10aa00 100644
--- a/gibson2/envs/base_env.py
+++ b/gibson2/envs/base_env.py
@@ -52,6 +52,8 @@ class BaseEnv(gym.Env):
         elif self.config['scene'] == 'building':
             scene = BuildingScene(
                 self.config['model_id'],
+                waypoint_resolution=self.config.get('waypoint_resolution', 0.2),
+                num_waypoints=self.config.get('num_waypoints', 10),
                 build_graph=self.config.get('build_graph', False),
                 trav_map_erosion=self.config.get('trav_map_erosion', 2),
                 should_load_replaced_objects=self.config.get('should_load_replaced_objects', False)
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 161561469..587c70b86 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -24,8 +24,8 @@ import collections
 import matplotlib.pyplot as plt
 from scipy.io import loadmat
 from PIL import Image
-
-
+import string
+import random
 
 
 Episode = collections.namedtuple('Episode',
@@ -322,7 +322,7 @@ class NavigateEnv(BaseEnv):
             high = 3.5
         elif self.config['robot'] == 'Fetch':
             # Primesense Carmine 1.09 short-range RGBD sensor
-            low = 0.35
+            low = 0.1
             high = 3.0  # http://xtionprolive.com/primesense-carmine-1.09
             # high = 1.4  # https://www.i3du.gr/pdf/primesense.pdf
         else:
@@ -693,9 +693,17 @@ class NavigateEnv(BaseEnv):
         for _ in range(max_trials):
             self.robots[0].robot_specific_reset()
             self.reset_initial_and_target_pos()
-            if self.test_valid_position():
+            valid, collision_links_flatten = self.test_valid_position()
+            if valid:
                 return
-        raise Exception("Failed to reset robot without collision")
+
+        print('Failed to reset robot without collision' + '-' * 30)
+        for collision_link in collision_links_flatten:
+            print(collision_link)
+        random_string = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
+        p.saveBullet(random_string + '.bullet')
+        p.saveWorld(random_string + '.py')
+        # raise Exception("Failed to reset robot without collision")
 
     def reset_initial_and_target_pos(self):
         self.robots[0].set_position(pos=self.initial_pos)
@@ -704,7 +712,7 @@ class NavigateEnv(BaseEnv):
     def test_valid_position(self):
         collision_links = self.run_simulation()
         collision_links_flatten = [item for sublist in collision_links for item in sublist]
-        return len(collision_links_flatten) == 0
+        return len(collision_links_flatten) == 0, collision_links_flatten
 
     def before_reset_agent(self):
         return
@@ -1869,7 +1877,6 @@ if __name__ == '__main__':
     for episode in range(100):
         print('Episode: {}'.format(episode))
         start = time.time()
-        nav_env.reset()
         for step in range(1000):  # 500 steps, 50s world time
             action = nav_env.action_space.sample()
             # action[:] = 1.0
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index c3dfacfe3..1c5f24a46 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -187,22 +187,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                        automatic_reset=automatic_reset,
                                                        random_height=False,
                                                        device_idx=device_idx)
-
-        # real sensor spec for Fetch
-        resolution = self.config.get('resolution', 64)
-        width = resolution
-        height = int(width * (480.0 / 640.0))
-        if 'rgb' in self.output:
-            self.observation_space.spaces['rgb'] = gym.spaces.Box(low=0.0,
-                                                                  high=1.0,
-                                                                  shape=(height, width, 3),
-                                                                  dtype=np.float32)
-        if 'depth' in self.output:
-            self.observation_space.spaces['depth'] = gym.spaces.Box(low=0.0,
-                                                                    high=1.0,
-                                                                    shape=(height, width, 1),
-                                                                    dtype=np.float32)
-
         self.arena = arena
         self.eval = eval
         self.visualize_waypoints = True
@@ -220,11 +204,11 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         self.collision_reward_weight = collision_reward_weight
 
         # action[0] = base_or_arm
-        # action[1] = base_subgoal_theta
-        # action[2] = base_subgoal_dist
+        # action[1] = base_subgoal_theta / base_img_v
+        # action[2] = base_subgoal_dist / base_img_u
         # action[3] = base_orn
-        # action[4] = arm_img_u
-        # action[5] = arm_img_v
+        # action[4] = arm_img_v
+        # action[5] = arm_img_u
         # action[6] = arm_push_vector_x
         # action[7] = arm_push_vector_y
         self.action_space = gym.spaces.Box(shape=(8,),
@@ -233,6 +217,32 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                            dtype=np.float32)
         self.prepare_motion_planner()
 
+        # real sensor spec for Fetch
+        resolution = self.config.get('resolution', 64)
+        width = resolution
+        height = int(width * (480.0 / 640.0))
+        if 'rgb' in self.output:
+            self.observation_space.spaces['rgb'] = gym.spaces.Box(low=0.0,
+                                                                  high=1.0,
+                                                                  shape=(height, width, 3),
+                                                                  dtype=np.float32)
+        if 'depth' in self.output:
+            self.observation_space.spaces['depth'] = gym.spaces.Box(low=0.0,
+                                                                    high=1.0,
+                                                                    shape=(height, width, 1),
+                                                                    dtype=np.float32)
+        if 'occupancy_grid' in self.output:
+            self.observation_space.spaces['occupancy_grid'] = gym.spaces.Box(low=0.0,
+                                                                             high=1.0,
+                                                                             shape=(self.grid_resolution,
+                                                                                    self.grid_resolution,
+                                                                                    1),
+                                                                             dtype=np.float32)
+            self.use_occupancy_grid = True
+        else:
+            self.use_occupancy_grid = False
+
+
         self.base_marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
                                         rgba_color=[1, 0, 0, 1],
                                         radius=0.1,
@@ -283,7 +293,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         self.times = {}
         for key in ['get_base_subgoal', 'reach_base_subgoal', 'get_arm_subgoal', 'stash_object_states',
-                    'get_arm_joint_positions', 'reach_arm_subgoal', 'reset_object_states', 'interact']:
+                    'get_arm_joint_positions', 'reach_arm_subgoal', 'reset_object_states', 'interact',
+                    'compute_next_step']:
             self.times[key] = []
 
     def prepare_scene(self):
@@ -422,8 +433,10 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         half_size = self.map_size / 2.0
         # if self.mode == 'gui':
         #     p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, False)
-
-        grid = self.get_local_occupancy_grid()
+        if 'occupancy_grid' in self.output:
+            grid = self.state['occupancy_grid'].astype(np.uint8)
+        else:
+            grid = self.get_local_occupancy_grid()
         path = plan_base_motion_2d(self.robot_id, [x, y, theta], ((-half_size, -half_size), (half_size, half_size)),
                                    map_2d=grid, occupancy_range=self.occupancy_range,
                                    grid_resolution=self.grid_resolution,
@@ -436,9 +449,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return rotate_vector_3d(pos - cur_pos, *cur_rot)
 
     def get_local_occupancy_grid(self):
-        assert 'scan' in self.output
         assert self.config['robot'] in ['Turtlebot', 'Fetch']
-
         if self.config['robot'] == 'Turtlebot':
             # Hokuyo URG-04LX-UG01
             laser_linear_range = 5.6
@@ -461,8 +472,13 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                           laser_angular_range / 180.0 * np.pi / self.n_horizontal_rays)
         unit_vector_laser = np.array([[np.cos(ang), np.sin(ang), 0.0] for ang in angle])
 
-        state = self.get_state()
-        scan = state['scan']
+        if 'scan' in self.output:
+            # state = self.get_state()
+            state = self.state
+            # print('get_occu_grid', state['current_step'])
+            scan = state['scan']
+        else:
+            scan = self.get_scan()
 
         scan_laser = unit_vector_laser * (scan * (laser_linear_range - min_laser_dist) + min_laser_dist)
         # scan_laser = np.concatenate([np.array([[0, 0 ,0]]), scan_laser, np.array([[0, 0, 0]])], axis=0)
@@ -541,6 +557,21 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         waypoints_local_xy = np.array([self.global_to_local(waypoint, cur_pos, cur_rot)[:2]
                                        for waypoint in shortest_path]).flatten()
+        target_pos_local_xy = target_pos_local[:2]
+
+        if self.use_occupancy_grid:
+            waypoints_img_vu = np.zeros_like(waypoints_local_xy)
+            target_pos_img_vu = np.zeros_like(target_pos_local_xy)
+
+            for i in range(self.scene.num_waypoints):
+                waypoints_img_vu[2 * i] = -waypoints_local_xy[2 * i + 1] / (self.occupancy_range / 2.0)
+                waypoints_img_vu[2 * i + 1] = waypoints_local_xy[2 * i] / (self.occupancy_range / 2.0)
+
+            target_pos_img_vu[0] = -target_pos_local_xy[1] / (self.occupancy_range / 2.0)
+            target_pos_img_vu[1] = target_pos_local_xy[0] / (self.occupancy_range / 2.0)
+
+            waypoints_local_xy = waypoints_img_vu
+            target_pos_local_xy = target_pos_img_vu
 
         # # convert Cartesian space to radian space
         # for i in range(waypoints_local_xy.shape[0] // 2):
@@ -559,7 +590,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         #     target_pos_local[1] = norm
 
         additional_states = np.concatenate((waypoints_local_xy,
-                                            target_pos_local[:2]))
+                                            target_pos_local_xy))
         # linear_velocity_local,
         # angular_velocity_local))
 
@@ -571,16 +602,21 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         return additional_states
 
+    def crop_rect_image(self, img):
+        width = img.shape[0]
+        height = int(width * (480.0 / 640.0))
+        half_diff = int((width - height) / 2)
+        img = img[half_diff:half_diff + height, :]
+        return img
+
     def get_state(self, collision_links=[]):
         state = super(MotionPlanningBaseArmEnv, self).get_state(collision_links)
-        for modality in ['depth', 'pc']:
+        for modality in ['rgb', 'depth']:
             if modality in state:
-                img = state[modality]
-                width = img.shape[0]
-                height = int(width * (480.0 / 640.0))
-                half_diff = int((width - height) / 2)
-                img = img[half_diff:half_diff+height, :]
-                state[modality] = img
+                state[modality] = self.crop_rect_image(state[modality])
+
+        if 'occupancy_grid' in self.output:
+            state['occupancy_grid'] = self.get_local_occupancy_grid().astype(np.float32)
 
         # cv2.imshow('depth', state['depth'])
         # cv2.imshow('scan', state['scan'])
@@ -604,12 +640,19 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         :return: base_subgoal_pos [x, y] in the world frame
         :return: base_subgoal_orn yaw in the world frame
         """
-        # print('base')
+        if self.use_occupancy_grid:
+            base_img_v, base_img_u = action[1], action[2]
+            base_local_y = (-base_img_v) * self.occupancy_range / 2.0
+            base_local_x = base_img_u * self.occupancy_range / 2.0
+            base_subgoal_theta = np.arctan2(base_local_y, base_local_x)
+            base_subgoal_dist = np.linalg.norm([base_local_x, base_local_y])
+        else:
+            base_subgoal_theta = (action[1] * 110.0) / 180.0 * np.pi  # [-110.0, 110.0]
+            base_subgoal_dist = (action[2] + 1)  # [0.0, 2.0]
+
         yaw = self.robots[0].get_rpy()[2]
         robot_pos = self.robots[0].get_position()
-        base_subgoal_theta = (action[1] * 110.0) / 180.0 * np.pi  # [-110.0, 110.0]
         base_subgoal_theta += yaw
-        base_subgoal_dist = (action[2] + 1)  # [0.0, 2.0]
         base_subgoal_pos = np.array([np.cos(base_subgoal_theta), np.sin(base_subgoal_theta)])
         base_subgoal_pos *= base_subgoal_dist
         base_subgoal_pos = np.append(base_subgoal_pos, 0.0)
@@ -656,14 +699,14 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         :return: whether base_subgoal is achieved
         """
         # print('base')
-        start = time.time()
+        # start = time.time()
         base_subgoal_pos, base_subgoal_orn = self.get_base_subgoal(action)
-        self.times['get_base_subgoal'].append(time.time() - start)
+        # self.times['get_base_subgoal'].append(time.time() - start)
         # print('get_base_subgoal', time.time() - start)
 
-        start = time.time()
+        # start = time.time()
         subgoal_success = self.reach_base_subgoal(base_subgoal_pos, base_subgoal_orn)
-        self.times['reach_base_subgoal'].append(time.time() - start)
+        # self.times['reach_base_subgoal'].append(time.time() - start)
         # print('reach_base_subgoal', time.time() - start)
 
         return subgoal_success
@@ -674,14 +717,14 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         :param action: policy output
         :return: arm_subgoal [x, y, z] in the world frame
         """
-        state = self.get_state()
-        points = state['pc']
+        # print('get_arm_subgoal', state['current_step'])
+        points = self.crop_rect_image(self.get_pc())
         height, width = points.shape[0:2]
 
-        arm_img_u = np.clip(int((action[4] + 1) / 2.0 * height), 0, height - 1)
-        arm_img_v = np.clip(int((action[5] + 1) / 2.0 * width), 0, width - 1)
+        arm_img_v = np.clip(int((action[4] + 1) / 2.0 * height), 0, height - 1)
+        arm_img_u = np.clip(int((action[5] + 1) / 2.0 * width), 0, width - 1)
 
-        point = points[arm_img_u, arm_img_v]
+        point = points[arm_img_v, arm_img_u]
         camera_pose = (self.robots[0].parts['eyes'].get_pose())
         transform_mat = quat_pos_to_mat(pos=camera_pose[:3],
                                         quat=[camera_pose[6], camera_pose[3], camera_pose[4], camera_pose[5]])
@@ -911,9 +954,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         :return: whether arm_subgoal is achieved
         """
         # print('arm')
-        start = time.time()
+        # start = time.time()
         arm_subgoal = self.get_arm_subgoal(action)
-        self.times['get_arm_subgoal'].append(time.time() - start)
+        # self.times['get_arm_subgoal'].append(time.time() - start)
         # print('get_arm_subgoal', time.time() - start)
 
         # start = time.time()
@@ -921,36 +964,36 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # state_id = p.saveState()
         # print('saveState', time.time() - start)
 
-        start = time.time()
+        # start = time.time()
         self.stash_object_states()
-        self.times['stash_object_states'].append(time.time() - start)
+        # self.times['stash_object_states'].append(time.time() - start)
 
-        start = time.time()
+        # start = time.time()
         arm_joint_positions = self.get_arm_joint_positions(arm_subgoal)
-        self.times['get_arm_joint_positions'].append(time.time() - start)
+        # self.times['get_arm_joint_positions'].append(time.time() - start)
         # print('get_arm_joint_positions', time.time() - start)
 
-        start = time.time()
+        # start = time.time()
         subgoal_success = self.reach_arm_subgoal(arm_joint_positions)
-        self.times['reach_arm_subgoal'].append(time.time() - start)
+        # self.times['reach_arm_subgoal'].append(time.time() - start)
         # print('reach_arm_subgoal', time.time() - start)
 
         # start = time.time()
         # p.restoreState(stateId=state_id)
         # print('restoreState', time.time() - start)
 
-        start = time.time()
+        # start = time.time()
         self.reset_object_states()
-        self.times['reset_object_states'].append(time.time() - start)
+        # self.times['reset_object_states'].append(time.time() - start)
 
         # print('reset_object_velocities', time.time() - start)
 
         if subgoal_success:
             # set_joint_positions(self.robot_id, self.arm_joint_ids, arm_joint_positions)
 
-            start = time.time()
+            # start = time.time()
             self.interact(action, arm_subgoal)
-            self.times['interact'].append(time.time() - start)
+            # self.times['interact'].append(time.time() - start)
             # print('interact', time.time() - start)
 
         return subgoal_success
@@ -959,11 +1002,11 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # print('-' * 30)
         # embed()
         # action[0] = base_or_arm
-        # action[1] = base_subgoal_theta
-        # action[2] = base_subgoal_dist
+        # action[1] = base_subgoal_theta / base_img_v
+        # action[2] = base_subgoal_dist / base_img_u
         # action[3] = base_orn
-        # action[4] = arm_img_u
-        # action[5] = arm_img_v
+        # action[4] = arm_img_v
+        # action[5] = arm_img_u
         # action[6] = arm_push_vector_x
         # action[7] = arm_push_vector_y
         # print('-' * 20)
@@ -975,19 +1018,23 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         if use_base:
             subgoal_success = self.move_base(action)
-            print('move_base:', subgoal_success)
+            # print('move_base:', subgoal_success)
         else:
             subgoal_success = self.move_arm(action)
-            print('move_arm:', subgoal_success)
+            # print('move_arm:', subgoal_success)
 
-        return self.compute_next_step(action, use_base, subgoal_success)
+        # start = time.time()
+        state, reward, done, info = self.compute_next_step(action, use_base, subgoal_success)
+        # self.times['compute_next_step'].append(time.time() - start)
+
+        return state, reward, done, info
 
     def compute_next_step(self, action, use_base, subgoal_success):
         self.simulator.sync()
 
         if use_base:
             # trigger re-computation of geodesic distance for get_reward
-            state = self.get_state()
+            self.state = self.get_state()
 
         info = {}
         if subgoal_success:
@@ -1032,17 +1079,19 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         if not use_base:
             set_joint_positions(self.robot_id, self.arm_joint_ids, self.arm_default_joint_positions)
-            state = self.get_state()
+            self.state = self.get_state()
 
         if done and self.automatic_reset:
-            state = self.reset()
+            self.state = self.reset()
 
-        del state['pc']
+        # self.state['current_step'] = self.current_step
+
+        # print('compute_next_step', self.state['current_step'])
 
         # print('reward', reward)
         # time.sleep(3)
 
-        return state, reward, done, info
+        return self.state, reward, done, info
 
     def reset_initial_and_target_pos(self):
         if self.arena in ['button_door', 'push_door', 'obstacles', 'semantic_obstacles']:
@@ -1100,9 +1149,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 set_base_values_with_z(obstacle.body_id, [obstacle_pose[0], obstacle_pose[1], 0], obstacle_pose[2])
 
     def reset(self):
-        state = super(MotionPlanningBaseArmEnv, self).reset()
-        del state['pc']
-        return state
+        self.state = super(MotionPlanningBaseArmEnv, self).reset()
+        # self.state['current_step'] = self.current_step
+        return self.state
 
 
 if __name__ == '__main__':
@@ -1133,7 +1182,7 @@ if __name__ == '__main__':
                                        arena=args.arena,
                                        )
 
-    for episode in range(20):
+    for episode in range(100):
         print('Episode: {}'.format(episode))
         start = time.time()
         state = nav_env.reset()
@@ -1155,6 +1204,6 @@ if __name__ == '__main__':
         print(time.time() - start)
 
     for key in nav_env.times:
-        print(key, len(nav_env.times[key]), np.mean(nav_env.times[key]))
+        print(key, len(nav_env.times[key]), np.sum(nav_env.times[key]), np.mean(nav_env.times[key]))
 
     nav_env.clean()

From 443e502f2252e944cd1068b25b668ba48d5cf749 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Tue, 28 Jan 2020 02:11:18 -0800
Subject: [PATCH 45/58] cand center integration

---
 .../configs/fetch_interactive_nav_s2r_mp.yaml |  4 +--
 gibson2/core/physics/scene.py                 | 23 +++++++++---
 gibson2/envs/motion_planner_env.py            | 36 +++++++++++++++++--
 3 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/examples/configs/fetch_interactive_nav_s2r_mp.yaml b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
index 14a0f8822..877fae870 100644
--- a/examples/configs/fetch_interactive_nav_s2r_mp.yaml
+++ b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
@@ -1,6 +1,6 @@
 # scene
 scene: building
-model_id: Avonia
+model_id: candcenter
 build_graph: true
 load_texture: true
 should_load_replaced_objects: false
@@ -75,7 +75,7 @@ fast_lq_render: true
 
 # visual objects
 visual_object_at_initial_target_pos: true
-target_visual_object_visible_to_agent: true
+target_visual_object_visible_to_agent: false
 
 # debug
 debug: false
diff --git a/gibson2/core/physics/scene.py b/gibson2/core/physics/scene.py
index dff7cbf31..1fe6a4089 100644
--- a/gibson2/core/physics/scene.py
+++ b/gibson2/core/physics/scene.py
@@ -139,11 +139,20 @@ class BuildingScene(Scene):
             else:
                 filename = os.path.join(get_model_path(self.model_id), "mesh_z_up.obj")
         scaling = [1, 1, 1]
+
         collisionId = p.createCollisionShape(p.GEOM_MESH,
                                              fileName=filename,
                                              meshScale=scaling,
                                              flags=p.GEOM_FORCE_CONCAVE_TRIMESH)
         visualId = -1
+        #p.createVisualShape(p.GEOM_MESH,
+        #                        fileName=filename,
+        #                        meshScale=scaling)
+
+        # texture_filename = os.path.join(get_model_path(self.model_id), "{}_mesh_texture.small.jpg".format(self.model_id))
+        # texture_id = p.loadTexture(texture_filename)
+        # print('pybullet texture id:', texture_id, texture_filename)
+
         boundaryUid = p.createMultiBody(baseCollisionShapeIndex=collisionId,
                                         baseVisualShapeIndex=visualId)
 
@@ -157,14 +166,18 @@ class BuildingScene(Scene):
         p.resetBasePositionAndOrientation(self.ground_plane_mjcf[0],
                                           posObj=[0, 0, 0],
                                           ornObj=[0, 0, 0, 1])
-        p.changeVisualShape(boundaryUid,
-                            -1,
-                            rgbaColor=[168 / 255.0, 164 / 255.0, 92 / 255.0, 1.0],
-                            specularColor=[0.5, 0.5, 0.5])
+        # p.changeVisualShape(boundaryUid,
+        #                     -1,
+        #                     rgbaColor=[168 / 255.0, 164 / 255.0, 92 / 255.0, 1.0],
+        #                     specularColor=[0.5, 0.5, 0.5])
+        # if texture_id >= 0:
+        #     p.changeVisualShape(boundaryUid,
+        #                     -1,
+        #                     textureUniqueId=texture_id)
 
         p.changeVisualShape(self.ground_plane_mjcf[0],
                             -1,
-                            rgbaColor=[168 / 255.0, 164 / 255.0, 92 / 255.0, 1.0],
+                            rgbaColor=[168 / 255.0, 164 / 255.0, 92 / 255.0, 0.35],
                             specularColor=[0.5, 0.5, 0.5])
 
         floor_height_path = os.path.join(get_model_path(self.model_id), 'floors.txt')
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 1c5f24a46..ff3956752 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -353,8 +353,28 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 [0.0, 1.0, 0.0, 1],
             ]
 
+            self.initial_pos_range = np.array([[1.2,1.2], [0.0,0.0]])
+
             # TODO: initial_pos and target_pos sampling should also be put here (scene-specific)
 
+        elif self.scene.model_id == 'gates_jan20':
+            pass
+        elif self.scene.model_id == 'candcenter':
+            door_scales = [
+                1.03
+            ]
+            self.door_positions = [
+                [1.2,-2.15,0],
+            ]
+            self.door_rotations = [np.pi]
+
+            wall_poses = []
+
+            self.door_target_pos = np.array([[[-1.0, 1.0], [-3, -5]]])
+
+            self.initial_pos_range = np.array([[-1, 8], [-2, 4]])
+
+
         else:
             # TODO: handcraft environments for more scenes
             assert False, 'model_id unknown'
@@ -369,6 +389,9 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 self.simulator.import_interactive_object(door, class_id=2)
                 door.set_position_rotation(position, quatToXYZW(euler2quat(0, 0, rotation), 'wxyz'))
                 self.doors.append(door)
+                # remove door collision with mesh
+                for i in range(p.getNumJoints(door.body_id)):
+                    p.setCollisionFilterPair(self.mesh_id, door.body_id, -1, i, 0)
 
             self.walls = []
             for wall_pose in wall_poses:
@@ -1096,8 +1119,15 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
     def reset_initial_and_target_pos(self):
         if self.arena in ['button_door', 'push_door', 'obstacles', 'semantic_obstacles']:
             floor_height = self.scene.get_floor_height(self.floor_num)
+
+            self.initial_pos = np.array([
+                np.random.uniform(self.initial_pos_range[0][0], self.initial_pos_range[0][1]),
+                np.random.uniform(self.initial_pos_range[1][0], self.initial_pos_range[1][1]),
+                floor_height
+            ])
+
             self.initial_height = floor_height + self.random_init_z_offset
-            self.initial_pos = np.array([1.2, 0.0, floor_height])
+
             self.robots[0].set_position(pos=[self.initial_pos[0],
                                              self.initial_pos[1],
                                              self.initial_height])
@@ -1114,6 +1144,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             else:
                 self.target_pos = np.array([-5.0, 0.0, floor_height])
         else:
+            floor_height = self.scene.get_floor_height(self.floor_num)
+            self.initial_height = floor_height + self.random_init_z_offset
             super(MotionPlanningBaseArmEnv, self).reset_initial_and_target_pos()
 
     def before_reset_agent(self):
@@ -1168,7 +1200,7 @@ if __name__ == '__main__':
 
     parser.add_argument('--arena',
                         '-a',
-                        choices=['button_door', 'push_door', 'obstacles', 'semantic_obstacles'],
+                        choices=['button_door', 'push_door', 'obstacles', 'semantic_obstacles', 'empty'],
                         default='push_door',
                         help='which arena to train or test (default: push_door)')
 

From dd458c68db3c8dffb71c7fb75ef5542f51797645 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Tue, 28 Jan 2020 14:38:24 -0800
Subject: [PATCH 46/58] fix wall issues

---
 gibson2/envs/motion_planner_env.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index ff3956752..9df8f6fff 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -310,8 +310,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             ]
             self.door_rotations = [np.pi / 2.0, -np.pi / 2.0]
             wall_poses = [
-                [[-3.5, 0.45, 0.45], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')],
-                [[-3.5, -0.4, 0.45], quatToXYZW(euler2quat(0, 0, -np.pi / 2.0), 'wxyz')],
+                [[-3.5, 0.47, 0.45], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')],
+                [[-3.5, -0.45, 0.45], quatToXYZW(euler2quat(0, 0, -np.pi / 2.0), 'wxyz')],
             ]
             self.door_target_pos = [
                 [[-5.5, -4.5], [-1.0, 1.0]],
@@ -368,7 +368,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             ]
             self.door_rotations = [np.pi]
 
-            wall_poses = []
+            wall_poses = [[[1.5, -2.2, 0.25], quatToXYZW(euler2quat(0, 0, 0), 'wxyz')]]
 
             self.door_target_pos = np.array([[[-1.0, 1.0], [-3, -5]]])
 
@@ -396,7 +396,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.walls = []
             for wall_pose in wall_poses:
                 wall = InteractiveObj(
-                    os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls_quarter.urdf'),
+                    os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls_quarter_white.urdf'),
                     scale=0.3)
                 self.simulator.import_interactive_object(wall, class_id=3)
                 wall.set_position_rotation(wall_pose[0], wall_pose[1])

From 7702bb774e87ce12cd2fb020ddc58240de9db446 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Tue, 28 Jan 2020 20:22:18 -0800
Subject: [PATCH 47/58] finialize environment

---
 examples/demo/inspect_env.py       |  78 ++++++++++++++++++++++
 gibson2/envs/motion_planner_env.py | 104 +++++++++++++++++++++++++++--
 2 files changed, 175 insertions(+), 7 deletions(-)
 create mode 100644 examples/demo/inspect_env.py

diff --git a/examples/demo/inspect_env.py b/examples/demo/inspect_env.py
new file mode 100644
index 000000000..3e31fdbaf
--- /dev/null
+++ b/examples/demo/inspect_env.py
@@ -0,0 +1,78 @@
+from gibson2.envs.motion_planner_env import MotionPlanningBaseArmEnv
+from gibson2.core.physics.interactive_objects import VisualMarker, InteractiveObj, BoxShape
+import gibson2
+import argparse, time
+from transforms3d.quaternions import quat2mat, qmult
+from transforms3d.euler import euler2quat
+from gibson2.utils.utils import parse_config, rotate_vector_3d, rotate_vector_2d, l2_distance, quatToXYZW
+import numpy as np
+import matplotlib.pyplot as plt
+import cv2
+from gibson2.external.pybullet_tools.utils import *
+import sys
+
+config_file = '../../examples/configs/fetch_interactive_nav_s2r_mp.yaml'
+mode = 'gui'
+
+nav_env = MotionPlanningBaseArmEnv(config_file=config_file,
+                                       model_id=sys.argv[1],
+                                       mode=mode,
+                                       action_timestep=1.0 / 500.0,
+                                       physics_timestep=1.0 / 500.0,
+                                        eval=True, arena='button_door')
+
+nav_env.reset()
+#for i in range(10):
+#    nav_env.simulator_step()
+
+for pose, mass, color in \
+        zip(nav_env.semantic_obstacle_poses, nav_env.semantic_obstacle_masses, nav_env.semantic_obstacle_colors):
+    obstacle = BoxShape(pos=pose, dim=[nav_env.obstacle_dim, nav_env.obstacle_dim, 0.6], mass=mass, color=color)
+    nav_env.simulator.import_interactive_object(obstacle, class_id=4)
+    p.changeDynamics(obstacle.body_id, -1, lateralFriction=0.5)
+
+
+
+def draw_box(x1,x2,y1,y2, perm=False):
+    if perm:
+        t = 0
+    else:
+        t = 10
+        
+    p.addUserDebugLine([x1,y1,0.5], [x2,y2,0.5], lineWidth=3, lifeTime=t)
+    p.addUserDebugLine([x1,y2,0.5], [x2,y1,0.5], lineWidth=3, lifeTime=t)\
+    
+    p.addUserDebugLine([x1,y1,0.5], [x1,y2,0.5], lineWidth=3, lifeTime=t)
+    p.addUserDebugLine([x2,y1,0.5], [x2,y2,0.5], lineWidth=3, lifeTime=t)
+    
+    p.addUserDebugLine([x1,y1,0.5], [x2,y1,0.5], lineWidth=3, lifeTime=t)
+    p.addUserDebugLine([x1,y2,0.5], [x2,y2,0.5], lineWidth=3, lifeTime=t)
+    
+def draw_text(x, y, text):
+    p.addUserDebugText(text, [x,y,0.5])
+
+
+
+draw_box(nav_env.initial_pos_range[0][0], nav_env.initial_pos_range[0][1], 
+         nav_env.initial_pos_range[1][0], nav_env.initial_pos_range[1][1], perm=True)
+draw_text((nav_env.initial_pos_range[0][0] + nav_env.initial_pos_range[0][1])/2, 
+          (nav_env.initial_pos_range[1][0] + nav_env.initial_pos_range[1][1])/2, 'start range')
+
+
+draw_box(nav_env.target_pos_range[0][0], nav_env.target_pos_range[0][1], 
+         nav_env.target_pos_range[1][0], nav_env.target_pos_range[1][1], perm=True)
+draw_text((nav_env.target_pos_range[0][0] + nav_env.target_pos_range[0][1])/2, 
+          (nav_env.target_pos_range[1][0] + nav_env.target_pos_range[1][1])/2, 'target range')
+
+
+for door_pos_range in nav_env.door_target_pos:
+
+    draw_box(door_pos_range[0][0], door_pos_range[0][1], 
+         door_pos_range[1][0], door_pos_range[1][1], perm=True)
+    
+    draw_text((door_pos_range[0][0] + door_pos_range[0][1])/2, 
+          (door_pos_range[1][0] + door_pos_range[1][1])/2, 'door target range')
+    
+
+while True:
+	nav_env.simulator_step()
\ No newline at end of file
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 9df8f6fff..8caf23f34 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -353,12 +353,57 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 [0.0, 1.0, 0.0, 1],
             ]
 
-            self.initial_pos_range = np.array([[1.2,1.2], [0.0,0.0]])
+            self.initial_pos_range = np.array([[-1,1.5], [-1,1]])
+            self.target_pos_range = np.array([[-5.5, -4.5], [-1.0, 1.0]])
 
             # TODO: initial_pos and target_pos sampling should also be put here (scene-specific)
-
+            self.obstacle_dim = 0.35
         elif self.scene.model_id == 'gates_jan20':
-            pass
+            door_scales = [
+                0.95,
+                0.95,
+            ]
+            self.door_positions = [
+                [-29.95, 0.7, 0.05],
+                [-36, 0.7, 0.05],
+            ]
+            self.door_rotations = [np.pi, np.pi]
+            wall_poses = [
+            ]
+            self.door_target_pos = [
+                [[-30.5, -30], [-3, -1]],
+                [[-36.75, -36.25], [-3, -1]],
+            ]
+            self.initial_pos_range = np.array([[-25, -21], [4, 9]])
+            self.target_pos_range = np.array([[-40, -30], [1.25, 1.75]])
+
+            # button_door
+            button_scales = [
+                2.0,
+                2.0,
+            ]
+            self.button_positions = [
+                [[-29.2, -29.2], [0.86, 0.86]],
+                [[-35.2, -35.2], [0.86, 0.86]]
+            ]
+            self.button_rotations = [
+                0.0,
+                0.0,
+            ]
+
+            # semantic_obstacles
+            self.semantic_obstacle_poses = [
+                [-28.1, 1.45, 0.7],
+            ]
+            self.semantic_obstacle_masses = [
+                1.0,
+            ]
+            self.semantic_obstacle_colors = [
+                [1.0, 0.0, 0.0, 1],
+            ]
+
+            self.obstacle_dim = 0.25
+
         elif self.scene.model_id == 'candcenter':
             door_scales = [
                 1.03
@@ -370,10 +415,48 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
             wall_poses = [[[1.5, -2.2, 0.25], quatToXYZW(euler2quat(0, 0, 0), 'wxyz')]]
 
-            self.door_target_pos = np.array([[[-1.0, 1.0], [-3, -5]]])
+            self.door_target_pos = np.array([[[-1.0, 1.0], [-5, -3]]])
 
             self.initial_pos_range = np.array([[-1, 8], [-2, 4]])
+            self.target_pos_range = np.array([[-3.75,-3.25],[-1,5.5]])
 
+            button_scales = [
+                1.7,
+            ]
+            self.button_positions = [
+                [[0.6, 0.6], [-2.1, -2.1]],
+            ]
+            self.button_rotations = [
+                0.0,
+            ]
+
+            # semantic_obstacles
+            self.semantic_obstacle_poses = [
+                [-2, 0, 0.7],
+                [-2, -0.8, 0.7],
+                [-2, -1.6, 0.7],
+                [-2, 5, 0.7],
+                [-2, 6, 0.7],
+                [-2, 7, 0.7],
+            ]
+            self.semantic_obstacle_masses = [
+                1.0,
+                1.0,
+                1.0,
+                1.0,
+                1.0,
+                10000.0,
+                10000.0,
+            ]
+            self.semantic_obstacle_colors = [
+                [1.0, 0.0, 0.0, 1],
+                [1.0, 0.0, 0.0, 1],
+                [1.0, 0.0, 0.0, 1],
+                [1.0, 0.0, 0.0, 1],
+                [0.0, 1.0, 0.0, 1],
+                [0.0, 1.0, 0.0, 1],
+            ]
+            self.obstacle_dim = 0.35
 
         else:
             # TODO: handcraft environments for more scenes
@@ -427,7 +510,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.obstacles = []
             for pose, mass, color in \
                     zip(self.semantic_obstacle_poses, self.semantic_obstacle_masses, self.semantic_obstacle_colors):
-                obstacle = BoxShape(pos=pose, dim=[0.35, 0.35, 0.6], mass=mass, color=color)
+                obstacle = BoxShape(pos=pose, dim=[self.obstacle_dim, self.obstacle_dim, 0.6], mass=mass, color=color)
                 self.simulator.import_interactive_object(obstacle, class_id=4)
                 p.changeDynamics(obstacle.body_id, -1, lateralFriction=0.5)
                 self.obstacles.append(obstacle)
@@ -1117,6 +1200,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return self.state, reward, done, info
 
     def reset_initial_and_target_pos(self):
+        self.initial_orn_z = np.random.uniform(-np.pi, np.pi)
         if self.arena in ['button_door', 'push_door', 'obstacles', 'semantic_obstacles']:
             floor_height = self.scene.get_floor_height(self.floor_num)
 
@@ -1131,7 +1215,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.robots[0].set_position(pos=[self.initial_pos[0],
                                              self.initial_pos[1],
                                              self.initial_height])
-            self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, np.pi), 'wxyz'))
+            self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, self.initial_orn_z), 'wxyz'))
 
             if self.arena in ['button_door', 'push_door']:
                 self.door_idx = np.random.randint(0, len(self.doors))
@@ -1142,7 +1226,13 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                     floor_height
                 ])
             else:
-                self.target_pos = np.array([-5.0, 0.0, floor_height])
+                #self.target_pos = np.array([-5.0, 0.0, floor_height]) Avonia target
+                self.target_pos = np.array([
+                    np.random.uniform(self.target_pos_range[0][0], self.target_pos_range[0][1]),
+                    np.random.uniform(self.target_pos_range[1][0], self.target_pos_range[1][1]),
+                    floor_height
+                ])
+
         else:
             floor_height = self.scene.get_floor_height(self.floor_num)
             self.initial_height = floor_height + self.random_init_z_offset

From 5f8b2018765d1224f4a47c5757bf1e9f0129e96f Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Wed, 29 Jan 2020 00:55:12 -0800
Subject: [PATCH 48/58] fine tune with environments

---
 gibson2/envs/motion_planner_env.py | 42 ++++++++++++------------------
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 8caf23f34..49c798e2c 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -374,7 +374,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 [[-30.5, -30], [-3, -1]],
                 [[-36.75, -36.25], [-3, -1]],
             ]
-            self.initial_pos_range = np.array([[-25, -21], [4, 9]])
+            self.initial_pos_range = np.array([[-24, -22.5], [6, 9]])
             self.target_pos_range = np.array([[-40, -30], [1.25, 1.75]])
 
             # button_door
@@ -409,17 +409,21 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 1.03
             ]
             self.door_positions = [
-                [1.2,-2.15,0],
+                [1.2, -2.15, 0],
             ]
             self.door_rotations = [np.pi]
-
-            wall_poses = [[[1.5, -2.2, 0.25], quatToXYZW(euler2quat(0, 0, 0), 'wxyz')]]
-
-            self.door_target_pos = np.array([[[-1.0, 1.0], [-5, -3]]])
-
-            self.initial_pos_range = np.array([[-1, 8], [-2, 4]])
-            self.target_pos_range = np.array([[-3.75,-3.25],[-1,5.5]])
-
+            wall_poses = [
+                [[1.5, -2.2, 0.25], quatToXYZW(euler2quat(0, 0, 0), 'wxyz')]
+            ]
+            self.door_target_pos = np.array([
+                [[-1.0, 1.0], [-5.0, -3.0]]
+            ])
+            self.initial_pos_range = np.array([
+                [5.0, 7.0], [-1.7, 0.3]
+            ])
+            self.target_pos_range = np.array([
+                [-3.75, -3.25], [-1, 0.0]
+            ])
             button_scales = [
                 1.7,
             ]
@@ -429,31 +433,17 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.button_rotations = [
                 0.0,
             ]
-
             # semantic_obstacles
             self.semantic_obstacle_poses = [
-                [-2, 0, 0.7],
-                [-2, -0.8, 0.7],
-                [-2, -1.6, 0.7],
-                [-2, 5, 0.7],
-                [-2, 6, 0.7],
-                [-2, 7, 0.7],
+                [-2, -1.25, 0.7],
+                [-2, -0.1, 0.7],
             ]
             self.semantic_obstacle_masses = [
                 1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                10000.0,
                 10000.0,
             ]
             self.semantic_obstacle_colors = [
                 [1.0, 0.0, 0.0, 1],
-                [1.0, 0.0, 0.0, 1],
-                [1.0, 0.0, 0.0, 1],
-                [1.0, 0.0, 0.0, 1],
-                [0.0, 1.0, 0.0, 1],
                 [0.0, 1.0, 0.0, 1],
             ]
             self.obstacle_dim = 0.35

From 09606720980274f6e1856996edab0e70b7c3f38d Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Wed, 29 Jan 2020 01:06:25 -0800
Subject: [PATCH 49/58] change max step to 25

---
 examples/configs/fetch_interactive_nav_s2r_mp.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/configs/fetch_interactive_nav_s2r_mp.yaml b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
index 877fae870..a837dcd3b 100644
--- a/examples/configs/fetch_interactive_nav_s2r_mp.yaml
+++ b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
@@ -40,7 +40,7 @@ discount_factor: 0.99
 # termination condition
 death_z_thresh: 0.2
 dist_tol: 0.5  # body width
-max_step: 20
+max_step: 25
 
 # sensor
 output: [sensor, rgb, depth, scan]

From a15a3df0aa4b741da769fd2aa00c6d8488cd935a Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Wed, 29 Jan 2020 12:33:56 -0800
Subject: [PATCH 50/58] fix button door bug, use realdoor_closed urdf now

---
 gibson2/envs/motion_planner_env.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 49c798e2c..2c00085c9 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -455,9 +455,10 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         if self.arena in ['push_door', 'button_door']:
             self.door_axis_link_id = 1
             self.doors = []
+            door_urdf = 'realdoor.urdf' if self.arena == 'push_door' else 'realdoor_closed.urdf'
             for scale, position, rotation in zip(door_scales, self.door_positions, self.door_rotations):
                 door = InteractiveObj(
-                    os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'),
+                    os.path.join(gibson2.assets_path, 'models', 'scene_components', door_urdf),
                     scale=scale)
                 self.simulator.import_interactive_object(door, class_id=2)
                 door.set_position_rotation(position, quatToXYZW(euler2quat(0, 0, rotation), 'wxyz'))
@@ -1152,6 +1153,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             door_angle_diff = new_door_angle - self.door_angles[self.door_idx]
             reward += door_angle_diff
             self.door_angles[self.door_idx] = new_door_angle
+            if new_door_angle > (80.0 / 180.0 * np.pi):
+                print("PUSH OPEN DOOR")
         elif self.arena == 'obstacles':
             new_obstacles_moved_dist = 0.0
             for obstacle, original_obstacle_pose in zip(self.obstacles, self.obstacle_poses):

From dd7827d6e6eb914761d3108d5f3b8bbb47035c28 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Wed, 29 Jan 2020 14:19:39 -0800
Subject: [PATCH 51/58] start working on rss baselines for continuous action

---
 gibson2/envs/motion_planner_env.py | 56 +++++++++++++++++++++++++-----
 1 file changed, 48 insertions(+), 8 deletions(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 2c00085c9..a03505653 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -175,6 +175,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                  action_timestep=1 / 10.0,
                  physics_timestep=1 / 240.0,
                  device_idx=0,
+                 random_height=False,
                  automatic_reset=False,
                  eval=False,
                  arena=None,
@@ -185,7 +186,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                        action_timestep=action_timestep,
                                                        physics_timestep=physics_timestep,
                                                        automatic_reset=automatic_reset,
-                                                       random_height=False,
+                                                       random_height=random_height,
                                                        device_idx=device_idx)
         self.arena = arena
         self.eval = eval
@@ -1269,6 +1270,38 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return self.state
 
 
+class MotionPlanningBaseArmContinuousEnv(NavigateRandomEnv):
+    def __init__(self,
+                 config_file,
+                 model_id=None,
+                 collision_reward_weight=0.0,
+                 mode='headless',
+                 action_timestep=1 / 10.0,
+                 physics_timestep=1 / 240.0,
+                 device_idx=0,
+                 random_height=False,
+                 automatic_reset=False,
+                 eval=False,
+                 arena=None,
+                 ):
+        super(MotionPlanningBaseArmContinuousEnv, self).__init__(config_file,
+                                                                 model_id=model_id,
+                                                                 mode=mode,
+                                                                 action_timestep=action_timestep,
+                                                                 physics_timestep=physics_timestep,
+                                                                 automatic_reset=automatic_reset,
+                                                                 random_height=random_height,
+                                                                 device_idx=device_idx,
+                                                                 eval=eval,
+                                                                 arena=arena,
+                                                                 collision_reward_weight=collision_reward_weight,
+                                                                 )
+
+    def step(self, action):
+        embed()
+
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -1289,13 +1322,20 @@ if __name__ == '__main__':
 
     args = parser.parse_args()
 
-    nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
-                                       mode=args.mode,
-                                       action_timestep=1 / 500.0,
-                                       physics_timestep=1 / 500.0,
-                                       eval=args.mode == 'gui',
-                                       arena=args.arena,
-                                       )
+    # nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
+    #                                    mode=args.mode,
+    #                                    action_timestep=1 / 500.0,
+    #                                    physics_timestep=1 / 500.0,
+    #                                    eval=args.mode == 'gui',
+    #                                    arena=args.arena,
+    #                                    )
+    nav_env = MotionPlanningBaseArmContinuousEnv(config_file=args.config,
+                                                 mode=args.mode,
+                                                 action_timestep=1 / 10.0,
+                                                 physics_timestep=1 / 100.0,
+                                                 eval=args.mode == 'gui',
+                                                 arena=args.arena,
+                                                 )
 
     for episode in range(100):
         print('Episode: {}'.format(episode))

From 55b6f3e3820b14a561094fdf88c537e217b9eb11 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Wed, 29 Jan 2020 17:25:19 -0800
Subject: [PATCH 52/58] test_valid_position bug fix, set joint velocity to 0,
 slightly modify Fetch arm default position

---
 gibson2/core/physics/robot_locomotors.py | 25 +++++++++++++++---------
 gibson2/envs/locomotor_env.py            |  7 +++++++
 gibson2/envs/motion_planner_env.py       | 13 ++++++++----
 3 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/gibson2/core/physics/robot_locomotors.py b/gibson2/core/physics/robot_locomotors.py
index 56c1495d3..46bf451ac 100644
--- a/gibson2/core/physics/robot_locomotors.py
+++ b/gibson2/core/physics/robot_locomotors.py
@@ -494,8 +494,8 @@ class Freight(LocomotorRobot):
         self.config = config
         self.velocity = config.get("velocity", 1.0)
         LocomotorRobot.__init__(self,
-                            "fetch/freight.urdf",
-                            "base_link",
+                                "fetch/freight.urdf",
+                                "base_link",
                                 action_dim=2,
                                 sensor_dim=16,
                                 power=2.5,
@@ -541,14 +541,18 @@ class Fetch(LocomotorRobot):
 
     def __init__(self, config):
         self.config = config
-        self.velocity = config.get("velocity", 1.0)
+        self.wheel_velocity = config.get('wheel_velocity', 0.1)
+        self.torso_lift_velocity = config.get('torso_lift_velocity', 0.01)
+        self.arm_velocity = config.get('arm_velocity', 0.01)
         self.wheel_dim = 2
+        self.torso_lift_dim = 1
+        self.arm_dim = 7
         self.action_high = config.get("action_high", None)
         self.action_low = config.get("action_low", None)
         LocomotorRobot.__init__(self,
-                            "fetch/fetch.urdf",
-                            "base_link",
-                                action_dim=6,
+                                "fetch/fetch.urdf",
+                                "base_link",
+                                action_dim=self.wheel_dim + self.torso_lift_dim + self.arm_dim,
                                 sensor_dim=55,
                                 power=2.5,
                                 scale=config.get("robot_scale", self.default_scale),
@@ -562,9 +566,12 @@ class Fetch(LocomotorRobot):
             self.action_high = np.full(shape=self.wheel_dim, fill_value=self.action_high)
             self.action_low = np.full(shape=self.wheel_dim, fill_value=self.action_low)
         else:
-            self.action_high = np.full(shape=self.wheel_dim, fill_value=self.velocity)
+            self.action_high = np.array([self.wheel_velocity] * self.wheel_dim +
+                                        [self.torso_lift_velocity] * self.torso_lift_dim +
+                                        [self.arm_velocity] * self.arm_dim)
             self.action_low = -self.action_high
-        self.action_space = gym.spaces.Box(shape=(self.wheel_dim,),
+
+        self.action_space = gym.spaces.Box(shape=(self.action_dim,),
                                            low=-1.0,
                                            high=1.0,
                                            dtype=np.float32)
@@ -592,7 +599,7 @@ class Fetch(LocomotorRobot):
         rest_position = (
             0.02, np.pi / 2.0,
             np.pi / 2.0, 0.0,
-            np.pi / 2.0, 0.0,
+            np.pi / 2.0 + 0.1, 0.0,
             np.pi / 2.0, 0.0
         )
         # rest_position = (0.38548146667743244, 1.1522793897208579, 1.2576467971105596, -0.312703569911879,
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 587c70b86..75cd8c993 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -696,6 +696,10 @@ class NavigateEnv(BaseEnv):
             valid, collision_links_flatten = self.test_valid_position()
             if valid:
                 return
+            # for collision_link in collision_links_flatten:
+            #     print(collision_link)
+            # print('reset agent failed')
+            # embed()
 
         print('Failed to reset robot without collision' + '-' * 30)
         for collision_link in collision_links_flatten:
@@ -710,6 +714,9 @@ class NavigateEnv(BaseEnv):
         self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(*self.initial_orn), 'wxyz'))
 
     def test_valid_position(self):
+        # assume joint velocity control, set velocity to 0 when testing whether the initial configuration is valid
+        assert self.robots[0].control == 'velocity'
+        self.robots[0].apply_real_action(np.zeros(self.action_dim))
         collision_links = self.run_simulation()
         collision_links_flatten = [item for sublist in collision_links for item in sublist]
         return len(collision_links_flatten) == 0, collision_links_flatten
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index a03505653..46c68a0c4 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -38,7 +38,8 @@ class MotionPlanningEnv(NavigateRandomEnv):
                  physics_timestep=1 / 240.0,
                  device_idx=0,
                  automatic_reset=False,
-                 eval=False
+                 eval=False,
+                 random_height=False
                  ):
         super(MotionPlanningEnv, self).__init__(config_file,
                                                 model_id=model_id,
@@ -46,7 +47,7 @@ class MotionPlanningEnv(NavigateRandomEnv):
                                                 action_timestep=action_timestep,
                                                 physics_timestep=physics_timestep,
                                                 automatic_reset=automatic_reset,
-                                                random_height=False,
+                                                random_height=random_height,
                                                 device_idx=device_idx)
 
         self.mp_loaded = False
@@ -1195,7 +1196,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
     def reset_initial_and_target_pos(self):
         self.initial_orn_z = np.random.uniform(-np.pi, np.pi)
-        if self.arena in ['button_door', 'push_door', 'obstacles', 'semantic_obstacles']:
+        if self.arena in ['button_door', 'push_door', 'obstacles', 'semantic_obstacles', 'empty']:
             floor_height = self.scene.get_floor_height(self.floor_num)
 
             self.initial_pos = np.array([
@@ -1270,7 +1271,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return self.state
 
 
-class MotionPlanningBaseArmContinuousEnv(NavigateRandomEnv):
+class MotionPlanningBaseArmContinuousEnv(MotionPlanningBaseArmEnv):
     def __init__(self,
                  config_file,
                  model_id=None,
@@ -1296,6 +1297,10 @@ class MotionPlanningBaseArmContinuousEnv(NavigateRandomEnv):
                                                                  arena=arena,
                                                                  collision_reward_weight=collision_reward_weight,
                                                                  )
+        self.action_space = gym.spaces.Box(shape=(self.action_dim,),
+                                           low=-1.0,
+                                           high=1.0,
+                                           dtype=np.float32)
 
     def step(self, action):
         embed()

From 8cb8ae197c0bc1ed81f9ba6948686b01f2e4f29f Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Wed, 29 Jan 2020 18:54:07 -0800
Subject: [PATCH 53/58] baseline code done

---
 ...tch_interactive_nav_s2r_mp_continuous.yaml | 83 +++++++++++++++++++
 gibson2/core/physics/robot_locomotors.py      |  4 +-
 gibson2/envs/locomotor_env.py                 |  6 +-
 gibson2/envs/motion_planner_env.py            | 61 +++++++++-----
 4 files changed, 129 insertions(+), 25 deletions(-)
 create mode 100644 examples/configs/fetch_interactive_nav_s2r_mp_continuous.yaml

diff --git a/examples/configs/fetch_interactive_nav_s2r_mp_continuous.yaml b/examples/configs/fetch_interactive_nav_s2r_mp_continuous.yaml
new file mode 100644
index 000000000..6fcc394eb
--- /dev/null
+++ b/examples/configs/fetch_interactive_nav_s2r_mp_continuous.yaml
@@ -0,0 +1,83 @@
+# scene
+scene: building
+model_id: candcenter
+build_graph: true
+load_texture: true
+should_load_replaced_objects: false
+should_load_additional_objects: false
+trav_map_erosion: 3
+
+# robot
+robot: Fetch
+wheel_velocity: 0.05
+torso_lift_velocity: 0.0001
+arm_velocity: 0.01
+
+# task, observation and action
+task: pointgoal # pointgoal|objectgoal|areagoal|reaching
+fisheye: false
+
+initial_pos: [-1.0, -1.0, 0.0]
+initial_orn: [0.0, 0.0, 0.0]
+
+target_pos: [1.0, 1.0, 0.0]
+target_orn: [0.0, 0.0, 0.0]
+
+is_discrete: false
+additional_states_dim: 22
+
+# reward
+reward_type: dense
+success_reward: 10.0
+slack_reward: 0.0
+potential_reward_weight: 1.0
+electricity_reward_weight: 0.0
+stall_torque_reward_weight: 0.0
+collision_reward_weight: -0.0
+collision_ignore_link_a_ids: [0, 1, 2, 3, 20, 21, 22]  # ignore collision with these agent's link ids
+
+# discount factor
+discount_factor: 0.99
+
+# termination condition
+death_z_thresh: 0.2
+dist_tol: 0.5  # body width
+max_step: 750
+
+# sensor
+output: [sensor, rgb, depth, scan]
+resolution: 128
+fov: 57.5
+n_horizontal_rays: 220
+n_vertical_beams: 1
+scan_noise_rate: 0.02
+depth_noise_rate: 0.02
+
+# display
+use_filler: true
+display_ui: false
+show_diagnostics: false
+ui_num: 2
+ui_components: [RGB_FILLED, DEPTH]
+random:
+  random_initial_pose: false
+  random_target_pose: false
+  random_init_x_range: [-0.1, 0.1]
+  random_init_y_range: [-0.1, 0.1]
+  random_init_z_range: [-0.1, 0.1]
+  random_init_rot_range: [-0.1, 0.1]
+
+speed:
+  timestep: 0.001
+  frameskip: 10
+
+mode: web_ui #gui|headless
+verbose: false
+fast_lq_render: true
+
+# visual objects
+visual_object_at_initial_target_pos: true
+target_visual_object_visible_to_agent: false
+
+# debug
+debug: false
diff --git a/gibson2/core/physics/robot_locomotors.py b/gibson2/core/physics/robot_locomotors.py
index 46bf451ac..8f33302ad 100644
--- a/gibson2/core/physics/robot_locomotors.py
+++ b/gibson2/core/physics/robot_locomotors.py
@@ -609,9 +609,7 @@ class Fetch(LocomotorRobot):
 
 
     def apply_action(self, action):
-        denormalized_action = self.action_to_real_action(action)
-        real_action = np.zeros(self.action_dim)
-        real_action[:self.wheel_dim] = denormalized_action
+        real_action = self.action_to_real_action(action)
         self.apply_real_action(real_action)
 
     def calc_state(self):
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 75cd8c993..ac5f2f934 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -1816,9 +1816,9 @@ class InteractiveNavigateEnv(NavigateEnv):
             reward += self.success_reward  # |success_reward| = 10.0
 
         # death penalty
-        floor_height = 0.0 if self.floor_num is None else self.scene.get_floor_height(self.floor_num)
-        if self.robots[0].get_position()[2] > floor_height + self.death_z_thresh:
-            reward -= self.success_reward * 1.0
+        # floor_height = 0.0 if self.floor_num is None else self.scene.get_floor_height(self.floor_num)
+        # if self.robots[0].get_position()[2] > floor_height + self.death_z_thresh:
+        #     reward -= self.success_reward * 1.0
 
         # push door the wrong way
         # door_angle = p.getJointState(self.door.body_id, self.door_axis_link_id)[0]
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 46c68a0c4..7378a0947 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -1128,21 +1128,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         return state, reward, done, info
 
-    def compute_next_step(self, action, use_base, subgoal_success):
-        self.simulator.sync()
-
-        if use_base:
-            # trigger re-computation of geodesic distance for get_reward
-            self.state = self.get_state()
-
-        info = {}
-        if subgoal_success:
-            reward, info = self.get_reward([], action, info)
-        else:
-            # failed subgoal penalty
-            reward = self.failed_subgoal_penalty
-        done, info = self.get_termination([], info)
-
+    def get_reward(self, collision_links=[], action=None, info={}):
+        reward, info = super(NavigateRandomEnv, self).get_reward(collision_links, action, info)
         if self.arena == 'button_door':
             button_state = p.getJointState(self.buttons[self.door_idx].body_id, self.button_axis_link_id)[0]
             if not self.button_pressed and button_state < self.button_threshold:
@@ -1177,6 +1164,22 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             reward += (obstacles_moved_dist_diff * 5.0)
             # print('obstacles_moved_dist_diff', obstacles_moved_dist_diff)
             self.obstacles_moved_dist = new_obstacles_moved_dist
+        return reward, info
+
+    def compute_next_step(self, action, use_base, subgoal_success):
+        self.simulator.sync()
+
+        if use_base:
+            # trigger re-computation of geodesic distance for get_reward
+            self.state = self.get_state()
+
+        info = {}
+        if subgoal_success:
+            reward, info = self.get_reward([], action, info)
+        else:
+            # failed subgoal penalty
+            reward = self.failed_subgoal_penalty
+        done, info = self.get_termination([], info)
 
         if not use_base:
             set_joint_positions(self.robot_id, self.arm_joint_ids, self.arm_default_joint_positions)
@@ -1302,9 +1305,27 @@ class MotionPlanningBaseArmContinuousEnv(MotionPlanningBaseArmEnv):
                                            high=1.0,
                                            dtype=np.float32)
 
-    def step(self, action):
-        embed()
+    def get_termination(self, collision_links=[], info={}):
+        done, info = super(MotionPlanningBaseArmEnv, self).get_termination(action)
+        if done:
+            return done, info
+        else:
+            collision_links_flatten = [item for sublist in collision_links for item in sublist]
+            collision_links_flatten_filter = [item for item in collision_links_flatten
+                                              if item[2] != self.robots[0].robot_ids[0] and item[3] == -1]
+            # base collision with external objects
+            if len(collision_links_flatten_filter) > 0:
+                done = True
+                info['success'] = False
+                # print('base collision')
+                # episode = Episode(
+                #     success=float(info['success']),
+                # )
+                # self.stored_episodes.append(episode)
+            return done, info
 
+    def step(self, action):
+        return super(NavigateRandomEnv, self).step(action)
 
 
 if __name__ == '__main__':
@@ -1337,7 +1358,7 @@ if __name__ == '__main__':
     nav_env = MotionPlanningBaseArmContinuousEnv(config_file=args.config,
                                                  mode=args.mode,
                                                  action_timestep=1 / 10.0,
-                                                 physics_timestep=1 / 100.0,
+                                                 physics_timestep=1 / 40.0,
                                                  eval=args.mode == 'gui',
                                                  arena=args.arena,
                                                  )
@@ -1346,9 +1367,11 @@ if __name__ == '__main__':
         print('Episode: {}'.format(episode))
         start = time.time()
         state = nav_env.reset()
-        for i in range(150):
+        for i in range(1000):
             # print('Step: {}'.format(i))
             action = nav_env.action_space.sample()
+            # action[:] = 0.0
+            # action[:3] = 1.0
             state, reward, done, info = nav_env.step(action)
             # embed()
             # print('Reward:', reward)

From 5dacbd2d68f228f83e221ea73f84929b6a96abdd Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Wed, 29 Jan 2020 19:13:35 -0800
Subject: [PATCH 54/58] fix baseline bug

---
 gibson2/envs/motion_planner_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 7378a0947..6f08709dc 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -1306,7 +1306,7 @@ class MotionPlanningBaseArmContinuousEnv(MotionPlanningBaseArmEnv):
                                            dtype=np.float32)
 
     def get_termination(self, collision_links=[], info={}):
-        done, info = super(MotionPlanningBaseArmEnv, self).get_termination(action)
+        done, info = super(MotionPlanningBaseArmEnv, self).get_termination(collision_links, info)
         if done:
             return done, info
         else:

From 9d38491a3037aeae05b2b7f6cdf059d9839d91e6 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Thu, 30 Jan 2020 22:42:36 -0800
Subject: [PATCH 55/58] motion planner matches real better

---
 .../configs/fetch_interactive_nav_s2r_mp.yaml |  1 +
 gibson2/envs/motion_planner_env.py            | 62 +++++++++++++++++--
 gibson2/external/pybullet_tools/utils.py      | 10 +--
 3 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/examples/configs/fetch_interactive_nav_s2r_mp.yaml b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
index a837dcd3b..de9677887 100644
--- a/examples/configs/fetch_interactive_nav_s2r_mp.yaml
+++ b/examples/configs/fetch_interactive_nav_s2r_mp.yaml
@@ -10,6 +10,7 @@ trav_map_erosion: 3
 # robot
 robot: Fetch
 velocity: 0.1
+fine_motion_plan: true
 
 # task, observation and action
 task: pointgoal # pointgoal|objectgoal|areagoal|reaching
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 2c00085c9..f27b13b48 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -219,6 +219,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         # real sensor spec for Fetch
         resolution = self.config.get('resolution', 64)
+        self.fine_motion_plan = self.config.get('fine_motion_plan', False)
+
         width = resolution
         height = int(width * (480.0 / 640.0))
         if 'rgb' in self.output:
@@ -882,6 +884,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             arm_joint_positions = p.calculateInverseKinematics(self.robot_id,
                                                                self.robots[0].parts['gripper_link'].body_part_index,
                                                                arm_subgoal,
+                                                               self.robots[0].get_orientation(),
                                                                lowerLimits=min_limits,
                                                                upperLimits=max_limits,
                                                                jointRanges=joint_range,
@@ -946,11 +949,35 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         if arm_joint_positions is None:
             return False
 
-        arm_path = plan_joint_motion(self.robot_id,
+        mp_obstacles = []
+        mp_obstacles.append(self.mesh_id)
+        for door in self.doors:
+            mp_obstacles.append(door.body_id)
+
+        disabled_collisions = {
+            (link_from_name(self.robot_id, 'torso_lift_link'), link_from_name(self.robot_id, 'torso_fixed_link')),
+            (link_from_name(self.robot_id, 'torso_lift_link'), link_from_name(self.robot_id, 'shoulder_lift_link')),
+            (link_from_name(self.robot_id, 'torso_lift_link'), link_from_name(self.robot_id, 'upperarm_roll_link')),
+            (link_from_name(self.robot_id, 'torso_lift_link'), link_from_name(self.robot_id, 'forearm_roll_link')),
+            (link_from_name(self.robot_id, 'torso_lift_link'), link_from_name(self.robot_id, 'elbow_flex_link'))}
+
+        #print(mp_obstacles)
+        mp_obstacles = tuple(mp_obstacles)
+        if self.fine_motion_plan:
+            arm_path = plan_joint_motion(self.robot_id,
                                      self.arm_joint_ids,
                                      arm_joint_positions,
-                                     disabled_collisions=set(),
-                                     self_collisions=False)
+                                     disabled_collisions=disabled_collisions,
+                                     self_collisions=True,
+                                     obstacles=mp_obstacles)
+        else:
+            arm_path = plan_joint_motion(self.robot_id,
+                                         self.arm_joint_ids,
+                                         arm_joint_positions,
+                                         disabled_collisions=disabled_collisions,
+                                         self_collisions=False,
+                                         obstacles=[])
+
         if arm_path is not None:
             if self.eval:
                 for joint_way_point in arm_path:
@@ -1011,12 +1038,38 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         push_vector_local = np.array([action[6], action[7]]) * 0.5  # [-0.5, 0.5]
         push_vector = rotate_vector_2d(push_vector_local, -self.robots[0].get_rpy()[2])
         push_vector = np.append(push_vector, 0.0)
-
+        
         # push_vector = np.array([-0.5, 0.0, 0.0])
 
         max_limits, min_limits, rest_position, joint_range, joint_damping = self.get_ik_parameters()
         base_pose = get_base_values(self.robot_id)
 
+        joint_positions_original = get_joint_positions(self.robot_id, self.arm_joint_ids)
+
+        # test arm_subgoal + push_vector reachability
+        joint_positions = p.calculateInverseKinematics(self.robot_id,
+                                                       self.robots[0].parts['gripper_link'].body_part_index,
+                                                       arm_subgoal + push_vector,
+                                                       self.robots[0].get_orientation(),
+                                                       lowerLimits=min_limits,
+                                                       upperLimits=max_limits,
+                                                       jointRanges=joint_range,
+                                                       restPoses=rest_position,
+                                                       jointDamping=joint_damping,
+                                                       solver=p.IK_DLS,
+                                                       maxNumIterations=100)[2:10]
+
+        set_joint_positions(self.robot_id, self.arm_joint_ids, joint_positions)
+        #ls = p.getLinkState(robotid, endEffectorId)
+        #newPos = ls[4]
+        #diff = [targetPos[0] - newPos[0], targetPos[1] - newPos[1], targetPos[2] - newPos[2]]
+        #dist2 = (diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2])
+        diff = l2_distance(arm_subgoal + push_vector, self.robots[0].get_end_effector_position())
+        set_joint_positions(self.robot_id, self.arm_joint_ids, joint_positions_original)
+        print(diff)
+        if diff > 0.03:
+            return
+
         # self.simulator.set_timestep(0.002)
         steps = 50
         for i in range(steps):
@@ -1025,6 +1078,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             joint_positions = p.calculateInverseKinematics(self.robot_id,
                                                            self.robots[0].parts['gripper_link'].body_part_index,
                                                            push_goal,
+                                                           self.robots[0].get_orientation(),
                                                            lowerLimits=min_limits,
                                                            upperLimits=max_limits,
                                                            jointRanges=joint_range,
diff --git a/gibson2/external/pybullet_tools/utils.py b/gibson2/external/pybullet_tools/utils.py
index 1474edd5c..c2e747b61 100644
--- a/gibson2/external/pybullet_tools/utils.py
+++ b/gibson2/external/pybullet_tools/utils.py
@@ -2456,7 +2456,8 @@ def get_collision_fn(body, joints, obstacles, attachments, self_collisions, disa
     # TODO: convert most of these to keyword arguments
     check_link_pairs = get_self_link_pairs(body, joints, disabled_collisions) \
         if self_collisions else []
-    moving_links = frozenset(get_moving_links(body, joints))
+    moving_links = frozenset([item for item in get_moving_links(body, joints) if item != 19])
+    # TODO: This is a fetch specific change
     attached_bodies = [attachment.child for attachment in attachments]
     moving_bodies = [(body, moving_links)] + attached_bodies
     #moving_bodies = [body] + [attachment.child for attachment in attachments]
@@ -2477,11 +2478,12 @@ def get_collision_fn(body, joints, obstacles, attachments, self_collisions, disa
         for link1, link2 in check_link_pairs:
             # Self-collisions should not have the max_distance parameter
             if pairwise_link_collision(body, link1, body, link2): #, **kwargs):
-                print(get_body_name(body), get_link_name(body, link1), get_link_name(body, link2))
-                #return True
+                #print(get_body_name(body), get_link_name(body, link1), get_link_name(body, link2))
+                return True
         for body1, body2 in check_body_pairs:
             if pairwise_collision(body1, body2, **kwargs):
-                print(get_body_name(body1), get_body_name(body2))
+                #print(body1, body2)
+                #print(get_body_name(body1), get_body_name(body2))
                 return True
         return False
     return collision_fn

From 39bef87d1ba406e4e9b6b79b38d1caa6f5d645c8 Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Wed, 5 Feb 2020 10:09:34 -0800
Subject: [PATCH 56/58] navigation only policy with mp

---
 gibson2/envs/locomotor_env.py      | 17 ++++----
 gibson2/envs/motion_planner_env.py | 66 ++++++++++++++++++++----------
 2 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index ac5f2f934..776e11e4c 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -302,7 +302,7 @@ class NavigateEnv(BaseEnv):
     def get_auxiliary_sensor(self, collision_links=[]):
         return np.array([])
 
-    def add_naive_noise_to_sensor(self, sensor_reading, noise_rate):
+    def add_naive_noise_to_sensor(self, sensor_reading, noise_rate, noise_value=1.0):
         if noise_rate <= 0.0:
             return sensor_reading
 
@@ -311,7 +311,7 @@ class NavigateEnv(BaseEnv):
 
         valid_mask = np.random.choice(2, sensor_reading.shape, p=[noise_rate, 1.0 - noise_rate])
         # set invalid values to be the maximum range (e.g. depth and scan)
-        sensor_reading[valid_mask == 0] = 1.0
+        sensor_reading[valid_mask == 0] = noise_value
         return sensor_reading
 
     def get_depth(self):
@@ -322,20 +322,19 @@ class NavigateEnv(BaseEnv):
             high = 3.5
         elif self.config['robot'] == 'Fetch':
             # Primesense Carmine 1.09 short-range RGBD sensor
-            low = 0.1
+            low = 0.35
             high = 3.0  # http://xtionprolive.com/primesense-carmine-1.09
             # high = 1.4  # https://www.i3du.gr/pdf/primesense.pdf
         else:
             assert False, 'unknown robot for RGBD observation'
 
-        invalid = depth == 0.0
-        depth[depth < low] = low
-        depth[depth > high] = high
-        depth[invalid] = high
+        # 0.0 is a special value for invalid entries
+        depth[depth < low] = 0.0
+        depth[depth > high] = 0.0
 
         # re-scale depth to [0.0, 1.0]
-        depth = (depth - low) / (high - low)
-        depth = self.add_naive_noise_to_sensor(depth, self.depth_noise_rate)
+        depth /= high
+        depth = self.add_naive_noise_to_sensor(depth, self.depth_noise_rate, noise_value=0.0)
 
         return depth
 
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index 6f08709dc..abecfddd4 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -213,10 +213,16 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # action[5] = arm_img_u
         # action[6] = arm_push_vector_x
         # action[7] = arm_push_vector_y
-        self.action_space = gym.spaces.Box(shape=(8,),
-                                           low=-1.0,
-                                           high=1.0,
-                                           dtype=np.float32)
+        if self.arena == 'random_nav':
+            self.action_space = gym.spaces.Box(shape=(3,),
+                                               low=-1.0,
+                                               high=1.0,
+                                               dtype=np.float32)
+        else:
+            self.action_space = gym.spaces.Box(shape=(8,),
+                                               low=-1.0,
+                                               high=1.0,
+                                               dtype=np.float32)
         self.prepare_motion_planner()
 
         # real sensor spec for Fetch
@@ -452,7 +458,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         else:
             # TODO: handcraft environments for more scenes
-            assert False, 'model_id unknown'
+            if self.arena != 'random_nav':
+                assert False, 'model_id unknown'
 
         if self.arena in ['push_door', 'button_door']:
             self.door_axis_link_id = 1
@@ -1110,9 +1117,15 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # action[7] = arm_push_vector_y
         # print('-' * 20)
         self.current_step += 1
-        use_base = action[0] > 0.0
 
-        # add action noise before execution
+        if self.arena == 'random_nav':
+            use_base = True
+            # append a dummy value to the front of the array to achieve compatibility
+            action = np.insert(action, 0, 0.0)
+        else:
+            use_base = action[0] > 0.0
+            # add action noise before execution
+
         action[1:] = np.clip(action[1:] + np.random.normal(0.0, 0.05, action.shape[0] - 1), -1.0, 1.0)
 
         if use_base:
@@ -1342,26 +1355,35 @@ if __name__ == '__main__':
 
     parser.add_argument('--arena',
                         '-a',
-                        choices=['button_door', 'push_door', 'obstacles', 'semantic_obstacles', 'empty'],
+                        choices=['button_door', 'push_door', 'obstacles', 'semantic_obstacles', 'empty', 'random_nav'],
                         default='push_door',
                         help='which arena to train or test (default: push_door)')
 
+    parser.add_argument('--action_type',
+                        '-t',
+                        choices=['high-level', 'low-level'],
+                        default='high-level',
+                        help='which action type to use (default: high-level)')
+
+
     args = parser.parse_args()
 
-    # nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
-    #                                    mode=args.mode,
-    #                                    action_timestep=1 / 500.0,
-    #                                    physics_timestep=1 / 500.0,
-    #                                    eval=args.mode == 'gui',
-    #                                    arena=args.arena,
-    #                                    )
-    nav_env = MotionPlanningBaseArmContinuousEnv(config_file=args.config,
-                                                 mode=args.mode,
-                                                 action_timestep=1 / 10.0,
-                                                 physics_timestep=1 / 40.0,
-                                                 eval=args.mode == 'gui',
-                                                 arena=args.arena,
-                                                 )
+    if args.action_type == 'high-level':
+        nav_env = MotionPlanningBaseArmEnv(config_file=args.config,
+                                           mode=args.mode,
+                                           action_timestep=1 / 500.0,
+                                           physics_timestep=1 / 500.0,
+                                           eval=args.mode == 'gui',
+                                           arena=args.arena,
+                                           )
+    else:
+        nav_env = MotionPlanningBaseArmContinuousEnv(config_file=args.config,
+                                                     mode=args.mode,
+                                                     action_timestep=1 / 10.0,
+                                                     physics_timestep=1 / 40.0,
+                                                     eval=args.mode == 'gui',
+                                                     arena=args.arena,
+                                                     )
 
     for episode in range(100):
         print('Episode: {}'.format(episode))

From ccb09ae9485c26ebae2c2b63d481e46f7338f065 Mon Sep 17 00:00:00 2001
From: fxia22 <xf1280@gmail.com>
Date: Mon, 10 Feb 2020 11:03:34 -0800
Subject: [PATCH 57/58] locobot

---
 gibson2/core/physics/robot_locomotors.py |  68 ++++++++++++++-
 gibson2/envs/base_env.py                 |   7 +-
 gibson2/envs/locomotor_env.py            | 100 +++++++++++++++--------
 3 files changed, 138 insertions(+), 37 deletions(-)

diff --git a/gibson2/core/physics/robot_locomotors.py b/gibson2/core/physics/robot_locomotors.py
index 8f33302ad..b411fdc93 100644
--- a/gibson2/core/physics/robot_locomotors.py
+++ b/gibson2/core/physics/robot_locomotors.py
@@ -684,7 +684,6 @@ class JR2(LocomotorRobot):
         }
 
 
-# TODO: set up joint id and name mapping
 class JR2_Kinova(LocomotorRobot):
     mjcf_scaling = 1
     model_type = "URDF"
@@ -762,3 +761,70 @@ class JR2_Kinova(LocomotorRobot):
             for j in range(16, 28):
                 p.setCollisionFilterPair(robot_id, robot_id, joint, j, 0)
         return ids
+
+
+class Locobot(LocomotorRobot):
+    mjcf_scaling = 1
+    model_type = "URDF"
+    default_scale = 1
+
+    def __init__(self, config):
+        self.config = config
+        self.wheel_velocity = config.get('wheel_velocity', 0.1)
+        self.wheel_dim = 2
+        self.action_high = config.get("action_high", None)
+        self.action_low = config.get("action_low", None)
+        LocomotorRobot.__init__(self,
+                                "locobot/locobot.urdf",
+                                "base_link",
+                                action_dim=self.wheel_dim,
+                                sensor_dim=55, # TODO: what is sensor_dim?
+                                power=2.5,
+                                scale=config.get("robot_scale", self.default_scale),
+                                resolution=config.get("resolution", 64),
+                                is_discrete=config.get("is_discrete", True),
+                                control="velocity",
+                                self_collision=False)
+
+    def set_up_continuous_action_space(self):
+        if self.action_high is not None and self.action_low is not None:
+            self.action_high = np.full(shape=self.wheel_dim, fill_value=self.action_high)
+            self.action_low = np.full(shape=self.wheel_dim, fill_value=self.action_low)
+        else:
+            self.action_high = np.array([self.wheel_velocity] * self.wheel_dim)
+            self.action_low = -self.action_high
+
+        self.action_space = gym.spaces.Box(shape=(self.action_dim,),
+                                           low=-1.0,
+                                           high=1.0,
+                                           dtype=np.float32)
+
+    def set_up_discrete_action_space(self):
+        assert False, "Locobot does not support discrete actions"
+
+    def apply_action(self, action):
+        real_action = self.action_to_real_action(action)
+        self.apply_real_action(real_action)
+
+    def calc_state(self):
+        base_state = LocomotorRobot.calc_state(self)
+        angular_velocity = self.robot_body.angular_velocity()
+        print(len(base_state), len(angular_velocity))
+        return np.concatenate((base_state, np.array(angular_velocity)))
+
+    def get_end_effector_position(self):
+        return self.parts['gripper_link'].get_position()
+
+    def load(self):
+        ids = self._load_model()
+        self.eyes = self.parts["eyes"]
+
+        robot_id = ids[0]
+
+        # disable collision for immediate parent
+        for joint in range(p.getNumJoints(robot_id)):
+            info = p.getJointInfo(robot_id, joint)
+            parent_id = info[-1]
+            p.setCollisionFilterPair(robot_id, robot_id, joint, parent_id, 0)
+
+        return ids
diff --git a/gibson2/envs/base_env.py b/gibson2/envs/base_env.py
index d9d10aa00..0ac09714a 100644
--- a/gibson2/envs/base_env.py
+++ b/gibson2/envs/base_env.py
@@ -1,4 +1,5 @@
-from gibson2.core.physics.robot_locomotors import Turtlebot, Husky, Ant, Humanoid, JR2, JR2_Kinova, Freight, Fetch
+from gibson2.core.physics.robot_locomotors import Turtlebot, Husky, Ant, Humanoid, JR2, JR2_Kinova, Freight, Fetch, \
+    Locobot
 from gibson2.core.simulator import Simulator
 from gibson2.core.physics.scene import BuildingScene, StadiumScene
 import gibson2
@@ -79,6 +80,8 @@ class BaseEnv(gym.Env):
             robot = Freight(self.config)
         elif self.config['robot'] == 'Fetch':
             robot = Fetch(self.config)
+        elif self.config['robot'] == 'Locobot':
+            robot = Locobot(self.config)
         else:
             raise Exception('unknown robot type: {}'.format(self.config['robot']))
 
@@ -107,4 +110,4 @@ class BaseEnv(gym.Env):
         return NotImplementedError
 
     def set_mode(self, mode):
-        self.simulator.mode = mode
\ No newline at end of file
+        self.simulator.mode = mode
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index ac5f2f934..04c44b937 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -302,7 +302,7 @@ class NavigateEnv(BaseEnv):
     def get_auxiliary_sensor(self, collision_links=[]):
         return np.array([])
 
-    def add_naive_noise_to_sensor(self, sensor_reading, noise_rate):
+    def add_naive_noise_to_sensor(self, sensor_reading, noise_rate, noise_value=1.0):
         if noise_rate <= 0.0:
             return sensor_reading
 
@@ -311,7 +311,7 @@ class NavigateEnv(BaseEnv):
 
         valid_mask = np.random.choice(2, sensor_reading.shape, p=[noise_rate, 1.0 - noise_rate])
         # set invalid values to be the maximum range (e.g. depth and scan)
-        sensor_reading[valid_mask == 0] = 1.0
+        sensor_reading[valid_mask == 0] = noise_value
         return sensor_reading
 
     def get_depth(self):
@@ -322,20 +322,23 @@ class NavigateEnv(BaseEnv):
             high = 3.5
         elif self.config['robot'] == 'Fetch':
             # Primesense Carmine 1.09 short-range RGBD sensor
-            low = 0.1
+            low = 0.35
             high = 3.0  # http://xtionprolive.com/primesense-carmine-1.09
             # high = 1.4  # https://www.i3du.gr/pdf/primesense.pdf
+        elif self.config['robot'] == 'Locobot':
+            # https://store.intelrealsense.com/buy-intel-realsense-depth-camera-d435.html
+            low = 0.1
+            high = 10.0
         else:
             assert False, 'unknown robot for RGBD observation'
 
-        invalid = depth == 0.0
-        depth[depth < low] = low
-        depth[depth > high] = high
-        depth[invalid] = high
+        # 0.0 is a special value for invalid entries
+        depth[depth < low] = 0.0
+        depth[depth > high] = 0.0
 
         # re-scale depth to [0.0, 1.0]
-        depth = (depth - low) / (high - low)
-        depth = self.add_naive_noise_to_sensor(depth, self.depth_noise_rate)
+        depth /= high
+        depth = self.add_naive_noise_to_sensor(depth, self.depth_noise_rate, noise_value=0.0)
 
         return depth
 
@@ -535,10 +538,30 @@ class NavigateEnv(BaseEnv):
         return self.filter_collision_links(collision_links)
 
     def filter_collision_links(self, collision_links):
-        return [[item for item in collision_per_sim_step
-                 if item[2] not in self.collision_ignore_body_b_ids and
-                 item[3] not in self.collision_ignore_link_a_ids]
-                for collision_per_sim_step in collision_links]
+        new_collision_links = []
+        for collision_per_sim_step in collision_links:
+            new_collision_per_sim_step = []
+            for item in collision_per_sim_step:
+                # ignore collision with body b
+                if item[2] in self.collision_ignore_body_b_ids:
+                    continue
+
+                # ignore collision with robot link a
+                if item[3] in self.collision_ignore_link_a_ids:
+                    continue
+
+                # ignore self collision with robot link a (body b is robot itself)
+                if item[2] == self.robots[0].robot_ids[0] and item[4] in self.collision_ignore_link_a_ids:
+                    continue
+
+                new_collision_per_sim_step.append(item)
+            new_collision_links.append(new_collision_per_sim_step)
+        return new_collision_links
+
+        # return [[item for item in collision_per_sim_step
+        #          if item[2] not in self.collision_ignore_body_b_ids and
+        #          item[3] not in self.collision_ignore_link_a_ids]
+        #         for collision_per_sim_step in collision_links]
 
     def get_position_of_interest(self):
         if self.config['task'] == 'pointgoal':
@@ -700,7 +723,6 @@ class NavigateEnv(BaseEnv):
             #     print(collision_link)
             # print('reset agent failed')
             # embed()
-
         print('Failed to reset robot without collision' + '-' * 30)
         for collision_link in collision_links_flatten:
             print(collision_link)
@@ -788,6 +810,7 @@ class NavigateRandomEnv(NavigateEnv):
 
     def reset_initial_and_target_pos(self):
         floor, self.initial_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height)
+
         max_trials = 100
         dist = 0.0
         # Need to change to 5 meter if we need to add obstacles
@@ -1063,6 +1086,17 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
                                                                        high=np.inf,
                                                                        shape=(height // 8, width // 8, 320),
                                                                        dtype=np.float32)
+        if 'rgb' in self.output:
+            self.observation_space.spaces['rgb'] = gym.spaces.Box(low=0.0,
+                                                                  high=1.0,
+                                                                  shape=(height, width, 3),
+                                                                  dtype=np.float32)
+        if 'depth' in self.output:
+            self.observation_space.spaces['depth'] = gym.spaces.Box(low=0.0,
+                                                                    high=1.0,
+                                                                    shape=(height, width, 1),
+                                                                    dtype=np.float32)
+
         # self.scan_vis = VisualMarker(rgba_color=[1, 0, 0, 1.0], radius=0.05)
         # self.scan_vis.load()
 
@@ -1173,8 +1207,8 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
         gt_pos = self.robots[0].get_position()[:2]
         source = gt_pos
         target = self.target_pos[:2]
-        # _, geodesic_dist = self.scene.get_shortest_path(self.floor_num, source, target)
-        geodesic_dist = 0.0
+        _, geodesic_dist = self.scene.get_shortest_path(self.floor_num, source, target)
+        # geodesic_dist = 0.0
         robot_z = self.robots[0].get_position()[2]
         if self.visualize_waypoints and self.mode == 'gui':
             for i in range(1000):
@@ -1205,22 +1239,23 @@ class InteractiveGibsonNavigateSim2RealEnv(NavigateRandomEnv):
                                             linear_velocity_local,
                                             angular_velocity_local))
         # cache results for reward calculation
+        additional_states = target_pos_local
         self.new_potential = geodesic_dist
         assert len(additional_states) == self.additional_states_dim, 'additional states dimension mismatch'
         return additional_states
 
+    def crop_rect_image(self, img):
+        width = img.shape[0]
+        height = int(width * (480.0 / 640.0))
+        half_diff = int((width - height) / 2)
+        img = img[half_diff:half_diff + height, :]
+        return img
+
     def get_state(self, collision_links=[]):
         state = super(InteractiveGibsonNavigateSim2RealEnv, self).get_state(collision_links)
-        if 'rgbd' in state:
-            rgbd = state['rgbd']
-            width = rgbd.shape[0]
-            height = int(width * (480.0 / 640.0))
-            half_diff = int((width - height) / 2)
-            rgbd = rgbd[half_diff:half_diff+height, :]
-            state['rgbd'] = rgbd
-        # cv2.imshow('depth', state['depth'])
-        # cv2.imshow('scan', state['scan'])
-
+        for modality in ['rgb', 'rgbd', 'depth']:
+            if modality in state:
+                state[modality] = self.crop_rect_image(state[modality])
         return state
 
     def get_potential(self):
@@ -1882,13 +1917,14 @@ if __name__ == '__main__':
 
     step_time_list = []
     for episode in range(100):
+        nav_env.reset()
         print('Episode: {}'.format(episode))
         start = time.time()
-        for step in range(1000):  # 500 steps, 50s world time
+        for step in range(50):  # 500 steps, 50s world time
             action = nav_env.action_space.sample()
             # action[:] = 1.0
-            action[0] = -0.4666666666666666
-            action[1] = -0.2
+            # action[0] = -0.4666666666666666
+            # action[1] = -0.2
             # action[0] = 2.0 / 3.0
             # action[1] = -1.0
             # action[:] = 1.0
@@ -1901,12 +1937,8 @@ if __name__ == '__main__':
             # action = np.zeros(nav_env.action_space.shape)
             # debug_param_values = [p.readUserDebugParameter(debug_param) for debug_param in debug_params]
             # action[2:] = np.array(debug_param_values)
-
             state, reward, done, _ = nav_env.step(action)
-            print(state.keys())
-            print(state['sensor'])
-            # embed()
-            # print('reward', reward)
+            #print('reward', reward)
             if done:
                 print('Episode finished after {} timesteps'.format(step + 1))
                 break

From 75ba9f2107fbb276131cbf58685db04de9e1517e Mon Sep 17 00:00:00 2001
From: Chengshu Li <chengshu@stanford.edu>
Date: Thu, 19 Mar 2020 17:39:03 -0700
Subject: [PATCH 58/58] add manipulation-only environment (robot already at
 door front) and new default arm joint position

---
 gibson2/core/physics/robot_locomotors.py |  16 ++-
 gibson2/envs/locomotor_env.py            |  13 ++-
 gibson2/envs/motion_planner_env.py       | 143 ++++++++++++++---------
 3 files changed, 108 insertions(+), 64 deletions(-)

diff --git a/gibson2/core/physics/robot_locomotors.py b/gibson2/core/physics/robot_locomotors.py
index b411fdc93..41eb01347 100644
--- a/gibson2/core/physics/robot_locomotors.py
+++ b/gibson2/core/physics/robot_locomotors.py
@@ -596,14 +596,18 @@ class Fetch(LocomotorRobot):
                                            'wrist_flex_joint',
                                            'wrist_roll_joint'
                                        ])
-        rest_position = (
-            0.02, np.pi / 2.0,
-            np.pi / 2.0, 0.0,
-            np.pi / 2.0 + 0.1, 0.0,
-            np.pi / 2.0, 0.0
-        )
+        # rest_position = (
+        #     0.02, np.pi / 2.0,
+        #     np.pi / 2.0, 0.0,
+        #     np.pi / 2.0 + 0.1, 0.0,
+        #     np.pi / 2.0, 0.0
+        # )
         # rest_position = (0.38548146667743244, 1.1522793897208579, 1.2576467971105596, -0.312703569911879,
         #                  1.7404867100093226, -0.0962895617312548, -1.4418232619629425, -1.6780152866247762)
+        rest_position = (0.30322468280792236, -1.414019864768982,
+                         1.5178184935241699, 0.8189625336474915,
+                         2.200358942909668, 2.9631312579803466,
+                         -1.2862852996643066, 0.0008453550418615341)
 
         set_joint_positions(robot_id, arm_joints, rest_position)
 
diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py
index 04c44b937..6729b5d7a 100644
--- a/gibson2/envs/locomotor_env.py
+++ b/gibson2/envs/locomotor_env.py
@@ -723,13 +723,14 @@ class NavigateEnv(BaseEnv):
             #     print(collision_link)
             # print('reset agent failed')
             # embed()
-        print('Failed to reset robot without collision' + '-' * 30)
-        for collision_link in collision_links_flatten:
-            print(collision_link)
-        random_string = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
-        p.saveBullet(random_string + '.bullet')
-        p.saveWorld(random_string + '.py')
+        # print('Failed to reset robot without collision' + '-' * 30)
+        # for collision_link in collision_links_flatten:
+        #     print(collision_link)
+        # random_string = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
+        # p.saveBullet(random_string + '.bullet')
+        # p.saveWorld(random_string + '.py')
         # raise Exception("Failed to reset robot without collision")
+        print("WARNING: Failed to reset robot without collision")
 
     def reset_initial_and_target_pos(self):
         self.robots[0].set_position(pos=self.initial_pos)
diff --git a/gibson2/envs/motion_planner_env.py b/gibson2/envs/motion_planner_env.py
index d6c841103..0f703ca74 100644
--- a/gibson2/envs/motion_planner_env.py
+++ b/gibson2/envs/motion_planner_env.py
@@ -218,6 +218,11 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                low=-1.0,
                                                high=1.0,
                                                dtype=np.float32)
+        elif self.arena == 'random_manip':
+            self.action_space = gym.spaces.Box(shape=(4,),
+                                               low=-1.0,
+                                               high=1.0,
+                                               dtype=np.float32)
         else:
             self.action_space = gym.spaces.Box(shape=(8,),
                                                low=-1.0,
@@ -227,7 +232,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
 
         # real sensor spec for Fetch
         resolution = self.config.get('resolution', 64)
-        self.fine_motion_plan = self.config.get('fine_motion_plan', False)
+        self.fine_motion_plan = self.config.get('fine_motion_plan', True)
 
         width = resolution
         height = int(width * (480.0 / 640.0))
@@ -252,7 +257,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         else:
             self.use_occupancy_grid = False
 
-
         self.base_marker = VisualMarker(visual_shape=p.GEOM_CYLINDER,
                                         rgba_color=[1, 0, 0, 1],
                                         radius=0.1,
@@ -278,12 +282,16 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         #                                     1.2576467971105596, -0.312703569911879,
         #                                     1.7404867100093226, -0.0962895617312548,
         #                                     -1.4418232619629425, -1.6780152866247762)
-        self.arm_default_joint_positions = (
-            0.02, np.pi / 2.0,
-            np.pi / 2.0, 0.0,
-            np.pi / 2.0, 0.0,
-            np.pi / 2.0, 0.0
-        )
+        # self.arm_default_joint_positions = (
+        #     0.02, np.pi / 2.0,
+        #     np.pi / 2.0, 0.0,
+        #     np.pi / 2.0, 0.0,
+        #     np.pi / 2.0, 0.0
+        # )
+        self.arm_default_joint_positions = (0.30322468280792236, -1.414019864768982,
+                                            1.5178184935241699, 0.8189625336474915,
+                                            2.200358942909668, 2.9631312579803466,
+                                            -1.2862852996643066, 0.0008453550418615341)
         self.arm_joint_ids = joints_from_names(self.robot_id,
                                                [
                                                    'torso_lift_joint',
@@ -363,7 +371,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 [0.0, 1.0, 0.0, 1],
             ]
 
-            self.initial_pos_range = np.array([[-1,1.5], [-1,1]])
+            self.initial_pos_range = np.array([[-1, 1.5], [-1, 1]])
             self.target_pos_range = np.array([[-5.5, -4.5], [-1.0, 1.0]])
 
             # TODO: initial_pos and target_pos sampling should also be put here (scene-specific)
@@ -431,6 +439,12 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             self.initial_pos_range = np.array([
                 [5.0, 7.0], [-1.7, 0.3]
             ])
+            self.initial_pos_range_near_door = np.array([
+                [1.0, 1.4], [-1.6, -1.4]
+            ])
+            self.initial_orn_range_near_door = np.array([
+                -np.pi / 2.0 - np.pi / 12.0, -np.pi / 2.0 + np.pi / 12.0,
+            ])
             self.target_pos_range = np.array([
                 [-3.75, -3.25], [-1, 0.0]
             ])
@@ -463,10 +477,10 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             if self.arena != 'random_nav':
                 assert False, 'model_id unknown'
 
-        if self.arena in ['push_door', 'button_door']:
+        if self.arena in ['push_door', 'button_door', 'random_manip']:
             self.door_axis_link_id = 1
             self.doors = []
-            door_urdf = 'realdoor.urdf' if self.arena == 'push_door' else 'realdoor_closed.urdf'
+            door_urdf = 'realdoor.urdf' if self.arena in ['push_door', 'random_manip'] else 'realdoor_closed.urdf'
             for scale, position, rotation in zip(door_scales, self.door_positions, self.door_rotations):
                 door = InteractiveObj(
                     os.path.join(gibson2.assets_path, 'models', 'scene_components', door_urdf),
@@ -608,8 +622,8 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 self.grid_resolution / 2)
         scan_local_in_map = scan_local_in_map.reshape((1, -1, 1, 2)).astype(np.int32)
         cv2.fillPoly(occupancy_grid, scan_local_in_map, True, 1)
-        #kernel = np.ones((7, 7), np.uint8) # require 6cm of clearance
-        #occupancy_grid = cv2.erode(occupancy_grid, kernel, iterations=1)
+        # kernel = np.ones((7, 7), np.uint8) # require 6cm of clearance
+        # occupancy_grid = cv2.erode(occupancy_grid, kernel, iterations=1)
 
         cv2.circle(occupancy_grid, (self.grid_resolution // 2, self.grid_resolution // 2),
                    int(self.robot_footprint_radius_in_map), 1, -1)
@@ -970,15 +984,15 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             (link_from_name(self.robot_id, 'torso_lift_link'), link_from_name(self.robot_id, 'forearm_roll_link')),
             (link_from_name(self.robot_id, 'torso_lift_link'), link_from_name(self.robot_id, 'elbow_flex_link'))}
 
-        #print(mp_obstacles)
+        # print(mp_obstacles)
         mp_obstacles = tuple(mp_obstacles)
         if self.fine_motion_plan:
             arm_path = plan_joint_motion(self.robot_id,
-                                     self.arm_joint_ids,
-                                     arm_joint_positions,
-                                     disabled_collisions=disabled_collisions,
-                                     self_collisions=True,
-                                     obstacles=mp_obstacles)
+                                         self.arm_joint_ids,
+                                         arm_joint_positions,
+                                         disabled_collisions=disabled_collisions,
+                                         self_collisions=True,
+                                         obstacles=mp_obstacles)
         else:
             arm_path = plan_joint_motion(self.robot_id,
                                          self.arm_joint_ids,
@@ -1000,7 +1014,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             return False
 
     def stash_object_states(self):
-        if self.arena in ['push_door', 'button_door']:
+        if self.arena in ['push_door', 'button_door', 'random_manip']:
             for i, door in enumerate(self.doors):
                 self.door_angles[i] = p.getJointState(door.body_id, self.door_axis_link_id)[0]
             if self.arena == 'button_door':
@@ -1014,7 +1028,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         """
         Remove any accumulated velocities or forces of objects resulting from arm motion planner
         """
-        if self.arena in ['push_door', 'button_door']:
+        if self.arena in ['push_door', 'button_door', 'random_manip']:
             for door, door_angle in zip(self.doors, self.door_angles):
                 p.resetJointState(door.body_id, self.door_axis_link_id,
                                   targetValue=door_angle, targetVelocity=0.0)
@@ -1047,7 +1061,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         push_vector_local = np.array([action[6], action[7]]) * 0.5  # [-0.5, 0.5]
         push_vector = rotate_vector_2d(push_vector_local, -self.robots[0].get_rpy()[2])
         push_vector = np.append(push_vector, 0.0)
-        
+
         # push_vector = np.array([-0.5, 0.0, 0.0])
 
         max_limits, min_limits, rest_position, joint_range, joint_damping = self.get_ik_parameters()
@@ -1069,13 +1083,12 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                                                        maxNumIterations=100)[2:10]
 
         set_joint_positions(self.robot_id, self.arm_joint_ids, joint_positions)
-        #ls = p.getLinkState(robotid, endEffectorId)
-        #newPos = ls[4]
-        #diff = [targetPos[0] - newPos[0], targetPos[1] - newPos[1], targetPos[2] - newPos[2]]
-        #dist2 = (diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2])
+        # ls = p.getLinkState(robotid, endEffectorId)
+        # newPos = ls[4]
+        # diff = [targetPos[0] - newPos[0], targetPos[1] - newPos[1], targetPos[2] - newPos[2]]
+        # dist2 = (diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2])
         diff = l2_distance(arm_subgoal + push_vector, self.robots[0].get_end_effector_position())
         set_joint_positions(self.robot_id, self.arm_joint_ids, joint_positions_original)
-        print(diff)
         if diff > 0.03:
             return
 
@@ -1159,8 +1172,6 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return subgoal_success
 
     def step(self, action):
-        # print('-' * 30)
-        # embed()
         # action[0] = base_or_arm
         # action[1] = base_subgoal_theta / base_img_v
         # action[2] = base_subgoal_dist / base_img_u
@@ -1172,14 +1183,19 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # print('-' * 20)
         self.current_step += 1
 
-        if self.arena == 'random_nav':
-            use_base = True
-            # append a dummy value to the front of the array to achieve compatibility
-            action = np.insert(action, 0, 0.0)
-        else:
-            use_base = action[0] > 0.0
-            # add action noise before execution
+        if self.arena in ['random_nav', 'random_manip']:
+            new_action = np.zeros(8)
+            if self.arena == 'random_nav':
+                new_action[0] = 1.0
+                new_action[1:4] = action
+            else:
+                new_action[0] = -1.0
+                new_action[4:8] = action
 
+            action = new_action
+
+        use_base = action[0] > 0.0
+        # add action noise before execution
         action[1:] = np.clip(action[1:] + np.random.normal(0.0, 0.05, action.shape[0] - 1), -1.0, 1.0)
 
         if use_base:
@@ -1204,13 +1220,15 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                 self.button_pressed = True
                 self.doors[self.door_idx].set_position([100.0, 100.0, 0.0])
                 reward += self.button_reward
-        elif self.arena == 'push_door':
+        elif self.arena in ['push_door', 'random_manip']:
             new_door_angle = p.getJointState(self.doors[self.door_idx].body_id, self.door_axis_link_id)[0]
             door_angle_diff = new_door_angle - self.door_angles[self.door_idx]
             reward += door_angle_diff
             self.door_angles[self.door_idx] = new_door_angle
-            if new_door_angle > (80.0 / 180.0 * np.pi):
+            if new_door_angle > (60.0 / 180.0 * np.pi):
                 print("PUSH OPEN DOOR")
+                if self.arena == 'random_manip':
+                    reward += self.success_reward
         elif self.arena == 'obstacles':
             new_obstacles_moved_dist = 0.0
             for obstacle, original_obstacle_pose in zip(self.obstacles, self.obstacle_poses):
@@ -1265,24 +1283,36 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         return self.state, reward, done, info
 
     def reset_initial_and_target_pos(self):
-        self.initial_orn_z = np.random.uniform(-np.pi, np.pi)
-        if self.arena in ['button_door', 'push_door', 'obstacles', 'semantic_obstacles', 'empty']:
+        if self.arena in ['button_door', 'push_door', 'obstacles', 'semantic_obstacles', 'empty', 'random_manip']:
             floor_height = self.scene.get_floor_height(self.floor_num)
 
-            self.initial_pos = np.array([
-                np.random.uniform(self.initial_pos_range[0][0], self.initial_pos_range[0][1]),
-                np.random.uniform(self.initial_pos_range[1][0], self.initial_pos_range[1][1]),
-                floor_height
-            ])
+            if self.arena == 'random_manip':
+                self.initial_pos = np.array([
+                    np.random.uniform(self.initial_pos_range_near_door[0][0], self.initial_pos_range_near_door[0][1]),
+                    np.random.uniform(self.initial_pos_range_near_door[1][0], self.initial_pos_range_near_door[1][1]),
+                    floor_height
+                ])
+            else:
+                self.initial_pos = np.array([
+                    np.random.uniform(self.initial_pos_range[0][0], self.initial_pos_range[0][1]),
+                    np.random.uniform(self.initial_pos_range[1][0], self.initial_pos_range[1][1]),
+                    floor_height
+                ])
 
             self.initial_height = floor_height + self.random_init_z_offset
-
             self.robots[0].set_position(pos=[self.initial_pos[0],
                                              self.initial_pos[1],
                                              self.initial_height])
+
+            if self.arena == 'random_manip':
+                self.initial_orn_z = np.random.uniform(self.initial_orn_range_near_door[0],
+                                                       self.initial_orn_range_near_door[1])
+            else:
+                self.initial_orn_z = np.random.uniform(-np.pi, np.pi)
+
             self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, self.initial_orn_z), 'wxyz'))
 
-            if self.arena in ['button_door', 'push_door']:
+            if self.arena in ['button_door', 'push_door', 'random_manip']:
                 self.door_idx = np.random.randint(0, len(self.doors))
                 door_target_pos = self.door_target_pos[self.door_idx]
                 self.target_pos = np.array([
@@ -1291,7 +1321,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
                     floor_height
                 ])
             else:
-                #self.target_pos = np.array([-5.0, 0.0, floor_height]) Avonia target
+                # self.target_pos = np.array([-5.0, 0.0, floor_height]) Avonia target
                 self.target_pos = np.array([
                     np.random.uniform(self.target_pos_range[0][0], self.target_pos_range[0][1]),
                     np.random.uniform(self.target_pos_range[1][0], self.target_pos_range[1][1]),
@@ -1304,7 +1334,7 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
             super(MotionPlanningBaseArmEnv, self).reset_initial_and_target_pos()
 
     def before_reset_agent(self):
-        if self.arena in ['push_door', 'button_door']:
+        if self.arena in ['push_door', 'button_door', 'random_manip']:
             self.door_angles = np.zeros(len(self.doors))
             for door, angle, pos, orn in zip(self.doors, self.door_angles, self.door_positions, self.door_rotations):
                 p.resetJointState(door.body_id, self.door_axis_link_id, targetValue=angle, targetVelocity=0.0)
@@ -1340,6 +1370,15 @@ class MotionPlanningBaseArmEnv(NavigateRandomEnv):
         # self.state['current_step'] = self.current_step
         return self.state
 
+    def get_termination(self, collision_links=[], info={}):
+        done, info = super(MotionPlanningBaseArmEnv, self).get_termination(collision_links, info)
+        if self.arena == 'random_manip':
+            new_door_angle = p.getJointState(self.doors[self.door_idx].body_id, self.door_axis_link_id)[0]
+            if new_door_angle > (60.0 / 180.0 * np.pi):
+                done = True
+                info['success'] = True
+        return done, info
+
 
 class MotionPlanningBaseArmContinuousEnv(MotionPlanningBaseArmEnv):
     def __init__(self,
@@ -1409,7 +1448,8 @@ if __name__ == '__main__':
 
     parser.add_argument('--arena',
                         '-a',
-                        choices=['button_door', 'push_door', 'obstacles', 'semantic_obstacles', 'empty', 'random_nav'],
+                        choices=['button_door', 'push_door', 'obstacles', 'semantic_obstacles', 'empty', 'random_nav',
+                                 'random_manip'],
                         default='push_door',
                         help='which arena to train or test (default: push_door)')
 
@@ -1419,7 +1459,6 @@ if __name__ == '__main__':
                         default='high-level',
                         help='which action type to use (default: high-level)')
 
-
     args = parser.parse_args()
 
     if args.action_type == 'high-level':
@@ -1450,7 +1489,7 @@ if __name__ == '__main__':
             # action[:3] = 1.0
             state, reward, done, info = nav_env.step(action)
             # embed()
-            # print('Reward:', reward)
+            print('Reward:', reward)
             # time.sleep(0.05)
             # nav_env.step()
             # for step in range(50):  # 500 steps, 50s world time