diff --git a/examples/configs/locobot_p2p_nav_house.yaml b/examples/configs/locobot_p2p_nav_house.yaml new file mode 100644 index 000000000..97b43a3b1 --- /dev/null +++ b/examples/configs/locobot_p2p_nav_house.yaml @@ -0,0 +1,63 @@ +# scene +scene: building +model_id: Ohoopee +build_graph: true +load_texture: true +trav_map_erosion: 2 + +# robot +robot: Locobot +velocity: 0.1 + +# task, observation and action +task: pointgoal # pointgoal|objectgoal|areagoal|reaching +fisheye: false + +is_discrete: false +additional_states_dim: 3 + +# reward +reward_type: geodesic +success_reward: 10.0 +slack_reward: -0.01 +potential_reward_weight: 1.0 +collision_reward_weight: -0.1 +collision_ignore_link_a_ids: [1, 2, 3, 4] # ignore collision with these agent's link ids + +# discount factor +discount_factor: 0.99 + +# termination condition +death_z_thresh: 0.2 +dist_tol: 0.333 # body width +max_step: 500 + +# sensor +output: [sensor, rgb, depth] +resolution: 256 +fov: 90 + +# display +use_filler: true +display_ui: false +show_diagnostics: false +ui_num: 2 +ui_components: [RGB_FILLED, DEPTH] +random: + random_initial_pose: false + random_target_pose: false + random_init_x_range: [-0.1, 0.1] + random_init_y_range: [-0.1, 0.1] + random_init_z_range: [-0.1, 0.1] + random_init_rot_range: [-0.1, 0.1] + +speed: + timestep: 0.001 + frameskip: 10 + +mode: web_ui #gui|headless +verbose: false +fast_lq_render: true + +# debug +debug: false diff --git a/gibson2/core/physics/interactive_objects.py b/gibson2/core/physics/interactive_objects.py index ef5134132..83ef92770 100644 --- a/gibson2/core/physics/interactive_objects.py +++ b/gibson2/core/physics/interactive_objects.py @@ -208,7 +208,7 @@ class InteractiveObj(object): org_pos, org_orn = p.getBasePositionAndOrientation(self.body_id) p.resetBasePositionAndOrientation(self.body_id, pos, org_orn) - def set_position_rotation(self, pos, orn): + def set_position_orientation(self, pos, orn): p.resetBasePositionAndOrientation(self.body_id, pos, orn) diff --git a/gibson2/core/physics/robot_locomotors.py b/gibson2/core/physics/robot_locomotors.py index 835f28368..a99c566f5 100644 --- a/gibson2/core/physics/robot_locomotors.py +++ b/gibson2/core/physics/robot_locomotors.py @@ -95,6 +95,10 @@ class LocomotorRobot(BaseRobot): def set_orientation(self, orn): self.robot_body.set_orientation(orn) + def set_position_orientation(self, pos, orn): + self.robot_body.reset_position(pos) + self.robot_body.set_orientation(orn) + def move_by(self, delta): new_pos = np.array(delta) + self.get_position() self.robot_body.reset_position(new_pos) @@ -117,6 +121,10 @@ class LocomotorRobot(BaseRobot): new_orn = qmult((euler2quat(delta, 0, 0)), orn) self.robot_body.set_orientation(new_orn) + def keep_still(self): + for n, j in enumerate(self.ordered_joints): + j.set_motor_velocity(0.0) + def get_rpy(self): return self.robot_body.get_rpy() @@ -320,8 +328,8 @@ class Husky(LocomotorRobot): self.config = config self.torque = config.get("torque", 0.03) LocomotorRobot.__init__(self, - "husky.urdf", - "base_link", + "husky.urdf", + "base_link", action_dim=4, sensor_dim=17, power=2.5, @@ -382,8 +390,8 @@ class Quadrotor(LocomotorRobot): self.config = config self.torque = config.get("torque", 0.02) LocomotorRobot.__init__(self, - "quadrotor.urdf", - "base_link", + "quadrotor.urdf", + "base_link", action_dim=6, sensor_dim=6, power=2.5, @@ -436,8 +444,8 @@ class Turtlebot(LocomotorRobot): self.action_high = config.get("action_high", None) self.action_low = config.get("action_low", None) 
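Review note on the new LocomotorRobot.keep_still() helper above: it zeroes every joint motor velocity so the base does not creep while the environment runs collision-checking steps during reset. A minimal sketch of the equivalent raw pybullet calls (hypothetical standalone function; the real method goes through the robot's ordered_joints wrappers):

import pybullet as p

def keep_still(body_id):
    # command every motorized joint to a zero target velocity
    for joint_index in range(p.getNumJoints(body_id)):
        p.setJointMotorControl2(body_id, joint_index, p.VELOCITY_CONTROL, targetVelocity=0.0)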
LocomotorRobot.__init__(self, - "turtlebot/turtlebot.urdf", - "base_link", + "turtlebot/turtlebot.urdf", + "base_link", action_dim=2, sensor_dim=16, power=2.5, @@ -490,8 +498,8 @@ class Freight(LocomotorRobot): self.config = config self.velocity = config.get("velocity", 1.0) LocomotorRobot.__init__(self, - "fetch/freight.urdf", - "base_link", + "fetch/freight.urdf", + "base_link", action_dim=2, sensor_dim=16, power=2.5, @@ -542,8 +550,8 @@ class Fetch(LocomotorRobot): self.action_high = config.get("action_high", None) self.action_low = config.get("action_low", None) LocomotorRobot.__init__(self, - "fetch/fetch.urdf", - "base_link", + "fetch/fetch.urdf", + "base_link", action_dim=6, sensor_dim=55, power=2.5, @@ -595,8 +603,8 @@ class JR2(LocomotorRobot): self.config = config self.velocity = config.get('velocity', 0.1) LocomotorRobot.__init__(self, - "jr2_urdf/jr2.urdf", - "base_link", + "jr2_urdf/jr2.urdf", + "base_link", action_dim=4, sensor_dim=17, power=2.5, @@ -646,8 +654,8 @@ class JR2_Kinova(LocomotorRobot): self.arm_dim = 5 LocomotorRobot.__init__(self, - "jr2_urdf/jr2_kinova.urdf", - "base_link", + "jr2_urdf/jr2_kinova.urdf", + "base_link", action_dim=10, sensor_dim=46, power=2.5, @@ -709,3 +717,70 @@ class JR2_Kinova(LocomotorRobot): for j in range(16, 28): p.setCollisionFilterPair(robot_id, robot_id, joint, j, 0) return ids + + +class Locobot(LocomotorRobot): + mjcf_scaling = 1 + model_type = "URDF" + default_scale = 1 + + def __init__(self, config): + self.config = config + self.velocity = config.get('velocity', 0.1) + self.wheel_dim = 2 + self.action_high = config.get("action_high", None) + self.action_low = config.get("action_low", None) + LocomotorRobot.__init__(self, + "locobot/locobot.urdf", + "base_link", + action_dim=self.wheel_dim, + sensor_dim=55, # TODO: what is sensor_dim? 
+ power=2.5, + scale=config.get("robot_scale", self.default_scale), + resolution=config.get("resolution", 64), + is_discrete=config.get("is_discrete", True), + control="velocity", + self_collision=False) + + def set_up_continuous_action_space(self): + if self.action_high is not None and self.action_low is not None: + self.action_high = np.full(shape=self.wheel_dim, fill_value=self.action_high) + self.action_low = np.full(shape=self.wheel_dim, fill_value=self.action_low) + else: + self.action_high = np.array([self.velocity] * self.wheel_dim) + self.action_low = -self.action_high + + self.action_space = gym.spaces.Box(shape=(self.action_dim,), + low=-1.0, + high=1.0, + dtype=np.float32) + + def set_up_discrete_action_space(self): + assert False, "Locobot does not support discrete actions" + + def apply_action(self, action): + real_action = self.action_to_real_action(action) + self.apply_real_action(real_action) + + def calc_state(self): + base_state = LocomotorRobot.calc_state(self) + angular_velocity = self.robot_body.angular_velocity() + print(len(base_state), len(angular_velocity)) + return np.concatenate((base_state, np.array(angular_velocity))) + + def get_end_effector_position(self): + return self.parts['gripper_link'].get_position() + + def load(self): + ids = self._load_model() + self.eyes = self.parts["eyes"] + + robot_id = ids[0] + + # disable collision for immediate parent + for joint in range(p.getNumJoints(robot_id)): + info = p.getJointInfo(robot_id, joint) + parent_id = info[-1] + p.setCollisionFilterPair(robot_id, robot_id, joint, parent_id, 0) + + return ids diff --git a/gibson2/core/physics/scene.py b/gibson2/core/physics/scene.py index 68f352c55..f5f3b9a0e 100644 --- a/gibson2/core/physics/scene.py +++ b/gibson2/core/physics/scene.py @@ -6,15 +6,18 @@ parentdir = os.path.dirname(currentdir) os.sys.path.insert(0, parentdir) import pybullet_data from gibson2.data.datasets import get_model_path +from gibson2.utils.utils import l2_distance + import numpy as np from PIL import Image import cv2 import networkx as nx from IPython import embed +import pickle class Scene: def load(self): - raise (NotImplementedError()) + raise NotImplementedError() class EmptyScene(Scene): """ @@ -23,6 +26,7 @@ class EmptyScene(Scene): def load(self): planeName = os.path.join(pybullet_data.getDataPath(), "mjcf/ground_plane.xml") self.ground_plane_mjcf = p.loadMJCF(planeName) + p.changeDynamics(self.ground_plane_mjcf[0], -1, lateralFriction=1) return [item for item in self.ground_plane_mjcf] class StadiumScene(Scene): @@ -43,6 +47,9 @@ class StadiumScene(Scene): return [item for item in self.stadium] + [item for item in self.ground_plane_mjcf] + def get_random_floor(self): + return 0 + def get_random_point(self, random_height=False): return self.get_random_point_floor(0, random_height) @@ -54,6 +61,17 @@ class StadiumScene(Scene): np.random.uniform(0.4, 0.8) if random_height else 0.0 ]) + def get_floor_height(self, floor): + del floor + return 0.0 + + def reset_floor(self, floor=0, additional_elevation=0.05, height=None): + return + + def get_shortest_path(self, floor, source_world, target_world, entire_path=False): + assert False, 'cannot compute shortest path in StadiumScene' + + class BuildingScene(Scene): """ Gibson Environment building scenes @@ -90,9 +108,6 @@ class BuildingScene(Scene): self.num_waypoints = num_waypoints self.waypoint_interval = int(waypoint_resolution / trav_map_resolution) - def l2_distance(self, a, b): - return np.linalg.norm(np.array(a) - np.array(b)) - def load(self): 
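The Locobot's continuous action space above is normalized to [-1, 1] while the physical wheel-velocity bounds are kept in action_high/action_low, so apply_action relies on a rescaling step (action_to_real_action, defined in LocomotorRobot and not shown in this diff). A sketch of the assumed linear mapping:

import numpy as np

def action_to_real_action(action, action_low, action_high):
    # assumed linear rescaling from the normalized policy output in [-1, 1]
    # to the physical wheel-velocity bounds
    action = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)
    return action_low + (action + 1.0) * 0.5 * (action_high - action_low)

low = np.array([-0.1, -0.1])   # matches velocity: 0.1 in the new locobot config
high = np.array([0.1, 0.1])
print(action_to_real_action([1.0, -0.5], low, high))  # [ 0.1  -0.05]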
""" Load the mesh into pybullet @@ -141,39 +156,56 @@ class BuildingScene(Scene): self.floors = sorted(list(map(float, f.readlines()))) print('floors', self.floors) for f in range(len(self.floors)): - trav_map = Image.open(os.path.join(get_model_path(self.model_id), 'floor_trav_{}.png'.format(f))) - obstacle_map = Image.open(os.path.join(get_model_path(self.model_id), 'floor_{}.png'.format(f))) + trav_map = np.array(Image.open( + os.path.join(get_model_path(self.model_id), 'floor_trav_{}.png'.format(f)) + )) + obstacle_map = np.array(Image.open( + os.path.join(get_model_path(self.model_id), 'floor_{}.png'.format(f)) + )) if self.trav_map_original_size is None: - width, height = trav_map.size - assert width == height, 'trav map is not a square' + height, width = trav_map.shape + assert height == width, 'trav map is not a square' self.trav_map_original_size = height - self.trav_map_size = int(self.trav_map_original_size * self.trav_map_default_resolution / self.trav_map_resolution) - trav_map = np.array(trav_map.resize((self.trav_map_size, self.trav_map_size))) - obstacle_map = np.array(obstacle_map.resize((self.trav_map_size, self.trav_map_size))) + self.trav_map_size = int(self.trav_map_original_size * + self.trav_map_default_resolution / + self.trav_map_resolution) + trav_map[obstacle_map == 0] = 0 + trav_map = cv2.resize(trav_map, (self.trav_map_size, self.trav_map_size)) trav_map = cv2.erode(trav_map, np.ones((self.trav_map_erosion, self.trav_map_erosion))) + trav_map[trav_map < 255] = 0 if self.build_graph: - g = nx.Graph() - for i in range(self.trav_map_size): - for j in range(self.trav_map_size): - if trav_map[i, j] > 0: - g.add_node((i, j)) - # 8-connected graph - neighbors = [(i - 1, j - 1), (i, j - 1), (i + 1, j - 1), (i - 1, j)] - for n in neighbors: - if 0 <= n[0] < self.trav_map_size and 0 <= n[1] < self.trav_map_size and \ - trav_map[n[0], n[1]] > 0: - g.add_edge(n, (i, j), weight=self.l2_distance(n, (i, j))) + graph_file = os.path.join(get_model_path(self.model_id), 'floor_trav_{}.p'.format(f)) + + if os.path.isfile(graph_file): + print("load traversable graph") + with open(graph_file, 'rb') as pfile: + g = pickle.load(pfile) + else: + print("build traversable graph") + g = nx.Graph() + for i in range(self.trav_map_size): + for j in range(self.trav_map_size): + if trav_map[i, j] > 0: + g.add_node((i, j)) + # 8-connected graph + neighbors = [(i - 1, j - 1), (i, j - 1), (i + 1, j - 1), (i - 1, j)] + for n in neighbors: + if 0 <= n[0] < self.trav_map_size and 0 <= n[1] < self.trav_map_size and \ + trav_map[n[0], n[1]] > 0: + g.add_edge(n, (i, j), weight=l2_distance(n, (i, j))) + + # only take the largest connected component + largest_cc = max(nx.connected_components(g), key=len) + g = g.subgraph(largest_cc).copy() + with open(graph_file, 'wb') as pfile: + pickle.dump(g, pfile, protocol=pickle.HIGHEST_PROTOCOL) - # only take the largest connected component - largest_cc = max(nx.connected_components(g), key=len) - g = g.subgraph(largest_cc).copy() self.floor_graph.append(g) - # update trav_map accordingly trav_map[:, :] = 0 - for node in largest_cc: + for node in g.nodes: trav_map[node[0], node[1]] = 255 self.floor_map.append(trav_map) @@ -205,6 +237,11 @@ class BuildingScene(Scene): def world_to_map(self, xy): return np.flip((xy / self.trav_map_resolution + self.trav_map_size / 2.0)).astype(np.int) + def has_node(self, floor, world_xy): + map_xy = tuple(self.world_to_map(world_xy)) + g = self.floor_graph[floor] + return g.has_node(map_xy) + def get_shortest_path(self, floor, 
source_world, target_world, entire_path=False): # print("called shortest path", source_world, target_world) assert self.build_graph, 'cannot get shortest path without building the graph' @@ -216,20 +253,21 @@ class BuildingScene(Scene): if not g.has_node(target_map): nodes = np.array(g.nodes) closest_node = tuple(nodes[np.argmin(np.linalg.norm(nodes - target_map, axis=1))]) - g.add_edge(closest_node, target_map, weight=self.l2_distance(closest_node, target_map)) + g.add_edge(closest_node, target_map, weight=l2_distance(closest_node, target_map)) if not g.has_node(source_map): nodes = np.array(g.nodes) closest_node = tuple(nodes[np.argmin(np.linalg.norm(nodes - source_map, axis=1))]) - g.add_edge(closest_node, source_map, weight=self.l2_distance(closest_node, source_map)) + g.add_edge(closest_node, source_map, weight=l2_distance(closest_node, source_map)) - path_map = np.array(nx.astar_path(g, source_map, target_map, heuristic=self.l2_distance)) + path_map = np.array(nx.astar_path(g, source_map, target_map, heuristic=l2_distance)) path_world = self.map_to_world(path_map) geodesic_distance = np.sum(np.linalg.norm(path_world[1:] - path_world[:-1], axis=1)) + path_world = path_world[::self.waypoint_interval] if not entire_path: - path_world = path_world[::self.waypoint_interval][:self.num_waypoints] + path_world = path_world[:self.num_waypoints] num_remaining_waypoints = self.num_waypoints - path_world.shape[0] if num_remaining_waypoints > 0: remaining_waypoints = np.tile(target_world, (num_remaining_waypoints, 1)) diff --git a/gibson2/core/simulator.py b/gibson2/core/simulator.py index dc7d266ff..e3c0007b6 100644 --- a/gibson2/core/simulator.py +++ b/gibson2/core/simulator.py @@ -36,6 +36,7 @@ class Simulator: self.gravity = gravity self.timestep = timestep self.mode = mode + # renderer self.resolution = resolution self.fov = fov @@ -63,8 +64,7 @@ class Simulator: """ Destroy the MeshRenderer and physics simulator and start again. """ - self.renderer.release() - p.disconnect(self.cid) + self.disconnect() self.load() def load(self): @@ -91,6 +91,7 @@ class Simulator: self.cid = p.connect(p.DIRECT) p.setTimeStep(self.timestep) p.setGravity(0, 0, -self.gravity) + p.setPhysicsEngineParameter(enableFileCaching=0) if self.mode == 'gui' and not self.render_to_tensor: self.add_viewer() @@ -101,14 +102,13 @@ class Simulator: self.objects = [] def import_scene(self, scene, texture_scale=1.0, load_texture=True, class_id=0): - """ Import a scene. A scene could be a synthetic one or a realistic Gibson Environment. :param scene: Scene object :param texture_scale: Option to scale down the texture for rendering :param load_texture: If you don't need rgb output, texture loading could be skipped to make rendering faster - :param class_id: The class_id for background for rendering semantics. 
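get_shortest_path above snaps the source and target to their nearest graph nodes, runs A* with the Euclidean heuristic, and subsamples the result every waypoint_interval nodes. A toy, self-contained illustration of the networkx call on a small grid (4-connected here for brevity; the real graph is 8-connected and built from the eroded traversability map):

import networkx as nx
import numpy as np

def l2_distance(a, b):
    return np.linalg.norm(np.array(a) - np.array(b))

g = nx.grid_2d_graph(3, 3)
for u, v in g.edges:
    g[u][v]['weight'] = l2_distance(u, v)

path = nx.astar_path(g, (0, 0), (2, 2), heuristic=l2_distance)
geodesic = sum(l2_distance(a, b) for a, b in zip(path[:-1], path[1:]))
print(path, geodesic)  # e.g. [(0, 0), (1, 0), (2, 0), (2, 1), (2, 2)] with length 4.0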
+ :param class_id: Class id for rendering semantic segmentation """ new_objects = scene.load() @@ -153,7 +153,7 @@ class Simulator: def import_object(self, object, class_id=0): """ :param object: Object to load - :param class_id: class_id to show for semantic segmentation mask + :param class_id: Class id for rendering semantic segmentation """ new_object = object.load() self.objects.append(new_object) @@ -215,7 +215,7 @@ class Simulator: Import a robot into Simulator :param robot: Robot - :param class_id: class_id to show for semantic segmentation mask + :param class_id: Class id for rendering semantic segmentation :return: id for robot in pybullet """ @@ -298,7 +298,7 @@ class Simulator: Import articulated objects into simulator :param obj: - :param class_id: class_id to show for semantic segmentation mask + :param class_id: Class id for rendering semantic segmentation :return: pybulet id """ ids = obj.load() @@ -382,6 +382,12 @@ class Simulator: """ p.stepSimulation() + self.sync() + + def sync(self): + """ + Update positions in renderer without stepping the simulation. Usually used in the reset() function + """ for instance in self.renderer.instances: if instance.dynamic: self.update_position(instance) @@ -427,5 +433,6 @@ class Simulator: clean up the simulator """ if self.isconnected(): + p.resetSimulation(physicsClientId=self.cid) p.disconnect(self.cid) self.renderer.release() diff --git a/gibson2/envs/base_env.py b/gibson2/envs/base_env.py index 3c33e7a2b..cff77c11c 100644 --- a/gibson2/envs/base_env.py +++ b/gibson2/envs/base_env.py @@ -1,4 +1,5 @@ -from gibson2.core.physics.robot_locomotors import Turtlebot, Husky, Ant, Humanoid, JR2, JR2_Kinova, Freight, Fetch +from gibson2.core.physics.robot_locomotors \ + import Turtlebot, Husky, Ant, Humanoid, JR2, JR2_Kinova, Freight, Fetch, Locobot from gibson2.core.simulator import Simulator from gibson2.core.physics.scene import BuildingScene, StadiumScene import gibson2 @@ -16,21 +17,31 @@ class BaseEnv(gym.Env): config_file, model_id=None, mode='headless', + action_timestep=1 / 10.0, + physics_timestep=1 / 240.0, device_idx=0): """ :param config_file: config_file path :param model_id: override model_id in config file :param mode: headless or gui mode - :param device_idx: which GPU to run the simulation and rendering on + :param action_timestep: environment executes action per action_timestep second + :param physics_timestep: physics timestep for pybullet + :param device_idx: device_idx: which GPU to run the simulation and rendering on """ self.config = parse_config(config_file) if model_id is not None: self.config['model_id'] = model_id + + self.mode = mode + self.action_timestep = action_timestep + self.physics_timestep = physics_timestep self.simulator = Simulator(mode=mode, + timestep=physics_timestep, use_fisheye=self.config.get('fisheye', False), resolution=self.config.get('resolution', 64), fov=self.config.get('fov', 90), device_idx=device_idx) + self.simulator_loop = int(self.action_timestep / self.simulator.timestep) self.load() def reload(self, config_file): @@ -77,6 +88,8 @@ class BaseEnv(gym.Env): robot = Freight(self.config) elif self.config['robot'] == 'Fetch': robot = Fetch(self.config) + elif self.config['robot'] == 'Locobot': + robot = Locobot(self.config) else: raise Exception('unknown robot type: {}'.format(self.config['robot'])) @@ -99,10 +112,16 @@ class BaseEnv(gym.Env): self.simulator.step() def step(self, action): - return NotImplementedError + """ + Overwritten by subclasses + """ + return NotImplementedError() def 
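With the new action_timestep/physics_timestep arguments in BaseEnv above, each environment step runs a fixed number of physics sub-steps. A quick worked example with the defaults:

action_timestep = 1 / 10.0    # simulated seconds per agent action
physics_timestep = 1 / 240.0  # pybullet integration step
simulator_loop = int(action_timestep / physics_timestep)
print(simulator_loop)  # 24 physics sub-steps per env.step()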
reset(self): - return NotImplementedError + """ + Overwritten by subclasses + """ + return NotImplementedError() def set_mode(self, mode): - self.simulator.mode = mode \ No newline at end of file + self.simulator.mode = mode diff --git a/gibson2/envs/locomotor_env.py b/gibson2/envs/locomotor_env.py index 4f594c8e3..b74bea965 100644 --- a/gibson2/envs/locomotor_env.py +++ b/gibson2/envs/locomotor_env.py @@ -1,5 +1,6 @@ -from gibson2.core.physics.interactive_objects import VisualMarker, InteractiveObj, BoxShape import gibson2 +from gibson2.core.physics.interactive_objects import VisualMarker, InteractiveObj, BoxShape +from gibson2.core.physics.robot_locomotors import Turtlebot from gibson2.utils.utils import parse_config, rotate_vector_3d, l2_distance, quatToXYZW from gibson2.envs.base_env import BaseEnv from transforms3d.euler import euler2quat @@ -20,27 +21,6 @@ import time import collections - - -Episode = collections.namedtuple('Episode', - ['env', - 'agent', - 'initial_pos', - 'target_pos', - 'geodesic_distance', - 'shortest_path', - 'agent_trajectory', - 'object_files', - 'object_trajectory', - 'success', - 'path_efficiency', - 'kinematic_disturbance', - 'dynamic_disturbance_a', - 'dynamic_disturbance_b', - 'collision_step', - ]) - - class NavigateEnv(BaseEnv): """ We define navigation environments following Anderson, Peter, et al. 'On evaluation of embodied navigation agents.' @@ -57,52 +37,43 @@ class NavigateEnv(BaseEnv): automatic_reset=False, device_idx=0, ): + """ + :param config_file: config_file path + :param model_id: override model_id in config file + :param mode: headless or gui mode + :param action_timestep: environment executes action per action_timestep second + :param physics_timestep: physics timestep for pybullet + :param automatic_reset: whether to automatic reset after an episode finishes + :param device_idx: device_idx: which GPU to run the simulation and rendering on + """ super(NavigateEnv, self).__init__(config_file=config_file, model_id=model_id, mode=mode, + action_timestep=action_timestep, + physics_timestep=physics_timestep, device_idx=device_idx) self.automatic_reset = automatic_reset - # simulation - self.mode = mode - self.action_timestep = action_timestep - self.physics_timestep = physics_timestep - self.simulator.set_timestep(physics_timestep) - self.simulator_loop = int(self.action_timestep / self.simulator.timestep) - self.current_step = 0 - self.path_length = 0.0 - self.agent_trajectory = [] - self.stage = None - self.current_episode = 0 - self.floor_num = None - self.num_object_classes = None - self.stored_episodes = collections.deque(maxlen=100) - self.interactive_objects = [] - # self.reward_stats = [] - # self.state_stats = {'sensor': [], 'auxiliary_sensor': []} - - def load(self): - super(NavigateEnv, self).load() + def load_task_setup(self): + """ + Load task setup, including initialization, termination conditino, reward, collision checking, discount factor + """ + # initial and target pose self.initial_pos = np.array(self.config.get('initial_pos', [0, 0, 0])) self.initial_orn = np.array(self.config.get('initial_orn', [0, 0, 0])) - self.target_pos = np.array(self.config.get('target_pos', [5, 5, 0])) self.target_orn = np.array(self.config.get('target_orn', [0, 0, 0])) self.additional_states_dim = self.config.get('additional_states_dim', 0) self.auxiliary_sensor_dim = self.config.get('auxiliary_sensor_dim', 0) - self.normalize_observation = self.config.get('normalize_observation', False) - self.observation_normalizer = 
self.config.get('observation_normalizer', {}) - for key in self.observation_normalizer: - self.observation_normalizer[key] = np.array(self.observation_normalizer[key]) # termination condition self.dist_tol = self.config.get('dist_tol', 0.2) - self.max_step = self.config.get('max_step', float('inf')) + self.max_step = self.config.get('max_step', 500) # reward - self.reward_type = self.config.get('reward_type', 'dense') - assert self.reward_type in ['dense', 'sparse', 'l2', 'stage_sparse'] + self.reward_type = self.config.get('reward_type', 'geodesic') + assert self.reward_type in ['geodesic', 'l2', 'sparse'] self.success_reward = self.config.get('success_reward', 10.0) self.slack_reward = self.config.get('slack_reward', -0.01) @@ -110,27 +81,24 @@ class NavigateEnv(BaseEnv): # reward weight self.potential_reward_weight = self.config.get('potential_reward_weight', 10.0) - self.electricity_reward_weight = self.config.get('electricity_reward_weight', 0.0) - self.stall_torque_reward_weight = self.config.get('stall_torque_reward_weight', 0.0) self.collision_reward_weight = self.config.get('collision_reward_weight', 0.0) - # ignore the agent's collision with these body ids, typically ids of the ground + + # ignore the agent's collision with these body ids self.collision_ignore_body_b_ids = set(self.config.get('collision_ignore_body_b_ids', [])) + # ignore the agent's collision with these link ids of itself self.collision_ignore_link_a_ids = set(self.config.get('collision_ignore_link_a_ids', [])) # discount factor self.discount_factor = self.config.get('discount_factor', 1.0) + + def load_observation_space(self): + """ + Load observation space + """ self.output = self.config['output'] - self.n_horizontal_rays = self.config.get('n_horizontal_rays', 128) - self.n_vertical_beams = self.config.get('n_vertical_beams', 9) - - # TODO: sensor: observations that are passed as network input, e.g. target position in local frame - # TODO: auxiliary sensor: observations that are not passed as network input, but used to maintain the same - # subgoals for the next T time steps, e.g. 
agent pose in global frame - self.sensor_dim = self.additional_states_dim - self.action_dim = self.robots[0].action_dim - observation_space = OrderedDict() if 'sensor' in self.output: + self.sensor_dim = self.additional_states_dim self.sensor_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.sensor_dim,), @@ -142,23 +110,33 @@ class NavigateEnv(BaseEnv): shape=(self.auxiliary_sensor_dim,), dtype=np.float32) observation_space['auxiliary_sensor'] = self.auxiliary_sensor_space - if 'pointgoal' in self.output: - self.pointgoal_space = gym.spaces.Box(low=-np.inf, - high=np.inf, - shape=(2,), - dtype=np.float32) - observation_space['pointgoal'] = self.pointgoal_space if 'rgb' in self.output: - self.rgb_space = gym.spaces.Box(low=-np.inf, - high=np.inf, + self.rgb_space = gym.spaces.Box(low=0.0, + high=1.0, shape=(self.config.get('resolution', 64), self.config.get('resolution', 64), 3), dtype=np.float32) observation_space['rgb'] = self.rgb_space if 'depth' in self.output: - self.depth_space = gym.spaces.Box(low=-np.inf, - high=np.inf, + self.depth_noise_rate = self.config.get('depth_noise_rate', 0.0) + if self.config['robot'] == 'Turtlebot': + # ASUS Xtion PRO LIVE + self.depth_low = 0.8 + self.depth_high = 3.5 + elif self.config['robot'] == 'Fetch': + # Primesense Carmine 1.09 short-range RGBD sensor + self.depth_low = 0.35 + self.depth_high = 3.0 # http://xtionprolive.com/primesense-carmine-1.09 + # self.depth_high = 1.4 # https://www.i3du.gr/pdf/primesense.pdf + elif self.config['robot'] == 'Locobot': + # https://store.intelrealsense.com/buy-intel-realsense-depth-camera-d435.html + self.depth_low = 0.1 + self.depth_high = 10.0 + else: + assert False, 'unknown robot for depth observation' + self.depth_space = gym.spaces.Box(low=0.0, + high=1.0, shape=(self.config.get('resolution', 64), self.config.get('resolution', 64), 1), @@ -172,18 +150,29 @@ class NavigateEnv(BaseEnv): 1), dtype=np.float32) observation_space['seg'] = self.seg_space - if 'depth_seg' in self.output: - self.depth_seg_space = gym.spaces.Box(low=0.0, - high=1.0, - shape=(self.config.get('resolution', 64), - self.config.get('resolution', 64), - 2), - dtype=np.float32) - observation_space['depth_seg'] = self.depth_seg_space if 'scan' in self.output: - self.scan_space = gym.spaces.Box(low=-np.inf, - high=np.inf, - shape=(self.n_horizontal_rays * self.n_vertical_beams, 3), + self.scan_noise_rate = self.config.get('scan_noise_rate', 0.0) + self.n_horizontal_rays = self.config.get('n_horizontal_rays', 128) + self.n_vertical_beams = self.config.get('n_vertical_beams', 1) + assert self.n_vertical_beams == 1, 'scan can only handle one vertical beam for now' + if self.config['robot'] == 'Turtlebot': + # Hokuyo URG-04LX-UG01 + self.laser_linear_range = 5.6 + self.laser_angular_range = 240.0 + self.min_laser_dist = 0.05 + self.laser_link_name = 'scan_link' + elif self.config['robot'] == 'Fetch': + # SICK TiM571-2050101 Laser Range Finder + self.laser_linear_range = 25.0 + self.laser_angular_range = 220.0 + self.min_laser_dist = 0.0 + self.laser_link_name = 'laser_link' + else: + assert False, 'unknown robot for LiDAR observation' + + self.scan_space = gym.spaces.Box(low=0.0, + high=1.0, + shape=(self.n_horizontal_rays * self.n_vertical_beams, 1), dtype=np.float32) observation_space['scan'] = self.scan_space if 'rgb_filled' in self.output: # use filler @@ -194,130 +183,209 @@ class NavigateEnv(BaseEnv): self.comp.eval() self.observation_space = gym.spaces.Dict(observation_space) + + def load_action_space(self): + """ + Load action 
space + """ self.action_space = self.robots[0].action_space - # add visual objects - self.visual_object_at_initial_target_pos = self.config.get('visual_object_at_initial_target_pos', False) + def load_visualization(self): + """ + Load visualization, such as initial and target position, shortest path, etc + """ + if self.mode != 'gui': + return - if self.visual_object_at_initial_target_pos: - cyl_length = 0.2 - self.initial_pos_vis_obj = VisualMarker(visual_shape=p.GEOM_CYLINDER, - rgba_color=[1, 0, 0, 0.3], - radius=self.dist_tol, - length=cyl_length, - initial_offset=[0, 0, cyl_length / 2.0]) - self.target_pos_vis_obj = VisualMarker(visual_shape=p.GEOM_CYLINDER, - rgba_color=[0, 0, 1, 0.3], - radius=self.dist_tol, - length=cyl_length, - initial_offset=[0, 0, cyl_length / 2.0]) - self.initial_pos_vis_obj.load() - if self.config.get('target_visual_object_visible_to_agent', False): - self.simulator.import_object(self.target_pos_vis_obj, class_id=255) - else: - self.target_pos_vis_obj.load() + cyl_length = 0.2 + self.initial_pos_vis_obj = VisualMarker(visual_shape=p.GEOM_CYLINDER, + rgba_color=[1, 0, 0, 0.3], + radius=self.dist_tol, + length=cyl_length, + initial_offset=[0, 0, cyl_length / 2.0]) + self.target_pos_vis_obj = VisualMarker(visual_shape=p.GEOM_CYLINDER, + rgba_color=[0, 0, 1, 0.3], + radius=self.dist_tol, + length=cyl_length, + initial_offset=[0, 0, cyl_length / 2.0]) + self.initial_pos_vis_obj.load() + self.target_pos_vis_obj.load() + + if self.scene.build_graph: + self.num_waypoints_vis = 250 + self.waypoints_vis = [VisualMarker(visual_shape=p.GEOM_CYLINDER, + rgba_color=[0, 1, 0, 0.3], + radius=0.1, + length=cyl_length, + initial_offset=[0, 0, cyl_length / 2.0]) + for _ in range(self.num_waypoints_vis)] + for waypoint in self.waypoints_vis: + waypoint.load() + + def load_miscellaneous_variables(self): + """ + Load miscellaneous variables for book keeping + """ + self.current_step = 0 + self.collision_step = 0 + self.current_episode = 0 + self.floor_num = None + # self.path_length = 0.0 + # self.agent_trajectory = [] + # self.stage = None + # self.floor_num = None + # self.num_object_classes = None + # self.interactive_objects = [] + + def load(self): + """ + Load navigation environment + """ + super(NavigateEnv, self).load() + self.load_task_setup() + self.load_observation_space() + self.load_action_space() + self.load_visualization() + self.load_miscellaneous_variables() + + def global_to_local(self, pos): + """ + Convert a 3D point in global frame to agent's local frame + :param pos: a 3D point in global frame + :return: the same 3D point in agent's local frame + """ + return rotate_vector_3d(pos - self.robots[0].get_position(), *self.robots[0].get_rpy()) def get_additional_states(self): - relative_position = self.target_pos - self.robots[0].get_position() - # rotate relative position back to body point of view - additional_states = rotate_vector_3d(relative_position, *self.robots[0].get_rpy()) - + """ + :return: non-perception observation, such as goal location + """ + additional_states = self.global_to_local(self.target_pos) if self.config['task'] == 'reaching': - end_effector_pos = self.robots[0].get_end_effector_position() - self.robots[0].get_position() - end_effector_pos = rotate_vector_3d(end_effector_pos, *self.robots[0].get_rpy()) - additional_states = np.concatenate((additional_states, end_effector_pos)) + end_effector_pos_local = self.global_to_local(self.robots[0].get_end_effector_position()) + additional_states = np.concatenate((additional_states, 
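global_to_local above expresses a world-frame point in the robot's base frame by subtracting the base position and undoing the base rotation. A standalone worked example, assuming rotate_vector_3d applies the inverse of the base's roll/pitch/yaw rotation:

import numpy as np
from transforms3d.euler import euler2mat

def global_to_local(pos, robot_pos, roll, pitch, yaw):
    # inverse (transpose) of the base rotation applied to the world-frame offset
    return euler2mat(roll, pitch, yaw).T.dot(np.asarray(pos) - np.asarray(robot_pos))

# robot at the origin facing +y (yaw = 90 deg), target 1 m ahead of it in the world frame
local = global_to_local([0.0, 1.0, 0.0], [0.0, 0.0, 0.0], 0.0, 0.0, np.pi / 2)
print(np.round(local, 3))  # ~[1, 0, 0]: the goal is straight ahead in the robot frame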
end_effector_pos_local)) assert len(additional_states) == self.additional_states_dim, 'additional states dimension mismatch' - return additional_states - def get_auxiliary_sensor(self, collision_links=[]): - return np.array([]) + def add_naive_noise_to_sensor(self, sensor_reading, noise_rate, noise_value=1.0): + """ + Add naive sensor dropout to perceptual sensor, such as RGBD and LiDAR scan + :param sensor_reading: raw sensor reading, range must be between [0.0, 1.0] + :param noise_rate: how much noise to inject, 0.05 means 5% of the data will be replaced with noise_value + :param noise_value: noise_value to overwrite raw sensor reading + :return: sensor reading corrupted with noise + """ + if noise_rate <= 0.0: + return sensor_reading + + assert len(sensor_reading[(sensor_reading < 0.0) | (sensor_reading > 1.0)]) == 0,\ + 'sensor reading has to be between [0.0, 1.0]' + + valid_mask = np.random.choice(2, sensor_reading.shape, p=[noise_rate, 1.0 - noise_rate]) + sensor_reading[valid_mask == 0] = noise_value + return sensor_reading + + def get_depth(self): + """ + :return: depth sensor reading, normalized to [0.0, 1.0] + """ + depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3] + # 0.0 is a special value for invalid entries + depth[depth < self.depth_low] = 0.0 + depth[depth > self.depth_high] = 0.0 + + # re-scale depth to [0.0, 1.0] + depth /= self.depth_high + depth = self.add_naive_noise_to_sensor(depth, self.depth_noise_rate, noise_value=0.0) + + return depth + + def get_rgb(self): + """ + :return: RGB sensor reading, normalized to [0.0, 1.0] + """ + return self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3] + + def get_pc(self): + """ + :return: pointcloud sensor reading + """ + return self.simulator.renderer.render_robot_cameras(modes=('3d'))[0] + + def get_normal(self): + """ + :return: surface normal reading + """ + return self.simulator.renderer.render_robot_cameras(modes='normal') + + def get_seg(self): + """ + :return: semantic segmentation mask, normalized to [0.0, 1.0] + """ + seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1] + if self.num_object_classes is not None: + seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0) + return seg + + def get_scan(self): + """ + :return: LiDAR sensor reading, normalized to [0.0, 1.0] + """ + laser_angular_half_range = self.laser_angular_range / 2.0 + laser_pose = self.robots[0].parts[self.laser_link_name].get_pose() + angle = np.arange(-laser_angular_half_range / 180 * np.pi, + laser_angular_half_range / 180 * np.pi, + self.laser_angular_range / 180.0 * np.pi / self.n_horizontal_rays) + unit_vector_local = np.array([[np.cos(ang), np.sin(ang), 0.0] for ang in angle]) + transform_matrix = quat2mat([laser_pose[6], laser_pose[3], laser_pose[4], laser_pose[5]]) # [x, y, z, w] + unit_vector_world = transform_matrix.dot(unit_vector_local.T).T + + start_pose = np.tile(laser_pose[:3], (self.n_horizontal_rays, 1)) + start_pose += unit_vector_world * self.min_laser_dist + end_pose = laser_pose[:3] + unit_vector_world * self.laser_linear_range + results = p.rayTestBatch(start_pose, end_pose, 6) # numThreads = 6 + + hit_fraction = np.array([item[2] for item in results]) # hit fraction = [0.0, 1.0] of self.laser_linear_range + hit_fraction = self.add_naive_noise_to_sensor(hit_fraction, self.scan_noise_rate) + scan = np.expand_dims(hit_fraction, 1) + return scan def get_state(self, collision_links=[]): - # calculate state - sensor_state = self.get_additional_states() - 
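The new depth pipeline above (get_depth plus add_naive_noise_to_sensor) zeroes out readings outside the sensor's valid range, rescales by depth_high, then drops a fraction noise_rate of pixels to the invalid value 0.0. A self-contained numpy sketch using the Locobot/RealSense D435 range from this hunk:

import numpy as np

def process_depth(raw_depth_m, depth_low, depth_high, noise_rate):
    depth = raw_depth_m.copy()
    depth[depth < depth_low] = 0.0    # 0.0 marks invalid entries
    depth[depth > depth_high] = 0.0
    depth /= depth_high               # rescale valid readings to [0.0, 1.0]
    if noise_rate > 0.0:
        valid_mask = np.random.choice(2, depth.shape, p=[noise_rate, 1.0 - noise_rate])
        depth[valid_mask == 0] = 0.0  # naive sensor dropout
    return depth

raw = np.random.uniform(0.0, 12.0, size=(64, 64, 1)).astype(np.float32)
obs = process_depth(raw, depth_low=0.1, depth_high=10.0, noise_rate=0.05)
print(obs.min(), obs.max())  # always within [0.0, 1.0]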
auxiliary_sensor = self.get_auxiliary_sensor(collision_links) - - # rgb = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3] - # rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB) - # depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3] - # depth = np.clip(depth / 5.0, 0.0, 1.0) - # depth = 1.0 - depth # flip black/white - # seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1] - # if self.num_object_classes is not None: - # seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0) - # cv2.imshow('rgb', rgb) - # cv2.imshow('depth', depth) - # cv2.imshow('seg', seg) - + """ + :param collision_links: collisions from last time step + :return: observation as a dictionary + """ state = OrderedDict() if 'sensor' in self.output: - state['sensor'] = sensor_state - if 'auxiliary_sensor' in self.output: - state['auxiliary_sensor'] = auxiliary_sensor - if 'pointgoal' in self.output: - state['pointgoal'] = sensor_state[:2] + state['sensor'] = self.get_additional_states() if 'rgb' in self.output: - state['rgb'] = self.simulator.renderer.render_robot_cameras(modes=('rgb'))[0][:, :, :3] + state['rgb'] = self.get_rgb() if 'depth' in self.output: - depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3] - state['depth'] = depth + state['depth'] = self.get_depth() + if 'pc' in self.output: + state['pc'] = self.get_pc() + if 'rgbd' in self.output: + rgb = self.get_rgb() + depth = self.get_depth() + state['rgbd'] = np.concatenate((rgb, depth), axis=2) if 'normal' in self.output: - state['normal'] = self.simulator.renderer.render_robot_cameras(modes='normal') + state['normal'] = self.get_normal() if 'seg' in self.output: - seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1] - if self.num_object_classes is not None: - seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0) - state['seg'] = seg - if 'depth_seg' in self.output: - depth = -self.simulator.renderer.render_robot_cameras(modes=('3d'))[0][:, :, 2:3] - depth = np.clip(depth / 5.0, 0.0, 1.0) - seg = self.simulator.renderer.render_robot_cameras(modes='seg')[0][:, :, 0:1] - if self.num_object_classes is not None: - seg = np.clip(seg * 255.0 / self.num_object_classes, 0.0, 1.0) - depth_seg = np.concatenate((depth, seg), axis=2) - state['depth_seg'] = depth_seg + state['seg'] = self.get_seg() if 'rgb_filled' in self.output: with torch.no_grad(): tensor = transforms.ToTensor()((state['rgb'] * 255).astype(np.uint8)).cuda() rgb_filled = self.comp(tensor[None, :, :, :])[0].permute(1, 2, 0).cpu().numpy() state['rgb_filled'] = rgb_filled - - if 'pointgoal' in self.output: - state['pointgoal'] = sensor_state[:2] - - # TODO: figure out why 'scan' consumes so much cpu if 'scan' in self.output: - assert 'scan_link' in self.robots[0].parts, "Requested scan but no scan_link" - pose_camera = self.robots[0].parts['scan_link'].get_pose() - angle = np.arange(0, 2 * np.pi, 2 * np.pi / float(self.n_horizontal_rays)) - elev_bottom_angle = -30. * np.pi / 180. - elev_top_angle = 10. * np.pi / 180. 
- elev_angle = np.arange(elev_bottom_angle, elev_top_angle, - (elev_top_angle - elev_bottom_angle) / float(self.n_vertical_beams)) - orig_offset = np.vstack([ - np.vstack([np.cos(angle), - np.sin(angle), - np.repeat(np.tan(elev_ang), angle.shape)]).T for elev_ang in elev_angle - ]) - transform_matrix = quat2mat([pose_camera[-1], pose_camera[3], pose_camera[4], pose_camera[5]]) - offset = orig_offset.dot(np.linalg.inv(transform_matrix)) - pose_camera = pose_camera[None, :3].repeat(self.n_horizontal_rays * self.n_vertical_beams, axis=0) - - results = p.rayTestBatch(pose_camera, pose_camera + offset * 30) - hit = np.array([item[0] for item in results]) - dist = np.array([item[2] for item in results]) - - valid_pts = (dist < 1. - 1e-5) & (dist > 0.1 / 30) & (hit != self.robots[0].robot_ids[0]) & (hit != -1) - dist[~valid_pts] = 0.0 # zero out invalid pts - dist *= 30 - - xyz = np.expand_dims(dist, 1) * orig_offset - state['scan'] = xyz - + state['scan'] = self.get_scan() return state def run_simulation(self): + """ + Run simulation for one action timestep (simulator_loop physics timestep) + :return: collisions from this simulation + """ collision_links = [] for _ in range(self.simulator_loop): self.simulator_step() @@ -325,46 +393,87 @@ class NavigateEnv(BaseEnv): return self.filter_collision_links(collision_links) def filter_collision_links(self, collision_links): - return [[item for item in collision_per_sim_step - if item[2] not in self.collision_ignore_body_b_ids and - item[3] not in self.collision_ignore_link_a_ids] - for collision_per_sim_step in collision_links] + """ + Filter out collisions that should be ignored + :param collision_links: original collisions, a list of lists of collisions + :return: filtered collisions + """ + new_collision_links = [] + for collision_per_sim_step in collision_links: + new_collision_per_sim_step = [] + for item in collision_per_sim_step: + # ignore collision with body b + if item[2] in self.collision_ignore_body_b_ids: + continue + + # ignore collision with robot link a + if item[3] in self.collision_ignore_link_a_ids: + continue + + # ignore self collision with robot link a (body b is also robot itself) + if item[2] == self.robots[0].robot_ids[0] and item[4] in self.collision_ignore_link_a_ids: + continue + + new_collision_per_sim_step.append(item) + new_collision_links.append(new_collision_per_sim_step) + return new_collision_links def get_position_of_interest(self): + """ + Get position of interest. + :return: If pointgoal task, return base position. If reaching task, return end effector position. 
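filter_collision_links above indexes into pybullet contact-point tuples: item[2] is bodyUniqueIdB, item[3] is linkIndexA, item[4] is linkIndexB (item[9] would be the normal force). A standalone restatement of the filtering rule for reference:

def filter_contacts(contacts, robot_id, ignore_body_b_ids, ignore_link_a_ids):
    kept = []
    for item in contacts:                                          # tuples from p.getContactPoints()
        if item[2] in ignore_body_b_ids:                           # collision with ignored body B
            continue
        if item[3] in ignore_link_a_ids:                           # collision on ignored robot link A
            continue
        if item[2] == robot_id and item[4] in ignore_link_a_ids:   # self-collision on ignored link
            continue
        kept.append(item)
    return kept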
+ """ if self.config['task'] == 'pointgoal': return self.robots[0].get_position() elif self.config['task'] == 'reaching': return self.robots[0].get_end_effector_position() - def get_potential(self): - return l2_distance(self.target_pos, self.get_position_of_interest()) + def get_shortest_path(self, from_initial_pos=False, entire_path=False): + """ + :param from_initial_pos: whether source is initial position rather than current position + :param entire_path: whether to return the entire shortest path + :return: shortest path and geodesic distance to the target position + """ + if from_initial_pos: + source = self.initial_pos[:2] + else: + source = self.robots[0].get_position()[:2] + target = self.target_pos[:2] + return self.scene.get_shortest_path(self.floor_num, source, target, entire_path=entire_path) + + def get_geodesic_potential(self): + """ + :return: geodesic distance to the target position + """ + _, geodesic_dist = self.get_shortest_path() + return geodesic_dist def get_l2_potential(self): + """ + :return: L2 distance to the target position + """ return l2_distance(self.target_pos, self.get_position_of_interest()) def get_reward(self, collision_links=[], action=None, info={}): + """ + :param collision_links: collisions from last time step + :param action: last action + :param info: a dictionary to store additional info + :return: reward, info + """ collision_links_flatten = [item for sublist in collision_links for item in sublist] reward = self.slack_reward # |slack_reward| = 0.01 per step if self.reward_type == 'l2': new_potential = self.get_l2_potential() - elif self.reward_type == 'dense': - new_potential = self.get_potential() + elif self.reward_type == 'geodesic': + new_potential = self.get_geodesic_potential() potential_reward = self.potential - new_potential reward += potential_reward * self.potential_reward_weight # |potential_reward| ~= 0.1 per step self.potential = new_potential - # electricity_reward = np.abs(self.robots[0].joint_speeds * self.robots[0].joint_torque).mean().item() - electricity_reward = 0.0 - reward += electricity_reward * self.electricity_reward_weight # |electricity_reward| ~= 0.05 per step - - # stall_torque_reward = np.square(self.robots[0].joint_torque).mean() - stall_torque_reward = 0.0 - reward += stall_torque_reward * self.stall_torque_reward_weight # |stall_torque_reward| ~= 0.05 per step - collision_reward = float(len(collision_links_flatten) > 0) - self.collision_step += int(len(collision_links_flatten) > 0) - info['collision_reward'] = collision_reward * self.collision_reward_weight # expose collision reward to info + self.collision_step += int(collision_reward) reward += collision_reward * self.collision_reward_weight # |collision_reward| ~= 1.0 per step if collision # goal reached @@ -374,15 +483,12 @@ class NavigateEnv(BaseEnv): return reward, info def get_termination(self, collision_links=[], info={}): + """ + :param collision_links: collisions from last time step + :param info: a dictionary to store additional info + :return: done, info + """ done = False - # for item in collision_links_flatten: - # if item[9] > 500: - # print("collision between " + self.id_to_name[self.robots[0].robot_ids[0]]["links"][item[3]] - # + " and " + self.id_to_name[item[2]]["links"][item[4]]) - - # door_angle = p.getJointState(self.door.body_id, self.door_axis_link_id)[0] - # max_force = max([item[9] for item in collision_links_flatten]) if len(collision_links_flatten) > 0 else 0 - floor_height = 0.0 if self.floor_num is None else 
self.scene.get_floor_height(self.floor_num) if l2_distance(self.target_pos, self.get_position_of_interest()) < self.dist_tol: print("GOAL") @@ -390,7 +496,7 @@ class NavigateEnv(BaseEnv): info['success'] = True elif self.robots[0].get_position()[2] > floor_height + self.death_z_thresh: - print("DEATH") + print("FLIP OVER") done = True info['success'] = False @@ -398,83 +504,49 @@ class NavigateEnv(BaseEnv): done = True info['success'] = False - # elif door_angle < (-10.0 / 180.0 * np.pi): - # print("WRONG PUSH") - - # elif max_force > 500: - # print("TOO MUCH FORCE") - # done = True - # info['success'] = False - if done: info['episode_length'] = self.current_step info['path_length'] = self.path_length info['collision_step'] = self.collision_step - info['energy_cost'] = self.energy_cost - info['stage'] = self.stage - - # Episode = collections.namedtuple('Episode', - # ['initial_pos', - # 'target_pos', - # 'geodesic_distance', - # 'shortest_path', - # 'trajectory', - # 'success', - # 'path_efficiency', - # 'kinematic_disturbance', - # 'dynamic_disturbance_a', - # 'dynamic_disturbance_b', - # 'collision_step', - # ]) - - # shortest_path, geodesic_distance = self.scene.get_shortest_path(self.floor_num, - # self.initial_pos[:2], - # self.target_pos[:2], - # entire_path=True) - # floor_height = self.scene.get_floor_height(self.floor_num) - # shortest_path = np.array([np.array([path[0], path[1], floor_height]) for path in shortest_path]) - # min_kin_dist = self.path_length * self.robots[0].robot_mass - # kinematic_disturbance = min_kin_dist / (min_kin_dist + self.kinematic_disturbance) - # min_dyn_dist = self.current_step * self.robots[0].robot_mass * 9.8 - # dynamic_disturbance_a = min_dyn_dist / (min_dyn_dist + self.dynamic_disturbance_a) - # dynamic_disturbance_b = self.current_step / float(self.current_step + self.dynamic_disturbance_b) - # object_files = [obj.filename for obj in self.interactive_objects] - # episode = Episode( - # env=self.scene.model_id, - # agent=self.robots[0].model_file, - # initial_pos=self.initial_pos, - # target_pos=self.target_pos, - # geodesic_distance=geodesic_distance, - # shortest_path=shortest_path, - # agent_trajectory=np.array(self.agent_trajectory), - # object_files=object_files, - # object_trajectory=np.array(self.object_trajectory), - # success=float(info['success']), - # path_efficiency=min(1.0, geodesic_distance / self.path_length), - # kinematic_disturbance=kinematic_disturbance, - # dynamic_disturbance_a=dynamic_disturbance_a, - # dynamic_disturbance_b=dynamic_disturbance_b, - # collision_step=self.collision_step, - # ) - # self.stored_episodes.append(episode) return done, info - def get_stored_episodes(self): - return self.stored_episodes - def before_simulation(self): + """ + Additional steps before physics simulation. Overwritten by subclasses. 
+ :return: cache + """ return None def after_simulation(self, cache, collision_links): + """ + :param cache: cache returned from before_simulation + :param collision_links: collisions from last time step + """ return + def step_visualization(self): + if self.mode != 'gui': + return + + self.initial_pos_vis_obj.set_position(self.initial_pos) + self.target_pos_vis_obj.set_position(self.target_pos) + + shortest_path, _ = self.get_shortest_path(entire_path=True) + floor_height = 0.0 if self.floor_num is None else self.scene.get_floor_height(self.floor_num) + num_nodes = min(self.num_waypoints_vis, shortest_path.shape[0]) + for i in range(num_nodes): + self.waypoints_vis[i].set_position(pos=np.array([shortest_path[i][0], + shortest_path[i][1], + floor_height])) + for i in range(num_nodes, self.num_waypoints_vis): + self.waypoints_vis[i].set_position(pos=np.array([0.0, 0.0, 100.0])) + def step(self, action): """ - apply robot's action and get state, reward, done and info, following openAI gym's convention - + apply robot's action and get state, reward, done and info, following PpenAI gym's convention :param action: a list of control signals - :return: state: state, reward, done, info + :return: state, reward, done, info """ self.current_step += 1 self.robots[0].apply_action(action) @@ -486,6 +558,7 @@ class NavigateEnv(BaseEnv): info = {} reward, info = self.get_reward(collision_links, action, info) done, info = self.get_termination(collision_links, info) + self.step_visualization() if done and self.automatic_reset: info['last_observation'] = state @@ -493,52 +566,55 @@ class NavigateEnv(BaseEnv): return state, reward, done, info def reset_agent(self): + """ + Reset the robot's joint configuration and base pose until no collision + """ max_trials = 100 for _ in range(max_trials): self.robots[0].robot_specific_reset() self.reset_initial_and_target_pos() if self.test_valid_position(): return - raise Exception("Failed to reset robot without collision") + print("WARNING: Failed to reset robot without collision") def reset_initial_and_target_pos(self): + """ + Reset the robot's base pose + """ self.robots[0].set_position(pos=self.initial_pos) self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(*self.initial_orn), 'wxyz')) def test_valid_position(self): + """ + :return: if initialization is valid (collision-free) + """ + self.robots[0].keep_still() collision_links = self.run_simulation() collision_links_flatten = [item for sublist in collision_links for item in sublist] return len(collision_links_flatten) == 0 - def reset(self): + def reset_variables(self): """ - Reset the agent to a collision-free start point + Reset bookkeeping variables for the next new episode """ - self.current_episode += 1 - self.reset_agent() - state = self.get_state() - - if self.reward_type == 'l2': - self.potential = self.get_l2_potential() - elif self.reward_type == 'dense': - self.potential = self.get_potential() - self.current_step = 0 self.collision_step = 0 - self.kinematic_disturbance = 0.0 - self.dynamic_disturbance_a = 0.0 - self.dynamic_disturbance_b = 0.0 self.path_length = 0.0 - self.agent_trajectory = [] - self.object_trajectory = [] - self.interactive_objects_collided = set() - self.energy_cost = 0.0 - # set position for visual objects - if self.visual_object_at_initial_target_pos: - self.initial_pos_vis_obj.set_position(self.initial_pos) - self.target_pos_vis_obj.set_position(self.target_pos) + def reset(self): + """ + Reset episode + """ + self.reset_agent() + self.simulator.sync() + state = 
self.get_state() + if self.reward_type == 'l2': + self.potential = self.get_l2_potential() + elif self.reward_type == 'geodesic': + self.potential = self.get_geodesic_potential() + self.reset_variables() + self.step_visualization() return state @@ -555,6 +631,16 @@ class NavigateRandomEnv(NavigateEnv): random_height=False, device_idx=0, ): + """ + :param config_file: config_file path + :param model_id: override model_id in config file + :param mode: headless or gui mode + :param action_timestep: environment executes action per action_timestep second + :param physics_timestep: physics timestep for pybullet + :param automatic_reset: whether to automatic reset after an episode finishes + :param random_height: whether to randomize height for target position (for reaching task) + :param device_idx: device_idx: which GPU to run the simulation and rendering on + """ super(NavigateRandomEnv, self).__init__(config_file, model_id=model_id, mode=mode, @@ -563,175 +649,122 @@ class NavigateRandomEnv(NavigateEnv): automatic_reset=automatic_reset, device_idx=device_idx) self.random_height = random_height - self.random_init_z_offset = self.config.get('random_init_z_offset', 0.1) + + self.target_dist_min = self.config.get('target_dist_min', 1.0) + self.target_dist_max = self.config.get('target_dist_max', 10.0) + + self.initial_pos_z_offset = self.config.get('initial_pos_z_offset', 0.1) + falling_distance_in_one_action_step = 0.5 * 9.8 * action_timestep * action_timestep + assert self.initial_pos_z_offset > falling_distance_in_one_action_step * 1.5,\ + 'initial_pos_z_offset for robot and objects should be larger than the falling distance in one action step' def reset_initial_and_target_pos(self): - floor, self.initial_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height) + """ + Reset the robot's base to a random empty position on a random floor with a random orientation + The geodesic distance (or L2 distance if traversable map graph is not built) + has to be between [self.target_dist_min, self.target_dist_max] + """ + _, self.initial_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height) max_trials = 100 dist = 0.0 for _ in range(max_trials): # if initial and target positions are < 1 meter away from each other, reinitialize _, self.target_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height) - dist = l2_distance(self.initial_pos, self.target_pos) - if dist > 1.0: + if self.scene.build_graph: + _, dist = self.get_shortest_path(from_initial_pos=True) + else: + dist = l2_distance(self.initial_pos, self.target_pos) + if self.target_dist_min < dist < self.target_dist_max: break - if dist < 1.0: - raise Exception("Failed to find initial and target pos that are >1m apart") + if not (self.target_dist_min < dist < self.target_dist_max): + print("WARNING: Failed to sample initial and target positions") + self.robots[0].set_position(pos=[self.initial_pos[0], self.initial_pos[1], - self.initial_pos[2] + self.random_init_z_offset]) + self.initial_pos[2] + self.initial_pos_z_offset]) self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, np.random.uniform(0, np.pi * 2)), 'wxyz')) def reset(self): + """ + Reset episode + """ self.floor_num = self.scene.get_random_floor() + # reset "virtual floor" to the correct height self.scene.reset_floor(floor=self.floor_num, additional_elevation=0.02) state = super(NavigateRandomEnv, self).reset() return state -class InteractiveGibsonNavigateEnv(NavigateRandomEnv): +class 
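A minimal usage sketch of the random-navigation environment defined above (the config path is the one added in this PR; it assumes the Gibson assets and the Ohoopee model are available locally):

from gibson2.envs.locomotor_env import NavigateRandomEnv

env = NavigateRandomEnv(config_file='examples/configs/locobot_p2p_nav_house.yaml',
                        mode='headless',
                        action_timestep=1 / 10.0,
                        physics_timestep=1 / 240.0)
state = env.reset()
for _ in range(100):
    state, reward, done, info = env.step(env.action_space.sample())
    if done:
        state = env.reset()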
NavigateRandomEnvSim2Real(NavigateRandomEnv): def __init__(self, config_file, model_id=None, - collision_reward_weight=0.0, mode='headless', action_timestep=1 / 10.0, physics_timestep=1 / 240.0, device_idx=0, automatic_reset=False, + collision_reward_weight=0.0, + track='static' ): - super(InteractiveGibsonNavigateEnv, self).__init__(config_file, - model_id=model_id, - mode=mode, - action_timestep=action_timestep, - physics_timestep=physics_timestep, - automatic_reset=automatic_reset, - random_height=False, - device_idx=device_idx) + super(NavigateRandomEnvSim2Real, self).__init__(config_file, + model_id=model_id, + mode=mode, + action_timestep=action_timestep, + physics_timestep=physics_timestep, + automatic_reset=automatic_reset, + random_height=False, + device_idx=device_idx) self.collision_reward_weight = collision_reward_weight + assert track in ['static', 'interactive', 'dynamic'], 'unknown track' + self.track = track - self.replaced_objects = [] - self.replaced_objects_pos = [] - self.additional_objects = [] - self.class_map = {} + if self.track == 'interactive': + self.interactive_objects_num_dups = 2 + self.interactive_objects = self.load_interactive_objects() + elif self.track == 'dynamic': + self.num_dynamic_objects = 1 + self.dynamic_objects = [] + self.dynamic_objects_last_actions = [] + for _ in range(self.num_dynamic_objects): + robot = Turtlebot(self.config) + self.simulator.import_robot(robot, class_id=1) + self.dynamic_objects.append(robot) + self.dynamic_objects_last_actions.append(robot.action_space.sample()) - self.should_load_replaced_objects = self.config.get('should_load_replaced_objects', False) - self.should_load_additional_objects = self.config.get('should_load_additional_objects', False) + # dynamic objects will repeat their actions for 10 action timesteps + self.dynamic_objects_action_repeat = 10 - self.build_class_map() - self.load_replaced_objects() - self.load_additional_objects() - self.interactive_objects = self.replaced_objects + self.additional_objects - self.new_potential = None + def load_interactive_objects(self): + """ + Load interactive objects + :return: a list of interactive objects + """ + interactive_objects = [] + interactive_objects_path = [ + 'object_2eZY2JqYPQE.urdf', + 'object_lGzQi2Pk5uC.urdf', + 'object_ZU6u5fvE8Z1.urdf', + 'object_H3ygj6efM8V.urdf', + 'object_RcqC01G24pR.urdf' + ] - self.visualize_waypoints = True - if self.visualize_waypoints and self.mode == 'gui': - cyl_length = 0.2 - self.waypoints_vis = [VisualMarker(visual_shape=p.GEOM_CYLINDER, - rgba_color=[0, 1, 0, 0.3], - radius=0.1, - length=cyl_length, - initial_offset=[0, 0, cyl_length / 2.0]) for _ in range(10)] - for waypoint in self.waypoints_vis: - waypoint.load() - - def build_class_map(self): - # 0 and 1 are reserved for mesh and robot - init_class_id = 2 - - if self.should_load_replaced_objects: - self.replaced_object_classes = [ - '03001627', # chair - '04256520', # couch - '04379243', # table / desk - '00000001', # door - ] - for item in self.replaced_object_classes: - self.class_map[item] = init_class_id - init_class_id += 1 - - if self.should_load_additional_objects: - self.additional_objects_path = [ - 'object_2eZY2JqYPQE.urdf', - 'object_lGzQi2Pk5uC.urdf', - 'object_ZU6u5fvE8Z1.urdf', - 'object_H3ygj6efM8V.urdf', - 'object_RcqC01G24pR.urdf' - ] - - for item in self.additional_objects_path: - self.class_map[item] = init_class_id - init_class_id += 1 - - self.num_object_classes = init_class_id - - def load_replaced_objects(self): - if not 
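For the 'dynamic' track above, each Turtlebot obstacle keeps its last sampled action and dynamic_objects_action_repeat sets how many agent steps that action is held before resampling; the step-time handling itself is not part of this hunk. A toy standalone illustration of the hold-and-resample pattern:

import numpy as np

action_repeat = 10                      # mirrors dynamic_objects_action_repeat
rng = np.random.default_rng(0)
last_action = None
for step in range(30):
    if step % action_repeat == 0:
        last_action = rng.uniform(-1.0, 1.0, size=2)   # resample every 10 steps
    # apply last_action to the obstacle robot here (hypothetical driver loop)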
self.should_load_replaced_objects: - return - scene_path = os.path.join(gibson2.assets_path, 'dataset', self.scene.model_id) - urdf_files = [item for item in os.listdir(scene_path) if item[-4:] == 'urdf'] - position_files = [item[:-4].replace('alignment_centered', 'pos') + 'txt' for item in urdf_files] - - for urdf_file, position_file in zip(urdf_files, position_files): - with open(os.path.join(scene_path, position_file)) as f: - pos = np.array([float(item) for item in f.readlines()[0].strip().split()]) - # filter out duplicate annotations for the same object - if len(self.replaced_objects_pos) == 0 or \ - np.min(np.linalg.norm(np.array(self.replaced_objects_pos) - pos, axis=1)) > 0.5: - class_id = urdf_file.split('.')[0].split('_')[-1] - obj = InteractiveObj(os.path.join(scene_path, urdf_file)) - self.simulator.import_object(obj, class_id=self.class_map[class_id]) - self.replaced_objects.append(obj) - self.replaced_objects_pos.append(pos) - - def load_additional_objects(self): - if not self.should_load_additional_objects: - return - num_dupicates = 2 - for _ in range(num_dupicates): - for urdf_model in self.additional_objects_path: + for _ in range(self.interactive_objects_num_dups): + for urdf_model in interactive_objects_path: obj = InteractiveObj(os.path.join(gibson2.assets_path, 'models/sample_urdfs', urdf_model)) - self.simulator.import_object(obj, class_id=self.class_map[urdf_model]) - self.additional_objects.append(obj) + self.simulator.import_object(obj, class_id=2) + interactive_objects.append(obj) + return interactive_objects - def global_to_local(self, pos): - return rotate_vector_3d(pos - self.robots[0].get_position(), *self.robots[0].get_rpy()) - - def get_additional_states(self): - target_pos_local = self.global_to_local(self.target_pos) - linear_velocity_local = rotate_vector_3d(self.robots[0].robot_body.velocity(), - *self.robots[0].get_rpy()) - angular_velocity_local = rotate_vector_3d(self.robots[0].robot_body.angular_velocity(), - *self.robots[0].get_rpy()) - - source = self.robots[0].get_position()[:2] - target = self.target_pos[:2] - shortest_path, geodesic_dist = self.scene.get_shortest_path(self.floor_num, source, target) - - robot_z = self.robots[0].get_position()[2] - if self.visualize_waypoints and self.mode == 'gui': - for i in range(10): - self.waypoints_vis[i].set_position(pos=np.array([shortest_path[i][0], shortest_path[i][1], robot_z])) - waypoints_local_xy = np.array([self.global_to_local(np.concatenate((waypoint, [robot_z])))[:2] - for waypoint in shortest_path]).flatten() - additional_states = np.concatenate((waypoints_local_xy, - target_pos_local, - linear_velocity_local, - angular_velocity_local)) - # cache results for reward calculation - self.new_potential = geodesic_dist - - assert len(additional_states) == self.additional_states_dim, 'additional states dimension mismatch' - return additional_states - - def get_potential(self): - return self.new_potential - - def reset_additional_objects(self): - for obj in self.additional_objects: - while True: + def reset_interactive_objects(self): + """ + Reset the poses of interactive objects to have no collisions with the scene mesh + """ + max_trials = 100 + for obj in self.interactive_objects: + for _ in range(max_trials): _, pos = self.scene.get_random_point_floor(self.floor_num, self.random_height) - obj.set_position_rotation([pos[0], pos[1], pos[2] + self.random_init_z_offset], - quatToXYZW(euler2quat(0.0, 0.0, 0.0), 'wxyz')) + obj.set_position_orientation([pos[0], pos[1], pos[2] + 
self.initial_pos_z_offset], + quatToXYZW(euler2quat(0.0, 0.0, 0.0), 'wxyz')) has_collision = False for _ in range(self.simulator_loop): self.simulator_step() @@ -741,567 +774,67 @@ class InteractiveGibsonNavigateEnv(NavigateRandomEnv): if not has_collision: break - def reset_replaced_objects(self): - for obj, pos in zip(self.replaced_objects, self.replaced_objects_pos): - obj.set_position_rotation([pos[0], pos[1], pos[2] + self.random_init_z_offset], - quatToXYZW(euler2quat(0.0, 0.0, 0.0), 'wxyz')) + def reset_dynamic_objects(self): + """ + Reset the poses of dynamic objects to have no collisions with the scene mesh + """ + max_trials = 100 + shortest_path, _ = self.get_shortest_path(entire_path=True) + floor_height = 0.0 if self.floor_num is None else self.scene.get_floor_height(self.floor_num) + for obj in self.dynamic_objects: + for _ in range(max_trials): + obj.robot_specific_reset() + pos = shortest_path[np.random.choice(shortest_path.shape[0])] + obj.set_position_orientation([pos[0], pos[1], floor_height + self.initial_pos_z_offset], + quatToXYZW(euler2quat(0.0, 0.0, 0.0), 'wxyz')) + has_collision = False + for _ in range(self.simulator_loop): + self.simulator_step() + if len(p.getContactPoints(bodyA=obj.robot_ids[0])) > 0: + has_collision = True + break + if not has_collision: + break - def reset(self): - self.floor_num = self.scene.get_random_floor() - self.scene.reset_floor(floor=self.floor_num, additional_elevation=0.05) - self.reset_replaced_objects() - self.reset_additional_objects() - - self.new_potential = None - state = NavigateEnv.reset(self) - return state - - def before_simulation(self): - robot_position = self.robots[0].get_position() - object_positions = [obj.get_position() for obj in self.interactive_objects] - return robot_position, object_positions - - def after_simulation(self, cache, collision_links): - robot_position, object_positions = cache - - collision_links_flatten = [item for sublist in collision_links for item in sublist] - if len(collision_links_flatten) > 0: - self.dynamic_disturbance_a += np.mean([ - np.linalg.norm( - np.sum([elem[9] * np.array(elem[7]) for elem in sublist], axis=0) # sum of all forces - ) - for sublist in collision_links]) - collision_objects = set([col[2] for col in collision_links_flatten]) - self.dynamic_disturbance_b += len(collision_objects) - self.interactive_objects_collided |= collision_objects - - self.agent_trajectory.append(np.concatenate((self.robots[0].get_position(), self.robots[0].get_orientation()))) - self.object_trajectory.append([np.concatenate((obj.get_position(), obj.get_orientation())) - for obj in self.interactive_objects]) - self.path_length += np.linalg.norm(self.robots[0].get_position() - robot_position) - self.kinematic_disturbance += np.sum([ - obj.mass * np.linalg.norm(np.array(obj.get_position()) - np.array(prev_pos)) - for obj, prev_pos in zip(self.interactive_objects, object_positions) - if obj.body_id in self.interactive_objects_collided - ]) - - -class InteractiveNavigateEnv(NavigateEnv): - def __init__(self, - config_file, - mode='headless', - action_timestep=1 / 10.0, - physics_timestep=1 / 240.0, - random_position=False, - device_idx=0, - automatic_reset=False, - arena="simple_hl_ll"): - super(InteractiveNavigateEnv, self).__init__(config_file, - mode=mode, - action_timestep=action_timestep, - physics_timestep=physics_timestep, - automatic_reset=automatic_reset, - device_idx=device_idx) - - self.arena = arena - assert self.arena in [ - "only_ll_obstacles", - "only_ll", - "simple_hl_ll", - 
"complex_hl_ll" - ], "Wrong arena" - - self.floor = VisualMarker(visual_shape=p.GEOM_BOX, rgba_color=[0.643, 0.643, 0.788, 0.0], - half_extents=[20, 20, 0.02], initial_offset=[0, 0, -0.03]) - self.floor.load() - self.floor.set_position([0, 0, 0]) - self.simulator.import_object(self.floor, class_id=0) - - self.door = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'realdoor.urdf'), - scale=1.35) - self.simulator.import_interactive_object(self.door, class_id=2) - if self.arena == "only_ll" or self.arena == "only_ll_obstacles": - self.door.set_position_rotation([100.0, 100.0, -0.03], quatToXYZW(euler2quat(0, 0, np.pi / 2.0), 'wxyz')) - else: - self.door.set_position_rotation([0.0, 0.0, -0.03], quatToXYZW(euler2quat(0, 0, -np.pi / 2.0), 'wxyz')) - self.door_angle = self.config.get('door_angle', 90) - self.door_angle = (self.door_angle / 180.0) * np.pi - self.door_handle_link_id = 2 - self.door_axis_link_id = 1 - self.jr_end_effector_link_id = 33 # 'm1n6s200_end_effector' - self.random_position = random_position - - if self.arena == "only_ll_obstacles": - self.box_poses = [ - [[np.random.uniform(-4, 4), np.random.uniform(-4, -1), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, 4), np.random.uniform(-4, -1), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, 4), np.random.uniform(-4, -1), 1], [0, 0, 0, 1]], - - [[np.random.uniform(-4, 4), np.random.uniform(1, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, 4), np.random.uniform(1, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, 4), np.random.uniform(1, 4), 1], [0, 0, 0, 1]], - - [[np.random.uniform(-4, -1), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, -1), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(-4, -1), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - - [[np.random.uniform(1, 4), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(1, 4), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - [[np.random.uniform(1, 4), np.random.uniform(-4, 4), 1], [0, 0, 0, 1]], - ] - - self.walls = [] - for box_pose in self.box_poses: - box = BoxShape(pos=box_pose[0], dim=[0.5, 0.5, 1]) - self.simulator.import_object(box, class_id=3) - self.walls += [box] - - elif self.arena == "only_ll": - self.wall_poses = [ - [[0, -3, 1], [0, 0, 0, 1]], - [[0, 3, 1], [0, 0, 0, 1]], - [[-3, 0, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[3, 0, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - ] - - self.walls = [] - for wall_pose in self.wall_poses: - wall = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls.urdf'), - scale=1) - self.simulator.import_object(wall, class_id=3) - wall.set_position_rotation(wall_pose[0], wall_pose[1]) - self.walls += [wall] - - elif self.arena == "simple_hl_ll": - self.wall_poses = [ - [[0, -3, 1], [0, 0, 0, 1]], - [[0, 3, 1], [0, 0, 0, 1]], - [[-3, 0, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[3, 0, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[0, -7.8, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[0, 7.8, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - ] - - self.walls = [] - for wall_pose in self.wall_poses: - wall = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls.urdf'), - scale=1) - self.simulator.import_object(wall, class_id=3) - wall.set_position_rotation(wall_pose[0], wall_pose[1]) - self.walls += [wall] - - elif self.arena == "complex_hl_ll": - self.wall_poses = [ - [[0, -3, 1], [0, 0, 0, 1]], - [[0, 6, 1], [0, 0, 0, 1]], - [[-3, 0, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[3, 0, 1], [0, 0, 
np.sqrt(0.5), np.sqrt(0.5)]] - ] - - self.half_wall_poses = [ - [[1.3, 3, 1], [0, 0, 0, 1]], - ] - - self.quarter_wall_poses = [ - [[0.0, 7.68, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - [[0.0, 2.45, 1], [0, 0, np.sqrt(0.5), np.sqrt(0.5)]], - ] - - self.walls = [] - for wall_pose in self.wall_poses: - wall = InteractiveObj(os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls.urdf'), - scale=1) - self.simulator.import_object(wall, class_id=3) - wall.set_position_rotation(wall_pose[0], wall_pose[1]) - self.walls += [wall] - - for wall_pose in self.half_wall_poses: - wall = InteractiveObj( - os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls_half.urdf'), - scale=1) - self.simulator.import_object(wall, class_id=3) - wall.set_position_rotation(wall_pose[0], wall_pose[1]) - self.walls += [wall] - - for wall_pose in self.quarter_wall_poses: - wall = InteractiveObj( - os.path.join(gibson2.assets_path, 'models', 'scene_components', 'walls_quarter.urdf'), - scale=1) - self.simulator.import_object(wall, class_id=3) - wall.set_position_rotation(wall_pose[0], wall_pose[1]) - self.walls += [wall] - - # dense reward - self.stage = 0 - self.prev_stage = self.stage - self.stage_get_to_door_handle = 0 - self.stage_open_door = 1 - self.stage_get_to_target_pos = 2 - - # attaching JR's arm to the door handle - self.door_handle_dist_thresh = 0.2 - self.cid = None - - # visualize subgoal - cyl_length = 3.0 - self.subgoal_end_effector = VisualMarker(rgba_color=[0, 0, 0, 0.8], radius=0.06) - self.subgoal_end_effector.load() - self.subgoal_end_effector_base = VisualMarker(visual_shape=p.GEOM_CYLINDER, - rgba_color=[1, 1, 0, 0.8], - radius=0.05, - length=cyl_length, - initial_offset=[0, 0, cyl_length / 2]) - self.subgoal_end_effector_base.load() - - self.door_handle_vis = VisualMarker(rgba_color=[1, 0, 0, 0.0], radius=self.door_handle_dist_thresh) - self.door_handle_vis.load() - - # TODO: move robot joint id and name mapping to robot_locomotors.py - self.id_to_name = { - 0: {"name": "ground", "links": {-1: "base", 0: "ground"}}, - 1: {"name": "ground", "links": {-1: "base", 0: "ground"}}, - 2: {"name": "ground", "links": {-1: "base", 0: "ground"}}, - 3: {"name": "ground", "links": {-1: "base", 0: "ground"}}, - } - self.id_to_name[self.door.body_id] = {"name": "door", - "links": {-1: "world", 0: "base", 1: "door_leaf", 2: "door_knob"}} - for i, wall in enumerate(self.walls): - self.id_to_name[wall.body_id] = {"name": "wall%d" % (i + 1), "links": {-1: "world", 0: "wall"}} - self.id_to_name[self.robots[0].robot_ids[0]] = {"name": "robot", "links": { - -1: "base", - 0: "base_chassis", - 1: "jr2_fixed_body (wrapper)", - 2: "left wheel", - 3: "right wheel", - 4: "front_caster_pivot", - 5: "front_caster_wheel", - 6: "rear_caster_pivot", - 7: "rear_caster_wheel", - 8: "ext_imu_frame", - 9: "rear_laser", - 10: "front_laser", - 11: "lower_velodyne_frame", - 12: "occam", - 13: "occam_omni_optical", - 14: "upper_velodyne_frame", - 15: "scan", - 16: "gps_frame", - 17: "pan", - 18: "tilt", - 19: "camera", - 20: "camera_rgb_frame", - 21: "camera_rgb_optical_frame", - 22: "camera_depth_frame", - 23: "camera_depth_optical_frame", - 24: "eyes", - 25: "right_arm_attach", - 26: "m1n6s200_link_base", - 27: "m1n6s200_link_1 (shoulder)", - 28: "m1n6s200_link_2 (arm)", - 29: "m1n6s200_link_3 (elbow)", - 30: "m1n6s200_link_4 (forearm)", - 31: "m1n6s200_link_5 (wrist)", - 32: "m1n6s200_link_6 (hand)", - 33: "end_effector", - }} - self.num_object_classes = 4 - - def set_subgoal(self, ideal_next_state): - 
obs_avg = (self.observation_normalizer['sensor'][1] + self.observation_normalizer['sensor'][0]) / 2.0 - obs_mag = (self.observation_normalizer['sensor'][1] - self.observation_normalizer['sensor'][0]) / 2.0 - ideal_next_state = (ideal_next_state * obs_mag) + obs_avg - self.subgoal_end_effector.set_position(ideal_next_state) - self.subgoal_end_effector_base.set_position([ideal_next_state[0], ideal_next_state[1], 0]) - - def set_subgoal_type(self, only_base=True): - if only_base: - # Make the marker for the end effector completely transparent - self.subgoal_end_effector.set_color([0, 0, 0, 0.0]) - else: - self.subgoal_end_effector.set_color([0, 0, 0, 0.8]) - - def reset_interactive_objects(self): - # close the door - p.resetJointState(self.door.body_id, self.door_axis_link_id, targetValue=0.0, targetVelocity=0.0) - if self.cid is not None: - p.removeConstraint(self.cid) - self.cid = None - - def reset_initial_and_target_pos(self): - if self.arena == "only_ll" or self.arena == "only_ll_obstacles": - if self.random_position: - pos = [np.random.uniform(-0.5, 0.5), np.random.uniform(-0.5, 0.5), 0] - else: - pos = [0.0, 0.0, 0.0] - elif self.arena == "simple_hl_ll": - if self.random_position: - pos = [np.random.uniform(1, 2), np.random.uniform(-2, 2), 0] - else: - pos = [1.0, 0.0, 0.0] - elif self.arena == "complex_hl_ll": - if self.random_position: - pos = [np.random.uniform(-2, -1.7), np.random.uniform(4.5, 5), 0] - else: - pos = [-2, 4, 0.0] - - # self.robots[0].set_position(pos=[pos[0], pos[1], pos[2] + 0.1]) - self.robots[0].set_position(pos=[pos[0], pos[1], pos[2]]) - self.initial_pos = pos - - if self.arena == "only_ll" or self.arena == "only_ll_obstacles": - self.robots[0].set_orientation( - orn=quatToXYZW(euler2quat(0, 0, np.random.uniform(0, np.pi * 2)), 'wxyz') - ) - elif self.arena == "simple_hl_ll": - if self.random_position: - self.robots[0].set_orientation( - orn=quatToXYZW(euler2quat(0, 0, np.random.uniform(0, np.pi * 2)), 'wxyz') - ) - else: - self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, np.pi), 'wxyz')) - elif self.arena == "complex_hl_ll": - if self.random_position: - self.robots[0].set_orientation( - orn=quatToXYZW(euler2quat(0, 0, np.random.uniform(0, np.pi * 2)), 'wxyz') - ) - else: - self.robots[0].set_orientation(orn=quatToXYZW(euler2quat(0, 0, 0), 'wxyz')) - - # # wait for the base to fall down to the ground and for the arm to move to its initial position - # for _ in range(int(0.5 / self.physics_timestep)): - # self.simulator_step() - - if self.arena == "only_ll" or self.arena == "only_ll_obstacles": - self.target_pos = [-100, -100, 0] - elif self.arena == "simple_hl_ll" or self.arena == "complex_hl_ll": - if self.random_position: - self.target_pos = [np.random.uniform(-2, -1), np.random.uniform(-2, 2), 0.0] - else: - self.target_pos = np.array([-1.5, 0.0, 0.0]) - - self.door_handle_vis.set_position(pos=np.array(p.getLinkState(self.door.body_id, self.door_handle_link_id)[0])) - - def reset(self): - self.reset_interactive_objects() - self.stage = 0 - self.prev_stage = self.stage - return super(InteractiveNavigateEnv, self).reset() - - # wrap the specified dimensions of the states to [-pi, pi] - def wrap_to_pi(self, states, indices): - states[indices] = states[indices] - 2.0 * np.pi * np.floor((states[indices] + np.pi) / (np.pi * 2)) - return states - - def get_state(self, collision_links=[]): - state = super(InteractiveNavigateEnv, self).get_state(collision_links) - # self.state_stats['sensor'].append(state['sensor']) - # 
self.state_stats['auxiliary_sensor'].append(state['auxiliary_sensor']) - if self.normalize_observation: - for key in state: - obs_min = self.observation_normalizer[key][0] - obs_max = self.observation_normalizer[key][1] - obs_avg = (self.observation_normalizer[key][1] + self.observation_normalizer[key][0]) / 2.0 - obs_mag = (self.observation_normalizer[key][1] - self.observation_normalizer[key][0]) / 2.0 - # clipped = np.clip(state[key], obs_min, obs_max) - # if np.sum(state[key] == clipped) / float(state[key].shape[0]) < 0.8: - # print("WARNING: more than 20% of the observations are clipped for key: {}".format(key)) - state[key] = (np.clip(state[key], obs_min, obs_max) - obs_avg) / obs_mag # normalize to [-1, 1] - # self.state_stats['rgb'].append(state['rgb']) - # self.state_stats['depth'].append(state['depth']) - return state - - def get_additional_states(self): - additional_states = self.robots[0].get_end_effector_position() - assert len(additional_states) == self.additional_states_dim, 'additional states dimension mismatch' - return additional_states - - def get_auxiliary_sensor(self, collision_links=[]): - auxiliary_sensor = np.zeros(self.auxiliary_sensor_dim) - robot_state = self.robots[0].calc_state() - assert self.auxiliary_sensor_dim == 66 - assert robot_state.shape[0] == 31 - - robot_state = self.wrap_to_pi(robot_state, np.arange(7, 28, 3)) # wrap wheel and arm joint pos to [-pi, pi] - - end_effector_pos = self.robots[0].get_end_effector_position() - self.robots[0].get_position() - end_effector_pos = rotate_vector_3d(end_effector_pos, *self.robots[0].get_rpy()) - auxiliary_sensor[:3] = self.robots[0].get_position() # x, y, z - auxiliary_sensor[3:6] = end_effector_pos # arm_x, arm_y_ arm_z (local) - auxiliary_sensor[6:11] = robot_state[1:6] # vx, vy, vz, roll, pitch - auxiliary_sensor[11:46:5] = robot_state[7:28:3] # pos for wheel 1, 2, arm joint 1, 2, 3, 4, 5 - auxiliary_sensor[12:47:5] = robot_state[8:29:3] # vel for wheel 1, 2, arm joint 1, 2, 3, 4, 5 - auxiliary_sensor[13:48:5] = robot_state[9:30:3] # trq for wheel 1, 2, arm joint 1, 2, 3, 4, 5 - auxiliary_sensor[14:49:5] = np.cos(robot_state[7:28:3]) # cos(pos) for wheel 1, 2, arm joint 1, 2, 3, 4, 5 - auxiliary_sensor[15:50:5] = np.sin(robot_state[7:28:3]) # sin(pos) for wheel 1, 2, arm joint 1, 2, 3, 4, 5 - auxiliary_sensor[46:49] = robot_state[28:31] # v_roll, v_pitch, v_yaw - - roll, pitch, yaw = self.robots[0].get_rpy() - cos_yaw, sin_yaw = np.cos(yaw), np.sin(yaw) - door_angle = p.getJointState(self.door.body_id, self.door_axis_link_id)[0] - cos_door_angle, sin_door_angle = np.cos(door_angle), np.sin(door_angle) - has_door_handle_in_hand = 1.0 if self.stage == self.stage_open_door else -1.0 - door_pos = np.array([0, 0, -0.02]) - target_pos = self.target_pos - robot_pos = self.robots[0].get_position() - door_pos_local = rotate_vector_3d(door_pos - robot_pos, roll, pitch, yaw) - target_pos_local = rotate_vector_3d(target_pos - robot_pos, roll, pitch, yaw) - collision_links_flatten = [item for sublist in collision_links for item in sublist] - has_collision = 1.0 if len(collision_links_flatten) > 0 else -1.0 - - auxiliary_sensor[49:52] = np.array([yaw, cos_yaw, sin_yaw]) - auxiliary_sensor[52:56] = np.array([door_angle, cos_door_angle, sin_door_angle, has_door_handle_in_hand]) - auxiliary_sensor[56:59] = target_pos - auxiliary_sensor[59:62] = door_pos_local - auxiliary_sensor[62:65] = target_pos_local - auxiliary_sensor[65] = has_collision - - return auxiliary_sensor - - def filter_collision_links(self, collision_links): - 
collision_links = super(InteractiveNavigateEnv, self).filter_collision_links(collision_links) - # ignore collision between hand and door - collision_links = [[item for item in sublist - if not (item[2] == self.door.body_id and item[3] in [32, 33])] - for sublist in collision_links] - return collision_links + def step_dynamic_objects(self): + """ + Apply actions to dynamic objects (default: temporally extended random walk) + """ + if self.current_step % self.dynamic_objects_action_repeat == 0: + self.dynamic_objects_last_actions = [robot.action_space.sample() for robot in self.dynamic_objects] + for robot, action in zip(self.dynamic_objects, self.dynamic_objects_last_actions): + robot.apply_action(action) def step(self, action): - self.current_step += 1 + """ + Step dynamic objects as well + """ + if self.track == 'dynamic': + self.step_dynamic_objects() - dist = np.linalg.norm( - np.array(p.getLinkState(self.door.body_id, self.door_handle_link_id)[0]) - - np.array(p.getLinkState(self.robots[0].robot_ids[0], self.jr_end_effector_link_id)[0]) - ) - # print('dist', dist) + return super(NavigateRandomEnvSim2Real, self).step(action) - self.prev_stage = self.stage - if self.stage == self.stage_get_to_door_handle and dist < self.door_handle_dist_thresh: - assert self.cid is None - self.cid = p.createConstraint(self.robots[0].robot_ids[0], self.jr_end_effector_link_id, - self.door.body_id, self.door_handle_link_id, - p.JOINT_POINT2POINT, [0, 0, 0], - [0, 0.0, 0], [0, 0, 0]) - p.changeConstraint(self.cid, maxForce=500) - self.stage = self.stage_open_door - print("stage open_door") + def reset(self): + """ + Reset episode + """ + self.floor_num = self.scene.get_random_floor() + # reset "virtual floor" to the correct height + self.scene.reset_floor(floor=self.floor_num, additional_elevation=0.05) - if self.stage == self.stage_open_door and p.getJointState(self.door.body_id, 1)[0] > self.door_angle: - assert self.cid is not None - p.removeConstraint(self.cid) - self.cid = None - self.stage = self.stage_get_to_target_pos - print("stage get to target pos") + if self.track == 'interactive': + self.reset_interactive_objects() - door_angle = p.getJointState(self.door.body_id, self.door_axis_link_id)[0] + state = NavigateEnv.reset(self) - # door is pushed in the wrong direction, gradually reset it back to the neutral state - if door_angle < -0.01: - max_force = 10000 - p.setJointMotorControl2(bodyUniqueId=self.door.body_id, - jointIndex=self.door_axis_link_id, - controlMode=p.POSITION_CONTROL, - targetPosition=0.0, - positionGain=1, - force=max_force) - # door is pushed in the correct direction - else: - # if the door has not been opened, overwrite the previous position control with a trivial one - if self.stage != self.stage_get_to_target_pos: - max_force = 0 - p.setJointMotorControl2(bodyUniqueId=self.door.body_id, - jointIndex=self.door_axis_link_id, - controlMode=p.POSITION_CONTROL, - targetPosition=door_angle, - positionGain=0, - velocityGain=0, - force=max_force) - - # if the door has already been opened, try to set velocity to 0 so that it's more difficult for - # the agent to move the door on its way to the target position - else: - max_force = 100 - p.setJointMotorControl2(bodyUniqueId=self.door.body_id, - jointIndex=self.door_axis_link_id, - controlMode=p.POSITION_CONTROL, - targetPosition=door_angle, - targetVelocity=0.0, - positionGain=0, - velocityGain=1, - force=max_force) - - return super(InteractiveNavigateEnv, self).step(action) - - def get_potential(self): - door_angle = 
p.getJointState(self.door.body_id, self.door_axis_link_id)[0] - door_handle_pos = p.getLinkState(self.door.body_id, self.door_handle_link_id)[0] - if self.stage == self.stage_get_to_door_handle: - potential = l2_distance(door_handle_pos, self.robots[0].get_end_effector_position()) - elif self.stage == self.stage_open_door: - potential = -door_angle - elif self.stage == self.stage_get_to_target_pos: - potential = l2_distance(self.target_pos, self.get_position_of_interest()) - return potential - - def get_reward(self, collision_links=[], action=None, info={}): - reward = 0.0 - - if self.reward_type == 'dense': - if self.stage != self.prev_stage: - # advance to the next stage - self.potential = self.get_potential() - reward += self.success_reward / 2.0 - else: - new_potential = self.get_potential() - potential_reward = self.potential - new_potential - reward += potential_reward * self.potential_reward_weight # |potential_reward| ~= 0.1 per step - self.potential = new_potential - elif self.reward_type == "l2": - new_potential = self.get_l2_potential() - potential_reward = self.potential - new_potential - reward += potential_reward * self.potential_reward_weight - self.potential = new_potential - elif self.reward_type == 'stage_sparse': - if self.stage != self.prev_stage: - reward += self.success_reward / 2.0 - - base_moving = np.any(np.abs(action[:2]) >= 0.01) - arm_moving = np.any(np.abs(action[2:]) >= 0.01) - electricity_reward = float(base_moving) + float(arm_moving) - self.energy_cost += electricity_reward - reward += electricity_reward * self.electricity_reward_weight - - collision_links_flatten = [item for sublist in collision_links for item in sublist] - collision_reward = float(len(collision_links_flatten) > 0) - self.collision_step += int(len(collision_links_flatten) > 0) - reward += collision_reward * self.collision_reward_weight # |collision_reward| ~= 1.0 per step if collision - info['collision_reward'] = collision_reward * self.collision_reward_weight # expose collision reward to info - # self.reward_stats.append(np.abs(collision_reward * self.collision_reward_weight)) - - # goal reached - if l2_distance(self.target_pos, self.get_position_of_interest()) < self.dist_tol: - reward += self.success_reward # |success_reward| = 10.0 - - # death penalty - floor_height = 0.0 if self.floor_num is None else self.scene.get_floor_height(self.floor_num) - if self.robots[0].get_position()[2] > floor_height + self.death_z_thresh: - reward -= self.success_reward * 1.0 - - # push door the wrong way - # door_angle = p.getJointState(self.door.body_id, self.door_axis_link_id)[0] - # if door_angle > (10.0 / 180.0 * np.pi): - # reward -= self.success_reward * 1.0 - - # print("get_reward (stage %d): %f" % (self.stage, reward)) - return reward, info + if self.track == 'dynamic': + self.reset_dynamic_objects() + state = self.get_state() + return state if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--robot', - '-r', - choices=['turtlebot', 'jr'], - required=True, - help='which robot [turtlebot|jr]') parser.add_argument( '--config', '-c', @@ -1312,50 +845,31 @@ if __name__ == '__main__': default='headless', help='which mode for simulation (default: headless)') parser.add_argument('--env_type', - choices=['deterministic', 'random', 'interactive', 'ig'], + choices=['deterministic', 'random', 'sim2real'], default='deterministic', - help='which environment type (deterministic | random | interactive | ig') + help='which environment type (deterministic | random | sim2real)') 
+ parser.add_argument('--sim2real_track', + choices=['static', 'interactive', 'dynamic'], + default='static', + help='which sim2real track (static | interactive | dynamic)') args = parser.parse_args() - if args.robot == 'turtlebot': - config_filename = os.path.join(os.path.dirname(gibson2.__file__), - '../examples/configs/turtlebot_p2p_nav_discrete.yaml') \ - if args.config is None else args.config - elif args.robot == 'jr': - config_filename = os.path.join(os.path.dirname(gibson2.__file__), - '../examples/configs/jr2_reaching.yaml') \ - if args.config is None else args.config if args.env_type == 'deterministic': - nav_env = NavigateEnv(config_file=config_filename, + nav_env = NavigateEnv(config_file=args.config, mode=args.mode, action_timestep=1.0 / 10.0, physics_timestep=1.0 / 40.0) elif args.env_type == 'random': - nav_env = NavigateRandomEnv(config_file=config_filename, + nav_env = NavigateRandomEnv(config_file=args.config, mode=args.mode, action_timestep=1.0 / 10.0, physics_timestep=1.0 / 40.0) - elif args.env_type == 'ig': - nav_env = InteractiveGibsonNavigateEnv(config_file=config_filename, - mode=args.mode, - action_timestep=1.0 / 10.0, - physics_timestep=1 / 40.0) - else: - nav_env = InteractiveNavigateEnv(config_file=config_filename, - mode=args.mode, - action_timestep=1.0 / 10.0, - random_position=False, - physics_timestep=1.0 / 40.0, - arena='only_ll_obstacles') - - # # Sample code: manually set action using slide bar UI - # debug_params = [ - # p.addUserDebugParameter('link1', -1.0, 1.0, -0.5), - # p.addUserDebugParameter('link2', -1.0, 1.0, 0.5), - # p.addUserDebugParameter('link3', -1.0, 1.0, 0.5), - # p.addUserDebugParameter('link4', -1.0, 1.0, 0.5), - # p.addUserDebugParameter('link5', -1.0, 1.0, 0.0), - # ] + elif args.env_type == 'sim2real': + nav_env = NavigateRandomEnvSim2Real(config_file=args.config, + mode=args.mode, + action_timestep=1.0 / 10.0, + physics_timestep=1.0 / 40.0, + track=args.sim2real_track) for episode in range(100): print('Episode: {}'.format(episode)) @@ -1363,18 +877,6 @@ if __name__ == '__main__': nav_env.reset() for step in range(50): # 500 steps, 50s world time action = nav_env.action_space.sample() - # action[:] = 0.5 - # action[:] = -1.0 - # action[:] = -1.0 / 3 - # if nav_env.stage == 0: - # action[:2] = 0.5 - # elif nav_env.stage == 1: - # action[:2] = -0.5 - - # action = np.zeros(nav_env.action_space.shape) - # debug_param_values = [p.readUserDebugParameter(debug_param) for debug_param in debug_params] - # action[2:] = np.array(debug_param_values) - state, reward, done, _ = nav_env.step(action) # print('reward', reward) if done: diff --git a/gibson2/utils/utils.py b/gibson2/utils/utils.py index eb9f04d6d..5d26be0df 100644 --- a/gibson2/utils/utils.py +++ b/gibson2/utils/utils.py @@ -1,6 +1,6 @@ import numpy as np import yaml - +from scipy.spatial.transform import Rotation as R # File I/O related def parse_config(config): @@ -8,19 +8,29 @@ def parse_config(config): config_data = yaml.load(f, Loader=yaml.FullLoader) return config_data - # Geometry related def rotate_vector_3d(v, r, p, y): - """Rotates vector by roll, pitch and yaw counterclockwise""" - rot_x = np.array([[1, 0, 0], [0, np.cos(-r), -np.sin(-r)], [0, np.sin(-r), np.cos(-r)]]) - rot_y = np.array([[np.cos(-p), 0, np.sin(-p)], [0, 1, 0], [-np.sin(-p), 0, np.cos(-p)]]) - rot_z = np.array([[np.cos(-y), -np.sin(-y), 0], [np.sin(-y), np.cos(-y), 0], [0, 0, 1]]) - return np.dot(rot_x, np.dot(rot_y, np.dot(rot_z, v))) + """Rotates 3d vector by roll, pitch and yaw counterclockwise""" + 
local_to_global = R.from_euler('xyz', [r, p, y]).as_dcm() + global_to_local = local_to_global.T + return np.dot(global_to_local, v) +def rotate_vector_2d(v, yaw): + """Rotates 2d vector by yaw counterclockwise""" + local_to_global = R.from_euler('z', yaw).as_dcm() + global_to_local = local_to_global.T + global_to_local = global_to_local[:2, :2] + if len(v.shape) == 1: + return np.dot(global_to_local, v) + elif len(v.shape) == 2: + return np.dot(global_to_local, v.T).T + else: + print('Incorrect input shape for rotate_vector_2d', v.shape) + return v def l2_distance(v1, v2): """Returns the L2 distance between vector v1 and v2.""" - return np.linalg.norm(v1 - v2) + return np.linalg.norm(np.array(v1) - np.array(v2)) def quatFromXYZW(xyzw, seq): diff --git a/setup.py b/setup.py index 1d27a8115..b549f5d1c 100644 --- a/setup.py +++ b/setup.py @@ -110,7 +110,7 @@ setup( 'gym==0.12', 'Pillow==5.4.1', 'PyYAML==5.3', - 'pybullet==2.4.1', + 'pybullet==2.4.9', 'transforms3d==0.3.1', 'tqdm==4.19.9', 'Pillow==5.4.1',
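For reference, the falling-distance assertion added to NavigateRandomEnv.__init__ can be checked by hand; a minimal sketch, assuming the default action_timestep of 1/10 s used by the __main__ block above and the default initial_pos_z_offset of 0.1 m:

    # worked check of the constructor assertion (sketch, not part of the diff)
    action_timestep = 1.0 / 10.0
    initial_pos_z_offset = 0.1

    # distance a free-falling body covers in one action step: 0.5 * g * t^2
    falling_distance_in_one_action_step = 0.5 * 9.8 * action_timestep ** 2   # = 0.049 m

    # the constructor requires a 1.5x safety margin over that distance
    assert initial_pos_z_offset > falling_distance_in_one_action_step * 1.5  # 0.1 > 0.0735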
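Similarly, the scipy-based helpers added to gibson2/utils/utils.py can be sanity-checked numerically. As implemented, rotate_vector_2d and rotate_vector_3d express a world-frame vector in the rotated (local) frame, i.e. they apply the transpose of the rotation matrix. A minimal sketch, assuming gibson2 is importable and a scipy release that still provides Rotation.as_dcm (renamed to as_matrix in later versions):

    import numpy as np
    from gibson2.utils.utils import rotate_vector_2d, rotate_vector_3d, l2_distance

    # a world-frame +x vector, expressed in a frame yawed by 90 degrees, points along -y
    assert np.allclose(rotate_vector_2d(np.array([1.0, 0.0]), np.pi / 2), [0.0, -1.0])

    # the 3d variant behaves the same way for a pure yaw rotation
    assert np.allclose(rotate_vector_3d(np.array([1.0, 0.0, 0.0]), 0.0, 0.0, np.pi / 2),
                       [0.0, -1.0, 0.0])

    # l2_distance now accepts plain Python lists as well as numpy arrays
    assert np.isclose(l2_distance([0.0, 0.0, 0.0], [1.0, 1.0, 1.0]), np.sqrt(3.0))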