diff --git a/examples/agents/random_husky_flagrun.py b/examples/agents/random_husky_flagrun.py
new file mode 100644
index 000000000..793d39a21
--- /dev/null
+++ b/examples/agents/random_husky_flagrun.py
@@ -0,0 +1,56 @@
+from __future__ import print_function
+import time
+import numpy as np
+import sys
+import gym
+from PIL import Image
+from realenv.core.render.profiler import Profiler
+from realenv.envs.husky_env import HuskyFlagRunEnv
+import pybullet as p
+
+
+class RandomAgent(object):
+    """The world's simplest agent"""
+
+    def __init__(self, action_space, is_discrete=False):
+        self.action_space = action_space
+        self.is_discrete = is_discrete
+
+    def act(self, observation, reward=None):
+        if self.is_discrete:
+            action = np.random.randint(self.action_space.n)
+        else:
+            action = np.zeros(self.action_space.shape[0])
+            if np.random.random() < 0.2:
+                action[np.random.choice(action.shape[0], 1)] = np.random.randint(-1, 2)
+        return action
+
+
+if __name__ == '__main__':
+    env = HuskyFlagRunEnv(human=True, timestep=1.0 / (4 * 22), frame_skip=4, enable_sensors=True, is_discrete=True)
+    obs = env.reset()
+    agent = RandomAgent(env.action_space, is_discrete=True)
+    assert obs is not None
+
+    while 1:
+        frame = 0
+        score = 0
+        restart_delay = 0
+        obs = env.reset()
+        assert obs is not None
+        while True:
+            time.sleep(0.01)
+            a = agent.act(obs)
+            print("action", a)
+            with Profiler("Agent step function"):
+                obs, r, done, meta = env.step(a)
+            score += r
+            frame += 1
+
+            if not done and frame < 60: continue
+            if restart_delay == 0:
+                print("score=%0.2f in %i frames" % (score, frame))
+                restart_delay = 200 * 4  # linger for 800 frames before restarting
+            else:
+                restart_delay -= 1
+                if restart_delay == 0: break
\ No newline at end of file
diff --git a/examples/train/train_husky_flagrun.py b/examples/train/train_husky_flagrun.py
new file mode 100644
index 000000000..5ce84b38f
--- /dev/null
+++ b/examples/train/train_husky_flagrun.py
@@ -0,0 +1,44 @@
+# add parent dir to find package. Only needed for source code build; pip install doesn't need it.
+import os, inspect
+
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0, parentdir)
+
+import gym
+from realenv.envs.husky_env import HuskyFlagRunEnv
+
+from baselines import deepq
+
+import datetime
+
+
+def callback(lcl, glb):
+    # stop training once the mean reward over the last 100 episodes reaches -50
+    total = sum(lcl['episode_rewards'][-101:-1]) / 100
+    totalt = lcl['t']
+    is_solved = totalt > 2000 and total >= -50
+    is_solved = False  # early stopping disabled for now
+    return is_solved
+
+
+def main():
+    env = HuskyFlagRunEnv(human=True, is_discrete=True, enable_sensors=True)
+    model = deepq.models.mlp([64])
+    act = deepq.learn(
+        env,
+        q_func=model,
+        lr=1e-3,
+        max_timesteps=100000,
+        buffer_size=50000,
+        exploration_fraction=0.1,
+        exploration_final_eps=0.02,
+        print_freq=10,
+        callback=callback
+    )
+    print("Saving model to husky_flagrun_model.pkl")
+    act.save("husky_flagrun_model.pkl")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/realenv/configs.py b/realenv/configs.py
index c289a3690..44e62676d 100644
--- a/realenv/configs.py
+++ b/realenv/configs.py
@@ -1,6 +1,6 @@
 ## Scene configurations
-SCENE_TYPE = 'building'
-#SCENE_TYPE = 'stadium'
+#SCENE_TYPE = 'building'
+SCENE_TYPE = 'stadium'
 
 
 ## WORKAROUND (hzyjerry): scaling building instead of agent, this is because
diff --git a/realenv/envs/env_bases.py b/realenv/envs/env_bases.py
index ea07300d5..9e874a942 100644
--- a/realenv/envs/env_bases.py
+++ b/realenv/envs/env_bases.py
@@ -171,6 +171,7 @@ class Camera:
         lookat = [x, y, z]
         distance = 10
         yaw = 10
-        p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
+        if MAKE_VIDEO:
+            p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
 
 
diff --git a/realenv/envs/env_modalities.py b/realenv/envs/env_modalities.py
index 6bed3adf5..2f0f46267 100644
--- a/realenv/envs/env_modalities.py
+++ b/realenv/envs/env_modalities.py
@@ -147,8 +147,8 @@ class SensorRobotEnv(BaseEnv):
         if self.human:
             humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
             humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])
-
-            p.resetDebugVisualizerCamera(self.tracking_camera['distance'],self.tracking_camera['yaw'], self.tracking_camera['pitch'],humanPos); ## demo: kitchen, living room
+            if MAKE_VIDEO:
+                p.resetDebugVisualizerCamera(self.tracking_camera['distance'],self.tracking_camera['yaw'], self.tracking_camera['pitch'],humanPos); ## demo: kitchen, living room
             #p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs
 
         eye_pos = self.robot.eyes.current_position()
diff --git a/realenv/envs/husky_env.py b/realenv/envs/husky_env.py
index 107c65646..ddc5eec0e 100644
--- a/realenv/envs/husky_env.py
+++ b/realenv/envs/husky_env.py
@@ -3,10 +3,15 @@ from realenv.core.physics.robot_locomotors import Husky
 from transforms3d import quaternions
 import numpy as np
 import pybullet as p
+from realenv.core.physics.scene_stadium import SinglePlayerStadiumScene
+import pybullet_data
 
 HUMANOID_TIMESTEP = 1.0/(4 * 22)
 HUMANOID_FRAMESKIP = 4
 
+import os
+from realenv.configs import *
+
 class HuskyEnv:
     metadata = {
         'render.modes' : ['human', 'rgb_array'],
@@ -62,6 +67,8 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
         obs = SensorRobotEnv._reset(self)
         self.nframe = 0
         return obs
+
+
     def _step(self, a=None):
         self.nframe += 1
 
@@ -141,8 +148,8 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
         if self.human:
             humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
             humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])
-
-            p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
+            if MAKE_VIDEO:
+                p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
                                          self.tracking_camera['pitch'], humanPos)  ## demo: kitchen, living room
             # p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs
 
@@ -163,5 +170,165 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
         self.tracking_camera['yaw'] = yaw  ## living roon
         self.tracking_camera['pitch'] = -10
+        self.tracking_camera['distance'] = distance
+        self.tracking_camera['z_offset'] = offset
+
+
+class HuskyFlagRunEnv(HuskyEnv, SensorRobotEnv):
+    def __init__(self, human=True, timestep=HUMANOID_TIMESTEP,
+                 frame_skip=HUMANOID_FRAMESKIP, enable_sensors=False,
+                 is_discrete=False):
+        self.human = human
+        self.timestep = timestep
+        self.frame_skip = frame_skip
+        HuskyEnv.__init__(self, is_discrete)
+        SensorRobotEnv.__init__(self)
+        self.nframe = 0
+        assert isinstance(self.scene, SinglePlayerStadiumScene)
+        self.flag_timeout = 1
+
+        self.visualid = p.createVisualShape(p.GEOM_MESH,
+                                            fileName=os.path.join(pybullet_data.getDataPath(), 'cube.obj'),
+                                            meshScale=[0.5, 0.5, 0.5], rgbaColor=[1, 0, 0, 0.7])
+
+        self.lastid = None
+
+    def _reset(self):
+        obs = SensorRobotEnv._reset(self)
+        self.nframe = 0
+        return obs
+
+    def flag_reposition(self):
+        self.walk_target_x = self.np_random.uniform(low=-self.scene.stadium_halflen,
+                                                    high=+self.scene.stadium_halflen)
+        self.walk_target_y = self.np_random.uniform(low=-self.scene.stadium_halfwidth,
+                                                    high=+self.scene.stadium_halfwidth)
+
+        more_compact = 0.5  # set to 1.0 for the whole football field
+        self.walk_target_x *= more_compact
+        self.walk_target_y *= more_compact
+
+        self.flag = None
+        #self.flag = self.scene.cpp_world.debug_sphere(self.walk_target_x, self.walk_target_y, 0.2, 0.2, 0xFF8080)
+        self.flag_timeout = 600 / self.scene.frame_skip
+        #print('targetxy', self.flagid, self.walk_target_x, self.walk_target_y, p.getBasePositionAndOrientation(self.flagid))
+        #p.resetBasePositionAndOrientation(self.flagid, posObj=[self.walk_target_x, self.walk_target_y, 0.5], ornObj=[0, 0, 0, 0])
+        if self.lastid:
+            p.removeBody(self.lastid)
+
+        self.lastid = p.createMultiBody(baseVisualShapeIndex=self.visualid, baseCollisionShapeIndex=-1,
+                                        basePosition=[self.walk_target_x, self.walk_target_y, 0.5])
+
+        self.robot.walk_target_x = self.walk_target_x
+        self.robot.walk_target_y = self.walk_target_y
+
+    def _step(self, a=None):
+        if self.flag_timeout <= 0:
+            self.flag_reposition()
+
+        self.nframe += 1
+        self.flag_timeout -= 1
+
+        # dummy state if a is None
+        if not self.scene.multiplayer:  # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
+            if a is not None:
+                self.robot.apply_action(a)
+            self.scene.global_step()
+
+        state = self.robot.calc_state()  # also calculates self.joints_at_limit
+
+        alive = float(self.robot.alive_bonus(state[0] + self.robot.initial_z,
+                                             self.robot.body_rpy[1]))  # state[0] is body height above ground, body_rpy[1] is pitch
+
+        done = False
+        print(self.nframe)
+        # done = alive < 0
+        if not np.isfinite(state).all():
+            print("~INF~", state)
+            done = True
+
+        potential_old = self.potential
+        self.potential = self.robot.calc_potential()
+        progress = float(self.potential - potential_old)
+
+        feet_collision_cost = 0.0
+        for i, f in enumerate(self.robot.feet):  # TODO: Maybe calculating feet contacts could be done within the robot code
+            # print(f.contact_list())
+            contact_ids = set((x[2], x[4]) for x in f.contact_list())
+            # print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)))
+            if self.ground_ids & contact_ids:
+                # see Issue 63: https://github.com/openai/roboschool/issues/63
+                # feet_collision_cost += self.foot_collision_cost
+                self.robot.feet_contact[i] = 1.0
+            else:
+                self.robot.feet_contact[i] = 0.0
+        # print(self.robot.feet_contact)
+
+        if a is not None:
+            electricity_cost = self.electricity_cost * float(np.abs(a * self.robot.joint_speeds).mean())  # let's assume we have DC motor with controller, and reverse current braking
+            electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
+        else:
+            electricity_cost = 0
+
+        joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
+        debugmode = 0
+        if debugmode:
+            print("alive=")
+            print(alive)
+            print("progress")
+            print(progress)
+            print("electricity_cost")
+            print(electricity_cost)
+            print("joints_at_limit_cost")
+            print(joints_at_limit_cost)
+            print("feet_collision_cost")
+            print(feet_collision_cost)
+
+        self.rewards = [
+            # alive,
+            progress,
+            # electricity_cost,
+            # joints_at_limit_cost,
+            # feet_collision_cost
+        ]
+
+        print(self.robot.walk_target_dist)
+        if debugmode:
+            print("rewards=")
+            print(self.rewards)
+            print("sum rewards")
+            print(sum(self.rewards))
+        if a is not None:
+            self.HUD(state, a, done)
+        self.reward += sum(self.rewards)
+
+        if self.human:
+            humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
+            humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])
+
+            if MAKE_VIDEO:
+                p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
+                                             self.tracking_camera['pitch'], humanPos)  ## demo: kitchen, living room
+            # p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs
+
+        eye_pos = self.robot.eyes.current_position()
+        x, y, z, w = self.robot.eyes.current_orientation()
+        eye_quat = quaternions.qmult([w, x, y, z], self.robot.eye_offset_orn)
+        print(sum(self.rewards))
+        return state, sum(self.rewards), bool(done), {"eye_pos": eye_pos, "eye_quat": eye_quat}
+
+        # self.tracking_camera['pitch'] = -45  ## stairs
+        yaw = 90  ## demo: living room
+        # yaw = 30  ## demo: kitchen
+        offset = 0.5
+        distance = 0.7  ## living room
+        # self.tracking_camera['yaw'] = 90  ## demo: stairs
+
+
+        self.tracking_camera['yaw'] = yaw  ## living room
+        self.tracking_camera['pitch'] = -10
+        self.tracking_camera['distance'] = distance
         self.tracking_camera['z_offset'] = offset
\ No newline at end of file
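
Reviewer note: with every other term commented out, the reward returned by HuskyFlagRunEnv._step reduces to `progress`, the change in `self.robot.calc_potential()` between consecutive steps. The sketch below shows that pattern in isolation, assuming the usual roboschool-style potential (negative distance to the target divided by the simulation timestep); `calc_potential` here is a hypothetical standalone function for illustration, not the repo's method.

    import numpy as np

    # Hypothetical standalone version of the potential used by roboschool-style
    # locomotion envs: negative distance to the flag, scaled by the sim timestep,
    # so the per-step change in potential acts as a velocity-toward-target reward.
    def calc_potential(robot_xy, target_xy, dt=1.0 / (4 * 22)):
        walk_target_dist = np.linalg.norm(np.asarray(target_xy, dtype=float)
                                          - np.asarray(robot_xy, dtype=float))
        return -walk_target_dist / dt

    potential_old = calc_potential((0.0, 0.0), (3.0, 4.0))  # distance 5.0
    potential_new = calc_potential((0.6, 0.8), (3.0, 4.0))  # distance 4.0, i.e. 1 m closer
    progress = potential_new - potential_old                # +1.0 / dt, positive when approaching
    print(progress)

Because the reward is a difference of potentials, it is dense (nonzero on almost every step), and its sum over an episode telescopes to the net reduction in distance to the flag.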