add husky flagrun
parent ba7fb4865c
commit 621f9d1b76
@@ -0,0 +1,56 @@
from __future__ import print_function
import time
import numpy as np
import sys
import gym
from PIL import Image
from realenv.core.render.profiler import Profiler
from realenv.envs.husky_env import HuskyFlagRunEnv
import pybullet as p


class RandomAgent(object):
    """The world's simplest agent"""

    def __init__(self, action_space, is_discrete=False):
        self.action_space = action_space
        self.is_discrete = is_discrete

    def act(self, observation, reward=None):
        if self.is_discrete:
            action = np.random.randint(self.action_space.n)
        else:
            action = np.zeros(self.action_space.shape[0])
            if (np.random.random() < 0.2):
                action[np.random.choice(action.shape[0], 1)] = np.random.randint(-1, 2)
        return action


if __name__ == '__main__':
    env = HuskyFlagRunEnv(human=True, timestep=1.0 / (4 * 22), frame_skip=4, enable_sensors=True, is_discrete=True)
    obs = env.reset()
    agent = RandomAgent(env.action_space, is_discrete=True)
    assert (not obs is None)

    while 1:
        frame = 0
        score = 0
        restart_delay = 0
        obs = env.reset()
        assert (not obs is None)
        while True:
            time.sleep(0.01)
            a = agent.act(obs)
            print("action", a)
            with Profiler("Agent step function"):
                obs, r, done, meta = env.step(a)
            score += r
            frame += 1

            if not done and frame < 60: continue
            if restart_delay == 0:
                print("score=%0.2f in %i frames" % (score, frame))
                restart_delay = 200 * 4  # 2 sec at 60 fps
            else:
                restart_delay -= 1
                if restart_delay == 0: break
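Not part of the commit: a minimal sketch of how the same RandomAgent loop can be smoke-tested against a stock Gym task when realenv and pybullet are not installed. CartPole-v0 and the old 4-tuple step API are assumptions here, chosen only because the demo script above uses that same API shape.

# Hypothetical smoke test (not in this commit): exercise the RandomAgent
# sampling logic against a standard Gym environment. Assumes gym's
# CartPole-v0 and the classic obs-only reset / 4-tuple step API.
import gym
import numpy as np

class RandomAgent(object):
    def __init__(self, action_space, is_discrete=False):
        self.action_space = action_space
        self.is_discrete = is_discrete

    def act(self, observation, reward=None):
        if self.is_discrete:
            return np.random.randint(self.action_space.n)
        action = np.zeros(self.action_space.shape[0])
        if np.random.random() < 0.2:
            action[np.random.choice(action.shape[0], 1)] = np.random.randint(-1, 2)
        return action

if __name__ == '__main__':
    env = gym.make("CartPole-v0")
    agent = RandomAgent(env.action_space, is_discrete=True)
    obs = env.reset()
    for _ in range(200):
        obs, r, done, info = env.step(agent.act(obs))
        if done:
            obs = env.reset()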
@@ -0,0 +1,44 @@
# add parent dir to find package. Only needed for source code build, pip install doesn't need it.
import os, inspect

currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)

import gym
from realenv.envs.husky_env import HuskyFlagRunEnv

from baselines import deepq

import datetime


def callback(lcl, glb):
    # stop training if reward exceeds 199
    total = sum(lcl['episode_rewards'][-101:-1]) / 100
    totalt = lcl['t']
    is_solved = totalt > 2000 and total >= -50
    is_solved = False
    return is_solved


def main():
    env = HuskyFlagRunEnv(human=True, is_discrete=True, enable_sensors=True)
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback
    )
    print("Saving model to humanoid_sensor_model.pkl")
    act.save("humanoid_sensor_model.pkl")


if __name__ == '__main__':
    main()
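Note that the callback above always returns False (is_solved is overwritten), so training runs for the full max_timesteps. As a follow-up, a hedged sketch of replaying the policy saved by act.save(): deepq.load() and the act(obs[None])[0] call pattern are assumptions taken from the old OpenAI baselines examples, not something this commit provides.

# Hedged sketch (not in this commit): replay the saved DQN policy,
# assuming the old baselines deepq.load() API.
from realenv.envs.husky_env import HuskyFlagRunEnv
from baselines import deepq

def enjoy():
    env = HuskyFlagRunEnv(human=True, is_discrete=True, enable_sensors=True)
    act = deepq.load("humanoid_sensor_model.pkl")
    obs = env.reset()
    while True:
        # act expects a batch of observations and returns a batch of actions
        action = act(obs[None])[0]
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()

if __name__ == '__main__':
    enjoy()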
@ -1,6 +1,6 @@
|
|||
## Scene configurations
|
||||
SCENE_TYPE = 'building'
|
||||
#SCENE_TYPE = 'stadium'
|
||||
#SCENE_TYPE = 'building'
|
||||
SCENE_TYPE = 'stadium'
|
||||
|
||||
|
||||
## WORKAROUND (hzyjerry): scaling building instead of agent, this is because
|
||||
|
|
|
@@ -171,6 +171,7 @@ class Camera:
        lookat = [x,y,z]
        distance = 10
        yaw = 10
        p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
        if MAKE_VIDEO:
            p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)

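For context (not part of the diff): pybullet's debug-visualizer camera is placed by distance, yaw, pitch and a target point, so the call above orbits the GUI camera around lookat. A minimal standalone sketch, assuming a GUI connection is available:

# Minimal sketch mirroring the call above; assumes pybullet with a GUI connection.
import pybullet as p

p.connect(p.GUI)
p.resetDebugVisualizerCamera(cameraDistance=10, cameraYaw=10,
                             cameraPitch=-20, cameraTargetPosition=[0, 0, 0])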
@@ -147,8 +147,8 @@ class SensorRobotEnv(BaseEnv):
        if self.human:
            humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
            humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])

            p.resetDebugVisualizerCamera(self.tracking_camera['distance'],self.tracking_camera['yaw'], self.tracking_camera['pitch'],humanPos); ## demo: kitchen, living room
            if MAKE_VIDEO:
                p.resetDebugVisualizerCamera(self.tracking_camera['distance'],self.tracking_camera['yaw'], self.tracking_camera['pitch'],humanPos); ## demo: kitchen, living room
            #p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs

        eye_pos = self.robot.eyes.current_position()
@@ -3,10 +3,15 @@ from realenv.core.physics.robot_locomotors import Husky
from transforms3d import quaternions
import numpy as np
import pybullet as p
from realenv.core.physics.scene_stadium import SinglePlayerStadiumScene
import pybullet_data

HUMANOID_TIMESTEP = 1.0/(4 * 22)
HUMANOID_FRAMESKIP = 4

import os
from realenv.configs import *

class HuskyEnv:
    metadata = {
        'render.modes' : ['human', 'rgb_array'],
@@ -62,6 +67,8 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
        obs = SensorRobotEnv._reset(self)
        self.nframe = 0
        return obs


    def _step(self, a=None):
        self.nframe += 1

@@ -141,8 +148,8 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
        if self.human:
            humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
            humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])

            p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
            if MAKE_VIDEO:
                p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
                                             self.tracking_camera['pitch'], humanPos)  ## demo: kitchen, living room
            # p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs

@@ -163,5 +170,165 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
        self.tracking_camera['yaw'] = yaw  ## living roon
        self.tracking_camera['pitch'] = -10

        self.tracking_camera['distance'] = distance
        self.tracking_camera['z_offset'] = offset


class HuskyFlagRunEnv(HuskyEnv, SensorRobotEnv):
    def __init__(self, human=True, timestep=HUMANOID_TIMESTEP,
                 frame_skip=HUMANOID_FRAMESKIP, enable_sensors=False,
                 is_discrete=False):
        self.human = human
        self.timestep = timestep
        self.frame_skip = frame_skip
        HuskyEnv.__init__(self, is_discrete)
        SensorRobotEnv.__init__(self)
        self.nframe = 0
        assert (isinstance(self.scene, SinglePlayerStadiumScene))
        self.flag_timeout = 1

        self.visualid = p.createVisualShape(p.GEOM_MESH, fileName=os.path.join(pybullet_data.getDataPath(), 'cube.obj'),
                                            meshScale=[0.5, 0.5, 0.5], rgbaColor=[1, 0, 0, 0.7])

        self.lastid = None

    def _reset(self):
        obs = SensorRobotEnv._reset(self)
        self.nframe = 0
        return obs

    def flag_reposition(self):

        self.walk_target_x = self.np_random.uniform(low=-self.scene.stadium_halflen,
                                                    high=+self.scene.stadium_halflen)
        self.walk_target_y = self.np_random.uniform(low=-self.scene.stadium_halfwidth,
                                                    high=+self.scene.stadium_halfwidth)

        more_compact = 0.5  # set to 1.0 whole football field
        self.walk_target_x *= more_compact
        self.walk_target_y *= more_compact

        self.flag = None
        #self.flag = self.scene.cpp_world.debug_sphere(self.walk_target_x, self.walk_target_y, 0.2, 0.2, 0xFF8080)
        self.flag_timeout = 600 / self.scene.frame_skip
        #print('targetxy', self.flagid, self.walk_target_x, self.walk_target_y, p.getBasePositionAndOrientation(self.flagid))
        #p.resetBasePositionAndOrientation(self.flagid, posObj = [self.walk_target_x, self.walk_target_y, 0.5], ornObj = [0,0,0,0])
        if self.lastid:
            p.removeBody(self.lastid)

        self.lastid = p.createMultiBody(baseVisualShapeIndex=self.visualid, baseCollisionShapeIndex=-1, basePosition=[self.walk_target_x, self.walk_target_y, 0.5])

        self.robot.walk_target_x = self.walk_target_x
        self.robot.walk_target_y = self.walk_target_y

    def _step(self, a=None):

        if self.flag_timeout <= 0:
            self.flag_reposition()

        self.nframe += 1
        self.flag_timeout -= 1

        # dummy state if a is None
        if not self.scene.multiplayer:  # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
            if not a is None:
                self.robot.apply_action(a)
            self.scene.global_step()

        state = self.robot.calc_state()  # also calculates self.joints_at_limit

        alive = float(self.robot.alive_bonus(state[0] + self.robot.initial_z, self.robot.body_rpy[
            1]))  # state[0] is body height above ground, body_rpy[1] is pitch

        done = False
        print(self.nframe)
        # done = alive < 0
        if not np.isfinite(state).all():
            print("~INF~", state)
            done = True

        potential_old = self.potential
        self.potential = self.robot.calc_potential()
        progress = float(self.potential - potential_old)

        feet_collision_cost = 0.0
        for i, f in enumerate(
                self.robot.feet):  # TODO: Maybe calculating feet contacts could be done within the robot code
            # print(f.contact_list())
            contact_ids = set((x[2], x[4]) for x in f.contact_list())
            # print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
            if (self.ground_ids & contact_ids):
                # see Issue 63: https://github.com/openai/roboschool/issues/63
                # feet_collision_cost += self.foot_collision_cost
                self.robot.feet_contact[i] = 1.0
            else:
                self.robot.feet_contact[i] = 0.0
            # print(self.robot.feet_contact)

        if not a is None:
            electricity_cost = self.electricity_cost * float(np.abs(
                a * self.robot.joint_speeds).mean())  # let's assume we have DC motor with controller, and reverse current braking
            electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
        else:
            electricity_cost = 0

        joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
        debugmode = 0
        if (debugmode):
            print("alive=")
            print(alive)
            print("progress")
            print(progress)
            print("electricity_cost")
            print(electricity_cost)
            print("joints_at_limit_cost")
            print(joints_at_limit_cost)
            print("feet_collision_cost")
            print(feet_collision_cost)

        self.rewards = [
            # alive,
            progress,
            # electricity_cost,
            # joints_at_limit_cost,
            # feet_collision_cost
        ]

        print(self.robot.walk_target_dist)
        if (debugmode):
            print("rewards=")
            print(self.rewards)
            print("sum rewards")
            print(sum(self.rewards))
        if not a is None:
            self.HUD(state, a, done)
        self.reward += sum(self.rewards)

        if self.human:
            humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
            humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])

            if MAKE_VIDEO:
                p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
                                             self.tracking_camera['pitch'], humanPos)  ## demo: kitchen, living room
            # p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs

        eye_pos = self.robot.eyes.current_position()
        x, y, z, w = self.robot.eyes.current_orientation()
        eye_quat = quaternions.qmult([w, x, y, z], self.robot.eye_offset_orn)
        print(sum(self.rewards))
        return state, sum(self.rewards), bool(done), {"eye_pos": eye_pos, "eye_quat": eye_quat}

        # self.tracking_camera['pitch'] = -45  ## stairs
        yaw = 90  ## demo: living room
        # yaw = 30  ## demo: kitchen
        offset = 0.5
        distance = 0.7  ## living room
        # self.tracking_camera['yaw'] = 90  ## demo: stairs

        self.tracking_camera['yaw'] = yaw  ## living roon
        self.tracking_camera['pitch'] = -10

        self.tracking_camera['distance'] = distance
        self.tracking_camera['z_offset'] = offset
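The only reward term left active in HuskyFlagRunEnv._step is progress, the change in the robot's potential between consecutive steps. A hedged illustration of that shaping, assuming calc_potential() follows the roboschool convention of -distance_to_target / dt (the robot-side code is not shown in this diff):

# Hedged illustration (not in this commit) of the progress reward in
# HuskyFlagRunEnv._step, assuming a roboschool-style potential of -dist / dt.
import numpy as np

def potential(robot_xy, target_xy, dt):
    dist = float(np.linalg.norm(np.asarray(target_xy) - np.asarray(robot_xy)))
    return -dist / dt

dt = 4 * (1.0 / (4 * 22))          # frame_skip * timestep used by the demo script
old = potential([0.0, 0.0], [2.1, 0.0], dt)
new = potential([0.1, 0.0], [2.1, 0.0], dt)
progress = new - old               # positive when the husky moves toward the flag
print(progress)                    # ~ 0.1 / dt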