Husky environment (sensor + camera)

2017-10-26 12:03:43 -07:00 · 2017-10-26 12:03:43 -07:00 · f42ef6d3e9
parent 13b2b0b65b
commit f42ef6d3e9
8 changed files with 210 additions and 16 deletions
--- a/examples/agents/random_husky_camera.py
+++ b/examples/agents/random_husky_camera.py
@ -0,0 +1,62 @@
+from __future__ import print_function
+import time
+import numpy as np
+import sys
+import gym
+from PIL import Image
+from realenv.core.render.profiler import Profiler
+from realenv.envs.husky_env import HuskyCameraEnv
+import pybullet as p
+
+
+class RandomAgent(object):
+    """The world's simplest agent"""
+    def __init__(self, action_space):
+        self.action_space = action_space
+        self.time = 0
+        self.repeat = 1
+        self.action_last  = np.zeros(self.action_space.shape[0])
+
+    def act(self, observation, reward=None):
+        if self.time < self.repeat:
+            self.time = self.time + 1
+            return self.action_last
+        else:
+            self.time = 0
+            action = np.zeros(self.action_space.shape[0])
+            action[np.random.choice(action.shape[0], 1)] = np.random.choice(2, 1) - 1
+            self.action_last = action
+            return action
+
+if __name__ == '__main__':
+    env = HuskyCameraEnv(human=True, enable_sensors=True)
+    env.reset()
+    agent = RandomAgent(env.action_space)
+    ob = None
+
+    try:
+        while 1:
+            frame = 0
+            score = 0
+            restart_delay = 0
+            obs = env.reset()
+            while True:
+                time.sleep(0.01)
+                a = agent.act(obs)
+                with Profiler("Agent step function"):
+                    obs, r, done, meta = env.step(a)
+                score += r
+                frame += 1
+
+                if not done and frame < 60: continue
+                if restart_delay==0:
+                    print("score=%0.2f in %i frames" % (score, frame))
+                    restart_delay = 20 * 4  # 2 sec at 60 fps
+                else:
+                    restart_delay -= 1
+                    if restart_delay==0: break
+
+
+    except KeyboardInterrupt:
+        env._end()
+        print("Program finished")
--- a/examples/agents/random_husky_sensor.py
+++ b/examples/agents/random_husky_sensor.py
@ -0,0 +1,63 @@
+from __future__ import print_function
+import time
+import numpy as np
+import sys
+import gym
+from PIL import Image
+from realenv.core.render.profiler import Profiler
+from realenv.envs.husky_env import HuskySensorEnv
+import pybullet as p
+
+
+class RandomAgent(object):
+    """The world's simplest agent"""
+    def __init__(self, action_space):
+        self.action_space = action_space
+        self.time = 0
+        self.repeat = 1
+        self.action_last  = np.zeros(self.action_space.shape[0])
+
+    def act(self, observation, reward=None):
+        if self.time < self.repeat:
+            self.time = self.time + 1
+            return self.action_last
+        else:
+            self.time = 0
+            action = np.zeros(self.action_space.shape[0])
+            #action[np.random.randint(0, len(action))] = 1
+            action = [0] * self.action_space.shape[0]
+            self.action_last = action
+            return action
+
+if __name__ == '__main__':
+    env = HuskySensorEnv(human=True, enable_sensors=True)
+    env.reset()
+    agent = RandomAgent(env.action_space)
+    ob = None
+
+    try:
+        while 1:
+            frame = 0
+            score = 0
+            restart_delay = 0
+            obs = env.reset()
+            while True:
+                time.sleep(0.01)
+                a = agent.act(obs)
+                with Profiler("Agent step function"):
+                    obs, r, done, meta = env.step(a)
+                score += r
+                frame += 1
+
+                if not done and frame < 60: continue
+                if restart_delay==0:
+                    print("score=%0.2f in %i frames" % (score, frame))
+                    restart_delay = 20 * 4  # 2 sec at 60 fps
+                else:
+                    restart_delay -= 1
+                    if restart_delay==0: break
+
+
+    except KeyboardInterrupt:
+        env._end()
+        print("Program finished")
--- a/examples/train/train_humanoid_sensor.py
+++ b/examples/train/train_humanoid_sensor.py
@ -0,0 +1,43 @@
+#add parent dir to find package. Only needed for source code build, pip install doesn't need it.
+import os, inspect
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0,parentdir)
+
+import gym
+from realenv.envs.humanoid_env import HumanoidSensorEnv
+
+from baselines import deepq
+
+import datetime
+
+
+def callback(lcl, glb):
+    # stop training if reward exceeds 199
+    total = sum(lcl['episode_rewards'][-101:-1]) / 100
+    totalt = lcl['t']
+    is_solved = totalt > 2000 and total >= -50
+    return is_solved
+
+
+def main():
+  
+    env = HumanoidSensorEnv(human=True)
+    model = deepq.models.mlp([64])
+    act = deepq.learn(
+        env,
+        q_func=model,
+        lr=1e-3,
+        max_timesteps=10000,
+        buffer_size=50000,
+        exploration_fraction=0.1,
+        exploration_final_eps=0.02,
+        print_freq=10,
+        callback=callback
+    )
+    print("Saving model to humanoid_sensor_model.pkl")
+    act.save("humanoid_sensor_model.pkl")
+
+
+if __name__ == '__main__':
+    main()
--- a/realenv/core/physics/robot_locomotors.py
+++ b/realenv/core/physics/robot_locomotors.py
@ -224,8 +224,22 @@ class Humanoid(WalkerBase):
 class Husky(WalkerBase):
 	foot_list = ['front_left_wheel_link', 'front_right_wheel_link', 'rear_left_wheel_link', 'rear_right_wheel_link']

-	def __init__(self):
+	def __init__(self, is_discrete):
+		self.is_discrete = is_discrete
 		WalkerBase.__init__(self, "husky.urdf", "husky_robot", action_dim=4, obs_dim=20, power=2.5)
+		if self.is_discrete:
+			self.action_space = gym.spaces.Discrete(2 ** action_dim)
+
+	def _step(self, action):
+		if self.is_discrete:
+			realaction = []
+			action_count = action + 1
+			while action_count > 0:
+				realaction.append(int(action_count/2))
+			action = action_count
+		else:
+			realaction = action
+		WalkerBase.step(self, action)

 	def alive_bonus(self, z, pitch):
 		return +1 if z > 0.26 else -1  # 0.25 is central sphere rad, die if it scrapes the ground
--- a/realenv/core/render/README.md
+++ b/realenv/core/render/README.md
@ -8,7 +8,7 @@ Install opencv, pytorch and others
 ```shell
 conda install -c menpo opencv -y
 conda install pytorch torchvision cuda80 -c soumith
-conda install progressbar pillow
+conda install pillow
 pip install zmq
 ```

--- a/realenv/envs/env_modalities.py
+++ b/realenv/envs/env_modalities.py
@ -1,13 +1,12 @@
 from realenv.core.physics.scene_building import SinglePlayerBuildingScene
 from realenv.data.datasets import ViewDataSet3D, get_model_path
 from realenv.core.render.show_3d2 import PCRenderer
-from realenv.core.physics.physics_env import PhysicsEnv
 from realenv.envs.env_bases import MJCFBaseEnv
 import realenv
 from gym import error
 from gym.utils import seeding
 import pybullet as p
-import progressbar
+from tqdm import *
 import subprocess, os, signal
 import numpy as np
 import sys
@ -55,6 +54,7 @@ class SensorRobotEnv(MJCFBaseEnv):
            self.ground_ids = set([(self.building_scene.building_obj, 0)])
            p.configureDebugVisualizer(p.COV_ENABLE_RENDERING,1)
        for i in range (p.getNumBodies()):
+            print(p.getBodyInfo(i)[0].decode(), self.robot_body.get_name())
            if (p.getBodyInfo(i)[0].decode() == self.robot_body.get_name()):
               self.robot_tracking_id=i
        i = 0
@ -263,10 +263,8 @@ class CameraRobotEnv(SensorRobotEnv):
        uuids, rts = self.dataset.get_scene_info(scene_id)

        targets, sources, source_depths, poses = [], [], [], []
-        pbar  = progressbar.ProgressBar(widgets=[
-            ' [ Initializing Environment ] ', progressbar.Bar(), ' (', progressbar.ETA(), ') ',])
        
-        for k,v in pbar(uuids):
+        for k,v in tqdm((uuids)):
            data = self.dataset[v]
            target, target_depth = data[1], data[3]
            if self.scale_up !=1:
@ -303,9 +301,9 @@ class CameraRobotEnv(SensorRobotEnv):
                filename = tb.tb_frame.f_code.co_filename
                name = tb.tb_frame.f_code.co_name
                lineno = tb.tb_lineno
-                print '   File "%.500s", line %d, in %.500s' %(filename, lineno, name)
+                print('   File "%.500s", line %d, in %.500s' %(filename, lineno, name))
                tb = tb.tb_next
-            print ' %s: %s' %(exctype.__name__, value)
+            print(' %s: %s' %(exctype.__name__, value))
        sys.excepthook = camera_multi_excepthook

        dr_path = os.path.join(os.path.dirname(os.path.abspath(realenv.__file__)), 'core', 'channels', 'depth_render')
--- a/realenv/envs/humanoid_env.py
+++ b/realenv/envs/humanoid_env.py
@ -1,5 +1,5 @@
 from realenv.envs.env_modalities import CameraRobotEnv, SensorRobotEnv
-from realenv.core.physics.robot_locomotors import Humanoid, Ant, Husky
+from realenv.core.physics.robot_locomotors import Humanoid
 import gym

 HUMANOID_TIMESTEP  = 1.0/(4 * 22)
--- a/realenv/envs/husky_env.py
+++ b/realenv/envs/husky_env.py
@ -1,18 +1,32 @@
 from realenv.envs.env_modalities import CameraRobotEnv, SensorRobotEnv
 from realenv.core.physics.robot_locomotors import Husky

+HUMANOID_TIMESTEP  = 1.0/(4 * 22)
+HUMANOID_FRAMESKIP = 4

 class HuskyEnv:
-    def __init__(self):
-        self.robot = Husky()
-        PhysicsExtendedEnv.__init__(self, self.robot)
-
+    metadata = {
+        'render.modes' : ['human', 'rgb_array'],
+        'video.frames_per_second' : 30
+    }
+    def __init__(self, is_discrete=False):
+        self.robot = Husky(is_discrete)
+        

 class HuskyCameraEnv(HuskyEnv, CameraRobotEnv):
-    pass
+    def __init__(self, human=True, timestep=HUMANOID_TIMESTEP, 
+        frame_skip=HUMANOID_FRAMESKIP, enable_sensors=False,
+        is_discrete=False):
+        HuskyEnv.__init__(self, is_discrete)
+        CameraRobotEnv.__init__(self, human, timestep=timestep, 
+            frame_skip=frame_skip, enable_sensors=enable_sensors)

 class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
-    pass
+    def __init__(self, human=True, timestep=HUMANOID_TIMESTEP, 
+        frame_skip=HUMANOID_FRAMESKIP, enable_sensors=False,
+        is_discrete=False):
+        HuskyEnv.__init__(self, is_discrete)
+        SensorRobotEnv.__init__(self, human)