Husky environment (sensor + camera)

This commit is contained in:
hzyjerry 2017-10-26 12:03:43 -07:00
parent 13b2b0b65b
commit f42ef6d3e9
8 changed files with 210 additions and 16 deletions

View File

@ -0,0 +1,62 @@
from __future__ import print_function
import time
import numpy as np
import sys
import gym
from PIL import Image
from realenv.core.render.profiler import Profiler
from realenv.envs.husky_env import HuskyCameraEnv
import pybullet as p
class RandomAgent(object):
"""The world's simplest agent"""
def __init__(self, action_space):
self.action_space = action_space
self.time = 0
self.repeat = 1
self.action_last = np.zeros(self.action_space.shape[0])
def act(self, observation, reward=None):
if self.time < self.repeat:
self.time = self.time + 1
return self.action_last
else:
self.time = 0
action = np.zeros(self.action_space.shape[0])
action[np.random.choice(action.shape[0], 1)] = np.random.choice(2, 1) - 1
self.action_last = action
return action
if __name__ == '__main__':
env = HuskyCameraEnv(human=True, enable_sensors=True)
env.reset()
agent = RandomAgent(env.action_space)
ob = None
try:
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while True:
time.sleep(0.01)
a = agent.act(obs)
with Profiler("Agent step function"):
obs, r, done, meta = env.step(a)
score += r
frame += 1
if not done and frame < 60: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 20 * 4 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
except KeyboardInterrupt:
env._end()
print("Program finished")

View File

@ -0,0 +1,63 @@
from __future__ import print_function
import time
import numpy as np
import sys
import gym
from PIL import Image
from realenv.core.render.profiler import Profiler
from realenv.envs.husky_env import HuskySensorEnv
import pybullet as p
class RandomAgent(object):
"""The world's simplest agent"""
def __init__(self, action_space):
self.action_space = action_space
self.time = 0
self.repeat = 1
self.action_last = np.zeros(self.action_space.shape[0])
def act(self, observation, reward=None):
if self.time < self.repeat:
self.time = self.time + 1
return self.action_last
else:
self.time = 0
action = np.zeros(self.action_space.shape[0])
#action[np.random.randint(0, len(action))] = 1
action = [0] * self.action_space.shape[0]
self.action_last = action
return action
if __name__ == '__main__':
env = HuskySensorEnv(human=True, enable_sensors=True)
env.reset()
agent = RandomAgent(env.action_space)
ob = None
try:
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
while True:
time.sleep(0.01)
a = agent.act(obs)
with Profiler("Agent step function"):
obs, r, done, meta = env.step(a)
score += r
frame += 1
if not done and frame < 60: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 20 * 4 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay==0: break
except KeyboardInterrupt:
env._end()
print("Program finished")

View File

@ -0,0 +1,43 @@
#add parent dir to find package. Only needed for source code build, pip install doesn't need it.
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0,parentdir)
import gym
from realenv.envs.humanoid_env import HumanoidSensorEnv
from baselines import deepq
import datetime
def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
return is_solved
def main():
env = HumanoidSensorEnv(human=True)
model = deepq.models.mlp([64])
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=10000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to humanoid_sensor_model.pkl")
act.save("humanoid_sensor_model.pkl")
if __name__ == '__main__':
main()

View File

@ -224,8 +224,22 @@ class Humanoid(WalkerBase):
class Husky(WalkerBase):
foot_list = ['front_left_wheel_link', 'front_right_wheel_link', 'rear_left_wheel_link', 'rear_right_wheel_link']
def __init__(self):
def __init__(self, is_discrete):
self.is_discrete = is_discrete
WalkerBase.__init__(self, "husky.urdf", "husky_robot", action_dim=4, obs_dim=20, power=2.5)
if self.is_discrete:
self.action_space = gym.spaces.Discrete(2 ** action_dim)
def _step(self, action):
if self.is_discrete:
realaction = []
action_count = action + 1
while action_count > 0:
realaction.append(int(action_count/2))
action = action_count
else:
realaction = action
WalkerBase.step(self, action)
def alive_bonus(self, z, pitch):
return +1 if z > 0.26 else -1 # 0.25 is central sphere rad, die if it scrapes the ground

View File

@ -8,7 +8,7 @@ Install opencv, pytorch and others
```shell
conda install -c menpo opencv -y
conda install pytorch torchvision cuda80 -c soumith
conda install progressbar pillow
conda install pillow
pip install zmq
```

View File

@ -1,13 +1,12 @@
from realenv.core.physics.scene_building import SinglePlayerBuildingScene
from realenv.data.datasets import ViewDataSet3D, get_model_path
from realenv.core.render.show_3d2 import PCRenderer
from realenv.core.physics.physics_env import PhysicsEnv
from realenv.envs.env_bases import MJCFBaseEnv
import realenv
from gym import error
from gym.utils import seeding
import pybullet as p
import progressbar
from tqdm import *
import subprocess, os, signal
import numpy as np
import sys
@ -55,6 +54,7 @@ class SensorRobotEnv(MJCFBaseEnv):
self.ground_ids = set([(self.building_scene.building_obj, 0)])
p.configureDebugVisualizer(p.COV_ENABLE_RENDERING,1)
for i in range (p.getNumBodies()):
print(p.getBodyInfo(i)[0].decode(), self.robot_body.get_name())
if (p.getBodyInfo(i)[0].decode() == self.robot_body.get_name()):
self.robot_tracking_id=i
i = 0
@ -263,10 +263,8 @@ class CameraRobotEnv(SensorRobotEnv):
uuids, rts = self.dataset.get_scene_info(scene_id)
targets, sources, source_depths, poses = [], [], [], []
pbar = progressbar.ProgressBar(widgets=[
' [ Initializing Environment ] ', progressbar.Bar(), ' (', progressbar.ETA(), ') ',])
for k,v in pbar(uuids):
for k,v in tqdm((uuids)):
data = self.dataset[v]
target, target_depth = data[1], data[3]
if self.scale_up !=1:
@ -303,9 +301,9 @@ class CameraRobotEnv(SensorRobotEnv):
filename = tb.tb_frame.f_code.co_filename
name = tb.tb_frame.f_code.co_name
lineno = tb.tb_lineno
print ' File "%.500s", line %d, in %.500s' %(filename, lineno, name)
print(' File "%.500s", line %d, in %.500s' %(filename, lineno, name))
tb = tb.tb_next
print ' %s: %s' %(exctype.__name__, value)
print(' %s: %s' %(exctype.__name__, value))
sys.excepthook = camera_multi_excepthook
dr_path = os.path.join(os.path.dirname(os.path.abspath(realenv.__file__)), 'core', 'channels', 'depth_render')

View File

@ -1,5 +1,5 @@
from realenv.envs.env_modalities import CameraRobotEnv, SensorRobotEnv
from realenv.core.physics.robot_locomotors import Humanoid, Ant, Husky
from realenv.core.physics.robot_locomotors import Humanoid
import gym
HUMANOID_TIMESTEP = 1.0/(4 * 22)

View File

@ -1,18 +1,32 @@
from realenv.envs.env_modalities import CameraRobotEnv, SensorRobotEnv
from realenv.core.physics.robot_locomotors import Husky
HUMANOID_TIMESTEP = 1.0/(4 * 22)
HUMANOID_FRAMESKIP = 4
class HuskyEnv:
def __init__(self):
self.robot = Husky()
PhysicsExtendedEnv.__init__(self, self.robot)
metadata = {
'render.modes' : ['human', 'rgb_array'],
'video.frames_per_second' : 30
}
def __init__(self, is_discrete=False):
self.robot = Husky(is_discrete)
class HuskyCameraEnv(HuskyEnv, CameraRobotEnv):
pass
def __init__(self, human=True, timestep=HUMANOID_TIMESTEP,
frame_skip=HUMANOID_FRAMESKIP, enable_sensors=False,
is_discrete=False):
HuskyEnv.__init__(self, is_discrete)
CameraRobotEnv.__init__(self, human, timestep=timestep,
frame_skip=frame_skip, enable_sensors=enable_sensors)
class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
pass
def __init__(self, human=True, timestep=HUMANOID_TIMESTEP,
frame_skip=HUMANOID_FRAMESKIP, enable_sensors=False,
is_discrete=False):
HuskyEnv.__init__(self, is_discrete)
SensorRobotEnv.__init__(self, human)