add husky flagrun

This commit is contained in:
fxia22 2017-11-02 15:04:37 -07:00
parent ba7fb4865c
commit 621f9d1b76
6 changed files with 275 additions and 7 deletions

View File

@ -0,0 +1,56 @@
from __future__ import print_function
import time
import numpy as np
import sys
import gym
from PIL import Image
from realenv.core.render.profiler import Profiler
from realenv.envs.husky_env import HuskyFlagRunEnv
import pybullet as p
class RandomAgent(object):
"""The world's simplest agent"""
def __init__(self, action_space, is_discrete=False):
self.action_space = action_space
self.is_discrete = is_discrete
def act(self, observation, reward=None):
if self.is_discrete:
action = np.random.randint(self.action_space.n)
else:
action = np.zeros(self.action_space.shape[0])
if (np.random.random() < 0.2):
action[np.random.choice(action.shape[0], 1)] = np.random.randint(-1, 2)
return action
if __name__ == '__main__':
env = HuskyFlagRunEnv(human=True, timestep=1.0 / (4 * 22), frame_skip=4, enable_sensors=True, is_discrete=True)
obs = env.reset()
agent = RandomAgent(env.action_space, is_discrete=True)
assert (not obs is None)
while 1:
frame = 0
score = 0
restart_delay = 0
obs = env.reset()
assert (not obs is None)
while True:
time.sleep(0.01)
a = agent.act(obs)
print("action", a)
with Profiler("Agent step function"):
obs, r, done, meta = env.step(a)
score += r
frame += 1
if not done and frame < 60: continue
if restart_delay == 0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 200 * 4 # 2 sec at 60 fps
else:
restart_delay -= 1
if restart_delay == 0: break

View File

@ -0,0 +1,44 @@
# add parent dir to find package. Only needed for source code build, pip install doesn't need it.
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)
import gym
from realenv.envs.husky_env import HuskyFlagRunEnv
from baselines import deepq
import datetime
def callback(lcl, glb):
# stop training if reward exceeds 199
total = sum(lcl['episode_rewards'][-101:-1]) / 100
totalt = lcl['t']
is_solved = totalt > 2000 and total >= -50
is_solved = False
return is_solved
def main():
env = HuskyFlagRunEnv(human=True, is_discrete=True, enable_sensors=True)
model = deepq.models.mlp([64])
act = deepq.learn(
env,
q_func=model,
lr=1e-3,
max_timesteps=100000,
buffer_size=50000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
print_freq=10,
callback=callback
)
print("Saving model to humanoid_sensor_model.pkl")
act.save("humanoid_sensor_model.pkl")
if __name__ == '__main__':
main()

View File

@ -1,6 +1,6 @@
## Scene configurations
SCENE_TYPE = 'building'
#SCENE_TYPE = 'stadium'
#SCENE_TYPE = 'building'
SCENE_TYPE = 'stadium'
## WORKAROUND (hzyjerry): scaling building instead of agent, this is because

View File

@ -171,6 +171,7 @@ class Camera:
lookat = [x,y,z]
distance = 10
yaw = 10
p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
if MAKE_VIDEO:
p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)

View File

@ -147,8 +147,8 @@ class SensorRobotEnv(BaseEnv):
if self.human:
humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])
p.resetDebugVisualizerCamera(self.tracking_camera['distance'],self.tracking_camera['yaw'], self.tracking_camera['pitch'],humanPos); ## demo: kitchen, living room
if MAKE_VIDEO:
p.resetDebugVisualizerCamera(self.tracking_camera['distance'],self.tracking_camera['yaw'], self.tracking_camera['pitch'],humanPos); ## demo: kitchen, living room
#p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs
eye_pos = self.robot.eyes.current_position()

View File

@ -3,10 +3,15 @@ from realenv.core.physics.robot_locomotors import Husky
from transforms3d import quaternions
import numpy as np
import pybullet as p
from realenv.core.physics.scene_stadium import SinglePlayerStadiumScene
import pybullet_data
HUMANOID_TIMESTEP = 1.0/(4 * 22)
HUMANOID_FRAMESKIP = 4
import os
from realenv.configs import *
class HuskyEnv:
metadata = {
'render.modes' : ['human', 'rgb_array'],
@ -62,6 +67,8 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
obs = SensorRobotEnv._reset(self)
self.nframe = 0
return obs
def _step(self, a=None):
self.nframe += 1
@ -141,8 +148,8 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
if self.human:
humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])
p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
if MAKE_VIDEO:
p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
self.tracking_camera['pitch'], humanPos) ## demo: kitchen, living room
# p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs
@ -163,5 +170,165 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
self.tracking_camera['yaw'] = yaw ## living roon
self.tracking_camera['pitch'] = -10
self.tracking_camera['distance'] = distance
self.tracking_camera['z_offset'] = offset
class HuskyFlagRunEnv(HuskyEnv, SensorRobotEnv):
def __init__(self, human=True, timestep=HUMANOID_TIMESTEP,
frame_skip=HUMANOID_FRAMESKIP, enable_sensors=False,
is_discrete=False):
self.human = human
self.timestep = timestep
self.frame_skip = frame_skip
HuskyEnv.__init__(self, is_discrete)
SensorRobotEnv.__init__(self)
self.nframe = 0
assert (isinstance(self.scene, SinglePlayerStadiumScene))
self.flag_timeout = 1
self.visualid = p.createVisualShape(p.GEOM_MESH, fileName=os.path.join(pybullet_data.getDataPath(), 'cube.obj'),
meshScale=[0.5, 0.5, 0.5], rgbaColor=[1, 0, 0, 0.7])
self.lastid = None
def _reset(self):
obs = SensorRobotEnv._reset(self)
self.nframe = 0
return obs
def flag_reposition(self):
self.walk_target_x = self.np_random.uniform(low=-self.scene.stadium_halflen,
high=+self.scene.stadium_halflen)
self.walk_target_y = self.np_random.uniform(low=-self.scene.stadium_halfwidth,
high=+self.scene.stadium_halfwidth)
more_compact = 0.5 # set to 1.0 whole football field
self.walk_target_x *= more_compact
self.walk_target_y *= more_compact
self.flag = None
#self.flag = self.scene.cpp_world.debug_sphere(self.walk_target_x, self.walk_target_y, 0.2, 0.2, 0xFF8080)
self.flag_timeout = 600 / self.scene.frame_skip
#print('targetxy', self.flagid, self.walk_target_x, self.walk_target_y, p.getBasePositionAndOrientation(self.flagid))
#p.resetBasePositionAndOrientation(self.flagid, posObj = [self.walk_target_x, self.walk_target_y, 0.5], ornObj = [0,0,0,0])
if self.lastid:
p.removeBody(self.lastid)
self.lastid = p.createMultiBody(baseVisualShapeIndex=self.visualid, baseCollisionShapeIndex=-1, basePosition=[self.walk_target_x, self.walk_target_y, 0.5])
self.robot.walk_target_x = self.walk_target_x
self.robot.walk_target_y = self.walk_target_y
def _step(self, a=None):
if self.flag_timeout <= 0:
self.flag_reposition()
self.nframe += 1
self.flag_timeout -= 1
# dummy state if a is None
if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
if not a is None:
self.robot.apply_action(a)
self.scene.global_step()
state = self.robot.calc_state() # also calculates self.joints_at_limit
alive = float(self.robot.alive_bonus(state[0] + self.robot.initial_z, self.robot.body_rpy[
1])) # state[0] is body height above ground, body_rpy[1] is pitch
done = False
print(self.nframe)
# done = alive < 0
if not np.isfinite(state).all():
print("~INF~", state)
done = True
potential_old = self.potential
self.potential = self.robot.calc_potential()
progress = float(self.potential - potential_old)
feet_collision_cost = 0.0
for i, f in enumerate(
self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code
# print(f.contact_list())
contact_ids = set((x[2], x[4]) for x in f.contact_list())
# print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
if (self.ground_ids & contact_ids):
# see Issue 63: https://github.com/openai/roboschool/issues/63
# feet_collision_cost += self.foot_collision_cost
self.robot.feet_contact[i] = 1.0
else:
self.robot.feet_contact[i] = 0.0
# print(self.robot.feet_contact)
if not a is None:
electricity_cost = self.electricity_cost * float(np.abs(
a * self.robot.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking
electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
else:
electricity_cost = 0
joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
debugmode = 0
if (debugmode):
print("alive=")
print(alive)
print("progress")
print(progress)
print("electricity_cost")
print(electricity_cost)
print("joints_at_limit_cost")
print(joints_at_limit_cost)
print("feet_collision_cost")
print(feet_collision_cost)
self.rewards = [
# alive,
progress,
# electricity_cost,
# joints_at_limit_cost,
# feet_collision_cost
]
print(self.robot.walk_target_dist)
if (debugmode):
print("rewards=")
print(self.rewards)
print("sum rewards")
print(sum(self.rewards))
if not a is None:
self.HUD(state, a, done)
self.reward += sum(self.rewards)
if self.human:
humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])
if MAKE_VIDEO:
p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
self.tracking_camera['pitch'], humanPos) ## demo: kitchen, living room
# p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs
eye_pos = self.robot.eyes.current_position()
x, y, z, w = self.robot.eyes.current_orientation()
eye_quat = quaternions.qmult([w, x, y, z], self.robot.eye_offset_orn)
print(sum(self.rewards))
return state, sum(self.rewards), bool(done), {"eye_pos": eye_pos, "eye_quat": eye_quat}
# self.tracking_camera['pitch'] = -45 ## stairs
yaw = 90 ## demo: living room
# yaw = 30 ## demo: kitchen
offset = 0.5
distance = 0.7 ## living room
# self.tracking_camera['yaw'] = 90 ## demo: stairs
self.tracking_camera['yaw'] = yaw ## living roon
self.tracking_camera['pitch'] = -10
self.tracking_camera['distance'] = distance
self.tracking_camera['z_offset'] = offset