add husky flagrun
parent ba7fb4865c
commit 621f9d1b76
@@ -0,0 +1,56 @@
from __future__ import print_function
import time
import numpy as np
import sys
import gym
from PIL import Image
from realenv.core.render.profiler import Profiler
from realenv.envs.husky_env import HuskyFlagRunEnv
import pybullet as p


class RandomAgent(object):
    """The world's simplest agent"""

    def __init__(self, action_space, is_discrete=False):
        self.action_space = action_space
        self.is_discrete = is_discrete

    def act(self, observation, reward=None):
        if self.is_discrete:
            action = np.random.randint(self.action_space.n)
        else:
            action = np.zeros(self.action_space.shape[0])
            if (np.random.random() < 0.2):
                action[np.random.choice(action.shape[0], 1)] = np.random.randint(-1, 2)
        return action


if __name__ == '__main__':
    env = HuskyFlagRunEnv(human=True, timestep=1.0 / (4 * 22), frame_skip=4, enable_sensors=True, is_discrete=True)
    obs = env.reset()
    agent = RandomAgent(env.action_space, is_discrete=True)
    assert (not obs is None)

    while 1:
        frame = 0
        score = 0
        restart_delay = 0
        obs = env.reset()
        assert (not obs is None)
        while True:
            time.sleep(0.01)
            a = agent.act(obs)
            print("action", a)
            with Profiler("Agent step function"):
                obs, r, done, meta = env.step(a)
            score += r
            frame += 1

            if not done and frame < 60: continue
            if restart_delay == 0:
                print("score=%0.2f in %i frames" % (score, frame))
                restart_delay = 200 * 4  # 2 sec at 60 fps
            else:
                restart_delay -= 1
                if restart_delay == 0: break
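Not part of the commit: a minimal sketch of how the same RandomAgent loop can be smoke-tested against a stock Gym task when realenv and pybullet are not installed. CartPole-v0 and the old 4-tuple step API are assumptions here, chosen only because the demo script above uses that same API shape.

# Hypothetical smoke test (not in this commit): exercise the RandomAgent
# sampling logic against a standard Gym environment. Assumes gym's
# CartPole-v0 and the classic obs-only reset / 4-tuple step API.
import gym
import numpy as np

class RandomAgent(object):
    def __init__(self, action_space, is_discrete=False):
        self.action_space = action_space
        self.is_discrete = is_discrete

    def act(self, observation, reward=None):
        if self.is_discrete:
            return np.random.randint(self.action_space.n)
        action = np.zeros(self.action_space.shape[0])
        if np.random.random() < 0.2:
            action[np.random.choice(action.shape[0], 1)] = np.random.randint(-1, 2)
        return action

if __name__ == '__main__':
    env = gym.make("CartPole-v0")
    agent = RandomAgent(env.action_space, is_discrete=True)
    obs = env.reset()
    for _ in range(200):
        obs, r, done, info = env.step(agent.act(obs))
        if done:
            obs = env.reset()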
@@ -0,0 +1,44 @@
# add parent dir to find package. Only needed for source code build, pip install doesn't need it.
import os, inspect

currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)

import gym
from realenv.envs.husky_env import HuskyFlagRunEnv

from baselines import deepq

import datetime


def callback(lcl, glb):
    # stop training if reward exceeds 199
    total = sum(lcl['episode_rewards'][-101:-1]) / 100
    totalt = lcl['t']
    is_solved = totalt > 2000 and total >= -50
    is_solved = False
    return is_solved


def main():
    env = HuskyFlagRunEnv(human=True, is_discrete=True, enable_sensors=True)
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback
    )
    print("Saving model to humanoid_sensor_model.pkl")
    act.save("humanoid_sensor_model.pkl")


if __name__ == '__main__':
    main()
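Note that the callback above always returns False (is_solved is overwritten), so training runs for the full max_timesteps. As a follow-up, a hedged sketch of replaying the policy saved by act.save(): deepq.load() and the act(obs[None])[0] call pattern are assumptions taken from the old OpenAI baselines examples, not something this commit provides.

# Hedged sketch (not in this commit): replay the saved DQN policy,
# assuming the old baselines deepq.load() API.
from realenv.envs.husky_env import HuskyFlagRunEnv
from baselines import deepq

def enjoy():
    env = HuskyFlagRunEnv(human=True, is_discrete=True, enable_sensors=True)
    act = deepq.load("humanoid_sensor_model.pkl")
    obs = env.reset()
    while True:
        # act expects a batch of observations and returns a batch of actions
        action = act(obs[None])[0]
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()

if __name__ == '__main__':
    enjoy()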
@ -1,6 +1,6 @@
|
|||
## Scene configurations
|
||||
SCENE_TYPE = 'building'
|
||||
#SCENE_TYPE = 'stadium'
|
||||
#SCENE_TYPE = 'building'
|
||||
SCENE_TYPE = 'stadium'
|
||||
|
||||
|
||||
## WORKAROUND (hzyjerry): scaling building instead of agent, this is because
|
||||
|
|
|
@@ -171,6 +171,7 @@ class Camera:
        lookat = [x,y,z]
        distance = 10
        yaw = 10
        p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
        if MAKE_VIDEO:
            p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)

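For context (not part of the diff): pybullet's debug-visualizer camera is placed by distance, yaw, pitch and a target point, so the call above orbits the GUI camera around lookat. A minimal standalone sketch, assuming a GUI connection is available:

# Minimal sketch mirroring the call above; assumes pybullet with a GUI connection.
import pybullet as p

p.connect(p.GUI)
p.resetDebugVisualizerCamera(cameraDistance=10, cameraYaw=10,
                             cameraPitch=-20, cameraTargetPosition=[0, 0, 0])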
@@ -147,8 +147,8 @@ class SensorRobotEnv(BaseEnv):
        if self.human:
            humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
            humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])

            p.resetDebugVisualizerCamera(self.tracking_camera['distance'],self.tracking_camera['yaw'], self.tracking_camera['pitch'],humanPos); ## demo: kitchen, living room
            if MAKE_VIDEO:
                p.resetDebugVisualizerCamera(self.tracking_camera['distance'],self.tracking_camera['yaw'], self.tracking_camera['pitch'],humanPos); ## demo: kitchen, living room
            #p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs

        eye_pos = self.robot.eyes.current_position()
@@ -3,10 +3,15 @@ from realenv.core.physics.robot_locomotors import Husky
from transforms3d import quaternions
import numpy as np
import pybullet as p
from realenv.core.physics.scene_stadium import SinglePlayerStadiumScene
import pybullet_data

HUMANOID_TIMESTEP = 1.0/(4 * 22)
HUMANOID_FRAMESKIP = 4

import os
from realenv.configs import *

class HuskyEnv:
    metadata = {
        'render.modes' : ['human', 'rgb_array'],
@@ -62,6 +67,8 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
        obs = SensorRobotEnv._reset(self)
        self.nframe = 0
        return obs


    def _step(self, a=None):
        self.nframe += 1

@@ -141,8 +148,8 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
        if self.human:
            humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
            humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])

            p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
            if MAKE_VIDEO:
                p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
                                             self.tracking_camera['pitch'], humanPos)  ## demo: kitchen, living room
            # p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs

@@ -163,5 +170,165 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
        self.tracking_camera['yaw'] = yaw  ## living roon
        self.tracking_camera['pitch'] = -10

        self.tracking_camera['distance'] = distance
        self.tracking_camera['z_offset'] = offset


class HuskyFlagRunEnv(HuskyEnv, SensorRobotEnv):
    def __init__(self, human=True, timestep=HUMANOID_TIMESTEP,
                 frame_skip=HUMANOID_FRAMESKIP, enable_sensors=False,
                 is_discrete=False):
        self.human = human
        self.timestep = timestep
        self.frame_skip = frame_skip
        HuskyEnv.__init__(self, is_discrete)
        SensorRobotEnv.__init__(self)
        self.nframe = 0
        assert (isinstance(self.scene, SinglePlayerStadiumScene))
        self.flag_timeout = 1

        self.visualid = p.createVisualShape(p.GEOM_MESH, fileName=os.path.join(pybullet_data.getDataPath(), 'cube.obj'),
                                            meshScale=[0.5, 0.5, 0.5], rgbaColor=[1, 0, 0, 0.7])

        self.lastid = None

    def _reset(self):
        obs = SensorRobotEnv._reset(self)
        self.nframe = 0
        return obs

    def flag_reposition(self):

        self.walk_target_x = self.np_random.uniform(low=-self.scene.stadium_halflen,
                                                    high=+self.scene.stadium_halflen)
        self.walk_target_y = self.np_random.uniform(low=-self.scene.stadium_halfwidth,
                                                    high=+self.scene.stadium_halfwidth)

        more_compact = 0.5  # set to 1.0 whole football field
        self.walk_target_x *= more_compact
        self.walk_target_y *= more_compact

        self.flag = None
        #self.flag = self.scene.cpp_world.debug_sphere(self.walk_target_x, self.walk_target_y, 0.2, 0.2, 0xFF8080)
        self.flag_timeout = 600 / self.scene.frame_skip
        #print('targetxy', self.flagid, self.walk_target_x, self.walk_target_y, p.getBasePositionAndOrientation(self.flagid))
        #p.resetBasePositionAndOrientation(self.flagid, posObj = [self.walk_target_x, self.walk_target_y, 0.5], ornObj = [0,0,0,0])
        if self.lastid:
            p.removeBody(self.lastid)

        self.lastid = p.createMultiBody(baseVisualShapeIndex=self.visualid, baseCollisionShapeIndex=-1, basePosition=[self.walk_target_x, self.walk_target_y, 0.5])

        self.robot.walk_target_x = self.walk_target_x
        self.robot.walk_target_y = self.walk_target_y

    def _step(self, a=None):

        if self.flag_timeout <= 0:
            self.flag_reposition()

        self.nframe += 1
        self.flag_timeout -= 1

        # dummy state if a is None
        if not self.scene.multiplayer:  # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
            if not a is None:
                self.robot.apply_action(a)
            self.scene.global_step()

        state = self.robot.calc_state()  # also calculates self.joints_at_limit

        alive = float(self.robot.alive_bonus(state[0] + self.robot.initial_z, self.robot.body_rpy[
            1]))  # state[0] is body height above ground, body_rpy[1] is pitch

        done = False
        print(self.nframe)
        # done = alive < 0
        if not np.isfinite(state).all():
            print("~INF~", state)
            done = True

        potential_old = self.potential
        self.potential = self.robot.calc_potential()
        progress = float(self.potential - potential_old)

        feet_collision_cost = 0.0
        for i, f in enumerate(
                self.robot.feet):  # TODO: Maybe calculating feet contacts could be done within the robot code
            # print(f.contact_list())
            contact_ids = set((x[2], x[4]) for x in f.contact_list())
            # print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
            if (self.ground_ids & contact_ids):
                # see Issue 63: https://github.com/openai/roboschool/issues/63
                # feet_collision_cost += self.foot_collision_cost
                self.robot.feet_contact[i] = 1.0
            else:
                self.robot.feet_contact[i] = 0.0
            # print(self.robot.feet_contact)

        if not a is None:
            electricity_cost = self.electricity_cost * float(np.abs(
                a * self.robot.joint_speeds).mean())  # let's assume we have DC motor with controller, and reverse current braking
            electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
        else:
            electricity_cost = 0

        joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
        debugmode = 0
        if (debugmode):
            print("alive=")
            print(alive)
            print("progress")
            print(progress)
            print("electricity_cost")
            print(electricity_cost)
            print("joints_at_limit_cost")
            print(joints_at_limit_cost)
            print("feet_collision_cost")
            print(feet_collision_cost)

        self.rewards = [
            # alive,
            progress,
            # electricity_cost,
            # joints_at_limit_cost,
            # feet_collision_cost
        ]

        print(self.robot.walk_target_dist)
        if (debugmode):
            print("rewards=")
            print(self.rewards)
            print("sum rewards")
            print(sum(self.rewards))
        if not a is None:
            self.HUD(state, a, done)
        self.reward += sum(self.rewards)

        if self.human:
            humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
            humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])

            if MAKE_VIDEO:
                p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
                                             self.tracking_camera['pitch'], humanPos)  ## demo: kitchen, living room
            # p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos); ## demo: stairs

        eye_pos = self.robot.eyes.current_position()
        x, y, z, w = self.robot.eyes.current_orientation()
        eye_quat = quaternions.qmult([w, x, y, z], self.robot.eye_offset_orn)
        print(sum(self.rewards))
        return state, sum(self.rewards), bool(done), {"eye_pos": eye_pos, "eye_quat": eye_quat}

        # self.tracking_camera['pitch'] = -45  ## stairs
        yaw = 90  ## demo: living room
        # yaw = 30  ## demo: kitchen
        offset = 0.5
        distance = 0.7  ## living room
        # self.tracking_camera['yaw'] = 90  ## demo: stairs

        self.tracking_camera['yaw'] = yaw  ## living roon
        self.tracking_camera['pitch'] = -10

        self.tracking_camera['distance'] = distance
        self.tracking_camera['z_offset'] = offset
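The only reward term left active in HuskyFlagRunEnv._step is progress, the change in the robot's potential between consecutive steps. A hedged illustration of that shaping, assuming calc_potential() follows the roboschool convention of -distance_to_target / dt (the robot-side code is not shown in this diff):

# Hedged illustration (not in this commit) of the progress reward in
# HuskyFlagRunEnv._step, assuming a roboschool-style potential of -dist / dt.
import numpy as np

def potential(robot_xy, target_xy, dt):
    dist = float(np.linalg.norm(np.asarray(target_xy) - np.asarray(robot_xy)))
    return -dist / dt

dt = 4 * (1.0 / (4 * 22))          # frame_skip * timestep used by the demo script
old = potential([0.0, 0.0], [2.1, 0.0], dt)
new = potential([0.1, 0.0], [2.1, 0.0], dt)
progress = new - old               # positive when the husky moves toward the flag
print(progress)                    # ~ 0.1 / dt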