some changes to husky

2017-10-29 16:07:49 -07:00 · 2017-10-29 16:07:49 -07:00 · a51339e3be
parent 83dc58947e
commit a51339e3be
2 changed files with 104 additions and 3 deletions
--- a/realenv/core/physics/robot_locomotors.py
+++ b/realenv/core/physics/robot_locomotors.py
@ -231,14 +231,17 @@ class Husky(WalkerBase):
 		self.is_discrete = is_discrete
 		WalkerBase.__init__(self, "husky.urdf", "husky_robot", action_dim=4, obs_dim=20, power=2.5)
 		if self.is_discrete:
-			self.action_space = gym.spaces.Discrete(4)
+			self.action_space = gym.spaces.Discrete(5)
 		## specific offset for husky.urdf
 		#self.eye_offset_orn = euler2quat(np.pi/2, 0, np.pi/2, axes='sxyz')
 		self.eye_offset_orn = euler2quat(np.pi/2, 0, np.pi/2, axes='sxyz')

 		self.torque = 0.1
-		self.action_list = [[self.torque,self.torque,self.torque,self.torque], [-self.torque,-self.torque,-self.torque,-self.torque], [self.torque,-self.torque,self.torque,-self.torque],[-self.torque,self.torque,-self.torque,self.torque]]
-        
+		self.action_list = [[0, 0, 0.9 * self.torque, 0.9 * self.torque],
+							[- 1.5 * self.torque, - 1.5 * self.torque, 0, 0],
+							[self.torque, -self.torque, self.torque, -self.torque],
+							[-self.torque, self.torque, -self.torque, self.torque], [0, 0, 0, 0]]
+
 	def apply_action(self, action):
 		if self.is_discrete:
 			realaction = self.action_list[action]
--- a/realenv/envs/husky_env.py
+++ b/realenv/envs/husky_env.py
@ -1,5 +1,8 @@
 from realenv.envs.env_modalities import CameraRobotEnv, SensorRobotEnv
 from realenv.core.physics.robot_locomotors import Husky
+from transforms3d import quaternions
+import numpy as np
+import pybullet as p

 HUMANOID_TIMESTEP  = 1.0/(4 * 22)
 HUMANOID_FRAMESKIP = 4
@ -37,6 +40,101 @@ class HuskySensorEnv(HuskyEnv, SensorRobotEnv):
        self.frame_skip = frame_skip
        HuskyEnv.__init__(self, is_discrete)
        SensorRobotEnv.__init__(self)
+        self.nframe = 0
+
+    def  _reset(self):
+        obs = SensorRobotEnv._reset(self)
+        self.nframe = 0
+        return obs
+    def _step(self, a=None):
+        self.nframe += 1
+
+        # dummy state if a is None
+        if not self.scene.multiplayer:  # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
+            if not a is None:
+                self.robot.apply_action(a)
+            self.scene.global_step()
+
+        state = self.robot.calc_state()  # also calculates self.joints_at_limit
+
+        alive = float(self.robot.alive_bonus(state[0] + self.robot.initial_z, self.robot.body_rpy[
+            1]))  # state[0] is body height above ground, body_rpy[1] is pitch
+
+        done = self.nframe > 100
+        print(self.nframe)
+        #done = alive < 0
+        if not np.isfinite(state).all():
+            print("~INF~", state)
+            done = True
+
+        potential_old = self.potential
+        self.potential = self.robot.calc_potential()
+        progress = float(self.potential - potential_old)
+
+        feet_collision_cost = 0.0
+        for i, f in enumerate(
+                self.robot.feet):  # TODO: Maybe calculating feet contacts could be done within the robot code
+            # print(f.contact_list())
+            contact_ids = set((x[2], x[4]) for x in f.contact_list())
+            # print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
+            if (self.ground_ids & contact_ids):
+                # see Issue 63: https://github.com/openai/roboschool/issues/63
+                # feet_collision_cost += self.foot_collision_cost
+                self.robot.feet_contact[i] = 1.0
+            else:
+                self.robot.feet_contact[i] = 0.0
+        # print(self.robot.feet_contact)
+
+        if not a is None:
+            electricity_cost = self.electricity_cost * float(np.abs(
+                a * self.robot.joint_speeds).mean())  # let's assume we have DC motor with controller, and reverse current braking
+            electricity_cost += self.stall_torque_cost * float(np.square(a).mean())
+        else:
+            electricity_cost = 0
+
+        joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit)
+        debugmode = 0
+        if (debugmode):
+            print("alive=")
+            print(alive)
+            print("progress")
+            print(progress)
+            print("electricity_cost")
+            print(electricity_cost)
+            print("joints_at_limit_cost")
+            print(joints_at_limit_cost)
+            print("feet_collision_cost")
+            print(feet_collision_cost)
+
+        self.rewards = [
+            alive,
+            progress,
+            electricity_cost,
+            joints_at_limit_cost,
+            feet_collision_cost
+        ]
+        if (debugmode):
+            print("rewards=")
+            print(self.rewards)
+            print("sum rewards")
+            print(sum(self.rewards))
+        if not a is None:
+            self.HUD(state, a, done)
+        self.reward += sum(self.rewards)
+
+        if self.human:
+            humanPos, humanOrn = p.getBasePositionAndOrientation(self.robot_tracking_id)
+            humanPos = (humanPos[0], humanPos[1], humanPos[2] + self.tracking_camera['z_offset'])
+
+            p.resetDebugVisualizerCamera(self.tracking_camera['distance'], self.tracking_camera['yaw'],
+                                         self.tracking_camera['pitch'], humanPos)  ## demo: kitchen, living room
+            # p.resetDebugVisualizerCamera(distance,yaw,-42,humanPos);        ## demo: stairs
+
+        eye_pos = self.robot.eyes.current_position()
+        x, y, z, w = self.robot.eyes.current_orientation()
+        eye_quat = quaternions.qmult([w, x, y, z], self.robot.eye_offset_orn)
+
+        return state, sum(self.rewards), bool(done), {"eye_pos": eye_pos, "eye_quat": eye_quat}