0

我正在使用 rl-baselines3-zoo 在 Colab 上用我的自定义环境运行 DDPG。在我使用该仓库中的视频录制功能后,它报错说无法连接到物理服务器(Not connected to physics server)。同样的流程在其他内置环境上运行良好,所以我怀疑是我的自定义环境的问题。拜托,需要一些帮助……我是按照该仓库的教程设置的所有东西。

追溯:

pybullet build time: Jul 12 2021 20:46:20
/usr/local/lib/python3.7/dist-packages/gym/logger.py:30: UserWarning:

WARN: Box bound precision lowered by casting to float32

startThreads creating 1 threads.
starting thread 0
started thread 0 
argc=2
argv[0] = --unused
argv[1] = --start_demo_name=Physics Server
ExampleBrowserThreadFunc started
X11 functions dynamically loaded using dlopen/dlsym OK!
X11 functions dynamically loaded using dlopen/dlsym OK!
Creating context
Created GL 3.3 context
Direct GLX rendering context obtained
Making context current
GL_VENDOR=VMware, Inc.
GL_RENDERER=llvmpipe (LLVM 10.0.0, 256 bits)
GL_VERSION=3.3 (Core Profile) Mesa 20.0.8
GL_SHADING_LANGUAGE_VERSION=3.30
pthread_getconcurrency()=0
Version = 3.3 (Core Profile) Mesa 20.0.8
Vendor = VMware, Inc.
Renderer = llvmpipe (LLVM 10.0.0, 256 bits)
b3Printf: Selected demo: Physics Server
startThreads creating 1 threads.
starting thread 0
started thread 0 
MotionThreadFunc thread started
ven = VMware, Inc.
ven = VMware, Inc.
Wrapping the env in a VecTransposeImage.
tcmalloc: large alloc 3276800000 bytes == 0x556b03bda000 @  0x7f7cad04a001 0x7f7caa3f554f 0x7f7caa445b58 0x7f7caa449b17 0x7f7caa4e8203 0x556a81194d54 0x556a81194a50 0x556a81209105 0x556a812037ad 0x556a81196c9f 0x556a811d7d79 0x556a811d4cc4 0x556a81196ea1 0x556a81205bb5 0x556a8119630a 0x556a812087f0 0x556a812037ad 0x556a811963ea 0x556a8120460e 0x556a812034ae 0x556a811963ea 0x556a8120532a 0x556a812034ae 0x556a812031b3 0x556a81201660 0x556a81194b59 0x556a81194a50 0x556a81208453 0x556a812034ae 0x556a811963ea 0x556a812043b5
tcmalloc: large alloc 3276800000 bytes == 0x556bc78da000 @  0x7f7cad04a001 0x7f7caa3f554f 0x7f7caa445b58 0x7f7caa449b17 0x7f7caa4e8203 0x556a81194d54 0x556a81194a50 0x556a81209105 0x556a812037ad 0x556a81196c9f 0x556a811d7d79 0x556a811d4cc4 0x556a81196ea1 0x556a81205bb5 0x556a8119630a 0x556a812087f0 0x556a812037ad 0x556a811963ea 0x556a8120460e 0x556a812034ae 0x556a811963ea 0x556a8120532a 0x556a812034ae 0x556a812031b3 0x556a81201660 0x556a81194b59 0x556a81194a50 0x556a81208453 0x556a812034ae 0x556a811963ea 0x556a812043b5
/content/gdrive/My Drive/hsr/rl-baselines3-zoo/logs/ddpg/FoodHuntingHSR-v0_3/videos/final-model-ddpg-FoodHuntingHSR-v0-step-0-to-step-200.mp4
/usr/local/lib/python3.7/dist-packages/gym/logger.py:30: UserWarning:

WARN: Tried to pass invalid video frame, marking as broken: Your frame has data type float32, but we require uint8 (i.e. RGB values from 0-255).

Saving video to /content/gdrive/My Drive/hsr/rl-baselines3-zoo/logs/ddpg/FoodHuntingHSR-v0_3/videos/final-model-ddpg-FoodHuntingHSR-v0-step-0-to-step-200.mp4
numActiveThreads = 0
stopping threads
destroy semaphore
semaphore destroyed
Thread with taskId 0 exiting
Thread TERMINATED
destroy main semaphore
main semaphore destroyed
finished
numActiveThreads = 0
btShutDownExampleBrowser stopping threads
Thread with taskId 0 exiting
Thread TERMINATED
destroy semaphore
semaphore destroyed
destroy main semaphore
main semaphore destroyed
Exception ignored in: <function VecVideoRecorder.__del__ at 0x7f7c2b5cc200>
Traceback (most recent call last):
  File "/content/gdrive/My Drive/hsr/stable-baselines3/stable_baselines3/common/vec_env/vec_video_recorder.py", line 114, in __del__
  File "/content/gdrive/My Drive/hsr/stable-baselines3/stable_baselines3/common/vec_env/vec_video_recorder.py", line 110, in close
  File "/content/gdrive/My Drive/hsr/stable-baselines3/stable_baselines3/common/vec_env/base_vec_env.py", line 278, in close
  File "/content/gdrive/My Drive/hsr/stable-baselines3/stable_baselines3/common/vec_env/dummy_vec_env.py", line 67, in close
  File "/content/gdrive/My Drive/hsr/stable-baselines3/stable_baselines3/common/monitor.py", line 113, in close
  File "/usr/local/lib/python3.7/dist-packages/gym/core.py", line 243, in close
  File "/usr/local/lib/python3.7/dist-packages/gym/core.py", line 243, in close
  File "/content/gdrive/My Drive/hsr/PyLIS/gym-foodhunting/gym_foodhunting/foodhunting/gym_foodhunting.py", line 538, in close
pybullet.error: Not connected to physics server
class FoodHuntingEnv(gym.Env):
    """Gym environment where a robot hunts food objects in PyBullet.

    The agent controls a robot (``robot_model``); touching a "food" object
    (sphere URDF) yields +1, touching a fake object (cube URDF) yields -1,
    and every step carries a small time penalty so the agent eats quickly.
    """

    metadata = {'render.modes': ['human','rgb_array']}

    GRAVITY = -10.0
    BULLET_STEPS = 120 # p.setTimeStep(1.0 / 240.0), so 1 gym step == 0.5 sec.

    def __init__(self, render=False, robot_model=R2D2, max_steps=500, num_foods=3, num_fakes=0, object_size=1.0, object_radius_scale=1.0, object_radius_offset=1.0, object_angle_scale=1.0):
        """Initialize environment.

        Args:
            render: if True connect a GUI physics server, else DIRECT (headless).
            robot_model: robot class; provides observation/action spaces (classmethods)
                and per-instance control/observation methods.
            max_steps: maximum gym steps per episode.
            num_foods: number of real food objects (spheres).
            num_fakes: number of fake objects (cubes).
            object_size: globalScaling applied to spawned object URDFs.
            object_radius_scale, object_radius_offset, object_angle_scale:
                parameters of the random polar placement of objects.
        """
        ### gym variables
        self.observation_space = robot_model.getObservationSpace() # classmethod
        self.action_space = robot_model.getActionSpace() # classmethod
        self.reward_range = (-1.0, 1.0)
        self.seed()
        ### pybullet settings
        self.ifrender = render
        self.physicsClient = p.connect(p.GUI if self.ifrender else p.DIRECT)
        p.setAdditionalSearchPath(pybullet_data.getDataPath())
        ### env variables
        self.robot_model = robot_model
        self.max_steps = max_steps
        self.num_foods = num_foods
        self.num_fakes = num_fakes
        self.object_size = object_size
        self.object_radius_scale = object_radius_scale
        self.object_radius_offset = object_radius_offset
        self.object_angle_scale = object_angle_scale
        self.plane_id = None
        self.robot = None
        self.object_ids = []
        ### episode variables
        self.steps = 0
        self.episode_rewards = 0.0

    def close(self):
        """Disconnect from the physics server.

        Idempotent: gym/SB3 wrappers may call close() more than once (e.g.
        explicitly and again from VecVideoRecorder.__del__). Disconnecting an
        already-closed client raises ``pybullet.error: Not connected to
        physics server``, so only disconnect while still connected.
        """
        if p.isConnected(self.physicsClient):
            p.disconnect(self.physicsClient)

    def reset(self):
        """Reset environment and return the first observation of the episode."""
        self.steps = 0
        self.episode_rewards = 0.0
        p.resetSimulation()
        # p.setTimeStep(1.0 / 240.0)
        p.setGravity(0, 0, self.GRAVITY)
        self.plane_id = p.loadURDF('plane.urdf')
        self.robot = self.robot_model()
        self.object_ids = []
        # First num_foods objects are real food (spheres); the rest are fakes (cubes).
        for i, (pos, orn) in enumerate(self._generateObjectPositions(num=(self.num_foods+self.num_fakes), radius_scale=self.object_radius_scale, radius_offset=self.object_radius_offset, angle_scale=self.object_angle_scale)):
            if i < self.num_foods:
                urdfPath = 'food_sphere.urdf'
            else:
                urdfPath = 'food_cube.urdf'
            object_id = p.loadURDF(urdfPath, pos, orn, globalScaling=self.object_size)
            self.object_ids.append(object_id)
        # Let the scene settle before the first observation.
        for i in range(self.BULLET_STEPS):
            p.stepSimulation()
        obs = self._getObservation()
        return obs

    def step(self, action):
        """Apply action to environment, then return (obs, reward, done, info).

        Each gym step advances the simulation BULLET_STEPS physics steps,
        accumulating contact rewards along the way.
        """
        self.steps += 1
        self.robot.setAction(action)
        reward = -1.0 * float(self.num_foods) / float(self.max_steps) # so agent needs to eat foods quickly
        for i in range(self.BULLET_STEPS):
            p.stepSimulation()
            reward += self._getReward()
        self.episode_rewards += reward
        obs = self._getObservation()
        done = self._isDone()
        pos, orn = self.robot.getPositionAndOrientation()
        info = { 'steps': self.steps, 'pos': pos, 'orn': orn }
        if done:
            # Monitor-style episode summary: total reward and length.
            info['episode'] = { 'r': self.episode_rewards, 'l': self.steps }
        return obs, reward, done, info

    def render(self, mode='human', close=False):
        """Return an rgb_array frame; other modes return an empty array.

        This environment cannot control rendering timing, so the observation
        doubles as the rendered frame. Video recorders (gym's VideoRecorder /
        SB3's VecVideoRecorder) require uint8 RGB frames and mark float32
        frames as broken, so float observations are converted here.
        NOTE(review): assumes float observations are normalized to [0, 1] —
        confirm against robot_model.getObservation().
        """
        if mode != 'rgb_array':
            return np.array([])
        frame = np.asarray(self._getObservation())
        if np.issubdtype(frame.dtype, np.floating):
            frame = (np.clip(frame, 0.0, 1.0) * 255.0).astype(np.uint8)
        return frame

    def seed(self, seed=None):
        """Set random seed and return it in a one-element list (gym convention)."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _getReward(self):
        """Detect contact points, remove touched objects, and return reward.

        +1 per touched food, -1 per touched fake; touched objects are deleted
        from the simulation and from self.object_ids.
        """
        reward = 0
        contacted_object_ids = [ object_id for object_id in self.object_ids if self.robot.isContact(object_id) ]
        for object_id in contacted_object_ids:
            reward += 1 if self._isFood(object_id) else -1
            p.removeBody(object_id)
            self.object_ids.remove(object_id)
        return reward

    def _getObservation(self):
        """Get the robot's observation (delegates to robot_model instance)."""
        obs = self.robot.getObservation()
        return obs

    def _isFood(self, object_id):
        """Check if object_id is a food (by its URDF file name)."""
        baseLink, urdfPath = p.getBodyInfo(object_id)
        return urdfPath == b'food_sphere.urdf' # otherwise, fake

    def _isDone(self):
        """Episode ends when max_steps is reached or no food remains."""
        available_object_ids = [ object_id for object_id in self.object_ids if self._isFood(object_id) ]
        return self.steps >= self.max_steps or len(available_object_ids) <= 0

    def _generateObjectPositions(self, num=1, retry=100, radius_scale=1.0, radius_offset=1.0, angle_scale=1.0, angle_offset=0.5*np.pi, z=1.5, near_distance=0.5):
        """Generate object (position, orientation) pairs randomly.

        Positions are drawn in polar coordinates at height ``z``; up to
        ``retry`` attempts are made to keep each object at least
        ``near_distance`` away from the ones already placed.
        """
        def genPos():
            r = radius_scale * self.np_random.rand() + radius_offset
            a = -np.pi * angle_scale + angle_offset
            b =  np.pi * angle_scale + angle_offset
            ang = (b - a) * self.np_random.rand() + a
            return np.array([r * np.sin(ang), r * np.cos(ang), z])
        def isNear(pos, poss):
            # NOTE: loop variable renamed from 'p' to avoid shadowing the
            # pybullet module alias used throughout this class.
            for other_pos, _orn in poss:
                if np.linalg.norm(other_pos - pos) < near_distance:
                    return True
            return False
        def genPosRetry(poss):
            for i in range(retry):
                pos = genPos()
                if not isNear(pos, poss):
                    return pos
            return genPos() # give up on spacing after 'retry' failed attempts
        poss = []
        for i in range(num):
            pos = genPosRetry(poss)
            orn = p.getQuaternionFromEuler([0.0, 0.0, 2.0*np.pi*self.np_random.rand()])
            poss.append((pos, orn))
        return poss
4

0 回答