git reimport

This commit is contained in:
2019-03-15 13:54:11 +04:00
commit 0c6baed398
232 changed files with 14071 additions and 0 deletions

14
Pipfile Normal file
View File

@@ -0,0 +1,14 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
numpy = "*"
keras = "*"
keras-rl = "*"
[dev-packages]
[requires]
python_version = "3.7"

175
Pipfile.lock generated Normal file
View File

@@ -0,0 +1,175 @@
{
"_meta": {
"hash": {
"sha256": "a227a69980d84f05f5a653bcc6f1cc461e95f114074a0baae20d457cc84e7ebc"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.7"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"h5py": {
"hashes": [
"sha256:0f8cd2acbacf3177b4427ed42639c911667b1f24d923388ab1f8ad466a12be5e",
"sha256:11277e3879098f921ee9e29105b20591e1dfdd44963357399f2abaa1a280c560",
"sha256:1241dec0c94ac32f3285cac1d6f44beabf80423e422ab03bd2686d731a8a9294",
"sha256:17b8187de0b3a945d8e8d031e7eb6ece2fce90791f9c5fde36f4396bf38fdde1",
"sha256:2f30007d0796788a454c1293262f19f25e6428317d3d386f78138fba2a44e37d",
"sha256:308e0758587ee16d4e73e7f2f8aae8351091e343bf0a43d2f697f9535465c816",
"sha256:37cacddf0e8209905f52537a8cf71da0dd9a4de62bd79247274c97b24a408997",
"sha256:38a23bb599748adf23d77f74885c0de6f4a7d9baa42f74e476bbf90fba2b47dd",
"sha256:47ab18b7b7bbc36fd2b606289b703b6f0ee915b923d6ad94dd17ac80ebffc280",
"sha256:486c78330af0bf33f5077b51d1888c0739c3cd1a03d5aade0d48572b3b5690ca",
"sha256:4e2183458d6ef1ae87dfb5d6acd0786359336cd9ac0ece6396c09b59fdaa3bd6",
"sha256:51d0595c3e58814c831f6cd2b664a5bf9590e26262c1d541b380d041e4fcb3c0",
"sha256:56d259d56822b70881760b243957f04a0cf133f0ec65eae6a33f562826aee899",
"sha256:5e6e777653169a3cc24ea56bb3d8c845ea391f8914c35bb6f350b0753a52891c",
"sha256:62bfb0ebb0f59e5dccc0b0dbbc0fc40dd1d1e09d04c0dc71f89790231531d4a2",
"sha256:67d89b64debfa021b54aa6f24bbf008403bd144748a0148596b518bce80d2fc4",
"sha256:6bf38571f555fa214493ec6349d29024cc5f313bf1715b09f236c553fd22ae4d",
"sha256:9214ca445c18a37bfe9c165982c0e317e2f21f035c8d635d1c6d9fcbaf35b7a8",
"sha256:ab0c52850428d2e86029935389379c2c97f752e76b616da851deec8a4484f8ec",
"sha256:b2eff336697d8dfd712c5d93fef9f4e4d3e97d9d8c258801836b8664a239e07a",
"sha256:bb33fabc0b8f3fe3bb0f8d6821b2fad5b2a64c27a0808e8d1c5c1e3362062064",
"sha256:bd5353ab342bae1262b04745934cc1565df4cbc8d6a979a0c98f42209bd5c265",
"sha256:bd73444efd1ac06dac27b8405bbe8791a02fd1bc8a2fa0e575257f90b7b57467",
"sha256:bd932236a2ef91a75fee5d7f4ace80ab494c5a59cd092a67c9785ddb7fdc218c",
"sha256:c45650de228ace7731e4280e14fb687f6d5c29cd666c5b22b42492b035e994d6",
"sha256:d5c0c01da45f901a3d429e7ef9e7e22baa869e1affb8715f1bf94e6a30020740",
"sha256:d75035db5bde802a29f4f29f18bb7548863d29ac90ccbf2c04c11799bbbba2c3",
"sha256:dda88206dc9464923f27f601000bc5b152ac0bd6d0122f098d4f239150a70076",
"sha256:e1c2ac5d0aa232c0f60fecc6bd1122346885086a176f939b91058c4c980cc226",
"sha256:e626c65a8587921ebc7fb8d31a49addfdd0b9a9aa96315ea484c09803337b955"
],
"version": "==2.8.0"
},
"keras": {
"hashes": [
"sha256:794d0c92c6c4122f1f0fcf3a7bc2f49054c6a54ddbef8d8ffafca62795d760b6",
"sha256:90b610a3dbbf6d257b20a079eba3fdf2eed2158f64066a7c6f7227023fd60bc9"
],
"index": "pypi",
"version": "==2.2.4"
},
"keras-applications": {
"hashes": [
"sha256:721dda4fa4e043e5bbd6f52a2996885c4639a7130ae478059b3798d0706f5ae7",
"sha256:a03af60ddc9c5afdae4d5c9a8dd4ca857550e0b793733a5072e0725829b87017"
],
"version": "==1.0.6"
},
"keras-preprocessing": {
"hashes": [
"sha256:90d04c1750bccceef88ac09475c291b4b5f6aa1eaf0603167061b1aa8b043c61",
"sha256:ef2e482c4336fcf7180244d06f4374939099daa3183816e82aee7755af35b754"
],
"version": "==1.0.5"
},
"keras-rl": {
"hashes": [
"sha256:7bbbb24c8f4560a03f59fb062a5003da102de033bc8cd7d06b69b4c1b48ec054"
],
"index": "pypi",
"version": "==0.4.2"
},
"numpy": {
"hashes": [
"sha256:032df9b6571c5f1d41ea6f6a189223208cb488990373aa686aca55570fcccb42",
"sha256:094f8a83e5bd0a44a7557fa24a46db6ba7d5299c389ddbc9e0e18722f567fb63",
"sha256:1c0c80e74759fa4942298044274f2c11b08c86230b25b8b819e55e644f5ff2b6",
"sha256:2aa0910eaeb603b1a5598193cc3bc8eacf1baf6c95cbc3955eb8e15fa380c133",
"sha256:2f5ebc7a04885c7d69e5daa05208faef4db7f1ae6a99f4d36962df8cd54cdc76",
"sha256:32a07241cb624e104b88b08dea2851bf4ec5d65a1f599d7735041ced7171fd7a",
"sha256:3c7959f750b54b445f14962a3ddc41b9eadbab00b86da55fbb1967b2b79aad10",
"sha256:3d8f9273c763a139a99e65c2a3c10f1109df30bedae7f011b10d95c538364704",
"sha256:63bca71691339d2d6f8a7c970821f2b12098a53afccc0190d4e1555e75e5223a",
"sha256:7ae9c3baff3b989859c88e0168ad10902118595b996bf781eaf011bb72428798",
"sha256:866a7c8774ccc7d603667fad95456b4cf56d79a2bb5a7648ac9f0082e0b9416e",
"sha256:8bc4b92a273659e44ca3f3a2f8786cfa39d8302223bcfe7df794429c63d5f5a1",
"sha256:919f65e0732195474897b1cafefb4d4e7c2bb8174a725e506b62e9096e4df28d",
"sha256:9d1598573d310104acb90377f0a8c2319f737084689f5eb18012becaf345cda5",
"sha256:9fff90c88bfaad2901be50453d5cd7897a826c1d901f0654ee1d73ab3a48cd18",
"sha256:a245464ddf6d90e2d6287e9cef6bcfda2a99467fdcf1b677b99cd0b6c7b43de2",
"sha256:a988db28f54e104a01e8573ceb6f28202b4c15635b1450b2e3b2b822c6564f9b",
"sha256:b12fe6f31babb9477aa0f9692730654b3ee0e71f33b4568170dfafd439caf0a2",
"sha256:b7599ff4acd23f5de983e3aec772153b1043e131487a5c6ad0f94b41a828877a",
"sha256:c9f4dafd6065c4c782be84cd67ceeb9b1d4380af60a7af32be10ebecd723385e",
"sha256:ce3622b73ccd844ba301c1aea65d36cf9d8331e7c25c16b1725d0f14db99aaf4",
"sha256:d0f36a24cf8061a2c03e151be3418146717505b9b4ec17502fa3bbdb04ec1431",
"sha256:d263f8f14f2da0c079c0297e829e550d8f2c4e0ffef215506bd1d0ddd2bff3de",
"sha256:d8837ff272800668aabdfe70b966631914b0d6513aed4fc1b1428446f771834d",
"sha256:ef694fe72a3995aa778a5095bda946e0d31f7efabd5e8063ad8c6238ab7d3f78",
"sha256:f1fd1a6f40a501ba4035f5ed2c1f4faa68245d1407bf97d2ee401e4f23d1720b",
"sha256:fa337b6bd5fe2b8c4e705f4102186feb9985de9bb8536d32d5129a658f1789e0",
"sha256:febd31cd0d2fd2509ca2ec53cb339f8bf593c1bd245b9fc55c1917a68532a0af"
],
"index": "pypi",
"version": "==1.15.3"
},
"pyyaml": {
"hashes": [
"sha256:3d7da3009c0f3e783b2c873687652d83b1bbfd5c88e9813fb7e5b03c0dd3108b",
"sha256:3ef3092145e9b70e3ddd2c7ad59bdd0252a94dfe3949721633e41344de00a6bf",
"sha256:40c71b8e076d0550b2e6380bada1f1cd1017b882f7e16f09a65be98e017f211a",
"sha256:558dd60b890ba8fd982e05941927a3911dc409a63dcb8b634feaa0cda69330d3",
"sha256:a7c28b45d9f99102fa092bb213aa12e0aaf9a6a1f5e395d36166639c1f96c3a1",
"sha256:aa7dd4a6a427aed7df6fb7f08a580d68d9b118d90310374716ae90b710280af1",
"sha256:bc558586e6045763782014934bfaf39d48b8ae85a2713117d16c39864085c613",
"sha256:d46d7982b62e0729ad0175a9bc7e10a566fc07b224d2c79fafb5e032727eaa04",
"sha256:d5eef459e30b09f5a098b9cea68bebfeb268697f78d647bd255a085371ac7f3f",
"sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537",
"sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531"
],
"version": "==3.13"
},
"scipy": {
"hashes": [
"sha256:0611ee97296265af4a21164a5323f8c1b4e8e15c582d3dfa7610825900136bb7",
"sha256:08237eda23fd8e4e54838258b124f1cd141379a5f281b0a234ca99b38918c07a",
"sha256:0e645dbfc03f279e1946cf07c9c754c2a1859cb4a41c5f70b25f6b3a586b6dbd",
"sha256:0e9bb7efe5f051ea7212555b290e784b82f21ffd0f655405ac4f87e288b730b3",
"sha256:108c16640849e5827e7d51023efb3bd79244098c3f21e4897a1007720cb7ce37",
"sha256:340ef70f5b0f4e2b4b43c8c8061165911bc6b2ad16f8de85d9774545e2c47463",
"sha256:3ad73dfc6f82e494195144bd3a129c7241e761179b7cb5c07b9a0ede99c686f3",
"sha256:3b243c77a822cd034dad53058d7c2abf80062aa6f4a32e9799c95d6391558631",
"sha256:404a00314e85eca9d46b80929571b938e97a143b4f2ddc2b2b3c91a4c4ead9c5",
"sha256:423b3ff76957d29d1cce1bc0d62ebaf9a3fdfaf62344e3fdec14619bb7b5ad3a",
"sha256:42d9149a2fff7affdd352d157fa5717033767857c11bd55aa4a519a44343dfef",
"sha256:625f25a6b7d795e8830cb70439453c9f163e6870e710ec99eba5722775b318f3",
"sha256:698c6409da58686f2df3d6f815491fd5b4c2de6817a45379517c92366eea208f",
"sha256:729f8f8363d32cebcb946de278324ab43d28096f36593be6281ca1ee86ce6559",
"sha256:8190770146a4c8ed5d330d5b5ad1c76251c63349d25c96b3094875b930c44692",
"sha256:878352408424dffaa695ffedf2f9f92844e116686923ed9aa8626fc30d32cfd1",
"sha256:8b984f0821577d889f3c7ca8445564175fb4ac7c7f9659b7c60bef95b2b70e76",
"sha256:8f841bbc21d3dad2111a94c490fb0a591b8612ffea86b8e5571746ae76a3deac",
"sha256:c22b27371b3866c92796e5d7907e914f0e58a36d3222c5d436ddd3f0e354227a",
"sha256:d0cdd5658b49a722783b8b4f61a6f1f9c75042d0e29a30ccb6cacc9b25f6d9e2",
"sha256:d40dc7f494b06dcee0d303e51a00451b2da6119acbeaccf8369f2d29e28917ac",
"sha256:d8491d4784aceb1f100ddb8e31239c54e4afab8d607928a9f7ef2469ec35ae01",
"sha256:dfc5080c38dde3f43d8fbb9c0539a7839683475226cf83e4b24363b227dfe552",
"sha256:e24e22c8d98d3c704bb3410bce9b69e122a8de487ad3dbfe9985d154e5c03a40",
"sha256:e7a01e53163818d56eabddcafdc2090e9daba178aad05516b20c6591c4811020",
"sha256:ee677635393414930541a096fc8e61634304bb0153e4e02b75685b11eba14cae",
"sha256:f0521af1b722265d824d6ad055acfe9bd3341765735c44b5a4d0069e189a0f40",
"sha256:f25c281f12c0da726c6ed00535ca5d1622ec755c30a3f8eafef26cf43fede694"
],
"version": "==1.1.0"
},
"six": {
"hashes": [
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
],
"version": "==1.11.0"
}
},
"develop": {}
}

1
libs/gym Submodule

Submodule libs/gym added at e944885e3b

169
pilesos2.log Normal file
View File

@@ -0,0 +1,169 @@
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.LEFT
Action.LEFT
Action.FORWARD
Action.SUCK
Action.LEFT
Action.FORWARD
Action.LEFT
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.SUCK
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.LEFT
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.LEFT
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.LEFT
Action.LEFT
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.LEFT
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.SUCK
Action.RIGHT
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.LEFT
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.FORWARD
Action.RIGHT
Action.FORWARD
Action.FORWARD
Action.DIE

0
src/__init__.py Normal file
View File

14
src/gym/__init__.py Normal file
View File

@@ -0,0 +1,14 @@
import distutils.version
import os
import sys
import warnings
from gym import error
from gym.utils import reraise
from gym.version import VERSION as __version__
from gym.core import Env, GoalEnv, Space, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
from gym.envs import make, spec
from gym import logger
__all__ = ["Env", "Space", "Wrapper", "make", "spec"]

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

343
src/gym/core.py Normal file
View File

@@ -0,0 +1,343 @@
from gym import logger
import gym
from gym import error
from gym.utils import closer
env_closer = closer.Closer()
# Env-related abstractions
class Env(object):
"""The main OpenAI Gym class. It encapsulates an environment with
arbitrary behind-the-scenes dynamics. An environment can be
partially or fully observed.
The main API methods that users of this class need to know are:
step
reset
render
close
seed
And set the following attributes:
action_space: The Space object corresponding to valid actions
observation_space: The Space object corresponding to valid observations
reward_range: A tuple corresponding to the min and max possible rewards
Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.
The methods are accessed publicly as "step", "reset", etc.. The
non-underscored versions are wrapper methods to which we may add
functionality over time.
"""
# Set this in SOME subclasses
metadata = {'render.modes': []}
reward_range = (-float('inf'), float('inf'))
spec = None
# Set these in ALL subclasses
action_space = None
observation_space = None
def step(self, action):
"""Run one timestep of the environment's dynamics. When end of
episode is reached, you are responsible for calling `reset()`
to reset this environment's state.
Accepts an action and returns a tuple (observation, reward, done, info).
Args:
action (object): an action provided by the environment
Returns:
observation (object): agent's observation of the current environment
reward (float) : amount of reward returned after previous action
done (boolean): whether the episode has ended, in which case further step() calls will return undefined results
info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
"""
raise NotImplementedError
def reset(self):
"""Resets the state of the environment and returns an initial observation.
Returns: observation (object): the initial observation of the
space.
"""
raise NotImplementedError
def render(self, mode='human'):
"""Renders the environment.
The set of supported modes varies per environment. (And some
environments do not support rendering at all.) By convention,
if mode is:
- human: render to the current display or terminal and
return nothing. Usually for human consumption.
- rgb_array: Return an numpy.ndarray with shape (x, y, 3),
representing RGB values for an x-by-y pixel image, suitable
for turning into a video.
- ansi: Return a string (str) or StringIO.StringIO containing a
terminal-style text representation. The text can include newlines
and ANSI escape sequences (e.g. for colors).
Note:
Make sure that your class's metadata 'render.modes' key includes
the list of supported modes. It's recommended to call super()
in implementations to use the functionality of this method.
Args:
mode (str): the mode to render with
close (bool): close all open renderings
Example:
class MyEnv(Env):
metadata = {'render.modes': ['human', 'rgb_array']}
def render(self, mode='human'):
if mode == 'rgb_array':
return np.array(...) # return RGB frame suitable for video
elif mode is 'human':
... # pop up a window and render
else:
super(MyEnv, self).render(mode=mode) # just raise an exception
"""
raise NotImplementedError
def close(self):
"""Override _close in your subclass to perform any necessary cleanup.
Environments will automatically close() themselves when
garbage collected or when the program exits.
"""
return
def seed(self, seed=None):
"""Sets the seed for this env's random number generator(s).
Note:
Some environments use multiple pseudorandom number generators.
We want to capture all such seeds used in order to ensure that
there aren't accidental correlations between multiple generators.
Returns:
list<bigint>: Returns the list of seeds used in this env's random
number generators. The first value in the list should be the
"main" seed, or the value which a reproducer should pass to
'seed'. Often, the main seed equals the provided 'seed', but
this won't be true if seed=None, for example.
"""
logger.warn("Could not seed environment %s", self)
return
@property
def unwrapped(self):
"""Completely unwrap this env.
Returns:
gym.Env: The base non-wrapped gym.Env instance
"""
return self
def __str__(self):
if self.spec is None:
return '<{} instance>'.format(type(self).__name__)
else:
return '<{}<{}>>'.format(type(self).__name__, self.spec.id)
class GoalEnv(Env):
"""A goal-based environment. It functions just as any regular OpenAI Gym environment but it
imposes a required structure on the observation_space. More concretely, the observation
space is required to contain at least three elements, namely `observation`, `desired_goal`, and
`achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve.
`achieved_goal` is the goal that it currently achieved instead. `observation` contains the
actual observations of the environment as per usual.
"""
def reset(self):
# Enforce that each GoalEnv uses a Goal-compatible observation space.
if not isinstance(self.observation_space, gym.spaces.Dict):
raise error.Error('GoalEnv requires an observation space of type gym.spaces.Dict')
result = super(GoalEnv, self).reset()
for key in ['observation', 'achieved_goal', 'desired_goal']:
if key not in result:
raise error.Error('GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(key))
return result
def compute_reward(self, achieved_goal, desired_goal, info):
"""Compute the step reward. This externalizes the reward function and makes
it dependent on an a desired goal and the one that was achieved. If you wish to include
additional rewards that are independent of the goal, you can include the necessary values
to derive it in info and compute it accordingly.
Args:
achieved_goal (object): the goal that was achieved during execution
desired_goal (object): the desired goal that we asked the agent to attempt to achieve
info (dict): an info dictionary with additional information
Returns:
float: The reward that corresponds to the provided achieved goal w.r.t. to the desired
goal. Note that the following should always hold true:
ob, reward, done, info = env.step()
assert reward == env.compute_reward(ob['achieved_goal'], ob['goal'], info)
"""
raise NotImplementedError()
# Space-related abstractions
class Space(object):
"""Defines the observation and action spaces, so you can write generic
code that applies to any Env. For example, you can choose a random
action.
"""
def __init__(self, shape=None, dtype=None):
import numpy as np # takes about 300-400ms to import, so we load lazily
self.shape = None if shape is None else tuple(shape)
self.dtype = None if dtype is None else np.dtype(dtype)
def sample(self):
"""
Uniformly randomly sample a random element of this space
"""
raise NotImplementedError
def contains(self, x):
"""
Return boolean specifying if x is a valid
member of this space
"""
raise NotImplementedError
__contains__ = contains
def to_jsonable(self, sample_n):
"""Convert a batch of samples from this space to a JSONable data type."""
# By default, assume identity is JSONable
return sample_n
def from_jsonable(self, sample_n):
"""Convert a JSONable data type to a batch of samples from this space."""
# By default, assume identity is JSONable
return sample_n
warn_once = True
def deprecated_warn_once(text):
global warn_once
if not warn_once: return
warn_once = False
logger.warn(text)
class Wrapper(Env):
env = None
def __init__(self, env):
self.env = env
self.action_space = self.env.action_space
self.observation_space = self.env.observation_space
self.reward_range = self.env.reward_range
self.metadata = self.env.metadata
@classmethod
def class_name(cls):
return cls.__name__
def step(self, action):
if hasattr(self, "_step"):
deprecated_warn_once("%s doesn't implement 'step' method, but it implements deprecated '_step' method." % type(self))
self.step = self._step
return self.step(action)
else:
deprecated_warn_once("%s doesn't implement 'step' method, " % type(self) +
"which is required for wrappers derived directly from Wrapper. Deprecated default implementation is used.")
return self.env.step(action)
def reset(self, **kwargs):
if hasattr(self, "_reset"):
deprecated_warn_once("%s doesn't implement 'reset' method, but it implements deprecated '_reset' method." % type(self))
self.reset = self._reset
return self._reset(**kwargs)
else:
deprecated_warn_once("%s doesn't implement 'reset' method, " % type(self) +
"which is required for wrappers derived directly from Wrapper. Deprecated default implementation is used.")
return self.env.reset(**kwargs)
def render(self, mode='human', **kwargs):
return self.env.render(mode, **kwargs)
def close(self):
if self.env:
return self.env.close()
def seed(self, seed=None):
return self.env.seed(seed)
def compute_reward(self, achieved_goal, desired_goal, info):
return self.env.compute_reward(achieved_goal, desired_goal, info)
def __str__(self):
return '<{}{}>'.format(type(self).__name__, self.env)
def __repr__(self):
return str(self)
@property
def unwrapped(self):
return self.env.unwrapped
@property
def spec(self):
return self.env.spec
class ObservationWrapper(Wrapper):
def step(self, action):
observation, reward, done, info = self.env.step(action)
return self.observation(observation), reward, done, info
def reset(self, **kwargs):
observation = self.env.reset(**kwargs)
return self.observation(observation)
def observation(self, observation):
deprecated_warn_once("%s doesn't implement 'observation' method. Maybe it implements deprecated '_observation' method." % type(self))
return self._observation(observation)
class RewardWrapper(Wrapper):
def reset(self):
return self.env.reset()
def step(self, action):
observation, reward, done, info = self.env.step(action)
return observation, self.reward(reward), done, info
def reward(self, reward):
deprecated_warn_once("%s doesn't implement 'reward' method. Maybe it implements deprecated '_reward' method." % type(self))
return self._reward(reward)
class ActionWrapper(Wrapper):
def step(self, action):
action = self.action(action)
return self.env.step(action)
def reset(self):
return self.env.reset()
def action(self, action):
deprecated_warn_once("%s doesn't implement 'action' method. Maybe it implements deprecated '_action' method." % type(self))
return self._action(action)
def reverse_action(self, action):
deprecated_warn_once("%s doesn't implement 'reverse_action' method. Maybe it implements deprecated '_reverse_action' method." % type(self))
return self._reverse_action(action)

113
src/gym/envs/README.md Normal file
View File

@@ -0,0 +1,113 @@
# Envs
These are the core integrated environments. Note that we may later
restructure any of the files, but will keep the environments available
at the relevant package's top-level. So for example, you should access
`AntEnv` as follows:
```
# Will be supported in future releases
from gym.envs import mujoco
mujoco.AntEnv
```
Rather than:
```
# May break in future releases
from gym.envs.mujoco import ant
ant.AntEnv
```
## How to create new environments for Gym
* Create a new repo called gym-foo, which should also be a PIP package.
* A good example is https://github.com/openai/gym-soccer.
* It should have at least the following files:
```sh
gym-foo/
README.md
setup.py
gym_foo/
__init__.py
envs/
__init__.py
foo_env.py
foo_extrahard_env.py
```
* `gym-foo/setup.py` should have:
```python
from setuptools import setup
setup(name='gym_foo',
version='0.0.1',
install_requires=['gym'] # And any other dependencies foo needs
)
```
* `gym-foo/gym_foo/__init__.py` should have:
```python
from gym.envs.registration import register
register(
id='foo-v0',
entry_point='gym_foo.envs:FooEnv',
)
register(
id='foo-extrahard-v0',
entry_point='gym_foo.envs:FooExtraHardEnv',
)
```
* `gym-foo/gym_foo/envs/__init__.py` should have:
```python
from gym_foo.envs.foo_env import FooEnv
from gym_foo.envs.foo_extrahard_env import FooExtraHardEnv
```
* `gym-foo/gym_foo/envs/foo_env.py` should look something like:
```python
import gym
from gym import error, spaces, utils
from gym.utils import seeding
class FooEnv(gym.Env):
metadata = {'render.modes': ['human']}
def __init__(self):
...
def step(self, action):
...
def reset(self):
...
def render(self, mode='human', close=False):
...
```
## How to add new environments to Gym, within this repo (not recommended for new environments)
1. Write your environment in an existing collection or a new collection. All collections are subfolders of `/gym/envs'.
2. Import your environment into the `__init__.py` file of the collection. This file will be located at `/gym/envs/my_collection/__init__.py`. Add `from gym.envs.my_collection.my_awesome_env import MyEnv` to this file.
3. Register your env in `/gym/envs/__init__.py`:
```
register(
id='MyEnv-v0',
entry_point='gym.envs.my_collection:MyEnv',
)
```
4. Add your environment to the scoreboard in `/gym/scoreboard/__init__.py`:
```
add_task(
id='MyEnv-v0',
summary="Super cool environment",
group='my_collection',
contributor='mygithubhandle',
)
```

540
src/gym/envs/__init__.py Normal file
View File

@@ -0,0 +1,540 @@
from gym.envs.registration import registry, register, make, spec
# Algorithmic
# ----------------------------------------
register(
id='Copy-v0',
entry_point='gym.envs.algorithmic:CopyEnv',
max_episode_steps=200,
reward_threshold=25.0,
)
register(
id='RepeatCopy-v0',
entry_point='gym.envs.algorithmic:RepeatCopyEnv',
max_episode_steps=200,
reward_threshold=75.0,
)
register(
id='ReversedAddition-v0',
entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
kwargs={'rows': 2},
max_episode_steps=200,
reward_threshold=25.0,
)
register(
id='ReversedAddition3-v0',
entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
kwargs={'rows': 3},
max_episode_steps=200,
reward_threshold=25.0,
)
register(
id='DuplicatedInput-v0',
entry_point='gym.envs.algorithmic:DuplicatedInputEnv',
max_episode_steps=200,
reward_threshold=9.0,
)
register(
id='Reverse-v0',
entry_point='gym.envs.algorithmic:ReverseEnv',
max_episode_steps=200,
reward_threshold=25.0,
)
# Classic
# ----------------------------------------
register(
id='CartPole-v0',
entry_point='gym.envs.classic_control:CartPoleEnv',
max_episode_steps=200,
reward_threshold=195.0,
)
register(
id='CartPole-v1',
entry_point='gym.envs.classic_control:CartPoleEnv',
max_episode_steps=500,
reward_threshold=475.0,
)
register(
id='MountainCar-v0',
entry_point='gym.envs.classic_control:MountainCarEnv',
max_episode_steps=200,
reward_threshold=-110.0,
)
register(
id='MountainCarContinuous-v0',
entry_point='gym.envs.classic_control:Continuous_MountainCarEnv',
max_episode_steps=999,
reward_threshold=90.0,
)
register(
id='Pendulum-v0',
entry_point='gym.envs.classic_control:PendulumEnv',
max_episode_steps=200,
)
register(
id='Acrobot-v1',
entry_point='gym.envs.classic_control:AcrobotEnv',
max_episode_steps=500,
)
# Box2d
# ----------------------------------------
register(
id='LunarLander-v2',
entry_point='gym.envs.box2d:LunarLander',
max_episode_steps=1000,
reward_threshold=200,
)
register(
id='LunarLanderContinuous-v2',
entry_point='gym.envs.box2d:LunarLanderContinuous',
max_episode_steps=1000,
reward_threshold=200,
)
register(
id='BipedalWalker-v2',
entry_point='gym.envs.box2d:BipedalWalker',
max_episode_steps=1600,
reward_threshold=300,
)
register(
id='BipedalWalkerHardcore-v2',
entry_point='gym.envs.box2d:BipedalWalkerHardcore',
max_episode_steps=2000,
reward_threshold=300,
)
register(
id='CarRacing-v0',
entry_point='gym.envs.box2d:CarRacing',
max_episode_steps=1000,
reward_threshold=900,
)
# Toy Text
# ----------------------------------------
register(
id='Blackjack-v0',
entry_point='gym.envs.toy_text:BlackjackEnv',
)
register(
id='KellyCoinflip-v0',
entry_point='gym.envs.toy_text:KellyCoinflipEnv',
reward_threshold=246.61,
)
register(
id='KellyCoinflipGeneralized-v0',
entry_point='gym.envs.toy_text:KellyCoinflipGeneralizedEnv',
)
register(
id='FrozenLake-v0',
entry_point='gym.envs.toy_text:FrozenLakeEnv',
kwargs={'map_name': '4x4'},
max_episode_steps=100,
reward_threshold=0.78, # optimum = .8196
)
register(
id='FrozenLake8x8-v0',
entry_point='gym.envs.toy_text:FrozenLakeEnv',
kwargs={'map_name': '8x8'},
max_episode_steps=200,
reward_threshold=0.99, # optimum = 1
)
register(
id='CliffWalking-v0',
entry_point='gym.envs.toy_text:CliffWalkingEnv',
)
register(
id='NChain-v0',
entry_point='gym.envs.toy_text:NChainEnv',
max_episode_steps=1000,
)
register(
id='Roulette-v0',
entry_point='gym.envs.toy_text:RouletteEnv',
max_episode_steps=100,
)
register(
id='Taxi-v2',
entry_point='gym.envs.toy_text.taxi:TaxiEnv',
reward_threshold=8, # optimum = 8.46
max_episode_steps=200,
)
register(
id='GuessingGame-v0',
entry_point='gym.envs.toy_text.guessing_game:GuessingGame',
max_episode_steps=200,
)
register(
id='HotterColder-v0',
entry_point='gym.envs.toy_text.hotter_colder:HotterColder',
max_episode_steps=200,
)
# Mujoco
# ----------------------------------------
# 2D
register(
id='Reacher-v2',
entry_point='gym.envs.mujoco:ReacherEnv',
max_episode_steps=50,
reward_threshold=-3.75,
)
register(
id='Pusher-v2',
entry_point='gym.envs.mujoco:PusherEnv',
max_episode_steps=100,
reward_threshold=0.0,
)
register(
id='Thrower-v2',
entry_point='gym.envs.mujoco:ThrowerEnv',
max_episode_steps=100,
reward_threshold=0.0,
)
register(
id='Striker-v2',
entry_point='gym.envs.mujoco:StrikerEnv',
max_episode_steps=100,
reward_threshold=0.0,
)
register(
id='InvertedPendulum-v2',
entry_point='gym.envs.mujoco:InvertedPendulumEnv',
max_episode_steps=1000,
reward_threshold=950.0,
)
register(
id='InvertedDoublePendulum-v2',
entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
max_episode_steps=1000,
reward_threshold=9100.0,
)
register(
id='HalfCheetah-v2',
entry_point='gym.envs.mujoco:HalfCheetahEnv',
max_episode_steps=1000,
reward_threshold=4800.0,
)
register(
id='Hopper-v2',
entry_point='gym.envs.mujoco:HopperEnv',
max_episode_steps=1000,
reward_threshold=3800.0,
)
register(
id='Swimmer-v2',
entry_point='gym.envs.mujoco:SwimmerEnv',
max_episode_steps=1000,
reward_threshold=360.0,
)
register(
id='Walker2d-v2',
max_episode_steps=1000,
entry_point='gym.envs.mujoco:Walker2dEnv',
)
register(
id='Ant-v2',
entry_point='gym.envs.mujoco:AntEnv',
max_episode_steps=1000,
reward_threshold=6000.0,
)
register(
id='Humanoid-v2',
entry_point='gym.envs.mujoco:HumanoidEnv',
max_episode_steps=1000,
)
register(
id='HumanoidStandup-v2',
entry_point='gym.envs.mujoco:HumanoidStandupEnv',
max_episode_steps=1000,
)
# Robotics
# ----------------------------------------
def _merge(a, b):
a.update(b)
return a
for reward_type in ['sparse', 'dense']:
suffix = 'Dense' if reward_type == 'dense' else ''
kwargs = {
'reward_type': reward_type,
}
# Fetch
register(
id='FetchSlide{}-v1'.format(suffix),
entry_point='gym.envs.robotics:FetchSlideEnv',
kwargs=kwargs,
max_episode_steps=50,
)
register(
id='FetchPickAndPlace{}-v1'.format(suffix),
entry_point='gym.envs.robotics:FetchPickAndPlaceEnv',
kwargs=kwargs,
max_episode_steps=50,
)
register(
id='FetchReach{}-v1'.format(suffix),
entry_point='gym.envs.robotics:FetchReachEnv',
kwargs=kwargs,
max_episode_steps=50,
)
register(
id='FetchPush{}-v1'.format(suffix),
entry_point='gym.envs.robotics:FetchPushEnv',
kwargs=kwargs,
max_episode_steps=50,
)
# Hand
register(
id='HandReach{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandReachEnv',
kwargs=kwargs,
max_episode_steps=50,
)
register(
id='HandManipulateBlockRotateZ{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandBlockEnv',
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'z'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulateBlockRotateParallel{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandBlockEnv',
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'parallel'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulateBlockRotateXYZ{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandBlockEnv',
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulateBlockFull{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandBlockEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
# Alias for "Full"
register(
id='HandManipulateBlock{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandBlockEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulateEggRotate{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandEggEnv',
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulateEggFull{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandEggEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
# Alias for "Full"
register(
id='HandManipulateEgg{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandEggEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulatePenRotate{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandPenEnv',
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulatePenFull{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandPenEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
# Alias for "Full"
register(
id='HandManipulatePen{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandPenEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
# Atari
# ----------------------------------------
# # print ', '.join(["'{}'".format(name.split('.')[0]) for name in atari_py.list_games()])
for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis',
'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival',
'centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk',
'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar',
'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kangaroo', 'krull', 'kung_fu_master',
'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan',
'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing',
'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down',
'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']:
for obs_type in ['image', 'ram']:
# space_invaders should yield SpaceInvaders-v0 and SpaceInvaders-ram-v0
name = ''.join([g.capitalize() for g in game.split('_')])
if obs_type == 'ram':
name = '{}-ram'.format(name)
nondeterministic = False
if game == 'elevator_action' and obs_type == 'ram':
# ElevatorAction-ram-v0 seems to yield slightly
# non-deterministic observations about 10% of the time. We
# should track this down eventually, but for now we just
# mark it as nondeterministic.
nondeterministic = True
register(
id='{}-v0'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type, 'repeat_action_probability': 0.25},
max_episode_steps=10000,
nondeterministic=nondeterministic,
)
register(
id='{}-v4'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type},
max_episode_steps=100000,
nondeterministic=nondeterministic,
)
# Standard Deterministic (as in the original DeepMind paper)
if game == 'space_invaders':
frameskip = 3
else:
frameskip = 4
# Use a deterministic frame skip.
register(
id='{}Deterministic-v0'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip, 'repeat_action_probability': 0.25},
max_episode_steps=100000,
nondeterministic=nondeterministic,
)
register(
id='{}Deterministic-v4'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip},
max_episode_steps=100000,
nondeterministic=nondeterministic,
)
register(
id='{}NoFrameskip-v0'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1, 'repeat_action_probability': 0.25},
# A frameskip of 1 means we get every frame
max_episode_steps=frameskip * 100000,
nondeterministic=nondeterministic,
)
# No frameskip. (Atari has no entropy source, so these are
# deterministic environments.)
register(
id='{}NoFrameskip-v4'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1}, # A frameskip of 1 means we get every frame
max_episode_steps=frameskip * 100000,
nondeterministic=nondeterministic,
)
# Unit test
# ---------
register(
id='CubeCrash-v0',
entry_point='gym.envs.unittest:CubeCrash',
reward_threshold=0.9,
)
register(
id='CubeCrashSparse-v0',
entry_point='gym.envs.unittest:CubeCrashSparse',
reward_threshold=0.9,
)
register(
id='CubeCrashScreenBecomesBlack-v0',
entry_point='gym.envs.unittest:CubeCrashScreenBecomesBlack',
reward_threshold=0.9,
)
register(
id='MemorizeDigits-v0',
entry_point='gym.envs.unittest:MemorizeDigits',
reward_threshold=20,
)
register(
id='Pilesos-v0',
entry_point='gym.envs.pilesos:PilesosEnv',
max_episode_steps=5000,
)

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,5 @@
from gym.envs.algorithmic.copy_ import CopyEnv
from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv
from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv
from gym.envs.algorithmic.reverse import ReverseEnv
from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv

View File

@@ -0,0 +1,326 @@
"""
Algorithmic environments have the following traits in common:
- A 1-d "input tape" or 2-d "input grid" of characters
- A target string which is a deterministic function of the input characters
Agents control a read head that moves over the input tape. Observations consist
of the single character currently under the read head. The read head may fall
off the end of the tape in any direction. When this happens, agents will observe
a special blank character (with index=env.base) until they get back in bounds.
Actions consist of 3 sub-actions:
- Direction to move the read head (left or right, plus up and down for 2-d envs)
- Whether to write to the output tape
- Which character to write (ignored if the above sub-action is 0)
An episode ends when:
- The agent writes the full target string to the output tape.
- The agent writes an incorrect character.
- The agent runs out the time limit. (Which is fairly conservative.)
Reward schedule:
write a correct character: +1
write a wrong character: -.5
run out the clock: -1
otherwise: 0
In the beginning, input strings will be fairly short. After an environment has
been consistently solved over some window of episodes, the environment will
increase the average length of generated strings. Typical env specs require
leveling up many times to reach their reward threshold.
"""
from gym import Env, logger
from gym.spaces import Discrete, Tuple
from gym.utils import colorize, seeding
import numpy as np
from six import StringIO
import sys
import math
class AlgorithmicEnv(Env):
metadata = {'render.modes': ['human', 'ansi']}
# Only 'promote' the length of generated input strings if the worst of the
# last n episodes was no more than this far from the maximum reward
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -1.0
def __init__(self, base=10, chars=False, starting_min_length=2):
"""
base: Number of distinct characters.
chars: If True, use uppercase alphabet. Otherwise, digits. Only affects
rendering.
starting_min_length: Minimum input string length. Ramps up as episodes
are consistently solved.
"""
self.base = base
# Keep track of this many past episodes
self.last = 10
# Cumulative reward earned this episode
self.episode_total_reward = None
# Running tally of reward shortfalls. e.g. if there were 10 points to earn and
# we got 8, we'd append -2
AlgorithmicEnv.reward_shortfalls = []
if chars:
self.charmap = [chr(ord('A')+i) for i in range(base)]
else:
self.charmap = [str(i) for i in range(base)]
self.charmap.append(' ')
# TODO: Not clear why this is a class variable rather than instance.
# Could lead to some spooky action at a distance if someone is working
# with multiple algorithmic envs at once. Also makes testing tricky.
AlgorithmicEnv.min_length = starting_min_length
# Three sub-actions:
# 1. Move read head left or write (or up/down)
# 2. Write or not
# 3. Which character to write. (Ignored if should_write=0)
self.action_space = Tuple(
[Discrete(len(self.MOVEMENTS)), Discrete(2), Discrete(self.base)]
)
# Can see just what is on the input tape (one of n characters, or nothing)
self.observation_space = Discrete(self.base + 1)
self.seed()
self.reset()
@classmethod
def _movement_idx(kls, movement_name):
return kls.MOVEMENTS.index(movement_name)
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def _get_obs(self, pos=None):
"""Return an observation corresponding to the given read head position
(or the current read head position, if none is given)."""
raise NotImplemented
def _get_str_obs(self, pos=None):
ret = self._get_obs(pos)
return self.charmap[ret]
def _get_str_target(self, pos):
"""Return the ith character of the target string (or " " if index
out of bounds)."""
if pos < 0 or len(self.target) <= pos:
return " "
else:
return self.charmap[self.target[pos]]
def render_observation(self):
"""Return a string representation of the input tape/grid."""
raise NotImplementedError
def render(self, mode='human'):
outfile = StringIO() if mode == 'ansi' else sys.stdout
inp = "Total length of input instance: %d, step: %d\n" % (self.input_width, self.time)
outfile.write(inp)
x, y, action = self.read_head_position, self.write_head_position, self.last_action
if action is not None:
inp_act, out_act, pred = action
outfile.write("=" * (len(inp) - 1) + "\n")
y_str = "Output Tape : "
target_str = "Targets : "
if action is not None:
pred_str = self.charmap[pred]
x_str = self.render_observation()
for i in range(-2, len(self.target) + 2):
target_str += self._get_str_target(i)
if i < y - 1:
y_str += self._get_str_target(i)
elif i == (y - 1):
if action is not None and out_act == 1:
color = 'green' if pred == self.target[i] else 'red'
y_str += colorize(pred_str, color, highlight=True)
else:
y_str += self._get_str_target(i)
outfile.write(x_str)
outfile.write(y_str + "\n")
outfile.write(target_str + "\n\n")
if action is not None:
outfile.write("Current reward : %.3f\n" % self.last_reward)
outfile.write("Cumulative reward : %.3f\n" % self.episode_total_reward)
move = self.MOVEMENTS[inp_act]
outfile.write("Action : Tuple(move over input: %s,\n" % move)
out_act = out_act == 1
outfile.write(" write to the output tape: %s,\n" % out_act)
outfile.write(" prediction: %s)\n" % pred_str)
else:
outfile.write("\n" * 5)
return outfile
@property
def input_width(self):
return len(self.input_data)
def step(self, action):
assert self.action_space.contains(action)
self.last_action = action
inp_act, out_act, pred = action
done = False
reward = 0.0
self.time += 1
assert 0 <= self.write_head_position
if out_act == 1:
try:
correct = pred == self.target[self.write_head_position]
except IndexError:
logger.warn("It looks like you're calling step() even though this "+
"environment has already returned done=True. You should always call "+
"reset() once you receive done=True. Any further steps are undefined "+
"behaviour.")
correct = False
if correct:
reward = 1.0
else:
# Bail as soon as a wrong character is written to the tape
reward = -0.5
done = True
self.write_head_position += 1
if self.write_head_position >= len(self.target):
done = True
self._move(inp_act)
if self.time > self.time_limit:
reward = -1.0
done = True
obs = self._get_obs()
self.last_reward = reward
self.episode_total_reward += reward
return (obs, reward, done, {})
@property
def time_limit(self):
"""If an agent takes more than this many timesteps, end the episode
immediately and return a negative reward."""
# (Seemingly arbitrary)
return self.input_width + len(self.target) + 4
def _check_levelup(self):
"""Called between episodes. Update our running record of episode rewards
and, if appropriate, 'level up' minimum input length."""
if self.episode_total_reward is None:
# This is before the first episode/call to reset(). Nothing to do
return
AlgorithmicEnv.reward_shortfalls.append(self.episode_total_reward - len(self.target))
AlgorithmicEnv.reward_shortfalls = AlgorithmicEnv.reward_shortfalls[-self.last:]
if len(AlgorithmicEnv.reward_shortfalls) == self.last and \
min(AlgorithmicEnv.reward_shortfalls) >= self.MIN_REWARD_SHORTFALL_FOR_PROMOTION and \
AlgorithmicEnv.min_length < 30:
AlgorithmicEnv.min_length += 1
AlgorithmicEnv.reward_shortfalls = []
def reset(self):
self._check_levelup()
self.last_action = None
self.last_reward = 0
self.read_head_position = self.READ_HEAD_START
self.write_head_position = 0
self.episode_total_reward = 0.0
self.time = 0
length = self.np_random.randint(3) + AlgorithmicEnv.min_length
self.input_data = self.generate_input_data(length)
self.target = self.target_from_input_data(self.input_data)
return self._get_obs()
def generate_input_data(self, size):
raise NotImplemented
def target_from_input_data(self, input_data):
raise NotImplemented("Subclasses must implement")
def _move(self, movement):
raise NotImplemented
class TapeAlgorithmicEnv(AlgorithmicEnv):
"""An algorithmic env with a 1-d input tape."""
MOVEMENTS = ['left', 'right']
READ_HEAD_START = 0
def _move(self, movement):
named = self.MOVEMENTS[movement]
self.read_head_position += 1 if named == 'right' else -1
def _get_obs(self, pos=None):
if pos is None:
pos = self.read_head_position
if pos < 0:
return self.base
if isinstance(pos, np.ndarray):
pos = pos.item()
try:
return self.input_data[pos]
except IndexError:
return self.base
def generate_input_data(self, size):
return [self.np_random.randint(self.base) for _ in range(size)]
def render_observation(self):
x = self.read_head_position
x_str = "Observation Tape : "
for i in range(-2, self.input_width + 2):
if i == x:
x_str += colorize(self._get_str_obs(np.array([i])), 'green', highlight=True)
else:
x_str += self._get_str_obs(np.array([i]))
x_str += "\n"
return x_str
class GridAlgorithmicEnv(AlgorithmicEnv):
"""An algorithmic env with a 2-d input grid."""
MOVEMENTS = ['left', 'right', 'up', 'down']
READ_HEAD_START = (0, 0)
def __init__(self, rows, *args, **kwargs):
self.rows = rows
AlgorithmicEnv.__init__(self, *args, **kwargs)
def _move(self, movement):
named = self.MOVEMENTS[movement]
x, y = self.read_head_position
if named == 'left':
x -= 1
elif named == 'right':
x += 1
elif named == 'up':
y -= 1
elif named == 'down':
y += 1
else:
raise ValueError("Unrecognized direction: {}".format(named))
self.read_head_position = x, y
def generate_input_data(self, size):
return [
[self.np_random.randint(self.base) for _ in range(self.rows)]
for __ in range(size)
]
def _get_obs(self, pos=None):
if pos is None:
pos = self.read_head_position
x, y = pos
if any(idx < 0 for idx in pos):
return self.base
try:
return self.input_data[x][y]
except IndexError:
return self.base
def render_observation(self):
x = self.read_head_position
label = "Observation Grid : "
x_str = ""
for j in range(-1, self.rows+1):
if j != -1:
x_str += " " * len(label)
for i in range(-2, self.input_width + 2):
if i == x[0] and j == x[1]:
x_str += colorize(self._get_str_obs((i, j)), 'green', highlight=True)
else:
x_str += self._get_str_obs((i, j))
x_str += "\n"
x_str = label + x_str
return x_str

View File

@@ -0,0 +1,13 @@
"""
Task is to copy content from the input tape to
the output tape. http://arxiv.org/abs/1511.07275
"""
from gym.envs.algorithmic import algorithmic_env
class CopyEnv(algorithmic_env.TapeAlgorithmicEnv):
def __init__(self, base=5, chars=True):
super(CopyEnv, self).__init__(base=base, chars=chars)
def target_from_input_data(self, input_data):
return input_data

View File

@@ -0,0 +1,24 @@
"""
Task is to return every nth character from the input tape.
http://arxiv.org/abs/1511.07275
"""
from __future__ import division
from gym.envs.algorithmic import algorithmic_env
class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv):
def __init__(self, duplication=2, base=5):
self.duplication = duplication
super(DuplicatedInputEnv, self).__init__(base=base, chars=True)
def generate_input_data(self, size):
res = []
if size < self.duplication:
size = self.duplication
for i in range(size//self.duplication):
char = self.np_random.randint(self.base)
for _ in range(self.duplication):
res.append(char)
return res
def target_from_input_data(self, input_data):
return [input_data[i] for i in range(0, len(input_data), self.duplication)]

View File

@@ -0,0 +1,15 @@
"""
Task is to copy content multiple times from the input tape to
the output tape. http://arxiv.org/abs/1511.07275
"""
from gym.envs.algorithmic import algorithmic_env
class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv):
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
def __init__(self, base=5):
super(RepeatCopyEnv, self).__init__(base=base, chars=True)
self.last = 50
def target_from_input_data(self, input_data):
return input_data + list(reversed(input_data)) + input_data

View File

@@ -0,0 +1,15 @@
"""
Task is to reverse content over the input tape.
http://arxiv.org/abs/1511.07275
"""
from gym.envs.algorithmic import algorithmic_env
class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv):
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
def __init__(self, base=2):
super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1)
self.last = 50
def target_from_input_data(self, input_str):
return list(reversed(input_str))

View File

@@ -0,0 +1,30 @@
from __future__ import division
import numpy as np
from gym.envs.algorithmic import algorithmic_env
class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
def __init__(self, rows=2, base=3):
super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False)
def target_from_input_data(self, input_strings):
curry = 0
target = []
for digits in input_strings:
total = sum(digits) + curry
target.append(total % self.base)
curry = total // self.base
if curry > 0:
target.append(curry)
return target
@property
def time_limit(self):
# Quirk preserved for the sake of consistency: add the length of the input
# rather than the length of the desired output (which may differ if there's
# an extra carried digit).
# TODO: It seems like this time limit is so strict as to make Addition3-v0
# unsolvable, since agents aren't even given enough time steps to look at
# all the digits. (The solutions on the scoreboard seem to only work by
# save-scumming.)
return self.input_width*2 + 4

View File

@@ -0,0 +1,239 @@
from gym.envs import algorithmic as alg
import unittest
# All concrete subclasses of AlgorithmicEnv
ALL_ENVS = [
alg.copy_.CopyEnv,
alg.duplicated_input.DuplicatedInputEnv,
alg.repeat_copy.RepeatCopyEnv,
alg.reverse.ReverseEnv,
alg.reversed_addition.ReversedAdditionEnv,
]
ALL_TAPE_ENVS = [env for env in ALL_ENVS
if issubclass(env, alg.algorithmic_env.TapeAlgorithmicEnv)]
ALL_GRID_ENVS = [env for env in ALL_ENVS
if issubclass(env, alg.algorithmic_env.GridAlgorithmicEnv)]
def imprint(env, input_arr):
"""Monkey-patch the given environment so that when reset() is called, the
input tape/grid will be set to the given data, rather than being randomly
generated."""
env.generate_input_data = lambda _: input_arr
class TestAlgorithmicEnvInteractions(unittest.TestCase):
"""Test some generic behaviour not specific to any particular algorithmic
environment. Movement, allocation of rewards, etc."""
CANNED_INPUT = [0, 1]
ENV_KLS = alg.copy_.CopyEnv
LEFT, RIGHT = ENV_KLS._movement_idx('left'), ENV_KLS._movement_idx('right')
def setUp(self):
self.env = self.ENV_KLS(base=2, chars=True)
imprint(self.env, self.CANNED_INPUT)
def test_successful_interaction(self):
obs = self.env.reset()
self.assertEqual(obs, 0)
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 0])
self.assertEqual(obs, 1)
self.assertGreater(reward, 0)
self.assertFalse(done)
obs, reward, done, _ = self.env.step([self.LEFT, 1, 1])
self.assertTrue(done)
self.assertGreater(reward, 0)
def test_bad_output_fail_fast(self):
obs = self.env.reset()
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
self.assertTrue(done)
self.assertLess(reward, 0)
def test_levelup(self):
obs = self.env.reset()
# Kind of a hack
alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls = []
min_length = self.env.min_length
for i in range(self.env.last):
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 0])
self.assertFalse(done)
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
self.assertTrue(done)
self.env.reset()
if i < self.env.last-1:
self.assertEqual(len(alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls), i+1)
else:
# Should have leveled up on the last iteration
self.assertEqual(self.env.min_length, min_length+1)
self.assertEqual(len(alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls), 0)
def test_walk_off_the_end(self):
obs = self.env.reset()
# Walk off the end
obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
self.assertEqual(obs, self.env.base)
self.assertEqual(r, 0)
self.assertFalse(done)
# Walk further off track
obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
self.assertEqual(obs, self.env.base)
self.assertFalse(done)
# Return to the first input character
obs, r, done, _ = self.env.step([self.RIGHT, 0, 0])
self.assertEqual(obs, self.env.base)
self.assertFalse(done)
obs, r, done, _ = self.env.step([self.RIGHT, 0, 0])
self.assertEqual(obs, 0)
def test_grid_naviation(self):
env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=6)
N,S,E,W = [env._movement_idx(named_dir) for named_dir in ['up', 'down', 'right', 'left']]
# Corresponds to a grid that looks like...
# 0 1 2
# 3 4 5
canned = [ [0, 3], [1, 4], [2, 5] ]
imprint(env, canned)
obs = env.reset()
self.assertEqual(obs, 0)
navigation = [
(S, 3), (N, 0), (E, 1), (S, 4), (S, 6), (E, 6), (N, 5), (N, 2), (W, 1)
]
for (movement, expected_obs) in navigation:
obs, reward, done, _ = env.step([movement, 0, 0])
self.assertEqual(reward, 0)
self.assertFalse(done)
self.assertEqual(obs, expected_obs)
def test_grid_success(self):
env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=3)
canned = [ [1, 2], [1, 0], [2, 2] ]
imprint(env, canned)
obs = env.reset()
target = [0, 2, 1, 1]
self.assertEqual(env.target, target)
self.assertEqual(obs, 1)
for i, target_digit in enumerate(target):
obs, reward, done, _ = env.step([0, 1, target_digit])
self.assertGreater(reward, 0)
self.assertEqual(done, i==len(target)-1)
def test_sane_time_limit(self):
obs = self.env.reset()
self.assertLess(self.env.time_limit, 100)
for _ in range(100):
obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
if done:
return
self.fail("Time limit wasn't enforced")
def test_rendering(self):
env = self.env
obs = env.reset()
self.assertEqual(env._get_str_obs(), 'A')
self.assertEqual(env._get_str_obs(1), 'B')
self.assertEqual(env._get_str_obs(-1), ' ')
self.assertEqual(env._get_str_obs(2), ' ')
self.assertEqual(env._get_str_target(0), 'A')
self.assertEqual(env._get_str_target(1), 'B')
# Test numerical alphabet rendering
env = self.ENV_KLS(base=3, chars=False)
imprint(env, self.CANNED_INPUT)
env.reset()
self.assertEqual(env._get_str_obs(), '0')
self.assertEqual(env._get_str_obs(1), '1')
class TestTargets(unittest.TestCase):
"""Test the rules mapping input strings/grids to target outputs."""
def test_reverse_target(self):
input_expected = [
([0], [0]),
([0, 1], [1, 0]),
([1, 1], [1, 1]),
([1, 0, 1], [1, 0, 1]),
([0, 0, 1, 1], [1, 1, 0, 0]),
]
env = alg.reverse.ReverseEnv()
for input_arr, expected in input_expected:
target = env.target_from_input_data(input_arr)
self.assertEqual(target, expected)
def test_reversed_addition_target(self):
env = alg.reversed_addition.ReversedAdditionEnv(base=3)
input_expected = [
([[1,1], [1,1]], [2, 2]),
([[2,2], [0,1]], [1, 2]),
([[2,1], [1,1], [1,1], [1,0]], [0, 0, 0, 2]),
]
for (input_grid, expected_target) in input_expected:
self.assertEqual(env.target_from_input_data(input_grid), expected_target)
def test_reversed_addition_3rows(self):
env = alg.reversed_addition.ReversedAdditionEnv(base=3, rows=3)
input_expected = [
([[1,1,0],[0,1,1]], [2, 2]),
([[1,1,2],[0,1,1]], [1,0,1]),
]
for (input_grid, expected_target) in input_expected:
self.assertEqual(env.target_from_input_data(input_grid), expected_target)
def test_copy_target(self):
env = alg.copy_.CopyEnv()
self.assertEqual(env.target_from_input_data([0, 1, 2]), [0, 1, 2])
def test_duplicated_input_target(self):
env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
self.assertEqual(env.target_from_input_data([0, 0, 0, 0, 1, 1]), [0, 0, 1])
def test_repeat_copy_target(self):
env = alg.repeat_copy.RepeatCopyEnv()
self.assertEqual(env.target_from_input_data([0, 1, 2]), [0, 1, 2, 2, 1, 0, 0, 1, 2])
class TestInputGeneration(unittest.TestCase):
"""Test random input generation.
"""
def test_tape_inputs(self):
for env_kls in ALL_TAPE_ENVS:
env = env_kls()
for size in range(2,5):
input_tape = env.generate_input_data(size)
self.assertTrue(all(0<=x<=env.base for x in input_tape),
"Invalid input tape from env {}: {}".format(env_kls, input_tape))
# DuplicatedInput needs to generate inputs with even length,
# so it may be short one
self.assertLessEqual(len(input_tape), size)
def test_grid_inputs(self):
for env_kls in ALL_GRID_ENVS:
env = env_kls()
for size in range(2, 5):
input_grid = env.generate_input_data(size)
# Should get "size" sublists, each of length self.rows (not the
# opposite, as you might expect)
self.assertEqual(len(input_grid), size)
self.assertTrue(all(len(col) == env.rows for col in input_grid))
self.assertTrue(all(0<=x<=env.base for x in input_grid[0]))
def test_duplicatedinput_inputs(self):
"""The duplicated_input env needs to generate strings with the appropriate
amount of repetiion."""
env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
input_tape = env.generate_input_data(4)
self.assertEqual(len(input_tape), 4)
self.assertEqual(input_tape[0], input_tape[1])
self.assertEqual(input_tape[2], input_tape[3])
# If requested input size isn't a multiple of duplication, go lower
input_tape = env.generate_input_data(3)
self.assertEqual(len(input_tape), 2)
self.assertEqual(input_tape[0], input_tape[1])
# If requested input size is *less than* duplication, go up
input_tape = env.generate_input_data(1)
self.assertEqual(len(input_tape), 2)
self.assertEqual(input_tape[0], input_tape[1])
env = alg.duplicated_input.DuplicatedInputEnv(duplication=3)
input_tape = env.generate_input_data(6)
self.assertEqual(len(input_tape), 6)
self.assertEqual(input_tape[0], input_tape[1])
self.assertEqual(input_tape[1], input_tape[2])
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1 @@
from gym.envs.atari.atari_env import AtariEnv

View File

@@ -0,0 +1,192 @@
import numpy as np
import os
import gym
from gym import error, spaces
from gym import utils
from gym.utils import seeding
try:
import atari_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you can install Atari dependencies by running 'pip install gym[atari]'.)".format(e))
def to_ram(ale):
ram_size = ale.getRAMSize()
ram = np.zeros((ram_size),dtype=np.uint8)
ale.getRAM(ram)
return ram
class AtariEnv(gym.Env, utils.EzPickle):
metadata = {'render.modes': ['human', 'rgb_array']}
def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
"""Frameskip should be either a tuple (indicating a random range to
choose from, with the top value exclude), or an int."""
utils.EzPickle.__init__(self, game, obs_type, frameskip, repeat_action_probability)
assert obs_type in ('ram', 'image')
self.game_path = atari_py.get_game_path(game)
if not os.path.exists(self.game_path):
raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
self._obs_type = obs_type
self.frameskip = frameskip
self.ale = atari_py.ALEInterface()
self.viewer = None
# Tune (or disable) ALE's action repeat:
# https://github.com/openai/gym/issues/349
assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)
self.seed()
self._action_set = self.ale.getMinimalActionSet()
self.action_space = spaces.Discrete(len(self._action_set))
(screen_width,screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
def seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. This gets passed as a uint, but gets
# checked as an int elsewhere, so we need to keep it below
# 2**31.
seed2 = seeding.hash_seed(seed1 + 1) % 2**31
# Empirically, we need to seed before loading the ROM.
self.ale.setInt(b'random_seed', seed2)
self.ale.loadROM(self.game_path)
return [seed1, seed2]
def step(self, a):
reward = 0.0
action = self._action_set[a]
if isinstance(self.frameskip, int):
num_steps = self.frameskip
else:
num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
for _ in range(num_steps):
reward += self.ale.act(action)
ob = self._get_obs()
return ob, reward, self.ale.game_over(), {"ale.lives": self.ale.lives()}
def _get_image(self):
return self.ale.getScreenRGB2()
def _get_ram(self):
return to_ram(self.ale)
@property
def _n_actions(self):
return len(self._action_set)
def _get_obs(self):
if self._obs_type == 'ram':
return self._get_ram()
elif self._obs_type == 'image':
img = self._get_image()
return img
# return: (states, observations)
def reset(self):
self.ale.reset_game()
return self._get_obs()
def render(self, mode='human'):
img = self._get_image()
if mode == 'rgb_array':
return img
elif mode == 'human':
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.SimpleImageViewer()
self.viewer.imshow(img)
return self.viewer.isopen
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
def get_action_meanings(self):
return [ACTION_MEANING[i] for i in self._action_set]
def get_keys_to_action(self):
KEYWORD_TO_KEY = {
'UP': ord('w'),
'DOWN': ord('s'),
'LEFT': ord('a'),
'RIGHT': ord('d'),
'FIRE': ord(' '),
}
keys_to_action = {}
for action_id, action_meaning in enumerate(self.get_action_meanings()):
keys = []
for keyword, key in KEYWORD_TO_KEY.items():
if keyword in action_meaning:
keys.append(key)
keys = tuple(sorted(keys))
assert keys not in keys_to_action
keys_to_action[keys] = action_id
return keys_to_action
def clone_state(self):
"""Clone emulator state w/o system state. Restoring this state will
*not* give an identical environment. For complete cloning and restoring
of the full state, see `{clone,restore}_full_state()`."""
state_ref = self.ale.cloneState()
state = self.ale.encodeState(state_ref)
self.ale.deleteState(state_ref)
return state
def restore_state(self, state):
"""Restore emulator state w/o system state."""
state_ref = self.ale.decodeState(state)
self.ale.restoreState(state_ref)
self.ale.deleteState(state_ref)
def clone_full_state(self):
"""Clone emulator state w/ system state including pseudorandomness.
Restoring this state will give an identical environment."""
state_ref = self.ale.cloneSystemState()
state = self.ale.encodeState(state_ref)
self.ale.deleteState(state_ref)
return state
def restore_full_state(self, state):
"""Restore emulator state w/ system state including pseudorandomness."""
state_ref = self.ale.decodeState(state)
self.ale.restoreSystemState(state_ref)
self.ale.deleteState(state_ref)
ACTION_MEANING = {
0 : "NOOP",
1 : "FIRE",
2 : "UP",
3 : "RIGHT",
4 : "LEFT",
5 : "DOWN",
6 : "UPRIGHT",
7 : "UPLEFT",
8 : "DOWNRIGHT",
9 : "DOWNLEFT",
10 : "UPFIRE",
11 : "RIGHTFIRE",
12 : "LEFTFIRE",
13 : "DOWNFIRE",
14 : "UPRIGHTFIRE",
15 : "UPLEFTFIRE",
16 : "DOWNRIGHTFIRE",
17 : "DOWNLEFTFIRE",
}

View File

@@ -0,0 +1,4 @@
from gym.envs.box2d.lunar_lander import LunarLander
from gym.envs.box2d.lunar_lander import LunarLanderContinuous
from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
from gym.envs.box2d.car_racing import CarRacing

View File

@@ -0,0 +1,581 @@
import sys, math
import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
import gym
from gym import spaces
from gym.utils import colorize, seeding, EzPickle
# This is simple 4-joints walker robot environment.
#
# There are two versions:
#
# - Normal, with slightly uneven terrain.
#
# - Hardcore with ladders, stumps, pitfalls.
#
# Reward is given for moving forward, total 300+ points up to the far end. If the robot falls,
# it gets -100. Applying motor torque costs a small amount of points, more optimal agent
# will get better score.
#
# Heuristic is provided for testing, it's also useful to get demonstrations to
# learn from. To run heuristic:
#
# python gym/envs/box2d/bipedal_walker.py
#
# State consists of hull angle speed, angular velocity, horizontal speed, vertical speed,
# position of joints and joints angular speed, legs contact with ground, and 10 lidar
# rangefinder measurements to help to deal with the hardcore version. There's no coordinates
# in the state vector. Lidar is less useful in normal version, but it works.
#
# To solve the game you need to get 300 points in 1600 time steps.
#
# To solve hardcore version you need 300 points in 2000 time steps.
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MOTORS_TORQUE = 80
SPEED_HIP = 4
SPEED_KNEE = 6
LIDAR_RANGE = 160/SCALE
INITIAL_RANDOM = 5
HULL_POLY =[
(-30,+9), (+6,+9), (+34,+1),
(+34,-8), (-30,-8)
]
LEG_DOWN = -8/SCALE
LEG_W, LEG_H = 8/SCALE, 34/SCALE
VIEWPORT_W = 600
VIEWPORT_H = 400
TERRAIN_STEP = 14/SCALE
TERRAIN_LENGTH = 200 # in steps
TERRAIN_HEIGHT = VIEWPORT_H/SCALE/4
TERRAIN_GRASS = 10 # low long are grass spots, in steps
TERRAIN_STARTPAD = 20 # in steps
FRICTION = 2.5
HULL_FD = fixtureDef(
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in HULL_POLY ]),
density=5.0,
friction=0.1,
categoryBits=0x0020,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
LEG_FD = fixtureDef(
shape=polygonShape(box=(LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
LOWER_FD = fixtureDef(
shape=polygonShape(box=(0.8*LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if self.env.hull==contact.fixtureA.body or self.env.hull==contact.fixtureB.body:
self.env.game_over = True
for leg in [self.env.legs[1], self.env.legs[3]]:
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
leg.ground_contact = True
def EndContact(self, contact):
for leg in [self.env.legs[1], self.env.legs[3]]:
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
leg.ground_contact = False
class BipedalWalker(gym.Env, EzPickle):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : FPS
}
hardcore = False
def __init__(self):
EzPickle.__init__(self)
self.seed()
self.viewer = None
self.world = Box2D.b2World()
self.terrain = None
self.hull = None
self.prev_shaping = None
self.fd_polygon = fixtureDef(
shape = polygonShape(vertices=
[(0, 0),
(1, 0),
(1, -1),
(0, -1)]),
friction = FRICTION)
self.fd_edge = fixtureDef(
shape = edgeShape(vertices=
[(0, 0),
(1, 1)]),
friction = FRICTION,
categoryBits=0x0001,
)
self.reset()
high = np.array([np.inf]*24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
self.observation_space = spaces.Box(-high, high)
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def _destroy(self):
if not self.terrain: return
self.world.contactListener = None
for t in self.terrain:
self.world.DestroyBody(t)
self.terrain = []
self.world.DestroyBody(self.hull)
self.hull = None
for leg in self.legs:
self.world.DestroyBody(leg)
self.legs = []
self.joints = []
def _generate_terrain(self, hardcore):
GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
state = GRASS
velocity = 0.0
y = TERRAIN_HEIGHT
counter = TERRAIN_STARTPAD
oneshot = False
self.terrain = []
self.terrain_x = []
self.terrain_y = []
for i in range(TERRAIN_LENGTH):
x = i*TERRAIN_STEP
self.terrain_x.append(x)
if state==GRASS and not oneshot:
velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
if i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1)/SCALE #1
y += velocity
elif state==PIT and oneshot:
counter = self.np_random.randint(3, 5)
poly = [
(x, y),
(x+TERRAIN_STEP, y),
(x+TERRAIN_STEP, y-4*TERRAIN_STEP),
(x, y-4*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
self.fd_polygon.shape.vertices=[(p[0]+TERRAIN_STEP*counter,p[1]) for p in poly]
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
counter += 2
original_y = y
elif state==PIT and not oneshot:
y = original_y
if counter > 1:
y -= 4*TERRAIN_STEP
elif state==STUMP and oneshot:
counter = self.np_random.randint(1, 3)
poly = [
(x, y),
(x+counter*TERRAIN_STEP, y),
(x+counter*TERRAIN_STEP, y+counter*TERRAIN_STEP),
(x, y+counter*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
elif state==STAIRS and oneshot:
stair_height = +1 if self.np_random.rand() > 0.5 else -1
stair_width = self.np_random.randint(4, 5)
stair_steps = self.np_random.randint(3, 5)
original_y = y
for s in range(stair_steps):
poly = [
(x+( s*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
(x+( s*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
counter = stair_steps*stair_width
elif state==STAIRS and not oneshot:
s = stair_steps*stair_width - counter - stair_height
n = s/stair_width
y = original_y + (n*stair_height)*TERRAIN_STEP
oneshot = False
self.terrain_y.append(y)
counter -= 1
if counter==0:
counter = self.np_random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
if state==GRASS and hardcore:
state = self.np_random.randint(1, _STATES_)
oneshot = True
else:
state = GRASS
oneshot = True
self.terrain_poly = []
for i in range(TERRAIN_LENGTH-1):
poly = [
(self.terrain_x[i], self.terrain_y[i]),
(self.terrain_x[i+1], self.terrain_y[i+1])
]
self.fd_edge.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_edge)
color = (0.3, 1.0 if i%2==0 else 0.8, 0.3)
t.color1 = color
t.color2 = color
self.terrain.append(t)
color = (0.4, 0.6, 0.3)
poly += [ (poly[1][0], 0), (poly[0][0], 0) ]
self.terrain_poly.append( (poly, color) )
self.terrain.reverse()
def _generate_clouds(self):
# Sorry for the clouds, couldn't resist
self.cloud_poly = []
for i in range(TERRAIN_LENGTH//20):
x = self.np_random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
y = VIEWPORT_H/SCALE*3/4
poly = [
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP),
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP) )
for a in range(5) ]
x1 = min( [p[0] for p in poly] )
x2 = max( [p[0] for p in poly] )
self.cloud_poly.append( (poly,x1,x2) )
def reset(self):
self._destroy()
self.world.contactListener_bug_workaround = ContactDetector(self)
self.world.contactListener = self.world.contactListener_bug_workaround
self.game_over = False
self.prev_shaping = None
self.scroll = 0.0
self.lidar_render = 0
W = VIEWPORT_W/SCALE
H = VIEWPORT_H/SCALE
self._generate_terrain(self.hardcore)
self._generate_clouds()
init_x = TERRAIN_STEP*TERRAIN_STARTPAD/2
init_y = TERRAIN_HEIGHT+2*LEG_H
self.hull = self.world.CreateDynamicBody(
position = (init_x, init_y),
fixtures = HULL_FD
)
self.hull.color1 = (0.5,0.4,0.9)
self.hull.color2 = (0.3,0.3,0.5)
self.hull.ApplyForceToCenter((self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
self.legs = []
self.joints = []
for i in [-1,+1]:
leg = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = LEG_FD
)
leg.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
leg.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=leg,
localAnchorA=(0, LEG_DOWN),
localAnchorB=(0, LEG_H/2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = i,
lowerAngle = -0.8,
upperAngle = 1.1,
)
self.legs.append(leg)
self.joints.append(self.world.CreateJoint(rjd))
lower = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H*3/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = LOWER_FD
)
lower.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
lower.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
rjd = revoluteJointDef(
bodyA=leg,
bodyB=lower,
localAnchorA=(0, -LEG_H/2),
localAnchorB=(0, LEG_H/2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = 1,
lowerAngle = -1.6,
upperAngle = -0.1,
)
lower.ground_contact = False
self.legs.append(lower)
self.joints.append(self.world.CreateJoint(rjd))
self.drawlist = self.terrain + self.legs + [self.hull]
class LidarCallback(Box2D.b2.rayCastCallback):
def ReportFixture(self, fixture, point, normal, fraction):
if (fixture.filterData.categoryBits & 1) == 0:
return 1
self.p2 = point
self.fraction = fraction
return 0
self.lidar = [LidarCallback() for _ in range(10)]
return self.step(np.array([0,0,0,0]))[0]
def step(self, action):
#self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
control_speed = False # Should be easier as well
if control_speed:
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
else:
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
self.joints[0].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
self.joints[1].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
self.joints[2].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
self.joints[3].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1))
self.world.Step(1.0/FPS, 6*30, 2*30)
pos = self.hull.position
vel = self.hull.linearVelocity
for i in range(10):
self.lidar[i].fraction = 1.0
self.lidar[i].p1 = pos
self.lidar[i].p2 = (
pos[0] + math.sin(1.5*i/10.0)*LIDAR_RANGE,
pos[1] - math.cos(1.5*i/10.0)*LIDAR_RANGE)
self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
state = [
self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
2.0*self.hull.angularVelocity/FPS,
0.3*vel.x*(VIEWPORT_W/SCALE)/FPS, # Normalized to get -1..1 range
0.3*vel.y*(VIEWPORT_H/SCALE)/FPS,
self.joints[0].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
self.joints[0].speed / SPEED_HIP,
self.joints[1].angle + 1.0,
self.joints[1].speed / SPEED_KNEE,
1.0 if self.legs[1].ground_contact else 0.0,
self.joints[2].angle,
self.joints[2].speed / SPEED_HIP,
self.joints[3].angle + 1.0,
self.joints[3].speed / SPEED_KNEE,
1.0 if self.legs[3].ground_contact else 0.0
]
state += [l.fraction for l in self.lidar]
assert len(state)==24
self.scroll = pos.x - VIEWPORT_W/SCALE/5
shaping = 130*pos[0]/SCALE # moving forward is a way to receive reward (normalized to get 300 on completion)
shaping -= 5.0*abs(state[0]) # keep head straight, other than that and falling, any behavior is unpunished
reward = 0
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
for a in action:
reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
# normalized to about -50.0 using heuristic, more optimal agent should spend less
done = False
if self.game_over or pos[0] < 0:
reward = -100
done = True
if pos[0] > (TERRAIN_LENGTH-TERRAIN_GRASS)*TERRAIN_STEP:
done = True
return np.array(state), reward, done, {}
def render(self, mode='human'):
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(self.scroll, VIEWPORT_W/SCALE + self.scroll, 0, VIEWPORT_H/SCALE)
self.viewer.draw_polygon( [
(self.scroll, 0),
(self.scroll+VIEWPORT_W/SCALE, 0),
(self.scroll+VIEWPORT_W/SCALE, VIEWPORT_H/SCALE),
(self.scroll, VIEWPORT_H/SCALE),
], color=(0.9, 0.9, 1.0) )
for poly,x1,x2 in self.cloud_poly:
if x2 < self.scroll/2: continue
if x1 > self.scroll/2 + VIEWPORT_W/SCALE: continue
self.viewer.draw_polygon( [(p[0]+self.scroll/2, p[1]) for p in poly], color=(1,1,1))
for poly, color in self.terrain_poly:
if poly[1][0] < self.scroll: continue
if poly[0][0] > self.scroll + VIEWPORT_W/SCALE: continue
self.viewer.draw_polygon(poly, color=color)
self.lidar_render = (self.lidar_render+1) % 100
i = self.lidar_render
if i < 2*len(self.lidar):
l = self.lidar[i] if i < len(self.lidar) else self.lidar[len(self.lidar)-i-1]
self.viewer.draw_polyline( [l.p1, l.p2], color=(1,0,0), linewidth=1 )
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
t = rendering.Transform(translation=trans*f.shape.pos)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
else:
path = [trans*v for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1)
path.append(path[0])
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
flagy1 = TERRAIN_HEIGHT
flagy2 = flagy1 + 50/SCALE
x = TERRAIN_STEP*3
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(0,0,0), linewidth=2 )
f = [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)]
self.viewer.draw_polygon(f, color=(0.9,0.2,0) )
self.viewer.draw_polyline(f + [f[0]], color=(0,0,0), linewidth=2 )
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
class BipedalWalkerHardcore(BipedalWalker):
hardcore = True
if __name__=="__main__":
# Heurisic: suboptimal, have no notion of balance.
env = BipedalWalker()
env.reset()
steps = 0
total_reward = 0
a = np.array([0.0, 0.0, 0.0, 0.0])
STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1,2,3
SPEED = 0.29 # Will fall forward on higher speed
state = STAY_ON_ONE_LEG
moving_leg = 0
supporting_leg = 1 - moving_leg
SUPPORT_KNEE_ANGLE = +0.1
supporting_knee_angle = SUPPORT_KNEE_ANGLE
while True:
s, r, done, info = env.step(a)
total_reward += r
if steps % 20 == 0 or done:
print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4] ]))
print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9] ]))
print("leg1 " + str(["{:+0.2f}".format(x) for x in s[9:14]]))
steps += 1
contact0 = s[8]
contact1 = s[13]
moving_s_base = 4 + 5*moving_leg
supporting_s_base = 4 + 5*supporting_leg
hip_targ = [None,None] # -0.8 .. +1.1
knee_targ = [None,None] # -0.6 .. +0.9
hip_todo = [0.0, 0.0]
knee_todo = [0.0, 0.0]
if state==STAY_ON_ONE_LEG:
hip_targ[moving_leg] = 1.1
knee_targ[moving_leg] = -0.6
supporting_knee_angle += 0.03
if s[2] > SPEED: supporting_knee_angle += 0.03
supporting_knee_angle = min( supporting_knee_angle, SUPPORT_KNEE_ANGLE )
knee_targ[supporting_leg] = supporting_knee_angle
if s[supporting_s_base+0] < 0.10: # supporting leg is behind
state = PUT_OTHER_DOWN
if state==PUT_OTHER_DOWN:
hip_targ[moving_leg] = +0.1
knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
knee_targ[supporting_leg] = supporting_knee_angle
if s[moving_s_base+4]:
state = PUSH_OFF
supporting_knee_angle = min( s[moving_s_base+2], SUPPORT_KNEE_ANGLE )
if state==PUSH_OFF:
knee_targ[moving_leg] = supporting_knee_angle
knee_targ[supporting_leg] = +1.0
if s[supporting_s_base+2] > 0.88 or s[2] > 1.2*SPEED:
state = STAY_ON_ONE_LEG
moving_leg = 1 - moving_leg
supporting_leg = 1 - moving_leg
if hip_targ[0]: hip_todo[0] = 0.9*(hip_targ[0] - s[4]) - 0.25*s[5]
if hip_targ[1]: hip_todo[1] = 0.9*(hip_targ[1] - s[9]) - 0.25*s[10]
if knee_targ[0]: knee_todo[0] = 4.0*(knee_targ[0] - s[6]) - 0.25*s[7]
if knee_targ[1]: knee_todo[1] = 4.0*(knee_targ[1] - s[11]) - 0.25*s[12]
hip_todo[0] -= 0.9*(0-s[0]) - 1.5*s[1] # PID to keep head strait
hip_todo[1] -= 0.9*(0-s[0]) - 1.5*s[1]
knee_todo[0] -= 15.0*s[3] # vertical speed, to damp oscillations
knee_todo[1] -= 15.0*s[3]
a[0] = hip_todo[0]
a[1] = knee_todo[0]
a[2] = hip_todo[1]
a[3] = knee_todo[1]
a = np.clip(0.5*a, -1.0, 1.0)
env.render()
if done: break

View File

@@ -0,0 +1,244 @@
import numpy as np
import math
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)
# Top-down car dynamics simulation.
#
# Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
# This simulation is a bit more detailed, with wheels rotation.
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
SIZE = 0.02
ENGINE_POWER = 100000000*SIZE*SIZE
WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
FRICTION_LIMIT = 1000000*SIZE*SIZE # friction ~= mass ~= size^2 (calculated implicitly using density)
WHEEL_R = 27
WHEEL_W = 14
WHEELPOS = [
(-55,+80), (+55,+80),
(-55,-82), (+55,-82)
]
HULL_POLY1 =[
(-60,+130), (+60,+130),
(+60,+110), (-60,+110)
]
HULL_POLY2 =[
(-15,+120), (+15,+120),
(+20, +20), (-20, 20)
]
HULL_POLY3 =[
(+25, +20),
(+50, -10),
(+50, -40),
(+20, -90),
(-20, -90),
(-50, -40),
(-50, -10),
(-25, +20)
]
HULL_POLY4 =[
(-50,-120), (+50,-120),
(+50,-90), (-50,-90)
]
WHEEL_COLOR = (0.0,0.0,0.0)
WHEEL_WHITE = (0.3,0.3,0.3)
MUD_COLOR = (0.4,0.4,0.0)
class Car:
def __init__(self, world, init_angle, init_x, init_y):
self.world = world
self.hull = self.world.CreateDynamicBody(
position = (init_x, init_y),
angle = init_angle,
fixtures = [
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY1 ]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY2 ]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY3 ]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY4 ]), density=1.0)
]
)
self.hull.color = (0.8,0.0,0.0)
self.wheels = []
self.fuel_spent = 0.0
WHEEL_POLY = [
(-WHEEL_W,+WHEEL_R), (+WHEEL_W,+WHEEL_R),
(+WHEEL_W,-WHEEL_R), (-WHEEL_W,-WHEEL_R)
]
for wx,wy in WHEELPOS:
front_k = 1.0 if wy > 0 else 1.0
w = self.world.CreateDynamicBody(
position = (init_x+wx*SIZE, init_y+wy*SIZE),
angle = init_angle,
fixtures = fixtureDef(
shape=polygonShape(vertices=[ (x*front_k*SIZE,y*front_k*SIZE) for x,y in WHEEL_POLY ]),
density=0.1,
categoryBits=0x0020,
maskBits=0x001,
restitution=0.0)
)
w.wheel_rad = front_k*WHEEL_R*SIZE
w.color = WHEEL_COLOR
w.gas = 0.0
w.brake = 0.0
w.steer = 0.0
w.phase = 0.0 # wheel angle
w.omega = 0.0 # angular velocity
w.skid_start = None
w.skid_particle = None
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=w,
localAnchorA=(wx*SIZE,wy*SIZE),
localAnchorB=(0,0),
enableMotor=True,
enableLimit=True,
maxMotorTorque=180*900*SIZE*SIZE,
motorSpeed = 0,
lowerAngle = -0.4,
upperAngle = +0.4,
)
w.joint = self.world.CreateJoint(rjd)
w.tiles = set()
w.userData = w
self.wheels.append(w)
self.drawlist = self.wheels + [self.hull]
self.particles = []
def gas(self, gas):
'control: rear wheel drive'
gas = np.clip(gas, 0, 1)
for w in self.wheels[2:4]:
diff = gas - w.gas
if diff > 0.1: diff = 0.1 # gradually increase, but stop immediately
w.gas += diff
def brake(self, b):
'control: brake b=0..1, more than 0.9 blocks wheels to zero rotation'
for w in self.wheels:
w.brake = b
def steer(self, s):
'control: steer s=-1..1, it takes time to rotate steering wheel from side to side, s is target position'
self.wheels[0].steer = s
self.wheels[1].steer = s
def step(self, dt):
for w in self.wheels:
# Steer each wheel
dir = np.sign(w.steer - w.joint.angle)
val = abs(w.steer - w.joint.angle)
w.joint.motorSpeed = dir*min(50.0*val, 3.0)
# Position => friction_limit
grass = True
friction_limit = FRICTION_LIMIT*0.6 # Grass friction if no tile
for tile in w.tiles:
friction_limit = max(friction_limit, FRICTION_LIMIT*tile.road_friction)
grass = False
# Force
forw = w.GetWorldVector( (0,1) )
side = w.GetWorldVector( (1,0) )
v = w.linearVelocity
vf = forw[0]*v[0] + forw[1]*v[1] # forward speed
vs = side[0]*v[0] + side[1]*v[1] # side speed
# WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
# WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
# domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0) # small coef not to divide by zero
self.fuel_spent += dt*ENGINE_POWER*w.gas
if w.brake >= 0.9:
w.omega = 0
elif w.brake > 0:
BRAKE_FORCE = 15 # radians per second
dir = -np.sign(w.omega)
val = BRAKE_FORCE*w.brake
if abs(val) > abs(w.omega): val = abs(w.omega) # low speed => same as = 0
w.omega += dir*val
w.phase += w.omega*dt
vr = w.omega*w.wheel_rad # rotating wheel speed
f_force = -vf + vr # force direction is direction of speed difference
p_force = -vs
# Physically correct is to always apply friction_limit until speed is equal.
# But dt is finite, that will lead to oscillations if difference is already near zero.
f_force *= 205000*SIZE*SIZE # Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
p_force *= 205000*SIZE*SIZE
force = np.sqrt(np.square(f_force) + np.square(p_force))
# Skid trace
if abs(force) > 2.0*friction_limit:
if w.skid_particle and w.skid_particle.grass==grass and len(w.skid_particle.poly) < 30:
w.skid_particle.poly.append( (w.position[0], w.position[1]) )
elif w.skid_start is None:
w.skid_start = w.position
else:
w.skid_particle = self._create_particle( w.skid_start, w.position, grass )
w.skid_start = None
else:
w.skid_start = None
w.skid_particle = None
if abs(force) > friction_limit:
f_force /= force
p_force /= force
force = friction_limit # Correct physics here
f_force *= force
p_force *= force
w.omega -= dt*f_force*w.wheel_rad/WHEEL_MOMENT_OF_INERTIA
w.ApplyForceToCenter( (
p_force*side[0] + f_force*forw[0],
p_force*side[1] + f_force*forw[1]), True )
def draw(self, viewer, draw_particles=True):
if draw_particles:
for p in self.particles:
viewer.draw_polyline(p.poly, color=p.color, linewidth=5)
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
path = [trans*v for v in f.shape.vertices]
viewer.draw_polygon(path, color=obj.color)
if "phase" not in obj.__dict__: continue
a1 = obj.phase
a2 = obj.phase + 1.2 # radians
s1 = math.sin(a1)
s2 = math.sin(a2)
c1 = math.cos(a1)
c2 = math.cos(a2)
if s1>0 and s2>0: continue
if s1>0: c1 = np.sign(c1)
if s2>0: c2 = np.sign(c2)
white_poly = [
(-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE),
(+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE)
]
viewer.draw_polygon([trans*v for v in white_poly], color=WHEEL_WHITE)
def _create_particle(self, point1, point2, grass):
class Particle:
pass
p = Particle()
p.color = WHEEL_COLOR if not grass else MUD_COLOR
p.ttl = 1
p.poly = [(point1[0],point1[1]), (point2[0],point2[1])]
p.grass = grass
self.particles.append(p)
while len(self.particles) > 30:
self.particles.pop(0)
return p
def destroy(self):
self.world.DestroyBody(self.hull)
self.hull = None
for w in self.wheels:
self.world.DestroyBody(w)
self.wheels = []

View File

@@ -0,0 +1,498 @@
import sys, math
import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
import gym
from gym import spaces
from gym.envs.box2d.car_dynamics import Car
from gym.utils import colorize, seeding, EzPickle
import pyglet
from pyglet import gl
# Easiest continuous control task to learn from pixels, a top-down racing environment.
# Discreet control is reasonable in this environment as well, on/off discretisation is
# fine.
#
# State consists of STATE_W x STATE_H pixels.
#
# Reward is -0.1 every frame and +1000/N for every track tile visited, where N is
# the total number of tiles in track. For example, if you have finished in 732 frames,
# your reward is 1000 - 0.1*732 = 926.8 points.
#
# Game is solved when agent consistently gets 900+ points. Track is random every episode.
#
# Episode finishes when all tiles are visited. Car also can go outside of PLAYFIELD, that
# is far off the track, then it will get -100 and die.
#
# Some indicators shown at the bottom of the window and the state RGB buffer. From
# left to right: true speed, four ABS sensors, steering wheel position, gyroscope.
#
# To play yourself (it's rather fast for humans), type:
#
# python gym/envs/box2d/car_racing.py
#
# Remember it's powerful rear-wheel drive car, don't press accelerator and turn at the
# same time.
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
STATE_W = 96 # less than Atari 160x192
STATE_H = 96
VIDEO_W = 600
VIDEO_H = 400
WINDOW_W = 1200
WINDOW_H = 1000
SCALE = 6.0 # Track scale
TRACK_RAD = 900/SCALE # Track is heavily morphed circle with this radius
PLAYFIELD = 2000/SCALE # Game over boundary
FPS = 50
ZOOM = 2.7 # Camera zoom
ZOOM_FOLLOW = True # Set to False for fixed view (don't use zoom)
TRACK_DETAIL_STEP = 21/SCALE
TRACK_TURN_RATE = 0.31
TRACK_WIDTH = 40/SCALE
BORDER = 8/SCALE
BORDER_MIN_COUNT = 4
ROAD_COLOR = [0.4, 0.4, 0.4]
class FrictionDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
self._contact(contact, True)
def EndContact(self, contact):
self._contact(contact, False)
def _contact(self, contact, begin):
tile = None
obj = None
u1 = contact.fixtureA.body.userData
u2 = contact.fixtureB.body.userData
if u1 and "road_friction" in u1.__dict__:
tile = u1
obj = u2
if u2 and "road_friction" in u2.__dict__:
tile = u2
obj = u1
if not tile: return
tile.color[0] = ROAD_COLOR[0]
tile.color[1] = ROAD_COLOR[1]
tile.color[2] = ROAD_COLOR[2]
if not obj or "tiles" not in obj.__dict__: return
if begin:
obj.tiles.add(tile)
#print tile.road_friction, "ADD", len(obj.tiles)
if not tile.road_visited:
tile.road_visited = True
self.env.reward += 1000.0/len(self.env.track)
self.env.tile_visited_count += 1
else:
obj.tiles.remove(tile)
#print tile.road_friction, "DEL", len(obj.tiles) -- should delete to zero when on grass (this works)
class CarRacing(gym.Env, EzPickle):
metadata = {
'render.modes': ['human', 'rgb_array', 'state_pixels'],
'video.frames_per_second' : FPS
}
def __init__(self):
EzPickle.__init__(self)
self.seed()
self.contactListener_keepref = FrictionDetector(self)
self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref)
self.viewer = None
self.invisible_state_window = None
self.invisible_video_window = None
self.road = None
self.car = None
self.reward = 0.0
self.prev_reward = 0.0
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def _destroy(self):
if not self.road: return
for t in self.road:
self.world.DestroyBody(t)
self.road = []
self.car.destroy()
def _create_track(self):
CHECKPOINTS = 12
# Create checkpoints
checkpoints = []
for c in range(CHECKPOINTS):
alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
if c==0:
alpha = 0
rad = 1.5*TRACK_RAD
if c==CHECKPOINTS-1:
alpha = 2*math.pi*c/CHECKPOINTS
self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
rad = 1.5*TRACK_RAD
checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )
#print "\n".join(str(h) for h in checkpoints)
#self.road_poly = [ ( # uncomment this to see checkpoints
# [ (tx,ty) for a,tx,ty in checkpoints ],
# (0.7,0.7,0.9) ) ]
self.road = []
# Go from one checkpoint to another to create track
x, y, beta = 1.5*TRACK_RAD, 0, 0
dest_i = 0
laps = 0
track = []
no_freeze = 2500
visited_other_side = False
while 1:
alpha = math.atan2(y, x)
if visited_other_side and alpha > 0:
laps += 1
visited_other_side = False
if alpha < 0:
visited_other_side = True
alpha += 2*math.pi
while True: # Find destination from checkpoints
failed = True
while True:
dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
if alpha <= dest_alpha:
failed = False
break
dest_i += 1
if dest_i % len(checkpoints) == 0: break
if not failed: break
alpha -= 2*math.pi
continue
r1x = math.cos(beta)
r1y = math.sin(beta)
p1x = -r1y
p1y = r1x
dest_dx = dest_x - x # vector towards destination
dest_dy = dest_y - y
proj = r1x*dest_dx + r1y*dest_dy # destination vector projected on rad
while beta - alpha > 1.5*math.pi: beta -= 2*math.pi
while beta - alpha < -1.5*math.pi: beta += 2*math.pi
prev_beta = beta
proj *= SCALE
if proj > 0.3: beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001*proj))
x += p1x*TRACK_DETAIL_STEP
y += p1y*TRACK_DETAIL_STEP
track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
if laps > 4: break
no_freeze -= 1
if no_freeze==0: break
#print "\n".join([str(t) for t in enumerate(track)])
# Find closed loop range i1..i2, first loop should be ignored, second is OK
i1, i2 = -1, -1
i = len(track)
while True:
i -= 1
if i==0: return False # Failed
pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
if pass_through_start and i2==-1:
i2 = i
elif pass_through_start and i1==-1:
i1 = i
break
print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
assert i1!=-1
assert i2!=-1
track = track[i1:i2-1]
first_beta = track[0][1]
first_perp_x = math.cos(first_beta)
first_perp_y = math.sin(first_beta)
# Length of perpendicular jump to put together head and tail
well_glued_together = np.sqrt(
np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
if well_glued_together > TRACK_DETAIL_STEP:
return False
# Red-white border on hard turns
border = [False]*len(track)
for i in range(len(track)):
good = True
oneside = 0
for neg in range(BORDER_MIN_COUNT):
beta1 = track[i-neg-0][1]
beta2 = track[i-neg-1][1]
good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
oneside += np.sign(beta1 - beta2)
good &= abs(oneside) == BORDER_MIN_COUNT
border[i] = good
for i in range(len(track)):
for neg in range(BORDER_MIN_COUNT):
border[i-neg] |= border[i]
# Create tiles
for i in range(len(track)):
alpha1, beta1, x1, y1 = track[i]
alpha2, beta2, x2, y2 = track[i-1]
road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1))
road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1))
road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2))
road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2))
t = self.world.CreateStaticBody( fixtures = fixtureDef(
shape=polygonShape(vertices=[road1_l, road1_r, road2_r, road2_l])
))
t.userData = t
c = 0.01*(i%3)
t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
t.road_visited = False
t.road_friction = 1.0
t.fixtures[0].sensor = True
self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color ))
self.road.append(t)
if border[i]:
side = np.sign(beta2 - beta1)
b1_l = (x1 + side* TRACK_WIDTH *math.cos(beta1), y1 + side* TRACK_WIDTH *math.sin(beta1))
b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
b2_l = (x2 + side* TRACK_WIDTH *math.cos(beta2), y2 + side* TRACK_WIDTH *math.sin(beta2))
b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
self.track = track
return True
def reset(self):
self._destroy()
self.reward = 0.0
self.prev_reward = 0.0
self.tile_visited_count = 0
self.t = 0.0
self.road_poly = []
self.human_render = False
while True:
success = self._create_track()
if success: break
print("retry to generate track (normal if there are not many of this messages)")
self.car = Car(self.world, *self.track[0][1:4])
return self.step(None)[0]
def step(self, action):
if action is not None:
self.car.steer(-action[0])
self.car.gas(action[1])
self.car.brake(action[2])
self.car.step(1.0/FPS)
self.world.Step(1.0/FPS, 6*30, 2*30)
self.t += 1.0/FPS
self.state = self.render("state_pixels")
step_reward = 0
done = False
if action is not None: # First step without action, called from reset()
self.reward -= 0.1
# We actually don't want to count fuel spent, we want car to be faster.
#self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
self.car.fuel_spent = 0.0
step_reward = self.reward - self.prev_reward
self.prev_reward = self.reward
if self.tile_visited_count==len(self.track):
done = True
x, y = self.car.hull.position
if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
done = True
step_reward = -100
return self.state, step_reward, done, {}
def render(self, mode='human'):
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
self.score_label = pyglet.text.Label('0000', font_size=36,
x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
color=(255,255,255,255))
self.transform = rendering.Transform()
if "t" not in self.__dict__: return # reset() not called yet
zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1) # Animate zoom first second
zoom_state = ZOOM*SCALE*STATE_W/WINDOW_W
zoom_video = ZOOM*SCALE*VIDEO_W/WINDOW_W
scroll_x = self.car.hull.position[0]
scroll_y = self.car.hull.position[1]
angle = -self.car.hull.angle
vel = self.car.hull.linearVelocity
if np.linalg.norm(vel) > 0.5:
angle = math.atan2(vel[0], vel[1])
self.transform.set_scale(zoom, zoom)
self.transform.set_translation(
WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
self.transform.set_rotation(angle)
self.car.draw(self.viewer, mode!="state_pixels")
arr = None
win = self.viewer.window
if mode != 'state_pixels':
win.switch_to()
win.dispatch_events()
if mode=="rgb_array" or mode=="state_pixels":
win.clear()
t = self.transform
if mode=='rgb_array':
VP_W = VIDEO_W
VP_H = VIDEO_H
else:
VP_W = STATE_W
VP_H = STATE_H
gl.glViewport(0, 0, VP_W, VP_H)
t.enable()
self.render_road()
for geom in self.viewer.onetime_geoms:
geom.render()
t.disable()
self.render_indicators(WINDOW_W, WINDOW_H) # TODO: find why 2x needed, wtf
image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
arr = arr.reshape(VP_H, VP_W, 4)
arr = arr[::-1, :, 0:3]
if mode=="rgb_array" and not self.human_render: # agent can call or not call env.render() itself when recording video.
win.flip()
if mode=='human':
self.human_render = True
win.clear()
t = self.transform
gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
t.enable()
self.render_road()
for geom in self.viewer.onetime_geoms:
geom.render()
t.disable()
self.render_indicators(WINDOW_W, WINDOW_H)
win.flip()
self.viewer.onetime_geoms = []
return arr
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
def render_road(self):
gl.glBegin(gl.GL_QUADS)
gl.glColor4f(0.4, 0.8, 0.4, 1.0)
gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
gl.glColor4f(0.4, 0.9, 0.4, 1.0)
k = PLAYFIELD/20.0
for x in range(-20, 20, 2):
for y in range(-20, 20, 2):
gl.glVertex3f(k*x + k, k*y + 0, 0)
gl.glVertex3f(k*x + 0, k*y + 0, 0)
gl.glVertex3f(k*x + 0, k*y + k, 0)
gl.glVertex3f(k*x + k, k*y + k, 0)
for poly, color in self.road_poly:
gl.glColor4f(color[0], color[1], color[2], 1)
for p in poly:
gl.glVertex3f(p[0], p[1], 0)
gl.glEnd()
def render_indicators(self, W, H):
gl.glBegin(gl.GL_QUADS)
s = W/40.0
h = H/40.0
gl.glColor4f(0,0,0,1)
gl.glVertex3f(W, 0, 0)
gl.glVertex3f(W, 5*h, 0)
gl.glVertex3f(0, 5*h, 0)
gl.glVertex3f(0, 0, 0)
def vertical_ind(place, val, color):
gl.glColor4f(color[0], color[1], color[2], 1)
gl.glVertex3f((place+0)*s, h + h*val, 0)
gl.glVertex3f((place+1)*s, h + h*val, 0)
gl.glVertex3f((place+1)*s, h, 0)
gl.glVertex3f((place+0)*s, h, 0)
def horiz_ind(place, val, color):
gl.glColor4f(color[0], color[1], color[2], 1)
gl.glVertex3f((place+0)*s, 4*h , 0)
gl.glVertex3f((place+val)*s, 4*h, 0)
gl.glVertex3f((place+val)*s, 2*h, 0)
gl.glVertex3f((place+0)*s, 2*h, 0)
true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
vertical_ind(5, 0.02*true_speed, (1,1,1))
vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors
vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
gl.glEnd()
self.score_label.text = "%04i" % self.reward
self.score_label.draw()
if __name__=="__main__":
from pyglet.window import key
a = np.array( [0.0, 0.0, 0.0] )
def key_press(k, mod):
global restart
if k==0xff0d: restart = True
if k==key.LEFT: a[0] = -1.0
if k==key.RIGHT: a[0] = +1.0
if k==key.UP: a[1] = +1.0
if k==key.DOWN: a[2] = +0.8 # set 1.0 for wheels to block to zero rotation
def key_release(k, mod):
if k==key.LEFT and a[0]==-1.0: a[0] = 0
if k==key.RIGHT and a[0]==+1.0: a[0] = 0
if k==key.UP: a[1] = 0
if k==key.DOWN: a[2] = 0
env = CarRacing()
env.render()
record_video = False
if record_video:
env.monitor.start('/tmp/video-test', force=True)
env.viewer.window.on_key_press = key_press
env.viewer.window.on_key_release = key_release
while True:
env.reset()
total_reward = 0.0
steps = 0
restart = False
while True:
s, r, done, info = env.step(a)
total_reward += r
if steps % 200 == 0 or done:
print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
#import matplotlib.pyplot as plt
#plt.imshow(s)
#plt.savefig("test.jpeg")
steps += 1
if not record_video: # Faster, but you can as well call env.render() every time to play full window.
env.render()
if done or restart: break
env.close()

View File

@@ -0,0 +1,420 @@
import sys, math
import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
import gym
from gym import spaces
from gym.utils import seeding, EzPickle
# Rocket trajectory optimization is a classic topic in Optimal Control.
#
# According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
# turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
#
# Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
# Reward for moving from the top of the screen to landing pad and zero speed is about 100..140 points.
# If lander moves away from landing pad it loses reward back. Episode finishes if the lander crashes or
# comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
# engine is -0.3 points each frame. Solved is 200 points.
#
# Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
# on its first attempt. Please see source code for details.
#
# Too see heuristic landing, run:
#
# python gym/envs/box2d/lunar_lander.py
#
# To play yourself, run:
#
# python examples/agents/keyboard_agent.py LunarLander-v2
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6
INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder
LANDER_POLY =[
(-14,+17), (-17,0), (-17,-10),
(+17,-10), (+17,0), (+14,+17)
]
LEG_AWAY = 20
LEG_DOWN = 18
LEG_W, LEG_H = 2, 8
LEG_SPRING_TORQUE = 40
SIDE_ENGINE_HEIGHT = 14.0
SIDE_ENGINE_AWAY = 12.0
VIEWPORT_W = 600
VIEWPORT_H = 400
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if self.env.lander==contact.fixtureA.body or self.env.lander==contact.fixtureB.body:
self.env.game_over = True
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = True
def EndContact(self, contact):
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = False
class LunarLander(gym.Env, EzPickle):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : FPS
}
continuous = False
def __init__(self):
EzPickle.__init__(self)
self.seed()
self.viewer = None
self.world = Box2D.b2World()
self.moon = None
self.lander = None
self.particles = []
self.prev_reward = None
# useful range is -1 .. +1, but spikes can be higher
self.observation_space = spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32)
if self.continuous:
# Action is two floats [main engine, left-right engines].
# Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
# Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
self.action_space = spaces.Box(-1, +1, (2,), dtype=np.float32)
else:
# Nop, fire left engine, main engine, right engine
self.action_space = spaces.Discrete(4)
self.reset()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def _destroy(self):
if not self.moon: return
self.world.contactListener = None
self._clean_particles(True)
self.world.DestroyBody(self.moon)
self.moon = None
self.world.DestroyBody(self.lander)
self.lander = None
self.world.DestroyBody(self.legs[0])
self.world.DestroyBody(self.legs[1])
def reset(self):
self._destroy()
self.world.contactListener_keepref = ContactDetector(self)
self.world.contactListener = self.world.contactListener_keepref
self.game_over = False
self.prev_shaping = None
W = VIEWPORT_W/SCALE
H = VIEWPORT_H/SCALE
# terrain
CHUNKS = 11
height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,) )
chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS//2-1]
self.helipad_x2 = chunk_x[CHUNKS//2+1]
self.helipad_y = H/4
height[CHUNKS//2-2] = self.helipad_y
height[CHUNKS//2-1] = self.helipad_y
height[CHUNKS//2+0] = self.helipad_y
height[CHUNKS//2+1] = self.helipad_y
height[CHUNKS//2+2] = self.helipad_y
smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)]
self.moon = self.world.CreateStaticBody( shapes=edgeShape(vertices=[(0, 0), (W, 0)]) )
self.sky_polys = []
for i in range(CHUNKS-1):
p1 = (chunk_x[i], smooth_y[i])
p2 = (chunk_x[i+1], smooth_y[i+1])
self.moon.CreateEdgeFixture(
vertices=[p1,p2],
density=0,
friction=0.1)
self.sky_polys.append( [p1, p2, (p2[0],H), (p1[0],H)] )
self.moon.color1 = (0.0,0.0,0.0)
self.moon.color2 = (0.0,0.0,0.0)
initial_y = VIEWPORT_H/SCALE
self.lander = self.world.CreateDynamicBody(
position = (VIEWPORT_W/SCALE/2, initial_y),
angle=0.0,
fixtures = fixtureDef(
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in LANDER_POLY ]),
density=5.0,
friction=0.1,
categoryBits=0x0010,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
)
self.lander.color1 = (0.5,0.4,0.9)
self.lander.color2 = (0.3,0.3,0.5)
self.lander.ApplyForceToCenter( (
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
), True)
self.legs = []
for i in [-1,+1]:
leg = self.world.CreateDynamicBody(
position = (VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
angle = (i*0.05),
fixtures = fixtureDef(
shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
)
leg.ground_contact = False
leg.color1 = (0.5,0.4,0.9)
leg.color2 = (0.3,0.3,0.5)
rjd = revoluteJointDef(
bodyA=self.lander,
bodyB=leg,
localAnchorA=(0, 0),
localAnchorB=(i*LEG_AWAY/SCALE, LEG_DOWN/SCALE),
enableMotor=True,
enableLimit=True,
maxMotorTorque=LEG_SPRING_TORQUE,
motorSpeed=+0.3*i # low enough not to jump back into the sky
)
if i==-1:
rjd.lowerAngle = +0.9 - 0.5 # Yes, the most esoteric numbers here, angles legs have freedom to travel within
rjd.upperAngle = +0.9
else:
rjd.lowerAngle = -0.9
rjd.upperAngle = -0.9 + 0.5
leg.joint = self.world.CreateJoint(rjd)
self.legs.append(leg)
self.drawlist = [self.lander] + self.legs
return self.step(np.array([0,0]) if self.continuous else 0)[0]
def _create_particle(self, mass, x, y, ttl):
p = self.world.CreateDynamicBody(
position = (x,y),
angle=0.0,
fixtures = fixtureDef(
shape=circleShape(radius=2/SCALE, pos=(0,0)),
density=mass,
friction=0.1,
categoryBits=0x0100,
maskBits=0x001, # collide only with ground
restitution=0.3)
)
p.ttl = ttl
self.particles.append(p)
self._clean_particles(False)
return p
def _clean_particles(self, all):
while self.particles and (all or self.particles[0].ttl<0):
self.world.DestroyBody(self.particles.pop(0))
def step(self, action):
if self.continuous:
action = np.clip(action, -1, +1).astype(np.float32)
else:
assert self.action_space.contains(action), "%r (%s) invalid " % (action, type(action))
# Engines
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
side = (-tip[1], tip[0]);
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
m_power = 0.0
if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2):
# Main engine
if self.continuous:
m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5 # 0.5..1.0
assert m_power>=0.5 and m_power <= 1.0
else:
m_power = 1.0
ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power) # particles are just a decoration, 3.5 is here to make particle speed adequate
p.ApplyLinearImpulse( ( ox*MAIN_ENGINE_POWER*m_power, oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER*m_power, -oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
s_power = 0.0
if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]):
# Orientation engines
if self.continuous:
direction = np.sign(action[1])
s_power = np.clip(np.abs(action[1]), 0.5,1.0)
assert s_power>=0.5 and s_power <= 1.0
else:
direction = action-2
s_power = 1.0
ox = tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
oy = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
p.ApplyLinearImpulse( ( ox*SIDE_ENGINE_POWER*s_power, oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER*s_power, -oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
self.world.Step(1.0/FPS, 6*30, 2*30)
pos = self.lander.position
vel = self.lander.linearVelocity
state = [
(pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
(pos.y - (self.helipad_y+LEG_DOWN/SCALE)) / (VIEWPORT_H/SCALE/2),
vel.x*(VIEWPORT_W/SCALE/2)/FPS,
vel.y*(VIEWPORT_H/SCALE/2)/FPS,
self.lander.angle,
20.0*self.lander.angularVelocity/FPS,
1.0 if self.legs[0].ground_contact else 0.0,
1.0 if self.legs[1].ground_contact else 0.0
]
assert len(state)==8
reward = 0
shaping = \
- 100*np.sqrt(state[0]*state[0] + state[1]*state[1]) \
- 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \
- 100*abs(state[4]) + 10*state[6] + 10*state[7] # And ten points for legs contact, the idea is if you
# lose contact again after landing, you get negative reward
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
reward -= m_power*0.30 # less fuel spent is better, about -30 for heurisic landing
reward -= s_power*0.03
done = False
if self.game_over or abs(state[0]) >= 1.0:
done = True
reward = -100
if not self.lander.awake:
done = True
reward = +100
return np.array(state, dtype=np.float32), reward, done, {}
def render(self, mode='human'):
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE)
for obj in self.particles:
obj.ttl -= 0.15
obj.color1 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
obj.color2 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
self._clean_particles(False)
for p in self.sky_polys:
self.viewer.draw_polygon(p, color=(0,0,0))
for obj in self.particles + self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
t = rendering.Transform(translation=trans*f.shape.pos)
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color1).add_attr(t)
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2).add_attr(t)
else:
path = [trans*v for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1)
path.append(path[0])
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
for x in [self.helipad_x1, self.helipad_x2]:
flagy1 = self.helipad_y
flagy2 = flagy1 + 50/SCALE
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(1,1,1) )
self.viewer.draw_polygon( [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)], color=(0.8,0.8,0) )
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
class LunarLanderContinuous(LunarLander):
continuous = True
def heuristic(env, s):
# Heuristic for:
# 1. Testing.
# 2. Demonstration rollout.
angle_targ = s[0]*0.5 + s[2]*1.0 # angle should point towards center (s[0] is horizontal coordinate, s[2] hor speed)
if angle_targ > 0.4: angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
if angle_targ < -0.4: angle_targ = -0.4
hover_targ = 0.55*np.abs(s[0]) # target y should be proporional to horizontal offset
# PID controller: s[4] angle, s[5] angularSpeed
angle_todo = (angle_targ - s[4])*0.5 - (s[5])*1.0
#print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo))
# PID controller: s[1] vertical coordinate s[3] vertical speed
hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
#print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo))
if s[6] or s[7]: # legs have contact
angle_todo = 0
hover_todo = -(s[3])*0.5 # override to reduce fall speed, that's all we need after contact
if env.continuous:
a = np.array( [hover_todo*20 - 1, -angle_todo*20] )
a = np.clip(a, -1, +1)
else:
a = 0
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05: a = 2
elif angle_todo < -0.05: a = 3
elif angle_todo > +0.05: a = 1
return a
def demo_heuristic_lander(env, seed=None, render=False):
env.seed(seed)
total_reward = 0
steps = 0
s = env.reset()
while True:
a = heuristic(env, s)
s, r, done, info = env.step(a)
total_reward += r
if render:
still_open = env.render()
if still_open == False: break
if steps % 20 == 0 or done:
print("observations:", " ".join(["{:+0.2f}".format(x) for x in s]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
steps += 1
if done: break
return total_reward
if __name__ == '__main__':
demo_heuristic_lander(LunarLander(), render=True)

View File

@@ -0,0 +1,13 @@
from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander
def test_lunar_lander():
_test_lander(LunarLander(), seed=0)
def test_lunar_lander_continuous():
_test_lander(LunarLanderContinuous(), seed=0)
def _test_lander(env, seed=None, render=False):
total_reward = demo_heuristic_lander(env, seed=seed, render=render)
assert total_reward > 100

View File

@@ -0,0 +1,6 @@
from gym.envs.classic_control.cartpole import CartPoleEnv
from gym.envs.classic_control.mountain_car import MountainCarEnv
from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
from gym.envs.classic_control.pendulum import PendulumEnv
from gym.envs.classic_control.acrobot import AcrobotEnv

View File

@@ -0,0 +1,304 @@
"""classic Acrobot task"""
from gym import core, spaces
from gym.utils import seeding
import numpy as np
from numpy import sin, cos, pi
__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
__credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
"William Dabney", "Jonathan P. How"]
__license__ = "BSD 3-Clause"
__author__ = "Christoph Dann <cdann@cdann.de>"
# SOURCE:
# https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
class AcrobotEnv(core.Env):
"""
Acrobot is a 2-link pendulum with only the second joint actuated
Intitially, both links point downwards. The goal is to swing the
end-effector at a height at least the length of one link above the base.
Both links can swing freely and can pass by each other, i.e., they don't
collide when they have the same angle.
**STATE:**
The state consists of the sin() and cos() of the two rotational joint
angles and the joint angular velocities :
[cos(theta1) sin(theta1) cos(theta2) sin(theta2) thetaDot1 thetaDot2].
For the first link, an angle of 0 corresponds to the link pointing downwards.
The angle of the second link is relative to the angle of the first link.
An angle of 0 corresponds to having the same angle between the two links.
A state of [1, 0, 1, 0, ..., ...] means that both links point downwards.
**ACTIONS:**
The action is either applying +1, 0 or -1 torque on the joint between
the two pendulum links.
.. note::
The dynamics equations were missing some terms in the NIPS paper which
are present in the book. R. Sutton confirmed in personal correspondance
that the experimental results shown in the paper and the book were
generated with the equations shown in the book.
However, there is the option to run the domain with the paper equations
by setting book_or_nips = 'nips'
**REFERENCE:**
.. seealso::
R. Sutton: Generalization in Reinforcement Learning:
Successful Examples Using Sparse Coarse Coding (NIPS 1996)
.. seealso::
R. Sutton and A. G. Barto:
Reinforcement learning: An introduction.
Cambridge: MIT press, 1998.
.. warning::
This version of the domain uses the Runge-Kutta method for integrating
the system dynamics and is more realistic, but also considerably harder
than the original version which employs Euler integration,
see the AcrobotLegacy class.
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 15
}
dt = .2
LINK_LENGTH_1 = 1. # [m]
LINK_LENGTH_2 = 1. # [m]
LINK_MASS_1 = 1. #: [kg] mass of link 1
LINK_MASS_2 = 1. #: [kg] mass of link 2
LINK_COM_POS_1 = 0.5 #: [m] position of the center of mass of link 1
LINK_COM_POS_2 = 0.5 #: [m] position of the center of mass of link 2
LINK_MOI = 1. #: moments of inertia for both links
MAX_VEL_1 = 4 * np.pi
MAX_VEL_2 = 9 * np.pi
AVAIL_TORQUE = [-1., 0., +1]
torque_noise_max = 0.
#: use dynamics equations from the nips paper or the book
book_or_nips = "book"
action_arrow = None
domain_fig = None
actions_num = 3
def __init__(self):
self.viewer = None
high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low=low, high=high)
self.action_space = spaces.Discrete(3)
self.state = None
self.seed()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def reset(self):
self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
return self._get_ob()
def step(self, a):
s = self.state
torque = self.AVAIL_TORQUE[a]
# Add noise to the force action
if self.torque_noise_max > 0:
torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)
# Now, augment the state with our force action so it can be passed to
# _dsdt
s_augmented = np.append(s, torque)
ns = rk4(self._dsdt, s_augmented, [0, self.dt])
# only care about final timestep of integration returned by integrator
ns = ns[-1]
ns = ns[:4] # omit action
# ODEINT IS TOO SLOW!
# ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
# self.s_continuous = ns_continuous[-1] # We only care about the state
# at the ''final timestep'', self.dt
ns[0] = wrap(ns[0], -pi, pi)
ns[1] = wrap(ns[1], -pi, pi)
ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
self.state = ns
terminal = self._terminal()
reward = -1. if not terminal else 0.
return (self._get_ob(), reward, terminal, {})
def _get_ob(self):
s = self.state
return np.array([cos(s[0]), np.sin(s[0]), cos(s[1]), sin(s[1]), s[2], s[3]])
def _terminal(self):
s = self.state
return bool(-np.cos(s[0]) - np.cos(s[1] + s[0]) > 1.)
def _dsdt(self, s_augmented, t):
m1 = self.LINK_MASS_1
m2 = self.LINK_MASS_2
l1 = self.LINK_LENGTH_1
lc1 = self.LINK_COM_POS_1
lc2 = self.LINK_COM_POS_2
I1 = self.LINK_MOI
I2 = self.LINK_MOI
g = 9.8
a = s_augmented[-1]
s = s_augmented[:-1]
theta1 = s[0]
theta2 = s[1]
dtheta1 = s[2]
dtheta2 = s[3]
d1 = m1 * lc1 ** 2 + m2 * \
(l1 ** 2 + lc2 ** 2 + 2 * l1 * lc2 * np.cos(theta2)) + I1 + I2
d2 = m2 * (lc2 ** 2 + l1 * lc2 * np.cos(theta2)) + I2
phi2 = m2 * lc2 * g * np.cos(theta1 + theta2 - np.pi / 2.)
phi1 = - m2 * l1 * lc2 * dtheta2 ** 2 * np.sin(theta2) \
- 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * np.sin(theta2) \
+ (m1 * lc1 + m2 * l1) * g * np.cos(theta1 - np.pi / 2) + phi2
if self.book_or_nips == "nips":
# the following line is consistent with the description in the
# paper
ddtheta2 = (a + d2 / d1 * phi1 - phi2) / \
(m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
else:
# the following line is consistent with the java implementation and the
# book
ddtheta2 = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1 ** 2 * np.sin(theta2) - phi2) \
/ (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
ddtheta1 = -(d2 * ddtheta2 + phi1) / d1
return (dtheta1, dtheta2, ddtheta1, ddtheta2, 0.)
def render(self, mode='human'):
from gym.envs.classic_control import rendering
s = self.state
if self.viewer is None:
self.viewer = rendering.Viewer(500,500)
bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2 # 2.2 for default
self.viewer.set_bounds(-bound,bound,-bound,bound)
if s is None: return None
p1 = [-self.LINK_LENGTH_1 *
np.cos(s[0]), self.LINK_LENGTH_1 * np.sin(s[0])]
p2 = [p1[0] - self.LINK_LENGTH_2 * np.cos(s[0] + s[1]),
p1[1] + self.LINK_LENGTH_2 * np.sin(s[0] + s[1])]
xys = np.array([[0,0], p1, p2])[:,::-1]
thetas = [s[0]-np.pi/2, s[0]+s[1]-np.pi/2]
link_lengths = [self.LINK_LENGTH_1, self.LINK_LENGTH_2]
self.viewer.draw_line((-2.2, 1), (2.2, 1))
for ((x,y),th,llen) in zip(xys, thetas, link_lengths):
l,r,t,b = 0, llen, .1, -.1
jtransform = rendering.Transform(rotation=th, translation=(x,y))
link = self.viewer.draw_polygon([(l,b), (l,t), (r,t), (r,b)])
link.add_attr(jtransform)
link.set_color(0,.8, .8)
circ = self.viewer.draw_circle(.1)
circ.set_color(.8, .8, 0)
circ.add_attr(jtransform)
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None
def wrap(x, m, M):
"""
:param x: a scalar
:param m: minimum possible value in range
:param M: maximum possible value in range
Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
"""
diff = M - m
while x > M:
x = x - diff
while x < m:
x = x + diff
return x
def bound(x, m, M=None):
"""
:param x: scalar
Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
"""
if M is None:
M = m[1]
m = m[0]
# bound x between min (m) and Max (M)
return min(max(x, m), M)
def rk4(derivs, y0, t, *args, **kwargs):
"""
Integrate 1D or ND system of ODEs using 4-th order Runge-Kutta.
This is a toy implementation which may be useful if you find
yourself stranded on a system w/o scipy. Otherwise use
:func:`scipy.integrate`.
*y0*
initial state vector
*t*
sample times
*derivs*
returns the derivative of the system and has the
signature ``dy = derivs(yi, ti)``
*args*
additional arguments passed to the derivative function
*kwargs*
additional keyword arguments passed to the derivative function
Example 1 ::
## 2D system
def derivs6(x,t):
d1 = x[0] + 2*x[1]
d2 = -3*x[0] + 4*x[1]
return (d1, d2)
dt = 0.0005
t = arange(0.0, 2.0, dt)
y0 = (1,2)
yout = rk4(derivs6, y0, t)
Example 2::
## 1D system
alpha = 2
def derivs(x,t):
return -alpha*x + exp(-t)
y0 = 1
yout = rk4(derivs, y0, t)
If you have access to scipy, you should probably be using the
scipy.integrate tools rather than this function.
"""
try:
Ny = len(y0)
except TypeError:
yout = np.zeros((len(t),), np.float_)
else:
yout = np.zeros((len(t), Ny), np.float_)
yout[0] = y0
for i in np.arange(len(t) - 1):
thist = t[i]
dt = t[i + 1] - thist
dt2 = dt / 2.0
y0 = yout[i]
k1 = np.asarray(derivs(y0, thist, *args, **kwargs))
k2 = np.asarray(derivs(y0 + dt2 * k1, thist + dt2, *args, **kwargs))
k3 = np.asarray(derivs(y0 + dt2 * k2, thist + dt2, *args, **kwargs))
k4 = np.asarray(derivs(y0 + dt * k3, thist + dt, *args, **kwargs))
yout[i + 1] = y0 + dt / 6.0 * (k1 + 2 * k2 + 2 * k3 + k4)
return yout

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

View File

@@ -0,0 +1,193 @@
"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from http://incompleteideas.net/sutton/book/code/pole.c
permalink: https://perma.cc/C9ZM-652R
"""
import math
import gym
from gym import spaces, logger
from gym.utils import seeding
import numpy as np
class CartPoleEnv(gym.Env):
"""
Description:
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.
Source:
This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson
Observation:
Type: Box(4)
Num Observation Min Max
0 Cart Position -4.8 4.8
1 Cart Velocity -Inf Inf
2 Pole Angle -24° 24°
3 Pole Velocity At Tip -Inf Inf
Actions:
Type: Discrete(2)
Num Action
0 Push cart to the left
1 Push cart to the right
Note: The amount the velocity is reduced or increased is not fixed as it depends on the angle the pole is pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it
Reward:
Reward is 1 for every step taken, including the termination step
Starting State:
All observations are assigned a uniform random value between ±0.05
Episode Termination:
Pole Angle is more than ±12°
Cart Position is more than ±2.4 (center of the cart reaches the edge of the display)
Episode length is greater than 200
Solved Requirements
Considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
def __init__(self):
self.gravity = 9.8
self.masscart = 1.0
self.masspole = 0.1
self.total_mass = (self.masspole + self.masscart)
self.length = 0.5 # actually half the pole's length
self.polemass_length = (self.masspole * self.length)
self.force_mag = 10.0
self.tau = 0.02 # seconds between state updates
self.kinematics_integrator = 'euler'
# Angle at which to fail the episode
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 2.4
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
high = np.array([
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max])
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
self.seed()
self.viewer = None
self.state = None
self.steps_beyond_done = None
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def step(self, action):
assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
state = self.state
x, x_dot, theta, theta_dot = state
force = self.force_mag if action==1 else -self.force_mag
costheta = math.cos(theta)
sintheta = math.sin(theta)
temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
if self.kinematics_integrator == 'euler':
x = x + self.tau * x_dot
x_dot = x_dot + self.tau * xacc
theta = theta + self.tau * theta_dot
theta_dot = theta_dot + self.tau * thetaacc
else: # semi-implicit euler
x_dot = x_dot + self.tau * xacc
x = x + self.tau * x_dot
theta_dot = theta_dot + self.tau * thetaacc
theta = theta + self.tau * theta_dot
self.state = (x,x_dot,theta,theta_dot)
done = x < -self.x_threshold \
or x > self.x_threshold \
or theta < -self.theta_threshold_radians \
or theta > self.theta_threshold_radians
done = bool(done)
if not done:
reward = 1.0
elif self.steps_beyond_done is None:
# Pole just fell!
self.steps_beyond_done = 0
reward = 1.0
else:
if self.steps_beyond_done == 0:
logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
self.steps_beyond_done += 1
reward = 0.0
return np.array(self.state), reward, done, {}
def reset(self):
self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
self.steps_beyond_done = None
return np.array(self.state)
def render(self, mode='human'):
screen_width = 600
screen_height = 400
world_width = self.x_threshold*2
scale = screen_width/world_width
carty = 100 # TOP OF CART
polewidth = 10.0
polelen = scale * (2 * self.length)
cartwidth = 50.0
cartheight = 30.0
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
l,r,t,b = -cartwidth/2, cartwidth/2, cartheight/2, -cartheight/2
axleoffset =cartheight/4.0
cart = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
self.carttrans = rendering.Transform()
cart.add_attr(self.carttrans)
self.viewer.add_geom(cart)
l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2
pole = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
pole.set_color(.8,.6,.4)
self.poletrans = rendering.Transform(translation=(0, axleoffset))
pole.add_attr(self.poletrans)
pole.add_attr(self.carttrans)
self.viewer.add_geom(pole)
self.axle = rendering.make_circle(polewidth/2)
self.axle.add_attr(self.poletrans)
self.axle.add_attr(self.carttrans)
self.axle.set_color(.5,.5,.8)
self.viewer.add_geom(self.axle)
self.track = rendering.Line((0,carty), (screen_width,carty))
self.track.set_color(0,0,0)
self.viewer.add_geom(self.track)
self._pole_geom = pole
if self.state is None: return None
# Edit the pole polygon vertex
pole = self._pole_geom
l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2
pole.v = [(l,b), (l,t), (r,t), (r,b)]
x = self.state
cartx = x[0]*scale+screen_width/2.0 # MIDDLE OF CART
self.carttrans.set_translation(cartx, carty)
self.poletrans.set_rotation(-x[2])
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None

View File

@@ -0,0 +1,144 @@
# -*- coding: utf-8 -*-
"""
@author: Olivier Sigaud
A merge between two sources:
* Adaptation of the MountainCar Environment from the "FAReinforcement" library
of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch'
and then modified by Arnaud de Broissia
* the OpenAI/gym MountainCar environment
itself from
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
class Continuous_MountainCarEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
def __init__(self):
self.min_action = -1.0
self.max_action = 1.0
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = 0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
self.power = 0.0015
self.low_state = np.array([self.min_position, -self.max_speed])
self.high_state = np.array([self.max_position, self.max_speed])
self.viewer = None
self.action_space = spaces.Box(low=self.min_action, high=self.max_action, shape=(1,))
self.observation_space = spaces.Box(low=self.low_state, high=self.high_state)
self.seed()
self.reset()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def step(self, action):
position = self.state[0]
velocity = self.state[1]
force = min(max(action[0], -1.0), 1.0)
velocity += force*self.power -0.0025 * math.cos(3*position)
if (velocity > self.max_speed): velocity = self.max_speed
if (velocity < -self.max_speed): velocity = -self.max_speed
position += velocity
if (position > self.max_position): position = self.max_position
if (position < self.min_position): position = self.min_position
if (position==self.min_position and velocity<0): velocity = 0
done = bool(position >= self.goal_position)
reward = 0
if done:
reward = 100.0
reward-= math.pow(action[0],2)*0.1
self.state = np.array([position, velocity])
return self.state, reward, done, {}
def reset(self):
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
return np.array(self.state)
# def get_state(self):
# return self.state
def _height(self, xs):
return np.sin(3 * xs)*.45+.55
def render(self, mode='human'):
screen_width = 600
screen_height = 400
world_width = self.max_position - self.min_position
scale = screen_width/world_width
carwidth=40
carheight=20
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
xys = list(zip((xs-self.min_position)*scale, ys*scale))
self.track = rendering.make_polyline(xys)
self.track.set_linewidth(4)
self.viewer.add_geom(self.track)
clearance = 10
l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
car.add_attr(rendering.Transform(translation=(0, clearance)))
self.cartrans = rendering.Transform()
car.add_attr(self.cartrans)
self.viewer.add_geom(car)
frontwheel = rendering.make_circle(carheight/2.5)
frontwheel.set_color(.5, .5, .5)
frontwheel.add_attr(rendering.Transform(translation=(carwidth/4,clearance)))
frontwheel.add_attr(self.cartrans)
self.viewer.add_geom(frontwheel)
backwheel = rendering.make_circle(carheight/2.5)
backwheel.add_attr(rendering.Transform(translation=(-carwidth/4,clearance)))
backwheel.add_attr(self.cartrans)
backwheel.set_color(.5, .5, .5)
self.viewer.add_geom(backwheel)
flagx = (self.goal_position-self.min_position)*scale
flagy1 = self._height(self.goal_position)*scale
flagy2 = flagy1 + 50
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
self.viewer.add_geom(flagpole)
flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10), (flagx+25, flagy2-5)])
flag.set_color(.8,.8,0)
self.viewer.add_geom(flag)
pos = self.state[0]
self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
self.cartrans.set_rotation(math.cos(3 * pos))
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None

View File

@@ -0,0 +1,119 @@
"""
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
class MountainCarEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
def __init__(self):
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = 0.5
self.low = np.array([self.min_position, -self.max_speed])
self.high = np.array([self.max_position, self.max_speed])
self.viewer = None
self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high)
self.seed()
self.reset()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def step(self, action):
assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
position, velocity = self.state
velocity += (action-1)*0.001 + math.cos(3*position)*(-0.0025)
velocity = np.clip(velocity, -self.max_speed, self.max_speed)
position += velocity
position = np.clip(position, self.min_position, self.max_position)
if (position==self.min_position and velocity<0): velocity = 0
done = bool(position >= self.goal_position)
reward = -1.0
self.state = (position, velocity)
return np.array(self.state), reward, done, {}
def reset(self):
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
return np.array(self.state)
def _height(self, xs):
return np.sin(3 * xs)*.45+.55
def render(self, mode='human'):
screen_width = 600
screen_height = 400
world_width = self.max_position - self.min_position
scale = screen_width/world_width
carwidth=40
carheight=20
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
xys = list(zip((xs-self.min_position)*scale, ys*scale))
self.track = rendering.make_polyline(xys)
self.track.set_linewidth(4)
self.viewer.add_geom(self.track)
clearance = 10
l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
car.add_attr(rendering.Transform(translation=(0, clearance)))
self.cartrans = rendering.Transform()
car.add_attr(self.cartrans)
self.viewer.add_geom(car)
frontwheel = rendering.make_circle(carheight/2.5)
frontwheel.set_color(.5, .5, .5)
frontwheel.add_attr(rendering.Transform(translation=(carwidth/4,clearance)))
frontwheel.add_attr(self.cartrans)
self.viewer.add_geom(frontwheel)
backwheel = rendering.make_circle(carheight/2.5)
backwheel.add_attr(rendering.Transform(translation=(-carwidth/4,clearance)))
backwheel.add_attr(self.cartrans)
backwheel.set_color(.5, .5, .5)
self.viewer.add_geom(backwheel)
flagx = (self.goal_position-self.min_position)*scale
flagy1 = self._height(self.goal_position)*scale
flagy2 = flagy1 + 50
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
self.viewer.add_geom(flagpole)
flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10), (flagx+25, flagy2-5)])
flag.set_color(.8,.8,0)
self.viewer.add_geom(flag)
pos = self.state[0]
self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
self.cartrans.set_rotation(math.cos(3 * pos))
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None

View File

@@ -0,0 +1,90 @@
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
class PendulumEnv(gym.Env):
metadata = {
'render.modes' : ['human', 'rgb_array'],
'video.frames_per_second' : 30
}
def __init__(self):
self.max_speed=8
self.max_torque=2.
self.dt=.05
self.viewer = None
high = np.array([1., 1., self.max_speed])
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32)
self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)
self.seed()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def step(self,u):
th, thdot = self.state # th := theta
g = 10.
m = 1.
l = 1.
dt = self.dt
u = np.clip(u, -self.max_torque, self.max_torque)[0]
self.last_u = u # for rendering
costs = angle_normalize(th)**2 + .1*thdot**2 + .001*(u**2)
newthdot = thdot + (-3*g/(2*l) * np.sin(th + np.pi) + 3./(m*l**2)*u) * dt
newth = th + newthdot*dt
newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) #pylint: disable=E1111
self.state = np.array([newth, newthdot])
return self._get_obs(), -costs, False, {}
def reset(self):
high = np.array([np.pi, 1])
self.state = self.np_random.uniform(low=-high, high=high)
self.last_u = None
return self._get_obs()
def _get_obs(self):
theta, thetadot = self.state
return np.array([np.cos(theta), np.sin(theta), thetadot])
def render(self, mode='human'):
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(500,500)
self.viewer.set_bounds(-2.2,2.2,-2.2,2.2)
rod = rendering.make_capsule(1, .2)
rod.set_color(.8, .3, .3)
self.pole_transform = rendering.Transform()
rod.add_attr(self.pole_transform)
self.viewer.add_geom(rod)
axle = rendering.make_circle(.05)
axle.set_color(0,0,0)
self.viewer.add_geom(axle)
fname = path.join(path.dirname(__file__), "assets/clockwise.png")
self.img = rendering.Image(fname, 1., 1.)
self.imgtrans = rendering.Transform()
self.img.add_attr(self.imgtrans)
self.viewer.add_onetime(self.img)
self.pole_transform.set_rotation(self.state[0] + np.pi/2)
if self.last_u:
self.imgtrans.scale = (-self.last_u/2, np.abs(self.last_u)/2)
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None
def angle_normalize(x):
return (((x+np.pi) % (2*np.pi)) - np.pi)

View File

@@ -0,0 +1,359 @@
"""
2D rendering framework
"""
from __future__ import division
import os
import six
import sys
if "Apple" in sys.version:
if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
# (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
from gym.utils import reraise
from gym import error
try:
import pyglet
except ImportError as e:
reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
try:
from pyglet.gl import *
except ImportError as e:
reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'")
import math
import numpy as np
RAD2DEG = 57.29577951308232
def get_display(spec):
"""Convert a display specification (such as :0) into an actual Display
object.
Pyglet only supports multiple Displays on Linux.
"""
if spec is None:
return None
elif isinstance(spec, six.string_types):
return pyglet.canvas.Display(spec)
else:
raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
class Viewer(object):
def __init__(self, width, height, display=None):
display = get_display(display)
self.width = width
self.height = height
self.window = pyglet.window.Window(width=width, height=height, display=display)
self.window.on_close = self.window_closed_by_user
self.isopen = True
self.geoms = []
self.onetime_geoms = []
self.transform = Transform()
glEnable(GL_BLEND)
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
def close(self):
self.window.close()
def window_closed_by_user(self):
self.isopen = False
def set_bounds(self, left, right, bottom, top):
assert right > left and top > bottom
scalex = self.width/(right-left)
scaley = self.height/(top-bottom)
self.transform = Transform(
translation=(-left*scalex, -bottom*scaley),
scale=(scalex, scaley))
def add_geom(self, geom):
self.geoms.append(geom)
def add_onetime(self, geom):
self.onetime_geoms.append(geom)
def render(self, return_rgb_array=False):
glClearColor(1,1,1,1)
self.window.clear()
self.window.switch_to()
self.window.dispatch_events()
self.transform.enable()
for geom in self.geoms:
geom.render()
for geom in self.onetime_geoms:
geom.render()
self.transform.disable()
arr = None
if return_rgb_array:
buffer = pyglet.image.get_buffer_manager().get_color_buffer()
image_data = buffer.get_image_data()
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
# In https://github.com/openai/gym-http-api/issues/2, we
# discovered that someone using Xmonad on Arch was having
# a window of size 598 x 398, though a 600 x 400 window
# was requested. (Guess Xmonad was preserving a pixel for
# the boundary.) So we use the buffer height/width rather
# than the requested one.
arr = arr.reshape(buffer.height, buffer.width, 4)
arr = arr[::-1,:,0:3]
self.window.flip()
self.onetime_geoms = []
return arr if return_rgb_array else self.isopen
# Convenience
def draw_circle(self, radius=10, res=30, filled=True, **attrs):
geom = make_circle(radius=radius, res=res, filled=filled)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def draw_polygon(self, v, filled=True, **attrs):
geom = make_polygon(v=v, filled=filled)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def draw_polyline(self, v, **attrs):
geom = make_polyline(v=v)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def draw_line(self, start, end, **attrs):
geom = Line(start, end)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def get_array(self):
self.window.flip()
image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
self.window.flip()
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
arr = arr.reshape(self.height, self.width, 4)
return arr[::-1,:,0:3]
def __del__(self):
self.close()
def _add_attrs(geom, attrs):
if "color" in attrs:
geom.set_color(*attrs["color"])
if "linewidth" in attrs:
geom.set_linewidth(attrs["linewidth"])
class Geom(object):
def __init__(self):
self._color=Color((0, 0, 0, 1.0))
self.attrs = [self._color]
def render(self):
for attr in reversed(self.attrs):
attr.enable()
self.render1()
for attr in self.attrs:
attr.disable()
def render1(self):
raise NotImplementedError
def add_attr(self, attr):
self.attrs.append(attr)
def set_color(self, r, g, b):
self._color.vec4 = (r, g, b, 1)
class Attr(object):
def enable(self):
raise NotImplementedError
def disable(self):
pass
class Transform(Attr):
def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
self.set_translation(*translation)
self.set_rotation(rotation)
self.set_scale(*scale)
def enable(self):
glPushMatrix()
glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc ppint
glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
glScalef(self.scale[0], self.scale[1], 1)
def disable(self):
glPopMatrix()
def set_translation(self, newx, newy):
self.translation = (float(newx), float(newy))
def set_rotation(self, new):
self.rotation = float(new)
def set_scale(self, newx, newy):
self.scale = (float(newx), float(newy))
class Color(Attr):
def __init__(self, vec4):
self.vec4 = vec4
def enable(self):
glColor4f(*self.vec4)
class LineStyle(Attr):
def __init__(self, style):
self.style = style
def enable(self):
glEnable(GL_LINE_STIPPLE)
glLineStipple(1, self.style)
def disable(self):
glDisable(GL_LINE_STIPPLE)
class LineWidth(Attr):
def __init__(self, stroke):
self.stroke = stroke
def enable(self):
glLineWidth(self.stroke)
class Point(Geom):
def __init__(self):
Geom.__init__(self)
def render1(self):
glBegin(GL_POINTS) # draw point
glVertex3f(0.0, 0.0, 0.0)
glEnd()
class FilledPolygon(Geom):
def __init__(self, v):
Geom.__init__(self)
self.v = v
def render1(self):
if len(self.v) == 4 : glBegin(GL_QUADS)
elif len(self.v) > 4 : glBegin(GL_POLYGON)
else: glBegin(GL_TRIANGLES)
for p in self.v:
glVertex3f(p[0], p[1],0) # draw each vertex
glEnd()
def make_circle(radius=10, res=30, filled=True):
points = []
for i in range(res):
ang = 2*math.pi*i / res
points.append((math.cos(ang)*radius, math.sin(ang)*radius))
if filled:
return FilledPolygon(points)
else:
return PolyLine(points, True)
def make_polygon(v, filled=True):
if filled: return FilledPolygon(v)
else: return PolyLine(v, True)
def make_polyline(v):
return PolyLine(v, False)
def make_capsule(length, width):
l, r, t, b = 0, length, width/2, -width/2
box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
circ0 = make_circle(width/2)
circ1 = make_circle(width/2)
circ1.add_attr(Transform(translation=(length, 0)))
geom = Compound([box, circ0, circ1])
return geom
class Compound(Geom):
def __init__(self, gs):
Geom.__init__(self)
self.gs = gs
for g in self.gs:
g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
def render1(self):
for g in self.gs:
g.render()
class PolyLine(Geom):
def __init__(self, v, close):
Geom.__init__(self)
self.v = v
self.close = close
self.linewidth = LineWidth(1)
self.add_attr(self.linewidth)
def render1(self):
glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
for p in self.v:
glVertex3f(p[0], p[1],0) # draw each vertex
glEnd()
def set_linewidth(self, x):
self.linewidth.stroke = x
class Line(Geom):
def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
Geom.__init__(self)
self.start = start
self.end = end
self.linewidth = LineWidth(1)
self.add_attr(self.linewidth)
def render1(self):
glBegin(GL_LINES)
glVertex2f(*self.start)
glVertex2f(*self.end)
glEnd()
class Image(Geom):
def __init__(self, fname, width, height):
Geom.__init__(self)
self.width = width
self.height = height
img = pyglet.image.load(fname)
self.img = img
self.flip = False
def render1(self):
self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
# ================================================================
class SimpleImageViewer(object):
def __init__(self, display=None, maxwidth=500):
self.window = None
self.isopen = False
self.display = display
self.maxwidth = maxwidth
def imshow(self, arr):
if self.window is None:
height, width, _channels = arr.shape
if width > self.maxwidth:
scale = self.maxwidth / width
width = int(scale * width)
height = int(scale * height)
self.window = pyglet.window.Window(width=width, height=height,
display=self.display, vsync=False, resizable=True)
self.width = width
self.height = height
self.isopen = True
@self.window.event
def on_resize(width, height):
self.width = width
self.height = height
@self.window.event
def on_close():
self.isopen = False
assert len(arr.shape) == 3, "You passed in an image with the wrong number shape"
image = pyglet.image.ImageData(arr.shape[1], arr.shape[0],
'RGB', arr.tobytes(), pitch=arr.shape[1]*-3)
gl.glTexParameteri(gl.GL_TEXTURE_2D,
gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST)
texture = image.get_texture()
texture.width = self.width
texture.height = self.height
self.window.clear()
self.window.switch_to()
self.window.dispatch_events()
texture.blit(0, 0) # draw
self.window.flip()
def close(self):
if self.isopen:
self.window.close()
self.isopen = False
def __del__(self):
self.close()

View File

@@ -0,0 +1,16 @@
from gym.envs.mujoco.mujoco_env import MujocoEnv
# ^^^^^ so that user gets the correct error
# message if mujoco is not installed correctly
from gym.envs.mujoco.ant import AntEnv
from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
from gym.envs.mujoco.hopper import HopperEnv
from gym.envs.mujoco.walker2d import Walker2dEnv
from gym.envs.mujoco.humanoid import HumanoidEnv
from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
from gym.envs.mujoco.reacher import ReacherEnv
from gym.envs.mujoco.swimmer import SwimmerEnv
from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
from gym.envs.mujoco.pusher import PusherEnv
from gym.envs.mujoco.thrower import ThrowerEnv
from gym.envs.mujoco.striker import StrikerEnv

View File

@@ -0,0 +1,45 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
utils.EzPickle.__init__(self)
def step(self, a):
xposbefore = self.get_body_com("torso")[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.get_body_com("torso")[0]
forward_reward = (xposafter - xposbefore)/self.dt
ctrl_cost = .5 * np.square(a).sum()
contact_cost = 0.5 * 1e-3 * np.sum(
np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
survive_reward = 1.0
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self.state_vector()
notdone = np.isfinite(state).all() \
and state[2] >= 0.2 and state[2] <= 1.0
done = not notdone
ob = self._get_obs()
return ob, reward, done, dict(
reward_forward=forward_reward,
reward_ctrl=-ctrl_cost,
reward_contact=-contact_cost,
reward_survive=survive_reward)
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
])
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
self.viewer.cam.distance = self.model.stat.extent * 0.5

View File

@@ -0,0 +1,81 @@
<mujoco model="ant">
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option integrator="RK4" timestep="0.01"/>
<custom>
<numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
</custom>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 0.75">
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
<geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
<joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
<body name="front_left_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
<body name="aux_1" pos="0.2 0.2 0">
<joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 0.2 0">
<joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="front_right_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
<body name="aux_2" pos="-0.2 0.2 0">
<joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 0.2 0">
<joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
<body name="aux_3" pos="-0.2 -0.2 0">
<joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 -0.2 0">
<joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="right_back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
<body name="aux_4" pos="0.2 -0.2 0">
<joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 -0.2 0">
<joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,96 @@
<!-- Cheetah Model
The state space is populated with joints in the order that they are
defined in this file. The actuators also operate on joints.
State-Space (name/joint/parameter):
- rootx slider position (m)
- rootz slider position (m)
- rooty hinge angle (rad)
- bthigh hinge angle (rad)
- bshin hinge angle (rad)
- bfoot hinge angle (rad)
- fthigh hinge angle (rad)
- fshin hinge angle (rad)
- ffoot hinge angle (rad)
- rootx slider velocity (m/s)
- rootz slider velocity (m/s)
- rooty hinge angular velocity (rad/s)
- bthigh hinge angular velocity (rad/s)
- bshin hinge angular velocity (rad/s)
- bfoot hinge angular velocity (rad/s)
- fthigh hinge angular velocity (rad/s)
- fshin hinge angular velocity (rad/s)
- ffoot hinge angular velocity (rad/s)
Actuators (name/actuator/parameter):
- bthigh hinge torque (N m)
- bshin hinge torque (N m)
- bfoot hinge torque (N m)
- fthigh hinge torque (N m)
- fshin hinge torque (N m)
- ffoot hinge torque (N m)
-->
<mujoco model="cheetah">
<compiler angle="radian" coordinate="local" inertiafromgeom="true" settotalmass="14"/>
<default>
<joint armature=".1" damping=".01" limited="true" solimplimit="0 .8 .03" solreflimit=".02 1" stiffness="8"/>
<geom conaffinity="0" condim="3" contype="1" friction=".4 .1 .1" rgba="0.8 0.6 .4 1" solimp="0.0 0.8 0.01" solref="0.02 1"/>
<motor ctrllimited="true" ctrlrange="-1 1"/>
</default>
<size nstack="300000" nuser_geom="1"/>
<option gravity="0 0 -9.81" timestep="0.01"/>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 .7">
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
<geom fromto="-.5 0 0 .5 0 0" name="torso" size="0.046" type="capsule"/>
<geom axisangle="0 1 0 .87" name="head" pos=".6 0 .1" size="0.046 .15" type="capsule"/>
<!-- <site name='tip' pos='.15 0 .11'/>-->
<body name="bthigh" pos="-.5 0 0">
<joint axis="0 1 0" damping="6" name="bthigh" pos="0 0 0" range="-.52 1.05" stiffness="240" type="hinge"/>
<geom axisangle="0 1 0 -3.8" name="bthigh" pos=".1 0 -.13" size="0.046 .145" type="capsule"/>
<body name="bshin" pos=".16 0 -.25">
<joint axis="0 1 0" damping="4.5" name="bshin" pos="0 0 0" range="-.785 .785" stiffness="180" type="hinge"/>
<geom axisangle="0 1 0 -2.03" name="bshin" pos="-.14 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .15" type="capsule"/>
<body name="bfoot" pos="-.28 0 -.14">
<joint axis="0 1 0" damping="3" name="bfoot" pos="0 0 0" range="-.4 .785" stiffness="120" type="hinge"/>
<geom axisangle="0 1 0 -.27" name="bfoot" pos=".03 0 -.097" rgba="0.9 0.6 0.6 1" size="0.046 .094" type="capsule"/>
</body>
</body>
</body>
<body name="fthigh" pos=".5 0 0">
<joint axis="0 1 0" damping="4.5" name="fthigh" pos="0 0 0" range="-1 .7" stiffness="180" type="hinge"/>
<geom axisangle="0 1 0 .52" name="fthigh" pos="-.07 0 -.12" size="0.046 .133" type="capsule"/>
<body name="fshin" pos="-.14 0 -.24">
<joint axis="0 1 0" damping="3" name="fshin" pos="0 0 0" range="-1.2 .87" stiffness="120" type="hinge"/>
<geom axisangle="0 1 0 -.6" name="fshin" pos=".065 0 -.09" rgba="0.9 0.6 0.6 1" size="0.046 .106" type="capsule"/>
<body name="ffoot" pos=".13 0 -.18">
<joint axis="0 1 0" damping="1.5" name="ffoot" pos="0 0 0" range="-.5 .5" stiffness="60" type="hinge"/>
<geom axisangle="0 1 0 -.6" name="ffoot" pos=".045 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .07" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor gear="120" joint="bthigh" name="bthigh"/>
<motor gear="90" joint="bshin" name="bshin"/>
<motor gear="60" joint="bfoot" name="bfoot"/>
<motor gear="120" joint="fthigh" name="fthigh"/>
<motor gear="60" joint="fshin" name="fshin"/>
<motor gear="30" joint="ffoot" name="ffoot"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,48 @@
<mujoco model="hopper">
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<visual>
<map znear="0.02"/>
</visual>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
<body name="thigh" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
<body name="leg" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
<body name="foot" pos="0.13/2 0 0.1">
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@@ -0,0 +1,121 @@
<mujoco model="humanoid">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
<!-- <flags solverstat="enable" energy="enable"/>-->
</option>
<size nkey="5" nuser_geom="1"/>
<visual>
<map fogend="5" fogstart="3"/>
</visual>
<asset>
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
<!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
<!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
<body name="torso" pos="0 0 1.4">
<camera name="track" mode="trackcom" pos="0 -4 0" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
<geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
<geom name="head" pos="0 0 .19" size=".09" type="sphere" user="258"/>
<geom fromto="-.01 -.06 -.12 -.01 .06 -.12" name="uwaist" size="0.06" type="capsule"/>
<body name="lwaist" pos="-.01 0 -0.260" quat="1.000 0 -0.002 0">
<geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
<joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
<joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
<body name="pelvis" pos="0 0 -0.165" quat="1.000 0 -0.002 0">
<joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
<geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
<body name="right_thigh" pos="0 -0.1 -0.04">
<joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0 0.01 -.34" name="right_thigh1" size="0.06" type="capsule"/>
<body name="right_shin" pos="0 0.01 -0.403">
<joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
<geom fromto="0 0 0 0 0 -.3" name="right_shin1" size="0.049" type="capsule"/>
<body name="right_foot" pos="0 0 -0.45">
<geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
</body>
</body>
</body>
<body name="left_thigh" pos="0 0.1 -0.04">
<joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0 -0.01 -.34" name="left_thigh1" size="0.06" type="capsule"/>
<body name="left_shin" pos="0 -0.01 -0.403">
<joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 0 0 -.3" name="left_shin1" size="0.049" type="capsule"/>
<body name="left_foot" pos="0 0 -0.45">
<geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
</body>
</body>
</body>
</body>
</body>
<body name="right_upper_arm" pos="0 -0.17 0.06">
<joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
<body name="right_lower_arm" pos=".18 -.18 -.18">
<joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
<geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
<camera pos="0 0 0"/>
</body>
</body>
<body name="left_upper_arm" pos="0 0.17 0.06">
<joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
<body name="left_lower_arm" pos=".18 .18 -.18">
<joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
<geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
</body>
</body>
</body>
</worldbody>
<tendon>
<fixed name="left_hipknee">
<joint coef="-1" joint="left_hip_y"/>
<joint coef="1" joint="left_knee"/>
</fixed>
<fixed name="right_hipknee">
<joint coef="-1" joint="right_hip_y"/>
<joint coef="1" joint="right_knee"/>
</fixed>
</tendon>
<actuator>
<motor gear="100" joint="abdomen_y" name="abdomen_y"/>
<motor gear="100" joint="abdomen_z" name="abdomen_z"/>
<motor gear="100" joint="abdomen_x" name="abdomen_x"/>
<motor gear="100" joint="right_hip_x" name="right_hip_x"/>
<motor gear="100" joint="right_hip_z" name="right_hip_z"/>
<motor gear="300" joint="right_hip_y" name="right_hip_y"/>
<motor gear="200" joint="right_knee" name="right_knee"/>
<motor gear="100" joint="left_hip_x" name="left_hip_x"/>
<motor gear="100" joint="left_hip_z" name="left_hip_z"/>
<motor gear="300" joint="left_hip_y" name="left_hip_y"/>
<motor gear="200" joint="left_knee" name="left_knee"/>
<motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
<motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
<motor gear="25" joint="right_elbow" name="right_elbow"/>
<motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
<motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
<motor gear="25" joint="left_elbow" name="left_elbow"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,121 @@
<mujoco model="humanoidstandup">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
<!-- <flags solverstat="enable" energy="enable"/>-->
</option>
<size nkey="5" nuser_geom="1"/>
<visual>
<map fogend="5" fogstart="3"/>
</visual>
<asset>
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
<!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
<!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
<body name="torso" pos="0 0 .105">
<camera name="track" mode="trackcom" pos="0 -3 .5" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
<geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
<geom name="head" pos="-.15 0 0" size=".09" type="sphere" user="258"/>
<geom fromto=".11 -.06 0 .11 .06 0" name="uwaist" size="0.06" type="capsule"/>
<body name="lwaist" pos=".21 0 0" quat="1.000 0 -0.002 0">
<geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
<joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
<joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
<body name="pelvis" pos="0.165 0 0" quat="1.000 0 -0.002 0">
<joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
<geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
<body name="right_thigh" pos="0 -0.1 0">
<joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0.34 0.01 0" name="right_thigh1" size="0.06" type="capsule"/>
<body name="right_shin" pos="0.403 0.01 0">
<joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
<geom fromto="0 0 0 0.3 0 0" name="right_shin1" size="0.049" type="capsule"/>
<body name="right_foot" pos="0.35 0 -.10">
<geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
</body>
</body>
</body>
<body name="left_thigh" pos="0 0.1 0">
<joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0.34 -0.01 0" name="left_thigh1" size="0.06" type="capsule"/>
<body name="left_shin" pos="0.403 -0.01 0">
<joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 0.3 0 0" name="left_shin1" size="0.049" type="capsule"/>
<body name="left_foot" pos="0.35 0 -.1">
<geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
</body>
</body>
</body>
</body>
</body>
<body name="right_upper_arm" pos="0 -0.17 0.06">
<joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
<body name="right_lower_arm" pos=".18 -.18 -.18">
<joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
<geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
<camera pos="0 0 0"/>
</body>
</body>
<body name="left_upper_arm" pos="0 0.17 0.06">
<joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
<body name="left_lower_arm" pos=".18 .18 -.18">
<joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
<geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
</body>
</body>
</body>
</worldbody>
<tendon>
<fixed name="left_hipknee">
<joint coef="-1" joint="left_hip_y"/>
<joint coef="1" joint="left_knee"/>
</fixed>
<fixed name="right_hipknee">
<joint coef="-1" joint="right_hip_y"/>
<joint coef="1" joint="right_knee"/>
</fixed>
</tendon>
<actuator>
<motor gear="100" joint="abdomen_y" name="abdomen_y"/>
<motor gear="100" joint="abdomen_z" name="abdomen_z"/>
<motor gear="100" joint="abdomen_x" name="abdomen_x"/>
<motor gear="100" joint="right_hip_x" name="right_hip_x"/>
<motor gear="100" joint="right_hip_z" name="right_hip_z"/>
<motor gear="300" joint="right_hip_y" name="right_hip_y"/>
<motor gear="200" joint="right_knee" name="right_knee"/>
<motor gear="100" joint="left_hip_x" name="left_hip_x"/>
<motor gear="100" joint="left_hip_z" name="left_hip_z"/>
<motor gear="300" joint="left_hip_y" name="left_hip_y"/>
<motor gear="200" joint="left_knee" name="left_knee"/>
<motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
<motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
<motor gear="25" joint="right_elbow" name="right_elbow"/>
<motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
<motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
<motor gear="25" joint="left_elbow" name="left_elbow"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,47 @@
<!-- Cartpole Model
The state space is populated with joints in the order that they are
defined in this file. The actuators also operate on joints.
State-Space (name/joint/parameter):
- cart slider position (m)
- pole hinge angle (rad)
- cart slider velocity (m/s)
- pole hinge angular velocity (rad/s)
Actuators (name/actuator/parameter):
- cart motor force x (N)
-->
<mujoco model="cartpole">
<compiler coordinate="local" inertiafromgeom="true"/>
<custom>
<numeric data="2" name="frame_skip"/>
</custom>
<default>
<joint damping="0.05"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
</default>
<option gravity="1e-5 0 -9.81" integrator="RK4" timestep="0.01"/>
<size nstack="3000"/>
<worldbody>
<geom name="floor" pos="0 0 -3.0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
<body name="cart" pos="0 0 0">
<joint axis="1 0 0" limited="true" margin="0.01" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
<body name="pole" pos="0 0 0">
<joint axis="0 1 0" name="hinge" pos="0 0 0" type="hinge"/>
<geom fromto="0 0 0 0 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
<body name="pole2" pos="0 0 0.6">
<joint axis="0 1 0" name="hinge2" pos="0 0 0" type="hinge"/>
<geom fromto="0 0 0 0 0 0.6" name="cpole2" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
<site name="tip" pos="0 0 .6" size="0.01 0.01"/>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1 1" gear="500" joint="slider" name="slide"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,27 @@
<mujoco model="inverted pendulum">
<compiler inertiafromgeom="true"/>
<default>
<joint armature="0" damping="1" limited="true"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
<tendon/>
<motor ctrlrange="-3 3"/>
</default>
<option gravity="0 0 -9.81" integrator="RK4" timestep="0.02"/>
<size nstack="3000"/>
<worldbody>
<!--geom name="ground" type="plane" pos="0 0 0" /-->
<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
<body name="cart" pos="0 0 0">
<joint axis="1 0 0" limited="true" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
<body name="pole" pos="0 0 0">
<joint axis="0 1 0" name="hinge" pos="0 0 0" range="-90 90" type="hinge"/>
<geom fromto="0 0 0 0.001 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.049 0.3" type="capsule"/>
<!-- <body name="pole2" pos="0.001 0 0.6"><joint name="hinge2" type="hinge" pos="0 0 0" axis="0 1 0"/><geom name="cpole2" type="capsule" fromto="0 0 0 0 0 0.6" size="0.05 0.3" rgba="0.7 0 0.7 1"/><site name="tip2" pos="0 0 .6"/></body>-->
</body>
</body>
</worldbody>
<actuator>
<motor gear="100" joint="slider" name="slide"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,31 @@
<mujoco>
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option integrator="RK4" timestep="0.02"/>
<default>
<joint armature="0" damping="0" limited="false"/>
<geom conaffinity="0" condim="3" density="100" friction="1 0.5 0.5" margin="0" rgba="0.8 0.6 0.4 1"/>
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 0">
<geom name="pointbody" pos="0 0 0.5" size="0.5" type="sphere"/>
<geom name="pointarrow" pos="0.6 0 0.5" size="0.5 0.1 0.1" type="box"/>
<joint axis="1 0 0" name="ballx" pos="0 0 0" type="slide"/>
<joint axis="0 1 0" name="bally" pos="0 0 0" type="slide"/>
<joint axis="0 0 1" limited="false" name="rot" pos="0 0 0" type="hinge"/>
</body>
</worldbody>
<actuator>
<!-- Those are just dummy actuators for providing ranges -->
<motor ctrllimited="true" ctrlrange="-1 1" joint="ballx"/>
<motor ctrllimited="true" ctrlrange="-0.25 0.25" joint="rot"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,91 @@
<mujoco model="arm3d">
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
<option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler" />
<default>
<joint armature='0.04' damping="1" limited="true"/>
<geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
</default>
<worldbody>
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
<geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" />
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" />
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" />
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" />
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
<body name="r_shoulder_lift_link" pos="0.1 0 0">
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
<body name="r_upper_arm_roll_link" pos="0 0 0">
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
<body name="r_upper_arm_link" pos="0 0 0">
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
<body name="r_elbow_flex_link" pos="0.4 0 0">
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
<body name="r_forearm_roll_link" pos="0 0 0">
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
<body name="r_forearm_link" pos="0 0 0">
<geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
<body name="r_wrist_flex_link" pos="0.321 0 0">
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
<body name="r_wrist_roll_link" pos="0 0 0">
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
<body name="tips_arm" pos="0 0 0">
<geom name="tip_arml" type="sphere" pos="0.1 -0.1 0." size="0.01" />
<geom name="tip_armr" type="sphere" pos="0.1 0.1 0." size="0.01" />
</body>
<geom type="capsule" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" contype="1" conaffinity="1" />
<geom type="capsule" fromto="0 -0.1 0. 0.1 -0.1 0" size="0.02" contype="1" conaffinity="1" />
<geom type="capsule" fromto="0 +0.1 0. 0.1 +0.1 0." size="0.02" contype="1" conaffinity="1" />
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<!--<body name="object" pos="0.55 -0.3 -0.275" >-->
<body name="object" pos="0.45 -0.05 -0.275" >
<geom rgba="1 1 1 0" type="sphere" size="0.05 0.05 0.05" density="0.00001" conaffinity="0"/>
<geom rgba="1 1 1 1" type="cylinder" size="0.05 0.05 0.05" density="0.00001" contype="1" conaffinity="0"/>
<joint name="obj_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
<joint name="obj_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
</body>
<body name="goal" pos="0.45 -0.05 -0.3230">
<geom rgba="1 0 0 1" type="cylinder" size="0.08 0.001 0.1" density='0.00001' contype="0" conaffinity="0"/>
<joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
<joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
</body>
</worldbody>
<actuator>
<motor joint="r_shoulder_pan_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_shoulder_lift_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_upper_arm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_elbow_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_forearm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_wrist_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_wrist_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,39 @@
<mujoco model="reacher">
<compiler angle="radian" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
</default>
<option gravity="0 0 -9.81" integrator="RK4" timestep="0.01"/>
<worldbody>
<!-- Arena -->
<geom conaffinity="0" contype="0" name="ground" pos="0 0 0" rgba="0.9 0.9 0.9 1" size="1 1 10" type="plane"/>
<geom conaffinity="0" fromto="-.3 -.3 .01 .3 -.3 .01" name="sideS" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto=" .3 -.3 .01 .3 .3 .01" name="sideE" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto="-.3 .3 .01 .3 .3 .01" name="sideN" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto="-.3 -.3 .01 -.3 .3 .01" name="sideW" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<!-- Arm -->
<geom conaffinity="0" contype="0" fromto="0 0 0 0 0 0.02" name="root" rgba="0.9 0.4 0.6 1" size=".011" type="cylinder"/>
<body name="body0" pos="0 0 .01">
<geom fromto="0 0 0 0.1 0 0" name="link0" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
<joint axis="0 0 1" limited="false" name="joint0" pos="0 0 0" type="hinge"/>
<body name="body1" pos="0.1 0 0">
<joint axis="0 0 1" limited="true" name="joint1" pos="0 0 0" range="-3.0 3.0" type="hinge"/>
<geom fromto="0 0 0 0.1 0 0" name="link1" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
<body name="fingertip" pos="0.11 0 0">
<geom contype="0" name="fingertip" pos="0 0 0" rgba="0.0 0.8 0.6 1" size=".01" type="sphere"/>
</body>
</body>
</body>
<!-- Target -->
<body name="target" pos=".1 -.1 .01">
<joint armature="0" axis="1 0 0" damping="0" limited="true" name="target_x" pos="0 0 0" range="-.27 .27" ref=".1" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="true" name="target_y" pos="0 0 0" range="-.27 .27" ref="-.1" stiffness="0" type="slide"/>
<geom conaffinity="0" contype="0" name="target" pos="0 0 0" rgba="0.9 0.2 0.2 1" size=".009" type="sphere"/>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint0"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint1"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,101 @@
<mujoco model="arm3d">
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
<option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler"/>
<default>
<joint armature='0.04' damping="1" limited="true"/>
<geom friction=".0 .0 .0" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
</default>
<worldbody>
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
<geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05"/>
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05"/>
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03"/>
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03"/>
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
<body name="r_shoulder_lift_link" pos="0.1 0 0">
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
<body name="r_upper_arm_roll_link" pos="0 0 0">
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
<body name="r_upper_arm_link" pos="0 0 0">
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
<body name="r_elbow_flex_link" pos="0.4 0 0">
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
<body name="r_forearm_roll_link" pos="0 0 0">
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
<body name="r_forearm_link" pos="0 0 0">
<geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
<body name="r_wrist_flex_link" pos="0.321 0 0">
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
<body name="r_wrist_roll_link" pos="0 0 0">
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
<body name="tips_arm" pos="0 0 0">
<geom conaffinity="1" contype="1" name="tip_arml" pos="0.017 0 0" size="0.003 0.12 0.05" type="box" />
</body>
<geom conaffinity="1" contype="1" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" type="capsule" />
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<body name="object" pos="0 0 -0.270" >
<geom type="sphere" rgba="1 1 1 1" pos="0 0 0" size="0.05 0.05 0.05" contype="1" conaffinity="0"/>
<joint name="obj_slidey" armature="0.1" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
<joint name="obj_slidex" armature="0.1" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
</body>
<body name="goal" pos="0.0 0.0 -0.3230">
<geom rgba="1. 1. 1. 0" pos="0 0 0" type="box" size="0.01 0.01 0.01" contype="0" conaffinity="0"/>
<body pos="0 0 0">
<geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
</body>
<body name="coaster" pos="0 0 0">
<geom rgba="1. 1. 1. 1" type="cylinder" size="0.08 0.001 0.1" density='1000000' contype="0" conaffinity="0"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 0.785">
<geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 -0.785">
<geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
</body>
<joint name="goal_free" type="free" pos="0 0 0" limited="false" damping="0"/>
</body>
</worldbody>
<actuator>
<motor joint="r_shoulder_pan_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_shoulder_lift_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_upper_arm_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_elbow_flex_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_forearm_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_wrist_flex_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_wrist_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,38 @@
<mujoco model="swimmer">
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1"/>
<default>
<geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1"/>
<joint armature='0.1' />
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane"/>
<!-- ================= SWIMMER ================= /-->
<body name="torso" pos="0 0 0">
<geom density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule"/>
<joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide"/>
<joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide"/>
<joint axis="0 0 1" name="rot" pos="0 0 0" type="hinge"/>
<body name="mid" pos="0.5 0 0">
<geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
<joint axis="0 0 1" limited="true" name="rot2" pos="0 0 0" range="-100 100" type="hinge"/>
<body name="back" pos="-1 0 0">
<geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
<joint axis="0 0 1" limited="true" name="rot3" pos="0 0 0" range="-100 100" type="hinge"/>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot2"/>
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot3"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,127 @@
<mujoco model="arm3d">
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
<option timestep="0.01" gravity="0 0 -9.81" iterations="20" integrator="Euler"/>
<default>
<joint armature='0.75' damping="1" limited="true"/>
<geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
</default>
<worldbody>
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
<geom type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" density="0.0001"/>
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" density="0.0001"/>
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" density="0.0001"/>
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" density="0.0001"/>
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" density="1"/>
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-0.4854 1.214602" damping="1.0"/>
<body name="r_shoulder_lift_link" pos="0.1 0 0">
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" density="0.0001"/>
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 0.7963" damping="1.0"/>
<body name="r_upper_arm_roll_link" pos="0 0 0">
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="1.0"/>
<body name="r_upper_arm_link" pos="0 0 0">
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" density="0.0001"/>
<body name="r_elbow_flex_link" pos="0.4 0 0">
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" density="0.0001"/>
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.7 0.7" damping="1.0"/>
<body name="r_forearm_roll_link" pos="0 0 0">
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping="1.0" range="-1.5 1.5"/>
<body name="r_forearm_link" pos="0 0 0" axisangle="1 0 0 0.392">
<geom name="fa" type="capsule" fromto="0 0 0 0 0 0.291" size="0.05" density="0.0001"/>
<body name="r_wrist_flex_link" pos="0 0 0.321" axisangle="0 0 1 1.57">
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" density="0.0001"/>
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-1.0 1.0" damping="1.0" />
<body name="r_wrist_roll_link" pos="0 0 0" axisangle="0 1 0 -1.178">
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="0 1 0" damping="1.0" range="0 2.25"/>
<geom type="capsule" fromto="0 -0.05 0 0 0.05 0" size="0.01" contype="1" conaffinity="1" density="0.0001"/>
<body pos="0 0 0" axisangle="0 0 1 0.392">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density=".0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 1.57">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 1.178">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 0.785">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 1.96">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 2.355">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 2.74">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<body name="goal" pos="0.575 0.5 -0.328">
<geom rgba="1 1 1 1" type="box" pos="0 0 0.005" size="0.075 0.075 0.001" contype="1" conaffinity="1" density="1000"/>
<geom rgba="1 1 1 1" type="box" pos="0.0 0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="box" pos="0.0 -0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="box" pos="0.075 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="box" pos="-0.076 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="capsule" fromto="0.073 0.073 0.0075 0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="capsule" fromto="0.073 -0.073 0.0075 0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="capsule" fromto="-0.073 0.073 0.0075 -0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="capsule" fromto="-0.073 -0.073 0.0075 -0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
<joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="1.0"/>
<joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="1.0"/>
</body>
<body name="ball" pos="0.5 -0.8 0.275">
<geom rgba="1. 1. 1. 1" type="sphere" size="0.03 0.03 0.1" density='25' contype="1" conaffinity="1"/>
<joint name="ball_free" type="free" armature='0' damping="0" limited="false"/>
</body>
</worldbody>
<actuator>
<motor joint="r_shoulder_pan_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
<motor joint="r_shoulder_lift_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
<motor joint="r_upper_arm_roll_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
<motor joint="r_elbow_flex_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
<motor joint="r_forearm_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
<motor joint="r_wrist_flex_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
<motor joint="r_wrist_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true"/>
</actuator>
</mujoco>

View File

@@ -0,0 +1,62 @@
<mujoco model="walker2d">
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<default>
<joint armature="0.01" damping=".1" limited="true"/>
<geom conaffinity="0" condim="3" contype="1" density="1000" friction=".7 .1 .1" rgba="0.8 0.6 .4 1"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
<body name="thigh" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
<body name="leg" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
<body name="foot" pos="0.2/2 0 0.1">
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="0.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
</body>
</body>
</body>
<!-- copied and then replace thigh->thigh_left, leg->leg_left, foot->foot_right -->
<body name="thigh_left" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_left_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_left_geom" rgba=".7 .3 .6 1" size="0.05" type="capsule"/>
<body name="leg_left" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.04" type="capsule"/>
<body name="foot_left" pos="0.2/2 0 0.1">
<joint axis="0 -1 0" name="foot_left_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="1.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.06" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<!-- <motor joint="torso_joint" ctrlrange="-100.0 100.0" isctrllimited="true"/>-->
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_left_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_left_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_left_joint"/>
<!-- <motor joint="finger2_rot" ctrlrange="-20.0 20.0" isctrllimited="true"/>-->
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@@ -0,0 +1,34 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
utils.EzPickle.__init__(self)
def step(self, action):
xposbefore = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
xposafter = self.sim.data.qpos[0]
ob = self._get_obs()
reward_ctrl = - 0.1 * np.square(action).sum()
reward_run = (xposafter - xposbefore)/self.dt
reward = reward_ctrl + reward_run
done = False
return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[1:],
self.sim.data.qvel.flat,
])
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
self.viewer.cam.distance = self.model.stat.extent * 0.5

View File

@@ -0,0 +1,40 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4)
utils.EzPickle.__init__(self)
def step(self, a):
posbefore = self.sim.data.qpos[0]
self.do_simulation(a, self.frame_skip)
posafter, height, ang = self.sim.data.qpos[0:3]
alive_bonus = 1.0
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
s = self.state_vector()
done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and
(height > .7) and (abs(ang) < .2))
ob = self._get_obs()
return ob, reward, done, {}
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[1:],
np.clip(self.sim.data.qvel.flat, -10, 10)
])
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
self.viewer.cam.trackbodyid = 2
self.viewer.cam.distance = self.model.stat.extent * 0.75
self.viewer.cam.lookat[2] = 1.15
self.viewer.cam.elevation = -20

View File

@@ -0,0 +1,51 @@
import numpy as np
from gym.envs.mujoco import mujoco_env
from gym import utils
def mass_center(model, sim):
mass = np.expand_dims(model.body_mass, 1)
xpos = sim.data.xipos
return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5)
utils.EzPickle.__init__(self)
def _get_obs(self):
data = self.sim.data
return np.concatenate([data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat])
def step(self, a):
pos_before = mass_center(self.model, self.sim)
self.do_simulation(a, self.frame_skip)
pos_after = mass_center(self.model, self.sim)
alive_bonus = 5.0
data = self.sim.data
lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
qpos = self.sim.data.qpos
done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
)
return self._get_obs()
def viewer_setup(self):
self.viewer.cam.trackbodyid = 1
self.viewer.cam.distance = self.model.stat.extent * 1.0
self.viewer.cam.lookat[2] = 2.0
self.viewer.cam.elevation = -20

View File

@@ -0,0 +1,45 @@
from gym.envs.mujoco import mujoco_env
from gym import utils
import numpy as np
class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
utils.EzPickle.__init__(self)
def _get_obs(self):
data = self.sim.data
return np.concatenate([data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat])
def step(self, a):
self.do_simulation(a, self.frame_skip)
pos_after = self.sim.data.qpos[2]
data = self.sim.data
uph_cost = (pos_after - 0) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
done = bool(False)
return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
)
return self._get_obs()
def viewer_setup(self):
self.viewer.cam.trackbodyid = 1
self.viewer.cam.distance = self.model.stat.extent * 1.0
self.viewer.cam.lookat[2] = 0.8925
self.viewer.cam.elevation = -20

View File

@@ -0,0 +1,43 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5)
utils.EzPickle.__init__(self)
def step(self, action):
self.do_simulation(action, self.frame_skip)
ob = self._get_obs()
x, _, y = self.sim.data.site_xpos[0]
dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
v1, v2 = self.sim.data.qvel[1:3]
vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
done = bool(y <= 1)
return ob, r, done, {}
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos[:1], # cart x pos
np.sin(self.sim.data.qpos[1:]), # link angles
np.cos(self.sim.data.qpos[1:]),
np.clip(self.sim.data.qvel, -10, 10),
np.clip(self.sim.data.qfrc_constraint, -10, 10)
]).ravel()
def reset_model(self):
self.set_state(
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + self.np_random.randn(self.model.nv) * .1
)
return self._get_obs()
def viewer_setup(self):
v = self.viewer
v.cam.trackbodyid = 0
v.cam.distance = self.model.stat.extent * 0.5
v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2]

View File

@@ -0,0 +1,30 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
def step(self, a):
reward = 1.0
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
done = not notdone
return ob, reward, done, {}
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
def viewer_setup(self):
v = self.viewer
v.cam.trackbodyid = 0
v.cam.distance = self.model.stat.extent

View File

@@ -0,0 +1,145 @@
import os
from gym import error, spaces
from gym.utils import seeding
import numpy as np
from os import path
import gym
import six
try:
import mujoco_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
DEFAULT_SIZE = 500
class MujocoEnv(gym.Env):
"""Superclass for all MuJoCo environments.
"""
def __init__(self, model_path, frame_skip):
if model_path.startswith("/"):
fullpath = model_path
else:
fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
if not path.exists(fullpath):
raise IOError("File %s does not exist" % fullpath)
self.frame_skip = frame_skip
self.model = mujoco_py.load_model_from_path(fullpath)
self.sim = mujoco_py.MjSim(self.model)
self.data = self.sim.data
self.viewer = None
self._viewers = {}
self.metadata = {
'render.modes': ['human', 'rgb_array', 'depth_array'],
'video.frames_per_second': int(np.round(1.0 / self.dt))
}
self.init_qpos = self.sim.data.qpos.ravel().copy()
self.init_qvel = self.sim.data.qvel.ravel().copy()
observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
assert not done
self.obs_dim = observation.size
bounds = self.model.actuator_ctrlrange.copy()
low = bounds[:, 0]
high = bounds[:, 1]
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
high = np.inf*np.ones(self.obs_dim)
low = -high
self.observation_space = spaces.Box(low, high, dtype=np.float32)
self.seed()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
# methods to override:
# ----------------------------
def reset_model(self):
"""
Reset the robot degrees of freedom (qpos and qvel).
Implement this in each subclass.
"""
raise NotImplementedError
def viewer_setup(self):
"""
This method is called when the viewer is initialized.
Optionally implement this method, if you need to tinker with camera position
and so forth.
"""
pass
# -----------------------------
def reset(self):
self.sim.reset()
ob = self.reset_model()
return ob
def set_state(self, qpos, qvel):
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
old_state = self.sim.get_state()
new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
old_state.act, old_state.udd_state)
self.sim.set_state(new_state)
self.sim.forward()
@property
def dt(self):
return self.model.opt.timestep * self.frame_skip
def do_simulation(self, ctrl, n_frames):
self.sim.data.ctrl[:] = ctrl
for _ in range(n_frames):
self.sim.step()
def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE):
if mode == 'rgb_array':
self._get_viewer(mode).render(width, height)
# window size used for old mujoco-py:
data = self._get_viewer(mode).read_pixels(width, height, depth=False)
# original image is upside-down, so flip it
return data[::-1, :, :]
elif mode == 'depth_array':
self._get_viewer(mode).render(width, height)
# window size used for old mujoco-py:
# Extract depth part of the read_pixels() tuple
data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1]
# original image is upside-down, so flip it
return data[::-1, :]
elif mode == 'human':
self._get_viewer(mode).render()
def close(self):
if self.viewer is not None:
# self.viewer.finish()
self.viewer = None
self._viewers = {}
def _get_viewer(self, mode):
self.viewer = self._viewers.get(mode)
if self.viewer is None:
if mode == 'human':
self.viewer = mujoco_py.MjViewer(self.sim)
elif mode == 'rgb_array' or mode == 'depth_array':
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
self.viewer_setup()
self._viewers[mode] = self.viewer
return self.viewer
def get_body_com(self, body_name):
return self.data.get_body_xpos(body_name)
def state_vector(self):
return np.concatenate([
self.sim.data.qpos.flat,
self.sim.data.qvel.flat
])

View File

@@ -0,0 +1,57 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
import mujoco_py
class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5)
def step(self, a):
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
vec_2 = self.get_body_com("object") - self.get_body_com("goal")
reward_near = - np.linalg.norm(vec_1)
reward_dist = - np.linalg.norm(vec_2)
reward_ctrl = - np.square(a).sum()
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl)
def viewer_setup(self):
self.viewer.cam.trackbodyid = -1
self.viewer.cam.distance = 4.0
def reset_model(self):
qpos = self.init_qpos
self.goal_pos = np.asarray([0, 0])
while True:
self.cylinder_pos = np.concatenate([
self.np_random.uniform(low=-0.3, high=0, size=1),
self.np_random.uniform(low=-0.2, high=0.2, size=1)])
if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
break
qpos[-4:-2] = self.cylinder_pos
qpos[-2:] = self.goal_pos
qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
high=0.005, size=self.model.nv)
qvel[-4:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
])

View File

@@ -0,0 +1,43 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2)
def step(self, a):
vec = self.get_body_com("fingertip")-self.get_body_com("target")
reward_dist = - np.linalg.norm(vec)
reward_ctrl = - np.square(a).sum()
reward = reward_dist + reward_ctrl
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
def reset_model(self):
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
while True:
self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
if np.linalg.norm(self.goal) < 2:
break
qpos[-2:] = self.goal
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
qvel[-2:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
theta = self.sim.data.qpos.flat[:2]
return np.concatenate([
np.cos(theta),
np.sin(theta),
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat[:2],
self.get_body_com("fingertip") - self.get_body_com("target")
])

View File

@@ -0,0 +1,75 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
self._striked = False
self._min_strike_dist = np.inf
self.strike_threshold = 0.1
mujoco_env.MujocoEnv.__init__(self, 'striker.xml', 5)
def step(self, a):
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
vec_2 = self.get_body_com("object") - self.get_body_com("goal")
self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(vec_2))
if np.linalg.norm(vec_1) < self.strike_threshold:
self._striked = True
self._strike_pos = self.get_body_com("tips_arm")
if self._striked:
vec_3 = self.get_body_com("object") - self._strike_pos
reward_near = - np.linalg.norm(vec_3)
else:
reward_near = - np.linalg.norm(vec_1)
reward_dist = - np.linalg.norm(self._min_strike_dist)
reward_ctrl = - np.square(a).sum()
reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl)
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
self.viewer.cam.distance = 4.0
def reset_model(self):
self._min_strike_dist = np.inf
self._striked = False
self._strike_pos = None
qpos = self.init_qpos
self.ball = np.array([0.5, -0.175])
while True:
self.goal = np.concatenate([
self.np_random.uniform(low=0.15, high=0.7, size=1),
self.np_random.uniform(low=0.1, high=1.0, size=1)])
if np.linalg.norm(self.ball - self.goal) > 0.17:
break
qpos[-9:-7] = [self.ball[1], self.ball[0]]
qpos[-7:-5] = self.goal
diff = self.ball - self.goal
angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
qpos[-1] = angle / 3.14
qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1,
size=self.model.nv)
qvel[7:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
])

View File

@@ -0,0 +1,31 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4)
utils.EzPickle.__init__(self)
def step(self, a):
ctrl_cost_coeff = 0.0001
xposbefore = self.sim.data.qpos[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.sim.data.qpos[0]
reward_fwd = (xposafter - xposbefore) / self.dt
reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
reward = reward_fwd + reward_ctrl
ob = self._get_obs()
return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
def _get_obs(self):
qpos = self.sim.data.qpos
qvel = self.sim.data.qvel
return np.concatenate([qpos.flat[2:], qvel.flat])
def reset_model(self):
self.set_state(
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
)
return self._get_obs()

View File

@@ -0,0 +1,60 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
self._ball_hit_ground = False
self._ball_hit_location = None
mujoco_env.MujocoEnv.__init__(self, 'thrower.xml', 5)
def step(self, a):
ball_xy = self.get_body_com("ball")[:2]
goal_xy = self.get_body_com("goal")[:2]
if not self._ball_hit_ground and self.get_body_com("ball")[2] < -0.25:
self._ball_hit_ground = True
self._ball_hit_location = self.get_body_com("ball")
if self._ball_hit_ground:
ball_hit_xy = self._ball_hit_location[:2]
reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy)
else:
reward_dist = -np.linalg.norm(ball_xy - goal_xy)
reward_ctrl = - np.square(a).sum()
reward = reward_dist + 0.002 * reward_ctrl
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl)
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
self.viewer.cam.distance = 4.0
def reset_model(self):
self._ball_hit_ground = False
self._ball_hit_location = None
qpos = self.init_qpos
self.goal = np.array([self.np_random.uniform(low=-0.3, high=0.3),
self.np_random.uniform(low=-0.3, high=0.3)])
qpos[-9:-7] = self.goal
qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
high=0.005, size=self.model.nv)
qvel[7:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("r_wrist_roll_link"),
self.get_body_com("ball"),
self.get_body_com("goal"),
])

View File

@@ -0,0 +1,40 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4)
utils.EzPickle.__init__(self)
def step(self, a):
posbefore = self.sim.data.qpos[0]
self.do_simulation(a, self.frame_skip)
posafter, height, ang = self.sim.data.qpos[0:3]
alive_bonus = 1.0
reward = ((posafter - posbefore) / self.dt)
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
done = not (height > 0.8 and height < 2.0 and
ang > -1.0 and ang < 1.0)
ob = self._get_obs()
return ob, reward, done, {}
def _get_obs(self):
qpos = self.sim.data.qpos
qvel = self.sim.data.qvel
return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
def reset_model(self):
self.set_state(
self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
)
return self._get_obs()
def viewer_setup(self):
self.viewer.cam.trackbodyid = 2
self.viewer.cam.distance = self.model.stat.extent * 0.5
self.viewer.cam.lookat[2] = 1.15
self.viewer.cam.elevation = -20

View File

@@ -0,0 +1 @@
from gym.envs.pilesos.pilesos import PilesosEnv

View File

@@ -0,0 +1,46 @@
import numpy as np
from gym import core, spaces
from pilesos.mapstate import MapState
class PilesosEnv(core.Env):
metadata = {}
IS_HOME = True
DIRT = False
ACTIONS = ['F', 'R', 'L', 'S', 'T']
state: MapState
def __init__(self):
self.viewer = None
low = np.array([0, 0, 0])
high = np.array([1, 1, 1])
self.observation_space = spaces.Box(low=low, high=high, dtype=int)
self.action_space = spaces.Discrete(5)
self.state = MapState.default()
self.current_obs = self.state.get_current_obs()
def reset(self):
self.state = MapState.default()
return self._get_ob()
def step(self, action):
action_char = self.ACTIONS[action]
self.current_obs = self.state.act(action_char)
has_finished = action_char == 'T'
return self._get_ob(), self.state.last_reward, has_finished, {}
def render(self, mode='human'):
raise NotImplementedError()
def close(self):
pass
def _get_ob(self):
return np.array([int(self.current_obs.has_bumped), int(self.current_obs.has_dirt), int(self.current_obs.is_home)])

View File

@@ -0,0 +1,191 @@
import re
from gym import error, logger
# This format is true today, but it's *not* an official spec.
# [username/](env-name)-v(version) env-name is group 1, version is group 2
#
# 2016-10-31: We're experimentally expanding the environment ID format
# to include an optional username.
env_id_re = re.compile(r'^(?:[\w:-]+\/)?([\w:.-]+)-v(\d+)$')
def load(name):
import pkg_resources # takes ~400ms to load, so we import it lazily
entry_point = pkg_resources.EntryPoint.parse('x={}'.format(name))
result = entry_point.load(False)
return result
class EnvSpec(object):
"""A specification for a particular instance of the environment. Used
to register the parameters for official evaluations.
Args:
id (str): The official environment ID
entry_point (Optional[str]): The Python entrypoint of the environment class (e.g. module.name:Class)
trials (int): The number of trials to average reward over
reward_threshold (Optional[int]): The reward threshold before the task is considered solved
local_only: True iff the environment is to be used only on the local machine (e.g. debugging envs)
kwargs (dict): The kwargs to pass to the environment class
nondeterministic (bool): Whether this environment is non-deterministic even after seeding
tags (dict[str:any]): A set of arbitrary key-value tags on this environment, including simple property=True tags
Attributes:
id (str): The official environment ID
trials (int): The number of trials run in official evaluation
"""
def __init__(self, id, entry_point=None, trials=100, reward_threshold=None, local_only=False, kwargs=None, nondeterministic=False, tags=None, max_episode_steps=None, max_episode_seconds=None, timestep_limit=None):
self.id = id
# Evaluation parameters
self.trials = trials
self.reward_threshold = reward_threshold
# Environment properties
self.nondeterministic = nondeterministic
if tags is None:
tags = {}
self.tags = tags
# BACKWARDS COMPAT 2017/1/18
if tags.get('wrapper_config.TimeLimit.max_episode_steps'):
max_episode_steps = tags.get('wrapper_config.TimeLimit.max_episode_steps')
# TODO: Add the following deprecation warning after 2017/02/18
# warnings.warn("DEPRECATION WARNING wrapper_config.TimeLimit has been deprecated. Replace any calls to `register(tags={'wrapper_config.TimeLimit.max_episode_steps': 200)}` with `register(max_episode_steps=200)`. This change was made 2017/1/31 and is included in gym version 0.8.0. If you are getting many of these warnings, you may need to update universe past version 0.21.3")
tags['wrapper_config.TimeLimit.max_episode_steps'] = max_episode_steps
######
# BACKWARDS COMPAT 2017/1/31
if timestep_limit is not None:
max_episode_steps = timestep_limit
# TODO: Add the following deprecation warning after 2017/03/01
# warnings.warn("register(timestep_limit={}) is deprecated. Use register(max_episode_steps={}) instead.".format(timestep_limit, timestep_limit))
######
self.max_episode_steps = max_episode_steps
self.max_episode_seconds = max_episode_seconds
# We may make some of these other parameters public if they're
# useful.
match = env_id_re.search(id)
if not match:
raise error.Error('Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id, env_id_re.pattern))
self._env_name = match.group(1)
self._entry_point = entry_point
self._local_only = local_only
self._kwargs = {} if kwargs is None else kwargs
def make(self):
"""Instantiates an instance of the environment with appropriate kwargs"""
if self._entry_point is None:
raise error.Error('Attempting to make deprecated env {}. (HINT: is there a newer registered version of this env?)'.format(self.id))
elif callable(self._entry_point):
env = self._entry_point(**self._kwargs)
else:
cls = load(self._entry_point)
env = cls(**self._kwargs)
# Make the enviroment aware of which spec it came from.
env.unwrapped.spec = self
return env
def __repr__(self):
return "EnvSpec({})".format(self.id)
@property
def timestep_limit(self):
return self.max_episode_steps
@timestep_limit.setter
def timestep_limit(self, value):
self.max_episode_steps = value
class EnvRegistry(object):
"""Register an env by ID. IDs remain stable over time and are
guaranteed to resolve to the same environment dynamics (or be
desupported). The goal is that results on a particular environment
should always be comparable, and not depend on the version of the
code that was running.
"""
def __init__(self):
self.env_specs = {}
def make(self, id):
logger.info('Making new env: %s', id)
spec = self.spec(id)
env = spec.make()
# We used to have people override _reset/_step rather than
# reset/step. Set _gym_disable_underscore_compat = True on
# your environment if you use these methods and don't want
# compatibility code to be invoked.
if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(env, "_gym_disable_underscore_compat", False):
patch_deprecated_methods(env)
if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
from gym.wrappers.time_limit import TimeLimit
env = TimeLimit(env,
max_episode_steps=env.spec.max_episode_steps,
max_episode_seconds=env.spec.max_episode_seconds)
return env
def all(self):
return self.env_specs.values()
def spec(self, id):
match = env_id_re.search(id)
if not match:
raise error.Error('Attempted to look up malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id.encode('utf-8'), env_id_re.pattern))
try:
return self.env_specs[id]
except KeyError:
# Parse the env name and check to see if it matches the non-version
# part of a valid env (could also check the exact number here)
env_name = match.group(1)
matching_envs = [valid_env_name for valid_env_name, valid_env_spec in self.env_specs.items()
if env_name == valid_env_spec._env_name]
if matching_envs:
raise error.DeprecatedEnv('Env {} not found (valid versions include {})'.format(id, matching_envs))
else:
raise error.UnregisteredEnv('No registered env with id: {}'.format(id))
def register(self, id, **kwargs):
if id in self.env_specs:
raise error.Error('Cannot re-register id: {}'.format(id))
self.env_specs[id] = EnvSpec(id, **kwargs)
# Have a global registry
registry = EnvRegistry()
def register(id, **kwargs):
return registry.register(id, **kwargs)
def make(id):
return registry.make(id)
def spec(id):
return registry.spec(id)
warn_once = True
def patch_deprecated_methods(env):
"""
Methods renamed from '_method' to 'method', render() no longer has 'close' parameter, close is a separate method.
For backward compatibility, this makes it possible to work with unmodified environments.
"""
global warn_once
if warn_once:
logger.warn("Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior." % str(type(env)))
warn_once = False
env.reset = env._reset
env.step = env._step
env.seed = env._seed
def render(mode):
return env._render(mode, close=False)
def close():
env._render("human", close=True)
env.render = render
env.close = close

View File

@@ -0,0 +1,54 @@
# Robotics environments
Details and documentation on these robotics environments are available in our [blog post](https://blog.openai.com/ingredients-for-robotics-research/), the accompanying [technical report](https://arxiv.org/abs/1802.09464), and the [Gym website](https://gym.openai.com/envs/#robotics).
If you use these environments, please cite the following paper:
```
@misc{1802.09464,
Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba},
Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research},
Year = {2018},
Eprint = {arXiv:1802.09464},
}
```
## Fetch environments
<img src="https://blog.openai.com/content/images/2018/02/fetch-reach.png" width="500">
[FetchReach-v0](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position.
<img src="https://blog.openai.com/content/images/2018/02/fetch-slide.png" width="500">
[FetchSlide-v0](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
<img src="https://blog.openai.com/content/images/2018/02/fetch-push.png" width="500">
[FetchPush-v0](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position.
<img src="https://blog.openai.com/content/images/2018/02/fetch-pickandplace.png" width="500">
[FetchPickAndPlace-v0](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
## Shadow Dexterous Hand environments
<img src="https://blog.openai.com/content/images/2018/02/hand-reach.png" width="500">
[HandReach-v0](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
<img src="https://blog.openai.com/content/images/2018/02/hand-block.png" width="500">
[HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation.
<img src="https://blog.openai.com/content/images/2018/02/hand-egg.png" width="500">
[HandManipulateEgg-v0](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation.
<img src="https://blog.openai.com/content/images/2018/02/hand-pen.png" width="500">
[HandManipulatePen-v0](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation.

View File

@@ -0,0 +1,10 @@
from gym.envs.robotics.fetch_env import FetchEnv
from gym.envs.robotics.fetch.slide import FetchSlideEnv
from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv
from gym.envs.robotics.fetch.push import FetchPushEnv
from gym.envs.robotics.fetch.reach import FetchReachEnv
from gym.envs.robotics.hand.reach import HandReachEnv
from gym.envs.robotics.hand.manipulate import HandBlockEnv
from gym.envs.robotics.hand.manipulate import HandEggEnv
from gym.envs.robotics.hand.manipulate import HandPenEnv

View File

@@ -0,0 +1,222 @@
# Fetch Robotics
The model of the [Fetch](http://fetchrobotics.com/platforms-research-development/) is based on [models provided by Fetch](https://github.com/fetchrobotics/fetch_ros/tree/indigo-devel/fetch_description). It was adapted and refined by OpenAI.
# ShadowHand
The model of the [ShadowHand](https://www.shadowrobot.com/products/dexterous-hand/) is based on [models provided by ShadowRobot](https://github.com/shadow-robot/sr_common/tree/kinetic-devel/sr_description/hand/model), and on code used under the following license:
(C) Vikash Kumar, CSE, UW. Licensed under Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Additional license notices:
Sources : 1) Manipulator and Manipulation in High Dimensional Spaces. Vikash Kumar, Ph.D. Thesis, CSE, Univ. of Washington. 2016.
Mujoco :: Advanced physics simulation engine
Source : www.roboti.us
Version : 1.40
Released : 17Jan'17
Author :: Vikash Kumar
Contacts : vikash@openai.com
Last edits : 3Apr'17

View File

@@ -0,0 +1,35 @@
<?xml version="1.0" encoding="utf-8"?>
<mujoco>
<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
<option timestep="0.002">
<flag warmstart="enable"></flag>
</option>
<include file="shared.xml"></include>
<worldbody>
<geom name="floor0" pos="0.8 0.75 0" size="0.85 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
<body name="floor0" pos="0.8 0.75 0">
<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
</body>
<include file="robot.xml"></include>
<body pos="1.3 0.75 0.2" name="table0">
<geom size="0.25 0.35 0.2" type="box" mass="2000" material="table_mat"></geom>
</body>
<body name="object0" pos="0.025 0.025 0.025">
<joint name="object0:joint" type="free" damping="0.01"></joint>
<geom size="0.025 0.025 0.025" type="box" condim="3" name="object0" material="block_mat" mass="2"></geom>
<site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
</body>
<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
</worldbody>
<actuator>
<position ctrllimited="true" ctrlrange="0 0.2" joint="robot0:l_gripper_finger_joint" kp="30000" name="robot0:l_gripper_finger_joint" user="1"></position>
<position ctrllimited="true" ctrlrange="0 0.2" joint="robot0:r_gripper_finger_joint" kp="30000" name="robot0:r_gripper_finger_joint" user="1"></position>
</actuator>
</mujoco>

View File

@@ -0,0 +1,32 @@
<?xml version="1.0" encoding="utf-8"?>
<mujoco>
<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
<option timestep="0.002">
<flag warmstart="enable"></flag>
</option>
<include file="shared.xml"></include>
<worldbody>
<geom name="floor0" pos="0.8 0.75 0" size="0.85 0.70 1" type="plane" condim="3" material="floor_mat"></geom>
<body name="floor0" pos="0.8 0.75 0">
<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
</body>
<include file="robot.xml"></include>
<body pos="1.3 0.75 0.2" name="table0">
<geom size="0.25 0.35 0.2" type="box" mass="2000" material="table_mat"></geom>
</body>
<body name="object0" pos="0.025 0.025 0.025">
<joint name="object0:joint" type="free" damping="0.01"></joint>
<geom size="0.025 0.025 0.025" type="box" condim="3" name="object0" material="block_mat" mass="2"></geom>
<site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
</body>
<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
</worldbody>
<actuator></actuator>
</mujoco>

View File

@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="utf-8"?>
<mujoco>
<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
<option timestep="0.002">
<flag warmstart="enable"></flag>
</option>
<include file="shared.xml"></include>
<worldbody>
<geom name="floor0" pos="0.8 0.75 0" size="0.85 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
<body name="floor0" pos="0.8 0.75 0">
<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
</body>
<include file="robot.xml"></include>
<body pos="1.3 0.75 0.2" name="table0">
<geom size="0.25 0.35 0.2" type="box" mass="2000" material="table_mat"></geom>
</body>
<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
</worldbody>
<actuator></actuator>
</mujoco>

View File

@@ -0,0 +1,123 @@
<mujoco>
<body mocap="true" name="robot0:mocap" pos="0 0 0">
<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.7" size="0.005 0.005 0.005" type="box"></geom>
<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="1 0.005 0.005" type="box"></geom>
<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="0.005 1 0.001" type="box"></geom>
<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="0.005 0.005 1" type="box"></geom>
</body>
<body childclass="robot0:fetch" name="robot0:base_link" pos="0.2869 0.2641 0">
<joint armature="0.0001" axis="1 0 0" damping="1e+11" name="robot0:slide0" pos="0 0 0" type="slide"></joint>
<joint armature="0.0001" axis="0 1 0" damping="1e+11" name="robot0:slide1" pos="0 0 0" type="slide"></joint>
<joint armature="0.0001" axis="0 0 1" damping="1e+11" name="robot0:slide2" pos="0 0 0" type="slide"></joint>
<inertial diaginertia="1.2869 1.2236 0.9868" mass="70.1294" pos="-0.0036 0 0.0014" quat="0.7605 -0.0133 -0.0061 0.6491"></inertial>
<geom mesh="robot0:base_link" name="robot0:base_link" material="robot0:base_mat" class="robot0:grey"></geom>
<body name="robot0:torso_lift_link" pos="-0.0869 0 0.3774">
<inertial diaginertia="0.3365 0.3354 0.0943" mass="10.7796" pos="-0.0013 -0.0009 0.2935" quat="0.9993 -0.0006 0.0336 0.0185"></inertial>
<joint axis="0 0 1" damping="1e+07" name="robot0:torso_lift_joint" range="0.0386 0.3861" type="slide"></joint>
<geom mesh="robot0:torso_lift_link" name="robot0:torso_lift_link" material="robot0:torso_mat"></geom>
<body name="robot0:head_pan_link" pos="0.0531 0 0.603">
<inertial diaginertia="0.0185 0.0128 0.0095" mass="2.2556" pos="0.0321 0.0161 0.039" quat="0.5148 0.5451 -0.453 0.4823"></inertial>
<joint axis="0 0 1" name="robot0:head_pan_joint" range="-1.57 1.57"></joint>
<geom mesh="robot0:head_pan_link" name="robot0:head_pan_link" material="robot0:head_mat" class="robot0:grey"></geom>
<body name="robot0:head_tilt_link" pos="0.1425 0 0.058">
<inertial diaginertia="0.0063 0.0059 0.0014" mass="0.9087" pos="0.0081 0.0025 0.0113" quat="0.6458 0.66 -0.274 0.2689"></inertial>
<joint axis="0 1 0" damping="1000" name="robot0:head_tilt_joint" range="-0.76 1.45" ref="0.06"></joint>
<geom mesh="robot0:head_tilt_link" name="robot0:head_tilt_link" material="robot0:head_mat" class="robot0:blue"></geom>
<body name="robot0:head_camera_link" pos="0.055 0 0.0225">
<inertial diaginertia="0 0 0" mass="0" pos="0.055 0 0.0225"></inertial>
<body name="robot0:head_camera_rgb_frame" pos="0 0.02 0">
<inertial diaginertia="0 0 0" mass="0" pos="0 0.02 0"></inertial>
<body name="robot0:head_camera_rgb_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5">
<inertial diaginertia="0 0 0" mass="0" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5"></inertial>
<camera euler="3.1415 0 0" fovy="50" name="head_camera_rgb" pos="0 0 0"></camera>
</body>
</body>
<body name="robot0:head_camera_depth_frame" pos="0 0.045 0">
<inertial diaginertia="0 0 0" mass="0" pos="0 0.045 0"></inertial>
<body name="robot0:head_camera_depth_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5">
<inertial diaginertia="0 0 0" mass="0" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5"></inertial>
</body>
</body>
</body>
</body>
</body>
<body name="robot0:shoulder_pan_link" pos="0.1195 0 0.3486">
<inertial diaginertia="0.009 0.0086 0.0041" mass="2.5587" pos="0.0927 -0.0056 0.0564" quat="-0.1364 0.7624 -0.1562 0.613"></inertial>
<joint axis="0 0 1" name="robot0:shoulder_pan_joint" range="-1.6056 1.6056"></joint>
<geom mesh="robot0:shoulder_pan_link" name="robot0:shoulder_pan_link" material="robot0:arm_mat"></geom>
<body name="robot0:shoulder_lift_link" pos="0.117 0 0.06">
<inertial diaginertia="0.0116 0.0112 0.0023" mass="2.6615" pos="0.1432 0.0072 -0.0001" quat="0.4382 0.4382 0.555 0.555"></inertial>
<joint axis="0 1 0" name="robot0:shoulder_lift_joint" range="-1.221 1.518"></joint>
<geom mesh="robot0:shoulder_lift_link" name="robot0:shoulder_lift_link" material="robot0:arm_mat" class="robot0:blue"></geom>
<body name="robot0:upperarm_roll_link" pos="0.219 0 0">
<inertial diaginertia="0.0047 0.0045 0.0019" mass="2.3311" pos="0.1165 0.0014 0" quat="-0.0136 0.707 0.0136 0.707"></inertial>
<joint axis="1 0 0" limited="false" name="robot0:upperarm_roll_joint"></joint>
<geom mesh="robot0:upperarm_roll_link" name="robot0:upperarm_roll_link" material="robot0:arm_mat"></geom>
<body name="robot0:elbow_flex_link" pos="0.133 0 0">
<inertial diaginertia="0.0086 0.0084 0.002" mass="2.1299" pos="0.1279 0.0073 0" quat="0.4332 0.4332 0.5589 0.5589"></inertial>
<joint axis="0 1 0" name="robot0:elbow_flex_joint" range="-2.251 2.251"></joint>
<geom mesh="robot0:elbow_flex_link" name="robot0:elbow_flex_link" material="robot0:arm_mat" class="robot0:blue"></geom>
<body name="robot0:forearm_roll_link" pos="0.197 0 0">
<inertial diaginertia="0.0035 0.0031 0.0015" mass="1.6563" pos="0.1097 -0.0266 0" quat="-0.0715 0.7035 0.0715 0.7035"></inertial>
<joint armature="2.7538" axis="1 0 0" damping="3.5247" frictionloss="0" limited="false" name="robot0:forearm_roll_joint" stiffness="10"></joint>
<geom mesh="robot0:forearm_roll_link" name="robot0:forearm_roll_link" material="robot0:arm_mat"></geom>
<body name="robot0:wrist_flex_link" pos="0.1245 0 0">
<inertial diaginertia="0.0042 0.0042 0.0018" mass="1.725" pos="0.0882 0.0009 -0.0001" quat="0.4895 0.4895 0.5103 0.5103"></inertial>
<joint axis="0 1 0" name="robot0:wrist_flex_joint" range="-2.16 2.16"></joint>
<geom mesh="robot0:wrist_flex_link" name="robot0:wrist_flex_link" material="robot0:arm_mat" class="robot0:blue"></geom>
<body name="robot0:wrist_roll_link" pos="0.1385 0 0">
<inertial diaginertia="0.0001 0.0001 0.0001" mass="0.1354" pos="0.0095 0.0004 -0.0002"></inertial>
<joint axis="1 0 0" limited="false" name="robot0:wrist_roll_joint"></joint>
<geom mesh="robot0:wrist_roll_link" name="robot0:wrist_roll_link" material="robot0:arm_mat"></geom>
<body euler="0 0 0" name="robot0:gripper_link" pos="0.1664 0 0">
<inertial diaginertia="0.0024 0.0019 0.0013" mass="1.5175" pos="-0.09 -0.0001 -0.0017" quat="0 0.7071 0 0.7071"></inertial>
<geom mesh="robot0:gripper_link" name="robot0:gripper_link" material="robot0:gripper_mat"></geom>
<body name="robot0:gipper_camera_link" pos="0.055 0 0.0225">
<body name="robot0:gripper_camera_rgb_frame" pos="0 0.02 0">
<body name="robot0:gripper_camera_rgb_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5">
<camera euler="3.1415 0 0" fovy="50" name="gripper_camera_rgb" pos="0 0 0"></camera>
</body>
</body>
<body name="robot0:gripper_camera_depth_frame" pos="0 0.045 0">
<body name="robot0:gripper_camera_depth_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5"></body>
</body>
</body>
<body childclass="robot0:fetchGripper" name="robot0:r_gripper_finger_link" pos="0 0.0159 0">
<inertial diaginertia="0.1 0.1 0.1" mass="4" pos="-0.01 0 0"></inertial>
<joint axis="0 1 0" name="robot0:r_gripper_finger_joint" range="0 0.05"></joint>
<geom pos="0 -0.008 0" size="0.0385 0.007 0.0135" type="box" name="robot0:r_gripper_finger_link" material="robot0:gripper_finger_mat" condim="4" friction="1 0.05 0.01"></geom>
</body>
<body childclass="robot0:fetchGripper" name="robot0:l_gripper_finger_link" pos="0 -0.0159 0">
<inertial diaginertia="0.1 0.1 0.1" mass="4" pos="-0.01 0 0"></inertial>
<joint axis="0 -1 0" name="robot0:l_gripper_finger_joint" range="0 0.05"></joint>
<geom pos="0 0.008 0" size="0.0385 0.007 0.0135" type="box" name="robot0:l_gripper_finger_link" material="robot0:gripper_finger_mat" condim="4" friction="1 0.05 0.01"></geom>
</body>
<site name="robot0:grip" pos="0.02 0 0" rgba="0 0 0 0" size="0.02 0.02 0.02"></site>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<body name="robot0:estop_link" pos="-0.1246 0.2389 0.3113" quat="0.7071 0.7071 0 0">
<inertial diaginertia="0 0 0" mass="0.002" pos="0.0024 -0.0033 0.0067" quat="0.3774 -0.1814 0.1375 0.8977"></inertial>
<geom mesh="robot0:estop_link" rgba="0.8 0 0 1" name="robot0:estop_link"></geom>
</body>
<body name="robot0:laser_link" pos="0.235 0 0.2878" quat="0 1 0 0">
<inertial diaginertia="0 0 0" mass="0.0083" pos="-0.0306 0.0007 0.0552" quat="0.5878 0.5378 -0.4578 0.3945"></inertial>
<geom mesh="robot0:laser_link" rgba="0.7922 0.8196 0.9333 1" name="robot0:laser_link"></geom>
<camera euler="1.55 -1.55 3.14" fovy="25" name="lidar" pos="0 0 0.02"></camera>
</body>
<body name="robot0:torso_fixed_link" pos="-0.0869 0 0.3774">
<inertial diaginertia="0.3865 0.3394 0.1009" mass="13.2775" pos="-0.0722 0.0057 0.2656" quat="0.9995 0.0249 0.0177 0.011"></inertial>
<geom mesh="robot0:torso_fixed_link" name="robot0:torso_fixed_link" class="robot0:blue"></geom>
</body>
<body name="robot0:external_camera_body_0" pos="0 0 0">
<camera euler="0 0.75 1.57" fovy="43.3" name="external_camera_0" pos="1.3 0 1.2"></camera>
</body>
</body>
</mujoco>

View File

@@ -0,0 +1,66 @@
<mujoco>
<asset>
<texture type="skybox" builtin="gradient" rgb1="0.44 0.85 0.56" rgb2="0.46 0.87 0.58" width="32" height="32"></texture>
<texture name="texture_block" file="block.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
<material name="floor_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 1"></material>
<material name="table_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.93 0.93 0.93 1"></material>
<material name="block_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 1"></material>
<material name="puck_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 1"></material>
<material name="robot0:geomMat" shininess="0.03" specular="0.4"></material>
<material name="robot0:gripper_finger_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
<material name="robot0:gripper_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
<material name="robot0:arm_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
<material name="robot0:head_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
<material name="robot0:torso_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
<material name="robot0:base_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
<mesh file="base_link_collision.stl" name="robot0:base_link"></mesh>
<mesh file="bellows_link_collision.stl" name="robot0:bellows_link"></mesh>
<mesh file="elbow_flex_link_collision.stl" name="robot0:elbow_flex_link"></mesh>
<mesh file="estop_link.stl" name="robot0:estop_link"></mesh>
<mesh file="forearm_roll_link_collision.stl" name="robot0:forearm_roll_link"></mesh>
<mesh file="gripper_link.stl" name="robot0:gripper_link"></mesh>
<mesh file="head_pan_link_collision.stl" name="robot0:head_pan_link"></mesh>
<mesh file="head_tilt_link_collision.stl" name="robot0:head_tilt_link"></mesh>
<mesh file="l_wheel_link_collision.stl" name="robot0:l_wheel_link"></mesh>
<mesh file="laser_link.stl" name="robot0:laser_link"></mesh>
<mesh file="r_wheel_link_collision.stl" name="robot0:r_wheel_link"></mesh>
<mesh file="torso_lift_link_collision.stl" name="robot0:torso_lift_link"></mesh>
<mesh file="shoulder_pan_link_collision.stl" name="robot0:shoulder_pan_link"></mesh>
<mesh file="shoulder_lift_link_collision.stl" name="robot0:shoulder_lift_link"></mesh>
<mesh file="upperarm_roll_link_collision.stl" name="robot0:upperarm_roll_link"></mesh>
<mesh file="wrist_flex_link_collision.stl" name="robot0:wrist_flex_link"></mesh>
<mesh file="wrist_roll_link_collision.stl" name="robot0:wrist_roll_link"></mesh>
<mesh file="torso_fixed_link.stl" name="robot0:torso_fixed_link"></mesh>
</asset>
<equality>
<weld body1="robot0:mocap" body2="robot0:gripper_link" solimp="0.9 0.95 0.001" solref="0.02 1"></weld>
</equality>
<contact>
<exclude body1="robot0:r_gripper_finger_link" body2="robot0:l_gripper_finger_link"></exclude>
<exclude body1="robot0:torso_lift_link" body2="robot0:torso_fixed_link"></exclude>
<exclude body1="robot0:torso_lift_link" body2="robot0:shoulder_pan_link"></exclude>
</contact>
<default>
<default class="robot0:fetch">
<geom margin="0.001" material="robot0:geomMat" rgba="1 1 1 1" solimp="0.99 0.99 0.01" solref="0.01 1" type="mesh" user="0"></geom>
<joint armature="1" damping="50" frictionloss="0" stiffness="0"></joint>
<default class="robot0:fetchGripper">
<geom condim="4" margin="0.001" type="box" user="0" rgba="0.356 0.361 0.376 1.0"></geom>
<joint armature="100" damping="1000" limited="true" solimplimit="0.99 0.999 0.01" solreflimit="0.01 1" type="slide"></joint>
</default>
<default class="robot0:grey">
<geom rgba="0.356 0.361 0.376 1.0"></geom>
</default>
<default class="robot0:blue">
<geom rgba="0.086 0.506 0.767 1.0"></geom>
</default>
</default>
</default>
</mujoco>

View File

@@ -0,0 +1,32 @@
<?xml version="1.0" encoding="utf-8"?>
<mujoco>
<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
<option timestep="0.002">
<flag warmstart="enable"></flag>
</option>
<include file="shared.xml"></include>
<worldbody>
<geom name="floor0" pos="1 0.75 0" size="1.05 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
<body name="floor0" pos="1 0.75 0">
<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
</body>
<include file="robot.xml"></include>
<body name="table0" pos="1.32441906 0.75018422 0.2">
<geom size="0.625 0.45 0.2" type="box" condim="3" name="table0" material="table_mat" mass="2000" friction="0.1 0.005 0.0001"></geom>
</body>
<body name="object0" pos="0.025 0.025 0.02">
<joint name="object0:joint" type="free" damping="0.01"></joint>
<geom size="0.025 0.02" type="cylinder" condim="3" name="object0" material="puck_mat" friction="0.1 0.005 0.0001" mass="2"></geom>
<site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
</body>
<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
</worldbody>
<actuator></actuator>
</mujoco>

View File

@@ -0,0 +1,41 @@
<?xml version="1.0" encoding="utf-8"?>
<mujoco>
<compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
<option timestep="0.002" iterations="20" apirate="200">
<flag warmstart="enable"></flag>
</option>
<include file="shared.xml"></include>
<asset>
<include file="shared_asset.xml"></include>
<texture name="texture:object" file="block.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
<texture name="texture:hidden" file="block_hidden.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
<material name="material:object" texture="texture:object" specular="1" shininess="0.3" reflectance="0"></material>
<material name="material:hidden" texture="texture:hidden" specular="1" shininess="0.3" reflectance="0"></material>
<material name="material:target" texture="texture:object" specular="1" shininess="0.3" reflectance="0" rgba="1 1 1 0.5"></material>
</asset>
<worldbody>
<geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
<body name="floor0" pos="1 1 0"></body>
<include file="robot.xml"></include>
<body name="object" pos="1 0.87 0.2">
<geom name="object" type="box" size="0.025 0.025 0.025" material="material:object" condim="4" density="567"></geom>
<geom name="object_hidden" type="box" size="0.024 0.024 0.024" material="material:hidden" condim="4" contype="0" conaffinity="0" mass="0"></geom>
<site name="object:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
<joint name="object:joint" type="free" damping="0.01"></joint>
</body>
<body name="target" pos="1 0.87 0.2">
<geom name="target" type="box" size="0.025 0.025 0.025" material="material:target" condim="4" group="2" contype="0" conaffinity="0"></geom>
<site name="target:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
<joint name="target:joint" type="free" damping="0.01"></joint>
</body>
<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 1 4" dir="0 0 -1" name="light0"></light>
</worldbody>
</mujoco>

View File

@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="utf-8"?>
<mujoco>
<compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
<option timestep="0.002" iterations="20" apirate="200">
<flag warmstart="enable"></flag>
</option>
<include file="shared.xml"></include>
<asset>
<include file="shared_asset.xml"></include>
<texture name="texture:object" file="block.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
<texture name="texture:hidden" file="block_hidden.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
<material name="material:object" texture="texture:object" specular="1" shininess="0.3" reflectance="0"></material>
<material name="material:hidden" texture="texture:hidden" specular="1" shininess="0.3" reflectance="0"></material>
<material name="material:target" texture="texture:object" specular="1" shininess="0.3" reflectance="0" rgba="1 1 1 0.5"></material>
</asset>
<worldbody>
<geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
<body name="floor0" pos="1 1 0"></body>
<include file="robot.xml"></include>
<body name="object" pos="1 0.87 0.2">
<geom name="object" type="ellipsoid" size="0.03 0.03 0.04" material="material:object" condim="4"></geom>
<geom name="object_hidden" type="ellipsoid" size="0.029 0.029 0.03" material="material:hidden" condim="4" contype="0" conaffinity="0" mass="0"></geom>
<site name="object:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
<joint name="object:joint" type="free" damping="0.01"></joint>
</body>
<body name="target" pos="1 0.87 0.2">
<geom name="target" type="ellipsoid" size="0.03 0.03 0.04" material="material:target" condim="4" group="2" contype="0" conaffinity="0"></geom>
<site name="target:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
<joint name="target:joint" type="free" damping="0.01"></joint>
</body>
<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 1 4" dir="0 0 -1" name="light0"></light>
</worldbody>
</mujoco>

View File

@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="utf-8"?>
<mujoco>
<compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
<option timestep="0.002" iterations="20" apirate="200">
<flag warmstart="enable"></flag>
</option>
<include file="shared.xml"></include>
<asset>
<include file="shared_asset.xml"></include>
<material name="material:object" specular="0" shininess="0.5" reflectance="0.0" rgba="0.46 0.81 0.88 1.0"></material>
<material name="material:target" specular="0" shininess="0.5" reflectance="0.0" rgba="0.46 0.81 0.88 0.5"></material>
</asset>
<worldbody>
<geom name="floor0" pos="1 1 -0.2" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
<body name="floor0" pos="1 1 0"></body>
<include file="robot.xml"></include>
<body name="object" pos="1 0.87 0.2" euler="-1 1 0">
<geom name="object" type="capsule" size="0.008 0.1" material="material:object" condim="4"></geom>
<site name="object:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
<site name="object:top" pos="0 0 0.1" rgba="1 0 0 1" size="0.0081"></site>
<site name="object:bottom" pos="0 0 -0.1" rgba="0 1 0 1" size="0.0081"></site>
<joint name="object:joint" type="free" damping="0.01"></joint>
</body>
<body name="target" pos="1 0.87 0.2" euler="-1 1 0">
<geom name="target" type="capsule" size="0.008 0.1" material="material:target" condim="4" group="2" contype="0" conaffinity="0"></geom>
<site name="target:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
<site name="target:top" pos="0 0 0.1" rgba="1 0 0 0.5" size="0.0081"></site>
<site name="target:bottom" pos="0 0 -0.1" rgba="0 1 0 0.5" size="0.0081"></site>
<joint name="target:joint" type="free" damping="0.01"></joint>
</body>
<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 1 4" dir="0 0 -1" name="light0"></light>
</worldbody>
</mujoco>

View File

@@ -0,0 +1,34 @@
<?xml version="1.0" encoding="utf-8"?>
<mujoco>
<compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
<option timestep="0.002" iterations="20" apirate="200">
<flag warmstart="enable"></flag>
</option>
<include file="shared.xml"></include>
<asset>
<include file="shared_asset.xml"></include>
</asset>
<worldbody>
<geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
<body name="floor0" pos="1 1 0">
<site name="target0" pos="0 0 0" size="0.005" rgba="1 0 0 1" type="sphere"></site>
<site name="target1" pos="0 0 0" size="0.005" rgba="0 1 0 1" type="sphere"></site>
<site name="target2" pos="0 0 0" size="0.005" rgba="0 0 1 1" type="sphere"></site>
<site name="target3" pos="0 0 0" size="0.005" rgba="1 1 0 1" type="sphere"></site>
<site name="target4" pos="0 0 0" size="0.005" rgba="1 0 1 1" type="sphere"></site>
<site name="finger0" pos="0 0 0" size="0.01" rgba="1 0 0 0.2" type="sphere"></site>
<site name="finger1" pos="0 0 0" size="0.01" rgba="0 1 0 0.2" type="sphere"></site>
<site name="finger2" pos="0 0 0" size="0.01" rgba="0 0 1 0.2" type="sphere"></site>
<site name="finger3" pos="0 0 0" size="0.01" rgba="1 1 0 0.2" type="sphere"></site>
<site name="finger4" pos="0 0 0" size="0.01" rgba="1 0 1 0.2" type="sphere"></site>
</body>
<include file="robot.xml"></include>
<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
</worldbody>
</mujoco>

View File

@@ -0,0 +1,160 @@
<!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
<mujoco>
<body name="robot0:hand mount" pos="1 1.25 0.15" euler="1.5708 0 3.14159">
<inertial mass="0.1" pos="0 0 0" diaginertia="0.001 0.001 0.001"></inertial>
<body childclass="robot0:asset_class" name="robot0:forearm" pos="0 0.01 0" euler="0 0 0">
<inertial pos="0.001 -0.002 0.29" quat="0.982 -0.016 0 -0.188" mass="4" diaginertia="0.01 0.01 0.0075"></inertial>
<geom class="robot0:D_Vizual" pos="0 0.01 0.04" name="robot0:V_forearm" mesh="robot0:forearm" euler="0 0 1.57"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_forearm" type="mesh" mesh="robot0:forearm_cvx" pos="0 0.01 0.04" euler="0 0 1.57" rgba="0.4 0.5 0.6 0.7"></geom>
<body name="robot0:wrist" pos="0 0 0.256">
<inertial pos="0.003 0 0.016" quat="0.504 0.496 0.495 0.504" mass="0.3" diaginertia="0.001 0.001 0.001"></inertial>
<joint name="robot0:WRJ1" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.489 0.14" damping="0.5" armature="0.005" user="1123"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_wrist" mesh="robot0:wrist"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_wrist" type="capsule" pos="0 0 0" quat="0.707 0.707 0 0" size="0.015 0.01" rgba="0.4 0.5 0.6 0.1"></geom>
<body name="robot0:palm" pos="0 0 0.034">
<inertial pos="0.006 0 0.036" quat="0.716 0.044 0.075 0.693" mass="0.3" diaginertia="0.001 0.001 0.001"></inertial>
<joint name="robot0:WRJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="-0.698 0.489" damping="0.5" armature="0.005" user="1122"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_palm" mesh="robot0:palm"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_palm0" type="box" pos="0.011 0 0.038" size="0.032 0.0111 0.049" rgba="0.4 0.5 0.6 0.1"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_palm1" type="box" pos="-0.032 0 0.014" size="0.011 0.0111 0.025" rgba="0.4 0.5 0.6 0.1"></geom>
<body name="robot0:ffknuckle" pos="0.033 0 0.095">
<inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:FFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1103"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_ffknuckle" mesh="robot0:knuckle"></geom>
<body name="robot0:ffproximal" pos="0 0 0">
<inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:FFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1102"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_ffproximal" mesh="robot0:F3"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_ffproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
<body name="robot0:ffmiddle" pos="0 0 0.045">
<inertial pos="0 0 0.011" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:FFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1101"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_ffmiddle" mesh="robot0:F2"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_ffmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
<body name="robot0:ffdistal" pos="0 0 0.025">
<inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:FFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1100"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_ffdistal" pos="0 0 0.001" mesh="robot0:F1"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_ffdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
<site name="robot0:S_fftip" pos="0 0 0.026" group="3"></site>
<site class="robot0:D_Touch" name="robot0:Tch_fftip"></site>
</body>
</body>
</body>
</body>
<body name="robot0:mfknuckle" pos="0.011 0 0.099">
<inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:MFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1107"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_mfknuckle" mesh="robot0:knuckle"></geom>
<body name="robot0:mfproximal" pos="0 0 0">
<inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:MFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1106"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_mfproximal" mesh="robot0:F3"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_mfproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
<body name="robot0:mfmiddle" pos="0 0 0.045">
<inertial pos="0 0 0.012" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:MFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1105"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_mfmiddle" mesh="robot0:F2"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_mfmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
<body name="robot0:mfdistal" pos="0 0 0.025">
<inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:MFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1104"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_mfdistal" mesh="robot0:F1"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_mfdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
<site name="robot0:S_mftip" pos="0 0 0.026" group="3"></site>
<site class="robot0:D_Touch" name="robot0:Tch_mftip"></site>
</body>
</body>
</body>
</body>
<body name="robot0:rfknuckle" pos="-0.011 0 0.095">
<inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:RFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1111"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_rfknuckle" mesh="robot0:knuckle"></geom>
<body name="robot0:rfproximal" pos="0 0 0">
<inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:RFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1110"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_rfproximal" mesh="robot0:F3"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_rfproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
<body name="robot0:rfmiddle" pos="0 0 0.045">
<inertial pos="0 0 0.012" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:RFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1109"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_rfmiddle" mesh="robot0:F2"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_rfmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
<body name="robot0:rfdistal" pos="0 0 0.025">
<inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:RFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1108"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_rfdistal" mesh="robot0:F1" pos="0 0 0.001"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_rfdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
<site name="robot0:S_rftip" pos="0 0 0.026" group="3"></site>
<site class="robot0:D_Touch" name="robot0:Tch_rftip"></site>
</body>
</body>
</body>
</body>
<body name="robot0:lfmetacarpal" pos="-0.017 0 0.044">
<inertial pos="-0.014 0.001 0.014" quat="0.709 -0.092 -0.063 0.696" mass="0.075" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:LFJ4" type="hinge" pos="0 0 0" axis="0.571 0 0.821" range="0 0.785" user="1116"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_lfmetacarpal" pos="-0.016 0 -0.023" mesh="robot0:lfmetacarpal"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_lfmetacarpal" type="box" pos="-0.0165 0 0.01" size="0.0095 0.0111 0.025" rgba="0.4 0.5 0.6 0.2"></geom>
<body name="robot0:lfknuckle" pos="-0.017 0 0.044">
<inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:LFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1115"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_lfknuckle" mesh="robot0:knuckle"></geom>
<body name="robot0:lfproximal" pos="0 0 0">
<inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:LFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1114"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_lfproximal" mesh="robot0:F3"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_lfproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
<body name="robot0:lfmiddle" pos="0 0 0.045">
<inertial pos="0 0 0.012" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:LFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1113"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_lfmiddle" mesh="robot0:F2"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_lfmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
<body name="robot0:lfdistal" pos="0 0 0.025">
<inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:LFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1112"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_lfdistal" mesh="robot0:F1" pos="0 0 0.001"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_lfdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
<site name="robot0:S_lftip" pos="0 0 0.026" group="3"></site>
<site class="robot0:D_Touch" name="robot0:Tch_lftip"></site>
</body>
</body>
</body>
</body>
</body>
<body name="robot0:thbase" pos="0.034 -0.009 0.029" axisangle="0 1 0 0.785">
<inertial pos="0 0 0" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:THJ4" type="hinge" pos="0 0 0" axis="0 0 -1" range="-1.047 1.047" user="1121"></joint>
<geom name="robot0:V_thbase" type="box" group="1" pos="0 0 0" size="0.001 0.001 0.001"></geom>
<body name="robot0:thproximal" pos="0 0 0">
<inertial pos="0 0 0.017" quat="0.982 0 0.001 0.191" mass="0.016" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:THJ3" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.222" user="1120"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_thproximal" mesh="robot0:TH3_z"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_thproximal" type="capsule" pos="0 0 0.019" size="0.013 0.019" rgba="0.4 0.5 0.6 0.1"></geom>
<body name="robot0:thhub" pos="0 0 0.038">
<inertial pos="0 0 0" mass="0.002" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:THJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="-0.209 0.209" user="1119"></joint>
<geom name="robot0:V_thhub" type="box" group="1" pos="0 0 0" size="0.001 0.001 0.001"></geom>
<body name="robot0:thmiddle" pos="0 0 0">
<inertial pos="0 0 0.016" quat="1 -0.001 -0.007 0.003" mass="0.016" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:THJ1" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.524 0.524" user="1118"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_thmiddle" mesh="robot0:TH2_z"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_thmiddle" type="capsule" pos="0 0 0.016" size="0.011 0.016"></geom>
<body name="robot0:thdistal" pos="0 0 0.032">
<inertial pos="0 0 0.016" quat="0.999 -0.005 -0.047 0.005" mass="0.016" diaginertia="1e-05 1e-05 1e-05"></inertial>
<joint name="robot0:THJ0" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.571 0" user="1117"></joint>
<geom class="robot0:D_Vizual" name="robot0:V_thdistal" mesh="robot0:TH1_z"></geom>
<geom class="robot0:DC_Hand" name="robot0:C_thdistal" type="capsule" pos="0 0 0.013" size="0.00918 0.013" condim="4"></geom>
<site name="robot0:S_thtip" pos="0 0 0.0275" group="3"></site>
<site class="robot0:D_Touch" name="robot0:Tch_thtip" size="0.005 0.011 0.016" pos="-0.005 0 0.02"></site>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</mujoco>

View File

@@ -0,0 +1,254 @@
<!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
<mujoco>
<size njmax="500" nconmax="100" nuser_jnt="1" nuser_site="1" nuser_tendon="1" nuser_sensor="1" nuser_actuator="16" nstack="600000"></size>
<visual>
<map fogstart="3" fogend="5" force="0.1"></map>
<quality shadowsize="4096"></quality>
</visual>
<default>
<default class="robot0:asset_class">
<geom friction="1 0.005 0.001" condim="3" margin="0.0005" contype="1" conaffinity="1"></geom>
<joint limited="true" damping="0.1" armature="0.001" margin="0.01" frictionloss="0.001"></joint>
<site size="0.005" rgba="0.4 0.9 0.4 1"></site>
<general ctrllimited="true" forcelimited="true"></general>
</default>
<default class="robot0:D_Touch">
<site type="box" size="0.009 0.004 0.013" pos="0 -0.004 0.018" rgba="0.8 0.8 0.8 0.15" group="4"></site>
</default>
<default class="robot0:DC_Hand">
<geom material="robot0:MatColl" contype="1" conaffinity="0" group="4"></geom>
</default>
<default class="robot0:D_Vizual">
<geom material="robot0:MatViz" contype="0" conaffinity="0" group="1" type="mesh"></geom>
</default>
<default class="robot0:free">
<joint type="free" damping="0" armature="0" limited="false"></joint>
</default>
</default>
<contact>
<pair geom1="robot0:C_ffdistal" geom2="robot0:C_thdistal" condim="1"></pair>
<pair geom1="robot0:C_ffmiddle" geom2="robot0:C_thdistal" condim="1"></pair>
<pair geom1="robot0:C_ffproximal" geom2="robot0:C_thdistal" condim="1"></pair>
<pair geom1="robot0:C_mfproximal" geom2="robot0:C_thdistal" condim="1"></pair>
<pair geom1="robot0:C_mfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
<pair geom1="robot0:C_rfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
<pair geom1="robot0:C_lfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
<pair geom1="robot0:C_palm0" geom2="robot0:C_thdistal" condim="1"></pair>
<pair geom1="robot0:C_mfdistal" geom2="robot0:C_ffdistal" condim="1"></pair>
<pair geom1="robot0:C_rfdistal" geom2="robot0:C_mfdistal" condim="1"></pair>
<pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfdistal" condim="1"></pair>
<pair geom1="robot0:C_mfproximal" geom2="robot0:C_ffproximal" condim="1"></pair>
<pair geom1="robot0:C_rfproximal" geom2="robot0:C_mfproximal" condim="1"></pair>
<pair geom1="robot0:C_lfproximal" geom2="robot0:C_rfproximal" condim="1"></pair>
<pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfdistal" condim="1"></pair>
<pair geom1="robot0:C_lfdistal" geom2="robot0:C_mfdistal" condim="1"></pair>
<pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfmiddle" condim="1"></pair>
<pair geom1="robot0:C_lfmiddle" geom2="robot0:C_rfdistal" condim="1"></pair>
<pair geom1="robot0:C_lfmiddle" geom2="robot0:C_rfmiddle" condim="1"></pair>
</contact>
<tendon>
<fixed name="robot0:T_WRJ1r" limited="true" range="-0.032 0.032" user="1236">
<joint joint="robot0:WRJ1" coef="0.0325"></joint>
</fixed>
<fixed name="robot0:T_WRJ1l" limited="true" range="-0.032 0.032" user="1237">
<joint joint="robot0:WRJ1" coef="-0.0325"></joint>
</fixed>
<fixed name="robot0:T_WRJ0u" limited="true" range="-0.032 0.032" user="1236">
<joint joint="robot0:WRJ0" coef="0.0175"></joint>
</fixed>
<fixed name="robot0:T_WRJ0d" limited="true" range="-0.032 0.032" user="1237">
<joint joint="robot0:WRJ0" coef="-0.0175"></joint>
</fixed>
<fixed name="robot0:T_FFJ3r" limited="true" range="-0.018 0.018" user="1204">
<joint joint="robot0:FFJ3" coef="0.01"></joint>
</fixed>
<fixed name="robot0:T_FFJ3l" limited="true" range="-0.018 0.018" user="1205">
<joint joint="robot0:FFJ3" coef="-0.01"></joint>
</fixed>
<fixed name="robot0:T_FFJ2u" limited="true" range="-0.007 0.03" user="1202">
<joint joint="robot0:FFJ2" coef="0.01"></joint>
</fixed>
<fixed name="robot0:T_FFJ2d" limited="true" range="-0.03 0.007" user="1203">
<joint joint="robot0:FFJ2" coef="-0.01"></joint>
</fixed>
<fixed name="robot0:T_FFJ1c" limited="true" range="-0.001 0.001">
<joint joint="robot0:FFJ0" coef="0.00705"></joint>
<joint joint="robot0:FFJ1" coef="-0.00805"></joint>
</fixed>
<fixed name="robot0:T_FFJ1u" limited="true" range="-0.007 0.03" user="1200">
<joint joint="robot0:FFJ0" coef="0.00705"></joint>
<joint joint="robot0:FFJ1" coef="0.00805"></joint>
</fixed>
<fixed name="robot0:T_FFJ1d" limited="true" range="-0.03 0.007" user="1201">
<joint joint="robot0:FFJ0" coef="-0.00705"></joint>
<joint joint="robot0:FFJ1" coef="-0.00805"></joint>
</fixed>
<fixed name="robot0:T_MFJ3r" limited="true" range="-0.018 0.018" user="1210">
<joint joint="robot0:MFJ3" coef="0.01"></joint>
</fixed>
<fixed name="robot0:T_MFJ3l" limited="true" range="-0.018 0.018" user="1211">
<joint joint="robot0:MFJ3" coef="-0.01"></joint>
</fixed>
<fixed name="robot0:T_MFJ2u" limited="true" range="-0.007 0.03" user="1208">
<joint joint="robot0:MFJ2" coef="0.01"></joint>
</fixed>
<fixed name="robot0:T_MFJ2d" limited="true" range="-0.03 0.007" user="1209">
<joint joint="robot0:MFJ2" coef="-0.01"></joint>
</fixed>
<fixed name="robot0:T_MFJ1c" limited="true" range="-0.001 0.001">
<joint joint="robot0:MFJ0" coef="0.00705"></joint>
<joint joint="robot0:MFJ1" coef="-0.00805"></joint>
</fixed>
<fixed name="robot0:T_MFJ1u" limited="true" range="-0.007 0.03" user="1206">
<joint joint="robot0:MFJ0" coef="0.00705"></joint>
<joint joint="robot0:MFJ1" coef="0.00805"></joint>
</fixed>
<fixed name="robot0:T_MFJ1d" limited="true" range="-0.03 0.007" user="1207">
<joint joint="robot0:MFJ0" coef="-0.00705"></joint>
<joint joint="robot0:MFJ1" coef="-0.00805"></joint>
</fixed>
<fixed name="robot0:T_RFJ3r" limited="true" range="-0.018 0.018" user="1216">
<joint joint="robot0:RFJ3" coef="0.01"></joint>
</fixed>
<fixed name="robot0:T_RFJ3l" limited="true" range="-0.018 0.018" user="1217">
<joint joint="robot0:RFJ3" coef="-0.01"></joint>
</fixed>
<fixed name="robot0:T_RFJ2u" limited="true" range="-0.007 0.03" user="1214">
<joint joint="robot0:RFJ2" coef="0.01"></joint>
</fixed>
<fixed name="robot0:T_RFJ2d" limited="true" range="-0.03 0.007" user="1215">
<joint joint="robot0:RFJ2" coef="-0.01"></joint>
</fixed>
<fixed name="robot0:T_RFJ1c" limited="true" range="-0.001 0.001">
<joint joint="robot0:RFJ0" coef="0.00705"></joint>
<joint joint="robot0:RFJ1" coef="-0.00805"></joint>
</fixed>
<fixed name="robot0:T_RFJ1u" limited="true" range="-0.007 0.03" user="1212">
<joint joint="robot0:RFJ0" coef="0.00705"></joint>
<joint joint="robot0:RFJ1" coef="0.00805"></joint>
</fixed>
<fixed name="robot0:T_RFJ1d" limited="true" range="-0.03 0.007" user="1213">
<joint joint="robot0:RFJ0" coef="-0.00705"></joint>
<joint joint="robot0:RFJ1" coef="-0.00805"></joint>
</fixed>
<fixed name="robot0:T_LFJ4u" limited="true" range="-0.007 0.03" user="1224">
<joint joint="robot0:LFJ4" coef="0.01"></joint>
</fixed>
<fixed name="robot0:T_LFJ4d" limited="true" range="-0.03 0.007" user="1225">
<joint joint="robot0:LFJ4" coef="-0.01"></joint>
</fixed>
<fixed name="robot0:T_LFJ3r" limited="true" range="-0.018 0.018" user="1222">
<joint joint="robot0:LFJ3" coef="0.01"></joint>
</fixed>
<fixed name="robot0:T_LFJ3l" limited="true" range="-0.018 0.018" user="1223">
<joint joint="robot0:LFJ3" coef="-0.01"></joint>
</fixed>
<fixed name="robot0:T_LFJ2u" limited="true" range="-0.007 0.03" user="1220">
<joint joint="robot0:LFJ2" coef="0.01"></joint>
</fixed>
<fixed name="robot0:T_LFJ2d" limited="true" range="-0.03 0.007" user="1221">
<joint joint="robot0:LFJ2" coef="-0.01"></joint>
</fixed>
<fixed name="robot0:T_LFJ1c" limited="true" range="-0.001 0.001">
<joint joint="robot0:LFJ0" coef="0.00705"></joint>
<joint joint="robot0:LFJ1" coef="-0.00805"></joint>
</fixed>
<fixed name="robot0:T_LFJ1u" limited="true" range="-0.007 0.03" user="1218">
<joint joint="robot0:LFJ0" coef="0.00705"></joint>
<joint joint="robot0:LFJ1" coef="0.00805"></joint>
</fixed>
<fixed name="robot0:T_LFJ1d" limited="true" range="-0.03 0.007" user="1219">
<joint joint="robot0:LFJ0" coef="-0.00705"></joint>
<joint joint="robot0:LFJ1" coef="-0.00805"></joint>
</fixed>
<fixed name="robot0:T_THJ4a" limited="true" range="-0.018 0.018" user="1234">
<joint joint="robot0:THJ4" coef="0.01636"></joint>
</fixed>
<fixed name="robot0:T_THJ4c" limited="true" range="-0.018 0.018" user="1235">
<joint joint="robot0:THJ4" coef="-0.01636"></joint>
</fixed>
<fixed name="robot0:T_THJ3u" limited="true" range="-0.007 0.03" user="1232">
<joint joint="robot0:THJ3" coef="0.01"></joint>
</fixed>
<fixed name="robot0:T_THJ3d" limited="true" range="-0.03 0.007" user="1233">
<joint joint="robot0:THJ3" coef="-0.01"></joint>
</fixed>
<fixed name="robot0:T_THJ2u" limited="true" range="-0.018 0.018" user="1230">
<joint joint="robot0:THJ2" coef="0.011"></joint>
</fixed>
<fixed name="robot0:T_THJ2d" limited="true" range="-0.018 0.018" user="1231">
<joint joint="robot0:THJ2" coef="-0.011"></joint>
</fixed>
<fixed name="robot0:T_THJ1r" limited="true" range="-0.018 0.018" user="1228">
<joint joint="robot0:THJ1" coef="0.011"></joint>
</fixed>
<fixed name="robot0:T_THJ1l" limited="true" range="-0.018 0.018" user="1229">
<joint joint="robot0:THJ1" coef="-0.011"></joint>
</fixed>
<fixed name="robot0:T_THJ0r" limited="true" range="-0.03 0.007" user="1226">
<joint joint="robot0:THJ0" coef="0.009"></joint>
</fixed>
<fixed name="robot0:T_THJ0l" limited="true" range="-0.007 0.03" user="1227">
<joint joint="robot0:THJ0" coef="-0.009"></joint>
</fixed>
</tendon>
<sensor>
<jointpos name="robot0:Sjp_WRJ1" joint="robot0:WRJ1"></jointpos>
<jointpos name="robot0:Sjp_WRJ0" joint="robot0:WRJ0"></jointpos>
<jointpos name="robot0:Sjp_FFJ3" joint="robot0:FFJ3"></jointpos>
<jointpos name="robot0:Sjp_FFJ2" joint="robot0:FFJ2"></jointpos>
<jointpos name="robot0:Sjp_FFJ1" joint="robot0:FFJ1"></jointpos>
<jointpos name="robot0:Sjp_FFJ0" joint="robot0:FFJ0"></jointpos>
<jointpos name="robot0:Sjp_MFJ3" joint="robot0:MFJ3"></jointpos>
<jointpos name="robot0:Sjp_MFJ2" joint="robot0:MFJ2"></jointpos>
<jointpos name="robot0:Sjp_MFJ1" joint="robot0:MFJ1"></jointpos>
<jointpos name="robot0:Sjp_MFJ0" joint="robot0:MFJ0"></jointpos>
<jointpos name="robot0:Sjp_RFJ3" joint="robot0:RFJ3"></jointpos>
<jointpos name="robot0:Sjp_RFJ2" joint="robot0:RFJ2"></jointpos>
<jointpos name="robot0:Sjp_RFJ1" joint="robot0:RFJ1"></jointpos>
<jointpos name="robot0:Sjp_RFJ0" joint="robot0:RFJ0"></jointpos>
<jointpos name="robot0:Sjp_LFJ4" joint="robot0:LFJ4"></jointpos>
<jointpos name="robot0:Sjp_LFJ3" joint="robot0:LFJ3"></jointpos>
<jointpos name="robot0:Sjp_LFJ2" joint="robot0:LFJ2"></jointpos>
<jointpos name="robot0:Sjp_LFJ1" joint="robot0:LFJ1"></jointpos>
<jointpos name="robot0:Sjp_LFJ0" joint="robot0:LFJ0"></jointpos>
<jointpos name="robot0:Sjp_THJ4" joint="robot0:THJ4"></jointpos>
<jointpos name="robot0:Sjp_THJ3" joint="robot0:THJ3"></jointpos>
<jointpos name="robot0:Sjp_THJ2" joint="robot0:THJ2"></jointpos>
<jointpos name="robot0:Sjp_THJ1" joint="robot0:THJ1"></jointpos>
<jointpos name="robot0:Sjp_THJ0" joint="robot0:THJ0"></jointpos>
<touch name="robot0:ST_Tch_fftip" site="robot0:Tch_fftip"></touch>
<touch name="robot0:ST_Tch_mftip" site="robot0:Tch_mftip"></touch>
<touch name="robot0:ST_Tch_rftip" site="robot0:Tch_rftip"></touch>
<touch name="robot0:ST_Tch_lftip" site="robot0:Tch_lftip"></touch>
<touch name="robot0:ST_Tch_thtip" site="robot0:Tch_thtip"></touch>
</sensor>
<actuator>
<position name="robot0:A_WRJ1" class="robot0:asset_class" user="2038" joint="robot0:WRJ1" ctrlrange="-0.489 0.14" kp="5" forcerange="-4.785 4.785"></position>
<position name="robot0:A_WRJ0" class="robot0:asset_class" user="2036" joint="robot0:WRJ0" ctrlrange="-0.698 0.489" kp="5" forcerange="-2.175 2.175"></position>
<position name="robot0:A_FFJ3" class="robot0:asset_class" user="2004" joint="robot0:FFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
<position name="robot0:A_FFJ2" class="robot0:asset_class" user="2002" joint="robot0:FFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
<position name="robot0:A_FFJ1" class="robot0:asset_class" user="2000" joint="robot0:FFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
<position name="robot0:A_MFJ3" class="robot0:asset_class" user="2010" joint="robot0:MFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
<position name="robot0:A_MFJ2" class="robot0:asset_class" user="2008" joint="robot0:MFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
<position name="robot0:A_MFJ1" class="robot0:asset_class" user="2006" joint="robot0:MFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
<position name="robot0:A_RFJ3" class="robot0:asset_class" user="2016" joint="robot0:RFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
<position name="robot0:A_RFJ2" class="robot0:asset_class" user="2014" joint="robot0:RFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
<position name="robot0:A_RFJ1" class="robot0:asset_class" user="2012" joint="robot0:RFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
<position name="robot0:A_LFJ4" class="robot0:asset_class" user="2024" joint="robot0:LFJ4" ctrlrange="0 0.785" kp="1" forcerange="-0.9 0.9"></position>
<position name="robot0:A_LFJ3" class="robot0:asset_class" user="2022" joint="robot0:LFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
<position name="robot0:A_LFJ2" class="robot0:asset_class" user="2020" joint="robot0:LFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
<position name="robot0:A_LFJ1" class="robot0:asset_class" user="2018" joint="robot0:LFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
<position name="robot0:A_THJ4" class="robot0:asset_class" user="2034" joint="robot0:THJ4" ctrlrange="-1.047 1.047" kp="1" forcerange="-2.3722 2.3722"></position>
<position name="robot0:A_THJ3" class="robot0:asset_class" user="2032" joint="robot0:THJ3" ctrlrange="0 1.222" kp="1" forcerange="-1.45 1.45"></position>
<position name="robot0:A_THJ2" class="robot0:asset_class" user="2030" joint="robot0:THJ2" ctrlrange="-0.209 0.209" kp="1" forcerange="-0.99 0.99"></position>
<position name="robot0:A_THJ1" class="robot0:asset_class" user="2028" joint="robot0:THJ1" ctrlrange="-0.524 0.524" kp="1" forcerange="-0.99 0.99"></position>
<position name="robot0:A_THJ0" class="robot0:asset_class" user="2026" joint="robot0:THJ0" ctrlrange="-1.571 0" kp="1" forcerange="-0.81 0.81"></position>
</actuator>
</mujoco>

View File

@@ -0,0 +1,26 @@
<!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
<mujoco>
<texture type="skybox" builtin="gradient" rgb1="0.44 0.85 0.56" rgb2="0.46 0.87 0.58" width="32" height="32"></texture>
<texture name="robot0:texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.15 0.2" width="512" height="512"></texture>
<texture name="robot0:texgeom" type="cube" builtin="flat" mark="cross" width="127" height="127" rgb1="0.3 0.6 0.5" rgb2="0.3 0.6 0.5" markrgb="0 0 0" random="0.01"></texture>
<material name="robot0:MatGnd" reflectance="0.5" texture="robot0:texplane" texrepeat="1 1" texuniform="true"></material>
<material name="robot0:MatColl" specular="1" shininess="0.3" reflectance="0.5" rgba="0.4 0.5 0.6 1"></material>
<material name="robot0:MatViz" specular="0.75" shininess="0.1" reflectance="0.5" rgba="0.93 0.93 0.93 1"></material>
<material name="robot0:object" texture="robot0:texgeom" texuniform="false"></material>
<material name="floor_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 0"></material>
<mesh name="robot0:forearm" file="forearm_electric.stl"></mesh>
<mesh name="robot0:forearm_cvx" file="forearm_electric_cvx.stl"></mesh>
<mesh name="robot0:wrist" scale="0.001 0.001 0.001" file="wrist.stl"></mesh>
<mesh name="robot0:palm" scale="0.001 0.001 0.001" file="palm.stl"></mesh>
<mesh name="robot0:knuckle" scale="0.001 0.001 0.001" file="knuckle.stl"></mesh>
<mesh name="robot0:F3" scale="0.001 0.001 0.001" file="F3.stl"></mesh>
<mesh name="robot0:F2" scale="0.001 0.001 0.001" file="F2.stl"></mesh>
<mesh name="robot0:F1" scale="0.001 0.001 0.001" file="F1.stl"></mesh>
<mesh name="robot0:lfmetacarpal" scale="0.001 0.001 0.001" file="lfmetacarpal.stl"></mesh>
<mesh name="robot0:TH3_z" scale="0.001 0.001 0.001" file="TH3_z.stl"></mesh>
<mesh name="robot0:TH2_z" scale="0.001 0.001 0.001" file="TH2_z.stl"></mesh>
<mesh name="robot0:TH1_z" scale="0.001 0.001 0.001" file="TH1_z.stl"></mesh>
</mujoco>

View File

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More