git reimport

2019-03-15 13:54:11 +04:00
commit 0c6baed398
232 changed files with 14071 additions and 0 deletions
--- a/14
+++ b/14
@@ -0,0 +1,14 @@
 [[source]]
 url = "https://pypi.org/simple"
 verify_ssl = true
 name = "pypi"
 [packages]
 numpy = "*"
 keras = "*"
 keras-rl = "*"
 [dev-packages]
 [requires]
 python_version = "3.7"
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -0,0 +1,175 @@
 {
    "_meta": {
        "hash": {
            "sha256": "a227a69980d84f05f5a653bcc6f1cc461e95f114074a0baae20d457cc84e7ebc"
        },
        "pipfile-spec": 6,
        "requires": {
            "python_version": "3.7"
        },
        "sources": [
            {
                "name": "pypi",
                "url": "https://pypi.org/simple",
                "verify_ssl": true
            }
        ]
    },
    "default": {
        "h5py": {
            "hashes": [
                "sha256:0f8cd2acbacf3177b4427ed42639c911667b1f24d923388ab1f8ad466a12be5e",
                "sha256:11277e3879098f921ee9e29105b20591e1dfdd44963357399f2abaa1a280c560",
                "sha256:1241dec0c94ac32f3285cac1d6f44beabf80423e422ab03bd2686d731a8a9294",
                "sha256:17b8187de0b3a945d8e8d031e7eb6ece2fce90791f9c5fde36f4396bf38fdde1",
                "sha256:2f30007d0796788a454c1293262f19f25e6428317d3d386f78138fba2a44e37d",
                "sha256:308e0758587ee16d4e73e7f2f8aae8351091e343bf0a43d2f697f9535465c816",
                "sha256:37cacddf0e8209905f52537a8cf71da0dd9a4de62bd79247274c97b24a408997",
                "sha256:38a23bb599748adf23d77f74885c0de6f4a7d9baa42f74e476bbf90fba2b47dd",
                "sha256:47ab18b7b7bbc36fd2b606289b703b6f0ee915b923d6ad94dd17ac80ebffc280",
                "sha256:486c78330af0bf33f5077b51d1888c0739c3cd1a03d5aade0d48572b3b5690ca",
                "sha256:4e2183458d6ef1ae87dfb5d6acd0786359336cd9ac0ece6396c09b59fdaa3bd6",
                "sha256:51d0595c3e58814c831f6cd2b664a5bf9590e26262c1d541b380d041e4fcb3c0",
                "sha256:56d259d56822b70881760b243957f04a0cf133f0ec65eae6a33f562826aee899",
                "sha256:5e6e777653169a3cc24ea56bb3d8c845ea391f8914c35bb6f350b0753a52891c",
                "sha256:62bfb0ebb0f59e5dccc0b0dbbc0fc40dd1d1e09d04c0dc71f89790231531d4a2",
                "sha256:67d89b64debfa021b54aa6f24bbf008403bd144748a0148596b518bce80d2fc4",
                "sha256:6bf38571f555fa214493ec6349d29024cc5f313bf1715b09f236c553fd22ae4d",
                "sha256:9214ca445c18a37bfe9c165982c0e317e2f21f035c8d635d1c6d9fcbaf35b7a8",
                "sha256:ab0c52850428d2e86029935389379c2c97f752e76b616da851deec8a4484f8ec",
                "sha256:b2eff336697d8dfd712c5d93fef9f4e4d3e97d9d8c258801836b8664a239e07a",
                "sha256:bb33fabc0b8f3fe3bb0f8d6821b2fad5b2a64c27a0808e8d1c5c1e3362062064",
                "sha256:bd5353ab342bae1262b04745934cc1565df4cbc8d6a979a0c98f42209bd5c265",
                "sha256:bd73444efd1ac06dac27b8405bbe8791a02fd1bc8a2fa0e575257f90b7b57467",
                "sha256:bd932236a2ef91a75fee5d7f4ace80ab494c5a59cd092a67c9785ddb7fdc218c",
                "sha256:c45650de228ace7731e4280e14fb687f6d5c29cd666c5b22b42492b035e994d6",
                "sha256:d5c0c01da45f901a3d429e7ef9e7e22baa869e1affb8715f1bf94e6a30020740",
                "sha256:d75035db5bde802a29f4f29f18bb7548863d29ac90ccbf2c04c11799bbbba2c3",
                "sha256:dda88206dc9464923f27f601000bc5b152ac0bd6d0122f098d4f239150a70076",
                "sha256:e1c2ac5d0aa232c0f60fecc6bd1122346885086a176f939b91058c4c980cc226",
                "sha256:e626c65a8587921ebc7fb8d31a49addfdd0b9a9aa96315ea484c09803337b955"
            ],
            "version": "==2.8.0"
        },
        "keras": {
            "hashes": [
                "sha256:794d0c92c6c4122f1f0fcf3a7bc2f49054c6a54ddbef8d8ffafca62795d760b6",
                "sha256:90b610a3dbbf6d257b20a079eba3fdf2eed2158f64066a7c6f7227023fd60bc9"
            ],
            "index": "pypi",
            "version": "==2.2.4"
        },
        "keras-applications": {
            "hashes": [
                "sha256:721dda4fa4e043e5bbd6f52a2996885c4639a7130ae478059b3798d0706f5ae7",
                "sha256:a03af60ddc9c5afdae4d5c9a8dd4ca857550e0b793733a5072e0725829b87017"
            ],
            "version": "==1.0.6"
        },
        "keras-preprocessing": {
            "hashes": [
                "sha256:90d04c1750bccceef88ac09475c291b4b5f6aa1eaf0603167061b1aa8b043c61",
                "sha256:ef2e482c4336fcf7180244d06f4374939099daa3183816e82aee7755af35b754"
            ],
            "version": "==1.0.5"
        },
        "keras-rl": {
            "hashes": [
                "sha256:7bbbb24c8f4560a03f59fb062a5003da102de033bc8cd7d06b69b4c1b48ec054"
            ],
            "index": "pypi",
            "version": "==0.4.2"
        },
        "numpy": {
            "hashes": [
                "sha256:032df9b6571c5f1d41ea6f6a189223208cb488990373aa686aca55570fcccb42",
                "sha256:094f8a83e5bd0a44a7557fa24a46db6ba7d5299c389ddbc9e0e18722f567fb63",
                "sha256:1c0c80e74759fa4942298044274f2c11b08c86230b25b8b819e55e644f5ff2b6",
                "sha256:2aa0910eaeb603b1a5598193cc3bc8eacf1baf6c95cbc3955eb8e15fa380c133",
                "sha256:2f5ebc7a04885c7d69e5daa05208faef4db7f1ae6a99f4d36962df8cd54cdc76",
                "sha256:32a07241cb624e104b88b08dea2851bf4ec5d65a1f599d7735041ced7171fd7a",
                "sha256:3c7959f750b54b445f14962a3ddc41b9eadbab00b86da55fbb1967b2b79aad10",
                "sha256:3d8f9273c763a139a99e65c2a3c10f1109df30bedae7f011b10d95c538364704",
                "sha256:63bca71691339d2d6f8a7c970821f2b12098a53afccc0190d4e1555e75e5223a",
                "sha256:7ae9c3baff3b989859c88e0168ad10902118595b996bf781eaf011bb72428798",
                "sha256:866a7c8774ccc7d603667fad95456b4cf56d79a2bb5a7648ac9f0082e0b9416e",
                "sha256:8bc4b92a273659e44ca3f3a2f8786cfa39d8302223bcfe7df794429c63d5f5a1",
                "sha256:919f65e0732195474897b1cafefb4d4e7c2bb8174a725e506b62e9096e4df28d",
                "sha256:9d1598573d310104acb90377f0a8c2319f737084689f5eb18012becaf345cda5",
                "sha256:9fff90c88bfaad2901be50453d5cd7897a826c1d901f0654ee1d73ab3a48cd18",
                "sha256:a245464ddf6d90e2d6287e9cef6bcfda2a99467fdcf1b677b99cd0b6c7b43de2",
                "sha256:a988db28f54e104a01e8573ceb6f28202b4c15635b1450b2e3b2b822c6564f9b",
                "sha256:b12fe6f31babb9477aa0f9692730654b3ee0e71f33b4568170dfafd439caf0a2",
                "sha256:b7599ff4acd23f5de983e3aec772153b1043e131487a5c6ad0f94b41a828877a",
                "sha256:c9f4dafd6065c4c782be84cd67ceeb9b1d4380af60a7af32be10ebecd723385e",
                "sha256:ce3622b73ccd844ba301c1aea65d36cf9d8331e7c25c16b1725d0f14db99aaf4",
                "sha256:d0f36a24cf8061a2c03e151be3418146717505b9b4ec17502fa3bbdb04ec1431",
                "sha256:d263f8f14f2da0c079c0297e829e550d8f2c4e0ffef215506bd1d0ddd2bff3de",
                "sha256:d8837ff272800668aabdfe70b966631914b0d6513aed4fc1b1428446f771834d",
                "sha256:ef694fe72a3995aa778a5095bda946e0d31f7efabd5e8063ad8c6238ab7d3f78",
                "sha256:f1fd1a6f40a501ba4035f5ed2c1f4faa68245d1407bf97d2ee401e4f23d1720b",
                "sha256:fa337b6bd5fe2b8c4e705f4102186feb9985de9bb8536d32d5129a658f1789e0",
                "sha256:febd31cd0d2fd2509ca2ec53cb339f8bf593c1bd245b9fc55c1917a68532a0af"
            ],
            "index": "pypi",
            "version": "==1.15.3"
        },
        "pyyaml": {
            "hashes": [
                "sha256:3d7da3009c0f3e783b2c873687652d83b1bbfd5c88e9813fb7e5b03c0dd3108b",
                "sha256:3ef3092145e9b70e3ddd2c7ad59bdd0252a94dfe3949721633e41344de00a6bf",
                "sha256:40c71b8e076d0550b2e6380bada1f1cd1017b882f7e16f09a65be98e017f211a",
                "sha256:558dd60b890ba8fd982e05941927a3911dc409a63dcb8b634feaa0cda69330d3",
                "sha256:a7c28b45d9f99102fa092bb213aa12e0aaf9a6a1f5e395d36166639c1f96c3a1",
                "sha256:aa7dd4a6a427aed7df6fb7f08a580d68d9b118d90310374716ae90b710280af1",
                "sha256:bc558586e6045763782014934bfaf39d48b8ae85a2713117d16c39864085c613",
                "sha256:d46d7982b62e0729ad0175a9bc7e10a566fc07b224d2c79fafb5e032727eaa04",
                "sha256:d5eef459e30b09f5a098b9cea68bebfeb268697f78d647bd255a085371ac7f3f",
                "sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537",
                "sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531"
            ],
            "version": "==3.13"
        },
        "scipy": {
            "hashes": [
                "sha256:0611ee97296265af4a21164a5323f8c1b4e8e15c582d3dfa7610825900136bb7",
                "sha256:08237eda23fd8e4e54838258b124f1cd141379a5f281b0a234ca99b38918c07a",
                "sha256:0e645dbfc03f279e1946cf07c9c754c2a1859cb4a41c5f70b25f6b3a586b6dbd",
                "sha256:0e9bb7efe5f051ea7212555b290e784b82f21ffd0f655405ac4f87e288b730b3",
                "sha256:108c16640849e5827e7d51023efb3bd79244098c3f21e4897a1007720cb7ce37",
                "sha256:340ef70f5b0f4e2b4b43c8c8061165911bc6b2ad16f8de85d9774545e2c47463",
                "sha256:3ad73dfc6f82e494195144bd3a129c7241e761179b7cb5c07b9a0ede99c686f3",
                "sha256:3b243c77a822cd034dad53058d7c2abf80062aa6f4a32e9799c95d6391558631",
                "sha256:404a00314e85eca9d46b80929571b938e97a143b4f2ddc2b2b3c91a4c4ead9c5",
                "sha256:423b3ff76957d29d1cce1bc0d62ebaf9a3fdfaf62344e3fdec14619bb7b5ad3a",
                "sha256:42d9149a2fff7affdd352d157fa5717033767857c11bd55aa4a519a44343dfef",
                "sha256:625f25a6b7d795e8830cb70439453c9f163e6870e710ec99eba5722775b318f3",
                "sha256:698c6409da58686f2df3d6f815491fd5b4c2de6817a45379517c92366eea208f",
                "sha256:729f8f8363d32cebcb946de278324ab43d28096f36593be6281ca1ee86ce6559",
                "sha256:8190770146a4c8ed5d330d5b5ad1c76251c63349d25c96b3094875b930c44692",
                "sha256:878352408424dffaa695ffedf2f9f92844e116686923ed9aa8626fc30d32cfd1",
                "sha256:8b984f0821577d889f3c7ca8445564175fb4ac7c7f9659b7c60bef95b2b70e76",
                "sha256:8f841bbc21d3dad2111a94c490fb0a591b8612ffea86b8e5571746ae76a3deac",
                "sha256:c22b27371b3866c92796e5d7907e914f0e58a36d3222c5d436ddd3f0e354227a",
                "sha256:d0cdd5658b49a722783b8b4f61a6f1f9c75042d0e29a30ccb6cacc9b25f6d9e2",
                "sha256:d40dc7f494b06dcee0d303e51a00451b2da6119acbeaccf8369f2d29e28917ac",
                "sha256:d8491d4784aceb1f100ddb8e31239c54e4afab8d607928a9f7ef2469ec35ae01",
                "sha256:dfc5080c38dde3f43d8fbb9c0539a7839683475226cf83e4b24363b227dfe552",
                "sha256:e24e22c8d98d3c704bb3410bce9b69e122a8de487ad3dbfe9985d154e5c03a40",
                "sha256:e7a01e53163818d56eabddcafdc2090e9daba178aad05516b20c6591c4811020",
                "sha256:ee677635393414930541a096fc8e61634304bb0153e4e02b75685b11eba14cae",
                "sha256:f0521af1b722265d824d6ad055acfe9bd3341765735c44b5a4d0069e189a0f40",
                "sha256:f25c281f12c0da726c6ed00535ca5d1622ec755c30a3f8eafef26cf43fede694"
            ],
            "version": "==1.1.0"
        },
        "six": {
            "hashes": [
                "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
                "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
            ],
            "version": "==1.11.0"
        }
    },
    "develop": {}
 }
--- a/libs/gym
+++ b/libs/gym
--- a/pilesos2.log
+++ b/pilesos2.log
@@ -0,0 +1,169 @@
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.LEFT
 Action.LEFT
 Action.FORWARD
 Action.SUCK
 Action.LEFT
 Action.FORWARD
 Action.LEFT
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.SUCK
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.LEFT
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.LEFT
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.LEFT
 Action.LEFT
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.LEFT
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.SUCK
 Action.RIGHT
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.LEFT
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.FORWARD
 Action.RIGHT
 Action.FORWARD
 Action.FORWARD
 Action.DIE
--- a/src/init.py
+++ b/src/init.py
--- a/src/gym/init.py
+++ b/src/gym/init.py
@@ -0,0 +1,14 @@
 import distutils.version
 import os
 import sys
 import warnings
 from gym import error
 from gym.utils import reraise
 from gym.version import VERSION as __version__
 from gym.core import Env, GoalEnv, Space, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
 from gym.envs import make, spec
 from gym import logger
 __all__ = ["Env", "Space", "Wrapper", "make", "spec"]
--- a/src/gym/pycache/init.cpython-37.pyc
+++ b/src/gym/pycache/init.cpython-37.pyc
--- a/src/gym/pycache/core.cpython-37.pyc
+++ b/src/gym/pycache/core.cpython-37.pyc
--- a/src/gym/pycache/error.cpython-37.pyc
+++ b/src/gym/pycache/error.cpython-37.pyc
--- a/src/gym/pycache/logger.cpython-37.pyc
+++ b/src/gym/pycache/logger.cpython-37.pyc
--- a/src/gym/pycache/version.cpython-37.pyc
+++ b/src/gym/pycache/version.cpython-37.pyc
--- a/src/gym/core.py
+++ b/src/gym/core.py
@@ -0,0 +1,343 @@
 from gym import logger
 import gym
 from gym import error
 from gym.utils import closer
 env_closer = closer.Closer()
 # Env-related abstractions
 class Env(object):
    """The main OpenAI Gym class. It encapsulates an environment with
    arbitrary behind-the-scenes dynamics. An environment can be
    partially or fully observed.
    The main API methods that users of this class need to know are:
        step
        reset
        render
        close
        seed
    And set the following attributes:
        action_space: The Space object corresponding to valid actions
        observation_space: The Space object corresponding to valid observations
        reward_range: A tuple corresponding to the min and max possible rewards
    Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.
    The methods are accessed publicly as "step", "reset", etc.. The
    non-underscored versions are wrapper methods to which we may add
    functionality over time.
    """
    # Set this in SOME subclasses
    metadata = {'render.modes': []}
    reward_range = (-float('inf'), float('inf'))
    spec = None
    # Set these in ALL subclasses
    action_space = None
    observation_space = None
    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        Args:
            action (object): an action provided by the environment
        Returns:
            observation (object): agent's observation of the current environment
            reward (float) : amount of reward returned after previous action
            done (boolean): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
        """
        raise NotImplementedError
    def reset(self):
        """Resets the state of the environment and returns an initial observation.
        Returns: observation (object): the initial observation of the
            space.
        """
        raise NotImplementedError
    def render(self, mode='human'):
        """Renders the environment.
        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.) By convention,
        if mode is:
        - human: render to the current display or terminal and
          return nothing. Usually for human consumption.
        - rgb_array: Return an numpy.ndarray with shape (x, y, 3),
          representing RGB values for an x-by-y pixel image, suitable
          for turning into a video.
        - ansi: Return a string (str) or StringIO.StringIO containing a
          terminal-style text representation. The text can include newlines
          and ANSI escape sequences (e.g. for colors).
        Note:
            Make sure that your class's metadata 'render.modes' key includes
              the list of supported modes. It's recommended to call super()
              in implementations to use the functionality of this method.
        Args:
            mode (str): the mode to render with
            close (bool): close all open renderings
        Example:
        class MyEnv(Env):
            metadata = {'render.modes': ['human', 'rgb_array']}
            def render(self, mode='human'):
                if mode == 'rgb_array':
                    return np.array(...) # return RGB frame suitable for video
                elif mode is 'human':
                    ... # pop up a window and render
                else:
                    super(MyEnv, self).render(mode=mode) # just raise an exception
        """
        raise NotImplementedError
    def close(self):
        """Override _close in your subclass to perform any necessary cleanup.
        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        return
    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).
        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.
        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.
        """
        logger.warn("Could not seed environment %s", self)
        return
    @property
    def unwrapped(self):
        """Completely unwrap this env.
        Returns:
            gym.Env: The base non-wrapped gym.Env instance
        """
        return self
    def __str__(self):
        if self.spec is None:
            return '<{} instance>'.format(type(self).__name__)
        else:
            return '<{}<{}>>'.format(type(self).__name__, self.spec.id)
 class GoalEnv(Env):
    """A goal-based environment. It functions just as any regular OpenAI Gym environment but it
    imposes a required structure on the observation_space. More concretely, the observation
    space is required to contain at least three elements, namely `observation`, `desired_goal`, and
    `achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve.
    `achieved_goal` is the goal that it currently achieved instead. `observation` contains the
    actual observations of the environment as per usual.
    """
    def reset(self):
        # Enforce that each GoalEnv uses a Goal-compatible observation space.
        if not isinstance(self.observation_space, gym.spaces.Dict):
            raise error.Error('GoalEnv requires an observation space of type gym.spaces.Dict')
        result = super(GoalEnv, self).reset()
        for key in ['observation', 'achieved_goal', 'desired_goal']:
            if key not in result:
                raise error.Error('GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(key))
        return result
    def compute_reward(self, achieved_goal, desired_goal, info):
        """Compute the step reward. This externalizes the reward function and makes
        it dependent on an a desired goal and the one that was achieved. If you wish to include
        additional rewards that are independent of the goal, you can include the necessary values
        to derive it in info and compute it accordingly.
        Args:
            achieved_goal (object): the goal that was achieved during execution
            desired_goal (object): the desired goal that we asked the agent to attempt to achieve
            info (dict): an info dictionary with additional information
        Returns:
            float: The reward that corresponds to the provided achieved goal w.r.t. to the desired
            goal. Note that the following should always hold true:
                ob, reward, done, info = env.step()
                assert reward == env.compute_reward(ob['achieved_goal'], ob['goal'], info)
        """
        raise NotImplementedError()
 # Space-related abstractions
 class Space(object):
    """Defines the observation and action spaces, so you can write generic
    code that applies to any Env. For example, you can choose a random
    action.
    """
    def __init__(self, shape=None, dtype=None):
        import numpy as np # takes about 300-400ms to import, so we load lazily
        self.shape = None if shape is None else tuple(shape)
        self.dtype = None if dtype is None else np.dtype(dtype)
    def sample(self):
        """
        Uniformly randomly sample a random element of this space
        """
        raise NotImplementedError
    def contains(self, x):
        """
        Return boolean specifying if x is a valid
        member of this space
        """
        raise NotImplementedError
    __contains__ = contains
    def to_jsonable(self, sample_n):
        """Convert a batch of samples from this space to a JSONable data type."""
        # By default, assume identity is JSONable
        return sample_n
    def from_jsonable(self, sample_n):
        """Convert a JSONable data type to a batch of samples from this space."""
        # By default, assume identity is JSONable
        return sample_n
 warn_once = True
 def deprecated_warn_once(text):
    global warn_once
    if not warn_once: return
    warn_once = False
    logger.warn(text)
 class Wrapper(Env):
    env = None
    def __init__(self, env):
        self.env = env
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.reward_range = self.env.reward_range
        self.metadata = self.env.metadata
    @classmethod
    def class_name(cls):
        return cls.__name__
    def step(self, action):
        if hasattr(self, "_step"):
            deprecated_warn_once("%s doesn't implement 'step' method, but it implements deprecated '_step' method." % type(self))
            self.step = self._step
            return self.step(action)
        else:
            deprecated_warn_once("%s doesn't implement 'step' method, " % type(self) +
                "which is required for wrappers derived directly from Wrapper. Deprecated default implementation is used.")
            return self.env.step(action)
    def reset(self, **kwargs):
        if hasattr(self, "_reset"):
            deprecated_warn_once("%s doesn't implement 'reset' method, but it implements deprecated '_reset' method." % type(self))
            self.reset = self._reset
            return self._reset(**kwargs)
        else:
            deprecated_warn_once("%s doesn't implement 'reset' method, " % type(self) +
                "which is required for wrappers derived directly from Wrapper. Deprecated default implementation is used.")
            return self.env.reset(**kwargs)
    def render(self, mode='human', **kwargs):
        return self.env.render(mode, **kwargs)
    def close(self):
        if self.env:
            return self.env.close()
    def seed(self, seed=None):
        return self.env.seed(seed)
    def compute_reward(self, achieved_goal, desired_goal, info):
        return self.env.compute_reward(achieved_goal, desired_goal, info)
    def __str__(self):
        return '<{}{}>'.format(type(self).__name__, self.env)
    def __repr__(self):
        return str(self)
    @property
    def unwrapped(self):
        return self.env.unwrapped
    @property
    def spec(self):
        return self.env.spec
 class ObservationWrapper(Wrapper):
    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        return self.observation(observation), reward, done, info
    def reset(self, **kwargs):
        observation = self.env.reset(**kwargs)
        return self.observation(observation)
    def observation(self, observation):
        deprecated_warn_once("%s doesn't implement 'observation' method. Maybe it implements deprecated '_observation' method." % type(self))
        return self._observation(observation)
 class RewardWrapper(Wrapper):
    def reset(self):
        return self.env.reset()
    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        return observation, self.reward(reward), done, info
    def reward(self, reward):
        deprecated_warn_once("%s doesn't implement 'reward' method. Maybe it implements deprecated '_reward' method." % type(self))
        return self._reward(reward)
 class ActionWrapper(Wrapper):
    def step(self, action):
        action = self.action(action)
        return self.env.step(action)
    def reset(self):
        return self.env.reset()
    def action(self, action):
        deprecated_warn_once("%s doesn't implement 'action' method. Maybe it implements deprecated '_action' method." % type(self))
        return self._action(action)
    def reverse_action(self, action):
        deprecated_warn_once("%s doesn't implement 'reverse_action' method. Maybe it implements deprecated '_reverse_action' method." % type(self))
        return self._reverse_action(action)
--- a/src/gym/envs/README.md
+++ b/src/gym/envs/README.md
@@ -0,0 +1,113 @@
 # Envs
 These are the core integrated environments. Note that we may later
 restructure any of the files, but will keep the environments available
 at the relevant package's top-level. So for example, you should access
 `AntEnv` as follows:
 ```
 # Will be supported in future releases
 from gym.envs import mujoco
 mujoco.AntEnv
 ```
 Rather than:
 ```
 # May break in future releases
 from gym.envs.mujoco import ant
 ant.AntEnv
 ```
 ## How to create new environments for Gym
 * Create a new repo called gym-foo, which should also be a PIP package.
 * A good example is https://github.com/openai/gym-soccer.
 * It should have at least the following files:
  ```sh
  gym-foo/
    README.md
    setup.py
    gym_foo/
      __init__.py
      envs/
        __init__.py
        foo_env.py
        foo_extrahard_env.py
  ```
 * `gym-foo/setup.py` should have:
  ```python
  from setuptools import setup
  setup(name='gym_foo',
        version='0.0.1',
        install_requires=['gym']  # And any other dependencies foo needs
  )  
  ```
 * `gym-foo/gym_foo/__init__.py` should have:
  ```python
  from gym.envs.registration import register
  register(
      id='foo-v0',
      entry_point='gym_foo.envs:FooEnv',
  )
  register(
      id='foo-extrahard-v0',
      entry_point='gym_foo.envs:FooExtraHardEnv',
  )
  ```
 * `gym-foo/gym_foo/envs/__init__.py` should have:
  ```python
  from gym_foo.envs.foo_env import FooEnv
  from gym_foo.envs.foo_extrahard_env import FooExtraHardEnv
  ```
 * `gym-foo/gym_foo/envs/foo_env.py` should look something like:
  ```python
  import gym
  from gym import error, spaces, utils
  from gym.utils import seeding
  class FooEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    def __init__(self):
      ...
    def step(self, action):
      ...
    def reset(self):
      ...
    def render(self, mode='human', close=False):
      ...
  ```
 ## How to add new environments to Gym, within this repo (not recommended for new environments)
 1. Write your environment in an existing collection or a new collection. All collections are subfolders of `/gym/envs'.
 2. Import your environment into the `__init__.py` file of the collection. This file will be located at `/gym/envs/my_collection/__init__.py`. Add `from gym.envs.my_collection.my_awesome_env import MyEnv` to this file.
 3. Register your env in `/gym/envs/__init__.py`:
 ```
 register(
 		id='MyEnv-v0',
 		entry_point='gym.envs.my_collection:MyEnv',
 )
 ```
 4. Add your environment to the scoreboard in `/gym/scoreboard/__init__.py`:
 ```
 add_task(
 		id='MyEnv-v0',
 		summary="Super cool environment",
 		group='my_collection',
 		contributor='mygithubhandle',
 )
 ```
--- a/src/gym/envs/init.py
+++ b/src/gym/envs/init.py
@@ -0,0 +1,540 @@
 from gym.envs.registration import registry, register, make, spec
 # Algorithmic
 # ----------------------------------------
 register(
  id='Copy-v0',
  entry_point='gym.envs.algorithmic:CopyEnv',
  max_episode_steps=200,
  reward_threshold=25.0,
 )
 register(
  id='RepeatCopy-v0',
  entry_point='gym.envs.algorithmic:RepeatCopyEnv',
  max_episode_steps=200,
  reward_threshold=75.0,
 )
 register(
  id='ReversedAddition-v0',
  entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
  kwargs={'rows': 2},
  max_episode_steps=200,
  reward_threshold=25.0,
 )
 register(
  id='ReversedAddition3-v0',
  entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
  kwargs={'rows': 3},
  max_episode_steps=200,
  reward_threshold=25.0,
 )
 register(
  id='DuplicatedInput-v0',
  entry_point='gym.envs.algorithmic:DuplicatedInputEnv',
  max_episode_steps=200,
  reward_threshold=9.0,
 )
 register(
  id='Reverse-v0',
  entry_point='gym.envs.algorithmic:ReverseEnv',
  max_episode_steps=200,
  reward_threshold=25.0,
 )
 # Classic
 # ----------------------------------------
 register(
  id='CartPole-v0',
  entry_point='gym.envs.classic_control:CartPoleEnv',
  max_episode_steps=200,
  reward_threshold=195.0,
 )
 register(
  id='CartPole-v1',
  entry_point='gym.envs.classic_control:CartPoleEnv',
  max_episode_steps=500,
  reward_threshold=475.0,
 )
 register(
  id='MountainCar-v0',
  entry_point='gym.envs.classic_control:MountainCarEnv',
  max_episode_steps=200,
  reward_threshold=-110.0,
 )
 register(
  id='MountainCarContinuous-v0',
  entry_point='gym.envs.classic_control:Continuous_MountainCarEnv',
  max_episode_steps=999,
  reward_threshold=90.0,
 )
 register(
  id='Pendulum-v0',
  entry_point='gym.envs.classic_control:PendulumEnv',
  max_episode_steps=200,
 )
 register(
  id='Acrobot-v1',
  entry_point='gym.envs.classic_control:AcrobotEnv',
  max_episode_steps=500,
 )
 # Box2d
 # ----------------------------------------
 register(
  id='LunarLander-v2',
  entry_point='gym.envs.box2d:LunarLander',
  max_episode_steps=1000,
  reward_threshold=200,
 )
 register(
  id='LunarLanderContinuous-v2',
  entry_point='gym.envs.box2d:LunarLanderContinuous',
  max_episode_steps=1000,
  reward_threshold=200,
 )
 register(
  id='BipedalWalker-v2',
  entry_point='gym.envs.box2d:BipedalWalker',
  max_episode_steps=1600,
  reward_threshold=300,
 )
 register(
  id='BipedalWalkerHardcore-v2',
  entry_point='gym.envs.box2d:BipedalWalkerHardcore',
  max_episode_steps=2000,
  reward_threshold=300,
 )
 register(
  id='CarRacing-v0',
  entry_point='gym.envs.box2d:CarRacing',
  max_episode_steps=1000,
  reward_threshold=900,
 )
 # Toy Text
 # ----------------------------------------
 register(
  id='Blackjack-v0',
  entry_point='gym.envs.toy_text:BlackjackEnv',
 )
 register(
  id='KellyCoinflip-v0',
  entry_point='gym.envs.toy_text:KellyCoinflipEnv',
  reward_threshold=246.61,
 )
 register(
  id='KellyCoinflipGeneralized-v0',
  entry_point='gym.envs.toy_text:KellyCoinflipGeneralizedEnv',
 )
 register(
  id='FrozenLake-v0',
  entry_point='gym.envs.toy_text:FrozenLakeEnv',
  kwargs={'map_name': '4x4'},
  max_episode_steps=100,
  reward_threshold=0.78,  # optimum = .8196
 )
 register(
  id='FrozenLake8x8-v0',
  entry_point='gym.envs.toy_text:FrozenLakeEnv',
  kwargs={'map_name': '8x8'},
  max_episode_steps=200,
  reward_threshold=0.99,  # optimum = 1
 )
 register(
  id='CliffWalking-v0',
  entry_point='gym.envs.toy_text:CliffWalkingEnv',
 )
 register(
  id='NChain-v0',
  entry_point='gym.envs.toy_text:NChainEnv',
  max_episode_steps=1000,
 )
 register(
  id='Roulette-v0',
  entry_point='gym.envs.toy_text:RouletteEnv',
  max_episode_steps=100,
 )
 register(
  id='Taxi-v2',
  entry_point='gym.envs.toy_text.taxi:TaxiEnv',
  reward_threshold=8,  # optimum = 8.46
  max_episode_steps=200,
 )
 register(
  id='GuessingGame-v0',
  entry_point='gym.envs.toy_text.guessing_game:GuessingGame',
  max_episode_steps=200,
 )
 register(
  id='HotterColder-v0',
  entry_point='gym.envs.toy_text.hotter_colder:HotterColder',
  max_episode_steps=200,
 )
 # Mujoco
 # ----------------------------------------
 # 2D
 register(
  id='Reacher-v2',
  entry_point='gym.envs.mujoco:ReacherEnv',
  max_episode_steps=50,
  reward_threshold=-3.75,
 )
 register(
  id='Pusher-v2',
  entry_point='gym.envs.mujoco:PusherEnv',
  max_episode_steps=100,
  reward_threshold=0.0,
 )
 register(
  id='Thrower-v2',
  entry_point='gym.envs.mujoco:ThrowerEnv',
  max_episode_steps=100,
  reward_threshold=0.0,
 )
 register(
  id='Striker-v2',
  entry_point='gym.envs.mujoco:StrikerEnv',
  max_episode_steps=100,
  reward_threshold=0.0,
 )
 register(
  id='InvertedPendulum-v2',
  entry_point='gym.envs.mujoco:InvertedPendulumEnv',
  max_episode_steps=1000,
  reward_threshold=950.0,
 )
 register(
  id='InvertedDoublePendulum-v2',
  entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
  max_episode_steps=1000,
  reward_threshold=9100.0,
 )
 register(
  id='HalfCheetah-v2',
  entry_point='gym.envs.mujoco:HalfCheetahEnv',
  max_episode_steps=1000,
  reward_threshold=4800.0,
 )
 register(
  id='Hopper-v2',
  entry_point='gym.envs.mujoco:HopperEnv',
  max_episode_steps=1000,
  reward_threshold=3800.0,
 )
 register(
  id='Swimmer-v2',
  entry_point='gym.envs.mujoco:SwimmerEnv',
  max_episode_steps=1000,
  reward_threshold=360.0,
 )
 register(
  id='Walker2d-v2',
  max_episode_steps=1000,
  entry_point='gym.envs.mujoco:Walker2dEnv',
 )
 register(
  id='Ant-v2',
  entry_point='gym.envs.mujoco:AntEnv',
  max_episode_steps=1000,
  reward_threshold=6000.0,
 )
 register(
  id='Humanoid-v2',
  entry_point='gym.envs.mujoco:HumanoidEnv',
  max_episode_steps=1000,
 )
 register(
  id='HumanoidStandup-v2',
  entry_point='gym.envs.mujoco:HumanoidStandupEnv',
  max_episode_steps=1000,
 )
 # Robotics
 # ----------------------------------------
 def _merge(a, b):
  a.update(b)
  return a
 for reward_type in ['sparse', 'dense']:
  suffix = 'Dense' if reward_type == 'dense' else ''
  kwargs = {
    'reward_type': reward_type,
  }
  # Fetch
  register(
    id='FetchSlide{}-v1'.format(suffix),
    entry_point='gym.envs.robotics:FetchSlideEnv',
    kwargs=kwargs,
    max_episode_steps=50,
  )
  register(
    id='FetchPickAndPlace{}-v1'.format(suffix),
    entry_point='gym.envs.robotics:FetchPickAndPlaceEnv',
    kwargs=kwargs,
    max_episode_steps=50,
  )
  register(
    id='FetchReach{}-v1'.format(suffix),
    entry_point='gym.envs.robotics:FetchReachEnv',
    kwargs=kwargs,
    max_episode_steps=50,
  )
  register(
    id='FetchPush{}-v1'.format(suffix),
    entry_point='gym.envs.robotics:FetchPushEnv',
    kwargs=kwargs,
    max_episode_steps=50,
  )
  # Hand
  register(
    id='HandReach{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandReachEnv',
    kwargs=kwargs,
    max_episode_steps=50,
  )
  register(
    id='HandManipulateBlockRotateZ{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandBlockEnv',
    kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'z'}, kwargs),
    max_episode_steps=100,
  )
  register(
    id='HandManipulateBlockRotateParallel{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandBlockEnv',
    kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'parallel'}, kwargs),
    max_episode_steps=100,
  )
  register(
    id='HandManipulateBlockRotateXYZ{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandBlockEnv',
    kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
    max_episode_steps=100,
  )
  register(
    id='HandManipulateBlockFull{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandBlockEnv',
    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
    max_episode_steps=100,
  )
  # Alias for "Full"
  register(
    id='HandManipulateBlock{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandBlockEnv',
    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
    max_episode_steps=100,
  )
  register(
    id='HandManipulateEggRotate{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandEggEnv',
    kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
    max_episode_steps=100,
  )
  register(
    id='HandManipulateEggFull{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandEggEnv',
    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
    max_episode_steps=100,
  )
  # Alias for "Full"
  register(
    id='HandManipulateEgg{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandEggEnv',
    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
    max_episode_steps=100,
  )
  register(
    id='HandManipulatePenRotate{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandPenEnv',
    kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
    max_episode_steps=100,
  )
  register(
    id='HandManipulatePenFull{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandPenEnv',
    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
    max_episode_steps=100,
  )
  # Alias for "Full"
  register(
    id='HandManipulatePen{}-v0'.format(suffix),
    entry_point='gym.envs.robotics:HandPenEnv',
    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
    max_episode_steps=100,
  )
 # Atari
 # ----------------------------------------
 # # print ', '.join(["'{}'".format(name.split('.')[0]) for name in atari_py.list_games()])
 for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis',
             'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival',
             'centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk',
             'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar',
             'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kangaroo', 'krull', 'kung_fu_master',
             'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan',
             'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing',
             'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down',
             'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']:
  for obs_type in ['image', 'ram']:
    # space_invaders should yield SpaceInvaders-v0 and SpaceInvaders-ram-v0
    name = ''.join([g.capitalize() for g in game.split('_')])
    if obs_type == 'ram':
      name = '{}-ram'.format(name)
    nondeterministic = False
    if game == 'elevator_action' and obs_type == 'ram':
      # ElevatorAction-ram-v0 seems to yield slightly
      # non-deterministic observations about 10% of the time. We
      # should track this down eventually, but for now we just
      # mark it as nondeterministic.
      nondeterministic = True
    register(
      id='{}-v0'.format(name),
      entry_point='gym.envs.atari:AtariEnv',
      kwargs={'game': game, 'obs_type': obs_type, 'repeat_action_probability': 0.25},
      max_episode_steps=10000,
      nondeterministic=nondeterministic,
    )
    register(
      id='{}-v4'.format(name),
      entry_point='gym.envs.atari:AtariEnv',
      kwargs={'game': game, 'obs_type': obs_type},
      max_episode_steps=100000,
      nondeterministic=nondeterministic,
    )
    # Standard Deterministic (as in the original DeepMind paper)
    if game == 'space_invaders':
      frameskip = 3
    else:
      frameskip = 4
    # Use a deterministic frame skip.
    register(
      id='{}Deterministic-v0'.format(name),
      entry_point='gym.envs.atari:AtariEnv',
      kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip, 'repeat_action_probability': 0.25},
      max_episode_steps=100000,
      nondeterministic=nondeterministic,
    )
    register(
      id='{}Deterministic-v4'.format(name),
      entry_point='gym.envs.atari:AtariEnv',
      kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip},
      max_episode_steps=100000,
      nondeterministic=nondeterministic,
    )
    register(
      id='{}NoFrameskip-v0'.format(name),
      entry_point='gym.envs.atari:AtariEnv',
      kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1, 'repeat_action_probability': 0.25},
      # A frameskip of 1 means we get every frame
      max_episode_steps=frameskip * 100000,
      nondeterministic=nondeterministic,
    )
    # No frameskip. (Atari has no entropy source, so these are
    # deterministic environments.)
    register(
      id='{}NoFrameskip-v4'.format(name),
      entry_point='gym.envs.atari:AtariEnv',
      kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1},  # A frameskip of 1 means we get every frame
      max_episode_steps=frameskip * 100000,
      nondeterministic=nondeterministic,
    )
 # Unit test
 # ---------
 register(
  id='CubeCrash-v0',
  entry_point='gym.envs.unittest:CubeCrash',
  reward_threshold=0.9,
 )
 register(
  id='CubeCrashSparse-v0',
  entry_point='gym.envs.unittest:CubeCrashSparse',
  reward_threshold=0.9,
 )
 register(
  id='CubeCrashScreenBecomesBlack-v0',
  entry_point='gym.envs.unittest:CubeCrashScreenBecomesBlack',
  reward_threshold=0.9,
 )
 register(
  id='MemorizeDigits-v0',
  entry_point='gym.envs.unittest:MemorizeDigits',
  reward_threshold=20,
 )
 register(
  id='Pilesos-v0',
  entry_point='gym.envs.pilesos:PilesosEnv',
  max_episode_steps=5000,
 )
--- a/src/gym/envs/pycache/init.cpython-37.pyc
+++ b/src/gym/envs/pycache/init.cpython-37.pyc
--- a/src/gym/envs/pycache/registration.cpython-37.pyc
+++ b/src/gym/envs/pycache/registration.cpython-37.pyc
--- a/src/gym/envs/algorithmic/init.py
+++ b/src/gym/envs/algorithmic/init.py
@@ -0,0 +1,5 @@
 from gym.envs.algorithmic.copy_ import CopyEnv
 from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv
 from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv
 from gym.envs.algorithmic.reverse import ReverseEnv
 from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv
--- a/src/gym/envs/algorithmic/algorithmic_env.py
+++ b/src/gym/envs/algorithmic/algorithmic_env.py
@@ -0,0 +1,326 @@
 """
 Algorithmic environments have the following traits in common:
 - A 1-d "input tape" or 2-d "input grid" of characters
 - A target string which is a deterministic function of the input characters
 Agents control a read head that moves over the input tape. Observations consist
 of the single character currently under the read head. The read head may fall
 off the end of the tape in any direction. When this happens, agents will observe
 a special blank character (with index=env.base) until they get back in bounds.
 Actions consist of 3 sub-actions:
    - Direction to move the read head (left or right, plus up and down for 2-d envs)
    - Whether to write to the output tape
    - Which character to write (ignored if the above sub-action is 0)
 An episode ends when:
    - The agent writes the full target string to the output tape.
    - The agent writes an incorrect character.
    - The agent runs out the time limit. (Which is fairly conservative.)
 Reward schedule:
    write a correct character: +1
    write a wrong character: -.5
    run out the clock: -1
    otherwise: 0
 In the beginning, input strings will be fairly short. After an environment has
 been consistently solved over some window of episodes, the environment will 
 increase the average length of generated strings. Typical env specs require
 leveling up many times to reach their reward threshold.
 """
 from gym import Env, logger
 from gym.spaces import Discrete, Tuple
 from gym.utils import colorize, seeding
 import numpy as np
 from six import StringIO
 import sys
 import math
 class AlgorithmicEnv(Env):
    metadata = {'render.modes': ['human', 'ansi']}
    # Only 'promote' the length of generated input strings if the worst of the 
    # last n episodes was no more than this far from the maximum reward
    MIN_REWARD_SHORTFALL_FOR_PROMOTION = -1.0
    def __init__(self, base=10, chars=False, starting_min_length=2):
        """
        base: Number of distinct characters. 
        chars: If True, use uppercase alphabet. Otherwise, digits. Only affects
               rendering.
        starting_min_length: Minimum input string length. Ramps up as episodes 
                             are consistently solved.
        """
        self.base = base
        # Keep track of this many past episodes
        self.last = 10
        # Cumulative reward earned this episode
        self.episode_total_reward = None
        # Running tally of reward shortfalls. e.g. if there were 10 points to earn and
        # we got 8, we'd append -2
        AlgorithmicEnv.reward_shortfalls = []
        if chars:
            self.charmap = [chr(ord('A')+i) for i in range(base)]
        else:
            self.charmap = [str(i) for i in range(base)]
        self.charmap.append(' ')
        # TODO: Not clear why this is a class variable rather than instance. 
        # Could lead to some spooky action at a distance if someone is working
        # with multiple algorithmic envs at once. Also makes testing tricky.
        AlgorithmicEnv.min_length = starting_min_length
        # Three sub-actions:
        #       1. Move read head left or write (or up/down)
        #       2. Write or not
        #       3. Which character to write. (Ignored if should_write=0)
        self.action_space = Tuple(
            [Discrete(len(self.MOVEMENTS)), Discrete(2), Discrete(self.base)]
        )
        # Can see just what is on the input tape (one of n characters, or nothing)
        self.observation_space = Discrete(self.base + 1)
        self.seed()
        self.reset()
    @classmethod
    def _movement_idx(kls, movement_name):
        return kls.MOVEMENTS.index(movement_name)
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def _get_obs(self, pos=None):
        """Return an observation corresponding to the given read head position
        (or the current read head position, if none is given)."""
        raise NotImplemented
    def _get_str_obs(self, pos=None):
        ret = self._get_obs(pos)
        return self.charmap[ret]
    def _get_str_target(self, pos):
        """Return the ith character of the target string (or " " if index
        out of bounds)."""
        if pos < 0 or len(self.target) <= pos:
            return " "
        else:
            return self.charmap[self.target[pos]]
    def render_observation(self):
        """Return a string representation of the input tape/grid."""
        raise NotImplementedError
    def render(self, mode='human'):
        outfile = StringIO() if mode == 'ansi' else sys.stdout
        inp = "Total length of input instance: %d, step: %d\n" % (self.input_width, self.time)
        outfile.write(inp)
        x, y, action = self.read_head_position, self.write_head_position, self.last_action
        if action is not None:
            inp_act, out_act, pred = action
        outfile.write("=" * (len(inp) - 1) + "\n")
        y_str =      "Output Tape         : "
        target_str = "Targets             : "
        if action is not None:
            pred_str = self.charmap[pred]
        x_str = self.render_observation()
        for i in range(-2, len(self.target) + 2):
            target_str += self._get_str_target(i)
            if i < y - 1:
                y_str += self._get_str_target(i)
            elif i == (y - 1):
                if action is not None and out_act == 1:
                    color = 'green' if pred == self.target[i] else 'red'
                    y_str += colorize(pred_str, color, highlight=True)
                else:
                    y_str += self._get_str_target(i)
        outfile.write(x_str)
        outfile.write(y_str + "\n")
        outfile.write(target_str + "\n\n")
        if action is not None:
            outfile.write("Current reward      :   %.3f\n" % self.last_reward)
            outfile.write("Cumulative reward   :   %.3f\n" % self.episode_total_reward)
            move = self.MOVEMENTS[inp_act]
            outfile.write("Action              :   Tuple(move over input: %s,\n" % move)
            out_act = out_act == 1
            outfile.write("                              write to the output tape: %s,\n" % out_act)
            outfile.write("                              prediction: %s)\n" % pred_str)
        else:
            outfile.write("\n" * 5)
        return outfile
    @property
    def input_width(self):
        return len(self.input_data)
    def step(self, action):
        assert self.action_space.contains(action)
        self.last_action = action
        inp_act, out_act, pred = action
        done = False
        reward = 0.0
        self.time += 1
        assert 0 <= self.write_head_position
        if out_act == 1:
            try:
                correct = pred == self.target[self.write_head_position]
            except IndexError:
                logger.warn("It looks like you're calling step() even though this "+
                    "environment has already returned done=True. You should always call "+
                    "reset() once you receive done=True. Any further steps are undefined "+
                    "behaviour.")
                correct = False
            if correct:
                reward = 1.0
            else:
                # Bail as soon as a wrong character is written to the tape
                reward = -0.5
                done = True
            self.write_head_position += 1
            if self.write_head_position >= len(self.target):
                done = True
        self._move(inp_act)
        if self.time > self.time_limit:
            reward = -1.0
            done = True
        obs = self._get_obs()
        self.last_reward = reward
        self.episode_total_reward += reward
        return (obs, reward, done, {})
    @property
    def time_limit(self):
        """If an agent takes more than this many timesteps, end the episode
        immediately and return a negative reward."""
        # (Seemingly arbitrary)
        return self.input_width + len(self.target) + 4
    def _check_levelup(self):
        """Called between episodes. Update our running record of episode rewards 
        and, if appropriate, 'level up' minimum input length."""
        if self.episode_total_reward is None:
            # This is before the first episode/call to reset(). Nothing to do
            return
        AlgorithmicEnv.reward_shortfalls.append(self.episode_total_reward - len(self.target))
        AlgorithmicEnv.reward_shortfalls = AlgorithmicEnv.reward_shortfalls[-self.last:]
        if len(AlgorithmicEnv.reward_shortfalls) == self.last and \
          min(AlgorithmicEnv.reward_shortfalls) >= self.MIN_REWARD_SHORTFALL_FOR_PROMOTION and \
          AlgorithmicEnv.min_length < 30:
            AlgorithmicEnv.min_length += 1
            AlgorithmicEnv.reward_shortfalls = []
    def reset(self):
        self._check_levelup()
        self.last_action = None
        self.last_reward = 0
        self.read_head_position = self.READ_HEAD_START
        self.write_head_position = 0
        self.episode_total_reward = 0.0
        self.time = 0
        length = self.np_random.randint(3) + AlgorithmicEnv.min_length
        self.input_data = self.generate_input_data(length)
        self.target = self.target_from_input_data(self.input_data)
        return self._get_obs()
    def generate_input_data(self, size):
        raise NotImplemented
    def target_from_input_data(self, input_data):
        raise NotImplemented("Subclasses must implement")
    def _move(self, movement):
        raise NotImplemented
 class TapeAlgorithmicEnv(AlgorithmicEnv):
    """An algorithmic env with a 1-d input tape."""
    MOVEMENTS = ['left', 'right']
    READ_HEAD_START = 0
    def _move(self, movement):
        named = self.MOVEMENTS[movement]
        self.read_head_position += 1 if named == 'right' else -1
    def _get_obs(self, pos=None):
        if pos is None:
            pos = self.read_head_position
        if pos < 0:
            return self.base
        if isinstance(pos, np.ndarray):
            pos = pos.item()
        try:
            return self.input_data[pos]
        except IndexError:
            return self.base
    def generate_input_data(self, size):
        return [self.np_random.randint(self.base) for _ in range(size)]
    def render_observation(self):
        x = self.read_head_position
        x_str =      "Observation Tape    : "
        for i in range(-2, self.input_width + 2):
            if i == x:
                x_str += colorize(self._get_str_obs(np.array([i])), 'green', highlight=True)
            else:
                x_str += self._get_str_obs(np.array([i]))
        x_str += "\n"
        return x_str
 class GridAlgorithmicEnv(AlgorithmicEnv):
    """An algorithmic env with a 2-d input grid."""
    MOVEMENTS = ['left', 'right', 'up', 'down']
    READ_HEAD_START = (0, 0)
    def __init__(self, rows, *args, **kwargs):
        self.rows = rows
        AlgorithmicEnv.__init__(self, *args, **kwargs)
    def _move(self, movement):
        named = self.MOVEMENTS[movement]
        x, y = self.read_head_position
        if named == 'left':
            x -= 1
        elif named == 'right':
            x += 1
        elif named == 'up':
            y -= 1
        elif named == 'down':
            y += 1
        else:
            raise ValueError("Unrecognized direction: {}".format(named))
        self.read_head_position = x, y
    def generate_input_data(self, size):
        return [
            [self.np_random.randint(self.base) for _ in range(self.rows)]
            for __ in range(size)
        ]
    def _get_obs(self, pos=None):
        if pos is None:
            pos = self.read_head_position
        x, y = pos
        if any(idx < 0 for idx in pos):
            return self.base
        try:
            return self.input_data[x][y]
        except IndexError:
            return self.base
    def render_observation(self):
        x = self.read_head_position
        label =      "Observation Grid    : "
        x_str = ""
        for j in range(-1, self.rows+1):
            if j != -1:
                x_str += " " * len(label)
            for i in range(-2, self.input_width + 2):
                if i == x[0] and j == x[1]:
                    x_str += colorize(self._get_str_obs((i, j)), 'green', highlight=True)
                else:
                    x_str += self._get_str_obs((i, j))
            x_str += "\n"
        x_str = label + x_str
        return x_str
--- a/src/gym/envs/algorithmic/copy_.py
+++ b/src/gym/envs/algorithmic/copy_.py
@@ -0,0 +1,13 @@
 """
 Task is to copy content from the input tape to
 the output tape. http://arxiv.org/abs/1511.07275
 """
 from gym.envs.algorithmic import algorithmic_env
 class CopyEnv(algorithmic_env.TapeAlgorithmicEnv):
    def __init__(self, base=5, chars=True):
        super(CopyEnv, self).__init__(base=base, chars=chars)
    def target_from_input_data(self, input_data):
        return input_data
--- a/src/gym/envs/algorithmic/duplicated_input.py
+++ b/src/gym/envs/algorithmic/duplicated_input.py
@@ -0,0 +1,24 @@
 """
 Task is to return every nth character from the input tape.
 http://arxiv.org/abs/1511.07275
 """
 from __future__ import division
 from gym.envs.algorithmic import algorithmic_env
 class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv):
    def __init__(self, duplication=2, base=5):
        self.duplication = duplication
        super(DuplicatedInputEnv, self).__init__(base=base, chars=True)
    def generate_input_data(self, size):
        res = []
        if size < self.duplication:
            size = self.duplication
        for i in range(size//self.duplication):
            char = self.np_random.randint(self.base)
            for _ in range(self.duplication):
                res.append(char)
        return res
    def target_from_input_data(self, input_data):
        return [input_data[i] for i in range(0, len(input_data), self.duplication)]
--- a/src/gym/envs/algorithmic/repeat_copy.py
+++ b/src/gym/envs/algorithmic/repeat_copy.py
@@ -0,0 +1,15 @@
 """
 Task is to copy content multiple times from the input tape to
 the output tape. http://arxiv.org/abs/1511.07275
 """
 from gym.envs.algorithmic import algorithmic_env
 class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv):
    MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
    def __init__(self, base=5):
        super(RepeatCopyEnv, self).__init__(base=base, chars=True)
        self.last = 50
    def target_from_input_data(self, input_data):
        return input_data + list(reversed(input_data)) + input_data
--- a/src/gym/envs/algorithmic/reverse.py
+++ b/src/gym/envs/algorithmic/reverse.py
@@ -0,0 +1,15 @@
 """
 Task is to reverse content over the input tape.
 http://arxiv.org/abs/1511.07275
 """
 from gym.envs.algorithmic import algorithmic_env
 class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv):
    MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
    def __init__(self, base=2):
        super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1)
        self.last = 50
    def target_from_input_data(self, input_str):
        return list(reversed(input_str))
--- a/src/gym/envs/algorithmic/reversed_addition.py
+++ b/src/gym/envs/algorithmic/reversed_addition.py
@@ -0,0 +1,30 @@
 from __future__ import division
 import numpy as np
 from gym.envs.algorithmic import algorithmic_env
 class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
    def __init__(self, rows=2, base=3):
        super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False)
    def target_from_input_data(self, input_strings):
        curry = 0
        target = []
        for digits in input_strings:
            total = sum(digits) + curry
            target.append(total % self.base)
            curry = total // self.base
        if curry > 0:
            target.append(curry)
        return target
    @property
    def time_limit(self):
        # Quirk preserved for the sake of consistency: add the length of the input
        # rather than the length of the desired output (which may differ if there's
        # an extra carried digit).
        # TODO: It seems like this time limit is so strict as to make Addition3-v0
        # unsolvable, since agents aren't even given enough time steps to look at
        # all the digits. (The solutions on the scoreboard seem to only work by
        # save-scumming.)
        return self.input_width*2 + 4
--- a/src/gym/envs/algorithmic/tests/init.py
+++ b/src/gym/envs/algorithmic/tests/init.py
--- a/src/gym/envs/algorithmic/tests/test_algorithmic.py
+++ b/src/gym/envs/algorithmic/tests/test_algorithmic.py
@@ -0,0 +1,239 @@
 from gym.envs import algorithmic as alg
 import unittest
 # All concrete subclasses of AlgorithmicEnv
 ALL_ENVS = [
    alg.copy_.CopyEnv, 
    alg.duplicated_input.DuplicatedInputEnv,
    alg.repeat_copy.RepeatCopyEnv,
    alg.reverse.ReverseEnv,
    alg.reversed_addition.ReversedAdditionEnv,
 ]
 ALL_TAPE_ENVS = [env for env in ALL_ENVS 
    if issubclass(env, alg.algorithmic_env.TapeAlgorithmicEnv)]
 ALL_GRID_ENVS = [env for env in ALL_ENVS 
    if issubclass(env, alg.algorithmic_env.GridAlgorithmicEnv)]
 def imprint(env, input_arr):
    """Monkey-patch the given environment so that when reset() is called, the
    input tape/grid will be set to the given data, rather than being randomly
    generated."""
    env.generate_input_data = lambda _: input_arr
 class TestAlgorithmicEnvInteractions(unittest.TestCase):
    """Test some generic behaviour not specific to any particular algorithmic
    environment. Movement, allocation of rewards, etc."""
    CANNED_INPUT = [0, 1]
    ENV_KLS = alg.copy_.CopyEnv
    LEFT, RIGHT = ENV_KLS._movement_idx('left'), ENV_KLS._movement_idx('right')
    def setUp(self):
        self.env = self.ENV_KLS(base=2, chars=True)
        imprint(self.env, self.CANNED_INPUT)
    def test_successful_interaction(self):
        obs = self.env.reset()
        self.assertEqual(obs, 0)
        obs, reward, done, _ = self.env.step([self.RIGHT, 1, 0])
        self.assertEqual(obs, 1)
        self.assertGreater(reward, 0)
        self.assertFalse(done)
        obs, reward, done, _ = self.env.step([self.LEFT, 1, 1])
        self.assertTrue(done)
        self.assertGreater(reward, 0)
    def test_bad_output_fail_fast(self):
        obs = self.env.reset()
        obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
        self.assertTrue(done)
        self.assertLess(reward, 0)
    def test_levelup(self):
        obs = self.env.reset()
        # Kind of a hack
        alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls = []
        min_length = self.env.min_length
        for i in range(self.env.last):
            obs, reward, done, _ = self.env.step([self.RIGHT, 1, 0])
            self.assertFalse(done)
            obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
            self.assertTrue(done)
            self.env.reset()
            if i < self.env.last-1:
                self.assertEqual(len(alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls), i+1)
            else:
                # Should have leveled up on the last iteration
                self.assertEqual(self.env.min_length, min_length+1)
                self.assertEqual(len(alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls), 0)
    def test_walk_off_the_end(self):
        obs = self.env.reset()
        # Walk off the end
        obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
        self.assertEqual(obs, self.env.base)
        self.assertEqual(r, 0)
        self.assertFalse(done)
        # Walk further off track
        obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
        self.assertEqual(obs, self.env.base)
        self.assertFalse(done)
        # Return to the first input character
        obs, r, done, _ = self.env.step([self.RIGHT, 0, 0])
        self.assertEqual(obs, self.env.base)
        self.assertFalse(done)
        obs, r, done, _ = self.env.step([self.RIGHT, 0, 0])
        self.assertEqual(obs, 0)
    def test_grid_naviation(self):
        env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=6)
        N,S,E,W = [env._movement_idx(named_dir) for named_dir in ['up', 'down', 'right', 'left']]
        # Corresponds to a grid that looks like...
        #       0 1 2
        #       3 4 5
        canned = [ [0, 3], [1, 4], [2, 5] ]
        imprint(env, canned)
        obs = env.reset()
        self.assertEqual(obs, 0)
        navigation = [
          (S, 3), (N, 0), (E, 1), (S, 4), (S, 6), (E, 6), (N, 5), (N, 2), (W, 1)
        ]
        for (movement, expected_obs) in navigation:
            obs, reward, done, _ = env.step([movement, 0, 0])
            self.assertEqual(reward, 0)
            self.assertFalse(done)
            self.assertEqual(obs, expected_obs)
    def test_grid_success(self):
        env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=3)
        canned = [ [1, 2], [1, 0], [2, 2] ]
        imprint(env, canned)
        obs = env.reset()
        target = [0, 2, 1, 1]
        self.assertEqual(env.target, target)
        self.assertEqual(obs, 1)
        for i, target_digit in enumerate(target):
            obs, reward, done, _ = env.step([0, 1, target_digit])
            self.assertGreater(reward, 0)
            self.assertEqual(done, i==len(target)-1)
    def test_sane_time_limit(self):
        obs = self.env.reset()
        self.assertLess(self.env.time_limit, 100)
        for _ in range(100):
            obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
            if done:
                return
        self.fail("Time limit wasn't enforced")
    def test_rendering(self):
        env = self.env
        obs = env.reset()
        self.assertEqual(env._get_str_obs(), 'A')
        self.assertEqual(env._get_str_obs(1), 'B')
        self.assertEqual(env._get_str_obs(-1), ' ')
        self.assertEqual(env._get_str_obs(2), ' ')
        self.assertEqual(env._get_str_target(0), 'A')
        self.assertEqual(env._get_str_target(1), 'B')
        # Test numerical alphabet rendering
        env = self.ENV_KLS(base=3, chars=False)
        imprint(env, self.CANNED_INPUT)
        env.reset()
        self.assertEqual(env._get_str_obs(), '0')
        self.assertEqual(env._get_str_obs(1), '1')
 class TestTargets(unittest.TestCase):
    """Test the rules mapping input strings/grids to target outputs."""
    def test_reverse_target(self):
        input_expected = [
            ([0], [0]),
            ([0, 1], [1, 0]),
            ([1, 1], [1, 1]),
            ([1, 0, 1], [1, 0, 1]),
            ([0, 0, 1, 1], [1, 1, 0, 0]),
        ]
        env = alg.reverse.ReverseEnv()
        for input_arr, expected in input_expected:
            target = env.target_from_input_data(input_arr)
            self.assertEqual(target, expected)
    def test_reversed_addition_target(self):
        env = alg.reversed_addition.ReversedAdditionEnv(base=3)
        input_expected = [
            ([[1,1], [1,1]], [2, 2]),
            ([[2,2], [0,1]], [1, 2]),
            ([[2,1], [1,1], [1,1], [1,0]], [0, 0, 0, 2]),
        ]
        for (input_grid, expected_target) in input_expected:
            self.assertEqual(env.target_from_input_data(input_grid), expected_target)
    def test_reversed_addition_3rows(self):
        env = alg.reversed_addition.ReversedAdditionEnv(base=3, rows=3)
        input_expected = [
            ([[1,1,0],[0,1,1]], [2, 2]),
            ([[1,1,2],[0,1,1]], [1,0,1]),
        ]
        for (input_grid, expected_target) in input_expected:
            self.assertEqual(env.target_from_input_data(input_grid), expected_target)
    def test_copy_target(self):
        env = alg.copy_.CopyEnv()
        self.assertEqual(env.target_from_input_data([0, 1, 2]), [0, 1, 2])
    def test_duplicated_input_target(self):
        env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
        self.assertEqual(env.target_from_input_data([0, 0, 0, 0, 1, 1]), [0, 0, 1])
    def test_repeat_copy_target(self):
        env = alg.repeat_copy.RepeatCopyEnv()
        self.assertEqual(env.target_from_input_data([0, 1, 2]), [0, 1, 2, 2, 1, 0, 0, 1, 2])
 class TestInputGeneration(unittest.TestCase):
    """Test random input generation.
    """
    def test_tape_inputs(self):
        for env_kls in ALL_TAPE_ENVS:
            env = env_kls()
            for size in range(2,5):
                input_tape = env.generate_input_data(size)
                self.assertTrue(all(0<=x<=env.base for x in input_tape),
                "Invalid input tape from env {}: {}".format(env_kls, input_tape))
                # DuplicatedInput needs to generate inputs with even length,
                # so it may be short one
                self.assertLessEqual(len(input_tape), size)
    def test_grid_inputs(self):
        for env_kls in ALL_GRID_ENVS:
            env = env_kls()
            for size in range(2, 5):
                input_grid = env.generate_input_data(size)
                # Should get "size" sublists, each of length self.rows (not the
                # opposite, as you might expect)
                self.assertEqual(len(input_grid), size)
                self.assertTrue(all(len(col) == env.rows for col in input_grid))
                self.assertTrue(all(0<=x<=env.base for x in input_grid[0]))
    def test_duplicatedinput_inputs(self):
        """The duplicated_input env needs to generate strings with the appropriate
        amount of repetiion."""
        env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
        input_tape = env.generate_input_data(4)
        self.assertEqual(len(input_tape), 4)
        self.assertEqual(input_tape[0], input_tape[1])
        self.assertEqual(input_tape[2], input_tape[3])
        # If requested input size isn't a multiple of duplication, go lower
        input_tape = env.generate_input_data(3)
        self.assertEqual(len(input_tape), 2)
        self.assertEqual(input_tape[0], input_tape[1])
        # If requested input size is *less than* duplication, go up
        input_tape = env.generate_input_data(1)
        self.assertEqual(len(input_tape), 2)
        self.assertEqual(input_tape[0], input_tape[1])
        env = alg.duplicated_input.DuplicatedInputEnv(duplication=3)
        input_tape = env.generate_input_data(6)
        self.assertEqual(len(input_tape), 6)
        self.assertEqual(input_tape[0], input_tape[1])
        self.assertEqual(input_tape[1], input_tape[2])
 if __name__ == '__main__':
    unittest.main()
--- a/src/gym/envs/atari/init.py
+++ b/src/gym/envs/atari/init.py
@@ -0,0 +1 @@
 from gym.envs.atari.atari_env import AtariEnv
--- a/src/gym/envs/atari/atari_env.py
+++ b/src/gym/envs/atari/atari_env.py
@@ -0,0 +1,192 @@
 import numpy as np
 import os
 import gym
 from gym import error, spaces
 from gym import utils
 from gym.utils import seeding
 try:
    import atari_py
 except ImportError as e:
    raise error.DependencyNotInstalled("{}. (HINT: you can install Atari dependencies by running 'pip install gym[atari]'.)".format(e))
 def to_ram(ale):
    ram_size = ale.getRAMSize()
    ram = np.zeros((ram_size),dtype=np.uint8)
    ale.getRAM(ram)
    return ram
 class AtariEnv(gym.Env, utils.EzPickle):
    metadata = {'render.modes': ['human', 'rgb_array']}
    def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""
        utils.EzPickle.__init__(self, game, obs_type, frameskip, repeat_action_probability)
        assert obs_type in ('ram', 'image')
        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None
        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)
        self.seed()
        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        (screen_width,screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        # Empirically, we need to seed before loading the ROM.
        self.ale.setInt(b'random_seed', seed2)
        self.ale.loadROM(self.game_path)
        return [seed1, seed2]
    def step(self, a):
        reward = 0.0
        action = self._action_set[a]
        if isinstance(self.frameskip, int):
            num_steps = self.frameskip
        else:
            num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
        for _ in range(num_steps):
            reward += self.ale.act(action)
        ob = self._get_obs()
        return ob, reward, self.ale.game_over(), {"ale.lives": self.ale.lives()}
    def _get_image(self):
        return self.ale.getScreenRGB2()
    def _get_ram(self):
        return to_ram(self.ale)
    @property
    def _n_actions(self):
        return len(self._action_set)
    def _get_obs(self):
        if self._obs_type == 'ram':
            return self._get_ram()
        elif self._obs_type == 'image':
            img = self._get_image()
        return img
    # return: (states, observations)
    def reset(self):
        self.ale.reset_game()
        return self._get_obs()
    def render(self, mode='human'):
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen
    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self._action_set]
    def get_keys_to_action(self):
        KEYWORD_TO_KEY = {
            'UP':      ord('w'),
            'DOWN':    ord('s'),
            'LEFT':    ord('a'),
            'RIGHT':   ord('d'),
            'FIRE':    ord(' '),
        }
        keys_to_action = {}
        for action_id, action_meaning in enumerate(self.get_action_meanings()):
            keys = []
            for keyword, key in KEYWORD_TO_KEY.items():
                if keyword in action_meaning:
                    keys.append(key)
            keys = tuple(sorted(keys))
            assert keys not in keys_to_action
            keys_to_action[keys] = action_id
        return keys_to_action
    def clone_state(self):
        """Clone emulator state w/o system state. Restoring this state will
        *not* give an identical environment. For complete cloning and restoring
        of the full state, see `{clone,restore}_full_state()`."""
        state_ref = self.ale.cloneState()
        state = self.ale.encodeState(state_ref)
        self.ale.deleteState(state_ref)
        return state
    def restore_state(self, state):
        """Restore emulator state w/o system state."""
        state_ref = self.ale.decodeState(state)
        self.ale.restoreState(state_ref)
        self.ale.deleteState(state_ref)
    def clone_full_state(self):
        """Clone emulator state w/ system state including pseudorandomness.
        Restoring this state will give an identical environment."""
        state_ref = self.ale.cloneSystemState()
        state = self.ale.encodeState(state_ref)
        self.ale.deleteState(state_ref)
        return state
    def restore_full_state(self, state):
        """Restore emulator state w/ system state including pseudorandomness."""
        state_ref = self.ale.decodeState(state)
        self.ale.restoreSystemState(state_ref)
        self.ale.deleteState(state_ref)
 ACTION_MEANING = {
    0 : "NOOP",
    1 : "FIRE",
    2 : "UP",
    3 : "RIGHT",
    4 : "LEFT",
    5 : "DOWN",
    6 : "UPRIGHT",
    7 : "UPLEFT",
    8 : "DOWNRIGHT",
    9 : "DOWNLEFT",
    10 : "UPFIRE",
    11 : "RIGHTFIRE",
    12 : "LEFTFIRE",
    13 : "DOWNFIRE",
    14 : "UPRIGHTFIRE",
    15 : "UPLEFTFIRE",
    16 : "DOWNRIGHTFIRE",
    17 : "DOWNLEFTFIRE",
 }
--- a/src/gym/envs/box2d/init.py
+++ b/src/gym/envs/box2d/init.py
@@ -0,0 +1,4 @@
 from gym.envs.box2d.lunar_lander import LunarLander
 from gym.envs.box2d.lunar_lander import LunarLanderContinuous
 from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
 from gym.envs.box2d.car_racing import CarRacing
--- a/src/gym/envs/box2d/bipedal_walker.py
+++ b/src/gym/envs/box2d/bipedal_walker.py
@@ -0,0 +1,581 @@
 import sys, math
 import numpy as np
 import Box2D
 from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
 import gym
 from gym import spaces
 from gym.utils import colorize, seeding, EzPickle
 # This is simple 4-joints walker robot environment.
 #
 # There are two versions:
 #
 # - Normal, with slightly uneven terrain.
 #
 # - Hardcore with ladders, stumps, pitfalls.
 #
 # Reward is given for moving forward, total 300+ points up to the far end. If the robot falls,
 # it gets -100. Applying motor torque costs a small amount of points, more optimal agent
 # will get better score.
 #
 # Heuristic is provided for testing, it's also useful to get demonstrations to
 # learn from. To run heuristic:
 #
 # python gym/envs/box2d/bipedal_walker.py
 #
 # State consists of hull angle speed, angular velocity, horizontal speed, vertical speed,
 # position of joints and joints angular speed, legs contact with ground, and 10 lidar
 # rangefinder measurements to help to deal with the hardcore version. There's no coordinates
 # in the state vector. Lidar is less useful in normal version, but it works.
 #
 # To solve the game you need to get 300 points in 1600 time steps.
 #
 # To solve hardcore version you need 300 points in 2000 time steps.
 #
 # Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
 FPS    = 50
 SCALE  = 30.0   # affects how fast-paced the game is, forces should be adjusted as well
 MOTORS_TORQUE = 80
 SPEED_HIP     = 4
 SPEED_KNEE    = 6
 LIDAR_RANGE   = 160/SCALE
 INITIAL_RANDOM = 5
 HULL_POLY =[
    (-30,+9), (+6,+9), (+34,+1),
    (+34,-8), (-30,-8)
    ]
 LEG_DOWN = -8/SCALE
 LEG_W, LEG_H = 8/SCALE, 34/SCALE
 VIEWPORT_W = 600
 VIEWPORT_H = 400
 TERRAIN_STEP   = 14/SCALE
 TERRAIN_LENGTH = 200     # in steps
 TERRAIN_HEIGHT = VIEWPORT_H/SCALE/4
 TERRAIN_GRASS    = 10    # low long are grass spots, in steps
 TERRAIN_STARTPAD = 20    # in steps
 FRICTION = 2.5
 HULL_FD = fixtureDef(
                shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in HULL_POLY ]),
                density=5.0,
                friction=0.1,
                categoryBits=0x0020,
                maskBits=0x001,  # collide only with ground
                restitution=0.0) # 0.99 bouncy
 LEG_FD = fixtureDef(
                    shape=polygonShape(box=(LEG_W/2, LEG_H/2)),
                    density=1.0,
                    restitution=0.0,
                    categoryBits=0x0020,
                    maskBits=0x001)
 LOWER_FD = fixtureDef(
                    shape=polygonShape(box=(0.8*LEG_W/2, LEG_H/2)),
                    density=1.0,
                    restitution=0.0,
                    categoryBits=0x0020,
                    maskBits=0x001)
 class ContactDetector(contactListener):
    def __init__(self, env):
        contactListener.__init__(self)
        self.env = env
    def BeginContact(self, contact):
        if self.env.hull==contact.fixtureA.body or self.env.hull==contact.fixtureB.body:
            self.env.game_over = True
        for leg in [self.env.legs[1], self.env.legs[3]]:
            if leg in [contact.fixtureA.body, contact.fixtureB.body]:
                leg.ground_contact = True
    def EndContact(self, contact):
        for leg in [self.env.legs[1], self.env.legs[3]]:
            if leg in [contact.fixtureA.body, contact.fixtureB.body]:
                leg.ground_contact = False
 class BipedalWalker(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second' : FPS
    }
    hardcore = False
    def __init__(self):
        EzPickle.__init__(self)
        self.seed()
        self.viewer = None
        self.world = Box2D.b2World()
        self.terrain = None
        self.hull = None
        self.prev_shaping = None
        self.fd_polygon = fixtureDef(
                        shape = polygonShape(vertices=
                        [(0, 0),
                         (1, 0),
                         (1, -1),
                         (0, -1)]),
                        friction = FRICTION)
        self.fd_edge = fixtureDef(
                    shape = edgeShape(vertices=
                    [(0, 0),
                     (1, 1)]),
                    friction = FRICTION,
                    categoryBits=0x0001,
                )
        self.reset()
        high = np.array([np.inf]*24)
        self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
        self.observation_space = spaces.Box(-high, high)
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def _destroy(self):
        if not self.terrain: return
        self.world.contactListener = None
        for t in self.terrain:
            self.world.DestroyBody(t)
        self.terrain = []
        self.world.DestroyBody(self.hull)
        self.hull = None
        for leg in self.legs:
            self.world.DestroyBody(leg)
        self.legs = []
        self.joints = []
    def _generate_terrain(self, hardcore):
        GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
        state    = GRASS
        velocity = 0.0
        y        = TERRAIN_HEIGHT
        counter  = TERRAIN_STARTPAD
        oneshot  = False
        self.terrain   = []
        self.terrain_x = []
        self.terrain_y = []
        for i in range(TERRAIN_LENGTH):
            x = i*TERRAIN_STEP
            self.terrain_x.append(x)
            if state==GRASS and not oneshot:
                velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
                if i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1)/SCALE   #1
                y += velocity
            elif state==PIT and oneshot:
                counter = self.np_random.randint(3, 5)
                poly = [
                    (x,              y),
                    (x+TERRAIN_STEP, y),
                    (x+TERRAIN_STEP, y-4*TERRAIN_STEP),
                    (x,              y-4*TERRAIN_STEP),
                    ]
                self.fd_polygon.shape.vertices=poly
                t = self.world.CreateStaticBody(
                    fixtures = self.fd_polygon)
                t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
                self.terrain.append(t)
                self.fd_polygon.shape.vertices=[(p[0]+TERRAIN_STEP*counter,p[1]) for p in poly]
                t = self.world.CreateStaticBody(
                    fixtures = self.fd_polygon)
                t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
                self.terrain.append(t)
                counter += 2
                original_y = y
            elif state==PIT and not oneshot:
                y = original_y
                if counter > 1:
                    y -= 4*TERRAIN_STEP
            elif state==STUMP and oneshot:
                counter = self.np_random.randint(1, 3)
                poly = [
                    (x,                      y),
                    (x+counter*TERRAIN_STEP, y),
                    (x+counter*TERRAIN_STEP, y+counter*TERRAIN_STEP),
                    (x,                      y+counter*TERRAIN_STEP),
                    ]
                self.fd_polygon.shape.vertices=poly
                t = self.world.CreateStaticBody(
                    fixtures = self.fd_polygon)
                t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
                self.terrain.append(t)
            elif state==STAIRS and oneshot:
                stair_height = +1 if self.np_random.rand() > 0.5 else -1
                stair_width = self.np_random.randint(4, 5)
                stair_steps = self.np_random.randint(3, 5)
                original_y = y
                for s in range(stair_steps):
                    poly = [
                        (x+(    s*stair_width)*TERRAIN_STEP, y+(   s*stair_height)*TERRAIN_STEP),
                        (x+((1+s)*stair_width)*TERRAIN_STEP, y+(   s*stair_height)*TERRAIN_STEP),
                        (x+((1+s)*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
                        (x+(    s*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
                        ]
                    self.fd_polygon.shape.vertices=poly
                    t = self.world.CreateStaticBody(
                        fixtures = self.fd_polygon)
                    t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
                    self.terrain.append(t)
                counter = stair_steps*stair_width
            elif state==STAIRS and not oneshot:
                s = stair_steps*stair_width - counter - stair_height
                n = s/stair_width
                y = original_y + (n*stair_height)*TERRAIN_STEP
            oneshot = False
            self.terrain_y.append(y)
            counter -= 1
            if counter==0:
                counter = self.np_random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
                if state==GRASS and hardcore:
                    state = self.np_random.randint(1, _STATES_)
                    oneshot = True
                else:
                    state = GRASS
                    oneshot = True
        self.terrain_poly = []
        for i in range(TERRAIN_LENGTH-1):
            poly = [
                (self.terrain_x[i],   self.terrain_y[i]),
                (self.terrain_x[i+1], self.terrain_y[i+1])
                ]
            self.fd_edge.shape.vertices=poly
            t = self.world.CreateStaticBody(
                fixtures = self.fd_edge)
            color = (0.3, 1.0 if i%2==0 else 0.8, 0.3)
            t.color1 = color
            t.color2 = color
            self.terrain.append(t)
            color = (0.4, 0.6, 0.3)
            poly += [ (poly[1][0], 0), (poly[0][0], 0) ]
            self.terrain_poly.append( (poly, color) )
        self.terrain.reverse()
    def _generate_clouds(self):
        # Sorry for the clouds, couldn't resist
        self.cloud_poly   = []
        for i in range(TERRAIN_LENGTH//20):
            x = self.np_random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
            y = VIEWPORT_H/SCALE*3/4
            poly = [
                (x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP),
                 y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP) )
                for a in range(5) ]
            x1 = min( [p[0] for p in poly] )
            x2 = max( [p[0] for p in poly] )
            self.cloud_poly.append( (poly,x1,x2) )
    def reset(self):
        self._destroy()
        self.world.contactListener_bug_workaround = ContactDetector(self)
        self.world.contactListener = self.world.contactListener_bug_workaround
        self.game_over = False
        self.prev_shaping = None
        self.scroll = 0.0
        self.lidar_render = 0
        W = VIEWPORT_W/SCALE
        H = VIEWPORT_H/SCALE
        self._generate_terrain(self.hardcore)
        self._generate_clouds()
        init_x = TERRAIN_STEP*TERRAIN_STARTPAD/2
        init_y = TERRAIN_HEIGHT+2*LEG_H
        self.hull = self.world.CreateDynamicBody(
            position = (init_x, init_y),
            fixtures = HULL_FD
                )
        self.hull.color1 = (0.5,0.4,0.9)
        self.hull.color2 = (0.3,0.3,0.5)
        self.hull.ApplyForceToCenter((self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
        self.legs = []
        self.joints = []
        for i in [-1,+1]:
            leg = self.world.CreateDynamicBody(
                position = (init_x, init_y - LEG_H/2 - LEG_DOWN),
                angle = (i*0.05),
                fixtures = LEG_FD
                )
            leg.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
            leg.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
            rjd = revoluteJointDef(
                bodyA=self.hull,
                bodyB=leg,
                localAnchorA=(0, LEG_DOWN),
                localAnchorB=(0, LEG_H/2),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=MOTORS_TORQUE,
                motorSpeed = i,
                lowerAngle = -0.8,
                upperAngle = 1.1,
                )
            self.legs.append(leg)
            self.joints.append(self.world.CreateJoint(rjd))
            lower = self.world.CreateDynamicBody(
                position = (init_x, init_y - LEG_H*3/2 - LEG_DOWN),
                angle = (i*0.05),
                fixtures = LOWER_FD
                )
            lower.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
            lower.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
            rjd = revoluteJointDef(
                bodyA=leg,
                bodyB=lower,
                localAnchorA=(0, -LEG_H/2),
                localAnchorB=(0, LEG_H/2),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=MOTORS_TORQUE,
                motorSpeed = 1,
                lowerAngle = -1.6,
                upperAngle = -0.1,
                )
            lower.ground_contact = False
            self.legs.append(lower)
            self.joints.append(self.world.CreateJoint(rjd))
        self.drawlist = self.terrain + self.legs + [self.hull]
        class LidarCallback(Box2D.b2.rayCastCallback):
            def ReportFixture(self, fixture, point, normal, fraction):
                if (fixture.filterData.categoryBits & 1) == 0:
                    return 1
                self.p2 = point
                self.fraction = fraction
                return 0
        self.lidar = [LidarCallback() for _ in range(10)]
        return self.step(np.array([0,0,0,0]))[0]
    def step(self, action):
        #self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
        control_speed = False  # Should be easier as well
        if control_speed:
            self.joints[0].motorSpeed = float(SPEED_HIP  * np.clip(action[0], -1, 1))
            self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
            self.joints[2].motorSpeed = float(SPEED_HIP  * np.clip(action[2], -1, 1))
            self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
        else:
            self.joints[0].motorSpeed     = float(SPEED_HIP     * np.sign(action[0]))
            self.joints[0].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1))
            self.joints[1].motorSpeed     = float(SPEED_KNEE    * np.sign(action[1]))
            self.joints[1].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1))
            self.joints[2].motorSpeed     = float(SPEED_HIP     * np.sign(action[2]))
            self.joints[2].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1))
            self.joints[3].motorSpeed     = float(SPEED_KNEE    * np.sign(action[3]))
            self.joints[3].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1))
        self.world.Step(1.0/FPS, 6*30, 2*30)
        pos = self.hull.position
        vel = self.hull.linearVelocity
        for i in range(10):
            self.lidar[i].fraction = 1.0
            self.lidar[i].p1 = pos
            self.lidar[i].p2 = (
                pos[0] + math.sin(1.5*i/10.0)*LIDAR_RANGE,
                pos[1] - math.cos(1.5*i/10.0)*LIDAR_RANGE)
            self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
        state = [
            self.hull.angle,        # Normal angles up to 0.5 here, but sure more is possible.
            2.0*self.hull.angularVelocity/FPS,
            0.3*vel.x*(VIEWPORT_W/SCALE)/FPS,  # Normalized to get -1..1 range
            0.3*vel.y*(VIEWPORT_H/SCALE)/FPS,
            self.joints[0].angle,   # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
            self.joints[0].speed / SPEED_HIP,
            self.joints[1].angle + 1.0,
            self.joints[1].speed / SPEED_KNEE,
            1.0 if self.legs[1].ground_contact else 0.0,
            self.joints[2].angle,
            self.joints[2].speed / SPEED_HIP,
            self.joints[3].angle + 1.0,
            self.joints[3].speed / SPEED_KNEE,
            1.0 if self.legs[3].ground_contact else 0.0
            ]
        state += [l.fraction for l in self.lidar]
        assert len(state)==24
        self.scroll = pos.x - VIEWPORT_W/SCALE/5
        shaping  = 130*pos[0]/SCALE   # moving forward is a way to receive reward (normalized to get 300 on completion)
        shaping -= 5.0*abs(state[0])  # keep head straight, other than that and falling, any behavior is unpunished
        reward = 0
        if self.prev_shaping is not None:
            reward = shaping - self.prev_shaping
        self.prev_shaping = shaping
        for a in action:
            reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
            # normalized to about -50.0 using heuristic, more optimal agent should spend less
        done = False
        if self.game_over or pos[0] < 0:
            reward = -100
            done   = True
        if pos[0] > (TERRAIN_LENGTH-TERRAIN_GRASS)*TERRAIN_STEP:
            done   = True
        return np.array(state), reward, done, {}
    def render(self, mode='human'):
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
        self.viewer.set_bounds(self.scroll, VIEWPORT_W/SCALE + self.scroll, 0, VIEWPORT_H/SCALE)
        self.viewer.draw_polygon( [
            (self.scroll,                  0),
            (self.scroll+VIEWPORT_W/SCALE, 0),
            (self.scroll+VIEWPORT_W/SCALE, VIEWPORT_H/SCALE),
            (self.scroll,                  VIEWPORT_H/SCALE),
            ], color=(0.9, 0.9, 1.0) )
        for poly,x1,x2 in self.cloud_poly:
            if x2 < self.scroll/2: continue
            if x1 > self.scroll/2 + VIEWPORT_W/SCALE: continue
            self.viewer.draw_polygon( [(p[0]+self.scroll/2, p[1]) for p in poly], color=(1,1,1))
        for poly, color in self.terrain_poly:
            if poly[1][0] < self.scroll: continue
            if poly[0][0] > self.scroll + VIEWPORT_W/SCALE: continue
            self.viewer.draw_polygon(poly, color=color)
        self.lidar_render = (self.lidar_render+1) % 100
        i = self.lidar_render
        if i < 2*len(self.lidar):
            l = self.lidar[i] if i < len(self.lidar) else self.lidar[len(self.lidar)-i-1]
            self.viewer.draw_polyline( [l.p1, l.p2], color=(1,0,0), linewidth=1 )
        for obj in self.drawlist:
            for f in obj.fixtures:
                trans = f.body.transform
                if type(f.shape) is circleShape:
                    t = rendering.Transform(translation=trans*f.shape.pos)
                    self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
                    self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
                else:
                    path = [trans*v for v in f.shape.vertices]
                    self.viewer.draw_polygon(path, color=obj.color1)
                    path.append(path[0])
                    self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
        flagy1 = TERRAIN_HEIGHT
        flagy2 = flagy1 + 50/SCALE
        x = TERRAIN_STEP*3
        self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(0,0,0), linewidth=2 )
        f = [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)]
        self.viewer.draw_polygon(f, color=(0.9,0.2,0) )
        self.viewer.draw_polyline(f + [f[0]], color=(0,0,0), linewidth=2 )
        return self.viewer.render(return_rgb_array = mode=='rgb_array')
    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
 class BipedalWalkerHardcore(BipedalWalker):
    hardcore = True
 if __name__=="__main__":
    # Heurisic: suboptimal, have no notion of balance.
    env = BipedalWalker()
    env.reset()
    steps = 0
    total_reward = 0
    a = np.array([0.0, 0.0, 0.0, 0.0])
    STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1,2,3
    SPEED = 0.29  # Will fall forward on higher speed
    state = STAY_ON_ONE_LEG
    moving_leg = 0
    supporting_leg = 1 - moving_leg
    SUPPORT_KNEE_ANGLE = +0.1
    supporting_knee_angle = SUPPORT_KNEE_ANGLE
    while True:
        s, r, done, info = env.step(a)
        total_reward += r
        if steps % 20 == 0 or done:
            print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
            print("step {} total_reward {:+0.2f}".format(steps, total_reward))
            print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4] ]))
            print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9] ]))
            print("leg1 " + str(["{:+0.2f}".format(x) for x in s[9:14]]))
        steps += 1
        contact0 = s[8]
        contact1 = s[13]
        moving_s_base = 4 + 5*moving_leg
        supporting_s_base = 4 + 5*supporting_leg
        hip_targ  = [None,None]   # -0.8 .. +1.1
        knee_targ = [None,None]   # -0.6 .. +0.9
        hip_todo  = [0.0, 0.0]
        knee_todo = [0.0, 0.0]
        if state==STAY_ON_ONE_LEG:
            hip_targ[moving_leg]  = 1.1
            knee_targ[moving_leg] = -0.6
            supporting_knee_angle += 0.03
            if s[2] > SPEED: supporting_knee_angle += 0.03
            supporting_knee_angle = min( supporting_knee_angle, SUPPORT_KNEE_ANGLE )
            knee_targ[supporting_leg] = supporting_knee_angle
            if s[supporting_s_base+0] < 0.10: # supporting leg is behind
                state = PUT_OTHER_DOWN
        if state==PUT_OTHER_DOWN:
            hip_targ[moving_leg]  = +0.1
            knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
            knee_targ[supporting_leg] = supporting_knee_angle
            if s[moving_s_base+4]:
                state = PUSH_OFF
                supporting_knee_angle = min( s[moving_s_base+2], SUPPORT_KNEE_ANGLE )
        if state==PUSH_OFF:
            knee_targ[moving_leg] = supporting_knee_angle
            knee_targ[supporting_leg] = +1.0
            if s[supporting_s_base+2] > 0.88 or s[2] > 1.2*SPEED:
                state = STAY_ON_ONE_LEG
                moving_leg = 1 - moving_leg
                supporting_leg = 1 - moving_leg
        if hip_targ[0]: hip_todo[0] = 0.9*(hip_targ[0] - s[4]) - 0.25*s[5]
        if hip_targ[1]: hip_todo[1] = 0.9*(hip_targ[1] - s[9]) - 0.25*s[10]
        if knee_targ[0]: knee_todo[0] = 4.0*(knee_targ[0] - s[6])  - 0.25*s[7]
        if knee_targ[1]: knee_todo[1] = 4.0*(knee_targ[1] - s[11]) - 0.25*s[12]
        hip_todo[0] -= 0.9*(0-s[0]) - 1.5*s[1] # PID to keep head strait
        hip_todo[1] -= 0.9*(0-s[0]) - 1.5*s[1]
        knee_todo[0] -= 15.0*s[3]  # vertical speed, to damp oscillations
        knee_todo[1] -= 15.0*s[3]
        a[0] = hip_todo[0]
        a[1] = knee_todo[0]
        a[2] = hip_todo[1]
        a[3] = knee_todo[1]
        a = np.clip(0.5*a, -1.0, 1.0)
        env.render()
        if done: break
--- a/src/gym/envs/box2d/car_dynamics.py
+++ b/src/gym/envs/box2d/car_dynamics.py
@@ -0,0 +1,244 @@
 import numpy as np
 import math
 import Box2D
 from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)
 # Top-down car dynamics simulation.
 #
 # Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
 # This simulation is a bit more detailed, with wheels rotation.
 #
 # Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
 SIZE = 0.02
 ENGINE_POWER            = 100000000*SIZE*SIZE
 WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
 FRICTION_LIMIT          = 1000000*SIZE*SIZE     # friction ~= mass ~= size^2 (calculated implicitly using density)
 WHEEL_R  = 27
 WHEEL_W  = 14
 WHEELPOS = [
    (-55,+80), (+55,+80),
    (-55,-82), (+55,-82)
    ]
 HULL_POLY1 =[
    (-60,+130), (+60,+130),
    (+60,+110), (-60,+110)
    ]
 HULL_POLY2 =[
    (-15,+120), (+15,+120),
    (+20, +20), (-20,  20)
    ]
 HULL_POLY3 =[
    (+25, +20),
    (+50, -10),
    (+50, -40),
    (+20, -90),
    (-20, -90),
    (-50, -40),
    (-50, -10),
    (-25, +20)
    ]
 HULL_POLY4 =[
    (-50,-120), (+50,-120),
    (+50,-90),  (-50,-90)
    ]
 WHEEL_COLOR = (0.0,0.0,0.0)
 WHEEL_WHITE = (0.3,0.3,0.3)
 MUD_COLOR   = (0.4,0.4,0.0)
 class Car:
    def __init__(self, world, init_angle, init_x, init_y):
        self.world = world
        self.hull = self.world.CreateDynamicBody(
            position = (init_x, init_y),
            angle = init_angle,
            fixtures = [
                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY1 ]), density=1.0),
                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY2 ]), density=1.0),
                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY3 ]), density=1.0),
                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY4 ]), density=1.0)
                ]
            )
        self.hull.color = (0.8,0.0,0.0)
        self.wheels = []
        self.fuel_spent = 0.0
        WHEEL_POLY = [
            (-WHEEL_W,+WHEEL_R), (+WHEEL_W,+WHEEL_R),
            (+WHEEL_W,-WHEEL_R), (-WHEEL_W,-WHEEL_R)
            ]
        for wx,wy in WHEELPOS:
            front_k = 1.0 if wy > 0 else 1.0
            w = self.world.CreateDynamicBody(
                position = (init_x+wx*SIZE, init_y+wy*SIZE),
                angle = init_angle,
                fixtures = fixtureDef(
                    shape=polygonShape(vertices=[ (x*front_k*SIZE,y*front_k*SIZE) for x,y in WHEEL_POLY ]),
                    density=0.1,
                    categoryBits=0x0020,
                    maskBits=0x001,
                    restitution=0.0)
                    )
            w.wheel_rad = front_k*WHEEL_R*SIZE
            w.color = WHEEL_COLOR
            w.gas   = 0.0
            w.brake = 0.0
            w.steer = 0.0
            w.phase = 0.0  # wheel angle
            w.omega = 0.0  # angular velocity
            w.skid_start = None
            w.skid_particle = None
            rjd = revoluteJointDef(
                bodyA=self.hull,
                bodyB=w,
                localAnchorA=(wx*SIZE,wy*SIZE),
                localAnchorB=(0,0),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=180*900*SIZE*SIZE,
                motorSpeed = 0,
                lowerAngle = -0.4,
                upperAngle = +0.4,
                )
            w.joint = self.world.CreateJoint(rjd)
            w.tiles = set()
            w.userData = w
            self.wheels.append(w)
        self.drawlist =  self.wheels + [self.hull]
        self.particles = []
    def gas(self, gas):
        'control: rear wheel drive'
        gas = np.clip(gas, 0, 1)
        for w in self.wheels[2:4]:
            diff = gas - w.gas
            if diff > 0.1: diff = 0.1  # gradually increase, but stop immediately
            w.gas += diff
    def brake(self, b):
        'control: brake b=0..1, more than 0.9 blocks wheels to zero rotation'
        for w in self.wheels:
            w.brake = b
    def steer(self, s):
        'control: steer s=-1..1, it takes time to rotate steering wheel from side to side, s is target position'
        self.wheels[0].steer = s
        self.wheels[1].steer = s
    def step(self, dt):
        for w in self.wheels:
            # Steer each wheel
            dir = np.sign(w.steer - w.joint.angle)
            val = abs(w.steer - w.joint.angle)
            w.joint.motorSpeed = dir*min(50.0*val, 3.0)
            # Position => friction_limit
            grass = True
            friction_limit = FRICTION_LIMIT*0.6  # Grass friction if no tile
            for tile in w.tiles:
                friction_limit = max(friction_limit, FRICTION_LIMIT*tile.road_friction)
                grass = False
            # Force
            forw = w.GetWorldVector( (0,1) )
            side = w.GetWorldVector( (1,0) )
            v = w.linearVelocity
            vf = forw[0]*v[0] + forw[1]*v[1]  # forward speed
            vs = side[0]*v[0] + side[1]*v[1]  # side speed
            # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
            # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
            # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
            w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0)  # small coef not to divide by zero
            self.fuel_spent += dt*ENGINE_POWER*w.gas
            if w.brake >= 0.9:
                w.omega = 0
            elif w.brake > 0:
                BRAKE_FORCE = 15    # radians per second
                dir = -np.sign(w.omega)
                val = BRAKE_FORCE*w.brake
                if abs(val) > abs(w.omega): val = abs(w.omega)  # low speed => same as = 0
                w.omega += dir*val
            w.phase += w.omega*dt
            vr = w.omega*w.wheel_rad  # rotating wheel speed
            f_force = -vf + vr        # force direction is direction of speed difference
            p_force = -vs
            # Physically correct is to always apply friction_limit until speed is equal.
            # But dt is finite, that will lead to oscillations if difference is already near zero.
            f_force *= 205000*SIZE*SIZE  # Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
            p_force *= 205000*SIZE*SIZE
            force = np.sqrt(np.square(f_force) + np.square(p_force))
            # Skid trace
            if abs(force) > 2.0*friction_limit:
                if w.skid_particle and w.skid_particle.grass==grass and len(w.skid_particle.poly) < 30:
                    w.skid_particle.poly.append( (w.position[0], w.position[1]) )
                elif w.skid_start is None:
                    w.skid_start = w.position
                else:
                    w.skid_particle = self._create_particle( w.skid_start, w.position, grass )
                    w.skid_start = None
            else:
                w.skid_start = None
                w.skid_particle = None
            if abs(force) > friction_limit:
                f_force /= force
                p_force /= force
                force = friction_limit  # Correct physics here
                f_force *= force
                p_force *= force
            w.omega -= dt*f_force*w.wheel_rad/WHEEL_MOMENT_OF_INERTIA
            w.ApplyForceToCenter( (
                p_force*side[0] + f_force*forw[0],
                p_force*side[1] + f_force*forw[1]), True )
    def draw(self, viewer, draw_particles=True):
        if draw_particles:
            for p in self.particles:
                viewer.draw_polyline(p.poly, color=p.color, linewidth=5)
        for obj in self.drawlist:
            for f in obj.fixtures:
                trans = f.body.transform
                path = [trans*v for v in f.shape.vertices]
                viewer.draw_polygon(path, color=obj.color)
                if "phase" not in obj.__dict__: continue
                a1 = obj.phase
                a2 = obj.phase + 1.2  # radians
                s1 = math.sin(a1)
                s2 = math.sin(a2)
                c1 = math.cos(a1)
                c2 = math.cos(a2)
                if s1>0 and s2>0: continue
                if s1>0: c1 = np.sign(c1)
                if s2>0: c2 = np.sign(c2)
                white_poly = [
                    (-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE),
                    (+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE)
                    ]
                viewer.draw_polygon([trans*v for v in white_poly], color=WHEEL_WHITE)
    def _create_particle(self, point1, point2, grass):
        class Particle:
            pass
        p = Particle()
        p.color = WHEEL_COLOR if not grass else MUD_COLOR
        p.ttl = 1
        p.poly = [(point1[0],point1[1]), (point2[0],point2[1])]
        p.grass = grass
        self.particles.append(p)
        while len(self.particles) > 30:
            self.particles.pop(0)
        return p
    def destroy(self):
        self.world.DestroyBody(self.hull)
        self.hull = None
        for w in self.wheels:
            self.world.DestroyBody(w)
        self.wheels = []
--- a/src/gym/envs/box2d/car_racing.py
+++ b/src/gym/envs/box2d/car_racing.py
@@ -0,0 +1,498 @@
 import sys, math
 import numpy as np
 import Box2D
 from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
 import gym
 from gym import spaces
 from gym.envs.box2d.car_dynamics import Car
 from gym.utils import colorize, seeding, EzPickle
 import pyglet
 from pyglet import gl
 # Easiest continuous control task to learn from pixels, a top-down racing environment.
 # Discreet control is reasonable in this environment as well, on/off discretisation is
 # fine.
 #
 # State consists of STATE_W x STATE_H pixels.
 #
 # Reward is -0.1 every frame and +1000/N for every track tile visited, where N is
 # the total number of tiles in track. For example, if you have finished in 732 frames,
 # your reward is 1000 - 0.1*732 = 926.8 points.
 #
 # Game is solved when agent consistently gets 900+ points. Track is random every episode.
 #
 # Episode finishes when all tiles are visited. Car also can go outside of PLAYFIELD, that
 # is far off the track, then it will get -100 and die.
 #
 # Some indicators shown at the bottom of the window and the state RGB buffer. From
 # left to right: true speed, four ABS sensors, steering wheel position, gyroscope.
 #
 # To play yourself (it's rather fast for humans), type:
 #
 # python gym/envs/box2d/car_racing.py
 #
 # Remember it's powerful rear-wheel drive car, don't press accelerator and turn at the
 # same time.
 #
 # Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
 STATE_W = 96   # less than Atari 160x192
 STATE_H = 96
 VIDEO_W = 600
 VIDEO_H = 400
 WINDOW_W = 1200
 WINDOW_H = 1000
 SCALE       = 6.0        # Track scale
 TRACK_RAD   = 900/SCALE  # Track is heavily morphed circle with this radius
 PLAYFIELD   = 2000/SCALE # Game over boundary
 FPS         = 50
 ZOOM        = 2.7        # Camera zoom
 ZOOM_FOLLOW = True       # Set to False for fixed view (don't use zoom)
 TRACK_DETAIL_STEP = 21/SCALE
 TRACK_TURN_RATE = 0.31
 TRACK_WIDTH = 40/SCALE
 BORDER = 8/SCALE
 BORDER_MIN_COUNT = 4
 ROAD_COLOR = [0.4, 0.4, 0.4]
 class FrictionDetector(contactListener):
    def __init__(self, env):
        contactListener.__init__(self)
        self.env = env
    def BeginContact(self, contact):
        self._contact(contact, True)
    def EndContact(self, contact):
        self._contact(contact, False)
    def _contact(self, contact, begin):
        tile = None
        obj = None
        u1 = contact.fixtureA.body.userData
        u2 = contact.fixtureB.body.userData
        if u1 and "road_friction" in u1.__dict__:
            tile = u1
            obj  = u2
        if u2 and "road_friction" in u2.__dict__:
            tile = u2
            obj  = u1
        if not tile: return
        tile.color[0] = ROAD_COLOR[0]
        tile.color[1] = ROAD_COLOR[1]
        tile.color[2] = ROAD_COLOR[2]
        if not obj or "tiles" not in obj.__dict__: return
        if begin:
            obj.tiles.add(tile)
            #print tile.road_friction, "ADD", len(obj.tiles)
            if not tile.road_visited:
                tile.road_visited = True
                self.env.reward += 1000.0/len(self.env.track)
                self.env.tile_visited_count += 1
        else:
            obj.tiles.remove(tile)
            #print tile.road_friction, "DEL", len(obj.tiles) -- should delete to zero when on grass (this works)
 class CarRacing(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second' : FPS
    }
    def __init__(self):
        EzPickle.__init__(self)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def _destroy(self):
        if not self.road: return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()
    def _create_track(self):
        CHECKPOINTS = 12
        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
            if c==0:
                alpha = 0
                rad = 1.5*TRACK_RAD
            if c==CHECKPOINTS-1:
                alpha = 2*math.pi*c/CHECKPOINTS
                self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
                rad = 1.5*TRACK_RAD
            checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )
        #print "\n".join(str(h) for h in checkpoints)
        #self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []
        # Go from one checkpoint to another to create track
        x, y, beta = 1.5*TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while 1:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2*math.pi
            while True: # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0: break
                if not failed: break
                alpha -= 2*math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x*dest_dx + r1y*dest_dy  # destination vector projected on rad
            while beta - alpha >  1.5*math.pi: beta -= 2*math.pi
            while beta - alpha < -1.5*math.pi: beta += 2*math.pi
            prev_beta = beta
            proj *= SCALE
            if proj >  0.3: beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
            if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001*proj))
            x += p1x*TRACK_DETAIL_STEP
            y += p1y*TRACK_DETAIL_STEP
            track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
            if laps > 4: break
            no_freeze -= 1
            if no_freeze==0: break
        #print "\n".join([str(t) for t in enumerate(track)])
        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i==0: return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
            if pass_through_start and i2==-1:
                i2 = i
            elif pass_through_start and i1==-1:
                i1 = i
                break
        print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
        assert i1!=-1
        assert i2!=-1
        track = track[i1:i2-1]
        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
            np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False
        # Red-white border on hard turns
        border = [False]*len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i-neg-0][1]
                beta2 = track[i-neg-1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i-neg] |= border[i]
        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i-1]
            road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2))
            t = self.world.CreateStaticBody( fixtures = fixtureDef(
                shape=polygonShape(vertices=[road1_l, road1_r, road2_r, road2_l])
                ))
            t.userData = t
            c = 0.01*(i%3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color ))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side* TRACK_WIDTH        *math.cos(beta1), y1 + side* TRACK_WIDTH        *math.sin(beta1))
                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
                b2_l = (x2 + side* TRACK_WIDTH        *math.cos(beta2), y2 + side* TRACK_WIDTH        *math.sin(beta2))
                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
                self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
        self.track = track
        return True
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False
        while True:
            success = self._create_track()
            if success: break
            print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])
        return self.step(None)[0]
    def step(self, action):
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])
        self.car.step(1.0/FPS)
        self.world.Step(1.0/FPS, 6*30, 2*30)
        self.t += 1.0/FPS
        self.state = self.render("state_pixels")
        step_reward = 0
        done = False
        if action is not None: # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            #self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count==len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100
        return self.state, step_reward, done, {}
    def render(self, mode='human'):
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000', font_size=36,
                x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
                color=(255,255,255,255))
            self.transform = rendering.Transform()
        if "t" not in self.__dict__: return  # reset() not called yet
        zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
        zoom_state  = ZOOM*SCALE*STATE_W/WINDOW_W
        zoom_video  = ZOOM*SCALE*VIDEO_W/WINDOW_W
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
            WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
        self.transform.set_rotation(angle)
        self.car.draw(self.viewer, mode!="state_pixels")
        arr = None
        win = self.viewer.window
        if mode != 'state_pixels':
            win.switch_to()
            win.dispatch_events()
        if mode=="rgb_array" or mode=="state_pixels":
            win.clear()
            t = self.transform
            if mode=='rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            gl.glViewport(0, 0, VP_W, VP_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(WINDOW_W, WINDOW_H)  # TODO: find why 2x needed, wtf
            image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
            arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
            arr = arr.reshape(VP_H, VP_W, 4)
            arr = arr[::-1, :, 0:3]
        if mode=="rgb_array" and not self.human_render: # agent can call or not call env.render() itself when recording video.
            win.flip()
        if mode=='human':
            self.human_render = True
            win.clear()
            t = self.transform
            gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(WINDOW_W, WINDOW_H)
            win.flip()
        self.viewer.onetime_geoms = []
        return arr
    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
    def render_road(self):
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD/20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k*x + k, k*y + 0, 0)
                gl.glVertex3f(k*x + 0, k*y + 0, 0)
                gl.glVertex3f(k*x + 0, k*y + k, 0)
                gl.glVertex3f(k*x + k, k*y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()
    def render_indicators(self, W, H):
        gl.glBegin(gl.GL_QUADS)
        s = W/40.0
        h = H/40.0
        gl.glColor4f(0,0,0,1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5*h, 0)
        gl.glVertex3f(0, 5*h, 0)
        gl.glVertex3f(0, 0, 0)
        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place+0)*s, h + h*val, 0)
            gl.glVertex3f((place+1)*s, h + h*val, 0)
            gl.glVertex3f((place+1)*s, h, 0)
            gl.glVertex3f((place+0)*s, h, 0)
        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place+0)*s, 4*h , 0)
            gl.glVertex3f((place+val)*s, 4*h, 0)
            gl.glVertex3f((place+val)*s, 2*h, 0)
            gl.glVertex3f((place+0)*s, 2*h, 0)
        true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02*true_speed, (1,1,1))
        vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors
        vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
        vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
        vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
        horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
        horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
 if __name__=="__main__":
    from pyglet.window import key
    a = np.array( [0.0, 0.0, 0.0] )
    def key_press(k, mod):
        global restart
        if k==0xff0d: restart = True
        if k==key.LEFT:  a[0] = -1.0
        if k==key.RIGHT: a[0] = +1.0
        if k==key.UP:    a[1] = +1.0
        if k==key.DOWN:  a[2] = +0.8   # set 1.0 for wheels to block to zero rotation
    def key_release(k, mod):
        if k==key.LEFT  and a[0]==-1.0: a[0] = 0
        if k==key.RIGHT and a[0]==+1.0: a[0] = 0
        if k==key.UP:    a[1] = 0
        if k==key.DOWN:  a[2] = 0
    env = CarRacing()
    env.render()
    record_video = False
    if record_video:
        env.monitor.start('/tmp/video-test', force=True)
    env.viewer.window.on_key_press = key_press
    env.viewer.window.on_key_release = key_release
    while True:
        env.reset()
        total_reward = 0.0
        steps = 0
        restart = False
        while True:
            s, r, done, info = env.step(a)
            total_reward += r
            if steps % 200 == 0 or done:
                print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
                print("step {} total_reward {:+0.2f}".format(steps, total_reward))
                #import matplotlib.pyplot as plt
                #plt.imshow(s)
                #plt.savefig("test.jpeg")
            steps += 1
            if not record_video: # Faster, but you can as well call env.render() every time to play full window.
                env.render()
            if done or restart: break
    env.close()
--- a/src/gym/envs/box2d/lunar_lander.py
+++ b/src/gym/envs/box2d/lunar_lander.py
@@ -0,0 +1,420 @@
 import sys, math
 import numpy as np
 import Box2D
 from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
 import gym
 from gym import spaces
 from gym.utils import seeding, EzPickle
 # Rocket trajectory optimization is a classic topic in Optimal Control.
 #
 # According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
 # turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
 #
 # Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
 # Reward for moving from the top of the screen to landing pad and zero speed is about 100..140 points.
 # If lander moves away from landing pad it loses reward back. Episode finishes if the lander crashes or
 # comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
 # engine is -0.3 points each frame. Solved is 200 points.
 #
 # Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
 # on its first attempt. Please see source code for details.
 #
 # Too see heuristic landing, run:
 #
 # python gym/envs/box2d/lunar_lander.py
 #
 # To play yourself, run:
 #
 # python examples/agents/keyboard_agent.py LunarLander-v2
 #
 # Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
 FPS    = 50
 SCALE  = 30.0   # affects how fast-paced the game is, forces should be adjusted as well
 MAIN_ENGINE_POWER  = 13.0
 SIDE_ENGINE_POWER  =  0.6
 INITIAL_RANDOM = 1000.0   # Set 1500 to make game harder
 LANDER_POLY =[
    (-14,+17), (-17,0), (-17,-10),
    (+17,-10), (+17,0), (+14,+17)
    ]
 LEG_AWAY = 20
 LEG_DOWN = 18
 LEG_W, LEG_H = 2, 8
 LEG_SPRING_TORQUE = 40
 SIDE_ENGINE_HEIGHT = 14.0
 SIDE_ENGINE_AWAY   = 12.0
 VIEWPORT_W = 600
 VIEWPORT_H = 400
 class ContactDetector(contactListener):
    def __init__(self, env):
        contactListener.__init__(self)
        self.env = env
    def BeginContact(self, contact):
        if self.env.lander==contact.fixtureA.body or self.env.lander==contact.fixtureB.body:
            self.env.game_over = True
        for i in range(2):
            if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
                self.env.legs[i].ground_contact = True
    def EndContact(self, contact):
        for i in range(2):
            if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
                self.env.legs[i].ground_contact = False
 class LunarLander(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second' : FPS
    }
    continuous = False
    def __init__(self):
        EzPickle.__init__(self)
        self.seed()
        self.viewer = None
        self.world = Box2D.b2World()
        self.moon = None
        self.lander = None
        self.particles = []
        self.prev_reward = None
        # useful range is -1 .. +1, but spikes can be higher
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32)
        if self.continuous:
            # Action is two floats [main engine, left-right engines].
            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
            # Left-right:  -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
            self.action_space = spaces.Box(-1, +1, (2,), dtype=np.float32)
        else:
            # Nop, fire left engine, main engine, right engine
            self.action_space = spaces.Discrete(4)
        self.reset()
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def _destroy(self):
        if not self.moon: return
        self.world.contactListener = None
        self._clean_particles(True)
        self.world.DestroyBody(self.moon)
        self.moon = None
        self.world.DestroyBody(self.lander)
        self.lander = None
        self.world.DestroyBody(self.legs[0])
        self.world.DestroyBody(self.legs[1])
    def reset(self):
        self._destroy()
        self.world.contactListener_keepref = ContactDetector(self)
        self.world.contactListener = self.world.contactListener_keepref
        self.game_over = False
        self.prev_shaping = None
        W = VIEWPORT_W/SCALE
        H = VIEWPORT_H/SCALE
        # terrain
        CHUNKS = 11
        height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,) )
        chunk_x  = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
        self.helipad_x1 = chunk_x[CHUNKS//2-1]
        self.helipad_x2 = chunk_x[CHUNKS//2+1]
        self.helipad_y  = H/4
        height[CHUNKS//2-2] = self.helipad_y
        height[CHUNKS//2-1] = self.helipad_y
        height[CHUNKS//2+0] = self.helipad_y
        height[CHUNKS//2+1] = self.helipad_y
        height[CHUNKS//2+2] = self.helipad_y
        smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)]
        self.moon = self.world.CreateStaticBody( shapes=edgeShape(vertices=[(0, 0), (W, 0)]) )
        self.sky_polys = []
        for i in range(CHUNKS-1):
            p1 = (chunk_x[i],   smooth_y[i])
            p2 = (chunk_x[i+1], smooth_y[i+1])
            self.moon.CreateEdgeFixture(
                vertices=[p1,p2],
                density=0,
                friction=0.1)
            self.sky_polys.append( [p1, p2, (p2[0],H), (p1[0],H)] )
        self.moon.color1 = (0.0,0.0,0.0)
        self.moon.color2 = (0.0,0.0,0.0)
        initial_y = VIEWPORT_H/SCALE
        self.lander = self.world.CreateDynamicBody(
            position = (VIEWPORT_W/SCALE/2, initial_y),
            angle=0.0,
            fixtures = fixtureDef(
                shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in LANDER_POLY ]),
                density=5.0,
                friction=0.1,
                categoryBits=0x0010,
                maskBits=0x001,  # collide only with ground
                restitution=0.0) # 0.99 bouncy
                )
        self.lander.color1 = (0.5,0.4,0.9)
        self.lander.color2 = (0.3,0.3,0.5)
        self.lander.ApplyForceToCenter( (
            self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
            self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
            ), True)
        self.legs = []
        for i in [-1,+1]:
            leg = self.world.CreateDynamicBody(
                position = (VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
                angle = (i*0.05),
                fixtures = fixtureDef(
                    shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
                    density=1.0,
                    restitution=0.0,
                    categoryBits=0x0020,
                    maskBits=0x001)
                )
            leg.ground_contact = False
            leg.color1 = (0.5,0.4,0.9)
            leg.color2 = (0.3,0.3,0.5)
            rjd = revoluteJointDef(
                bodyA=self.lander,
                bodyB=leg,
                localAnchorA=(0, 0),
                localAnchorB=(i*LEG_AWAY/SCALE, LEG_DOWN/SCALE),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=LEG_SPRING_TORQUE,
                motorSpeed=+0.3*i  # low enough not to jump back into the sky
                )
            if i==-1:
                rjd.lowerAngle = +0.9 - 0.5  # Yes, the most esoteric numbers here, angles legs have freedom to travel within
                rjd.upperAngle = +0.9
            else:
                rjd.lowerAngle = -0.9
                rjd.upperAngle = -0.9 + 0.5
            leg.joint = self.world.CreateJoint(rjd)
            self.legs.append(leg)
        self.drawlist = [self.lander] + self.legs
        return self.step(np.array([0,0]) if self.continuous else 0)[0]
    def _create_particle(self, mass, x, y, ttl):
        p = self.world.CreateDynamicBody(
            position = (x,y),
            angle=0.0,
            fixtures = fixtureDef(
                shape=circleShape(radius=2/SCALE, pos=(0,0)),
                density=mass,
                friction=0.1,
                categoryBits=0x0100,
                maskBits=0x001,  # collide only with ground
                restitution=0.3)
                )
        p.ttl = ttl
        self.particles.append(p)
        self._clean_particles(False)
        return p
    def _clean_particles(self, all):
        while self.particles and (all or self.particles[0].ttl<0):
            self.world.DestroyBody(self.particles.pop(0))
    def step(self, action):
        if self.continuous:
            action = np.clip(action, -1, +1).astype(np.float32)
        else:
            assert self.action_space.contains(action), "%r (%s) invalid " % (action, type(action))
        # Engines
        tip  = (math.sin(self.lander.angle), math.cos(self.lander.angle))
        side = (-tip[1], tip[0]);
        dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
        m_power = 0.0
        if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2):
            # Main engine
            if self.continuous:
                m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5   # 0.5..1.0
                assert m_power>=0.5 and m_power <= 1.0
            else:
                m_power = 1.0
            ox =  tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1]   # 4 is move a bit downwards, +-2 for randomness
            oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
            impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
            p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power)    # particles are just a decoration, 3.5 is here to make particle speed adequate
            p.ApplyLinearImpulse(           ( ox*MAIN_ENGINE_POWER*m_power,  oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
            self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER*m_power, -oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
        s_power = 0.0
        if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]):
            # Orientation engines
            if self.continuous:
                direction = np.sign(action[1])
                s_power = np.clip(np.abs(action[1]), 0.5,1.0)
                assert s_power>=0.5 and s_power <= 1.0
            else:
                direction = action-2
                s_power = 1.0
            ox =  tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
            oy = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
            impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
            p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
            p.ApplyLinearImpulse(           ( ox*SIDE_ENGINE_POWER*s_power,  oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
            self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER*s_power, -oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
        self.world.Step(1.0/FPS, 6*30, 2*30)
        pos = self.lander.position
        vel = self.lander.linearVelocity
        state = [
            (pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
            (pos.y - (self.helipad_y+LEG_DOWN/SCALE)) / (VIEWPORT_H/SCALE/2),
            vel.x*(VIEWPORT_W/SCALE/2)/FPS,
            vel.y*(VIEWPORT_H/SCALE/2)/FPS,
            self.lander.angle,
            20.0*self.lander.angularVelocity/FPS,
            1.0 if self.legs[0].ground_contact else 0.0,
            1.0 if self.legs[1].ground_contact else 0.0
            ]
        assert len(state)==8
        reward = 0
        shaping = \
            - 100*np.sqrt(state[0]*state[0] + state[1]*state[1]) \
            - 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \
            - 100*abs(state[4]) + 10*state[6] + 10*state[7]   # And ten points for legs contact, the idea is if you
                                                              # lose contact again after landing, you get negative reward
        if self.prev_shaping is not None:
            reward = shaping - self.prev_shaping
        self.prev_shaping = shaping
        reward -= m_power*0.30  # less fuel spent is better, about -30 for heurisic landing
        reward -= s_power*0.03
        done = False
        if self.game_over or abs(state[0]) >= 1.0:
            done   = True
            reward = -100
        if not self.lander.awake:
            done   = True
            reward = +100
        return np.array(state, dtype=np.float32), reward, done, {}
    def render(self, mode='human'):
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
            self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE)
        for obj in self.particles:
            obj.ttl -= 0.15
            obj.color1 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
            obj.color2 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
        self._clean_particles(False)
        for p in self.sky_polys:
            self.viewer.draw_polygon(p, color=(0,0,0))
        for obj in self.particles + self.drawlist:
            for f in obj.fixtures:
                trans = f.body.transform
                if type(f.shape) is circleShape:
                    t = rendering.Transform(translation=trans*f.shape.pos)
                    self.viewer.draw_circle(f.shape.radius, 20, color=obj.color1).add_attr(t)
                    self.viewer.draw_circle(f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2).add_attr(t)
                else:
                    path = [trans*v for v in f.shape.vertices]
                    self.viewer.draw_polygon(path, color=obj.color1)
                    path.append(path[0])
                    self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
        for x in [self.helipad_x1, self.helipad_x2]:
            flagy1 = self.helipad_y
            flagy2 = flagy1 + 50/SCALE
            self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(1,1,1) )
            self.viewer.draw_polygon( [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)], color=(0.8,0.8,0) )
        return self.viewer.render(return_rgb_array = mode=='rgb_array')
    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
 class LunarLanderContinuous(LunarLander):
    continuous = True
 def heuristic(env, s):
    # Heuristic for:
    # 1. Testing. 
    # 2. Demonstration rollout.
    angle_targ = s[0]*0.5 + s[2]*1.0         # angle should point towards center (s[0] is horizontal coordinate, s[2] hor speed)
    if angle_targ >  0.4: angle_targ =  0.4  # more than 0.4 radians (22 degrees) is bad
    if angle_targ < -0.4: angle_targ = -0.4
    hover_targ = 0.55*np.abs(s[0])           # target y should be proporional to horizontal offset
    # PID controller: s[4] angle, s[5] angularSpeed
    angle_todo = (angle_targ - s[4])*0.5 - (s[5])*1.0
    #print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo))
    # PID controller: s[1] vertical coordinate s[3] vertical speed
    hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
    #print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo))
    if s[6] or s[7]: # legs have contact
        angle_todo = 0
        hover_todo = -(s[3])*0.5  # override to reduce fall speed, that's all we need after contact
    if env.continuous:
        a = np.array( [hover_todo*20 - 1, -angle_todo*20] )
        a = np.clip(a, -1, +1)
    else:
        a = 0
        if hover_todo > np.abs(angle_todo) and hover_todo > 0.05: a = 2
        elif angle_todo < -0.05: a = 3
        elif angle_todo > +0.05: a = 1
    return a
 def demo_heuristic_lander(env, seed=None, render=False):
    env.seed(seed)
    total_reward = 0
    steps = 0
    s = env.reset()
    while True:
        a = heuristic(env, s)
        s, r, done, info = env.step(a)
        total_reward += r
        if render:
            still_open = env.render()
            if still_open == False: break
        if steps % 20 == 0 or done:
            print("observations:", " ".join(["{:+0.2f}".format(x) for x in s]))
            print("step {} total_reward {:+0.2f}".format(steps, total_reward))
        steps += 1
        if done: break
    return total_reward
 if __name__ == '__main__':
    demo_heuristic_lander(LunarLander(), render=True)
--- a/src/gym/envs/box2d/test_lunar_lander.py
+++ b/src/gym/envs/box2d/test_lunar_lander.py
@@ -0,0 +1,13 @@
 from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander
 def test_lunar_lander():
    _test_lander(LunarLander(), seed=0)
 def test_lunar_lander_continuous():
    _test_lander(LunarLanderContinuous(), seed=0)
 def _test_lander(env, seed=None, render=False):
    total_reward = demo_heuristic_lander(env, seed=seed, render=render)
    assert total_reward > 100
--- a/src/gym/envs/classic_control/init.py
+++ b/src/gym/envs/classic_control/init.py
@@ -0,0 +1,6 @@
 from gym.envs.classic_control.cartpole import CartPoleEnv
 from gym.envs.classic_control.mountain_car import MountainCarEnv
 from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
 from gym.envs.classic_control.pendulum import PendulumEnv
 from gym.envs.classic_control.acrobot import AcrobotEnv
--- a/src/gym/envs/classic_control/acrobot.py
+++ b/src/gym/envs/classic_control/acrobot.py
@@ -0,0 +1,304 @@
 """classic Acrobot task"""
 from gym import core, spaces
 from gym.utils import seeding
 import numpy as np
 from numpy import sin, cos, pi
 __copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
 __credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
               "William Dabney", "Jonathan P. How"]
 __license__ = "BSD 3-Clause"
 __author__ = "Christoph Dann <cdann@cdann.de>"
 # SOURCE:
 # https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
 class AcrobotEnv(core.Env):
    """
    Acrobot is a 2-link pendulum with only the second joint actuated
    Intitially, both links point downwards. The goal is to swing the
    end-effector at a height at least the length of one link above the base.
    Both links can swing freely and can pass by each other, i.e., they don't
    collide when they have the same angle.
    **STATE:**
    The state consists of the sin() and cos() of the two rotational joint
    angles and the joint angular velocities :
    [cos(theta1) sin(theta1) cos(theta2) sin(theta2) thetaDot1 thetaDot2].
    For the first link, an angle of 0 corresponds to the link pointing downwards.
    The angle of the second link is relative to the angle of the first link.
    An angle of 0 corresponds to having the same angle between the two links.
    A state of [1, 0, 1, 0, ..., ...] means that both links point downwards.
    **ACTIONS:**
    The action is either applying +1, 0 or -1 torque on the joint between
    the two pendulum links.
    .. note::
        The dynamics equations were missing some terms in the NIPS paper which
        are present in the book. R. Sutton confirmed in personal correspondance
        that the experimental results shown in the paper and the book were
        generated with the equations shown in the book.
        However, there is the option to run the domain with the paper equations
        by setting book_or_nips = 'nips'
    **REFERENCE:**
    .. seealso::
        R. Sutton: Generalization in Reinforcement Learning:
        Successful Examples Using Sparse Coarse Coding (NIPS 1996)
    .. seealso::
        R. Sutton and A. G. Barto:
        Reinforcement learning: An introduction.
        Cambridge: MIT press, 1998.
    .. warning::
        This version of the domain uses the Runge-Kutta method for integrating
        the system dynamics and is more realistic, but also considerably harder
        than the original version which employs Euler integration,
        see the AcrobotLegacy class.
    """
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second' : 15
    }
    dt = .2
    LINK_LENGTH_1 = 1.  # [m]
    LINK_LENGTH_2 = 1.  # [m]
    LINK_MASS_1 = 1.  #: [kg] mass of link 1
    LINK_MASS_2 = 1.  #: [kg] mass of link 2
    LINK_COM_POS_1 = 0.5  #: [m] position of the center of mass of link 1
    LINK_COM_POS_2 = 0.5  #: [m] position of the center of mass of link 2
    LINK_MOI = 1.  #: moments of inertia for both links
    MAX_VEL_1 = 4 * np.pi
    MAX_VEL_2 = 9 * np.pi
    AVAIL_TORQUE = [-1., 0., +1]
    torque_noise_max = 0.
    #: use dynamics equations from the nips paper or the book
    book_or_nips = "book"
    action_arrow = None
    domain_fig = None
    actions_num = 3
    def __init__(self):
        self.viewer = None
        high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
        low = -high
        self.observation_space = spaces.Box(low=low, high=high)
        self.action_space = spaces.Discrete(3)
        self.state = None
        self.seed()
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def reset(self):
        self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
        return self._get_ob()
    def step(self, a):
        s = self.state
        torque = self.AVAIL_TORQUE[a]
        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)
        # Now, augment the state with our force action so it can be passed to
        # _dsdt
        s_augmented = np.append(s, torque)
        ns = rk4(self._dsdt, s_augmented, [0, self.dt])
        # only care about final timestep of integration returned by integrator
        ns = ns[-1]
        ns = ns[:4]  # omit action
        # ODEINT IS TOO SLOW!
        # ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
        # self.s_continuous = ns_continuous[-1] # We only care about the state
        # at the ''final timestep'', self.dt
        ns[0] = wrap(ns[0], -pi, pi)
        ns[1] = wrap(ns[1], -pi, pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
        self.state = ns
        terminal = self._terminal()
        reward = -1. if not terminal else 0.
        return (self._get_ob(), reward, terminal, {})
    def _get_ob(self):
        s = self.state
        return np.array([cos(s[0]), np.sin(s[0]), cos(s[1]), sin(s[1]), s[2], s[3]])
    def _terminal(self):
        s = self.state
        return bool(-np.cos(s[0]) - np.cos(s[1] + s[0]) > 1.)
    def _dsdt(self, s_augmented, t):
        m1 = self.LINK_MASS_1
        m2 = self.LINK_MASS_2
        l1 = self.LINK_LENGTH_1
        lc1 = self.LINK_COM_POS_1
        lc2 = self.LINK_COM_POS_2
        I1 = self.LINK_MOI
        I2 = self.LINK_MOI
        g = 9.8
        a = s_augmented[-1]
        s = s_augmented[:-1]
        theta1 = s[0]
        theta2 = s[1]
        dtheta1 = s[2]
        dtheta2 = s[3]
        d1 = m1 * lc1 ** 2 + m2 * \
            (l1 ** 2 + lc2 ** 2 + 2 * l1 * lc2 * np.cos(theta2)) + I1 + I2
        d2 = m2 * (lc2 ** 2 + l1 * lc2 * np.cos(theta2)) + I2
        phi2 = m2 * lc2 * g * np.cos(theta1 + theta2 - np.pi / 2.)
        phi1 = - m2 * l1 * lc2 * dtheta2 ** 2 * np.sin(theta2) \
               - 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * np.sin(theta2)  \
            + (m1 * lc1 + m2 * l1) * g * np.cos(theta1 - np.pi / 2) + phi2
        if self.book_or_nips == "nips":
            # the following line is consistent with the description in the
            # paper
            ddtheta2 = (a + d2 / d1 * phi1 - phi2) / \
                (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
        else:
            # the following line is consistent with the java implementation and the
            # book
            ddtheta2 = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1 ** 2 * np.sin(theta2) - phi2) \
                / (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
        ddtheta1 = -(d2 * ddtheta2 + phi1) / d1
        return (dtheta1, dtheta2, ddtheta1, ddtheta2, 0.)
    def render(self, mode='human'):
        from gym.envs.classic_control import rendering
        s = self.state
        if self.viewer is None:
            self.viewer = rendering.Viewer(500,500)
            bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2  # 2.2 for default
            self.viewer.set_bounds(-bound,bound,-bound,bound)
        if s is None: return None
        p1 = [-self.LINK_LENGTH_1 *
              np.cos(s[0]), self.LINK_LENGTH_1 * np.sin(s[0])]
        p2 = [p1[0] - self.LINK_LENGTH_2 * np.cos(s[0] + s[1]),
              p1[1] + self.LINK_LENGTH_2 * np.sin(s[0] + s[1])]
        xys = np.array([[0,0], p1, p2])[:,::-1]
        thetas = [s[0]-np.pi/2, s[0]+s[1]-np.pi/2]
        link_lengths = [self.LINK_LENGTH_1, self.LINK_LENGTH_2]
        self.viewer.draw_line((-2.2, 1), (2.2, 1))
        for ((x,y),th,llen) in zip(xys, thetas, link_lengths):
            l,r,t,b = 0, llen, .1, -.1
            jtransform = rendering.Transform(rotation=th, translation=(x,y))
            link = self.viewer.draw_polygon([(l,b), (l,t), (r,t), (r,b)])
            link.add_attr(jtransform)
            link.set_color(0,.8, .8)
            circ = self.viewer.draw_circle(.1)
            circ.set_color(.8, .8, 0)
            circ.add_attr(jtransform)
        return self.viewer.render(return_rgb_array = mode=='rgb_array')
    def close(self):
        if self.viewer:
            self.viewer.close()
            self.viewer = None
 def wrap(x, m, M):
    """
    :param x: a scalar
    :param m: minimum possible value in range
    :param M: maximum possible value in range
    Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
    truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
    For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
    """
    diff = M - m
    while x > M:
        x = x - diff
    while x < m:
        x = x + diff
    return x
 def bound(x, m, M=None):
    """
    :param x: scalar
    Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
    have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
    """
    if M is None:
        M = m[1]
        m = m[0]
    # bound x between min (m) and Max (M)
    return min(max(x, m), M)
 def rk4(derivs, y0, t, *args, **kwargs):
    """
    Integrate 1D or ND system of ODEs using 4-th order Runge-Kutta.
    This is a toy implementation which may be useful if you find
    yourself stranded on a system w/o scipy.  Otherwise use
    :func:`scipy.integrate`.
    *y0*
        initial state vector
    *t*
        sample times
    *derivs*
        returns the derivative of the system and has the
        signature ``dy = derivs(yi, ti)``
    *args*
        additional arguments passed to the derivative function
    *kwargs*
        additional keyword arguments passed to the derivative function
    Example 1 ::
        ## 2D system
        def derivs6(x,t):
            d1 =  x[0] + 2*x[1]
            d2 =  -3*x[0] + 4*x[1]
            return (d1, d2)
        dt = 0.0005
        t = arange(0.0, 2.0, dt)
        y0 = (1,2)
        yout = rk4(derivs6, y0, t)
    Example 2::
        ## 1D system
        alpha = 2
        def derivs(x,t):
            return -alpha*x + exp(-t)
        y0 = 1
        yout = rk4(derivs, y0, t)
    If you have access to scipy, you should probably be using the
    scipy.integrate tools rather than this function.
    """
    try:
        Ny = len(y0)
    except TypeError:
        yout = np.zeros((len(t),), np.float_)
    else:
        yout = np.zeros((len(t), Ny), np.float_)
    yout[0] = y0
    for i in np.arange(len(t) - 1):
        thist = t[i]
        dt = t[i + 1] - thist
        dt2 = dt / 2.0
        y0 = yout[i]
        k1 = np.asarray(derivs(y0, thist, *args, **kwargs))
        k2 = np.asarray(derivs(y0 + dt2 * k1, thist + dt2, *args, **kwargs))
        k3 = np.asarray(derivs(y0 + dt2 * k2, thist + dt2, *args, **kwargs))
        k4 = np.asarray(derivs(y0 + dt * k3, thist + dt, *args, **kwargs))
        yout[i + 1] = y0 + dt / 6.0 * (k1 + 2 * k2 + 2 * k3 + k4)
    return yout
--- a/src/gym/envs/classic_control/assets/clockwise.png
+++ b/src/gym/envs/classic_control/assets/clockwise.png
--- a/src/gym/envs/classic_control/cartpole.py
+++ b/src/gym/envs/classic_control/cartpole.py
@@ -0,0 +1,193 @@
 """
 Classic cart-pole system implemented by Rich Sutton et al.
 Copied from http://incompleteideas.net/sutton/book/code/pole.c
 permalink: https://perma.cc/C9ZM-652R
 """
 import math
 import gym
 from gym import spaces, logger
 from gym.utils import seeding
 import numpy as np
 class CartPoleEnv(gym.Env):
    """
    Description:
        A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.
    Source:
        This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson
    Observation: 
        Type: Box(4)
        Num	Observation                 Min         Max
        0	Cart Position             -4.8            4.8
        1	Cart Velocity             -Inf            Inf
        2	Pole Angle                 -24°           24°
        3	Pole Velocity At Tip      -Inf            Inf
    Actions:
        Type: Discrete(2)
        Num	Action
        0	Push cart to the left
        1	Push cart to the right
        Note: The amount the velocity is reduced or increased is not fixed as it depends on the angle the pole is pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it
    Reward:
        Reward is 1 for every step taken, including the termination step
    Starting State:
        All observations are assigned a uniform random value between ±0.05
    Episode Termination:
        Pole Angle is more than ±12°
        Cart Position is more than ±2.4 (center of the cart reaches the edge of the display)
        Episode length is greater than 200
        Solved Requirements
        Considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.
    """
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second' : 50
    }
    def __init__(self):
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = (self.masspole + self.masscart)
        self.length = 0.5 # actually half the pole's length
        self.polemass_length = (self.masspole * self.length)
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = 'euler'
        # Angle at which to fail the episode
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4
        # Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
        high = np.array([
            self.x_threshold * 2,
            np.finfo(np.float32).max,
            self.theta_threshold_radians * 2,
            np.finfo(np.float32).max])
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        self.seed()
        self.viewer = None
        self.state = None
        self.steps_beyond_done = None
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
        state = self.state
        x, x_dot, theta, theta_dot = state
        force = self.force_mag if action==1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
        xacc  = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        if self.kinematics_integrator == 'euler':
            x  = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else: # semi-implicit euler
            x_dot = x_dot + self.tau * xacc
            x  = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot
        self.state = (x,x_dot,theta,theta_dot)
        done =  x < -self.x_threshold \
                or x > self.x_threshold \
                or theta < -self.theta_threshold_radians \
                or theta > self.theta_threshold_radians
        done = bool(done)
        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0
        return np.array(self.state), reward, done, {}
    def reset(self):
        self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
        self.steps_beyond_done = None
        return np.array(self.state)
    def render(self, mode='human'):
        screen_width = 600
        screen_height = 400
        world_width = self.x_threshold*2
        scale = screen_width/world_width
        carty = 100 # TOP OF CART
        polewidth = 10.0
        polelen = scale * (2 * self.length)
        cartwidth = 50.0
        cartheight = 30.0
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)
            l,r,t,b = -cartwidth/2, cartwidth/2, cartheight/2, -cartheight/2
            axleoffset =cartheight/4.0
            cart = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
            self.carttrans = rendering.Transform()
            cart.add_attr(self.carttrans)
            self.viewer.add_geom(cart)
            l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2
            pole = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
            pole.set_color(.8,.6,.4)
            self.poletrans = rendering.Transform(translation=(0, axleoffset))
            pole.add_attr(self.poletrans)
            pole.add_attr(self.carttrans)
            self.viewer.add_geom(pole)
            self.axle = rendering.make_circle(polewidth/2)
            self.axle.add_attr(self.poletrans)
            self.axle.add_attr(self.carttrans)
            self.axle.set_color(.5,.5,.8)
            self.viewer.add_geom(self.axle)
            self.track = rendering.Line((0,carty), (screen_width,carty))
            self.track.set_color(0,0,0)
            self.viewer.add_geom(self.track)
            self._pole_geom = pole
        if self.state is None: return None
        # Edit the pole polygon vertex
        pole = self._pole_geom
        l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2
        pole.v = [(l,b), (l,t), (r,t), (r,b)]
        x = self.state
        cartx = x[0]*scale+screen_width/2.0 # MIDDLE OF CART
        self.carttrans.set_translation(cartx, carty)
        self.poletrans.set_rotation(-x[2])
        return self.viewer.render(return_rgb_array = mode=='rgb_array')
    def close(self):
        if self.viewer:
            self.viewer.close()
            self.viewer = None
--- a/src/gym/envs/classic_control/continuous_mountain_car.py
+++ b/src/gym/envs/classic_control/continuous_mountain_car.py
@@ -0,0 +1,144 @@
 # -*- coding: utf-8 -*-
 """
@author: Olivier Sigaud
 A merge between two sources:
 * Adaptation of the MountainCar Environment from the "FAReinforcement" library
 of Jose Antonio Martin H. (version 1.0), adapted by  'Tom Schaul, tom@idsia.ch'
 and then modified by Arnaud de Broissia
 * the OpenAI/gym MountainCar environment
 itself from 
 http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
 permalink: https://perma.cc/6Z2N-PFWC
 """
 import math
 import gym
 from gym import spaces
 from gym.utils import seeding
 import numpy as np
 class Continuous_MountainCarEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }
    def __init__(self):
        self.min_action = -1.0
        self.max_action = 1.0
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        self.power = 0.0015
        self.low_state = np.array([self.min_position, -self.max_speed])
        self.high_state = np.array([self.max_position, self.max_speed])
        self.viewer = None
        self.action_space = spaces.Box(low=self.min_action, high=self.max_action, shape=(1,))
        self.observation_space = spaces.Box(low=self.low_state, high=self.high_state)
        self.seed()
        self.reset()
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def step(self, action):
        position = self.state[0]
        velocity = self.state[1]
        force = min(max(action[0], -1.0), 1.0)
        velocity += force*self.power -0.0025 * math.cos(3*position)
        if (velocity > self.max_speed): velocity = self.max_speed
        if (velocity < -self.max_speed): velocity = -self.max_speed
        position += velocity
        if (position > self.max_position): position = self.max_position
        if (position < self.min_position): position = self.min_position
        if (position==self.min_position and velocity<0): velocity = 0
        done = bool(position >= self.goal_position)
        reward = 0
        if done:
            reward = 100.0
        reward-= math.pow(action[0],2)*0.1
        self.state = np.array([position, velocity])
        return self.state, reward, done, {}
    def reset(self):
        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
        return np.array(self.state)
 #    def get_state(self):
 #        return self.state
    def _height(self, xs):
        return np.sin(3 * xs)*.45+.55
    def render(self, mode='human'):
        screen_width = 600
        screen_height = 400
        world_width = self.max_position - self.min_position
        scale = screen_width/world_width
        carwidth=40
        carheight=20
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)
            xs = np.linspace(self.min_position, self.max_position, 100)
            ys = self._height(xs)
            xys = list(zip((xs-self.min_position)*scale, ys*scale))
            self.track = rendering.make_polyline(xys)
            self.track.set_linewidth(4)
            self.viewer.add_geom(self.track)
            clearance = 10
            l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
            car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
            car.add_attr(rendering.Transform(translation=(0, clearance)))
            self.cartrans = rendering.Transform()
            car.add_attr(self.cartrans)
            self.viewer.add_geom(car)
            frontwheel = rendering.make_circle(carheight/2.5)
            frontwheel.set_color(.5, .5, .5)
            frontwheel.add_attr(rendering.Transform(translation=(carwidth/4,clearance)))
            frontwheel.add_attr(self.cartrans)
            self.viewer.add_geom(frontwheel)
            backwheel = rendering.make_circle(carheight/2.5)
            backwheel.add_attr(rendering.Transform(translation=(-carwidth/4,clearance)))
            backwheel.add_attr(self.cartrans)
            backwheel.set_color(.5, .5, .5)
            self.viewer.add_geom(backwheel)
            flagx = (self.goal_position-self.min_position)*scale
            flagy1 = self._height(self.goal_position)*scale
            flagy2 = flagy1 + 50
            flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
            self.viewer.add_geom(flagpole)
            flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10), (flagx+25, flagy2-5)])
            flag.set_color(.8,.8,0)
            self.viewer.add_geom(flag)
        pos = self.state[0]
        self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
        self.cartrans.set_rotation(math.cos(3 * pos))
        return self.viewer.render(return_rgb_array = mode=='rgb_array')
    def close(self):
        if self.viewer:
            self.viewer.close()
            self.viewer = None
--- a/src/gym/envs/classic_control/mountain_car.py
+++ b/src/gym/envs/classic_control/mountain_car.py
@@ -0,0 +1,119 @@
 """
 http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
 permalink: https://perma.cc/6Z2N-PFWC
 """
 import math
 import gym
 from gym import spaces
 from gym.utils import seeding
 import numpy as np
 class MountainCarEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }
    def __init__(self):
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5
        self.low = np.array([self.min_position, -self.max_speed])
        self.high = np.array([self.max_position, self.max_speed])
        self.viewer = None
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(self.low, self.high)
        self.seed()
        self.reset()
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        position, velocity = self.state
        velocity += (action-1)*0.001 + math.cos(3*position)*(-0.0025)
        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
        position += velocity
        position = np.clip(position, self.min_position, self.max_position)
        if (position==self.min_position and velocity<0): velocity = 0
        done = bool(position >= self.goal_position)
        reward = -1.0
        self.state = (position, velocity)
        return np.array(self.state), reward, done, {}
    def reset(self):
        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
        return np.array(self.state)
    def _height(self, xs):
        return np.sin(3 * xs)*.45+.55
    def render(self, mode='human'):
        screen_width = 600
        screen_height = 400
        world_width = self.max_position - self.min_position
        scale = screen_width/world_width
        carwidth=40
        carheight=20
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)
            xs = np.linspace(self.min_position, self.max_position, 100)
            ys = self._height(xs)
            xys = list(zip((xs-self.min_position)*scale, ys*scale))
            self.track = rendering.make_polyline(xys)
            self.track.set_linewidth(4)
            self.viewer.add_geom(self.track)
            clearance = 10
            l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
            car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
            car.add_attr(rendering.Transform(translation=(0, clearance)))
            self.cartrans = rendering.Transform()
            car.add_attr(self.cartrans)
            self.viewer.add_geom(car)
            frontwheel = rendering.make_circle(carheight/2.5)
            frontwheel.set_color(.5, .5, .5)
            frontwheel.add_attr(rendering.Transform(translation=(carwidth/4,clearance)))
            frontwheel.add_attr(self.cartrans)
            self.viewer.add_geom(frontwheel)
            backwheel = rendering.make_circle(carheight/2.5)
            backwheel.add_attr(rendering.Transform(translation=(-carwidth/4,clearance)))
            backwheel.add_attr(self.cartrans)
            backwheel.set_color(.5, .5, .5)
            self.viewer.add_geom(backwheel)
            flagx = (self.goal_position-self.min_position)*scale
            flagy1 = self._height(self.goal_position)*scale
            flagy2 = flagy1 + 50
            flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
            self.viewer.add_geom(flagpole)
            flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10), (flagx+25, flagy2-5)])
            flag.set_color(.8,.8,0)
            self.viewer.add_geom(flag)
        pos = self.state[0]
        self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
        self.cartrans.set_rotation(math.cos(3 * pos))
        return self.viewer.render(return_rgb_array = mode=='rgb_array')
    def close(self):
        if self.viewer:
            self.viewer.close()
            self.viewer = None
--- a/src/gym/envs/classic_control/pendulum.py
+++ b/src/gym/envs/classic_control/pendulum.py
@@ -0,0 +1,90 @@
 import gym
 from gym import spaces
 from gym.utils import seeding
 import numpy as np
 from os import path
 class PendulumEnv(gym.Env):
    metadata = {
        'render.modes' : ['human', 'rgb_array'],
        'video.frames_per_second' : 30
    }
    def __init__(self):
        self.max_speed=8
        self.max_torque=2.
        self.dt=.05
        self.viewer = None
        high = np.array([1., 1., self.max_speed])
        self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)
        self.seed()
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def step(self,u):
        th, thdot = self.state # th := theta
        g = 10.
        m = 1.
        l = 1.
        dt = self.dt
        u = np.clip(u, -self.max_torque, self.max_torque)[0]
        self.last_u = u # for rendering
        costs = angle_normalize(th)**2 + .1*thdot**2 + .001*(u**2)
        newthdot = thdot + (-3*g/(2*l) * np.sin(th + np.pi) + 3./(m*l**2)*u) * dt
        newth = th + newthdot*dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) #pylint: disable=E1111
        self.state = np.array([newth, newthdot])
        return self._get_obs(), -costs, False, {}
    def reset(self):
        high = np.array([np.pi, 1])
        self.state = self.np_random.uniform(low=-high, high=high)
        self.last_u = None
        return self._get_obs()
    def _get_obs(self):
        theta, thetadot = self.state
        return np.array([np.cos(theta), np.sin(theta), thetadot])
    def render(self, mode='human'):
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(500,500)
            self.viewer.set_bounds(-2.2,2.2,-2.2,2.2)
            rod = rendering.make_capsule(1, .2)
            rod.set_color(.8, .3, .3)
            self.pole_transform = rendering.Transform()
            rod.add_attr(self.pole_transform)
            self.viewer.add_geom(rod)
            axle = rendering.make_circle(.05)
            axle.set_color(0,0,0)
            self.viewer.add_geom(axle)
            fname = path.join(path.dirname(__file__), "assets/clockwise.png")
            self.img = rendering.Image(fname, 1., 1.)
            self.imgtrans = rendering.Transform()
            self.img.add_attr(self.imgtrans)
        self.viewer.add_onetime(self.img)
        self.pole_transform.set_rotation(self.state[0] + np.pi/2)
        if self.last_u:
            self.imgtrans.scale = (-self.last_u/2, np.abs(self.last_u)/2)
        return self.viewer.render(return_rgb_array = mode=='rgb_array')
    def close(self):
        if self.viewer:
            self.viewer.close()
            self.viewer = None
 def angle_normalize(x):
    return (((x+np.pi) % (2*np.pi)) - np.pi)
--- a/src/gym/envs/classic_control/rendering.py
+++ b/src/gym/envs/classic_control/rendering.py
@@ -0,0 +1,359 @@
 """
 2D rendering framework
 """
 from __future__ import division
 import os
 import six
 import sys
 if "Apple" in sys.version:
    if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
        os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
        # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
 from gym.utils import reraise
 from gym import error
 try:
    import pyglet
 except ImportError as e:
    reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
 try:
    from pyglet.gl import *
 except ImportError as e:
    reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'")
 import math
 import numpy as np
 RAD2DEG = 57.29577951308232
 def get_display(spec):
    """Convert a display specification (such as :0) into an actual Display
    object.
    Pyglet only supports multiple Displays on Linux.
    """
    if spec is None:
        return None
    elif isinstance(spec, six.string_types):
        return pyglet.canvas.Display(spec)
    else:
        raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
 class Viewer(object):
    def __init__(self, width, height, display=None):
        display = get_display(display)
        self.width = width
        self.height = height
        self.window = pyglet.window.Window(width=width, height=height, display=display)
        self.window.on_close = self.window_closed_by_user
        self.isopen = True
        self.geoms = []
        self.onetime_geoms = []
        self.transform = Transform()
        glEnable(GL_BLEND)
        glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
    def close(self):
        self.window.close()
    def window_closed_by_user(self):
        self.isopen = False
    def set_bounds(self, left, right, bottom, top):
        assert right > left and top > bottom
        scalex = self.width/(right-left)
        scaley = self.height/(top-bottom)
        self.transform = Transform(
            translation=(-left*scalex, -bottom*scaley),
            scale=(scalex, scaley))
    def add_geom(self, geom):
        self.geoms.append(geom)
    def add_onetime(self, geom):
        self.onetime_geoms.append(geom)
    def render(self, return_rgb_array=False):
        glClearColor(1,1,1,1)
        self.window.clear()
        self.window.switch_to()
        self.window.dispatch_events()
        self.transform.enable()
        for geom in self.geoms:
            geom.render()
        for geom in self.onetime_geoms:
            geom.render()
        self.transform.disable()
        arr = None
        if return_rgb_array:
            buffer = pyglet.image.get_buffer_manager().get_color_buffer()
            image_data = buffer.get_image_data()
            arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
            # In https://github.com/openai/gym-http-api/issues/2, we
            # discovered that someone using Xmonad on Arch was having
            # a window of size 598 x 398, though a 600 x 400 window
            # was requested. (Guess Xmonad was preserving a pixel for
            # the boundary.) So we use the buffer height/width rather
            # than the requested one.
            arr = arr.reshape(buffer.height, buffer.width, 4)
            arr = arr[::-1,:,0:3]
        self.window.flip()
        self.onetime_geoms = []
        return arr if return_rgb_array else self.isopen
    # Convenience
    def draw_circle(self, radius=10, res=30, filled=True, **attrs):
        geom = make_circle(radius=radius, res=res, filled=filled)
        _add_attrs(geom, attrs)
        self.add_onetime(geom)
        return geom
    def draw_polygon(self, v, filled=True, **attrs):
        geom = make_polygon(v=v, filled=filled)
        _add_attrs(geom, attrs)
        self.add_onetime(geom)
        return geom
    def draw_polyline(self, v, **attrs):
        geom = make_polyline(v=v)
        _add_attrs(geom, attrs)
        self.add_onetime(geom)
        return geom
    def draw_line(self, start, end, **attrs):
        geom = Line(start, end)
        _add_attrs(geom, attrs)
        self.add_onetime(geom)
        return geom
    def get_array(self):
        self.window.flip()
        image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
        self.window.flip()
        arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
        arr = arr.reshape(self.height, self.width, 4)
        return arr[::-1,:,0:3]
    def __del__(self):
        self.close()
 def _add_attrs(geom, attrs):
    if "color" in attrs:
        geom.set_color(*attrs["color"])
    if "linewidth" in attrs:
        geom.set_linewidth(attrs["linewidth"])
 class Geom(object):
    def __init__(self):
        self._color=Color((0, 0, 0, 1.0))
        self.attrs = [self._color]
    def render(self):
        for attr in reversed(self.attrs):
            attr.enable()
        self.render1()
        for attr in self.attrs:
            attr.disable()
    def render1(self):
        raise NotImplementedError
    def add_attr(self, attr):
        self.attrs.append(attr)
    def set_color(self, r, g, b):
        self._color.vec4 = (r, g, b, 1)
 class Attr(object):
    def enable(self):
        raise NotImplementedError
    def disable(self):
        pass
 class Transform(Attr):
    def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
        self.set_translation(*translation)
        self.set_rotation(rotation)
        self.set_scale(*scale)
    def enable(self):
        glPushMatrix()
        glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc ppint
        glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
        glScalef(self.scale[0], self.scale[1], 1)
    def disable(self):
        glPopMatrix()
    def set_translation(self, newx, newy):
        self.translation = (float(newx), float(newy))
    def set_rotation(self, new):
        self.rotation = float(new)
    def set_scale(self, newx, newy):
        self.scale = (float(newx), float(newy))
 class Color(Attr):
    def __init__(self, vec4):
        self.vec4 = vec4
    def enable(self):
        glColor4f(*self.vec4)
 class LineStyle(Attr):
    def __init__(self, style):
        self.style = style
    def enable(self):
        glEnable(GL_LINE_STIPPLE)
        glLineStipple(1, self.style)
    def disable(self):
        glDisable(GL_LINE_STIPPLE)
 class LineWidth(Attr):
    def __init__(self, stroke):
        self.stroke = stroke
    def enable(self):
        glLineWidth(self.stroke)
 class Point(Geom):
    def __init__(self):
        Geom.__init__(self)
    def render1(self):
        glBegin(GL_POINTS) # draw point
        glVertex3f(0.0, 0.0, 0.0)
        glEnd()
 class FilledPolygon(Geom):
    def __init__(self, v):
        Geom.__init__(self)
        self.v = v
    def render1(self):
        if   len(self.v) == 4 : glBegin(GL_QUADS)
        elif len(self.v)  > 4 : glBegin(GL_POLYGON)
        else: glBegin(GL_TRIANGLES)
        for p in self.v:
            glVertex3f(p[0], p[1],0)  # draw each vertex
        glEnd()
 def make_circle(radius=10, res=30, filled=True):
    points = []
    for i in range(res):
        ang = 2*math.pi*i / res
        points.append((math.cos(ang)*radius, math.sin(ang)*radius))
    if filled:
        return FilledPolygon(points)
    else:
        return PolyLine(points, True)
 def make_polygon(v, filled=True):
    if filled: return FilledPolygon(v)
    else: return PolyLine(v, True)
 def make_polyline(v):
    return PolyLine(v, False)
 def make_capsule(length, width):
    l, r, t, b = 0, length, width/2, -width/2
    box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
    circ0 = make_circle(width/2)
    circ1 = make_circle(width/2)
    circ1.add_attr(Transform(translation=(length, 0)))
    geom = Compound([box, circ0, circ1])
    return geom
 class Compound(Geom):
    def __init__(self, gs):
        Geom.__init__(self)
        self.gs = gs
        for g in self.gs:
            g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
    def render1(self):
        for g in self.gs:
            g.render()
 class PolyLine(Geom):
    def __init__(self, v, close):
        Geom.__init__(self)
        self.v = v
        self.close = close
        self.linewidth = LineWidth(1)
        self.add_attr(self.linewidth)
    def render1(self):
        glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
        for p in self.v:
            glVertex3f(p[0], p[1],0)  # draw each vertex
        glEnd()
    def set_linewidth(self, x):
        self.linewidth.stroke = x
 class Line(Geom):
    def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
        Geom.__init__(self)
        self.start = start
        self.end = end
        self.linewidth = LineWidth(1)
        self.add_attr(self.linewidth)
    def render1(self):
        glBegin(GL_LINES)
        glVertex2f(*self.start)
        glVertex2f(*self.end)
        glEnd()
 class Image(Geom):
    def __init__(self, fname, width, height):
        Geom.__init__(self)
        self.width = width
        self.height = height
        img = pyglet.image.load(fname)
        self.img = img
        self.flip = False
    def render1(self):
        self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
 # ================================================================
 class SimpleImageViewer(object):
    def __init__(self, display=None, maxwidth=500):
        self.window = None
        self.isopen = False
        self.display = display
        self.maxwidth = maxwidth
    def imshow(self, arr):
        if self.window is None:
            height, width, _channels = arr.shape
            if width > self.maxwidth:
                scale = self.maxwidth / width
                width = int(scale * width)
                height = int(scale * height)
            self.window = pyglet.window.Window(width=width, height=height, 
                display=self.display, vsync=False, resizable=True)            
            self.width = width
            self.height = height
            self.isopen = True
            @self.window.event
            def on_resize(width, height):
                self.width = width
                self.height = height
            @self.window.event
            def on_close():
                self.isopen = False
        assert len(arr.shape) == 3, "You passed in an image with the wrong number shape"
        image = pyglet.image.ImageData(arr.shape[1], arr.shape[0], 
            'RGB', arr.tobytes(), pitch=arr.shape[1]*-3)
        gl.glTexParameteri(gl.GL_TEXTURE_2D, 
            gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST)
        texture = image.get_texture()
        texture.width = self.width
        texture.height = self.height
        self.window.clear()
        self.window.switch_to()
        self.window.dispatch_events()
        texture.blit(0, 0) # draw
        self.window.flip()
    def close(self):
        if self.isopen:
            self.window.close()
            self.isopen = False
    def __del__(self):
        self.close()
--- a/src/gym/envs/mujoco/init.py
+++ b/src/gym/envs/mujoco/init.py
@@ -0,0 +1,16 @@
 from gym.envs.mujoco.mujoco_env import MujocoEnv
 # ^^^^^ so that user gets the correct error
 # message if mujoco is not installed correctly
 from gym.envs.mujoco.ant import AntEnv
 from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
 from gym.envs.mujoco.hopper import HopperEnv
 from gym.envs.mujoco.walker2d import Walker2dEnv
 from gym.envs.mujoco.humanoid import HumanoidEnv
 from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
 from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
 from gym.envs.mujoco.reacher import ReacherEnv
 from gym.envs.mujoco.swimmer import SwimmerEnv
 from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
 from gym.envs.mujoco.pusher import PusherEnv
 from gym.envs.mujoco.thrower import ThrowerEnv
 from gym.envs.mujoco.striker import StrikerEnv
--- a/src/gym/envs/mujoco/ant.py
+++ b/src/gym/envs/mujoco/ant.py
@@ -0,0 +1,45 @@
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
        utils.EzPickle.__init__(self)
    def step(self, a):
        xposbefore = self.get_body_com("torso")[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.get_body_com("torso")[0]
        forward_reward = (xposafter - xposbefore)/self.dt
        ctrl_cost = .5 * np.square(a).sum()
        contact_cost = 0.5 * 1e-3 * np.sum(
            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        state = self.state_vector()
        notdone = np.isfinite(state).all() \
            and state[2] >= 0.2 and state[2] <= 1.0
        done = not notdone
        ob = self._get_obs()
        return ob, reward, done, dict(
            reward_forward=forward_reward,
            reward_ctrl=-ctrl_cost,
            reward_contact=-contact_cost,
            reward_survive=survive_reward)
    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[2:],
            self.sim.data.qvel.flat,
            np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
        ])
    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
        self.set_state(qpos, qvel)
        return self._get_obs()
    def viewer_setup(self):
        self.viewer.cam.distance = self.model.stat.extent * 0.5
--- a/src/gym/envs/mujoco/assets/ant.xml
+++ b/src/gym/envs/mujoco/assets/ant.xml
@@ -0,0 +1,81 @@
 <mujoco model="ant">
  <compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
  <option integrator="RK4" timestep="0.01"/>
  <custom>
    <numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
  </custom>
  <default>
    <joint armature="1" damping="1" limited="true"/>
    <geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
  </default>
  <asset>
    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
    <material name="geom" texture="texgeom" texuniform="true"/>
  </asset>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
    <body name="torso" pos="0 0 0.75">
      <camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
      <geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
      <joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
      <body name="front_left_leg" pos="0 0 0">
        <geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
        <body name="aux_1" pos="0.2 0.2 0">
          <joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
          <geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
          <body pos="0.2 0.2 0">
            <joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
            <geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
          </body>
        </body>
      </body>
      <body name="front_right_leg" pos="0 0 0">
        <geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
        <body name="aux_2" pos="-0.2 0.2 0">
          <joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
          <geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
          <body pos="-0.2 0.2 0">
            <joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
            <geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
          </body>
        </body>
      </body>
      <body name="back_leg" pos="0 0 0">
        <geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
        <body name="aux_3" pos="-0.2 -0.2 0">
          <joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
          <geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
          <body pos="-0.2 -0.2 0">
            <joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
            <geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
          </body>
        </body>
      </body>
      <body name="right_back_leg" pos="0 0 0">
        <geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
        <body name="aux_4" pos="0.2 -0.2 0">
          <joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
          <geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
          <body pos="0.2 -0.2 0">
            <joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
            <geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
          </body>
        </body>
      </body>
    </body>
  </worldbody>
  <actuator>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
  </actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/half_cheetah.xml
+++ b/src/gym/envs/mujoco/assets/half_cheetah.xml
@@ -0,0 +1,96 @@
 <!-- Cheetah Model
    The state space is populated with joints in the order that they are
    defined in this file. The actuators also operate on joints.
    State-Space (name/joint/parameter):
        - rootx     slider      position (m)
        - rootz     slider      position (m)
        - rooty     hinge       angle (rad)
        - bthigh    hinge       angle (rad)
        - bshin     hinge       angle (rad)
        - bfoot     hinge       angle (rad)
        - fthigh    hinge       angle (rad)
        - fshin     hinge       angle (rad)
        - ffoot     hinge       angle (rad)
        - rootx     slider      velocity (m/s)
        - rootz     slider      velocity (m/s)
        - rooty     hinge       angular velocity (rad/s)
        - bthigh    hinge       angular velocity (rad/s)
        - bshin     hinge       angular velocity (rad/s)
        - bfoot     hinge       angular velocity (rad/s)
        - fthigh    hinge       angular velocity (rad/s)
        - fshin     hinge       angular velocity (rad/s)
        - ffoot     hinge       angular velocity (rad/s)
    Actuators (name/actuator/parameter):
        - bthigh    hinge       torque (N m)
        - bshin     hinge       torque (N m)
        - bfoot     hinge       torque (N m)
        - fthigh    hinge       torque (N m)
        - fshin     hinge       torque (N m)
        - ffoot     hinge       torque (N m)
 -->
 <mujoco model="cheetah">
  <compiler angle="radian" coordinate="local" inertiafromgeom="true" settotalmass="14"/>
  <default>
    <joint armature=".1" damping=".01" limited="true" solimplimit="0 .8 .03" solreflimit=".02 1" stiffness="8"/>
    <geom conaffinity="0" condim="3" contype="1" friction=".4 .1 .1" rgba="0.8 0.6 .4 1" solimp="0.0 0.8 0.01" solref="0.02 1"/>
    <motor ctrllimited="true" ctrlrange="-1 1"/>
  </default>
  <size nstack="300000" nuser_geom="1"/>
  <option gravity="0 0 -9.81" timestep="0.01"/>
  <asset>
    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
    <material name="geom" texture="texgeom" texuniform="true"/>
  </asset>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
    <body name="torso" pos="0 0 .7">
      <camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
      <geom fromto="-.5 0 0 .5 0 0" name="torso" size="0.046" type="capsule"/>
      <geom axisangle="0 1 0 .87" name="head" pos=".6 0 .1" size="0.046 .15" type="capsule"/>
      <!-- <site name='tip'  pos='.15 0 .11'/>-->
      <body name="bthigh" pos="-.5 0 0">
        <joint axis="0 1 0" damping="6" name="bthigh" pos="0 0 0" range="-.52 1.05" stiffness="240" type="hinge"/>
        <geom axisangle="0 1 0 -3.8" name="bthigh" pos=".1 0 -.13" size="0.046 .145" type="capsule"/>
        <body name="bshin" pos=".16 0 -.25">
          <joint axis="0 1 0" damping="4.5" name="bshin" pos="0 0 0" range="-.785 .785" stiffness="180" type="hinge"/>
          <geom axisangle="0 1 0 -2.03" name="bshin" pos="-.14 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .15" type="capsule"/>
          <body name="bfoot" pos="-.28 0 -.14">
            <joint axis="0 1 0" damping="3" name="bfoot" pos="0 0 0" range="-.4 .785" stiffness="120" type="hinge"/>
            <geom axisangle="0 1 0 -.27" name="bfoot" pos=".03 0 -.097" rgba="0.9 0.6 0.6 1" size="0.046 .094" type="capsule"/>
          </body>
        </body>
      </body>
      <body name="fthigh" pos=".5 0 0">
        <joint axis="0 1 0" damping="4.5" name="fthigh" pos="0 0 0" range="-1 .7" stiffness="180" type="hinge"/>
        <geom axisangle="0 1 0 .52" name="fthigh" pos="-.07 0 -.12" size="0.046 .133" type="capsule"/>
        <body name="fshin" pos="-.14 0 -.24">
          <joint axis="0 1 0" damping="3" name="fshin" pos="0 0 0" range="-1.2 .87" stiffness="120" type="hinge"/>
          <geom axisangle="0 1 0 -.6" name="fshin" pos=".065 0 -.09" rgba="0.9 0.6 0.6 1" size="0.046 .106" type="capsule"/>
          <body name="ffoot" pos=".13 0 -.18">
            <joint axis="0 1 0" damping="1.5" name="ffoot" pos="0 0 0" range="-.5 .5" stiffness="60" type="hinge"/>
            <geom axisangle="0 1 0 -.6" name="ffoot" pos=".045 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .07" type="capsule"/>
          </body>
        </body>
      </body>
    </body>
  </worldbody>
  <actuator>
    <motor gear="120" joint="bthigh" name="bthigh"/>
    <motor gear="90" joint="bshin" name="bshin"/>
    <motor gear="60" joint="bfoot" name="bfoot"/>
    <motor gear="120" joint="fthigh" name="fthigh"/>
    <motor gear="60" joint="fshin" name="fshin"/>
    <motor gear="30" joint="ffoot" name="ffoot"/>
  </actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/hopper.xml
+++ b/src/gym/envs/mujoco/assets/hopper.xml
@@ -0,0 +1,48 @@
 <mujoco model="hopper">
  <compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
  <default>
    <joint armature="1" damping="1" limited="true"/>
    <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
    <motor ctrllimited="true" ctrlrange="-.4 .4"/>
  </default>
  <option integrator="RK4" timestep="0.002"/>
  <visual>
    <map znear="0.02"/>
  </visual>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
    <body name="torso" pos="0 0 1.25">
      <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
      <geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
      <body name="thigh" pos="0 0 1.05">
        <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
        <body name="leg" pos="0 0 0.35">
          <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
          <body name="foot" pos="0.13/2 0 0.1">
            <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
            <geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
          </body>
        </body>
      </body>
    </body>
  </worldbody>
  <actuator>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
  </actuator>
    <asset>
        <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
            width="100" height="100"/>
        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
        <material name="geom" texture="texgeom" texuniform="true"/>
    </asset>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/humanoid.xml
+++ b/src/gym/envs/mujoco/assets/humanoid.xml
@@ -0,0 +1,121 @@
 <mujoco model="humanoid">
    <compiler angle="degree" inertiafromgeom="true"/>
    <default>
        <joint armature="1" damping="1" limited="true"/>
        <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
        <motor ctrllimited="true" ctrlrange="-.4 .4"/>
    </default>
    <option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
        <!-- <flags solverstat="enable" energy="enable"/>-->
    </option>
    <size nkey="5" nuser_geom="1"/>
    <visual>
        <map fogend="5" fogstart="3"/>
    </visual>
    <asset>
        <texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
        <!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
        <material name="geom" texture="texgeom" texuniform="true"/>
    </asset>
    <worldbody>
        <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
        <geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
        <!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
        <body name="torso" pos="0 0 1.4">
            <camera name="track" mode="trackcom" pos="0 -4 0" xyaxes="1 0 0 0 0 1"/>
            <joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
            <geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
            <geom name="head" pos="0 0 .19" size=".09" type="sphere" user="258"/>
            <geom fromto="-.01 -.06 -.12 -.01 .06 -.12" name="uwaist" size="0.06" type="capsule"/>
            <body name="lwaist" pos="-.01 0 -0.260" quat="1.000 0 -0.002 0">
                <geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
                <joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
                <joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
                <body name="pelvis" pos="0 0 -0.165" quat="1.000 0 -0.002 0">
                    <joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
                    <geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
                    <body name="right_thigh" pos="0 -0.1 -0.04">
                        <joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
                        <joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
                        <joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
                        <geom fromto="0 0 0 0 0.01 -.34" name="right_thigh1" size="0.06" type="capsule"/>
                        <body name="right_shin" pos="0 0.01 -0.403">
                            <joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
                            <geom fromto="0 0 0 0 0 -.3" name="right_shin1" size="0.049" type="capsule"/>
                            <body name="right_foot" pos="0 0 -0.45">
                                <geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
                            </body>
                        </body>
                    </body>
                    <body name="left_thigh" pos="0 0.1 -0.04">
                        <joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
                        <joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
                        <joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
                        <geom fromto="0 0 0 0 -0.01 -.34" name="left_thigh1" size="0.06" type="capsule"/>
                        <body name="left_shin" pos="0 -0.01 -0.403">
                            <joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
                            <geom fromto="0 0 0 0 0 -.3" name="left_shin1" size="0.049" type="capsule"/>
                            <body name="left_foot" pos="0 0 -0.45">
                                <geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
                            </body>
                        </body>
                    </body>
                </body>
            </body>
            <body name="right_upper_arm" pos="0 -0.17 0.06">
                <joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
                <joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
                <geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
                <body name="right_lower_arm" pos=".18 -.18 -.18">
                    <joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
                    <geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
                    <geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
                    <camera pos="0 0 0"/>
                </body>
            </body>
            <body name="left_upper_arm" pos="0 0.17 0.06">
                <joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
                <joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
                <geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
                <body name="left_lower_arm" pos=".18 .18 -.18">
                    <joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
                    <geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
                    <geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
                </body>
            </body>
        </body>
    </worldbody>
    <tendon>
        <fixed name="left_hipknee">
            <joint coef="-1" joint="left_hip_y"/>
            <joint coef="1" joint="left_knee"/>
        </fixed>
        <fixed name="right_hipknee">
            <joint coef="-1" joint="right_hip_y"/>
            <joint coef="1" joint="right_knee"/>
        </fixed>
    </tendon>
    <actuator>
        <motor gear="100" joint="abdomen_y" name="abdomen_y"/>
        <motor gear="100" joint="abdomen_z" name="abdomen_z"/>
        <motor gear="100" joint="abdomen_x" name="abdomen_x"/>
        <motor gear="100" joint="right_hip_x" name="right_hip_x"/>
        <motor gear="100" joint="right_hip_z" name="right_hip_z"/>
        <motor gear="300" joint="right_hip_y" name="right_hip_y"/>
        <motor gear="200" joint="right_knee" name="right_knee"/>
        <motor gear="100" joint="left_hip_x" name="left_hip_x"/>
        <motor gear="100" joint="left_hip_z" name="left_hip_z"/>
        <motor gear="300" joint="left_hip_y" name="left_hip_y"/>
        <motor gear="200" joint="left_knee" name="left_knee"/>
        <motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
        <motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
        <motor gear="25" joint="right_elbow" name="right_elbow"/>
        <motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
        <motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
        <motor gear="25" joint="left_elbow" name="left_elbow"/>
    </actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/humanoidstandup.xml
+++ b/src/gym/envs/mujoco/assets/humanoidstandup.xml
@@ -0,0 +1,121 @@
 <mujoco model="humanoidstandup">
    <compiler angle="degree" inertiafromgeom="true"/>
    <default>
        <joint armature="1" damping="1" limited="true"/>
        <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
        <motor ctrllimited="true" ctrlrange="-.4 .4"/>
    </default>
    <option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
        <!-- <flags solverstat="enable" energy="enable"/>-->
    </option>
    <size nkey="5" nuser_geom="1"/>
    <visual>
        <map fogend="5" fogstart="3"/>
    </visual>
    <asset>
        <texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
        <!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
        <material name="geom" texture="texgeom" texuniform="true"/>
    </asset>
    <worldbody>
        <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
        <geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
        <!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
        <body name="torso" pos="0 0 .105">
            <camera name="track" mode="trackcom" pos="0 -3 .5" xyaxes="1 0 0 0 0 1"/>
            <joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
            <geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
            <geom name="head" pos="-.15 0 0" size=".09" type="sphere" user="258"/>
            <geom fromto=".11 -.06 0 .11 .06 0" name="uwaist" size="0.06" type="capsule"/>
            <body name="lwaist" pos=".21 0 0" quat="1.000 0 -0.002 0">
                <geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
                <joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
                <joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
                <body name="pelvis" pos="0.165 0 0" quat="1.000 0 -0.002 0">
                    <joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
                    <geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
                    <body name="right_thigh" pos="0 -0.1 0">
                        <joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
                        <joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
                        <joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
                        <geom fromto="0 0 0 0.34 0.01 0" name="right_thigh1" size="0.06" type="capsule"/>
                        <body name="right_shin" pos="0.403 0.01 0">
                            <joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
                            <geom fromto="0 0 0 0.3 0 0" name="right_shin1" size="0.049" type="capsule"/>
                            <body name="right_foot" pos="0.35 0 -.10">
                                <geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
                            </body>
                        </body>
                    </body>
                    <body name="left_thigh" pos="0 0.1 0">
                        <joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
                        <joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
                        <joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
                        <geom fromto="0 0 0 0.34 -0.01 0" name="left_thigh1" size="0.06" type="capsule"/>
                        <body name="left_shin" pos="0.403 -0.01 0">
                            <joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
                            <geom fromto="0 0 0 0.3 0 0" name="left_shin1" size="0.049" type="capsule"/>
                            <body name="left_foot" pos="0.35 0 -.1">
                                <geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
                            </body>
                        </body>
                    </body>
                </body>
            </body>
            <body name="right_upper_arm" pos="0 -0.17 0.06">
                <joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
                <joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
                <geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
                <body name="right_lower_arm" pos=".18 -.18 -.18">
                    <joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
                    <geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
                    <geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
                    <camera pos="0 0 0"/>
                </body>
            </body>
            <body name="left_upper_arm" pos="0 0.17 0.06">
                <joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
                <joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
                <geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
                <body name="left_lower_arm" pos=".18 .18 -.18">
                    <joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
                    <geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
                    <geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
                </body>
            </body>
        </body>
    </worldbody>
    <tendon>
        <fixed name="left_hipknee">
            <joint coef="-1" joint="left_hip_y"/>
            <joint coef="1" joint="left_knee"/>
        </fixed>
        <fixed name="right_hipknee">
            <joint coef="-1" joint="right_hip_y"/>
            <joint coef="1" joint="right_knee"/>
        </fixed>
    </tendon>
    <actuator>
        <motor gear="100" joint="abdomen_y" name="abdomen_y"/>
        <motor gear="100" joint="abdomen_z" name="abdomen_z"/>
        <motor gear="100" joint="abdomen_x" name="abdomen_x"/>
        <motor gear="100" joint="right_hip_x" name="right_hip_x"/>
        <motor gear="100" joint="right_hip_z" name="right_hip_z"/>
        <motor gear="300" joint="right_hip_y" name="right_hip_y"/>
        <motor gear="200" joint="right_knee" name="right_knee"/>
        <motor gear="100" joint="left_hip_x" name="left_hip_x"/>
        <motor gear="100" joint="left_hip_z" name="left_hip_z"/>
        <motor gear="300" joint="left_hip_y" name="left_hip_y"/>
        <motor gear="200" joint="left_knee" name="left_knee"/>
        <motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
        <motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
        <motor gear="25" joint="right_elbow" name="right_elbow"/>
        <motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
        <motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
        <motor gear="25" joint="left_elbow" name="left_elbow"/>
    </actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/inverted_double_pendulum.xml
+++ b/src/gym/envs/mujoco/assets/inverted_double_pendulum.xml
@@ -0,0 +1,47 @@
 <!-- Cartpole Model
    The state space is populated with joints in the order that they are
    defined in this file. The actuators also operate on joints.
    State-Space (name/joint/parameter):
        - cart      slider      position (m)
        - pole      hinge       angle (rad)
        - cart      slider      velocity (m/s)
        - pole      hinge       angular velocity (rad/s)
    Actuators (name/actuator/parameter):
        - cart      motor       force x (N)
 -->
 <mujoco model="cartpole">
  <compiler coordinate="local" inertiafromgeom="true"/>
  <custom>
    <numeric data="2" name="frame_skip"/>
  </custom>
  <default>
    <joint damping="0.05"/>
    <geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
  </default>
  <option gravity="1e-5 0 -9.81" integrator="RK4" timestep="0.01"/>
  <size nstack="3000"/>
  <worldbody>
    <geom name="floor" pos="0 0 -3.0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
    <geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
    <body name="cart" pos="0 0 0">
      <joint axis="1 0 0" limited="true" margin="0.01" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
      <geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
      <body name="pole" pos="0 0 0">
        <joint axis="0 1 0" name="hinge" pos="0 0 0" type="hinge"/>
        <geom fromto="0 0 0 0 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
        <body name="pole2" pos="0 0 0.6">
          <joint axis="0 1 0" name="hinge2" pos="0 0 0" type="hinge"/>
          <geom fromto="0 0 0 0 0 0.6" name="cpole2" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
          <site name="tip" pos="0 0 .6" size="0.01 0.01"/>
        </body>
      </body>
    </body>
  </worldbody>
  <actuator>
    <motor ctrllimited="true" ctrlrange="-1 1" gear="500" joint="slider" name="slide"/>
  </actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/inverted_pendulum.xml
+++ b/src/gym/envs/mujoco/assets/inverted_pendulum.xml
@@ -0,0 +1,27 @@
 <mujoco model="inverted pendulum">
 	<compiler inertiafromgeom="true"/>
 	<default>
 		<joint armature="0" damping="1" limited="true"/>
 		<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
 		<tendon/>
 		<motor ctrlrange="-3 3"/>
 	</default>
 	<option gravity="0 0 -9.81" integrator="RK4" timestep="0.02"/>
 	<size nstack="3000"/>
 	<worldbody>
 		<!--geom name="ground" type="plane" pos="0 0 0" /-->
 		<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
 		<body name="cart" pos="0 0 0">
 			<joint axis="1 0 0" limited="true" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
 			<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
 			<body name="pole" pos="0 0 0">
 				<joint axis="0 1 0" name="hinge" pos="0 0 0" range="-90 90" type="hinge"/>
 				<geom fromto="0 0 0 0.001 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.049 0.3" type="capsule"/>
 				<!--                 <body name="pole2" pos="0.001 0 0.6"><joint name="hinge2" type="hinge" pos="0 0 0" axis="0 1 0"/><geom name="cpole2" type="capsule" fromto="0 0 0 0 0 0.6" size="0.05 0.3" rgba="0.7 0 0.7 1"/><site name="tip2" pos="0 0 .6"/></body>-->
 			</body>
 		</body>
 	</worldbody>
 	<actuator>
 		<motor gear="100" joint="slider" name="slide"/>
 	</actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/point.xml
+++ b/src/gym/envs/mujoco/assets/point.xml
@@ -0,0 +1,31 @@
 <mujoco>
  <compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
  <option integrator="RK4" timestep="0.02"/>
  <default>
    <joint armature="0" damping="0" limited="false"/>
    <geom conaffinity="0" condim="3" density="100" friction="1 0.5 0.5" margin="0" rgba="0.8 0.6 0.4 1"/>
  </default>
  <asset>
    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
    <material name="geom" texture="texgeom" texuniform="true"/>
  </asset>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
    <body name="torso" pos="0 0 0">
      <geom name="pointbody" pos="0 0 0.5" size="0.5" type="sphere"/>
      <geom name="pointarrow" pos="0.6 0 0.5" size="0.5 0.1 0.1" type="box"/>
      <joint axis="1 0 0" name="ballx" pos="0 0 0" type="slide"/>
      <joint axis="0 1 0" name="bally" pos="0 0 0" type="slide"/>
      <joint axis="0 0 1" limited="false" name="rot" pos="0 0 0" type="hinge"/>
    </body>
  </worldbody>
  <actuator>
    <!-- Those are just dummy actuators for providing ranges -->
    <motor ctrllimited="true" ctrlrange="-1 1" joint="ballx"/>
    <motor ctrllimited="true" ctrlrange="-0.25 0.25" joint="rot"/>
  </actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/pusher.xml
+++ b/src/gym/envs/mujoco/assets/pusher.xml
@@ -0,0 +1,91 @@
 <mujoco model="arm3d">
  <compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
  <option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler" />
  <default>
    <joint armature='0.04' damping="1" limited="true"/>
    <geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
  </default>
  <worldbody>
    <light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
    <geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
    <body name="r_shoulder_pan_link" pos="0 -0.6 0">
      <geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" />
      <geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" />
      <geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" />
      <geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" />
      <geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
      <joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
      <body name="r_shoulder_lift_link" pos="0.1 0 0">
        <geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
        <joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
        <body name="r_upper_arm_roll_link" pos="0 0 0">
          <geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
          <joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
          <body name="r_upper_arm_link" pos="0 0 0">
            <geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
            <body name="r_elbow_flex_link" pos="0.4 0 0">
              <geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
              <joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
              <body name="r_forearm_roll_link" pos="0 0 0">
                <geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
                <joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
                <body name="r_forearm_link" pos="0 0 0">
                  <geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
                  <body name="r_wrist_flex_link" pos="0.321 0 0">
                    <geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
                    <joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
                    <body name="r_wrist_roll_link" pos="0 0 0">
                      <joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
                      <body name="tips_arm" pos="0 0 0">
                        <geom name="tip_arml" type="sphere" pos="0.1 -0.1 0." size="0.01" />
                        <geom name="tip_armr" type="sphere" pos="0.1 0.1 0." size="0.01" />
                      </body>
                      <geom type="capsule" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" contype="1" conaffinity="1" />
                      <geom type="capsule" fromto="0 -0.1 0. 0.1 -0.1 0" size="0.02" contype="1" conaffinity="1" />
                      <geom type="capsule" fromto="0 +0.1 0. 0.1 +0.1 0." size="0.02" contype="1" conaffinity="1" />
                    </body>
                  </body>
                </body>
              </body>
            </body>
          </body>
        </body>
      </body>
    </body>
    <!--<body name="object" pos="0.55 -0.3 -0.275" >-->
    <body name="object" pos="0.45 -0.05 -0.275" >
      <geom rgba="1 1 1 0" type="sphere" size="0.05 0.05 0.05" density="0.00001" conaffinity="0"/>
      <geom rgba="1 1 1 1" type="cylinder" size="0.05 0.05 0.05" density="0.00001" contype="1" conaffinity="0"/>
      <joint name="obj_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
      <joint name="obj_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
    </body>
    <body name="goal" pos="0.45 -0.05 -0.3230">
      <geom rgba="1 0 0 1" type="cylinder" size="0.08 0.001 0.1" density='0.00001' contype="0" conaffinity="0"/>
      <joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
      <joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/> 
    </body>
  </worldbody>
  <actuator>
    <motor joint="r_shoulder_pan_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
    <motor joint="r_shoulder_lift_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
    <motor joint="r_upper_arm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
    <motor joint="r_elbow_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
    <motor joint="r_forearm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
    <motor joint="r_wrist_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
    <motor joint="r_wrist_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true"/>
  </actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/reacher.xml
+++ b/src/gym/envs/mujoco/assets/reacher.xml
@@ -0,0 +1,39 @@
 <mujoco model="reacher">
 	<compiler angle="radian" inertiafromgeom="true"/>
 	<default>
 		<joint armature="1" damping="1" limited="true"/>
 		<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
 	</default>
 	<option gravity="0 0 -9.81" integrator="RK4" timestep="0.01"/>
 	<worldbody>
 		<!-- Arena -->
 		<geom conaffinity="0" contype="0" name="ground" pos="0 0 0" rgba="0.9 0.9 0.9 1" size="1 1 10" type="plane"/>
 		<geom conaffinity="0" fromto="-.3 -.3 .01 .3 -.3 .01" name="sideS" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
 		<geom conaffinity="0" fromto=" .3 -.3 .01 .3  .3 .01" name="sideE" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
 		<geom conaffinity="0" fromto="-.3  .3 .01 .3  .3 .01" name="sideN" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
 		<geom conaffinity="0" fromto="-.3 -.3 .01 -.3 .3 .01" name="sideW" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
 		<!-- Arm -->
 		<geom conaffinity="0" contype="0" fromto="0 0 0 0 0 0.02" name="root" rgba="0.9 0.4 0.6 1" size=".011" type="cylinder"/>
 		<body name="body0" pos="0 0 .01">
 			<geom fromto="0 0 0 0.1 0 0" name="link0" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
 			<joint axis="0 0 1" limited="false" name="joint0" pos="0 0 0" type="hinge"/>
 			<body name="body1" pos="0.1 0 0">
 				<joint axis="0 0 1" limited="true" name="joint1" pos="0 0 0" range="-3.0 3.0" type="hinge"/>
 				<geom fromto="0 0 0 0.1 0 0" name="link1" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
 				<body name="fingertip" pos="0.11 0 0">
 					<geom contype="0" name="fingertip" pos="0 0 0" rgba="0.0 0.8 0.6 1" size=".01" type="sphere"/>
 				</body>
 			</body>
 		</body>
 		<!-- Target -->
 		<body name="target" pos=".1 -.1 .01">
 			<joint armature="0" axis="1 0 0" damping="0" limited="true" name="target_x" pos="0 0 0" range="-.27 .27" ref=".1" stiffness="0" type="slide"/>
 			<joint armature="0" axis="0 1 0" damping="0" limited="true" name="target_y" pos="0 0 0" range="-.27 .27" ref="-.1" stiffness="0" type="slide"/>
 			<geom conaffinity="0" contype="0" name="target" pos="0 0 0" rgba="0.9 0.2 0.2 1" size=".009" type="sphere"/>
 		</body>
 	</worldbody>
 	<actuator>
 		<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint0"/>
 		<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint1"/>
 	</actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/striker.xml
+++ b/src/gym/envs/mujoco/assets/striker.xml
@@ -0,0 +1,101 @@
 <mujoco model="arm3d">
  <compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
  <option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler"/>
  <default>
    <joint armature='0.04' damping="1" limited="true"/>
    <geom friction=".0 .0 .0" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
  </default>
  <worldbody>
    <light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
    <geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
    <body name="r_shoulder_pan_link" pos="0 -0.6 0">
      <geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05"/>
      <geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05"/>
      <geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03"/>
      <geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03"/>
      <geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
      <joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
      <body name="r_shoulder_lift_link" pos="0.1 0 0">
        <geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
        <joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
        <body name="r_upper_arm_roll_link" pos="0 0 0">
          <geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
          <joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
          <body name="r_upper_arm_link" pos="0 0 0">
            <geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
            <body name="r_elbow_flex_link" pos="0.4 0 0">
              <geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
              <joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
              <body name="r_forearm_roll_link" pos="0 0 0">
                <geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
                <joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
                <body name="r_forearm_link" pos="0 0 0">
                  <geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
                  <body name="r_wrist_flex_link" pos="0.321 0 0">
                    <geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
                    <joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
                    <body name="r_wrist_roll_link" pos="0 0 0">
                      <joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
                      <body name="tips_arm" pos="0 0 0">
                        <geom conaffinity="1" contype="1" name="tip_arml" pos="0.017 0 0" size="0.003 0.12 0.05" type="box" />
                      </body>
                      <geom conaffinity="1" contype="1" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" type="capsule" />
                    </body>
                  </body>
                </body>
              </body>
            </body>
          </body>
        </body>
      </body>
    </body>
    <body name="object" pos="0 0 -0.270" >
      <geom type="sphere" rgba="1 1 1 1" pos="0 0 0" size="0.05 0.05 0.05" contype="1" conaffinity="0"/>
      <joint name="obj_slidey" armature="0.1" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
      <joint name="obj_slidex" armature="0.1" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
    </body>
    <body name="goal" pos="0.0 0.0 -0.3230">
      <geom rgba="1. 1. 1. 0" pos="0 0 0" type="box" size="0.01 0.01 0.01" contype="0" conaffinity="0"/>
      <body pos="0 0 0">
        <geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
      </body>
      <body name="coaster" pos="0 0 0">
        <geom rgba="1. 1. 1. 1" type="cylinder" size="0.08 0.001 0.1" density='1000000' contype="0" conaffinity="0"/>
      </body>
      <body pos="0 0 0" axisangle="0 0 1 0.785">
        <geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
      </body>
      <body pos="0 0 0" axisangle="0 0 1 -0.785">
        <geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
      </body>
      <joint name="goal_free" type="free" pos="0 0 0" limited="false" damping="0"/>
    </body>
  </worldbody>
  <actuator>
    <motor joint="r_shoulder_pan_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
    <motor joint="r_shoulder_lift_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
    <motor joint="r_upper_arm_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
    <motor joint="r_elbow_flex_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
    <motor joint="r_forearm_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
    <motor joint="r_wrist_flex_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
    <motor joint="r_wrist_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true"/>
  </actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/swimmer.xml
+++ b/src/gym/envs/mujoco/assets/swimmer.xml
@@ -0,0 +1,38 @@
 <mujoco model="swimmer">
  <compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
  <option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1"/>
  <default>
    <geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1"/>
    <joint armature='0.1'  />
  </default>
  <asset>
    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
    <material name="geom" texture="texgeom" texuniform="true"/>
  </asset>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane"/>
    <!--  ================= SWIMMER ================= /-->
    <body name="torso" pos="0 0 0">
      <geom density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule"/>
      <joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide"/>
      <joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide"/>
      <joint axis="0 0 1" name="rot" pos="0 0 0" type="hinge"/>
      <body name="mid" pos="0.5 0 0">
        <geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
        <joint axis="0 0 1" limited="true" name="rot2" pos="0 0 0" range="-100 100" type="hinge"/>
        <body name="back" pos="-1 0 0">
          <geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
          <joint axis="0 0 1" limited="true" name="rot3" pos="0 0 0" range="-100 100" type="hinge"/>
        </body>
      </body>
    </body>
  </worldbody>
  <actuator>
    <motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot2"/>
    <motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot3"/>
  </actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/thrower.xml
+++ b/src/gym/envs/mujoco/assets/thrower.xml
@@ -0,0 +1,127 @@
 <mujoco model="arm3d">
  <compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
  <option timestep="0.01" gravity="0 0 -9.81" iterations="20" integrator="Euler"/>
  <default>
    <joint armature='0.75' damping="1" limited="true"/>
    <geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
  </default>
  <worldbody>
    <light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
    <geom type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
    <body name="r_shoulder_pan_link" pos="0 -0.6 0">
      <geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" density="0.0001"/>
      <geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" density="0.0001"/>
      <geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" density="0.0001"/>
      <geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" density="0.0001"/>
      <geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" density="1"/>
      <joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-0.4854 1.214602" damping="1.0"/>
      <body name="r_shoulder_lift_link" pos="0.1 0 0">
        <geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" density="0.0001"/>
        <joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 0.7963" damping="1.0"/>
        <body name="r_upper_arm_roll_link" pos="0 0 0">
          <geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
          <joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="1.0"/>
          <body name="r_upper_arm_link" pos="0 0 0">
            <geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" density="0.0001"/>
            <body name="r_elbow_flex_link" pos="0.4 0 0">
              <geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" density="0.0001"/>
              <joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.7 0.7" damping="1.0"/>
              <body name="r_forearm_roll_link" pos="0 0 0">
                <geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
                <joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping="1.0" range="-1.5 1.5"/>
                <body name="r_forearm_link" pos="0 0 0" axisangle="1 0 0 0.392">
                  <geom name="fa" type="capsule" fromto="0 0 0 0 0 0.291" size="0.05" density="0.0001"/>
                  <body name="r_wrist_flex_link" pos="0 0 0.321" axisangle="0 0 1 1.57">
                    <geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" density="0.0001"/>
                    <joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-1.0 1.0" damping="1.0" />
                    <body name="r_wrist_roll_link" pos="0 0 0" axisangle="0 1 0 -1.178">
                      <joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="0 1 0" damping="1.0" range="0 2.25"/>
                      <geom type="capsule" fromto="0 -0.05 0 0 0.05 0" size="0.01" contype="1" conaffinity="1" density="0.0001"/>
                      <body pos="0 0 0" axisangle="0 0 1 0.392">
                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density=".0001"/>
                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                      </body>
                      <body pos="0 0 0" axisangle="0 0 1 1.57">
                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                      </body>
                      <body pos="0 0 0" axisangle="0 0 1 1.178">
                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                      </body>
                      <body pos="0 0 0" axisangle="0 0 1 0.785">
                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                      </body>
                      <body pos="0 0 0" axisangle="0 0 1 1.96">
                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                      </body>
                      <body pos="0 0 0" axisangle="0 0 1 2.355">
                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                      </body>
                      <body pos="0 0 0" axisangle="0 0 1 2.74">
                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                      </body>
                    </body>
                  </body>
                </body>
              </body>
            </body>
          </body>
        </body>
      </body>
    </body>
    <body name="goal" pos="0.575 0.5 -0.328">
      <geom rgba="1 1 1 1" type="box" pos="0 0 0.005" size="0.075 0.075 0.001" contype="1" conaffinity="1" density="1000"/>
      <geom rgba="1 1 1 1" type="box" pos="0.0 0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
      <geom rgba="1 1 1 1" type="box" pos="0.0 -0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
      <geom rgba="1 1 1 1" type="box" pos="0.075 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
      <geom rgba="1 1 1 1" type="box" pos="-0.076 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
      <geom rgba="1 1 1 1" type="capsule" fromto="0.073 0.073 0.0075 0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
      <geom rgba="1 1 1 1" type="capsule" fromto="0.073 -0.073 0.0075 0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
      <geom rgba="1 1 1 1" type="capsule" fromto="-0.073 0.073 0.0075 -0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
      <geom rgba="1 1 1 1" type="capsule" fromto="-0.073 -0.073 0.0075 -0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
      <joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="1.0"/>
      <joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="1.0"/>
    </body>
    <body name="ball" pos="0.5 -0.8 0.275">
      <geom rgba="1. 1. 1. 1" type="sphere" size="0.03 0.03 0.1" density='25' contype="1" conaffinity="1"/>
      <joint name="ball_free" type="free" armature='0' damping="0" limited="false"/>
    </body>
  </worldbody>
  <actuator>
    <motor joint="r_shoulder_pan_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
    <motor joint="r_shoulder_lift_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
    <motor joint="r_upper_arm_roll_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
    <motor joint="r_elbow_flex_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
    <motor joint="r_forearm_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
    <motor joint="r_wrist_flex_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
    <motor joint="r_wrist_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true"/>
  </actuator>
 </mujoco>
--- a/src/gym/envs/mujoco/assets/walker2d.xml
+++ b/src/gym/envs/mujoco/assets/walker2d.xml
@@ -0,0 +1,62 @@
 <mujoco model="walker2d">
  <compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
  <default>
    <joint armature="0.01" damping=".1" limited="true"/>
    <geom conaffinity="0" condim="3" contype="1" density="1000" friction=".7 .1 .1" rgba="0.8 0.6 .4 1"/>
  </default>
  <option integrator="RK4" timestep="0.002"/>
  <worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane" material="MatPlane"/>
    <body name="torso" pos="0 0 1.25">
      <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
      <geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
      <body name="thigh" pos="0 0 1.05">
        <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
        <body name="leg" pos="0 0 0.35">
          <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
          <body name="foot" pos="0.2/2 0 0.1">
            <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
            <geom friction="0.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
          </body>
        </body>
      </body>
      <!-- copied and then replace thigh->thigh_left, leg->leg_left, foot->foot_right -->
      <body name="thigh_left" pos="0 0 1.05">
        <joint axis="0 -1 0" name="thigh_left_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_left_geom" rgba=".7 .3 .6 1" size="0.05" type="capsule"/>
        <body name="leg_left" pos="0 0 0.35">
          <joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.04" type="capsule"/>
          <body name="foot_left" pos="0.2/2 0 0.1">
            <joint axis="0 -1 0" name="foot_left_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
            <geom friction="1.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.06" type="capsule"/>
          </body>
        </body>
      </body>
    </body>
  </worldbody>
  <actuator>
    <!-- <motor joint="torso_joint" ctrlrange="-100.0 100.0" isctrllimited="true"/>-->
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_left_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_left_joint"/>
    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_left_joint"/>
    <!-- <motor joint="finger2_rot" ctrlrange="-20.0 20.0" isctrllimited="true"/>-->
  </actuator>
    <asset>
        <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
            width="100" height="100"/>
        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
        <material name="geom" texture="texgeom" texuniform="true"/>
    </asset>
 </mujoco>
--- a/src/gym/envs/mujoco/half_cheetah.py
+++ b/src/gym/envs/mujoco/half_cheetah.py
@@ -0,0 +1,34 @@
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
        utils.EzPickle.__init__(self)
    def step(self, action):
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        xposafter = self.sim.data.qpos[0]
        ob = self._get_obs()
        reward_ctrl = - 0.1 * np.square(action).sum()
        reward_run = (xposafter - xposbefore)/self.dt
        reward = reward_ctrl + reward_run
        done = False
        return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[1:],
            self.sim.data.qvel.flat,
        ])
    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
        self.set_state(qpos, qvel)
        return self._get_obs()
    def viewer_setup(self):
        self.viewer.cam.distance = self.model.stat.extent * 0.5
--- a/src/gym/envs/mujoco/hopper.py
+++ b/src/gym/envs/mujoco/hopper.py
@@ -0,0 +1,40 @@
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4)
        utils.EzPickle.__init__(self)
    def step(self, a):
        posbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        posafter, height, ang = self.sim.data.qpos[0:3]
        alive_bonus = 1.0
        reward = (posafter - posbefore) / self.dt
        reward += alive_bonus
        reward -= 1e-3 * np.square(a).sum()
        s = self.state_vector()
        done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and
                    (height > .7) and (abs(ang) < .2))
        ob = self._get_obs()
        return ob, reward, done, {}
    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[1:],
            np.clip(self.sim.data.qvel.flat, -10, 10)
        ])
    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
        qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
        self.set_state(qpos, qvel)
        return self._get_obs()
    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 2
        self.viewer.cam.distance = self.model.stat.extent * 0.75
        self.viewer.cam.lookat[2] = 1.15
        self.viewer.cam.elevation = -20
--- a/src/gym/envs/mujoco/humanoid.py
+++ b/src/gym/envs/mujoco/humanoid.py
@@ -0,0 +1,51 @@
 import numpy as np
 from gym.envs.mujoco import mujoco_env
 from gym import utils
 def mass_center(model, sim):
    mass = np.expand_dims(model.body_mass, 1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
 class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5)
        utils.EzPickle.__init__(self)
    def _get_obs(self):
        data = self.sim.data
        return np.concatenate([data.qpos.flat[2:],
                               data.qvel.flat,
                               data.cinert.flat,
                               data.cvel.flat,
                               data.qfrc_actuator.flat,
                               data.cfrc_ext.flat])
    def step(self, a):
        pos_before = mass_center(self.model, self.sim)
        self.do_simulation(a, self.frame_skip)
        pos_after = mass_center(self.model, self.sim)
        alive_bonus = 5.0
        data = self.sim.data
        lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
        qpos = self.sim.data.qpos
        done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
        return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
    def reset_model(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
        )
        return self._get_obs()
    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 2.0
        self.viewer.cam.elevation = -20
--- a/src/gym/envs/mujoco/humanoidstandup.py
+++ b/src/gym/envs/mujoco/humanoidstandup.py
@@ -0,0 +1,45 @@
 from gym.envs.mujoco import mujoco_env
 from gym import utils
 import numpy as np
 class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
        utils.EzPickle.__init__(self)
    def _get_obs(self):
        data = self.sim.data
        return np.concatenate([data.qpos.flat[2:],
                               data.qvel.flat,
                               data.cinert.flat,
                               data.cvel.flat,
                               data.qfrc_actuator.flat,
                               data.cfrc_ext.flat])
    def step(self, a):
        self.do_simulation(a, self.frame_skip)
        pos_after = self.sim.data.qpos[2]
        data = self.sim.data
        uph_cost = (pos_after - 0) / self.model.opt.timestep
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
        done = bool(False)
        return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost)
    def reset_model(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
        )
        return self._get_obs()
    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 0.8925
        self.viewer.cam.elevation = -20
--- a/src/gym/envs/mujoco/inverted_double_pendulum.py
+++ b/src/gym/envs/mujoco/inverted_double_pendulum.py
@@ -0,0 +1,43 @@
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5)
        utils.EzPickle.__init__(self)
    def step(self, action):
        self.do_simulation(action, self.frame_skip)
        ob = self._get_obs()
        x, _, y = self.sim.data.site_xpos[0]
        dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
        v1, v2 = self.sim.data.qvel[1:3]
        vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
        alive_bonus = 10
        r = alive_bonus - dist_penalty - vel_penalty
        done = bool(y <= 1)
        return ob, r, done, {}
    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos[:1],  # cart x pos
            np.sin(self.sim.data.qpos[1:]),  # link angles
            np.cos(self.sim.data.qpos[1:]),
            np.clip(self.sim.data.qvel, -10, 10),
            np.clip(self.sim.data.qfrc_constraint, -10, 10)
        ]).ravel()
    def reset_model(self):
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
            self.init_qvel + self.np_random.randn(self.model.nv) * .1
        )
        return self._get_obs()
    def viewer_setup(self):
        v = self.viewer
        v.cam.trackbodyid = 0
        v.cam.distance = self.model.stat.extent * 0.5
        v.cam.lookat[2] = 0.12250000000000005  # v.model.stat.center[2]
--- a/src/gym/envs/mujoco/inverted_pendulum.py
+++ b/src/gym/envs/mujoco/inverted_pendulum.py
@@ -0,0 +1,30 @@
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        utils.EzPickle.__init__(self)
        mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
    def step(self, a):
        reward = 1.0
        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
        done = not notdone
        return ob, reward, done, {}
    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
        qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
        self.set_state(qpos, qvel)
        return self._get_obs()
    def _get_obs(self):
        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
    def viewer_setup(self):
        v = self.viewer
        v.cam.trackbodyid = 0
        v.cam.distance = self.model.stat.extent
--- a/src/gym/envs/mujoco/mujoco_env.py
+++ b/src/gym/envs/mujoco/mujoco_env.py
@@ -0,0 +1,145 @@
 import os
 from gym import error, spaces
 from gym.utils import seeding
 import numpy as np
 from os import path
 import gym
 import six
 try:
    import mujoco_py
 except ImportError as e:
    raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
 DEFAULT_SIZE = 500
 class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self, model_path, frame_skip):
        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = mujoco_py.load_model_from_path(fullpath)
        self.sim = mujoco_py.MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self._viewers = {}
        self.metadata = {
            'render.modes': ['human', 'rgb_array', 'depth_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.init_qpos = self.sim.data.qpos.ravel().copy()
        self.init_qvel = self.sim.data.qvel.ravel().copy()
        observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = observation.size
        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)
        self.seed()
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    # methods to override:
    # ----------------------------
    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError
    def viewer_setup(self):
        """
        This method is called when the viewer is initialized.
        Optionally implement this method, if you need to tinker with camera position
        and so forth.
        """
        pass
    # -----------------------------
    def reset(self):
        self.sim.reset()
        ob = self.reset_model()
        return ob
    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        old_state = self.sim.get_state()
        new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
                                         old_state.act, old_state.udd_state)
        self.sim.set_state(new_state)
        self.sim.forward()
    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip
    def do_simulation(self, ctrl, n_frames):
        self.sim.data.ctrl[:] = ctrl
        for _ in range(n_frames):
            self.sim.step()
    def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE):
        if mode == 'rgb_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            data = self._get_viewer(mode).read_pixels(width, height, depth=False)
            # original image is upside-down, so flip it
            return data[::-1, :, :]
        elif mode == 'depth_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            # Extract depth part of the read_pixels() tuple
            data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1]
            # original image is upside-down, so flip it
            return data[::-1, :]
        elif mode == 'human':
            self._get_viewer(mode).render()
    def close(self):
        if self.viewer is not None:
            # self.viewer.finish()
            self.viewer = None
            self._viewers = {}
    def _get_viewer(self, mode):
        self.viewer = self._viewers.get(mode)
        if self.viewer is None:
            if mode == 'human':
                self.viewer = mujoco_py.MjViewer(self.sim)
            elif mode == 'rgb_array' or mode == 'depth_array':
                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
            self.viewer_setup()
            self._viewers[mode] = self.viewer
        return self.viewer
    def get_body_com(self, body_name):
        return self.data.get_body_xpos(body_name)
    def state_vector(self):
        return np.concatenate([
            self.sim.data.qpos.flat,
            self.sim.data.qvel.flat
        ])
--- a/src/gym/envs/mujoco/pusher.py
+++ b/src/gym/envs/mujoco/pusher.py
@@ -0,0 +1,57 @@
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 import mujoco_py
 class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        utils.EzPickle.__init__(self)
        mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5)
    def step(self, a):
        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
        vec_2 = self.get_body_com("object") - self.get_body_com("goal")
        reward_near = - np.linalg.norm(vec_1)
        reward_dist = - np.linalg.norm(vec_2)
        reward_ctrl = - np.square(a).sum()
        reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        done = False
        return ob, reward, done, dict(reward_dist=reward_dist,
                reward_ctrl=reward_ctrl)
    def viewer_setup(self):
        self.viewer.cam.trackbodyid = -1
        self.viewer.cam.distance = 4.0
    def reset_model(self):
        qpos = self.init_qpos
        self.goal_pos = np.asarray([0, 0])
        while True:
            self.cylinder_pos = np.concatenate([
                    self.np_random.uniform(low=-0.3, high=0, size=1),
                    self.np_random.uniform(low=-0.2, high=0.2, size=1)])
            if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
                break
        qpos[-4:-2] = self.cylinder_pos
        qpos[-2:] = self.goal_pos
        qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
                high=0.005, size=self.model.nv)
        qvel[-4:] = 0
        self.set_state(qpos, qvel)
        return self._get_obs()
    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[:7],
            self.sim.data.qvel.flat[:7],
            self.get_body_com("tips_arm"),
            self.get_body_com("object"),
            self.get_body_com("goal"),
        ])
--- a/src/gym/envs/mujoco/reacher.py
+++ b/src/gym/envs/mujoco/reacher.py
@@ -0,0 +1,43 @@
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        utils.EzPickle.__init__(self)
        mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2)
    def step(self, a):
        vec = self.get_body_com("fingertip")-self.get_body_com("target")
        reward_dist = - np.linalg.norm(vec)
        reward_ctrl = - np.square(a).sum()
        reward = reward_dist + reward_ctrl
        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        done = False
        return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 0
    def reset_model(self):
        qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
        while True:
            self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
            if np.linalg.norm(self.goal) < 2:
                break
        qpos[-2:] = self.goal
        qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
        qvel[-2:] = 0
        self.set_state(qpos, qvel)
        return self._get_obs()
    def _get_obs(self):
        theta = self.sim.data.qpos.flat[:2]
        return np.concatenate([
            np.cos(theta),
            np.sin(theta),
            self.sim.data.qpos.flat[2:],
            self.sim.data.qvel.flat[:2],
            self.get_body_com("fingertip") - self.get_body_com("target")
        ])
--- a/src/gym/envs/mujoco/striker.py
+++ b/src/gym/envs/mujoco/striker.py
@@ -0,0 +1,75 @@
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        utils.EzPickle.__init__(self)
        self._striked = False
        self._min_strike_dist = np.inf
        self.strike_threshold = 0.1
        mujoco_env.MujocoEnv.__init__(self, 'striker.xml', 5)
    def step(self, a):
        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
        vec_2 = self.get_body_com("object") - self.get_body_com("goal")
        self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(vec_2))
        if np.linalg.norm(vec_1) < self.strike_threshold:
            self._striked = True
            self._strike_pos = self.get_body_com("tips_arm")
        if self._striked:
            vec_3 = self.get_body_com("object") - self._strike_pos
            reward_near = - np.linalg.norm(vec_3)
        else:
            reward_near = - np.linalg.norm(vec_1)
        reward_dist = - np.linalg.norm(self._min_strike_dist)
        reward_ctrl = - np.square(a).sum()
        reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        done = False
        return ob, reward, done, dict(reward_dist=reward_dist,
                reward_ctrl=reward_ctrl)
    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 0
        self.viewer.cam.distance = 4.0
    def reset_model(self):
        self._min_strike_dist = np.inf
        self._striked = False
        self._strike_pos = None
        qpos = self.init_qpos
        self.ball = np.array([0.5, -0.175])
        while True:
            self.goal = np.concatenate([
                    self.np_random.uniform(low=0.15, high=0.7, size=1),
                    self.np_random.uniform(low=0.1, high=1.0, size=1)])
            if np.linalg.norm(self.ball - self.goal) > 0.17:
                break
        qpos[-9:-7] = [self.ball[1], self.ball[0]]
        qpos[-7:-5] = self.goal
        diff = self.ball - self.goal
        angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
        qpos[-1] = angle / 3.14
        qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1,
                size=self.model.nv)
        qvel[7:] = 0
        self.set_state(qpos, qvel)
        return self._get_obs()
    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[:7],
            self.sim.data.qvel.flat[:7],
            self.get_body_com("tips_arm"),
            self.get_body_com("object"),
            self.get_body_com("goal"),
        ])
--- a/src/gym/envs/mujoco/swimmer.py
+++ b/src/gym/envs/mujoco/swimmer.py
@@ -0,0 +1,31 @@
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4)
        utils.EzPickle.__init__(self)
    def step(self, a):
        ctrl_cost_coeff = 0.0001
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.sim.data.qpos[0]
        reward_fwd = (xposafter - xposbefore) / self.dt
        reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
        reward = reward_fwd + reward_ctrl
        ob = self._get_obs()
        return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
    def _get_obs(self):
        qpos = self.sim.data.qpos
        qvel = self.sim.data.qvel
        return np.concatenate([qpos.flat[2:], qvel.flat])
    def reset_model(self):
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
        )
        return self._get_obs()
--- a/src/gym/envs/mujoco/thrower.py
+++ b/src/gym/envs/mujoco/thrower.py
@@ -0,0 +1,60 @@
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        utils.EzPickle.__init__(self)
        self._ball_hit_ground = False
        self._ball_hit_location = None
        mujoco_env.MujocoEnv.__init__(self, 'thrower.xml', 5)
    def step(self, a):
        ball_xy = self.get_body_com("ball")[:2]
        goal_xy = self.get_body_com("goal")[:2]
        if not self._ball_hit_ground and self.get_body_com("ball")[2] < -0.25:
            self._ball_hit_ground = True
            self._ball_hit_location = self.get_body_com("ball")
        if self._ball_hit_ground:
            ball_hit_xy = self._ball_hit_location[:2]
            reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy)
        else:
            reward_dist = -np.linalg.norm(ball_xy - goal_xy)
        reward_ctrl = - np.square(a).sum()
        reward = reward_dist + 0.002 * reward_ctrl
        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        done = False
        return ob, reward, done, dict(reward_dist=reward_dist,
                reward_ctrl=reward_ctrl)
    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 0
        self.viewer.cam.distance = 4.0
    def reset_model(self):
        self._ball_hit_ground = False
        self._ball_hit_location = None
        qpos = self.init_qpos
        self.goal = np.array([self.np_random.uniform(low=-0.3, high=0.3),
                              self.np_random.uniform(low=-0.3, high=0.3)])
        qpos[-9:-7] = self.goal
        qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
                high=0.005, size=self.model.nv)
        qvel[7:] = 0
        self.set_state(qpos, qvel)
        return self._get_obs()
    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[:7],
            self.sim.data.qvel.flat[:7],
            self.get_body_com("r_wrist_roll_link"),
            self.get_body_com("ball"),
            self.get_body_com("goal"),
        ])
--- a/src/gym/envs/mujoco/walker2d.py
+++ b/src/gym/envs/mujoco/walker2d.py
@@ -0,0 +1,40 @@
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4)
        utils.EzPickle.__init__(self)
    def step(self, a):
        posbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        posafter, height, ang = self.sim.data.qpos[0:3]
        alive_bonus = 1.0
        reward = ((posafter - posbefore) / self.dt)
        reward += alive_bonus
        reward -= 1e-3 * np.square(a).sum()
        done = not (height > 0.8 and height < 2.0 and
                    ang > -1.0 and ang < 1.0)
        ob = self._get_obs()
        return ob, reward, done, {}
    def _get_obs(self):
        qpos = self.sim.data.qpos
        qvel = self.sim.data.qvel
        return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
    def reset_model(self):
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
        )
        return self._get_obs()
    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 2
        self.viewer.cam.distance = self.model.stat.extent * 0.5
        self.viewer.cam.lookat[2] = 1.15
        self.viewer.cam.elevation = -20
--- a/src/gym/envs/pilesos/init.py
+++ b/src/gym/envs/pilesos/init.py
@@ -0,0 +1 @@
 from gym.envs.pilesos.pilesos import PilesosEnv
--- a/src/gym/envs/pilesos/pycache/init.cpython-37.pyc
+++ b/src/gym/envs/pilesos/pycache/init.cpython-37.pyc
--- a/src/gym/envs/pilesos/pycache/pilesos.cpython-37.pyc
+++ b/src/gym/envs/pilesos/pycache/pilesos.cpython-37.pyc
--- a/src/gym/envs/pilesos/pilesos.py
+++ b/src/gym/envs/pilesos/pilesos.py
@@ -0,0 +1,46 @@
 import numpy as np
 from gym import core, spaces
 from pilesos.mapstate import MapState
 class PilesosEnv(core.Env):
  metadata = {}
  IS_HOME = True
  DIRT = False
  ACTIONS = ['F', 'R', 'L', 'S', 'T']
  state: MapState
  def __init__(self):
    self.viewer = None
    low = np.array([0, 0, 0])
    high = np.array([1, 1, 1])
    self.observation_space = spaces.Box(low=low, high=high, dtype=int)
    self.action_space = spaces.Discrete(5)
    self.state = MapState.default()
    self.current_obs = self.state.get_current_obs()
  def reset(self):
    self.state = MapState.default()
    return self._get_ob()
  def step(self, action):
    action_char = self.ACTIONS[action]
    self.current_obs = self.state.act(action_char)
    has_finished = action_char == 'T'
    return self._get_ob(), self.state.last_reward, has_finished, {}
  def render(self, mode='human'):
    raise NotImplementedError()
  def close(self):
    pass
  def _get_ob(self):
    return np.array([int(self.current_obs.has_bumped), int(self.current_obs.has_dirt), int(self.current_obs.is_home)])
--- a/src/gym/envs/registration.py
+++ b/src/gym/envs/registration.py
@@ -0,0 +1,191 @@
 import re
 from gym import error, logger
 # This format is true today, but it's *not* an official spec.
 # [username/](env-name)-v(version)    env-name is group 1, version is group 2
 #
 # 2016-10-31: We're experimentally expanding the environment ID format
 # to include an optional username.
 env_id_re = re.compile(r'^(?:[\w:-]+\/)?([\w:.-]+)-v(\d+)$')
 def load(name):
    import pkg_resources # takes ~400ms to load, so we import it lazily
    entry_point = pkg_resources.EntryPoint.parse('x={}'.format(name))
    result = entry_point.load(False)
    return result
 class EnvSpec(object):
    """A specification for a particular instance of the environment. Used
    to register the parameters for official evaluations.
    Args:
        id (str): The official environment ID
        entry_point (Optional[str]): The Python entrypoint of the environment class (e.g. module.name:Class)
        trials (int): The number of trials to average reward over
        reward_threshold (Optional[int]): The reward threshold before the task is considered solved
        local_only: True iff the environment is to be used only on the local machine (e.g. debugging envs)
        kwargs (dict): The kwargs to pass to the environment class
        nondeterministic (bool): Whether this environment is non-deterministic even after seeding
        tags (dict[str:any]): A set of arbitrary key-value tags on this environment, including simple property=True tags
    Attributes:
        id (str): The official environment ID
        trials (int): The number of trials run in official evaluation
    """
    def __init__(self, id, entry_point=None, trials=100, reward_threshold=None, local_only=False, kwargs=None, nondeterministic=False, tags=None, max_episode_steps=None, max_episode_seconds=None, timestep_limit=None):
        self.id = id
        # Evaluation parameters
        self.trials = trials
        self.reward_threshold = reward_threshold
        # Environment properties
        self.nondeterministic = nondeterministic
        if tags is None:
            tags = {}
        self.tags = tags
        # BACKWARDS COMPAT 2017/1/18
        if tags.get('wrapper_config.TimeLimit.max_episode_steps'):
            max_episode_steps = tags.get('wrapper_config.TimeLimit.max_episode_steps')
            # TODO: Add the following deprecation warning after 2017/02/18
            # warnings.warn("DEPRECATION WARNING wrapper_config.TimeLimit has been deprecated. Replace any calls to `register(tags={'wrapper_config.TimeLimit.max_episode_steps': 200)}` with `register(max_episode_steps=200)`. This change was made 2017/1/31 and is included in gym version 0.8.0. If you are getting many of these warnings, you may need to update universe past version 0.21.3")
        tags['wrapper_config.TimeLimit.max_episode_steps'] = max_episode_steps
        ######
        # BACKWARDS COMPAT 2017/1/31
        if timestep_limit is not None:
            max_episode_steps = timestep_limit
            # TODO: Add the following deprecation warning after 2017/03/01
            # warnings.warn("register(timestep_limit={}) is deprecated. Use register(max_episode_steps={}) instead.".format(timestep_limit, timestep_limit))
        ######
        self.max_episode_steps = max_episode_steps
        self.max_episode_seconds = max_episode_seconds
        # We may make some of these other parameters public if they're
        # useful.
        match = env_id_re.search(id)
        if not match:
            raise error.Error('Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id, env_id_re.pattern))
        self._env_name = match.group(1)
        self._entry_point = entry_point
        self._local_only = local_only
        self._kwargs = {} if kwargs is None else kwargs
    def make(self):
        """Instantiates an instance of the environment with appropriate kwargs"""
        if self._entry_point is None:
            raise error.Error('Attempting to make deprecated env {}. (HINT: is there a newer registered version of this env?)'.format(self.id))
        elif callable(self._entry_point):
            env = self._entry_point(**self._kwargs)
        else:
            cls = load(self._entry_point)
            env = cls(**self._kwargs)
        # Make the enviroment aware of which spec it came from.
        env.unwrapped.spec = self
        return env
    def __repr__(self):
        return "EnvSpec({})".format(self.id)
    @property
    def timestep_limit(self):
        return self.max_episode_steps
    @timestep_limit.setter
    def timestep_limit(self, value):
        self.max_episode_steps = value
 class EnvRegistry(object):
    """Register an env by ID. IDs remain stable over time and are
    guaranteed to resolve to the same environment dynamics (or be
    desupported). The goal is that results on a particular environment
    should always be comparable, and not depend on the version of the
    code that was running.
    """
    def __init__(self):
        self.env_specs = {}
    def make(self, id):
        logger.info('Making new env: %s', id)
        spec = self.spec(id)
        env = spec.make()
        # We used to have people override _reset/_step rather than
        # reset/step. Set _gym_disable_underscore_compat = True on
        # your environment if you use these methods and don't want
        # compatibility code to be invoked.
        if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(env, "_gym_disable_underscore_compat", False):
            patch_deprecated_methods(env)
        if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
            from gym.wrappers.time_limit import TimeLimit
            env = TimeLimit(env,
                            max_episode_steps=env.spec.max_episode_steps,
                            max_episode_seconds=env.spec.max_episode_seconds)
        return env
    def all(self):
        return self.env_specs.values()
    def spec(self, id):
        match = env_id_re.search(id)
        if not match:
            raise error.Error('Attempted to look up malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id.encode('utf-8'), env_id_re.pattern))
        try:
            return self.env_specs[id]
        except KeyError:
            # Parse the env name and check to see if it matches the non-version
            # part of a valid env (could also check the exact number here)
            env_name = match.group(1)
            matching_envs = [valid_env_name for valid_env_name, valid_env_spec in self.env_specs.items()
                             if env_name == valid_env_spec._env_name]
            if matching_envs:
                raise error.DeprecatedEnv('Env {} not found (valid versions include {})'.format(id, matching_envs))
            else:
                raise error.UnregisteredEnv('No registered env with id: {}'.format(id))
    def register(self, id, **kwargs):
        if id in self.env_specs:
            raise error.Error('Cannot re-register id: {}'.format(id))
        self.env_specs[id] = EnvSpec(id, **kwargs)
 # Have a global registry
 registry = EnvRegistry()
 def register(id, **kwargs):
    return registry.register(id, **kwargs)
 def make(id):
    return registry.make(id)
 def spec(id):
    return registry.spec(id)
 warn_once = True
 def patch_deprecated_methods(env):
    """
    Methods renamed from '_method' to 'method', render() no longer has 'close' parameter, close is a separate method.
    For backward compatibility, this makes it possible to work with unmodified environments.
    """
    global warn_once
    if warn_once:
        logger.warn("Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior." % str(type(env)))
        warn_once = False
    env.reset = env._reset
    env.step  = env._step
    env.seed  = env._seed
    def render(mode):
        return env._render(mode, close=False)
    def close():
        env._render("human", close=True)
    env.render = render
    env.close = close
--- a/src/gym/envs/robotics/README.md
+++ b/src/gym/envs/robotics/README.md
@@ -0,0 +1,54 @@
 # Robotics environments
 Details and documentation on these robotics environments are available in our [blog post](https://blog.openai.com/ingredients-for-robotics-research/), the accompanying [technical report](https://arxiv.org/abs/1802.09464), and the [Gym website](https://gym.openai.com/envs/#robotics).
 If you use these environments, please cite the following paper:
 ```
@misc{1802.09464,
  Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba},
  Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research},
  Year = {2018},
  Eprint = {arXiv:1802.09464},
 }
 ```
 ## Fetch environments
 <img src="https://blog.openai.com/content/images/2018/02/fetch-reach.png" width="500">
 [FetchReach-v0](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position.
 <img src="https://blog.openai.com/content/images/2018/02/fetch-slide.png" width="500">
 [FetchSlide-v0](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
 <img src="https://blog.openai.com/content/images/2018/02/fetch-push.png" width="500">
 [FetchPush-v0](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position.
 <img src="https://blog.openai.com/content/images/2018/02/fetch-pickandplace.png" width="500">
 [FetchPickAndPlace-v0](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
 ## Shadow Dexterous Hand environments
 <img src="https://blog.openai.com/content/images/2018/02/hand-reach.png" width="500">
 [HandReach-v0](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
 <img src="https://blog.openai.com/content/images/2018/02/hand-block.png" width="500">
 [HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation.
 <img src="https://blog.openai.com/content/images/2018/02/hand-egg.png" width="500">
 [HandManipulateEgg-v0](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation.
 <img src="https://blog.openai.com/content/images/2018/02/hand-pen.png" width="500">
 [HandManipulatePen-v0](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation.
--- a/src/gym/envs/robotics/init.py
+++ b/src/gym/envs/robotics/init.py
@@ -0,0 +1,10 @@
 from gym.envs.robotics.fetch_env import FetchEnv
 from gym.envs.robotics.fetch.slide import FetchSlideEnv
 from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv
 from gym.envs.robotics.fetch.push import FetchPushEnv
 from gym.envs.robotics.fetch.reach import FetchReachEnv
 from gym.envs.robotics.hand.reach import HandReachEnv
 from gym.envs.robotics.hand.manipulate import HandBlockEnv
 from gym.envs.robotics.hand.manipulate import HandEggEnv
 from gym.envs.robotics.hand.manipulate import HandPenEnv
--- a/src/gym/envs/robotics/assets/LICENSE.md
+++ b/src/gym/envs/robotics/assets/LICENSE.md
@@ -0,0 +1,222 @@
 # Fetch Robotics
 The model of the [Fetch](http://fetchrobotics.com/platforms-research-development/) is based on [models provided by Fetch](https://github.com/fetchrobotics/fetch_ros/tree/indigo-devel/fetch_description). It was adapted and refined by OpenAI.
 # ShadowHand
 The model of the [ShadowHand](https://www.shadowrobot.com/products/dexterous-hand/) is based on [models provided by ShadowRobot](https://github.com/shadow-robot/sr_common/tree/kinetic-devel/sr_description/hand/model), and on code used under the following license:
 (C) Vikash Kumar, CSE, UW. Licensed under Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
 	                                 Apache License
 	                           Version 2.0, January 2004
 	                        http://www.apache.org/licenses/
 	   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 	   1. Definitions.
 	      "License" shall mean the terms and conditions for use, reproduction,
 	      and distribution as defined by Sections 1 through 9 of this document.
 	      "Licensor" shall mean the copyright owner or entity authorized by
 	      the copyright owner that is granting the License.
 	      "Legal Entity" shall mean the union of the acting entity and all
 	      other entities that control, are controlled by, or are under common
 	      control with that entity. For the purposes of this definition,
 	      "control" means (i) the power, direct or indirect, to cause the
 	      direction or management of such entity, whether by contract or
 	      otherwise, or (ii) ownership of fifty percent (50%) or more of the
 	      outstanding shares, or (iii) beneficial ownership of such entity.
 	      "You" (or "Your") shall mean an individual or Legal Entity
 	      exercising permissions granted by this License.
 	      "Source" form shall mean the preferred form for making modifications,
 	      including but not limited to software source code, documentation
 	      source, and configuration files.
 	      "Object" form shall mean any form resulting from mechanical
 	      transformation or translation of a Source form, including but
 	      not limited to compiled object code, generated documentation,
 	      and conversions to other media types.
 	      "Work" shall mean the work of authorship, whether in Source or
 	      Object form, made available under the License, as indicated by a
 	      copyright notice that is included in or attached to the work
 	      (an example is provided in the Appendix below).
 	      "Derivative Works" shall mean any work, whether in Source or Object
 	      form, that is based on (or derived from) the Work and for which the
 	      editorial revisions, annotations, elaborations, or other modifications
 	      represent, as a whole, an original work of authorship. For the purposes
 	      of this License, Derivative Works shall not include works that remain
 	      separable from, or merely link (or bind by name) to the interfaces of,
 	      the Work and Derivative Works thereof.
 	      "Contribution" shall mean any work of authorship, including
 	      the original version of the Work and any modifications or additions
 	      to that Work or Derivative Works thereof, that is intentionally
 	      submitted to Licensor for inclusion in the Work by the copyright owner
 	      or by an individual or Legal Entity authorized to submit on behalf of
 	      the copyright owner. For the purposes of this definition, "submitted"
 	      means any form of electronic, verbal, or written communication sent
 	      to the Licensor or its representatives, including but not limited to
 	      communication on electronic mailing lists, source code control systems,
 	      and issue tracking systems that are managed by, or on behalf of, the
 	      Licensor for the purpose of discussing and improving the Work, but
 	      excluding communication that is conspicuously marked or otherwise
 	      designated in writing by the copyright owner as "Not a Contribution."
 	      "Contributor" shall mean Licensor and any individual or Legal Entity
 	      on behalf of whom a Contribution has been received by Licensor and
 	      subsequently incorporated within the Work.
 	   2. Grant of Copyright License. Subject to the terms and conditions of
 	      this License, each Contributor hereby grants to You a perpetual,
 	      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 	      copyright license to reproduce, prepare Derivative Works of,
 	      publicly display, publicly perform, sublicense, and distribute the
 	      Work and such Derivative Works in Source or Object form.
 	   3. Grant of Patent License. Subject to the terms and conditions of
 	      this License, each Contributor hereby grants to You a perpetual,
 	      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 	      (except as stated in this section) patent license to make, have made,
 	      use, offer to sell, sell, import, and otherwise transfer the Work,
 	      where such license applies only to those patent claims licensable
 	      by such Contributor that are necessarily infringed by their
 	      Contribution(s) alone or by combination of their Contribution(s)
 	      with the Work to which such Contribution(s) was submitted. If You
 	      institute patent litigation against any entity (including a
 	      cross-claim or counterclaim in a lawsuit) alleging that the Work
 	      or a Contribution incorporated within the Work constitutes direct
 	      or contributory patent infringement, then any patent licenses
 	      granted to You under this License for that Work shall terminate
 	      as of the date such litigation is filed.
 	   4. Redistribution. You may reproduce and distribute copies of the
 	      Work or Derivative Works thereof in any medium, with or without
 	      modifications, and in Source or Object form, provided that You
 	      meet the following conditions:
 	      (a) You must give any other recipients of the Work or
 	          Derivative Works a copy of this License; and
 	      (b) You must cause any modified files to carry prominent notices
 	          stating that You changed the files; and
 	      (c) You must retain, in the Source form of any Derivative Works
 	          that You distribute, all copyright, patent, trademark, and
 	          attribution notices from the Source form of the Work,
 	          excluding those notices that do not pertain to any part of
 	          the Derivative Works; and
 	      (d) If the Work includes a "NOTICE" text file as part of its
 	          distribution, then any Derivative Works that You distribute must
 	          include a readable copy of the attribution notices contained
 	          within such NOTICE file, excluding those notices that do not
 	          pertain to any part of the Derivative Works, in at least one
 	          of the following places: within a NOTICE text file distributed
 	          as part of the Derivative Works; within the Source form or
 	          documentation, if provided along with the Derivative Works; or,
 	          within a display generated by the Derivative Works, if and
 	          wherever such third-party notices normally appear. The contents
 	          of the NOTICE file are for informational purposes only and
 	          do not modify the License. You may add Your own attribution
 	          notices within Derivative Works that You distribute, alongside
 	          or as an addendum to the NOTICE text from the Work, provided
 	          that such additional attribution notices cannot be construed
 	          as modifying the License.
 	      You may add Your own copyright statement to Your modifications and
 	      may provide additional or different license terms and conditions
 	      for use, reproduction, or distribution of Your modifications, or
 	      for any such Derivative Works as a whole, provided Your use,
 	      reproduction, and distribution of the Work otherwise complies with
 	      the conditions stated in this License.
 	   5. Submission of Contributions. Unless You explicitly state otherwise,
 	      any Contribution intentionally submitted for inclusion in the Work
 	      by You to the Licensor shall be under the terms and conditions of
 	      this License, without any additional terms or conditions.
 	      Notwithstanding the above, nothing herein shall supersede or modify
 	      the terms of any separate license agreement you may have executed
 	      with Licensor regarding such Contributions.
 	   6. Trademarks. This License does not grant permission to use the trade
 	      names, trademarks, service marks, or product names of the Licensor,
 	      except as required for reasonable and customary use in describing the
 	      origin of the Work and reproducing the content of the NOTICE file.
 	   7. Disclaimer of Warranty. Unless required by applicable law or
 	      agreed to in writing, Licensor provides the Work (and each
 	      Contributor provides its Contributions) on an "AS IS" BASIS,
 	      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 	      implied, including, without limitation, any warranties or conditions
 	      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
 	      PARTICULAR PURPOSE. You are solely responsible for determining the
 	      appropriateness of using or redistributing the Work and assume any
 	      risks associated with Your exercise of permissions under this License.
 	   8. Limitation of Liability. In no event and under no legal theory,
 	      whether in tort (including negligence), contract, or otherwise,
 	      unless required by applicable law (such as deliberate and grossly
 	      negligent acts) or agreed to in writing, shall any Contributor be
 	      liable to You for damages, including any direct, indirect, special,
 	      incidental, or consequential damages of any character arising as a
 	      result of this License or out of the use or inability to use the
 	      Work (including but not limited to damages for loss of goodwill,
 	      work stoppage, computer failure or malfunction, or any and all
 	      other commercial damages or losses), even if such Contributor
 	      has been advised of the possibility of such damages.
 	   9. Accepting Warranty or Additional Liability. While redistributing
 	      the Work or Derivative Works thereof, You may choose to offer,
 	      and charge a fee for, acceptance of support, warranty, indemnity,
 	      or other liability obligations and/or rights consistent with this
 	      License. However, in accepting such obligations, You may act only
 	      on Your own behalf and on Your sole responsibility, not on behalf
 	      of any other Contributor, and only if You agree to indemnify,
 	      defend, and hold each Contributor harmless for any liability
 	      incurred by, or claims asserted against, such Contributor by reason
 	      of your accepting any such warranty or additional liability.
 	   END OF TERMS AND CONDITIONS
 	   APPENDIX: How to apply the Apache License to your work.
 	      To apply the Apache License to your work, attach the following
 	      boilerplate notice, with the fields enclosed by brackets "[]"
 	      replaced with your own identifying information. (Don't include
 	      the brackets!)  The text should be enclosed in the appropriate
 	      comment syntax for the file format. We also recommend that a
 	      file or class name and description of purpose be included on the
 	      same "printed page" as the copyright notice for easier
 	      identification within third-party archives.
 	   Copyright [yyyy] [name of copyright owner]
 	   Licensed under the Apache License, Version 2.0 (the "License");
 	   you may not use this file except in compliance with the License.
 	   You may obtain a copy of the License at
 	       http://www.apache.org/licenses/LICENSE-2.0
 	   Unless required by applicable law or agreed to in writing, software
 	   distributed under the License is distributed on an "AS IS" BASIS,
 	   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 	   See the License for the specific language governing permissions and
 	   limitations under the License.
 Additional license notices:
 	Sources		: 1) Manipulator and Manipulation in High Dimensional Spaces. Vikash Kumar, Ph.D. Thesis, CSE, Univ. of Washington. 2016.
 	Mujoco		:: Advanced physics simulation engine
 		Source		: www.roboti.us
 		Version		: 1.40
 		Released 	: 17Jan'17
 	Author		:: Vikash Kumar
 		Contacts 	: vikash@openai.com
 		Last edits 	: 3Apr'17
--- a/src/gym/envs/robotics/assets/fetch/pick_and_place.xml
+++ b/src/gym/envs/robotics/assets/fetch/pick_and_place.xml
@@ -0,0 +1,35 @@
 <?xml version="1.0" encoding="utf-8"?>
 <mujoco>
 	<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
 	<option timestep="0.002">
 		<flag warmstart="enable"></flag>
 	</option>
 	<include file="shared.xml"></include>
 	<worldbody>
 		<geom name="floor0" pos="0.8 0.75 0" size="0.85 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
 		<body name="floor0" pos="0.8 0.75 0">
 			<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
 		</body>
 		<include file="robot.xml"></include>
 		<body pos="1.3 0.75 0.2" name="table0">
 			<geom size="0.25 0.35 0.2" type="box" mass="2000" material="table_mat"></geom>
 		</body>
 		<body name="object0" pos="0.025 0.025 0.025">
 			<joint name="object0:joint" type="free" damping="0.01"></joint>
 			<geom size="0.025 0.025 0.025" type="box" condim="3" name="object0" material="block_mat" mass="2"></geom>
 			<site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
 		</body>
 		<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
 	</worldbody>
 	<actuator>
 		<position ctrllimited="true" ctrlrange="0 0.2" joint="robot0:l_gripper_finger_joint" kp="30000" name="robot0:l_gripper_finger_joint" user="1"></position>
 		<position ctrllimited="true" ctrlrange="0 0.2" joint="robot0:r_gripper_finger_joint" kp="30000" name="robot0:r_gripper_finger_joint" user="1"></position>
 	</actuator>
 </mujoco>
--- a/src/gym/envs/robotics/assets/fetch/push.xml
+++ b/src/gym/envs/robotics/assets/fetch/push.xml
@@ -0,0 +1,32 @@
 <?xml version="1.0" encoding="utf-8"?>
 <mujoco>
 	<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
 	<option timestep="0.002">
 		<flag warmstart="enable"></flag>
 	</option>
 	<include file="shared.xml"></include>
 	<worldbody>
 		<geom name="floor0" pos="0.8 0.75 0" size="0.85 0.70 1" type="plane" condim="3" material="floor_mat"></geom>
 		<body name="floor0" pos="0.8 0.75 0">
 			<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
 		</body>
 		<include file="robot.xml"></include>
 		<body pos="1.3 0.75 0.2" name="table0">
 			<geom size="0.25 0.35 0.2" type="box" mass="2000" material="table_mat"></geom>
 		</body>
 		<body name="object0" pos="0.025 0.025 0.025">
 			<joint name="object0:joint" type="free" damping="0.01"></joint>
 			<geom size="0.025 0.025 0.025" type="box" condim="3" name="object0" material="block_mat" mass="2"></geom>
 			<site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
 		</body>
 		<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
 	</worldbody>
 	<actuator></actuator>
 </mujoco>
--- a/src/gym/envs/robotics/assets/fetch/reach.xml
+++ b/src/gym/envs/robotics/assets/fetch/reach.xml
@@ -0,0 +1,26 @@
 <?xml version="1.0" encoding="utf-8"?>
 <mujoco>
 	<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
 	<option timestep="0.002">
 		<flag warmstart="enable"></flag>
 	</option>
 	<include file="shared.xml"></include>
 	<worldbody>
 		<geom name="floor0" pos="0.8 0.75 0" size="0.85 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
 		<body name="floor0" pos="0.8 0.75 0">
 			<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
 		</body>
 		<include file="robot.xml"></include>
 		<body pos="1.3 0.75 0.2" name="table0">
 			<geom size="0.25 0.35 0.2" type="box" mass="2000" material="table_mat"></geom>
 		</body>
 		<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
 	</worldbody>
 	<actuator></actuator>
 </mujoco>
--- a/src/gym/envs/robotics/assets/fetch/robot.xml
+++ b/src/gym/envs/robotics/assets/fetch/robot.xml
@@ -0,0 +1,123 @@
 <mujoco>
 	<body mocap="true" name="robot0:mocap" pos="0 0 0">
 		<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.7" size="0.005 0.005 0.005" type="box"></geom>
 		<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="1 0.005 0.005" type="box"></geom>
 		<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="0.005 1 0.001" type="box"></geom>
 		<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="0.005 0.005 1" type="box"></geom>
 	</body>
 	<body childclass="robot0:fetch" name="robot0:base_link" pos="0.2869 0.2641 0">
 		<joint armature="0.0001" axis="1 0 0" damping="1e+11" name="robot0:slide0" pos="0 0 0" type="slide"></joint>
 		<joint armature="0.0001" axis="0 1 0" damping="1e+11" name="robot0:slide1" pos="0 0 0" type="slide"></joint>
 		<joint armature="0.0001" axis="0 0 1" damping="1e+11" name="robot0:slide2" pos="0 0 0" type="slide"></joint>
 		<inertial diaginertia="1.2869 1.2236 0.9868" mass="70.1294" pos="-0.0036 0 0.0014" quat="0.7605 -0.0133 -0.0061 0.6491"></inertial>
 		<geom mesh="robot0:base_link" name="robot0:base_link" material="robot0:base_mat" class="robot0:grey"></geom>
 		<body name="robot0:torso_lift_link" pos="-0.0869 0 0.3774">
 			<inertial diaginertia="0.3365 0.3354 0.0943" mass="10.7796" pos="-0.0013 -0.0009 0.2935" quat="0.9993 -0.0006 0.0336 0.0185"></inertial>
 			<joint axis="0 0 1" damping="1e+07" name="robot0:torso_lift_joint" range="0.0386 0.3861" type="slide"></joint>
 			<geom mesh="robot0:torso_lift_link" name="robot0:torso_lift_link" material="robot0:torso_mat"></geom>
 			<body name="robot0:head_pan_link" pos="0.0531 0 0.603">
 				<inertial diaginertia="0.0185 0.0128 0.0095" mass="2.2556" pos="0.0321 0.0161 0.039" quat="0.5148 0.5451 -0.453 0.4823"></inertial>
 				<joint axis="0 0 1" name="robot0:head_pan_joint" range="-1.57 1.57"></joint>
 				<geom mesh="robot0:head_pan_link" name="robot0:head_pan_link" material="robot0:head_mat" class="robot0:grey"></geom>
 				<body name="robot0:head_tilt_link" pos="0.1425 0 0.058">
 					<inertial diaginertia="0.0063 0.0059 0.0014" mass="0.9087" pos="0.0081 0.0025 0.0113" quat="0.6458 0.66 -0.274 0.2689"></inertial>
 					<joint axis="0 1 0" damping="1000" name="robot0:head_tilt_joint" range="-0.76 1.45" ref="0.06"></joint>
 					<geom mesh="robot0:head_tilt_link" name="robot0:head_tilt_link" material="robot0:head_mat" class="robot0:blue"></geom>
 					<body name="robot0:head_camera_link" pos="0.055 0 0.0225">
 						<inertial diaginertia="0 0 0" mass="0" pos="0.055 0 0.0225"></inertial>
 						<body name="robot0:head_camera_rgb_frame" pos="0 0.02 0">
 							<inertial diaginertia="0 0 0" mass="0" pos="0 0.02 0"></inertial>
 							<body name="robot0:head_camera_rgb_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5">
 								<inertial diaginertia="0 0 0" mass="0" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5"></inertial>
 								<camera euler="3.1415 0 0" fovy="50" name="head_camera_rgb" pos="0 0 0"></camera>
 							</body>
 						</body>
 						<body name="robot0:head_camera_depth_frame" pos="0 0.045 0">
 							<inertial diaginertia="0 0 0" mass="0" pos="0 0.045 0"></inertial>
 							<body name="robot0:head_camera_depth_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5">
 								<inertial diaginertia="0 0 0" mass="0" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5"></inertial>
 							</body>
 						</body>
 					</body>
 				</body>
 			</body>
 			<body name="robot0:shoulder_pan_link" pos="0.1195 0 0.3486">
 				<inertial diaginertia="0.009 0.0086 0.0041" mass="2.5587" pos="0.0927 -0.0056 0.0564" quat="-0.1364 0.7624 -0.1562 0.613"></inertial>
 				<joint axis="0 0 1" name="robot0:shoulder_pan_joint" range="-1.6056 1.6056"></joint>
 				<geom mesh="robot0:shoulder_pan_link" name="robot0:shoulder_pan_link" material="robot0:arm_mat"></geom>
 				<body name="robot0:shoulder_lift_link" pos="0.117 0 0.06">
 					<inertial diaginertia="0.0116 0.0112 0.0023" mass="2.6615" pos="0.1432 0.0072 -0.0001" quat="0.4382 0.4382 0.555 0.555"></inertial>
 					<joint axis="0 1 0" name="robot0:shoulder_lift_joint" range="-1.221 1.518"></joint>
 					<geom mesh="robot0:shoulder_lift_link" name="robot0:shoulder_lift_link" material="robot0:arm_mat" class="robot0:blue"></geom>
 					<body name="robot0:upperarm_roll_link" pos="0.219 0 0">
 						<inertial diaginertia="0.0047 0.0045 0.0019" mass="2.3311" pos="0.1165 0.0014 0" quat="-0.0136 0.707 0.0136 0.707"></inertial>
 						<joint axis="1 0 0" limited="false" name="robot0:upperarm_roll_joint"></joint>
 						<geom mesh="robot0:upperarm_roll_link" name="robot0:upperarm_roll_link" material="robot0:arm_mat"></geom>
 						<body name="robot0:elbow_flex_link" pos="0.133 0 0">
 							<inertial diaginertia="0.0086 0.0084 0.002" mass="2.1299" pos="0.1279 0.0073 0" quat="0.4332 0.4332 0.5589 0.5589"></inertial>
 							<joint axis="0 1 0" name="robot0:elbow_flex_joint" range="-2.251 2.251"></joint>
 							<geom mesh="robot0:elbow_flex_link" name="robot0:elbow_flex_link" material="robot0:arm_mat" class="robot0:blue"></geom>
 							<body name="robot0:forearm_roll_link" pos="0.197 0 0">
 								<inertial diaginertia="0.0035 0.0031 0.0015" mass="1.6563" pos="0.1097 -0.0266 0" quat="-0.0715 0.7035 0.0715 0.7035"></inertial>
 								<joint armature="2.7538" axis="1 0 0" damping="3.5247" frictionloss="0" limited="false" name="robot0:forearm_roll_joint" stiffness="10"></joint>
 								<geom mesh="robot0:forearm_roll_link" name="robot0:forearm_roll_link" material="robot0:arm_mat"></geom>
 								<body name="robot0:wrist_flex_link" pos="0.1245 0 0">
 									<inertial diaginertia="0.0042 0.0042 0.0018" mass="1.725" pos="0.0882 0.0009 -0.0001" quat="0.4895 0.4895 0.5103 0.5103"></inertial>
 									<joint axis="0 1 0" name="robot0:wrist_flex_joint" range="-2.16 2.16"></joint>
 									<geom mesh="robot0:wrist_flex_link" name="robot0:wrist_flex_link" material="robot0:arm_mat" class="robot0:blue"></geom>
 									<body name="robot0:wrist_roll_link" pos="0.1385 0 0">
 										<inertial diaginertia="0.0001 0.0001 0.0001" mass="0.1354" pos="0.0095 0.0004 -0.0002"></inertial>
 										<joint axis="1 0 0" limited="false" name="robot0:wrist_roll_joint"></joint>
 										<geom mesh="robot0:wrist_roll_link" name="robot0:wrist_roll_link" material="robot0:arm_mat"></geom>
 										<body euler="0 0 0" name="robot0:gripper_link" pos="0.1664 0 0">
 											<inertial diaginertia="0.0024 0.0019 0.0013" mass="1.5175" pos="-0.09 -0.0001 -0.0017" quat="0 0.7071 0 0.7071"></inertial>
 											<geom mesh="robot0:gripper_link" name="robot0:gripper_link" material="robot0:gripper_mat"></geom>
 											<body name="robot0:gipper_camera_link" pos="0.055 0 0.0225">
 												<body name="robot0:gripper_camera_rgb_frame" pos="0 0.02 0">
 													<body name="robot0:gripper_camera_rgb_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5">
 														<camera euler="3.1415 0 0" fovy="50" name="gripper_camera_rgb" pos="0 0 0"></camera>
 													</body>
 												</body>
 												<body name="robot0:gripper_camera_depth_frame" pos="0 0.045 0">
 													<body name="robot0:gripper_camera_depth_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5"></body>
 												</body>
 											</body>
 											<body childclass="robot0:fetchGripper" name="robot0:r_gripper_finger_link" pos="0 0.0159 0">
 												<inertial diaginertia="0.1 0.1 0.1" mass="4" pos="-0.01 0 0"></inertial>
 												<joint axis="0 1 0" name="robot0:r_gripper_finger_joint" range="0 0.05"></joint>
 												<geom pos="0 -0.008 0" size="0.0385 0.007 0.0135" type="box" name="robot0:r_gripper_finger_link" material="robot0:gripper_finger_mat" condim="4" friction="1 0.05 0.01"></geom>
 											</body>
 											<body childclass="robot0:fetchGripper" name="robot0:l_gripper_finger_link" pos="0 -0.0159 0">
 												<inertial diaginertia="0.1 0.1 0.1" mass="4" pos="-0.01 0 0"></inertial>
 												<joint axis="0 -1 0" name="robot0:l_gripper_finger_joint" range="0 0.05"></joint>
 												<geom pos="0 0.008 0" size="0.0385 0.007 0.0135" type="box" name="robot0:l_gripper_finger_link" material="robot0:gripper_finger_mat" condim="4" friction="1 0.05 0.01"></geom>
 											</body>
 											<site name="robot0:grip" pos="0.02 0 0" rgba="0 0 0 0" size="0.02 0.02 0.02"></site>
 										</body>
 									</body>
 								</body>
 							</body>
 						</body>
 					</body>
 				</body>
 			</body>
 		</body>
 		<body name="robot0:estop_link" pos="-0.1246 0.2389 0.3113" quat="0.7071 0.7071 0 0">
 			<inertial diaginertia="0 0 0" mass="0.002" pos="0.0024 -0.0033 0.0067" quat="0.3774 -0.1814 0.1375 0.8977"></inertial>
 			<geom mesh="robot0:estop_link" rgba="0.8 0 0 1" name="robot0:estop_link"></geom>
 		</body>
 		<body name="robot0:laser_link" pos="0.235 0 0.2878" quat="0 1 0 0">
 			<inertial diaginertia="0 0 0" mass="0.0083" pos="-0.0306 0.0007 0.0552" quat="0.5878 0.5378 -0.4578 0.3945"></inertial>
 			<geom mesh="robot0:laser_link" rgba="0.7922 0.8196 0.9333 1" name="robot0:laser_link"></geom>
 			<camera euler="1.55 -1.55 3.14" fovy="25" name="lidar" pos="0 0 0.02"></camera>
 		</body>
 		<body name="robot0:torso_fixed_link" pos="-0.0869 0 0.3774">
 			<inertial diaginertia="0.3865 0.3394 0.1009" mass="13.2775" pos="-0.0722 0.0057 0.2656" quat="0.9995 0.0249 0.0177 0.011"></inertial>
 			<geom mesh="robot0:torso_fixed_link" name="robot0:torso_fixed_link" class="robot0:blue"></geom>
 		</body>
 		<body name="robot0:external_camera_body_0" pos="0 0 0">
 			<camera euler="0 0.75 1.57" fovy="43.3" name="external_camera_0" pos="1.3 0 1.2"></camera>
 		</body>
 	</body>
 </mujoco>
--- a/src/gym/envs/robotics/assets/fetch/shared.xml
+++ b/src/gym/envs/robotics/assets/fetch/shared.xml
@@ -0,0 +1,66 @@
 <mujoco>
    <asset>
        <texture type="skybox" builtin="gradient" rgb1="0.44 0.85 0.56" rgb2="0.46 0.87 0.58" width="32" height="32"></texture>
        <texture name="texture_block" file="block.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
        <material name="floor_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 1"></material>
        <material name="table_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.93 0.93 0.93 1"></material>
        <material name="block_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 1"></material>
        <material name="puck_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 1"></material>
        <material name="robot0:geomMat" shininess="0.03" specular="0.4"></material>
        <material name="robot0:gripper_finger_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
        <material name="robot0:gripper_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
        <material name="robot0:arm_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
        <material name="robot0:head_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
        <material name="robot0:torso_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
        <material name="robot0:base_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
        <mesh file="base_link_collision.stl" name="robot0:base_link"></mesh>
        <mesh file="bellows_link_collision.stl" name="robot0:bellows_link"></mesh>
        <mesh file="elbow_flex_link_collision.stl" name="robot0:elbow_flex_link"></mesh>
        <mesh file="estop_link.stl" name="robot0:estop_link"></mesh>
        <mesh file="forearm_roll_link_collision.stl" name="robot0:forearm_roll_link"></mesh>
        <mesh file="gripper_link.stl" name="robot0:gripper_link"></mesh>
        <mesh file="head_pan_link_collision.stl" name="robot0:head_pan_link"></mesh>
        <mesh file="head_tilt_link_collision.stl" name="robot0:head_tilt_link"></mesh>
        <mesh file="l_wheel_link_collision.stl" name="robot0:l_wheel_link"></mesh>
        <mesh file="laser_link.stl" name="robot0:laser_link"></mesh>
        <mesh file="r_wheel_link_collision.stl" name="robot0:r_wheel_link"></mesh>
        <mesh file="torso_lift_link_collision.stl" name="robot0:torso_lift_link"></mesh>
        <mesh file="shoulder_pan_link_collision.stl" name="robot0:shoulder_pan_link"></mesh>
        <mesh file="shoulder_lift_link_collision.stl" name="robot0:shoulder_lift_link"></mesh>
        <mesh file="upperarm_roll_link_collision.stl" name="robot0:upperarm_roll_link"></mesh>
        <mesh file="wrist_flex_link_collision.stl" name="robot0:wrist_flex_link"></mesh>
        <mesh file="wrist_roll_link_collision.stl" name="robot0:wrist_roll_link"></mesh>
        <mesh file="torso_fixed_link.stl" name="robot0:torso_fixed_link"></mesh>
    </asset>
    <equality>
        <weld body1="robot0:mocap" body2="robot0:gripper_link" solimp="0.9 0.95 0.001" solref="0.02 1"></weld>
    </equality>
    <contact>
        <exclude body1="robot0:r_gripper_finger_link" body2="robot0:l_gripper_finger_link"></exclude>
        <exclude body1="robot0:torso_lift_link" body2="robot0:torso_fixed_link"></exclude>
        <exclude body1="robot0:torso_lift_link" body2="robot0:shoulder_pan_link"></exclude>
    </contact>
    <default>
        <default class="robot0:fetch">
            <geom margin="0.001" material="robot0:geomMat" rgba="1 1 1 1" solimp="0.99 0.99 0.01" solref="0.01 1" type="mesh" user="0"></geom>
            <joint armature="1" damping="50" frictionloss="0" stiffness="0"></joint>
            <default class="robot0:fetchGripper">
                <geom condim="4" margin="0.001" type="box" user="0" rgba="0.356 0.361 0.376 1.0"></geom>
                <joint armature="100" damping="1000" limited="true" solimplimit="0.99 0.999 0.01" solreflimit="0.01 1" type="slide"></joint>
            </default>
            <default class="robot0:grey">
                <geom rgba="0.356 0.361 0.376 1.0"></geom>
            </default>
            <default class="robot0:blue">
                <geom rgba="0.086 0.506 0.767 1.0"></geom>
            </default>
        </default>
    </default>
 </mujoco>
--- a/src/gym/envs/robotics/assets/fetch/slide.xml
+++ b/src/gym/envs/robotics/assets/fetch/slide.xml
@@ -0,0 +1,32 @@
 <?xml version="1.0" encoding="utf-8"?>
 <mujoco>
 	<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
 	<option timestep="0.002">
 		<flag warmstart="enable"></flag>
 	</option>
 	<include file="shared.xml"></include>
 	<worldbody>
 		<geom name="floor0" pos="1 0.75 0" size="1.05 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
 		<body name="floor0" pos="1 0.75 0">
 			<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
 		</body>
 		<include file="robot.xml"></include>
 		<body name="table0" pos="1.32441906 0.75018422 0.2">
 			<geom size="0.625 0.45 0.2" type="box" condim="3" name="table0" material="table_mat" mass="2000" friction="0.1 0.005 0.0001"></geom>
 		</body>
 		<body name="object0" pos="0.025 0.025 0.02">
 			<joint name="object0:joint" type="free" damping="0.01"></joint>
 			<geom size="0.025 0.02" type="cylinder" condim="3" name="object0" material="puck_mat" friction="0.1 0.005 0.0001" mass="2"></geom>
 			<site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
 		</body>
 		<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
 	</worldbody>
 	<actuator></actuator>
 </mujoco>
--- a/src/gym/envs/robotics/assets/hand/manipulate_block.xml
+++ b/src/gym/envs/robotics/assets/hand/manipulate_block.xml
@@ -0,0 +1,41 @@
 <?xml version="1.0" encoding="utf-8"?>
 <mujoco>
    <compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
    <option timestep="0.002" iterations="20" apirate="200">
        <flag warmstart="enable"></flag>
    </option>
    <include file="shared.xml"></include>
    <asset>
        <include file="shared_asset.xml"></include>
        <texture name="texture:object" file="block.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
        <texture name="texture:hidden" file="block_hidden.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
        <material name="material:object" texture="texture:object" specular="1" shininess="0.3" reflectance="0"></material>
        <material name="material:hidden" texture="texture:hidden" specular="1" shininess="0.3" reflectance="0"></material>
        <material name="material:target" texture="texture:object" specular="1" shininess="0.3" reflectance="0" rgba="1 1 1 0.5"></material>
    </asset>
    <worldbody>
        <geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
        <body name="floor0" pos="1 1 0"></body>
        <include file="robot.xml"></include>
        <body name="object" pos="1 0.87 0.2">
            <geom name="object" type="box" size="0.025 0.025 0.025" material="material:object" condim="4" density="567"></geom>
            <geom name="object_hidden" type="box" size="0.024 0.024 0.024" material="material:hidden" condim="4" contype="0" conaffinity="0" mass="0"></geom>
            <site name="object:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
            <joint name="object:joint" type="free" damping="0.01"></joint>
        </body>
        <body name="target" pos="1 0.87 0.2">
            <geom name="target" type="box" size="0.025 0.025 0.025" material="material:target" condim="4" group="2" contype="0" conaffinity="0"></geom>
            <site name="target:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
            <joint name="target:joint" type="free" damping="0.01"></joint>
        </body>
        <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 1 4" dir="0 0 -1" name="light0"></light>
    </worldbody>
 </mujoco>
--- a/src/gym/envs/robotics/assets/hand/manipulate_egg.xml
+++ b/src/gym/envs/robotics/assets/hand/manipulate_egg.xml
@@ -0,0 +1,40 @@
 <?xml version="1.0" encoding="utf-8"?>
 <mujoco>
    <compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
    <option timestep="0.002" iterations="20" apirate="200">
        <flag warmstart="enable"></flag>
    </option>
    <include file="shared.xml"></include>
    <asset>
        <include file="shared_asset.xml"></include>
        <texture name="texture:object" file="block.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
        <texture name="texture:hidden" file="block_hidden.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
        <material name="material:object" texture="texture:object" specular="1" shininess="0.3" reflectance="0"></material>
        <material name="material:hidden" texture="texture:hidden" specular="1" shininess="0.3" reflectance="0"></material>
        <material name="material:target" texture="texture:object" specular="1" shininess="0.3" reflectance="0" rgba="1 1 1 0.5"></material>
    </asset>
    <worldbody>
        <geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
        <body name="floor0" pos="1 1 0"></body>
        <include file="robot.xml"></include>
        <body name="object" pos="1 0.87 0.2">
            <geom name="object" type="ellipsoid" size="0.03 0.03 0.04" material="material:object" condim="4"></geom>
            <geom name="object_hidden" type="ellipsoid" size="0.029 0.029 0.03" material="material:hidden" condim="4" contype="0" conaffinity="0" mass="0"></geom>
            <site name="object:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
            <joint name="object:joint" type="free" damping="0.01"></joint>
        </body>
        <body name="target" pos="1 0.87 0.2">
            <geom name="target" type="ellipsoid" size="0.03 0.03 0.04" material="material:target" condim="4" group="2" contype="0" conaffinity="0"></geom>
            <site name="target:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
            <joint name="target:joint" type="free" damping="0.01"></joint>
        </body>
        <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 1 4" dir="0 0 -1" name="light0"></light>
    </worldbody>
 </mujoco>
--- a/src/gym/envs/robotics/assets/hand/manipulate_pen.xml
+++ b/src/gym/envs/robotics/assets/hand/manipulate_pen.xml
@@ -0,0 +1,40 @@
 <?xml version="1.0" encoding="utf-8"?>
 <mujoco>
    <compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
    <option timestep="0.002" iterations="20" apirate="200">
        <flag warmstart="enable"></flag>
    </option>
    <include file="shared.xml"></include>
    <asset>
        <include file="shared_asset.xml"></include>
        <material name="material:object" specular="0" shininess="0.5" reflectance="0.0" rgba="0.46 0.81 0.88 1.0"></material>
        <material name="material:target" specular="0" shininess="0.5" reflectance="0.0" rgba="0.46 0.81 0.88 0.5"></material>
    </asset>
    <worldbody>
        <geom name="floor0" pos="1 1 -0.2" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
        <body name="floor0" pos="1 1 0"></body>
        <include file="robot.xml"></include>
        <body name="object" pos="1 0.87 0.2" euler="-1 1 0">
            <geom name="object" type="capsule" size="0.008 0.1" material="material:object" condim="4"></geom>
            <site name="object:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
            <site name="object:top" pos="0 0 0.1" rgba="1 0 0 1" size="0.0081"></site>
            <site name="object:bottom" pos="0 0 -0.1" rgba="0 1 0 1" size="0.0081"></site>
            <joint name="object:joint" type="free" damping="0.01"></joint>
        </body>
        <body name="target" pos="1 0.87 0.2" euler="-1 1 0">
            <geom name="target" type="capsule" size="0.008 0.1" material="material:target" condim="4" group="2" contype="0" conaffinity="0"></geom>
            <site name="target:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
            <site name="target:top" pos="0 0 0.1" rgba="1 0 0 0.5" size="0.0081"></site>
            <site name="target:bottom" pos="0 0 -0.1" rgba="0 1 0 0.5" size="0.0081"></site>
            <joint name="target:joint" type="free" damping="0.01"></joint>
        </body>
        <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 1 4" dir="0 0 -1" name="light0"></light>
    </worldbody>
 </mujoco>
--- a/src/gym/envs/robotics/assets/hand/reach.xml
+++ b/src/gym/envs/robotics/assets/hand/reach.xml
@@ -0,0 +1,34 @@
 <?xml version="1.0" encoding="utf-8"?>
 <mujoco>
    <compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
    <option timestep="0.002" iterations="20" apirate="200">
        <flag warmstart="enable"></flag>
    </option>
    <include file="shared.xml"></include>
    <asset>
        <include file="shared_asset.xml"></include>
    </asset>
    <worldbody>
        <geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
        <body name="floor0" pos="1 1 0">
            <site name="target0" pos="0 0 0" size="0.005" rgba="1 0 0 1" type="sphere"></site>
            <site name="target1" pos="0 0 0" size="0.005" rgba="0 1 0 1" type="sphere"></site>
            <site name="target2" pos="0 0 0" size="0.005" rgba="0 0 1 1" type="sphere"></site>
            <site name="target3" pos="0 0 0" size="0.005" rgba="1 1 0 1" type="sphere"></site>
            <site name="target4" pos="0 0 0" size="0.005" rgba="1 0 1 1" type="sphere"></site>
            <site name="finger0" pos="0 0 0" size="0.01" rgba="1 0 0 0.2" type="sphere"></site>
            <site name="finger1" pos="0 0 0" size="0.01" rgba="0 1 0 0.2" type="sphere"></site>
            <site name="finger2" pos="0 0 0" size="0.01" rgba="0 0 1 0.2" type="sphere"></site>
            <site name="finger3" pos="0 0 0" size="0.01" rgba="1 1 0 0.2" type="sphere"></site>
            <site name="finger4" pos="0 0 0" size="0.01" rgba="1 0 1 0.2" type="sphere"></site>
        </body>
        <include file="robot.xml"></include>
        <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
    </worldbody>
 </mujoco>
--- a/src/gym/envs/robotics/assets/hand/robot.xml
+++ b/src/gym/envs/robotics/assets/hand/robot.xml
@@ -0,0 +1,160 @@
 <!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
 <mujoco>
    <body name="robot0:hand mount" pos="1 1.25 0.15" euler="1.5708 0 3.14159">
        <inertial mass="0.1" pos="0 0 0" diaginertia="0.001 0.001 0.001"></inertial>
        <body childclass="robot0:asset_class" name="robot0:forearm" pos="0 0.01 0" euler="0 0 0">
            <inertial pos="0.001 -0.002 0.29" quat="0.982 -0.016 0 -0.188" mass="4" diaginertia="0.01 0.01 0.0075"></inertial>
            <geom class="robot0:D_Vizual" pos="0 0.01 0.04" name="robot0:V_forearm" mesh="robot0:forearm" euler="0 0 1.57"></geom>
            <geom class="robot0:DC_Hand" name="robot0:C_forearm" type="mesh" mesh="robot0:forearm_cvx" pos="0 0.01 0.04" euler="0 0 1.57" rgba="0.4 0.5 0.6 0.7"></geom>
            <body name="robot0:wrist" pos="0 0 0.256">
                <inertial pos="0.003 0 0.016" quat="0.504 0.496 0.495 0.504" mass="0.3" diaginertia="0.001 0.001 0.001"></inertial>
                <joint name="robot0:WRJ1" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.489 0.14" damping="0.5" armature="0.005" user="1123"></joint>
                <geom class="robot0:D_Vizual" name="robot0:V_wrist" mesh="robot0:wrist"></geom>
                <geom class="robot0:DC_Hand" name="robot0:C_wrist" type="capsule" pos="0 0 0" quat="0.707 0.707 0 0" size="0.015 0.01" rgba="0.4 0.5 0.6 0.1"></geom>
                <body name="robot0:palm" pos="0 0 0.034">
                    <inertial pos="0.006 0 0.036" quat="0.716 0.044 0.075 0.693" mass="0.3" diaginertia="0.001 0.001 0.001"></inertial>
                    <joint name="robot0:WRJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="-0.698 0.489" damping="0.5" armature="0.005" user="1122"></joint>
                    <geom class="robot0:D_Vizual" name="robot0:V_palm" mesh="robot0:palm"></geom>
                    <geom class="robot0:DC_Hand" name="robot0:C_palm0" type="box" pos="0.011 0 0.038" size="0.032 0.0111 0.049" rgba="0.4 0.5 0.6 0.1"></geom>
                    <geom class="robot0:DC_Hand" name="robot0:C_palm1" type="box" pos="-0.032 0 0.014" size="0.011 0.0111 0.025" rgba="0.4 0.5 0.6 0.1"></geom>
                    <body name="robot0:ffknuckle" pos="0.033 0 0.095">
                        <inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
                        <joint name="robot0:FFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1103"></joint>
                        <geom class="robot0:D_Vizual" name="robot0:V_ffknuckle" mesh="robot0:knuckle"></geom>
                        <body name="robot0:ffproximal" pos="0 0 0">
                            <inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
                            <joint name="robot0:FFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1102"></joint>
                            <geom class="robot0:D_Vizual" name="robot0:V_ffproximal" mesh="robot0:F3"></geom>
                            <geom class="robot0:DC_Hand" name="robot0:C_ffproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
                            <body name="robot0:ffmiddle" pos="0 0 0.045">
                                <inertial pos="0 0 0.011" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                <joint name="robot0:FFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1101"></joint>
                                <geom class="robot0:D_Vizual" name="robot0:V_ffmiddle" mesh="robot0:F2"></geom>
                                <geom class="robot0:DC_Hand" name="robot0:C_ffmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
                                <body name="robot0:ffdistal" pos="0 0 0.025">
                                    <inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                    <joint name="robot0:FFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1100"></joint>
                                    <geom class="robot0:D_Vizual" name="robot0:V_ffdistal" pos="0 0 0.001" mesh="robot0:F1"></geom>
                                    <geom class="robot0:DC_Hand" name="robot0:C_ffdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
                                    <site name="robot0:S_fftip" pos="0 0 0.026" group="3"></site>
                                    <site class="robot0:D_Touch" name="robot0:Tch_fftip"></site>
                                </body>
                            </body>
                        </body>
                    </body>
                    <body name="robot0:mfknuckle" pos="0.011 0 0.099">
                        <inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
                        <joint name="robot0:MFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1107"></joint>
                        <geom class="robot0:D_Vizual" name="robot0:V_mfknuckle" mesh="robot0:knuckle"></geom>
                        <body name="robot0:mfproximal" pos="0 0 0">
                            <inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
                            <joint name="robot0:MFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1106"></joint>
                            <geom class="robot0:D_Vizual" name="robot0:V_mfproximal" mesh="robot0:F3"></geom>
                            <geom class="robot0:DC_Hand" name="robot0:C_mfproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
                            <body name="robot0:mfmiddle" pos="0 0 0.045">
                                <inertial pos="0 0 0.012" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                <joint name="robot0:MFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1105"></joint>
                                <geom class="robot0:D_Vizual" name="robot0:V_mfmiddle" mesh="robot0:F2"></geom>
                                <geom class="robot0:DC_Hand" name="robot0:C_mfmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
                                <body name="robot0:mfdistal" pos="0 0 0.025">
                                    <inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                    <joint name="robot0:MFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1104"></joint>
                                    <geom class="robot0:D_Vizual" name="robot0:V_mfdistal" mesh="robot0:F1"></geom>
                                    <geom class="robot0:DC_Hand" name="robot0:C_mfdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
                                    <site name="robot0:S_mftip" pos="0 0 0.026" group="3"></site>
                                    <site class="robot0:D_Touch" name="robot0:Tch_mftip"></site>
                                </body>
                            </body>
                        </body>
                    </body>
                    <body name="robot0:rfknuckle" pos="-0.011 0 0.095">
                        <inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
                        <joint name="robot0:RFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1111"></joint>
                        <geom class="robot0:D_Vizual" name="robot0:V_rfknuckle" mesh="robot0:knuckle"></geom>
                        <body name="robot0:rfproximal" pos="0 0 0">
                            <inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
                            <joint name="robot0:RFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1110"></joint>
                            <geom class="robot0:D_Vizual" name="robot0:V_rfproximal" mesh="robot0:F3"></geom>
                            <geom class="robot0:DC_Hand" name="robot0:C_rfproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
                            <body name="robot0:rfmiddle" pos="0 0 0.045">
                                <inertial pos="0 0 0.012" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                <joint name="robot0:RFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1109"></joint>
                                <geom class="robot0:D_Vizual" name="robot0:V_rfmiddle" mesh="robot0:F2"></geom>
                                <geom class="robot0:DC_Hand" name="robot0:C_rfmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
                                <body name="robot0:rfdistal" pos="0 0 0.025">
                                    <inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                    <joint name="robot0:RFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1108"></joint>
                                    <geom class="robot0:D_Vizual" name="robot0:V_rfdistal" mesh="robot0:F1" pos="0 0 0.001"></geom>
                                    <geom class="robot0:DC_Hand" name="robot0:C_rfdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
                                    <site name="robot0:S_rftip" pos="0 0 0.026" group="3"></site>
                                    <site class="robot0:D_Touch" name="robot0:Tch_rftip"></site>
                                </body>
                            </body>
                        </body>
                    </body>
                    <body name="robot0:lfmetacarpal" pos="-0.017 0 0.044">
                        <inertial pos="-0.014 0.001 0.014" quat="0.709 -0.092 -0.063 0.696" mass="0.075" diaginertia="1e-05 1e-05 1e-05"></inertial>
                        <joint name="robot0:LFJ4" type="hinge" pos="0 0 0" axis="0.571 0 0.821" range="0 0.785" user="1116"></joint>
                        <geom class="robot0:D_Vizual" name="robot0:V_lfmetacarpal" pos="-0.016 0 -0.023" mesh="robot0:lfmetacarpal"></geom>
                        <geom class="robot0:DC_Hand" name="robot0:C_lfmetacarpal" type="box" pos="-0.0165 0 0.01" size="0.0095 0.0111 0.025" rgba="0.4 0.5 0.6 0.2"></geom>
                        <body name="robot0:lfknuckle" pos="-0.017 0 0.044">
                            <inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
                            <joint name="robot0:LFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1115"></joint>
                            <geom class="robot0:D_Vizual" name="robot0:V_lfknuckle" mesh="robot0:knuckle"></geom>
                            <body name="robot0:lfproximal" pos="0 0 0">
                                <inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                <joint name="robot0:LFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1114"></joint>
                                <geom class="robot0:D_Vizual" name="robot0:V_lfproximal" mesh="robot0:F3"></geom>
                                <geom class="robot0:DC_Hand" name="robot0:C_lfproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
                                <body name="robot0:lfmiddle" pos="0 0 0.045">
                                    <inertial pos="0 0 0.012" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                    <joint name="robot0:LFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1113"></joint>
                                    <geom class="robot0:D_Vizual" name="robot0:V_lfmiddle" mesh="robot0:F2"></geom>
                                    <geom class="robot0:DC_Hand" name="robot0:C_lfmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
                                    <body name="robot0:lfdistal" pos="0 0 0.025">
                                        <inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                        <joint name="robot0:LFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1112"></joint>
                                        <geom class="robot0:D_Vizual" name="robot0:V_lfdistal" mesh="robot0:F1" pos="0 0 0.001"></geom>
                                        <geom class="robot0:DC_Hand" name="robot0:C_lfdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
                                        <site name="robot0:S_lftip" pos="0 0 0.026" group="3"></site>
                                        <site class="robot0:D_Touch" name="robot0:Tch_lftip"></site>
                                    </body>
                                </body>
                            </body>
                        </body>
                    </body>
                    <body name="robot0:thbase" pos="0.034 -0.009 0.029" axisangle="0 1 0 0.785">
                        <inertial pos="0 0 0" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
                        <joint name="robot0:THJ4" type="hinge" pos="0 0 0" axis="0 0 -1" range="-1.047 1.047" user="1121"></joint>
                        <geom name="robot0:V_thbase" type="box" group="1" pos="0 0 0" size="0.001 0.001 0.001"></geom>
                        <body name="robot0:thproximal" pos="0 0 0">
                            <inertial pos="0 0 0.017" quat="0.982 0 0.001 0.191" mass="0.016" diaginertia="1e-05 1e-05 1e-05"></inertial>
                            <joint name="robot0:THJ3" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.222" user="1120"></joint>
                            <geom class="robot0:D_Vizual" name="robot0:V_thproximal" mesh="robot0:TH3_z"></geom>
                            <geom class="robot0:DC_Hand" name="robot0:C_thproximal" type="capsule" pos="0 0 0.019" size="0.013 0.019" rgba="0.4 0.5 0.6 0.1"></geom>
                            <body name="robot0:thhub" pos="0 0 0.038">
                                <inertial pos="0 0 0" mass="0.002" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                <joint name="robot0:THJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="-0.209 0.209" user="1119"></joint>
                                <geom name="robot0:V_thhub" type="box" group="1" pos="0 0 0" size="0.001 0.001 0.001"></geom>
                                <body name="robot0:thmiddle" pos="0 0 0">
                                    <inertial pos="0 0 0.016" quat="1 -0.001 -0.007 0.003" mass="0.016" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                    <joint name="robot0:THJ1" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.524 0.524" user="1118"></joint>
                                    <geom class="robot0:D_Vizual" name="robot0:V_thmiddle" mesh="robot0:TH2_z"></geom>
                                    <geom class="robot0:DC_Hand" name="robot0:C_thmiddle" type="capsule" pos="0 0 0.016" size="0.011 0.016"></geom>
                                    <body name="robot0:thdistal" pos="0 0 0.032">
                                        <inertial pos="0 0 0.016" quat="0.999 -0.005 -0.047 0.005" mass="0.016" diaginertia="1e-05 1e-05 1e-05"></inertial>
                                        <joint name="robot0:THJ0" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.571 0" user="1117"></joint>
                                        <geom class="robot0:D_Vizual" name="robot0:V_thdistal" mesh="robot0:TH1_z"></geom>
                                        <geom class="robot0:DC_Hand" name="robot0:C_thdistal" type="capsule" pos="0 0 0.013" size="0.00918 0.013" condim="4"></geom>
                                        <site name="robot0:S_thtip" pos="0 0 0.0275" group="3"></site>
                                        <site class="robot0:D_Touch" name="robot0:Tch_thtip" size="0.005 0.011 0.016" pos="-0.005 0 0.02"></site>
                                    </body>
                                </body>
                            </body>
                        </body>
                    </body>
                </body>
            </body>
        </body>
    </body>
 </mujoco>
--- a/src/gym/envs/robotics/assets/hand/shared.xml
+++ b/src/gym/envs/robotics/assets/hand/shared.xml
@@ -0,0 +1,254 @@
 <!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
 <mujoco>
    <size njmax="500" nconmax="100" nuser_jnt="1" nuser_site="1" nuser_tendon="1" nuser_sensor="1" nuser_actuator="16" nstack="600000"></size>
    <visual>
        <map fogstart="3" fogend="5" force="0.1"></map>
        <quality shadowsize="4096"></quality>
    </visual>
    <default>
        <default class="robot0:asset_class">
            <geom friction="1 0.005 0.001" condim="3" margin="0.0005" contype="1" conaffinity="1"></geom>
            <joint limited="true" damping="0.1" armature="0.001" margin="0.01" frictionloss="0.001"></joint>
            <site size="0.005" rgba="0.4 0.9 0.4 1"></site>
            <general ctrllimited="true" forcelimited="true"></general>
        </default>
        <default class="robot0:D_Touch">
            <site type="box" size="0.009 0.004 0.013" pos="0 -0.004 0.018" rgba="0.8 0.8 0.8 0.15" group="4"></site>
        </default>
        <default class="robot0:DC_Hand">
            <geom material="robot0:MatColl" contype="1" conaffinity="0" group="4"></geom>
        </default>
        <default class="robot0:D_Vizual">
            <geom material="robot0:MatViz" contype="0" conaffinity="0" group="1" type="mesh"></geom>
        </default>
        <default class="robot0:free">
            <joint type="free" damping="0" armature="0" limited="false"></joint>
        </default>
    </default>
    <contact>
        <pair geom1="robot0:C_ffdistal" geom2="robot0:C_thdistal" condim="1"></pair>
        <pair geom1="robot0:C_ffmiddle" geom2="robot0:C_thdistal" condim="1"></pair>
        <pair geom1="robot0:C_ffproximal" geom2="robot0:C_thdistal" condim="1"></pair>
        <pair geom1="robot0:C_mfproximal" geom2="robot0:C_thdistal" condim="1"></pair>
        <pair geom1="robot0:C_mfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
        <pair geom1="robot0:C_rfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
        <pair geom1="robot0:C_lfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
        <pair geom1="robot0:C_palm0" geom2="robot0:C_thdistal" condim="1"></pair>
        <pair geom1="robot0:C_mfdistal" geom2="robot0:C_ffdistal" condim="1"></pair>
        <pair geom1="robot0:C_rfdistal" geom2="robot0:C_mfdistal" condim="1"></pair>
        <pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfdistal" condim="1"></pair>
        <pair geom1="robot0:C_mfproximal" geom2="robot0:C_ffproximal" condim="1"></pair>
        <pair geom1="robot0:C_rfproximal" geom2="robot0:C_mfproximal" condim="1"></pair>
        <pair geom1="robot0:C_lfproximal" geom2="robot0:C_rfproximal" condim="1"></pair>
        <pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfdistal" condim="1"></pair>
        <pair geom1="robot0:C_lfdistal" geom2="robot0:C_mfdistal" condim="1"></pair>
        <pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfmiddle" condim="1"></pair>
        <pair geom1="robot0:C_lfmiddle" geom2="robot0:C_rfdistal" condim="1"></pair>
        <pair geom1="robot0:C_lfmiddle" geom2="robot0:C_rfmiddle" condim="1"></pair>
    </contact>
    <tendon>
        <fixed name="robot0:T_WRJ1r" limited="true" range="-0.032 0.032" user="1236">
            <joint joint="robot0:WRJ1" coef="0.0325"></joint>
        </fixed>
        <fixed name="robot0:T_WRJ1l" limited="true" range="-0.032 0.032" user="1237">
            <joint joint="robot0:WRJ1" coef="-0.0325"></joint>
        </fixed>
        <fixed name="robot0:T_WRJ0u" limited="true" range="-0.032 0.032" user="1236">
            <joint joint="robot0:WRJ0" coef="0.0175"></joint>
        </fixed>
        <fixed name="robot0:T_WRJ0d" limited="true" range="-0.032 0.032" user="1237">
            <joint joint="robot0:WRJ0" coef="-0.0175"></joint>
        </fixed>
        <fixed name="robot0:T_FFJ3r" limited="true" range="-0.018 0.018" user="1204">
            <joint joint="robot0:FFJ3" coef="0.01"></joint>
        </fixed>
        <fixed name="robot0:T_FFJ3l" limited="true" range="-0.018 0.018" user="1205">
            <joint joint="robot0:FFJ3" coef="-0.01"></joint>
        </fixed>
        <fixed name="robot0:T_FFJ2u" limited="true" range="-0.007 0.03" user="1202">
            <joint joint="robot0:FFJ2" coef="0.01"></joint>
        </fixed>
        <fixed name="robot0:T_FFJ2d" limited="true" range="-0.03 0.007" user="1203">
            <joint joint="robot0:FFJ2" coef="-0.01"></joint>
        </fixed>
        <fixed name="robot0:T_FFJ1c" limited="true" range="-0.001 0.001">
            <joint joint="robot0:FFJ0" coef="0.00705"></joint>
            <joint joint="robot0:FFJ1" coef="-0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_FFJ1u" limited="true" range="-0.007 0.03" user="1200">
            <joint joint="robot0:FFJ0" coef="0.00705"></joint>
            <joint joint="robot0:FFJ1" coef="0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_FFJ1d" limited="true" range="-0.03 0.007" user="1201">
            <joint joint="robot0:FFJ0" coef="-0.00705"></joint>
            <joint joint="robot0:FFJ1" coef="-0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_MFJ3r" limited="true" range="-0.018 0.018" user="1210">
            <joint joint="robot0:MFJ3" coef="0.01"></joint>
        </fixed>
        <fixed name="robot0:T_MFJ3l" limited="true" range="-0.018 0.018" user="1211">
            <joint joint="robot0:MFJ3" coef="-0.01"></joint>
        </fixed>
        <fixed name="robot0:T_MFJ2u" limited="true" range="-0.007 0.03" user="1208">
            <joint joint="robot0:MFJ2" coef="0.01"></joint>
        </fixed>
        <fixed name="robot0:T_MFJ2d" limited="true" range="-0.03 0.007" user="1209">
            <joint joint="robot0:MFJ2" coef="-0.01"></joint>
        </fixed>
        <fixed name="robot0:T_MFJ1c" limited="true" range="-0.001 0.001">
            <joint joint="robot0:MFJ0" coef="0.00705"></joint>
            <joint joint="robot0:MFJ1" coef="-0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_MFJ1u" limited="true" range="-0.007 0.03" user="1206">
            <joint joint="robot0:MFJ0" coef="0.00705"></joint>
            <joint joint="robot0:MFJ1" coef="0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_MFJ1d" limited="true" range="-0.03 0.007" user="1207">
            <joint joint="robot0:MFJ0" coef="-0.00705"></joint>
            <joint joint="robot0:MFJ1" coef="-0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_RFJ3r" limited="true" range="-0.018 0.018" user="1216">
            <joint joint="robot0:RFJ3" coef="0.01"></joint>
        </fixed>
        <fixed name="robot0:T_RFJ3l" limited="true" range="-0.018 0.018" user="1217">
            <joint joint="robot0:RFJ3" coef="-0.01"></joint>
        </fixed>
        <fixed name="robot0:T_RFJ2u" limited="true" range="-0.007 0.03" user="1214">
            <joint joint="robot0:RFJ2" coef="0.01"></joint>
        </fixed>
        <fixed name="robot0:T_RFJ2d" limited="true" range="-0.03 0.007" user="1215">
            <joint joint="robot0:RFJ2" coef="-0.01"></joint>
        </fixed>
        <fixed name="robot0:T_RFJ1c" limited="true" range="-0.001 0.001">
            <joint joint="robot0:RFJ0" coef="0.00705"></joint>
            <joint joint="robot0:RFJ1" coef="-0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_RFJ1u" limited="true" range="-0.007 0.03" user="1212">
            <joint joint="robot0:RFJ0" coef="0.00705"></joint>
            <joint joint="robot0:RFJ1" coef="0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_RFJ1d" limited="true" range="-0.03 0.007" user="1213">
            <joint joint="robot0:RFJ0" coef="-0.00705"></joint>
            <joint joint="robot0:RFJ1" coef="-0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_LFJ4u" limited="true" range="-0.007 0.03" user="1224">
            <joint joint="robot0:LFJ4" coef="0.01"></joint>
        </fixed>
        <fixed name="robot0:T_LFJ4d" limited="true" range="-0.03 0.007" user="1225">
            <joint joint="robot0:LFJ4" coef="-0.01"></joint>
        </fixed>
        <fixed name="robot0:T_LFJ3r" limited="true" range="-0.018 0.018" user="1222">
            <joint joint="robot0:LFJ3" coef="0.01"></joint>
        </fixed>
        <fixed name="robot0:T_LFJ3l" limited="true" range="-0.018 0.018" user="1223">
            <joint joint="robot0:LFJ3" coef="-0.01"></joint>
        </fixed>
        <fixed name="robot0:T_LFJ2u" limited="true" range="-0.007 0.03" user="1220">
            <joint joint="robot0:LFJ2" coef="0.01"></joint>
        </fixed>
        <fixed name="robot0:T_LFJ2d" limited="true" range="-0.03 0.007" user="1221">
            <joint joint="robot0:LFJ2" coef="-0.01"></joint>
        </fixed>
        <fixed name="robot0:T_LFJ1c" limited="true" range="-0.001 0.001">
            <joint joint="robot0:LFJ0" coef="0.00705"></joint>
            <joint joint="robot0:LFJ1" coef="-0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_LFJ1u" limited="true" range="-0.007 0.03" user="1218">
            <joint joint="robot0:LFJ0" coef="0.00705"></joint>
            <joint joint="robot0:LFJ1" coef="0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_LFJ1d" limited="true" range="-0.03 0.007" user="1219">
            <joint joint="robot0:LFJ0" coef="-0.00705"></joint>
            <joint joint="robot0:LFJ1" coef="-0.00805"></joint>
        </fixed>
        <fixed name="robot0:T_THJ4a" limited="true" range="-0.018 0.018" user="1234">
            <joint joint="robot0:THJ4" coef="0.01636"></joint>
        </fixed>
        <fixed name="robot0:T_THJ4c" limited="true" range="-0.018 0.018" user="1235">
            <joint joint="robot0:THJ4" coef="-0.01636"></joint>
        </fixed>
        <fixed name="robot0:T_THJ3u" limited="true" range="-0.007 0.03" user="1232">
            <joint joint="robot0:THJ3" coef="0.01"></joint>
        </fixed>
        <fixed name="robot0:T_THJ3d" limited="true" range="-0.03 0.007" user="1233">
            <joint joint="robot0:THJ3" coef="-0.01"></joint>
        </fixed>
        <fixed name="robot0:T_THJ2u" limited="true" range="-0.018 0.018" user="1230">
            <joint joint="robot0:THJ2" coef="0.011"></joint>
        </fixed>
        <fixed name="robot0:T_THJ2d" limited="true" range="-0.018 0.018" user="1231">
            <joint joint="robot0:THJ2" coef="-0.011"></joint>
        </fixed>
        <fixed name="robot0:T_THJ1r" limited="true" range="-0.018 0.018" user="1228">
            <joint joint="robot0:THJ1" coef="0.011"></joint>
        </fixed>
        <fixed name="robot0:T_THJ1l" limited="true" range="-0.018 0.018" user="1229">
            <joint joint="robot0:THJ1" coef="-0.011"></joint>
        </fixed>
        <fixed name="robot0:T_THJ0r" limited="true" range="-0.03 0.007" user="1226">
            <joint joint="robot0:THJ0" coef="0.009"></joint>
        </fixed>
        <fixed name="robot0:T_THJ0l" limited="true" range="-0.007 0.03" user="1227">
            <joint joint="robot0:THJ0" coef="-0.009"></joint>
        </fixed>
    </tendon>
    <sensor>
        <jointpos name="robot0:Sjp_WRJ1" joint="robot0:WRJ1"></jointpos>
        <jointpos name="robot0:Sjp_WRJ0" joint="robot0:WRJ0"></jointpos>
        <jointpos name="robot0:Sjp_FFJ3" joint="robot0:FFJ3"></jointpos>
        <jointpos name="robot0:Sjp_FFJ2" joint="robot0:FFJ2"></jointpos>
        <jointpos name="robot0:Sjp_FFJ1" joint="robot0:FFJ1"></jointpos>
        <jointpos name="robot0:Sjp_FFJ0" joint="robot0:FFJ0"></jointpos>
        <jointpos name="robot0:Sjp_MFJ3" joint="robot0:MFJ3"></jointpos>
        <jointpos name="robot0:Sjp_MFJ2" joint="robot0:MFJ2"></jointpos>
        <jointpos name="robot0:Sjp_MFJ1" joint="robot0:MFJ1"></jointpos>
        <jointpos name="robot0:Sjp_MFJ0" joint="robot0:MFJ0"></jointpos>
        <jointpos name="robot0:Sjp_RFJ3" joint="robot0:RFJ3"></jointpos>
        <jointpos name="robot0:Sjp_RFJ2" joint="robot0:RFJ2"></jointpos>
        <jointpos name="robot0:Sjp_RFJ1" joint="robot0:RFJ1"></jointpos>
        <jointpos name="robot0:Sjp_RFJ0" joint="robot0:RFJ0"></jointpos>
        <jointpos name="robot0:Sjp_LFJ4" joint="robot0:LFJ4"></jointpos>
        <jointpos name="robot0:Sjp_LFJ3" joint="robot0:LFJ3"></jointpos>
        <jointpos name="robot0:Sjp_LFJ2" joint="robot0:LFJ2"></jointpos>
        <jointpos name="robot0:Sjp_LFJ1" joint="robot0:LFJ1"></jointpos>
        <jointpos name="robot0:Sjp_LFJ0" joint="robot0:LFJ0"></jointpos>
        <jointpos name="robot0:Sjp_THJ4" joint="robot0:THJ4"></jointpos>
        <jointpos name="robot0:Sjp_THJ3" joint="robot0:THJ3"></jointpos>
        <jointpos name="robot0:Sjp_THJ2" joint="robot0:THJ2"></jointpos>
        <jointpos name="robot0:Sjp_THJ1" joint="robot0:THJ1"></jointpos>
        <jointpos name="robot0:Sjp_THJ0" joint="robot0:THJ0"></jointpos>
        <touch name="robot0:ST_Tch_fftip" site="robot0:Tch_fftip"></touch>
        <touch name="robot0:ST_Tch_mftip" site="robot0:Tch_mftip"></touch>
        <touch name="robot0:ST_Tch_rftip" site="robot0:Tch_rftip"></touch>
        <touch name="robot0:ST_Tch_lftip" site="robot0:Tch_lftip"></touch>
        <touch name="robot0:ST_Tch_thtip" site="robot0:Tch_thtip"></touch>
    </sensor>
    <actuator>
        <position name="robot0:A_WRJ1" class="robot0:asset_class" user="2038" joint="robot0:WRJ1" ctrlrange="-0.489 0.14" kp="5" forcerange="-4.785 4.785"></position>
        <position name="robot0:A_WRJ0" class="robot0:asset_class" user="2036" joint="robot0:WRJ0" ctrlrange="-0.698 0.489" kp="5" forcerange="-2.175 2.175"></position>
        <position name="robot0:A_FFJ3" class="robot0:asset_class" user="2004" joint="robot0:FFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
        <position name="robot0:A_FFJ2" class="robot0:asset_class" user="2002" joint="robot0:FFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
        <position name="robot0:A_FFJ1" class="robot0:asset_class" user="2000" joint="robot0:FFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
        <position name="robot0:A_MFJ3" class="robot0:asset_class" user="2010" joint="robot0:MFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
        <position name="robot0:A_MFJ2" class="robot0:asset_class" user="2008" joint="robot0:MFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
        <position name="robot0:A_MFJ1" class="robot0:asset_class" user="2006" joint="robot0:MFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
        <position name="robot0:A_RFJ3" class="robot0:asset_class" user="2016" joint="robot0:RFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
        <position name="robot0:A_RFJ2" class="robot0:asset_class" user="2014" joint="robot0:RFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
        <position name="robot0:A_RFJ1" class="robot0:asset_class" user="2012" joint="robot0:RFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
        <position name="robot0:A_LFJ4" class="robot0:asset_class" user="2024" joint="robot0:LFJ4" ctrlrange="0 0.785" kp="1" forcerange="-0.9 0.9"></position>
        <position name="robot0:A_LFJ3" class="robot0:asset_class" user="2022" joint="robot0:LFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
        <position name="robot0:A_LFJ2" class="robot0:asset_class" user="2020" joint="robot0:LFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
        <position name="robot0:A_LFJ1" class="robot0:asset_class" user="2018" joint="robot0:LFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
        <position name="robot0:A_THJ4" class="robot0:asset_class" user="2034" joint="robot0:THJ4" ctrlrange="-1.047 1.047" kp="1" forcerange="-2.3722 2.3722"></position>
        <position name="robot0:A_THJ3" class="robot0:asset_class" user="2032" joint="robot0:THJ3" ctrlrange="0 1.222" kp="1" forcerange="-1.45 1.45"></position>
        <position name="robot0:A_THJ2" class="robot0:asset_class" user="2030" joint="robot0:THJ2" ctrlrange="-0.209 0.209" kp="1" forcerange="-0.99 0.99"></position>
        <position name="robot0:A_THJ1" class="robot0:asset_class" user="2028" joint="robot0:THJ1" ctrlrange="-0.524 0.524" kp="1" forcerange="-0.99 0.99"></position>
        <position name="robot0:A_THJ0" class="robot0:asset_class" user="2026" joint="robot0:THJ0" ctrlrange="-1.571 0" kp="1" forcerange="-0.81 0.81"></position>
    </actuator>
 </mujoco>
--- a/src/gym/envs/robotics/assets/hand/shared_asset.xml
+++ b/src/gym/envs/robotics/assets/hand/shared_asset.xml
@@ -0,0 +1,26 @@
 <!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
 <mujoco>
    <texture type="skybox" builtin="gradient" rgb1="0.44 0.85 0.56" rgb2="0.46 0.87 0.58" width="32" height="32"></texture>
    <texture name="robot0:texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.15 0.2" width="512" height="512"></texture>
    <texture name="robot0:texgeom" type="cube" builtin="flat" mark="cross" width="127" height="127" rgb1="0.3 0.6 0.5" rgb2="0.3 0.6 0.5" markrgb="0 0 0" random="0.01"></texture>
    <material name="robot0:MatGnd" reflectance="0.5" texture="robot0:texplane" texrepeat="1 1" texuniform="true"></material>
    <material name="robot0:MatColl" specular="1" shininess="0.3" reflectance="0.5" rgba="0.4 0.5 0.6 1"></material>
    <material name="robot0:MatViz" specular="0.75" shininess="0.1" reflectance="0.5" rgba="0.93 0.93 0.93 1"></material>
    <material name="robot0:object" texture="robot0:texgeom" texuniform="false"></material>
    <material name="floor_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 0"></material>
    <mesh name="robot0:forearm" file="forearm_electric.stl"></mesh>
    <mesh name="robot0:forearm_cvx" file="forearm_electric_cvx.stl"></mesh>
    <mesh name="robot0:wrist" scale="0.001 0.001 0.001" file="wrist.stl"></mesh>
    <mesh name="robot0:palm" scale="0.001 0.001 0.001" file="palm.stl"></mesh>
    <mesh name="robot0:knuckle" scale="0.001 0.001 0.001" file="knuckle.stl"></mesh>
    <mesh name="robot0:F3" scale="0.001 0.001 0.001" file="F3.stl"></mesh>
    <mesh name="robot0:F2" scale="0.001 0.001 0.001" file="F2.stl"></mesh>
    <mesh name="robot0:F1" scale="0.001 0.001 0.001" file="F1.stl"></mesh>
    <mesh name="robot0:lfmetacarpal" scale="0.001 0.001 0.001" file="lfmetacarpal.stl"></mesh>
    <mesh name="robot0:TH3_z" scale="0.001 0.001 0.001" file="TH3_z.stl"></mesh>
    <mesh name="robot0:TH2_z" scale="0.001 0.001 0.001" file="TH2_z.stl"></mesh>
    <mesh name="robot0:TH1_z" scale="0.001 0.001 0.001" file="TH1_z.stl"></mesh>
 </mujoco>
--- a/src/gym/envs/robotics/assets/stls/.get
+++ b/src/gym/envs/robotics/assets/stls/.get
--- a/src/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/estop_link.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/estop_link.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/gripper_link.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/gripper_link.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl
--- a/Show More
+++ b/Show More
		`@@ -0,0 +1 @@`
							`from gym.envs.atari.atari_env import AtariEnv`
		`@@ -0,0 +1 @@`
							`from gym.envs.pilesos.pilesos import PilesosEnv`