git reimport

2019-03-15 13:54:11 +04:00
commit 0c6baed398
232 changed files with 14071 additions and 0 deletions
--- a/14
+++ b/14
@@ -0,0 +1,14 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+numpy = "*"
+keras = "*"
+keras-rl = "*"
+
+[dev-packages]
+
+[requires]
+python_version = "3.7"
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -0,0 +1,175 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "a227a69980d84f05f5a653bcc6f1cc461e95f114074a0baae20d457cc84e7ebc"
+        },
+        "pipfile-spec": 6,
+        "requires": {
+            "python_version": "3.7"
+        },
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "h5py": {
+            "hashes": [
+                "sha256:0f8cd2acbacf3177b4427ed42639c911667b1f24d923388ab1f8ad466a12be5e",
+                "sha256:11277e3879098f921ee9e29105b20591e1dfdd44963357399f2abaa1a280c560",
+                "sha256:1241dec0c94ac32f3285cac1d6f44beabf80423e422ab03bd2686d731a8a9294",
+                "sha256:17b8187de0b3a945d8e8d031e7eb6ece2fce90791f9c5fde36f4396bf38fdde1",
+                "sha256:2f30007d0796788a454c1293262f19f25e6428317d3d386f78138fba2a44e37d",
+                "sha256:308e0758587ee16d4e73e7f2f8aae8351091e343bf0a43d2f697f9535465c816",
+                "sha256:37cacddf0e8209905f52537a8cf71da0dd9a4de62bd79247274c97b24a408997",
+                "sha256:38a23bb599748adf23d77f74885c0de6f4a7d9baa42f74e476bbf90fba2b47dd",
+                "sha256:47ab18b7b7bbc36fd2b606289b703b6f0ee915b923d6ad94dd17ac80ebffc280",
+                "sha256:486c78330af0bf33f5077b51d1888c0739c3cd1a03d5aade0d48572b3b5690ca",
+                "sha256:4e2183458d6ef1ae87dfb5d6acd0786359336cd9ac0ece6396c09b59fdaa3bd6",
+                "sha256:51d0595c3e58814c831f6cd2b664a5bf9590e26262c1d541b380d041e4fcb3c0",
+                "sha256:56d259d56822b70881760b243957f04a0cf133f0ec65eae6a33f562826aee899",
+                "sha256:5e6e777653169a3cc24ea56bb3d8c845ea391f8914c35bb6f350b0753a52891c",
+                "sha256:62bfb0ebb0f59e5dccc0b0dbbc0fc40dd1d1e09d04c0dc71f89790231531d4a2",
+                "sha256:67d89b64debfa021b54aa6f24bbf008403bd144748a0148596b518bce80d2fc4",
+                "sha256:6bf38571f555fa214493ec6349d29024cc5f313bf1715b09f236c553fd22ae4d",
+                "sha256:9214ca445c18a37bfe9c165982c0e317e2f21f035c8d635d1c6d9fcbaf35b7a8",
+                "sha256:ab0c52850428d2e86029935389379c2c97f752e76b616da851deec8a4484f8ec",
+                "sha256:b2eff336697d8dfd712c5d93fef9f4e4d3e97d9d8c258801836b8664a239e07a",
+                "sha256:bb33fabc0b8f3fe3bb0f8d6821b2fad5b2a64c27a0808e8d1c5c1e3362062064",
+                "sha256:bd5353ab342bae1262b04745934cc1565df4cbc8d6a979a0c98f42209bd5c265",
+                "sha256:bd73444efd1ac06dac27b8405bbe8791a02fd1bc8a2fa0e575257f90b7b57467",
+                "sha256:bd932236a2ef91a75fee5d7f4ace80ab494c5a59cd092a67c9785ddb7fdc218c",
+                "sha256:c45650de228ace7731e4280e14fb687f6d5c29cd666c5b22b42492b035e994d6",
+                "sha256:d5c0c01da45f901a3d429e7ef9e7e22baa869e1affb8715f1bf94e6a30020740",
+                "sha256:d75035db5bde802a29f4f29f18bb7548863d29ac90ccbf2c04c11799bbbba2c3",
+                "sha256:dda88206dc9464923f27f601000bc5b152ac0bd6d0122f098d4f239150a70076",
+                "sha256:e1c2ac5d0aa232c0f60fecc6bd1122346885086a176f939b91058c4c980cc226",
+                "sha256:e626c65a8587921ebc7fb8d31a49addfdd0b9a9aa96315ea484c09803337b955"
+            ],
+            "version": "==2.8.0"
+        },
+        "keras": {
+            "hashes": [
+                "sha256:794d0c92c6c4122f1f0fcf3a7bc2f49054c6a54ddbef8d8ffafca62795d760b6",
+                "sha256:90b610a3dbbf6d257b20a079eba3fdf2eed2158f64066a7c6f7227023fd60bc9"
+            ],
+            "index": "pypi",
+            "version": "==2.2.4"
+        },
+        "keras-applications": {
+            "hashes": [
+                "sha256:721dda4fa4e043e5bbd6f52a2996885c4639a7130ae478059b3798d0706f5ae7",
+                "sha256:a03af60ddc9c5afdae4d5c9a8dd4ca857550e0b793733a5072e0725829b87017"
+            ],
+            "version": "==1.0.6"
+        },
+        "keras-preprocessing": {
+            "hashes": [
+                "sha256:90d04c1750bccceef88ac09475c291b4b5f6aa1eaf0603167061b1aa8b043c61",
+                "sha256:ef2e482c4336fcf7180244d06f4374939099daa3183816e82aee7755af35b754"
+            ],
+            "version": "==1.0.5"
+        },
+        "keras-rl": {
+            "hashes": [
+                "sha256:7bbbb24c8f4560a03f59fb062a5003da102de033bc8cd7d06b69b4c1b48ec054"
+            ],
+            "index": "pypi",
+            "version": "==0.4.2"
+        },
+        "numpy": {
+            "hashes": [
+                "sha256:032df9b6571c5f1d41ea6f6a189223208cb488990373aa686aca55570fcccb42",
+                "sha256:094f8a83e5bd0a44a7557fa24a46db6ba7d5299c389ddbc9e0e18722f567fb63",
+                "sha256:1c0c80e74759fa4942298044274f2c11b08c86230b25b8b819e55e644f5ff2b6",
+                "sha256:2aa0910eaeb603b1a5598193cc3bc8eacf1baf6c95cbc3955eb8e15fa380c133",
+                "sha256:2f5ebc7a04885c7d69e5daa05208faef4db7f1ae6a99f4d36962df8cd54cdc76",
+                "sha256:32a07241cb624e104b88b08dea2851bf4ec5d65a1f599d7735041ced7171fd7a",
+                "sha256:3c7959f750b54b445f14962a3ddc41b9eadbab00b86da55fbb1967b2b79aad10",
+                "sha256:3d8f9273c763a139a99e65c2a3c10f1109df30bedae7f011b10d95c538364704",
+                "sha256:63bca71691339d2d6f8a7c970821f2b12098a53afccc0190d4e1555e75e5223a",
+                "sha256:7ae9c3baff3b989859c88e0168ad10902118595b996bf781eaf011bb72428798",
+                "sha256:866a7c8774ccc7d603667fad95456b4cf56d79a2bb5a7648ac9f0082e0b9416e",
+                "sha256:8bc4b92a273659e44ca3f3a2f8786cfa39d8302223bcfe7df794429c63d5f5a1",
+                "sha256:919f65e0732195474897b1cafefb4d4e7c2bb8174a725e506b62e9096e4df28d",
+                "sha256:9d1598573d310104acb90377f0a8c2319f737084689f5eb18012becaf345cda5",
+                "sha256:9fff90c88bfaad2901be50453d5cd7897a826c1d901f0654ee1d73ab3a48cd18",
+                "sha256:a245464ddf6d90e2d6287e9cef6bcfda2a99467fdcf1b677b99cd0b6c7b43de2",
+                "sha256:a988db28f54e104a01e8573ceb6f28202b4c15635b1450b2e3b2b822c6564f9b",
+                "sha256:b12fe6f31babb9477aa0f9692730654b3ee0e71f33b4568170dfafd439caf0a2",
+                "sha256:b7599ff4acd23f5de983e3aec772153b1043e131487a5c6ad0f94b41a828877a",
+                "sha256:c9f4dafd6065c4c782be84cd67ceeb9b1d4380af60a7af32be10ebecd723385e",
+                "sha256:ce3622b73ccd844ba301c1aea65d36cf9d8331e7c25c16b1725d0f14db99aaf4",
+                "sha256:d0f36a24cf8061a2c03e151be3418146717505b9b4ec17502fa3bbdb04ec1431",
+                "sha256:d263f8f14f2da0c079c0297e829e550d8f2c4e0ffef215506bd1d0ddd2bff3de",
+                "sha256:d8837ff272800668aabdfe70b966631914b0d6513aed4fc1b1428446f771834d",
+                "sha256:ef694fe72a3995aa778a5095bda946e0d31f7efabd5e8063ad8c6238ab7d3f78",
+                "sha256:f1fd1a6f40a501ba4035f5ed2c1f4faa68245d1407bf97d2ee401e4f23d1720b",
+                "sha256:fa337b6bd5fe2b8c4e705f4102186feb9985de9bb8536d32d5129a658f1789e0",
+                "sha256:febd31cd0d2fd2509ca2ec53cb339f8bf593c1bd245b9fc55c1917a68532a0af"
+            ],
+            "index": "pypi",
+            "version": "==1.15.3"
+        },
+        "pyyaml": {
+            "hashes": [
+                "sha256:3d7da3009c0f3e783b2c873687652d83b1bbfd5c88e9813fb7e5b03c0dd3108b",
+                "sha256:3ef3092145e9b70e3ddd2c7ad59bdd0252a94dfe3949721633e41344de00a6bf",
+                "sha256:40c71b8e076d0550b2e6380bada1f1cd1017b882f7e16f09a65be98e017f211a",
+                "sha256:558dd60b890ba8fd982e05941927a3911dc409a63dcb8b634feaa0cda69330d3",
+                "sha256:a7c28b45d9f99102fa092bb213aa12e0aaf9a6a1f5e395d36166639c1f96c3a1",
+                "sha256:aa7dd4a6a427aed7df6fb7f08a580d68d9b118d90310374716ae90b710280af1",
+                "sha256:bc558586e6045763782014934bfaf39d48b8ae85a2713117d16c39864085c613",
+                "sha256:d46d7982b62e0729ad0175a9bc7e10a566fc07b224d2c79fafb5e032727eaa04",
+                "sha256:d5eef459e30b09f5a098b9cea68bebfeb268697f78d647bd255a085371ac7f3f",
+                "sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537",
+                "sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531"
+            ],
+            "version": "==3.13"
+        },
+        "scipy": {
+            "hashes": [
+                "sha256:0611ee97296265af4a21164a5323f8c1b4e8e15c582d3dfa7610825900136bb7",
+                "sha256:08237eda23fd8e4e54838258b124f1cd141379a5f281b0a234ca99b38918c07a",
+                "sha256:0e645dbfc03f279e1946cf07c9c754c2a1859cb4a41c5f70b25f6b3a586b6dbd",
+                "sha256:0e9bb7efe5f051ea7212555b290e784b82f21ffd0f655405ac4f87e288b730b3",
+                "sha256:108c16640849e5827e7d51023efb3bd79244098c3f21e4897a1007720cb7ce37",
+                "sha256:340ef70f5b0f4e2b4b43c8c8061165911bc6b2ad16f8de85d9774545e2c47463",
+                "sha256:3ad73dfc6f82e494195144bd3a129c7241e761179b7cb5c07b9a0ede99c686f3",
+                "sha256:3b243c77a822cd034dad53058d7c2abf80062aa6f4a32e9799c95d6391558631",
+                "sha256:404a00314e85eca9d46b80929571b938e97a143b4f2ddc2b2b3c91a4c4ead9c5",
+                "sha256:423b3ff76957d29d1cce1bc0d62ebaf9a3fdfaf62344e3fdec14619bb7b5ad3a",
+                "sha256:42d9149a2fff7affdd352d157fa5717033767857c11bd55aa4a519a44343dfef",
+                "sha256:625f25a6b7d795e8830cb70439453c9f163e6870e710ec99eba5722775b318f3",
+                "sha256:698c6409da58686f2df3d6f815491fd5b4c2de6817a45379517c92366eea208f",
+                "sha256:729f8f8363d32cebcb946de278324ab43d28096f36593be6281ca1ee86ce6559",
+                "sha256:8190770146a4c8ed5d330d5b5ad1c76251c63349d25c96b3094875b930c44692",
+                "sha256:878352408424dffaa695ffedf2f9f92844e116686923ed9aa8626fc30d32cfd1",
+                "sha256:8b984f0821577d889f3c7ca8445564175fb4ac7c7f9659b7c60bef95b2b70e76",
+                "sha256:8f841bbc21d3dad2111a94c490fb0a591b8612ffea86b8e5571746ae76a3deac",
+                "sha256:c22b27371b3866c92796e5d7907e914f0e58a36d3222c5d436ddd3f0e354227a",
+                "sha256:d0cdd5658b49a722783b8b4f61a6f1f9c75042d0e29a30ccb6cacc9b25f6d9e2",
+                "sha256:d40dc7f494b06dcee0d303e51a00451b2da6119acbeaccf8369f2d29e28917ac",
+                "sha256:d8491d4784aceb1f100ddb8e31239c54e4afab8d607928a9f7ef2469ec35ae01",
+                "sha256:dfc5080c38dde3f43d8fbb9c0539a7839683475226cf83e4b24363b227dfe552",
+                "sha256:e24e22c8d98d3c704bb3410bce9b69e122a8de487ad3dbfe9985d154e5c03a40",
+                "sha256:e7a01e53163818d56eabddcafdc2090e9daba178aad05516b20c6591c4811020",
+                "sha256:ee677635393414930541a096fc8e61634304bb0153e4e02b75685b11eba14cae",
+                "sha256:f0521af1b722265d824d6ad055acfe9bd3341765735c44b5a4d0069e189a0f40",
+                "sha256:f25c281f12c0da726c6ed00535ca5d1622ec755c30a3f8eafef26cf43fede694"
+            ],
+            "version": "==1.1.0"
+        },
+        "six": {
+            "hashes": [
+                "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
+                "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
+            ],
+            "version": "==1.11.0"
+        }
+    },
+    "develop": {}
+}
--- a/libs/gym
+++ b/libs/gym
--- a/pilesos2.log
+++ b/pilesos2.log
@@ -0,0 +1,169 @@
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.LEFT
+Action.LEFT
+Action.FORWARD
+Action.SUCK
+Action.LEFT
+Action.FORWARD
+Action.LEFT
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.SUCK
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.LEFT
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.LEFT
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.LEFT
+Action.LEFT
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.LEFT
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.SUCK
+Action.RIGHT
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.LEFT
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.FORWARD
+Action.RIGHT
+Action.FORWARD
+Action.FORWARD
+Action.DIE
--- a/src/init.py
+++ b/src/init.py
--- a/src/gym/init.py
+++ b/src/gym/init.py
@@ -0,0 +1,14 @@
+import distutils.version
+import os
+import sys
+import warnings
+
+from gym import error
+from gym.utils import reraise
+from gym.version import VERSION as __version__
+
+from gym.core import Env, GoalEnv, Space, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
+from gym.envs import make, spec
+from gym import logger
+
+__all__ = ["Env", "Space", "Wrapper", "make", "spec"]
--- a/src/gym/pycache/init.cpython-37.pyc
+++ b/src/gym/pycache/init.cpython-37.pyc
--- a/src/gym/pycache/core.cpython-37.pyc
+++ b/src/gym/pycache/core.cpython-37.pyc
--- a/src/gym/pycache/error.cpython-37.pyc
+++ b/src/gym/pycache/error.cpython-37.pyc
--- a/src/gym/pycache/logger.cpython-37.pyc
+++ b/src/gym/pycache/logger.cpython-37.pyc
--- a/src/gym/pycache/version.cpython-37.pyc
+++ b/src/gym/pycache/version.cpython-37.pyc
--- a/src/gym/core.py
+++ b/src/gym/core.py
@@ -0,0 +1,343 @@
+from gym import logger
+
+import gym
+from gym import error
+from gym.utils import closer
+
+env_closer = closer.Closer()
+
+# Env-related abstractions
+
+class Env(object):
+    """The main OpenAI Gym class. It encapsulates an environment with
+    arbitrary behind-the-scenes dynamics. An environment can be
+    partially or fully observed.
+
+    The main API methods that users of this class need to know are:
+
+        step
+        reset
+        render
+        close
+        seed
+
+    And set the following attributes:
+
+        action_space: The Space object corresponding to valid actions
+        observation_space: The Space object corresponding to valid observations
+        reward_range: A tuple corresponding to the min and max possible rewards
+
+    Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.
+
+    The methods are accessed publicly as "step", "reset", etc.. The
+    non-underscored versions are wrapper methods to which we may add
+    functionality over time.
+    """
+
+    # Set this in SOME subclasses
+    metadata = {'render.modes': []}
+    reward_range = (-float('inf'), float('inf'))
+    spec = None
+
+    # Set these in ALL subclasses
+    action_space = None
+    observation_space = None
+
+    def step(self, action):
+        """Run one timestep of the environment's dynamics. When end of
+        episode is reached, you are responsible for calling `reset()`
+        to reset this environment's state.
+
+        Accepts an action and returns a tuple (observation, reward, done, info).
+
+        Args:
+            action (object): an action provided by the environment
+
+        Returns:
+            observation (object): agent's observation of the current environment
+            reward (float) : amount of reward returned after previous action
+            done (boolean): whether the episode has ended, in which case further step() calls will return undefined results
+            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
+        """
+        raise NotImplementedError
+
+    def reset(self):
+        """Resets the state of the environment and returns an initial observation.
+
+        Returns: observation (object): the initial observation of the
+            space.
+        """
+        raise NotImplementedError
+
+    def render(self, mode='human'):
+        """Renders the environment.
+
+        The set of supported modes varies per environment. (And some
+        environments do not support rendering at all.) By convention,
+        if mode is:
+
+        - human: render to the current display or terminal and
+          return nothing. Usually for human consumption.
+        - rgb_array: Return an numpy.ndarray with shape (x, y, 3),
+          representing RGB values for an x-by-y pixel image, suitable
+          for turning into a video.
+        - ansi: Return a string (str) or StringIO.StringIO containing a
+          terminal-style text representation. The text can include newlines
+          and ANSI escape sequences (e.g. for colors).
+
+        Note:
+            Make sure that your class's metadata 'render.modes' key includes
+              the list of supported modes. It's recommended to call super()
+              in implementations to use the functionality of this method.
+
+        Args:
+            mode (str): the mode to render with
+            close (bool): close all open renderings
+
+        Example:
+
+        class MyEnv(Env):
+            metadata = {'render.modes': ['human', 'rgb_array']}
+
+            def render(self, mode='human'):
+                if mode == 'rgb_array':
+                    return np.array(...) # return RGB frame suitable for video
+                elif mode is 'human':
+                    ... # pop up a window and render
+                else:
+                    super(MyEnv, self).render(mode=mode) # just raise an exception
+        """
+        raise NotImplementedError
+
+    def close(self):
+        """Override _close in your subclass to perform any necessary cleanup.
+
+        Environments will automatically close() themselves when
+        garbage collected or when the program exits.
+        """
+        return
+
+    def seed(self, seed=None):
+        """Sets the seed for this env's random number generator(s).
+
+        Note:
+            Some environments use multiple pseudorandom number generators.
+            We want to capture all such seeds used in order to ensure that
+            there aren't accidental correlations between multiple generators.
+
+        Returns:
+            list<bigint>: Returns the list of seeds used in this env's random
+              number generators. The first value in the list should be the
+              "main" seed, or the value which a reproducer should pass to
+              'seed'. Often, the main seed equals the provided 'seed', but
+              this won't be true if seed=None, for example.
+        """
+        logger.warn("Could not seed environment %s", self)
+        return
+
+    @property
+    def unwrapped(self):
+        """Completely unwrap this env.
+
+        Returns:
+            gym.Env: The base non-wrapped gym.Env instance
+        """
+        return self
+
+    def __str__(self):
+        if self.spec is None:
+            return '<{} instance>'.format(type(self).__name__)
+        else:
+            return '<{}<{}>>'.format(type(self).__name__, self.spec.id)
+
+
+class GoalEnv(Env):
+    """A goal-based environment. It functions just as any regular OpenAI Gym environment but it
+    imposes a required structure on the observation_space. More concretely, the observation
+    space is required to contain at least three elements, namely `observation`, `desired_goal`, and
+    `achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve.
+    `achieved_goal` is the goal that it currently achieved instead. `observation` contains the
+    actual observations of the environment as per usual.
+    """
+
+    def reset(self):
+        # Enforce that each GoalEnv uses a Goal-compatible observation space.
+        if not isinstance(self.observation_space, gym.spaces.Dict):
+            raise error.Error('GoalEnv requires an observation space of type gym.spaces.Dict')
+        result = super(GoalEnv, self).reset()
+        for key in ['observation', 'achieved_goal', 'desired_goal']:
+            if key not in result:
+                raise error.Error('GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(key))
+        return result
+
+    def compute_reward(self, achieved_goal, desired_goal, info):
+        """Compute the step reward. This externalizes the reward function and makes
+        it dependent on an a desired goal and the one that was achieved. If you wish to include
+        additional rewards that are independent of the goal, you can include the necessary values
+        to derive it in info and compute it accordingly.
+
+        Args:
+            achieved_goal (object): the goal that was achieved during execution
+            desired_goal (object): the desired goal that we asked the agent to attempt to achieve
+            info (dict): an info dictionary with additional information
+
+        Returns:
+            float: The reward that corresponds to the provided achieved goal w.r.t. to the desired
+            goal. Note that the following should always hold true:
+
+                ob, reward, done, info = env.step()
+                assert reward == env.compute_reward(ob['achieved_goal'], ob['goal'], info)
+        """
+        raise NotImplementedError()
+
+# Space-related abstractions
+
+class Space(object):
+    """Defines the observation and action spaces, so you can write generic
+    code that applies to any Env. For example, you can choose a random
+    action.
+    """
+    def __init__(self, shape=None, dtype=None):
+        import numpy as np # takes about 300-400ms to import, so we load lazily
+        self.shape = None if shape is None else tuple(shape)
+        self.dtype = None if dtype is None else np.dtype(dtype)
+
+    def sample(self):
+        """
+        Uniformly randomly sample a random element of this space
+        """
+        raise NotImplementedError
+
+    def contains(self, x):
+        """
+        Return boolean specifying if x is a valid
+        member of this space
+        """
+        raise NotImplementedError
+
+    __contains__ = contains
+
+    def to_jsonable(self, sample_n):
+        """Convert a batch of samples from this space to a JSONable data type."""
+        # By default, assume identity is JSONable
+        return sample_n
+
+    def from_jsonable(self, sample_n):
+        """Convert a JSONable data type to a batch of samples from this space."""
+        # By default, assume identity is JSONable
+        return sample_n
+
+
+warn_once = True
+
+def deprecated_warn_once(text):
+    global warn_once
+    if not warn_once: return
+    warn_once = False
+    logger.warn(text)
+
+
+class Wrapper(Env):
+    env = None
+
+    def __init__(self, env):
+        self.env = env
+        self.action_space = self.env.action_space
+        self.observation_space = self.env.observation_space
+        self.reward_range = self.env.reward_range
+        self.metadata = self.env.metadata
+
+    @classmethod
+    def class_name(cls):
+        return cls.__name__
+
+    def step(self, action):
+        if hasattr(self, "_step"):
+            deprecated_warn_once("%s doesn't implement 'step' method, but it implements deprecated '_step' method." % type(self))
+            self.step = self._step
+            return self.step(action)
+        else:
+            deprecated_warn_once("%s doesn't implement 'step' method, " % type(self) +
+                "which is required for wrappers derived directly from Wrapper. Deprecated default implementation is used.")
+            return self.env.step(action)
+
+    def reset(self, **kwargs):
+        if hasattr(self, "_reset"):
+            deprecated_warn_once("%s doesn't implement 'reset' method, but it implements deprecated '_reset' method." % type(self))
+            self.reset = self._reset
+            return self._reset(**kwargs)
+        else:
+            deprecated_warn_once("%s doesn't implement 'reset' method, " % type(self) +
+                "which is required for wrappers derived directly from Wrapper. Deprecated default implementation is used.")
+            return self.env.reset(**kwargs)
+
+    def render(self, mode='human', **kwargs):
+        return self.env.render(mode, **kwargs)
+
+    def close(self):
+        if self.env:
+            return self.env.close()
+
+    def seed(self, seed=None):
+        return self.env.seed(seed)
+
+    def compute_reward(self, achieved_goal, desired_goal, info):
+        return self.env.compute_reward(achieved_goal, desired_goal, info)
+
+    def __str__(self):
+        return '<{}{}>'.format(type(self).__name__, self.env)
+
+    def __repr__(self):
+        return str(self)
+
+    @property
+    def unwrapped(self):
+        return self.env.unwrapped
+
+    @property
+    def spec(self):
+        return self.env.spec
+
+
+class ObservationWrapper(Wrapper):
+    def step(self, action):
+        observation, reward, done, info = self.env.step(action)
+        return self.observation(observation), reward, done, info
+
+    def reset(self, **kwargs):
+        observation = self.env.reset(**kwargs)
+        return self.observation(observation)
+
+    def observation(self, observation):
+        deprecated_warn_once("%s doesn't implement 'observation' method. Maybe it implements deprecated '_observation' method." % type(self))
+        return self._observation(observation)
+
+
+class RewardWrapper(Wrapper):
+    def reset(self):
+        return self.env.reset()
+
+    def step(self, action):
+        observation, reward, done, info = self.env.step(action)
+        return observation, self.reward(reward), done, info
+
+    def reward(self, reward):
+        deprecated_warn_once("%s doesn't implement 'reward' method. Maybe it implements deprecated '_reward' method." % type(self))
+        return self._reward(reward)
+
+
+class ActionWrapper(Wrapper):
+    def step(self, action):
+        action = self.action(action)
+        return self.env.step(action)
+
+    def reset(self):
+        return self.env.reset()
+
+    def action(self, action):
+        deprecated_warn_once("%s doesn't implement 'action' method. Maybe it implements deprecated '_action' method." % type(self))
+        return self._action(action)
+
+    def reverse_action(self, action):
+        deprecated_warn_once("%s doesn't implement 'reverse_action' method. Maybe it implements deprecated '_reverse_action' method." % type(self))
+        return self._reverse_action(action)
--- a/src/gym/envs/README.md
+++ b/src/gym/envs/README.md
@@ -0,0 +1,113 @@
+# Envs
+
+These are the core integrated environments. Note that we may later
+restructure any of the files, but will keep the environments available
+at the relevant package's top-level. So for example, you should access
+`AntEnv` as follows:
+
+```
+# Will be supported in future releases
+from gym.envs import mujoco
+mujoco.AntEnv
+```
+
+Rather than:
+
+```
+# May break in future releases
+from gym.envs.mujoco import ant
+ant.AntEnv
+```
+
+## How to create new environments for Gym
+
+* Create a new repo called gym-foo, which should also be a PIP package.
+
+* A good example is https://github.com/openai/gym-soccer.
+
+* It should have at least the following files:
+  ```sh
+  gym-foo/
+    README.md
+    setup.py
+    gym_foo/
+      __init__.py
+      envs/
+        __init__.py
+        foo_env.py
+        foo_extrahard_env.py
+  ```
+
+* `gym-foo/setup.py` should have:
+
+  ```python
+  from setuptools import setup
+
+  setup(name='gym_foo',
+        version='0.0.1',
+        install_requires=['gym']  # And any other dependencies foo needs
+  )  
+  ```
+
+* `gym-foo/gym_foo/__init__.py` should have:
+  ```python
+  from gym.envs.registration import register
+
+  register(
+      id='foo-v0',
+      entry_point='gym_foo.envs:FooEnv',
+  )
+  register(
+      id='foo-extrahard-v0',
+      entry_point='gym_foo.envs:FooExtraHardEnv',
+  )
+  ```
+
+* `gym-foo/gym_foo/envs/__init__.py` should have:
+  ```python
+  from gym_foo.envs.foo_env import FooEnv
+  from gym_foo.envs.foo_extrahard_env import FooExtraHardEnv
+  ```
+
+* `gym-foo/gym_foo/envs/foo_env.py` should look something like:
+  ```python
+  import gym
+  from gym import error, spaces, utils
+  from gym.utils import seeding
+
+  class FooEnv(gym.Env):
+    metadata = {'render.modes': ['human']}
+
+    def __init__(self):
+      ...
+    def step(self, action):
+      ...
+    def reset(self):
+      ...
+    def render(self, mode='human', close=False):
+      ...
+  ```
+
+## How to add new environments to Gym, within this repo (not recommended for new environments)
+
+1. Write your environment in an existing collection or a new collection. All collections are subfolders of `/gym/envs'.
+2. Import your environment into the `__init__.py` file of the collection. This file will be located at `/gym/envs/my_collection/__init__.py`. Add `from gym.envs.my_collection.my_awesome_env import MyEnv` to this file.
+3. Register your env in `/gym/envs/__init__.py`:
+
+ ```
+register(
+		id='MyEnv-v0',
+		entry_point='gym.envs.my_collection:MyEnv',
+)
+```
+
+4. Add your environment to the scoreboard in `/gym/scoreboard/__init__.py`:
+
+ ```
+add_task(
+		id='MyEnv-v0',
+		summary="Super cool environment",
+		group='my_collection',
+		contributor='mygithubhandle',
+)
+```
--- a/src/gym/envs/init.py
+++ b/src/gym/envs/init.py
@@ -0,0 +1,540 @@
+from gym.envs.registration import registry, register, make, spec
+
+# Algorithmic
+# ----------------------------------------
+
+register(
+  id='Copy-v0',
+  entry_point='gym.envs.algorithmic:CopyEnv',
+  max_episode_steps=200,
+  reward_threshold=25.0,
+)
+
+register(
+  id='RepeatCopy-v0',
+  entry_point='gym.envs.algorithmic:RepeatCopyEnv',
+  max_episode_steps=200,
+  reward_threshold=75.0,
+)
+
+register(
+  id='ReversedAddition-v0',
+  entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
+  kwargs={'rows': 2},
+  max_episode_steps=200,
+  reward_threshold=25.0,
+)
+
+register(
+  id='ReversedAddition3-v0',
+  entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
+  kwargs={'rows': 3},
+  max_episode_steps=200,
+  reward_threshold=25.0,
+)
+
+register(
+  id='DuplicatedInput-v0',
+  entry_point='gym.envs.algorithmic:DuplicatedInputEnv',
+  max_episode_steps=200,
+  reward_threshold=9.0,
+)
+
+register(
+  id='Reverse-v0',
+  entry_point='gym.envs.algorithmic:ReverseEnv',
+  max_episode_steps=200,
+  reward_threshold=25.0,
+)
+
+# Classic
+# ----------------------------------------
+
+register(
+  id='CartPole-v0',
+  entry_point='gym.envs.classic_control:CartPoleEnv',
+  max_episode_steps=200,
+  reward_threshold=195.0,
+)
+
+register(
+  id='CartPole-v1',
+  entry_point='gym.envs.classic_control:CartPoleEnv',
+  max_episode_steps=500,
+  reward_threshold=475.0,
+)
+
+register(
+  id='MountainCar-v0',
+  entry_point='gym.envs.classic_control:MountainCarEnv',
+  max_episode_steps=200,
+  reward_threshold=-110.0,
+)
+
+register(
+  id='MountainCarContinuous-v0',
+  entry_point='gym.envs.classic_control:Continuous_MountainCarEnv',
+  max_episode_steps=999,
+  reward_threshold=90.0,
+)
+
+register(
+  id='Pendulum-v0',
+  entry_point='gym.envs.classic_control:PendulumEnv',
+  max_episode_steps=200,
+)
+
+register(
+  id='Acrobot-v1',
+  entry_point='gym.envs.classic_control:AcrobotEnv',
+  max_episode_steps=500,
+)
+
+# Box2d
+# ----------------------------------------
+
+register(
+  id='LunarLander-v2',
+  entry_point='gym.envs.box2d:LunarLander',
+  max_episode_steps=1000,
+  reward_threshold=200,
+)
+
+register(
+  id='LunarLanderContinuous-v2',
+  entry_point='gym.envs.box2d:LunarLanderContinuous',
+  max_episode_steps=1000,
+  reward_threshold=200,
+)
+
+register(
+  id='BipedalWalker-v2',
+  entry_point='gym.envs.box2d:BipedalWalker',
+  max_episode_steps=1600,
+  reward_threshold=300,
+)
+
+register(
+  id='BipedalWalkerHardcore-v2',
+  entry_point='gym.envs.box2d:BipedalWalkerHardcore',
+  max_episode_steps=2000,
+  reward_threshold=300,
+)
+
+register(
+  id='CarRacing-v0',
+  entry_point='gym.envs.box2d:CarRacing',
+  max_episode_steps=1000,
+  reward_threshold=900,
+)
+
+# Toy Text
+# ----------------------------------------
+
+register(
+  id='Blackjack-v0',
+  entry_point='gym.envs.toy_text:BlackjackEnv',
+)
+
+register(
+  id='KellyCoinflip-v0',
+  entry_point='gym.envs.toy_text:KellyCoinflipEnv',
+  reward_threshold=246.61,
+)
+register(
+  id='KellyCoinflipGeneralized-v0',
+  entry_point='gym.envs.toy_text:KellyCoinflipGeneralizedEnv',
+)
+
+register(
+  id='FrozenLake-v0',
+  entry_point='gym.envs.toy_text:FrozenLakeEnv',
+  kwargs={'map_name': '4x4'},
+  max_episode_steps=100,
+  reward_threshold=0.78,  # optimum = .8196
+)
+
+register(
+  id='FrozenLake8x8-v0',
+  entry_point='gym.envs.toy_text:FrozenLakeEnv',
+  kwargs={'map_name': '8x8'},
+  max_episode_steps=200,
+  reward_threshold=0.99,  # optimum = 1
+)
+
+register(
+  id='CliffWalking-v0',
+  entry_point='gym.envs.toy_text:CliffWalkingEnv',
+)
+
+register(
+  id='NChain-v0',
+  entry_point='gym.envs.toy_text:NChainEnv',
+  max_episode_steps=1000,
+)
+
+register(
+  id='Roulette-v0',
+  entry_point='gym.envs.toy_text:RouletteEnv',
+  max_episode_steps=100,
+)
+
+register(
+  id='Taxi-v2',
+  entry_point='gym.envs.toy_text.taxi:TaxiEnv',
+  reward_threshold=8,  # optimum = 8.46
+  max_episode_steps=200,
+)
+
+register(
+  id='GuessingGame-v0',
+  entry_point='gym.envs.toy_text.guessing_game:GuessingGame',
+  max_episode_steps=200,
+)
+
+register(
+  id='HotterColder-v0',
+  entry_point='gym.envs.toy_text.hotter_colder:HotterColder',
+  max_episode_steps=200,
+)
+
+# Mujoco
+# ----------------------------------------
+
+# 2D
+
+register(
+  id='Reacher-v2',
+  entry_point='gym.envs.mujoco:ReacherEnv',
+  max_episode_steps=50,
+  reward_threshold=-3.75,
+)
+
+register(
+  id='Pusher-v2',
+  entry_point='gym.envs.mujoco:PusherEnv',
+  max_episode_steps=100,
+  reward_threshold=0.0,
+)
+
+register(
+  id='Thrower-v2',
+  entry_point='gym.envs.mujoco:ThrowerEnv',
+  max_episode_steps=100,
+  reward_threshold=0.0,
+)
+
+register(
+  id='Striker-v2',
+  entry_point='gym.envs.mujoco:StrikerEnv',
+  max_episode_steps=100,
+  reward_threshold=0.0,
+)
+
+register(
+  id='InvertedPendulum-v2',
+  entry_point='gym.envs.mujoco:InvertedPendulumEnv',
+  max_episode_steps=1000,
+  reward_threshold=950.0,
+)
+
+register(
+  id='InvertedDoublePendulum-v2',
+  entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
+  max_episode_steps=1000,
+  reward_threshold=9100.0,
+)
+
+register(
+  id='HalfCheetah-v2',
+  entry_point='gym.envs.mujoco:HalfCheetahEnv',
+  max_episode_steps=1000,
+  reward_threshold=4800.0,
+)
+
+register(
+  id='Hopper-v2',
+  entry_point='gym.envs.mujoco:HopperEnv',
+  max_episode_steps=1000,
+  reward_threshold=3800.0,
+)
+
+register(
+  id='Swimmer-v2',
+  entry_point='gym.envs.mujoco:SwimmerEnv',
+  max_episode_steps=1000,
+  reward_threshold=360.0,
+)
+
+register(
+  id='Walker2d-v2',
+  max_episode_steps=1000,
+  entry_point='gym.envs.mujoco:Walker2dEnv',
+)
+
+register(
+  id='Ant-v2',
+  entry_point='gym.envs.mujoco:AntEnv',
+  max_episode_steps=1000,
+  reward_threshold=6000.0,
+)
+
+register(
+  id='Humanoid-v2',
+  entry_point='gym.envs.mujoco:HumanoidEnv',
+  max_episode_steps=1000,
+)
+
+register(
+  id='HumanoidStandup-v2',
+  entry_point='gym.envs.mujoco:HumanoidStandupEnv',
+  max_episode_steps=1000,
+)
+
+
+# Robotics
+# ----------------------------------------
+
+def _merge(a, b):
+  a.update(b)
+  return a
+
+
+for reward_type in ['sparse', 'dense']:
+  suffix = 'Dense' if reward_type == 'dense' else ''
+  kwargs = {
+    'reward_type': reward_type,
+  }
+
+  # Fetch
+  register(
+    id='FetchSlide{}-v1'.format(suffix),
+    entry_point='gym.envs.robotics:FetchSlideEnv',
+    kwargs=kwargs,
+    max_episode_steps=50,
+  )
+
+  register(
+    id='FetchPickAndPlace{}-v1'.format(suffix),
+    entry_point='gym.envs.robotics:FetchPickAndPlaceEnv',
+    kwargs=kwargs,
+    max_episode_steps=50,
+  )
+
+  register(
+    id='FetchReach{}-v1'.format(suffix),
+    entry_point='gym.envs.robotics:FetchReachEnv',
+    kwargs=kwargs,
+    max_episode_steps=50,
+  )
+
+  register(
+    id='FetchPush{}-v1'.format(suffix),
+    entry_point='gym.envs.robotics:FetchPushEnv',
+    kwargs=kwargs,
+    max_episode_steps=50,
+  )
+
+  # Hand
+  register(
+    id='HandReach{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandReachEnv',
+    kwargs=kwargs,
+    max_episode_steps=50,
+  )
+
+  register(
+    id='HandManipulateBlockRotateZ{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandBlockEnv',
+    kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'z'}, kwargs),
+    max_episode_steps=100,
+  )
+
+  register(
+    id='HandManipulateBlockRotateParallel{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandBlockEnv',
+    kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'parallel'}, kwargs),
+    max_episode_steps=100,
+  )
+
+  register(
+    id='HandManipulateBlockRotateXYZ{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandBlockEnv',
+    kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
+    max_episode_steps=100,
+  )
+
+  register(
+    id='HandManipulateBlockFull{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandBlockEnv',
+    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
+    max_episode_steps=100,
+  )
+
+  # Alias for "Full"
+  register(
+    id='HandManipulateBlock{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandBlockEnv',
+    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
+    max_episode_steps=100,
+  )
+
+  register(
+    id='HandManipulateEggRotate{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandEggEnv',
+    kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
+    max_episode_steps=100,
+  )
+
+  register(
+    id='HandManipulateEggFull{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandEggEnv',
+    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
+    max_episode_steps=100,
+  )
+
+  # Alias for "Full"
+  register(
+    id='HandManipulateEgg{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandEggEnv',
+    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
+    max_episode_steps=100,
+  )
+
+  register(
+    id='HandManipulatePenRotate{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandPenEnv',
+    kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
+    max_episode_steps=100,
+  )
+
+  register(
+    id='HandManipulatePenFull{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandPenEnv',
+    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
+    max_episode_steps=100,
+  )
+
+  # Alias for "Full"
+  register(
+    id='HandManipulatePen{}-v0'.format(suffix),
+    entry_point='gym.envs.robotics:HandPenEnv',
+    kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
+    max_episode_steps=100,
+  )
+
+# Atari
+# ----------------------------------------
+
+# # print ', '.join(["'{}'".format(name.split('.')[0]) for name in atari_py.list_games()])
+for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis',
+             'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival',
+             'centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk',
+             'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar',
+             'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kangaroo', 'krull', 'kung_fu_master',
+             'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan',
+             'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing',
+             'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down',
+             'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']:
+  for obs_type in ['image', 'ram']:
+    # space_invaders should yield SpaceInvaders-v0 and SpaceInvaders-ram-v0
+    name = ''.join([g.capitalize() for g in game.split('_')])
+    if obs_type == 'ram':
+      name = '{}-ram'.format(name)
+
+    nondeterministic = False
+    if game == 'elevator_action' and obs_type == 'ram':
+      # ElevatorAction-ram-v0 seems to yield slightly
+      # non-deterministic observations about 10% of the time. We
+      # should track this down eventually, but for now we just
+      # mark it as nondeterministic.
+      nondeterministic = True
+
+    register(
+      id='{}-v0'.format(name),
+      entry_point='gym.envs.atari:AtariEnv',
+      kwargs={'game': game, 'obs_type': obs_type, 'repeat_action_probability': 0.25},
+      max_episode_steps=10000,
+      nondeterministic=nondeterministic,
+    )
+
+    register(
+      id='{}-v4'.format(name),
+      entry_point='gym.envs.atari:AtariEnv',
+      kwargs={'game': game, 'obs_type': obs_type},
+      max_episode_steps=100000,
+      nondeterministic=nondeterministic,
+    )
+
+    # Standard Deterministic (as in the original DeepMind paper)
+    if game == 'space_invaders':
+      frameskip = 3
+    else:
+      frameskip = 4
+
+    # Use a deterministic frame skip.
+    register(
+      id='{}Deterministic-v0'.format(name),
+      entry_point='gym.envs.atari:AtariEnv',
+      kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip, 'repeat_action_probability': 0.25},
+      max_episode_steps=100000,
+      nondeterministic=nondeterministic,
+    )
+
+    register(
+      id='{}Deterministic-v4'.format(name),
+      entry_point='gym.envs.atari:AtariEnv',
+      kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip},
+      max_episode_steps=100000,
+      nondeterministic=nondeterministic,
+    )
+
+    register(
+      id='{}NoFrameskip-v0'.format(name),
+      entry_point='gym.envs.atari:AtariEnv',
+      kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1, 'repeat_action_probability': 0.25},
+      # A frameskip of 1 means we get every frame
+      max_episode_steps=frameskip * 100000,
+      nondeterministic=nondeterministic,
+    )
+
+    # No frameskip. (Atari has no entropy source, so these are
+    # deterministic environments.)
+    register(
+      id='{}NoFrameskip-v4'.format(name),
+      entry_point='gym.envs.atari:AtariEnv',
+      kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1},  # A frameskip of 1 means we get every frame
+      max_episode_steps=frameskip * 100000,
+      nondeterministic=nondeterministic,
+    )
+
+# Unit test
+# ---------
+
+register(
+  id='CubeCrash-v0',
+  entry_point='gym.envs.unittest:CubeCrash',
+  reward_threshold=0.9,
+)
+register(
+  id='CubeCrashSparse-v0',
+  entry_point='gym.envs.unittest:CubeCrashSparse',
+  reward_threshold=0.9,
+)
+register(
+  id='CubeCrashScreenBecomesBlack-v0',
+  entry_point='gym.envs.unittest:CubeCrashScreenBecomesBlack',
+  reward_threshold=0.9,
+)
+
+register(
+  id='MemorizeDigits-v0',
+  entry_point='gym.envs.unittest:MemorizeDigits',
+  reward_threshold=20,
+)
+
+register(
+  id='Pilesos-v0',
+  entry_point='gym.envs.pilesos:PilesosEnv',
+  max_episode_steps=5000,
+)
--- a/src/gym/envs/pycache/init.cpython-37.pyc
+++ b/src/gym/envs/pycache/init.cpython-37.pyc
--- a/src/gym/envs/pycache/registration.cpython-37.pyc
+++ b/src/gym/envs/pycache/registration.cpython-37.pyc
--- a/src/gym/envs/algorithmic/init.py
+++ b/src/gym/envs/algorithmic/init.py
@@ -0,0 +1,5 @@
+from gym.envs.algorithmic.copy_ import CopyEnv
+from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv
+from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv
+from gym.envs.algorithmic.reverse import ReverseEnv
+from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv
--- a/src/gym/envs/algorithmic/algorithmic_env.py
+++ b/src/gym/envs/algorithmic/algorithmic_env.py
@@ -0,0 +1,326 @@
+"""
+Algorithmic environments have the following traits in common:
+
+- A 1-d "input tape" or 2-d "input grid" of characters
+- A target string which is a deterministic function of the input characters
+
+Agents control a read head that moves over the input tape. Observations consist
+of the single character currently under the read head. The read head may fall
+off the end of the tape in any direction. When this happens, agents will observe
+a special blank character (with index=env.base) until they get back in bounds.
+
+Actions consist of 3 sub-actions:
+    - Direction to move the read head (left or right, plus up and down for 2-d envs)
+    - Whether to write to the output tape
+    - Which character to write (ignored if the above sub-action is 0)
+
+An episode ends when:
+    - The agent writes the full target string to the output tape.
+    - The agent writes an incorrect character.
+    - The agent runs out the time limit. (Which is fairly conservative.)
+
+Reward schedule:
+    write a correct character: +1
+    write a wrong character: -.5
+    run out the clock: -1
+    otherwise: 0
+
+In the beginning, input strings will be fairly short. After an environment has
+been consistently solved over some window of episodes, the environment will 
+increase the average length of generated strings. Typical env specs require
+leveling up many times to reach their reward threshold.
+"""
+from gym import Env, logger
+from gym.spaces import Discrete, Tuple
+from gym.utils import colorize, seeding
+import numpy as np
+from six import StringIO
+import sys
+import math
+
+class AlgorithmicEnv(Env):
+
+    metadata = {'render.modes': ['human', 'ansi']}
+    # Only 'promote' the length of generated input strings if the worst of the 
+    # last n episodes was no more than this far from the maximum reward
+    MIN_REWARD_SHORTFALL_FOR_PROMOTION = -1.0
+
+    def __init__(self, base=10, chars=False, starting_min_length=2):
+        """
+        base: Number of distinct characters. 
+        chars: If True, use uppercase alphabet. Otherwise, digits. Only affects
+               rendering.
+        starting_min_length: Minimum input string length. Ramps up as episodes 
+                             are consistently solved.
+        """
+        self.base = base
+        # Keep track of this many past episodes
+        self.last = 10
+        # Cumulative reward earned this episode
+        self.episode_total_reward = None
+        # Running tally of reward shortfalls. e.g. if there were 10 points to earn and
+        # we got 8, we'd append -2
+        AlgorithmicEnv.reward_shortfalls = []
+        if chars:
+            self.charmap = [chr(ord('A')+i) for i in range(base)]
+        else:
+            self.charmap = [str(i) for i in range(base)]
+        self.charmap.append(' ')
+        # TODO: Not clear why this is a class variable rather than instance. 
+        # Could lead to some spooky action at a distance if someone is working
+        # with multiple algorithmic envs at once. Also makes testing tricky.
+        AlgorithmicEnv.min_length = starting_min_length
+        # Three sub-actions:
+        #       1. Move read head left or write (or up/down)
+        #       2. Write or not
+        #       3. Which character to write. (Ignored if should_write=0)
+        self.action_space = Tuple(
+            [Discrete(len(self.MOVEMENTS)), Discrete(2), Discrete(self.base)]
+        )
+        # Can see just what is on the input tape (one of n characters, or nothing)
+        self.observation_space = Discrete(self.base + 1)
+        self.seed()
+        self.reset()
+
+    @classmethod
+    def _movement_idx(kls, movement_name):
+        return kls.MOVEMENTS.index(movement_name)
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def _get_obs(self, pos=None):
+        """Return an observation corresponding to the given read head position
+        (or the current read head position, if none is given)."""
+        raise NotImplemented
+
+    def _get_str_obs(self, pos=None):
+        ret = self._get_obs(pos)
+        return self.charmap[ret]
+
+    def _get_str_target(self, pos):
+        """Return the ith character of the target string (or " " if index
+        out of bounds)."""
+        if pos < 0 or len(self.target) <= pos:
+            return " "
+        else:
+            return self.charmap[self.target[pos]]
+
+    def render_observation(self):
+        """Return a string representation of the input tape/grid."""
+        raise NotImplementedError
+
+    def render(self, mode='human'):
+
+        outfile = StringIO() if mode == 'ansi' else sys.stdout
+        inp = "Total length of input instance: %d, step: %d\n" % (self.input_width, self.time)
+        outfile.write(inp)
+        x, y, action = self.read_head_position, self.write_head_position, self.last_action
+        if action is not None:
+            inp_act, out_act, pred = action
+        outfile.write("=" * (len(inp) - 1) + "\n")
+        y_str =      "Output Tape         : "
+        target_str = "Targets             : "
+        if action is not None:
+            pred_str = self.charmap[pred]
+        x_str = self.render_observation()
+        for i in range(-2, len(self.target) + 2):
+            target_str += self._get_str_target(i)
+            if i < y - 1:
+                y_str += self._get_str_target(i)
+            elif i == (y - 1):
+                if action is not None and out_act == 1:
+                    color = 'green' if pred == self.target[i] else 'red'
+                    y_str += colorize(pred_str, color, highlight=True)
+                else:
+                    y_str += self._get_str_target(i)
+        outfile.write(x_str)
+        outfile.write(y_str + "\n")
+        outfile.write(target_str + "\n\n")
+
+        if action is not None:
+            outfile.write("Current reward      :   %.3f\n" % self.last_reward)
+            outfile.write("Cumulative reward   :   %.3f\n" % self.episode_total_reward)
+            move = self.MOVEMENTS[inp_act]
+            outfile.write("Action              :   Tuple(move over input: %s,\n" % move)
+            out_act = out_act == 1
+            outfile.write("                              write to the output tape: %s,\n" % out_act)
+            outfile.write("                              prediction: %s)\n" % pred_str)
+        else:
+            outfile.write("\n" * 5)
+        return outfile
+
+    @property
+    def input_width(self):
+        return len(self.input_data)
+
+    def step(self, action):
+        assert self.action_space.contains(action)
+        self.last_action = action
+        inp_act, out_act, pred = action
+        done = False
+        reward = 0.0
+        self.time += 1
+        assert 0 <= self.write_head_position
+        if out_act == 1:
+            try:
+                correct = pred == self.target[self.write_head_position]
+            except IndexError:
+                logger.warn("It looks like you're calling step() even though this "+
+                    "environment has already returned done=True. You should always call "+
+                    "reset() once you receive done=True. Any further steps are undefined "+
+                    "behaviour.")
+                correct = False
+            if correct:
+                reward = 1.0
+            else:
+                # Bail as soon as a wrong character is written to the tape
+                reward = -0.5
+                done = True
+            self.write_head_position += 1
+            if self.write_head_position >= len(self.target):
+                done = True
+        self._move(inp_act)
+        if self.time > self.time_limit:
+            reward = -1.0
+            done = True
+        obs = self._get_obs()
+        self.last_reward = reward
+        self.episode_total_reward += reward
+        return (obs, reward, done, {})
+
+    @property
+    def time_limit(self):
+        """If an agent takes more than this many timesteps, end the episode
+        immediately and return a negative reward."""
+        # (Seemingly arbitrary)
+        return self.input_width + len(self.target) + 4
+
+    def _check_levelup(self):
+        """Called between episodes. Update our running record of episode rewards 
+        and, if appropriate, 'level up' minimum input length."""
+        if self.episode_total_reward is None:
+            # This is before the first episode/call to reset(). Nothing to do
+            return
+        AlgorithmicEnv.reward_shortfalls.append(self.episode_total_reward - len(self.target))
+        AlgorithmicEnv.reward_shortfalls = AlgorithmicEnv.reward_shortfalls[-self.last:]
+        if len(AlgorithmicEnv.reward_shortfalls) == self.last and \
+          min(AlgorithmicEnv.reward_shortfalls) >= self.MIN_REWARD_SHORTFALL_FOR_PROMOTION and \
+          AlgorithmicEnv.min_length < 30:
+            AlgorithmicEnv.min_length += 1
+            AlgorithmicEnv.reward_shortfalls = []
+        
+
+    def reset(self):
+        self._check_levelup()
+        self.last_action = None
+        self.last_reward = 0
+        self.read_head_position = self.READ_HEAD_START
+        self.write_head_position = 0
+        self.episode_total_reward = 0.0
+        self.time = 0
+        length = self.np_random.randint(3) + AlgorithmicEnv.min_length
+        self.input_data = self.generate_input_data(length)
+        self.target = self.target_from_input_data(self.input_data)
+        return self._get_obs()
+
+    def generate_input_data(self, size):
+        raise NotImplemented
+
+    def target_from_input_data(self, input_data):
+        raise NotImplemented("Subclasses must implement")
+
+    def _move(self, movement):
+        raise NotImplemented
+
+class TapeAlgorithmicEnv(AlgorithmicEnv):
+    """An algorithmic env with a 1-d input tape."""
+    MOVEMENTS = ['left', 'right']
+    READ_HEAD_START = 0
+
+    def _move(self, movement):
+        named = self.MOVEMENTS[movement]
+        self.read_head_position += 1 if named == 'right' else -1
+
+    def _get_obs(self, pos=None):
+        if pos is None:
+            pos = self.read_head_position
+        if pos < 0:
+            return self.base
+        if isinstance(pos, np.ndarray):
+            pos = pos.item()
+        try:
+            return self.input_data[pos]
+        except IndexError:
+            return self.base
+    
+    def generate_input_data(self, size):
+        return [self.np_random.randint(self.base) for _ in range(size)]
+
+    def render_observation(self):
+        x = self.read_head_position
+        x_str =      "Observation Tape    : "
+        for i in range(-2, self.input_width + 2):
+            if i == x:
+                x_str += colorize(self._get_str_obs(np.array([i])), 'green', highlight=True)
+            else:
+                x_str += self._get_str_obs(np.array([i]))
+        x_str += "\n"
+        return x_str
+
+class GridAlgorithmicEnv(AlgorithmicEnv):
+    """An algorithmic env with a 2-d input grid."""
+    MOVEMENTS = ['left', 'right', 'up', 'down']
+    READ_HEAD_START = (0, 0)
+    def __init__(self, rows, *args, **kwargs):
+        self.rows = rows
+        AlgorithmicEnv.__init__(self, *args, **kwargs)
+
+    def _move(self, movement):
+        named = self.MOVEMENTS[movement]
+        x, y = self.read_head_position
+        if named == 'left':
+            x -= 1
+        elif named == 'right':
+            x += 1
+        elif named == 'up':
+            y -= 1
+        elif named == 'down':
+            y += 1
+        else:
+            raise ValueError("Unrecognized direction: {}".format(named))
+        self.read_head_position = x, y
+
+    def generate_input_data(self, size):
+        return [
+            [self.np_random.randint(self.base) for _ in range(self.rows)]
+            for __ in range(size)
+        ]
+
+    def _get_obs(self, pos=None):
+        if pos is None:
+            pos = self.read_head_position
+        x, y = pos
+        if any(idx < 0 for idx in pos):
+            return self.base
+        try:
+            return self.input_data[x][y]
+        except IndexError:
+            return self.base
+
+    def render_observation(self):
+        x = self.read_head_position
+        label =      "Observation Grid    : "
+        x_str = ""
+        for j in range(-1, self.rows+1):
+            if j != -1:
+                x_str += " " * len(label)
+            for i in range(-2, self.input_width + 2):
+                if i == x[0] and j == x[1]:
+                    x_str += colorize(self._get_str_obs((i, j)), 'green', highlight=True)
+                else:
+                    x_str += self._get_str_obs((i, j))
+            x_str += "\n"
+        x_str = label + x_str
+        return x_str
--- a/src/gym/envs/algorithmic/copy_.py
+++ b/src/gym/envs/algorithmic/copy_.py
@@ -0,0 +1,13 @@
+"""
+Task is to copy content from the input tape to
+the output tape. http://arxiv.org/abs/1511.07275
+"""
+from gym.envs.algorithmic import algorithmic_env
+
+class CopyEnv(algorithmic_env.TapeAlgorithmicEnv):
+    def __init__(self, base=5, chars=True):
+        super(CopyEnv, self).__init__(base=base, chars=chars)
+
+    def target_from_input_data(self, input_data):
+        return input_data
+
--- a/src/gym/envs/algorithmic/duplicated_input.py
+++ b/src/gym/envs/algorithmic/duplicated_input.py
@@ -0,0 +1,24 @@
+"""
+Task is to return every nth character from the input tape.
+http://arxiv.org/abs/1511.07275
+"""
+from __future__ import division
+from gym.envs.algorithmic import algorithmic_env
+
+class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv):
+    def __init__(self, duplication=2, base=5):
+        self.duplication = duplication
+        super(DuplicatedInputEnv, self).__init__(base=base, chars=True)
+
+    def generate_input_data(self, size):
+        res = []
+        if size < self.duplication:
+            size = self.duplication
+        for i in range(size//self.duplication):
+            char = self.np_random.randint(self.base)
+            for _ in range(self.duplication):
+                res.append(char)
+        return res
+
+    def target_from_input_data(self, input_data):
+        return [input_data[i] for i in range(0, len(input_data), self.duplication)]
--- a/src/gym/envs/algorithmic/repeat_copy.py
+++ b/src/gym/envs/algorithmic/repeat_copy.py
@@ -0,0 +1,15 @@
+"""
+Task is to copy content multiple times from the input tape to
+the output tape. http://arxiv.org/abs/1511.07275
+"""
+from gym.envs.algorithmic import algorithmic_env
+
+class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv):
+    MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
+    def __init__(self, base=5):
+        super(RepeatCopyEnv, self).__init__(base=base, chars=True)
+        self.last = 50
+
+    def target_from_input_data(self, input_data):
+        return input_data + list(reversed(input_data)) + input_data
+
--- a/src/gym/envs/algorithmic/reverse.py
+++ b/src/gym/envs/algorithmic/reverse.py
@@ -0,0 +1,15 @@
+"""
+Task is to reverse content over the input tape.
+http://arxiv.org/abs/1511.07275
+"""
+
+from gym.envs.algorithmic import algorithmic_env
+
+class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv):
+    MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
+    def __init__(self, base=2):
+        super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1)
+        self.last = 50
+
+    def target_from_input_data(self, input_str):
+        return list(reversed(input_str))
--- a/src/gym/envs/algorithmic/reversed_addition.py
+++ b/src/gym/envs/algorithmic/reversed_addition.py
@@ -0,0 +1,30 @@
+from __future__ import division
+import numpy as np
+from gym.envs.algorithmic import algorithmic_env
+
+class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
+    def __init__(self, rows=2, base=3):
+        super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False)
+
+    def target_from_input_data(self, input_strings):
+        curry = 0
+        target = []
+        for digits in input_strings:
+            total = sum(digits) + curry
+            target.append(total % self.base)
+            curry = total // self.base
+
+        if curry > 0:
+            target.append(curry)
+        return target
+
+    @property
+    def time_limit(self):
+        # Quirk preserved for the sake of consistency: add the length of the input
+        # rather than the length of the desired output (which may differ if there's
+        # an extra carried digit).
+        # TODO: It seems like this time limit is so strict as to make Addition3-v0
+        # unsolvable, since agents aren't even given enough time steps to look at
+        # all the digits. (The solutions on the scoreboard seem to only work by
+        # save-scumming.)
+        return self.input_width*2 + 4
--- a/src/gym/envs/algorithmic/tests/init.py
+++ b/src/gym/envs/algorithmic/tests/init.py
--- a/src/gym/envs/algorithmic/tests/test_algorithmic.py
+++ b/src/gym/envs/algorithmic/tests/test_algorithmic.py
@@ -0,0 +1,239 @@
+from gym.envs import algorithmic as alg
+import unittest
+
+# All concrete subclasses of AlgorithmicEnv
+ALL_ENVS = [
+    alg.copy_.CopyEnv, 
+    alg.duplicated_input.DuplicatedInputEnv,
+    alg.repeat_copy.RepeatCopyEnv,
+    alg.reverse.ReverseEnv,
+    alg.reversed_addition.ReversedAdditionEnv,
+]
+ALL_TAPE_ENVS = [env for env in ALL_ENVS 
+    if issubclass(env, alg.algorithmic_env.TapeAlgorithmicEnv)]
+ALL_GRID_ENVS = [env for env in ALL_ENVS 
+    if issubclass(env, alg.algorithmic_env.GridAlgorithmicEnv)]
+
+def imprint(env, input_arr):
+    """Monkey-patch the given environment so that when reset() is called, the
+    input tape/grid will be set to the given data, rather than being randomly
+    generated."""
+    env.generate_input_data = lambda _: input_arr
+
+class TestAlgorithmicEnvInteractions(unittest.TestCase):
+    """Test some generic behaviour not specific to any particular algorithmic
+    environment. Movement, allocation of rewards, etc."""
+    CANNED_INPUT = [0, 1]
+    ENV_KLS = alg.copy_.CopyEnv
+    LEFT, RIGHT = ENV_KLS._movement_idx('left'), ENV_KLS._movement_idx('right')
+    def setUp(self):
+        self.env = self.ENV_KLS(base=2, chars=True)
+        imprint(self.env, self.CANNED_INPUT)
+
+    def test_successful_interaction(self):
+        obs = self.env.reset()
+        self.assertEqual(obs, 0)
+        obs, reward, done, _ = self.env.step([self.RIGHT, 1, 0])
+        self.assertEqual(obs, 1)
+        self.assertGreater(reward, 0)
+        self.assertFalse(done)
+        obs, reward, done, _ = self.env.step([self.LEFT, 1, 1])
+        self.assertTrue(done)
+        self.assertGreater(reward, 0)
+
+    def test_bad_output_fail_fast(self):
+        obs = self.env.reset()
+        obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
+        self.assertTrue(done)
+        self.assertLess(reward, 0)
+
+    def test_levelup(self):
+        obs = self.env.reset()
+        # Kind of a hack
+        alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls = []
+        min_length = self.env.min_length
+        for i in range(self.env.last):
+            obs, reward, done, _ = self.env.step([self.RIGHT, 1, 0])
+            self.assertFalse(done)
+            obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
+            self.assertTrue(done)
+            self.env.reset()
+            if i < self.env.last-1:
+                self.assertEqual(len(alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls), i+1)
+            else:
+                # Should have leveled up on the last iteration
+                self.assertEqual(self.env.min_length, min_length+1)
+                self.assertEqual(len(alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls), 0)
+
+    def test_walk_off_the_end(self):
+        obs = self.env.reset()
+        # Walk off the end
+        obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
+        self.assertEqual(obs, self.env.base)
+        self.assertEqual(r, 0)
+        self.assertFalse(done)
+        # Walk further off track
+        obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
+        self.assertEqual(obs, self.env.base)
+        self.assertFalse(done)
+        # Return to the first input character
+        obs, r, done, _ = self.env.step([self.RIGHT, 0, 0])
+        self.assertEqual(obs, self.env.base)
+        self.assertFalse(done)
+        obs, r, done, _ = self.env.step([self.RIGHT, 0, 0])
+        self.assertEqual(obs, 0)
+
+    def test_grid_naviation(self):
+        env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=6)
+        N,S,E,W = [env._movement_idx(named_dir) for named_dir in ['up', 'down', 'right', 'left']]
+        # Corresponds to a grid that looks like...
+        #       0 1 2
+        #       3 4 5
+        canned = [ [0, 3], [1, 4], [2, 5] ]
+        imprint(env, canned)
+        obs = env.reset()
+        self.assertEqual(obs, 0)
+        navigation = [
+          (S, 3), (N, 0), (E, 1), (S, 4), (S, 6), (E, 6), (N, 5), (N, 2), (W, 1)
+        ]
+        for (movement, expected_obs) in navigation:
+            obs, reward, done, _ = env.step([movement, 0, 0])
+            self.assertEqual(reward, 0)
+            self.assertFalse(done)
+            self.assertEqual(obs, expected_obs)
+
+    def test_grid_success(self):
+        env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=3)
+        canned = [ [1, 2], [1, 0], [2, 2] ]
+        imprint(env, canned)
+        obs = env.reset()
+        target = [0, 2, 1, 1]
+        self.assertEqual(env.target, target)
+        self.assertEqual(obs, 1)
+        for i, target_digit in enumerate(target):
+            obs, reward, done, _ = env.step([0, 1, target_digit])
+            self.assertGreater(reward, 0)
+            self.assertEqual(done, i==len(target)-1)
+
+    def test_sane_time_limit(self):
+        obs = self.env.reset()
+        self.assertLess(self.env.time_limit, 100)
+        for _ in range(100):
+            obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
+            if done:
+                return
+        self.fail("Time limit wasn't enforced")
+
+    def test_rendering(self):
+        env = self.env
+        obs = env.reset()
+        self.assertEqual(env._get_str_obs(), 'A')
+        self.assertEqual(env._get_str_obs(1), 'B')
+        self.assertEqual(env._get_str_obs(-1), ' ')
+        self.assertEqual(env._get_str_obs(2), ' ')
+        self.assertEqual(env._get_str_target(0), 'A')
+        self.assertEqual(env._get_str_target(1), 'B')
+        # Test numerical alphabet rendering
+        env = self.ENV_KLS(base=3, chars=False)
+        imprint(env, self.CANNED_INPUT)
+        env.reset()
+        self.assertEqual(env._get_str_obs(), '0')
+        self.assertEqual(env._get_str_obs(1), '1')
+
+
+class TestTargets(unittest.TestCase):
+    """Test the rules mapping input strings/grids to target outputs."""
+    def test_reverse_target(self):
+        input_expected = [
+            ([0], [0]),
+            ([0, 1], [1, 0]),
+            ([1, 1], [1, 1]),
+            ([1, 0, 1], [1, 0, 1]),
+            ([0, 0, 1, 1], [1, 1, 0, 0]),
+        ]
+        env = alg.reverse.ReverseEnv()
+        for input_arr, expected in input_expected:
+            target = env.target_from_input_data(input_arr)
+            self.assertEqual(target, expected)
+
+    def test_reversed_addition_target(self):
+        env = alg.reversed_addition.ReversedAdditionEnv(base=3)
+        input_expected = [
+            ([[1,1], [1,1]], [2, 2]),
+            ([[2,2], [0,1]], [1, 2]),
+            ([[2,1], [1,1], [1,1], [1,0]], [0, 0, 0, 2]),
+        ]
+        for (input_grid, expected_target) in input_expected:
+            self.assertEqual(env.target_from_input_data(input_grid), expected_target)
+
+    def test_reversed_addition_3rows(self):
+        env = alg.reversed_addition.ReversedAdditionEnv(base=3, rows=3)
+        input_expected = [
+            ([[1,1,0],[0,1,1]], [2, 2]),
+            ([[1,1,2],[0,1,1]], [1,0,1]),
+        ]
+        for (input_grid, expected_target) in input_expected:
+            self.assertEqual(env.target_from_input_data(input_grid), expected_target)
+
+    def test_copy_target(self):
+        env = alg.copy_.CopyEnv()
+        self.assertEqual(env.target_from_input_data([0, 1, 2]), [0, 1, 2])
+
+    def test_duplicated_input_target(self):
+        env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
+        self.assertEqual(env.target_from_input_data([0, 0, 0, 0, 1, 1]), [0, 0, 1])
+
+    def test_repeat_copy_target(self):
+        env = alg.repeat_copy.RepeatCopyEnv()
+        self.assertEqual(env.target_from_input_data([0, 1, 2]), [0, 1, 2, 2, 1, 0, 0, 1, 2])
+
+class TestInputGeneration(unittest.TestCase):
+    """Test random input generation.
+    """
+    def test_tape_inputs(self):
+        for env_kls in ALL_TAPE_ENVS:
+            env = env_kls()
+            for size in range(2,5):
+                input_tape = env.generate_input_data(size)
+                self.assertTrue(all(0<=x<=env.base for x in input_tape),
+                "Invalid input tape from env {}: {}".format(env_kls, input_tape))
+                # DuplicatedInput needs to generate inputs with even length,
+                # so it may be short one
+                self.assertLessEqual(len(input_tape), size)
+
+    def test_grid_inputs(self):
+        for env_kls in ALL_GRID_ENVS:
+            env = env_kls()
+            for size in range(2, 5):
+                input_grid = env.generate_input_data(size)
+                # Should get "size" sublists, each of length self.rows (not the
+                # opposite, as you might expect)
+                self.assertEqual(len(input_grid), size)
+                self.assertTrue(all(len(col) == env.rows for col in input_grid))
+                self.assertTrue(all(0<=x<=env.base for x in input_grid[0]))
+
+    def test_duplicatedinput_inputs(self):
+        """The duplicated_input env needs to generate strings with the appropriate
+        amount of repetiion."""
+        env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
+        input_tape = env.generate_input_data(4)
+        self.assertEqual(len(input_tape), 4)
+        self.assertEqual(input_tape[0], input_tape[1])
+        self.assertEqual(input_tape[2], input_tape[3])
+        # If requested input size isn't a multiple of duplication, go lower
+        input_tape = env.generate_input_data(3)
+        self.assertEqual(len(input_tape), 2)
+        self.assertEqual(input_tape[0], input_tape[1])
+        # If requested input size is *less than* duplication, go up
+        input_tape = env.generate_input_data(1)
+        self.assertEqual(len(input_tape), 2)
+        self.assertEqual(input_tape[0], input_tape[1])
+        
+        env = alg.duplicated_input.DuplicatedInputEnv(duplication=3)
+        input_tape = env.generate_input_data(6)
+        self.assertEqual(len(input_tape), 6)
+        self.assertEqual(input_tape[0], input_tape[1])
+        self.assertEqual(input_tape[1], input_tape[2])
+
+if __name__ == '__main__':
+    unittest.main()
--- a/src/gym/envs/atari/init.py
+++ b/src/gym/envs/atari/init.py
@@ -0,0 +1 @@
+from gym.envs.atari.atari_env import AtariEnv
--- a/src/gym/envs/atari/atari_env.py
+++ b/src/gym/envs/atari/atari_env.py
@@ -0,0 +1,192 @@
+import numpy as np
+import os
+import gym
+from gym import error, spaces
+from gym import utils
+from gym.utils import seeding
+
+try:
+    import atari_py
+except ImportError as e:
+    raise error.DependencyNotInstalled("{}. (HINT: you can install Atari dependencies by running 'pip install gym[atari]'.)".format(e))
+
+def to_ram(ale):
+    ram_size = ale.getRAMSize()
+    ram = np.zeros((ram_size),dtype=np.uint8)
+    ale.getRAM(ram)
+    return ram
+
+class AtariEnv(gym.Env, utils.EzPickle):
+    metadata = {'render.modes': ['human', 'rgb_array']}
+
+    def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
+        """Frameskip should be either a tuple (indicating a random range to
+        choose from, with the top value exclude), or an int."""
+
+        utils.EzPickle.__init__(self, game, obs_type, frameskip, repeat_action_probability)
+        assert obs_type in ('ram', 'image')
+
+        self.game_path = atari_py.get_game_path(game)
+        if not os.path.exists(self.game_path):
+            raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
+        self._obs_type = obs_type
+        self.frameskip = frameskip
+        self.ale = atari_py.ALEInterface()
+        self.viewer = None
+
+        # Tune (or disable) ALE's action repeat:
+        # https://github.com/openai/gym/issues/349
+        assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
+        self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)
+
+        self.seed()
+
+        self._action_set = self.ale.getMinimalActionSet()
+        self.action_space = spaces.Discrete(len(self._action_set))
+
+        (screen_width,screen_height) = self.ale.getScreenDims()
+        if self._obs_type == 'ram':
+            self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
+        elif self._obs_type == 'image':
+            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
+        else:
+            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
+
+    def seed(self, seed=None):
+        self.np_random, seed1 = seeding.np_random(seed)
+        # Derive a random seed. This gets passed as a uint, but gets
+        # checked as an int elsewhere, so we need to keep it below
+        # 2**31.
+        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
+        # Empirically, we need to seed before loading the ROM.
+        self.ale.setInt(b'random_seed', seed2)
+        self.ale.loadROM(self.game_path)
+        return [seed1, seed2]
+
+    def step(self, a):
+        reward = 0.0
+        action = self._action_set[a]
+
+        if isinstance(self.frameskip, int):
+            num_steps = self.frameskip
+        else:
+            num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
+        for _ in range(num_steps):
+            reward += self.ale.act(action)
+        ob = self._get_obs()
+
+        return ob, reward, self.ale.game_over(), {"ale.lives": self.ale.lives()}
+
+    def _get_image(self):
+        return self.ale.getScreenRGB2()
+
+    def _get_ram(self):
+        return to_ram(self.ale)
+
+    @property
+    def _n_actions(self):
+        return len(self._action_set)
+
+    def _get_obs(self):
+        if self._obs_type == 'ram':
+            return self._get_ram()
+        elif self._obs_type == 'image':
+            img = self._get_image()
+        return img
+
+    # return: (states, observations)
+    def reset(self):
+        self.ale.reset_game()
+        return self._get_obs()
+
+    def render(self, mode='human'):
+        img = self._get_image()
+        if mode == 'rgb_array':
+            return img
+        elif mode == 'human':
+            from gym.envs.classic_control import rendering
+            if self.viewer is None:
+                self.viewer = rendering.SimpleImageViewer()
+            self.viewer.imshow(img)
+            return self.viewer.isopen
+
+    def close(self):
+        if self.viewer is not None:
+            self.viewer.close()
+            self.viewer = None
+
+    def get_action_meanings(self):
+        return [ACTION_MEANING[i] for i in self._action_set]
+
+    def get_keys_to_action(self):
+        KEYWORD_TO_KEY = {
+            'UP':      ord('w'),
+            'DOWN':    ord('s'),
+            'LEFT':    ord('a'),
+            'RIGHT':   ord('d'),
+            'FIRE':    ord(' '),
+        }
+
+        keys_to_action = {}
+
+        for action_id, action_meaning in enumerate(self.get_action_meanings()):
+            keys = []
+            for keyword, key in KEYWORD_TO_KEY.items():
+                if keyword in action_meaning:
+                    keys.append(key)
+            keys = tuple(sorted(keys))
+
+            assert keys not in keys_to_action
+            keys_to_action[keys] = action_id
+
+        return keys_to_action
+
+    def clone_state(self):
+        """Clone emulator state w/o system state. Restoring this state will
+        *not* give an identical environment. For complete cloning and restoring
+        of the full state, see `{clone,restore}_full_state()`."""
+        state_ref = self.ale.cloneState()
+        state = self.ale.encodeState(state_ref)
+        self.ale.deleteState(state_ref)
+        return state
+
+    def restore_state(self, state):
+        """Restore emulator state w/o system state."""
+        state_ref = self.ale.decodeState(state)
+        self.ale.restoreState(state_ref)
+        self.ale.deleteState(state_ref)
+
+    def clone_full_state(self):
+        """Clone emulator state w/ system state including pseudorandomness.
+        Restoring this state will give an identical environment."""
+        state_ref = self.ale.cloneSystemState()
+        state = self.ale.encodeState(state_ref)
+        self.ale.deleteState(state_ref)
+        return state
+
+    def restore_full_state(self, state):
+        """Restore emulator state w/ system state including pseudorandomness."""
+        state_ref = self.ale.decodeState(state)
+        self.ale.restoreSystemState(state_ref)
+        self.ale.deleteState(state_ref)
+
+ACTION_MEANING = {
+    0 : "NOOP",
+    1 : "FIRE",
+    2 : "UP",
+    3 : "RIGHT",
+    4 : "LEFT",
+    5 : "DOWN",
+    6 : "UPRIGHT",
+    7 : "UPLEFT",
+    8 : "DOWNRIGHT",
+    9 : "DOWNLEFT",
+    10 : "UPFIRE",
+    11 : "RIGHTFIRE",
+    12 : "LEFTFIRE",
+    13 : "DOWNFIRE",
+    14 : "UPRIGHTFIRE",
+    15 : "UPLEFTFIRE",
+    16 : "DOWNRIGHTFIRE",
+    17 : "DOWNLEFTFIRE",
+}
--- a/src/gym/envs/box2d/init.py
+++ b/src/gym/envs/box2d/init.py
@@ -0,0 +1,4 @@
+from gym.envs.box2d.lunar_lander import LunarLander
+from gym.envs.box2d.lunar_lander import LunarLanderContinuous
+from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
+from gym.envs.box2d.car_racing import CarRacing
--- a/src/gym/envs/box2d/bipedal_walker.py
+++ b/src/gym/envs/box2d/bipedal_walker.py
@@ -0,0 +1,581 @@
+import sys, math
+import numpy as np
+
+import Box2D
+from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
+
+import gym
+from gym import spaces
+from gym.utils import colorize, seeding, EzPickle
+
+# This is simple 4-joints walker robot environment.
+#
+# There are two versions:
+#
+# - Normal, with slightly uneven terrain.
+#
+# - Hardcore with ladders, stumps, pitfalls.
+#
+# Reward is given for moving forward, total 300+ points up to the far end. If the robot falls,
+# it gets -100. Applying motor torque costs a small amount of points, more optimal agent
+# will get better score.
+#
+# Heuristic is provided for testing, it's also useful to get demonstrations to
+# learn from. To run heuristic:
+#
+# python gym/envs/box2d/bipedal_walker.py
+#
+# State consists of hull angle speed, angular velocity, horizontal speed, vertical speed,
+# position of joints and joints angular speed, legs contact with ground, and 10 lidar
+# rangefinder measurements to help to deal with the hardcore version. There's no coordinates
+# in the state vector. Lidar is less useful in normal version, but it works.
+#
+# To solve the game you need to get 300 points in 1600 time steps.
+#
+# To solve hardcore version you need 300 points in 2000 time steps.
+#
+# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+
+FPS    = 50
+SCALE  = 30.0   # affects how fast-paced the game is, forces should be adjusted as well
+
+MOTORS_TORQUE = 80
+SPEED_HIP     = 4
+SPEED_KNEE    = 6
+LIDAR_RANGE   = 160/SCALE
+
+INITIAL_RANDOM = 5
+
+HULL_POLY =[
+    (-30,+9), (+6,+9), (+34,+1),
+    (+34,-8), (-30,-8)
+    ]
+LEG_DOWN = -8/SCALE
+LEG_W, LEG_H = 8/SCALE, 34/SCALE
+
+VIEWPORT_W = 600
+VIEWPORT_H = 400
+
+TERRAIN_STEP   = 14/SCALE
+TERRAIN_LENGTH = 200     # in steps
+TERRAIN_HEIGHT = VIEWPORT_H/SCALE/4
+TERRAIN_GRASS    = 10    # low long are grass spots, in steps
+TERRAIN_STARTPAD = 20    # in steps
+FRICTION = 2.5
+
+HULL_FD = fixtureDef(
+                shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in HULL_POLY ]),
+                density=5.0,
+                friction=0.1,
+                categoryBits=0x0020,
+                maskBits=0x001,  # collide only with ground
+                restitution=0.0) # 0.99 bouncy
+
+LEG_FD = fixtureDef(
+                    shape=polygonShape(box=(LEG_W/2, LEG_H/2)),
+                    density=1.0,
+                    restitution=0.0,
+                    categoryBits=0x0020,
+                    maskBits=0x001)
+
+LOWER_FD = fixtureDef(
+                    shape=polygonShape(box=(0.8*LEG_W/2, LEG_H/2)),
+                    density=1.0,
+                    restitution=0.0,
+                    categoryBits=0x0020,
+                    maskBits=0x001)
+
+class ContactDetector(contactListener):
+    def __init__(self, env):
+        contactListener.__init__(self)
+        self.env = env
+    def BeginContact(self, contact):
+        if self.env.hull==contact.fixtureA.body or self.env.hull==contact.fixtureB.body:
+            self.env.game_over = True
+        for leg in [self.env.legs[1], self.env.legs[3]]:
+            if leg in [contact.fixtureA.body, contact.fixtureB.body]:
+                leg.ground_contact = True
+    def EndContact(self, contact):
+        for leg in [self.env.legs[1], self.env.legs[3]]:
+            if leg in [contact.fixtureA.body, contact.fixtureB.body]:
+                leg.ground_contact = False
+
+class BipedalWalker(gym.Env, EzPickle):
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second' : FPS
+    }
+
+    hardcore = False
+
+    def __init__(self):
+        EzPickle.__init__(self)
+        self.seed()
+        self.viewer = None
+
+        self.world = Box2D.b2World()
+        self.terrain = None
+        self.hull = None
+
+        self.prev_shaping = None
+
+        self.fd_polygon = fixtureDef(
+                        shape = polygonShape(vertices=
+                        [(0, 0),
+                         (1, 0),
+                         (1, -1),
+                         (0, -1)]),
+                        friction = FRICTION)
+
+        self.fd_edge = fixtureDef(
+                    shape = edgeShape(vertices=
+                    [(0, 0),
+                     (1, 1)]),
+                    friction = FRICTION,
+                    categoryBits=0x0001,
+                )
+
+        self.reset()
+
+        high = np.array([np.inf]*24)
+        self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
+        self.observation_space = spaces.Box(-high, high)
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def _destroy(self):
+        if not self.terrain: return
+        self.world.contactListener = None
+        for t in self.terrain:
+            self.world.DestroyBody(t)
+        self.terrain = []
+        self.world.DestroyBody(self.hull)
+        self.hull = None
+        for leg in self.legs:
+            self.world.DestroyBody(leg)
+        self.legs = []
+        self.joints = []
+
+    def _generate_terrain(self, hardcore):
+        GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
+        state    = GRASS
+        velocity = 0.0
+        y        = TERRAIN_HEIGHT
+        counter  = TERRAIN_STARTPAD
+        oneshot  = False
+        self.terrain   = []
+        self.terrain_x = []
+        self.terrain_y = []
+        for i in range(TERRAIN_LENGTH):
+            x = i*TERRAIN_STEP
+            self.terrain_x.append(x)
+
+            if state==GRASS and not oneshot:
+                velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
+                if i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1)/SCALE   #1
+                y += velocity
+
+            elif state==PIT and oneshot:
+                counter = self.np_random.randint(3, 5)
+                poly = [
+                    (x,              y),
+                    (x+TERRAIN_STEP, y),
+                    (x+TERRAIN_STEP, y-4*TERRAIN_STEP),
+                    (x,              y-4*TERRAIN_STEP),
+                    ]
+                self.fd_polygon.shape.vertices=poly
+                t = self.world.CreateStaticBody(
+                    fixtures = self.fd_polygon)
+                t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
+                self.terrain.append(t)
+
+                self.fd_polygon.shape.vertices=[(p[0]+TERRAIN_STEP*counter,p[1]) for p in poly]
+                t = self.world.CreateStaticBody(
+                    fixtures = self.fd_polygon)
+                t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
+                self.terrain.append(t)
+                counter += 2
+                original_y = y
+
+            elif state==PIT and not oneshot:
+                y = original_y
+                if counter > 1:
+                    y -= 4*TERRAIN_STEP
+
+            elif state==STUMP and oneshot:
+                counter = self.np_random.randint(1, 3)
+                poly = [
+                    (x,                      y),
+                    (x+counter*TERRAIN_STEP, y),
+                    (x+counter*TERRAIN_STEP, y+counter*TERRAIN_STEP),
+                    (x,                      y+counter*TERRAIN_STEP),
+                    ]
+                self.fd_polygon.shape.vertices=poly
+                t = self.world.CreateStaticBody(
+                    fixtures = self.fd_polygon)
+                t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
+                self.terrain.append(t)
+
+            elif state==STAIRS and oneshot:
+                stair_height = +1 if self.np_random.rand() > 0.5 else -1
+                stair_width = self.np_random.randint(4, 5)
+                stair_steps = self.np_random.randint(3, 5)
+                original_y = y
+                for s in range(stair_steps):
+                    poly = [
+                        (x+(    s*stair_width)*TERRAIN_STEP, y+(   s*stair_height)*TERRAIN_STEP),
+                        (x+((1+s)*stair_width)*TERRAIN_STEP, y+(   s*stair_height)*TERRAIN_STEP),
+                        (x+((1+s)*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
+                        (x+(    s*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
+                        ]
+                    self.fd_polygon.shape.vertices=poly
+                    t = self.world.CreateStaticBody(
+                        fixtures = self.fd_polygon)
+                    t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
+                    self.terrain.append(t)
+                counter = stair_steps*stair_width
+
+            elif state==STAIRS and not oneshot:
+                s = stair_steps*stair_width - counter - stair_height
+                n = s/stair_width
+                y = original_y + (n*stair_height)*TERRAIN_STEP
+
+            oneshot = False
+            self.terrain_y.append(y)
+            counter -= 1
+            if counter==0:
+                counter = self.np_random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
+                if state==GRASS and hardcore:
+                    state = self.np_random.randint(1, _STATES_)
+                    oneshot = True
+                else:
+                    state = GRASS
+                    oneshot = True
+
+        self.terrain_poly = []
+        for i in range(TERRAIN_LENGTH-1):
+            poly = [
+                (self.terrain_x[i],   self.terrain_y[i]),
+                (self.terrain_x[i+1], self.terrain_y[i+1])
+                ]
+            self.fd_edge.shape.vertices=poly
+            t = self.world.CreateStaticBody(
+                fixtures = self.fd_edge)
+            color = (0.3, 1.0 if i%2==0 else 0.8, 0.3)
+            t.color1 = color
+            t.color2 = color
+            self.terrain.append(t)
+            color = (0.4, 0.6, 0.3)
+            poly += [ (poly[1][0], 0), (poly[0][0], 0) ]
+            self.terrain_poly.append( (poly, color) )
+        self.terrain.reverse()
+
+    def _generate_clouds(self):
+        # Sorry for the clouds, couldn't resist
+        self.cloud_poly   = []
+        for i in range(TERRAIN_LENGTH//20):
+            x = self.np_random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
+            y = VIEWPORT_H/SCALE*3/4
+            poly = [
+                (x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP),
+                 y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP) )
+                for a in range(5) ]
+            x1 = min( [p[0] for p in poly] )
+            x2 = max( [p[0] for p in poly] )
+            self.cloud_poly.append( (poly,x1,x2) )
+
+    def reset(self):
+        self._destroy()
+        self.world.contactListener_bug_workaround = ContactDetector(self)
+        self.world.contactListener = self.world.contactListener_bug_workaround
+        self.game_over = False
+        self.prev_shaping = None
+        self.scroll = 0.0
+        self.lidar_render = 0
+
+        W = VIEWPORT_W/SCALE
+        H = VIEWPORT_H/SCALE
+
+        self._generate_terrain(self.hardcore)
+        self._generate_clouds()
+
+        init_x = TERRAIN_STEP*TERRAIN_STARTPAD/2
+        init_y = TERRAIN_HEIGHT+2*LEG_H
+        self.hull = self.world.CreateDynamicBody(
+            position = (init_x, init_y),
+            fixtures = HULL_FD
+                )
+        self.hull.color1 = (0.5,0.4,0.9)
+        self.hull.color2 = (0.3,0.3,0.5)
+        self.hull.ApplyForceToCenter((self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
+
+        self.legs = []
+        self.joints = []
+        for i in [-1,+1]:
+            leg = self.world.CreateDynamicBody(
+                position = (init_x, init_y - LEG_H/2 - LEG_DOWN),
+                angle = (i*0.05),
+                fixtures = LEG_FD
+                )
+            leg.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
+            leg.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
+            rjd = revoluteJointDef(
+                bodyA=self.hull,
+                bodyB=leg,
+                localAnchorA=(0, LEG_DOWN),
+                localAnchorB=(0, LEG_H/2),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=MOTORS_TORQUE,
+                motorSpeed = i,
+                lowerAngle = -0.8,
+                upperAngle = 1.1,
+                )
+            self.legs.append(leg)
+            self.joints.append(self.world.CreateJoint(rjd))
+
+            lower = self.world.CreateDynamicBody(
+                position = (init_x, init_y - LEG_H*3/2 - LEG_DOWN),
+                angle = (i*0.05),
+                fixtures = LOWER_FD
+                )
+            lower.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
+            lower.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
+            rjd = revoluteJointDef(
+                bodyA=leg,
+                bodyB=lower,
+                localAnchorA=(0, -LEG_H/2),
+                localAnchorB=(0, LEG_H/2),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=MOTORS_TORQUE,
+                motorSpeed = 1,
+                lowerAngle = -1.6,
+                upperAngle = -0.1,
+                )
+            lower.ground_contact = False
+            self.legs.append(lower)
+            self.joints.append(self.world.CreateJoint(rjd))
+
+        self.drawlist = self.terrain + self.legs + [self.hull]
+
+        class LidarCallback(Box2D.b2.rayCastCallback):
+            def ReportFixture(self, fixture, point, normal, fraction):
+                if (fixture.filterData.categoryBits & 1) == 0:
+                    return 1
+                self.p2 = point
+                self.fraction = fraction
+                return 0
+        self.lidar = [LidarCallback() for _ in range(10)]
+
+        return self.step(np.array([0,0,0,0]))[0]
+
+    def step(self, action):
+        #self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
+        control_speed = False  # Should be easier as well
+        if control_speed:
+            self.joints[0].motorSpeed = float(SPEED_HIP  * np.clip(action[0], -1, 1))
+            self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
+            self.joints[2].motorSpeed = float(SPEED_HIP  * np.clip(action[2], -1, 1))
+            self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
+        else:
+            self.joints[0].motorSpeed     = float(SPEED_HIP     * np.sign(action[0]))
+            self.joints[0].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1))
+            self.joints[1].motorSpeed     = float(SPEED_KNEE    * np.sign(action[1]))
+            self.joints[1].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1))
+            self.joints[2].motorSpeed     = float(SPEED_HIP     * np.sign(action[2]))
+            self.joints[2].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1))
+            self.joints[3].motorSpeed     = float(SPEED_KNEE    * np.sign(action[3]))
+            self.joints[3].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1))
+
+        self.world.Step(1.0/FPS, 6*30, 2*30)
+
+        pos = self.hull.position
+        vel = self.hull.linearVelocity
+
+        for i in range(10):
+            self.lidar[i].fraction = 1.0
+            self.lidar[i].p1 = pos
+            self.lidar[i].p2 = (
+                pos[0] + math.sin(1.5*i/10.0)*LIDAR_RANGE,
+                pos[1] - math.cos(1.5*i/10.0)*LIDAR_RANGE)
+            self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
+
+        state = [
+            self.hull.angle,        # Normal angles up to 0.5 here, but sure more is possible.
+            2.0*self.hull.angularVelocity/FPS,
+            0.3*vel.x*(VIEWPORT_W/SCALE)/FPS,  # Normalized to get -1..1 range
+            0.3*vel.y*(VIEWPORT_H/SCALE)/FPS,
+            self.joints[0].angle,   # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
+            self.joints[0].speed / SPEED_HIP,
+            self.joints[1].angle + 1.0,
+            self.joints[1].speed / SPEED_KNEE,
+            1.0 if self.legs[1].ground_contact else 0.0,
+            self.joints[2].angle,
+            self.joints[2].speed / SPEED_HIP,
+            self.joints[3].angle + 1.0,
+            self.joints[3].speed / SPEED_KNEE,
+            1.0 if self.legs[3].ground_contact else 0.0
+            ]
+        state += [l.fraction for l in self.lidar]
+        assert len(state)==24
+
+        self.scroll = pos.x - VIEWPORT_W/SCALE/5
+
+        shaping  = 130*pos[0]/SCALE   # moving forward is a way to receive reward (normalized to get 300 on completion)
+        shaping -= 5.0*abs(state[0])  # keep head straight, other than that and falling, any behavior is unpunished
+
+        reward = 0
+        if self.prev_shaping is not None:
+            reward = shaping - self.prev_shaping
+        self.prev_shaping = shaping
+
+        for a in action:
+            reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
+            # normalized to about -50.0 using heuristic, more optimal agent should spend less
+
+        done = False
+        if self.game_over or pos[0] < 0:
+            reward = -100
+            done   = True
+        if pos[0] > (TERRAIN_LENGTH-TERRAIN_GRASS)*TERRAIN_STEP:
+            done   = True
+        return np.array(state), reward, done, {}
+
+    def render(self, mode='human'):
+        from gym.envs.classic_control import rendering
+        if self.viewer is None:
+            self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
+        self.viewer.set_bounds(self.scroll, VIEWPORT_W/SCALE + self.scroll, 0, VIEWPORT_H/SCALE)
+
+        self.viewer.draw_polygon( [
+            (self.scroll,                  0),
+            (self.scroll+VIEWPORT_W/SCALE, 0),
+            (self.scroll+VIEWPORT_W/SCALE, VIEWPORT_H/SCALE),
+            (self.scroll,                  VIEWPORT_H/SCALE),
+            ], color=(0.9, 0.9, 1.0) )
+        for poly,x1,x2 in self.cloud_poly:
+            if x2 < self.scroll/2: continue
+            if x1 > self.scroll/2 + VIEWPORT_W/SCALE: continue
+            self.viewer.draw_polygon( [(p[0]+self.scroll/2, p[1]) for p in poly], color=(1,1,1))
+        for poly, color in self.terrain_poly:
+            if poly[1][0] < self.scroll: continue
+            if poly[0][0] > self.scroll + VIEWPORT_W/SCALE: continue
+            self.viewer.draw_polygon(poly, color=color)
+
+        self.lidar_render = (self.lidar_render+1) % 100
+        i = self.lidar_render
+        if i < 2*len(self.lidar):
+            l = self.lidar[i] if i < len(self.lidar) else self.lidar[len(self.lidar)-i-1]
+            self.viewer.draw_polyline( [l.p1, l.p2], color=(1,0,0), linewidth=1 )
+
+        for obj in self.drawlist:
+            for f in obj.fixtures:
+                trans = f.body.transform
+                if type(f.shape) is circleShape:
+                    t = rendering.Transform(translation=trans*f.shape.pos)
+                    self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
+                    self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
+                else:
+                    path = [trans*v for v in f.shape.vertices]
+                    self.viewer.draw_polygon(path, color=obj.color1)
+                    path.append(path[0])
+                    self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
+
+        flagy1 = TERRAIN_HEIGHT
+        flagy2 = flagy1 + 50/SCALE
+        x = TERRAIN_STEP*3
+        self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(0,0,0), linewidth=2 )
+        f = [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)]
+        self.viewer.draw_polygon(f, color=(0.9,0.2,0) )
+        self.viewer.draw_polyline(f + [f[0]], color=(0,0,0), linewidth=2 )
+
+        return self.viewer.render(return_rgb_array = mode=='rgb_array')
+
+    def close(self):
+        if self.viewer is not None:
+            self.viewer.close()
+            self.viewer = None
+
+class BipedalWalkerHardcore(BipedalWalker):
+    hardcore = True
+
+if __name__=="__main__":
+    # Heurisic: suboptimal, have no notion of balance.
+    env = BipedalWalker()
+    env.reset()
+    steps = 0
+    total_reward = 0
+    a = np.array([0.0, 0.0, 0.0, 0.0])
+    STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1,2,3
+    SPEED = 0.29  # Will fall forward on higher speed
+    state = STAY_ON_ONE_LEG
+    moving_leg = 0
+    supporting_leg = 1 - moving_leg
+    SUPPORT_KNEE_ANGLE = +0.1
+    supporting_knee_angle = SUPPORT_KNEE_ANGLE
+    while True:
+        s, r, done, info = env.step(a)
+        total_reward += r
+        if steps % 20 == 0 or done:
+            print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
+            print("step {} total_reward {:+0.2f}".format(steps, total_reward))
+            print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4] ]))
+            print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9] ]))
+            print("leg1 " + str(["{:+0.2f}".format(x) for x in s[9:14]]))
+        steps += 1
+
+        contact0 = s[8]
+        contact1 = s[13]
+        moving_s_base = 4 + 5*moving_leg
+        supporting_s_base = 4 + 5*supporting_leg
+
+        hip_targ  = [None,None]   # -0.8 .. +1.1
+        knee_targ = [None,None]   # -0.6 .. +0.9
+        hip_todo  = [0.0, 0.0]
+        knee_todo = [0.0, 0.0]
+
+        if state==STAY_ON_ONE_LEG:
+            hip_targ[moving_leg]  = 1.1
+            knee_targ[moving_leg] = -0.6
+            supporting_knee_angle += 0.03
+            if s[2] > SPEED: supporting_knee_angle += 0.03
+            supporting_knee_angle = min( supporting_knee_angle, SUPPORT_KNEE_ANGLE )
+            knee_targ[supporting_leg] = supporting_knee_angle
+            if s[supporting_s_base+0] < 0.10: # supporting leg is behind
+                state = PUT_OTHER_DOWN
+        if state==PUT_OTHER_DOWN:
+            hip_targ[moving_leg]  = +0.1
+            knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
+            knee_targ[supporting_leg] = supporting_knee_angle
+            if s[moving_s_base+4]:
+                state = PUSH_OFF
+                supporting_knee_angle = min( s[moving_s_base+2], SUPPORT_KNEE_ANGLE )
+        if state==PUSH_OFF:
+            knee_targ[moving_leg] = supporting_knee_angle
+            knee_targ[supporting_leg] = +1.0
+            if s[supporting_s_base+2] > 0.88 or s[2] > 1.2*SPEED:
+                state = STAY_ON_ONE_LEG
+                moving_leg = 1 - moving_leg
+                supporting_leg = 1 - moving_leg
+
+        if hip_targ[0]: hip_todo[0] = 0.9*(hip_targ[0] - s[4]) - 0.25*s[5]
+        if hip_targ[1]: hip_todo[1] = 0.9*(hip_targ[1] - s[9]) - 0.25*s[10]
+        if knee_targ[0]: knee_todo[0] = 4.0*(knee_targ[0] - s[6])  - 0.25*s[7]
+        if knee_targ[1]: knee_todo[1] = 4.0*(knee_targ[1] - s[11]) - 0.25*s[12]
+
+        hip_todo[0] -= 0.9*(0-s[0]) - 1.5*s[1] # PID to keep head strait
+        hip_todo[1] -= 0.9*(0-s[0]) - 1.5*s[1]
+        knee_todo[0] -= 15.0*s[3]  # vertical speed, to damp oscillations
+        knee_todo[1] -= 15.0*s[3]
+
+        a[0] = hip_todo[0]
+        a[1] = knee_todo[0]
+        a[2] = hip_todo[1]
+        a[3] = knee_todo[1]
+        a = np.clip(0.5*a, -1.0, 1.0)
+
+        env.render()
+        if done: break
--- a/src/gym/envs/box2d/car_dynamics.py
+++ b/src/gym/envs/box2d/car_dynamics.py
@@ -0,0 +1,244 @@
+import numpy as np
+import math
+import Box2D
+from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)
+
+# Top-down car dynamics simulation.
+#
+# Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
+# This simulation is a bit more detailed, with wheels rotation.
+#
+# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+
+SIZE = 0.02
+ENGINE_POWER            = 100000000*SIZE*SIZE
+WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
+FRICTION_LIMIT          = 1000000*SIZE*SIZE     # friction ~= mass ~= size^2 (calculated implicitly using density)
+WHEEL_R  = 27
+WHEEL_W  = 14
+WHEELPOS = [
+    (-55,+80), (+55,+80),
+    (-55,-82), (+55,-82)
+    ]
+HULL_POLY1 =[
+    (-60,+130), (+60,+130),
+    (+60,+110), (-60,+110)
+    ]
+HULL_POLY2 =[
+    (-15,+120), (+15,+120),
+    (+20, +20), (-20,  20)
+    ]
+HULL_POLY3 =[
+    (+25, +20),
+    (+50, -10),
+    (+50, -40),
+    (+20, -90),
+    (-20, -90),
+    (-50, -40),
+    (-50, -10),
+    (-25, +20)
+    ]
+HULL_POLY4 =[
+    (-50,-120), (+50,-120),
+    (+50,-90),  (-50,-90)
+    ]
+WHEEL_COLOR = (0.0,0.0,0.0)
+WHEEL_WHITE = (0.3,0.3,0.3)
+MUD_COLOR   = (0.4,0.4,0.0)
+
+class Car:
+    def __init__(self, world, init_angle, init_x, init_y):
+        self.world = world
+        self.hull = self.world.CreateDynamicBody(
+            position = (init_x, init_y),
+            angle = init_angle,
+            fixtures = [
+                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY1 ]), density=1.0),
+                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY2 ]), density=1.0),
+                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY3 ]), density=1.0),
+                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY4 ]), density=1.0)
+                ]
+            )
+        self.hull.color = (0.8,0.0,0.0)
+        self.wheels = []
+        self.fuel_spent = 0.0
+        WHEEL_POLY = [
+            (-WHEEL_W,+WHEEL_R), (+WHEEL_W,+WHEEL_R),
+            (+WHEEL_W,-WHEEL_R), (-WHEEL_W,-WHEEL_R)
+            ]
+        for wx,wy in WHEELPOS:
+            front_k = 1.0 if wy > 0 else 1.0
+            w = self.world.CreateDynamicBody(
+                position = (init_x+wx*SIZE, init_y+wy*SIZE),
+                angle = init_angle,
+                fixtures = fixtureDef(
+                    shape=polygonShape(vertices=[ (x*front_k*SIZE,y*front_k*SIZE) for x,y in WHEEL_POLY ]),
+                    density=0.1,
+                    categoryBits=0x0020,
+                    maskBits=0x001,
+                    restitution=0.0)
+                    )
+            w.wheel_rad = front_k*WHEEL_R*SIZE
+            w.color = WHEEL_COLOR
+            w.gas   = 0.0
+            w.brake = 0.0
+            w.steer = 0.0
+            w.phase = 0.0  # wheel angle
+            w.omega = 0.0  # angular velocity
+            w.skid_start = None
+            w.skid_particle = None
+            rjd = revoluteJointDef(
+                bodyA=self.hull,
+                bodyB=w,
+                localAnchorA=(wx*SIZE,wy*SIZE),
+                localAnchorB=(0,0),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=180*900*SIZE*SIZE,
+                motorSpeed = 0,
+                lowerAngle = -0.4,
+                upperAngle = +0.4,
+                )
+            w.joint = self.world.CreateJoint(rjd)
+            w.tiles = set()
+            w.userData = w
+            self.wheels.append(w)
+        self.drawlist =  self.wheels + [self.hull]
+        self.particles = []
+
+    def gas(self, gas):
+        'control: rear wheel drive'
+        gas = np.clip(gas, 0, 1)
+        for w in self.wheels[2:4]:
+            diff = gas - w.gas
+            if diff > 0.1: diff = 0.1  # gradually increase, but stop immediately
+            w.gas += diff
+
+    def brake(self, b):
+        'control: brake b=0..1, more than 0.9 blocks wheels to zero rotation'
+        for w in self.wheels:
+            w.brake = b
+
+    def steer(self, s):
+        'control: steer s=-1..1, it takes time to rotate steering wheel from side to side, s is target position'
+        self.wheels[0].steer = s
+        self.wheels[1].steer = s
+
+    def step(self, dt):
+        for w in self.wheels:
+            # Steer each wheel
+            dir = np.sign(w.steer - w.joint.angle)
+            val = abs(w.steer - w.joint.angle)
+            w.joint.motorSpeed = dir*min(50.0*val, 3.0)
+
+            # Position => friction_limit
+            grass = True
+            friction_limit = FRICTION_LIMIT*0.6  # Grass friction if no tile
+            for tile in w.tiles:
+                friction_limit = max(friction_limit, FRICTION_LIMIT*tile.road_friction)
+                grass = False
+
+            # Force
+            forw = w.GetWorldVector( (0,1) )
+            side = w.GetWorldVector( (1,0) )
+            v = w.linearVelocity
+            vf = forw[0]*v[0] + forw[1]*v[1]  # forward speed
+            vs = side[0]*v[0] + side[1]*v[1]  # side speed
+
+            # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
+            # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
+            # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
+            w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0)  # small coef not to divide by zero
+            self.fuel_spent += dt*ENGINE_POWER*w.gas
+
+            if w.brake >= 0.9:
+                w.omega = 0
+            elif w.brake > 0:
+                BRAKE_FORCE = 15    # radians per second
+                dir = -np.sign(w.omega)
+                val = BRAKE_FORCE*w.brake
+                if abs(val) > abs(w.omega): val = abs(w.omega)  # low speed => same as = 0
+                w.omega += dir*val
+            w.phase += w.omega*dt
+
+            vr = w.omega*w.wheel_rad  # rotating wheel speed
+            f_force = -vf + vr        # force direction is direction of speed difference
+            p_force = -vs
+
+            # Physically correct is to always apply friction_limit until speed is equal.
+            # But dt is finite, that will lead to oscillations if difference is already near zero.
+            f_force *= 205000*SIZE*SIZE  # Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
+            p_force *= 205000*SIZE*SIZE
+            force = np.sqrt(np.square(f_force) + np.square(p_force))
+
+            # Skid trace
+            if abs(force) > 2.0*friction_limit:
+                if w.skid_particle and w.skid_particle.grass==grass and len(w.skid_particle.poly) < 30:
+                    w.skid_particle.poly.append( (w.position[0], w.position[1]) )
+                elif w.skid_start is None:
+                    w.skid_start = w.position
+                else:
+                    w.skid_particle = self._create_particle( w.skid_start, w.position, grass )
+                    w.skid_start = None
+            else:
+                w.skid_start = None
+                w.skid_particle = None
+
+            if abs(force) > friction_limit:
+                f_force /= force
+                p_force /= force
+                force = friction_limit  # Correct physics here
+                f_force *= force
+                p_force *= force
+
+            w.omega -= dt*f_force*w.wheel_rad/WHEEL_MOMENT_OF_INERTIA
+
+            w.ApplyForceToCenter( (
+                p_force*side[0] + f_force*forw[0],
+                p_force*side[1] + f_force*forw[1]), True )
+
+    def draw(self, viewer, draw_particles=True):
+        if draw_particles:
+            for p in self.particles:
+                viewer.draw_polyline(p.poly, color=p.color, linewidth=5)
+        for obj in self.drawlist:
+            for f in obj.fixtures:
+                trans = f.body.transform
+                path = [trans*v for v in f.shape.vertices]
+                viewer.draw_polygon(path, color=obj.color)
+                if "phase" not in obj.__dict__: continue
+                a1 = obj.phase
+                a2 = obj.phase + 1.2  # radians
+                s1 = math.sin(a1)
+                s2 = math.sin(a2)
+                c1 = math.cos(a1)
+                c2 = math.cos(a2)
+                if s1>0 and s2>0: continue
+                if s1>0: c1 = np.sign(c1)
+                if s2>0: c2 = np.sign(c2)
+                white_poly = [
+                    (-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE),
+                    (+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE)
+                    ]
+                viewer.draw_polygon([trans*v for v in white_poly], color=WHEEL_WHITE)
+
+    def _create_particle(self, point1, point2, grass):
+        class Particle:
+            pass
+        p = Particle()
+        p.color = WHEEL_COLOR if not grass else MUD_COLOR
+        p.ttl = 1
+        p.poly = [(point1[0],point1[1]), (point2[0],point2[1])]
+        p.grass = grass
+        self.particles.append(p)
+        while len(self.particles) > 30:
+            self.particles.pop(0)
+        return p
+
+    def destroy(self):
+        self.world.DestroyBody(self.hull)
+        self.hull = None
+        for w in self.wheels:
+            self.world.DestroyBody(w)
+        self.wheels = []
+
--- a/src/gym/envs/box2d/car_racing.py
+++ b/src/gym/envs/box2d/car_racing.py
@@ -0,0 +1,498 @@
+import sys, math
+import numpy as np
+
+import Box2D
+from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
+
+import gym
+from gym import spaces
+from gym.envs.box2d.car_dynamics import Car
+from gym.utils import colorize, seeding, EzPickle
+
+import pyglet
+from pyglet import gl
+
+# Easiest continuous control task to learn from pixels, a top-down racing environment.
+# Discreet control is reasonable in this environment as well, on/off discretisation is
+# fine.
+#
+# State consists of STATE_W x STATE_H pixels.
+#
+# Reward is -0.1 every frame and +1000/N for every track tile visited, where N is
+# the total number of tiles in track. For example, if you have finished in 732 frames,
+# your reward is 1000 - 0.1*732 = 926.8 points.
+#
+# Game is solved when agent consistently gets 900+ points. Track is random every episode.
+#
+# Episode finishes when all tiles are visited. Car also can go outside of PLAYFIELD, that
+# is far off the track, then it will get -100 and die.
+#
+# Some indicators shown at the bottom of the window and the state RGB buffer. From
+# left to right: true speed, four ABS sensors, steering wheel position, gyroscope.
+#
+# To play yourself (it's rather fast for humans), type:
+#
+# python gym/envs/box2d/car_racing.py
+#
+# Remember it's powerful rear-wheel drive car, don't press accelerator and turn at the
+# same time.
+#
+# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+
+STATE_W = 96   # less than Atari 160x192
+STATE_H = 96
+VIDEO_W = 600
+VIDEO_H = 400
+WINDOW_W = 1200
+WINDOW_H = 1000
+
+SCALE       = 6.0        # Track scale
+TRACK_RAD   = 900/SCALE  # Track is heavily morphed circle with this radius
+PLAYFIELD   = 2000/SCALE # Game over boundary
+FPS         = 50
+ZOOM        = 2.7        # Camera zoom
+ZOOM_FOLLOW = True       # Set to False for fixed view (don't use zoom)
+
+
+TRACK_DETAIL_STEP = 21/SCALE
+TRACK_TURN_RATE = 0.31
+TRACK_WIDTH = 40/SCALE
+BORDER = 8/SCALE
+BORDER_MIN_COUNT = 4
+
+ROAD_COLOR = [0.4, 0.4, 0.4]
+
+class FrictionDetector(contactListener):
+    def __init__(self, env):
+        contactListener.__init__(self)
+        self.env = env
+    def BeginContact(self, contact):
+        self._contact(contact, True)
+    def EndContact(self, contact):
+        self._contact(contact, False)
+    def _contact(self, contact, begin):
+        tile = None
+        obj = None
+        u1 = contact.fixtureA.body.userData
+        u2 = contact.fixtureB.body.userData
+        if u1 and "road_friction" in u1.__dict__:
+            tile = u1
+            obj  = u2
+        if u2 and "road_friction" in u2.__dict__:
+            tile = u2
+            obj  = u1
+        if not tile: return
+
+        tile.color[0] = ROAD_COLOR[0]
+        tile.color[1] = ROAD_COLOR[1]
+        tile.color[2] = ROAD_COLOR[2]
+        if not obj or "tiles" not in obj.__dict__: return
+        if begin:
+            obj.tiles.add(tile)
+            #print tile.road_friction, "ADD", len(obj.tiles)
+            if not tile.road_visited:
+                tile.road_visited = True
+                self.env.reward += 1000.0/len(self.env.track)
+                self.env.tile_visited_count += 1
+        else:
+            obj.tiles.remove(tile)
+            #print tile.road_friction, "DEL", len(obj.tiles) -- should delete to zero when on grass (this works)
+
+class CarRacing(gym.Env, EzPickle):
+    metadata = {
+        'render.modes': ['human', 'rgb_array', 'state_pixels'],
+        'video.frames_per_second' : FPS
+    }
+
+    def __init__(self):
+        EzPickle.__init__(self)
+        self.seed()
+        self.contactListener_keepref = FrictionDetector(self)
+        self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref)
+        self.viewer = None
+        self.invisible_state_window = None
+        self.invisible_video_window = None
+        self.road = None
+        self.car = None
+        self.reward = 0.0
+        self.prev_reward = 0.0
+
+        self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32)  # steer, gas, brake
+        self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def _destroy(self):
+        if not self.road: return
+        for t in self.road:
+            self.world.DestroyBody(t)
+        self.road = []
+        self.car.destroy()
+
+    def _create_track(self):
+        CHECKPOINTS = 12
+
+        # Create checkpoints
+        checkpoints = []
+        for c in range(CHECKPOINTS):
+            alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
+            rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
+            if c==0:
+                alpha = 0
+                rad = 1.5*TRACK_RAD
+            if c==CHECKPOINTS-1:
+                alpha = 2*math.pi*c/CHECKPOINTS
+                self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
+                rad = 1.5*TRACK_RAD
+            checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )
+
+        #print "\n".join(str(h) for h in checkpoints)
+        #self.road_poly = [ (    # uncomment this to see checkpoints
+        #    [ (tx,ty) for a,tx,ty in checkpoints ],
+        #    (0.7,0.7,0.9) ) ]
+        self.road = []
+
+        # Go from one checkpoint to another to create track
+        x, y, beta = 1.5*TRACK_RAD, 0, 0
+        dest_i = 0
+        laps = 0
+        track = []
+        no_freeze = 2500
+        visited_other_side = False
+        while 1:
+            alpha = math.atan2(y, x)
+            if visited_other_side and alpha > 0:
+                laps += 1
+                visited_other_side = False
+            if alpha < 0:
+                visited_other_side = True
+                alpha += 2*math.pi
+            while True: # Find destination from checkpoints
+                failed = True
+                while True:
+                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
+                    if alpha <= dest_alpha:
+                        failed = False
+                        break
+                    dest_i += 1
+                    if dest_i % len(checkpoints) == 0: break
+                if not failed: break
+                alpha -= 2*math.pi
+                continue
+            r1x = math.cos(beta)
+            r1y = math.sin(beta)
+            p1x = -r1y
+            p1y = r1x
+            dest_dx = dest_x - x  # vector towards destination
+            dest_dy = dest_y - y
+            proj = r1x*dest_dx + r1y*dest_dy  # destination vector projected on rad
+            while beta - alpha >  1.5*math.pi: beta -= 2*math.pi
+            while beta - alpha < -1.5*math.pi: beta += 2*math.pi
+            prev_beta = beta
+            proj *= SCALE
+            if proj >  0.3: beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
+            if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001*proj))
+            x += p1x*TRACK_DETAIL_STEP
+            y += p1y*TRACK_DETAIL_STEP
+            track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
+            if laps > 4: break
+            no_freeze -= 1
+            if no_freeze==0: break
+        #print "\n".join([str(t) for t in enumerate(track)])
+
+        # Find closed loop range i1..i2, first loop should be ignored, second is OK
+        i1, i2 = -1, -1
+        i = len(track)
+        while True:
+            i -= 1
+            if i==0: return False  # Failed
+            pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
+            if pass_through_start and i2==-1:
+                i2 = i
+            elif pass_through_start and i1==-1:
+                i1 = i
+                break
+        print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
+        assert i1!=-1
+        assert i2!=-1
+
+        track = track[i1:i2-1]
+
+        first_beta = track[0][1]
+        first_perp_x = math.cos(first_beta)
+        first_perp_y = math.sin(first_beta)
+        # Length of perpendicular jump to put together head and tail
+        well_glued_together = np.sqrt(
+            np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
+            np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
+        if well_glued_together > TRACK_DETAIL_STEP:
+            return False
+
+        # Red-white border on hard turns
+        border = [False]*len(track)
+        for i in range(len(track)):
+            good = True
+            oneside = 0
+            for neg in range(BORDER_MIN_COUNT):
+                beta1 = track[i-neg-0][1]
+                beta2 = track[i-neg-1][1]
+                good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
+                oneside += np.sign(beta1 - beta2)
+            good &= abs(oneside) == BORDER_MIN_COUNT
+            border[i] = good
+        for i in range(len(track)):
+            for neg in range(BORDER_MIN_COUNT):
+                border[i-neg] |= border[i]
+
+        # Create tiles
+        for i in range(len(track)):
+            alpha1, beta1, x1, y1 = track[i]
+            alpha2, beta2, x2, y2 = track[i-1]
+            road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1))
+            road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1))
+            road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2))
+            road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2))
+            t = self.world.CreateStaticBody( fixtures = fixtureDef(
+                shape=polygonShape(vertices=[road1_l, road1_r, road2_r, road2_l])
+                ))
+            t.userData = t
+            c = 0.01*(i%3)
+            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
+            t.road_visited = False
+            t.road_friction = 1.0
+            t.fixtures[0].sensor = True
+            self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color ))
+            self.road.append(t)
+            if border[i]:
+                side = np.sign(beta2 - beta1)
+                b1_l = (x1 + side* TRACK_WIDTH        *math.cos(beta1), y1 + side* TRACK_WIDTH        *math.sin(beta1))
+                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
+                b2_l = (x2 + side* TRACK_WIDTH        *math.cos(beta2), y2 + side* TRACK_WIDTH        *math.sin(beta2))
+                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
+                self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
+        self.track = track
+        return True
+
+    def reset(self):
+        self._destroy()
+        self.reward = 0.0
+        self.prev_reward = 0.0
+        self.tile_visited_count = 0
+        self.t = 0.0
+        self.road_poly = []
+        self.human_render = False
+
+        while True:
+            success = self._create_track()
+            if success: break
+            print("retry to generate track (normal if there are not many of this messages)")
+        self.car = Car(self.world, *self.track[0][1:4])
+
+        return self.step(None)[0]
+
+    def step(self, action):
+        if action is not None:
+            self.car.steer(-action[0])
+            self.car.gas(action[1])
+            self.car.brake(action[2])
+
+        self.car.step(1.0/FPS)
+        self.world.Step(1.0/FPS, 6*30, 2*30)
+        self.t += 1.0/FPS
+
+        self.state = self.render("state_pixels")
+
+        step_reward = 0
+        done = False
+        if action is not None: # First step without action, called from reset()
+            self.reward -= 0.1
+            # We actually don't want to count fuel spent, we want car to be faster.
+            #self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
+            self.car.fuel_spent = 0.0
+            step_reward = self.reward - self.prev_reward
+            self.prev_reward = self.reward
+            if self.tile_visited_count==len(self.track):
+                done = True
+            x, y = self.car.hull.position
+            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
+                done = True
+                step_reward = -100
+
+        return self.state, step_reward, done, {}
+
+    def render(self, mode='human'):
+        if self.viewer is None:
+            from gym.envs.classic_control import rendering
+            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
+            self.score_label = pyglet.text.Label('0000', font_size=36,
+                x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
+                color=(255,255,255,255))
+            self.transform = rendering.Transform()
+
+        if "t" not in self.__dict__: return  # reset() not called yet
+
+        zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
+        zoom_state  = ZOOM*SCALE*STATE_W/WINDOW_W
+        zoom_video  = ZOOM*SCALE*VIDEO_W/WINDOW_W
+        scroll_x = self.car.hull.position[0]
+        scroll_y = self.car.hull.position[1]
+        angle = -self.car.hull.angle
+        vel = self.car.hull.linearVelocity
+        if np.linalg.norm(vel) > 0.5:
+            angle = math.atan2(vel[0], vel[1])
+        self.transform.set_scale(zoom, zoom)
+        self.transform.set_translation(
+            WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
+            WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
+        self.transform.set_rotation(angle)
+
+        self.car.draw(self.viewer, mode!="state_pixels")
+
+        arr = None
+        win = self.viewer.window
+        if mode != 'state_pixels':
+            win.switch_to()
+            win.dispatch_events()
+        if mode=="rgb_array" or mode=="state_pixels":
+            win.clear()
+            t = self.transform
+            if mode=='rgb_array':
+                VP_W = VIDEO_W
+                VP_H = VIDEO_H
+            else:
+                VP_W = STATE_W
+                VP_H = STATE_H
+            gl.glViewport(0, 0, VP_W, VP_H)
+            t.enable()
+            self.render_road()
+            for geom in self.viewer.onetime_geoms:
+                geom.render()
+            t.disable()
+            self.render_indicators(WINDOW_W, WINDOW_H)  # TODO: find why 2x needed, wtf
+            image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
+            arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
+            arr = arr.reshape(VP_H, VP_W, 4)
+            arr = arr[::-1, :, 0:3]
+
+        if mode=="rgb_array" and not self.human_render: # agent can call or not call env.render() itself when recording video.
+            win.flip()
+
+        if mode=='human':
+            self.human_render = True
+            win.clear()
+            t = self.transform
+            gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
+            t.enable()
+            self.render_road()
+            for geom in self.viewer.onetime_geoms:
+                geom.render()
+            t.disable()
+            self.render_indicators(WINDOW_W, WINDOW_H)
+            win.flip()
+
+        self.viewer.onetime_geoms = []
+        return arr
+
+    def close(self):
+        if self.viewer is not None:
+            self.viewer.close()
+            self.viewer = None
+
+    def render_road(self):
+        gl.glBegin(gl.GL_QUADS)
+        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
+        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
+        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
+        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
+        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
+        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
+        k = PLAYFIELD/20.0
+        for x in range(-20, 20, 2):
+            for y in range(-20, 20, 2):
+                gl.glVertex3f(k*x + k, k*y + 0, 0)
+                gl.glVertex3f(k*x + 0, k*y + 0, 0)
+                gl.glVertex3f(k*x + 0, k*y + k, 0)
+                gl.glVertex3f(k*x + k, k*y + k, 0)
+        for poly, color in self.road_poly:
+            gl.glColor4f(color[0], color[1], color[2], 1)
+            for p in poly:
+                gl.glVertex3f(p[0], p[1], 0)
+        gl.glEnd()
+
+    def render_indicators(self, W, H):
+        gl.glBegin(gl.GL_QUADS)
+        s = W/40.0
+        h = H/40.0
+        gl.glColor4f(0,0,0,1)
+        gl.glVertex3f(W, 0, 0)
+        gl.glVertex3f(W, 5*h, 0)
+        gl.glVertex3f(0, 5*h, 0)
+        gl.glVertex3f(0, 0, 0)
+        def vertical_ind(place, val, color):
+            gl.glColor4f(color[0], color[1], color[2], 1)
+            gl.glVertex3f((place+0)*s, h + h*val, 0)
+            gl.glVertex3f((place+1)*s, h + h*val, 0)
+            gl.glVertex3f((place+1)*s, h, 0)
+            gl.glVertex3f((place+0)*s, h, 0)
+        def horiz_ind(place, val, color):
+            gl.glColor4f(color[0], color[1], color[2], 1)
+            gl.glVertex3f((place+0)*s, 4*h , 0)
+            gl.glVertex3f((place+val)*s, 4*h, 0)
+            gl.glVertex3f((place+val)*s, 2*h, 0)
+            gl.glVertex3f((place+0)*s, 2*h, 0)
+        true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
+        vertical_ind(5, 0.02*true_speed, (1,1,1))
+        vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors
+        vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
+        vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
+        vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
+        horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
+        horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
+        gl.glEnd()
+        self.score_label.text = "%04i" % self.reward
+        self.score_label.draw()
+
+
+if __name__=="__main__":
+    from pyglet.window import key
+    a = np.array( [0.0, 0.0, 0.0] )
+    def key_press(k, mod):
+        global restart
+        if k==0xff0d: restart = True
+        if k==key.LEFT:  a[0] = -1.0
+        if k==key.RIGHT: a[0] = +1.0
+        if k==key.UP:    a[1] = +1.0
+        if k==key.DOWN:  a[2] = +0.8   # set 1.0 for wheels to block to zero rotation
+    def key_release(k, mod):
+        if k==key.LEFT  and a[0]==-1.0: a[0] = 0
+        if k==key.RIGHT and a[0]==+1.0: a[0] = 0
+        if k==key.UP:    a[1] = 0
+        if k==key.DOWN:  a[2] = 0
+    env = CarRacing()
+    env.render()
+    record_video = False
+    if record_video:
+        env.monitor.start('/tmp/video-test', force=True)
+    env.viewer.window.on_key_press = key_press
+    env.viewer.window.on_key_release = key_release
+    while True:
+        env.reset()
+        total_reward = 0.0
+        steps = 0
+        restart = False
+        while True:
+            s, r, done, info = env.step(a)
+            total_reward += r
+            if steps % 200 == 0 or done:
+                print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
+                print("step {} total_reward {:+0.2f}".format(steps, total_reward))
+                #import matplotlib.pyplot as plt
+                #plt.imshow(s)
+                #plt.savefig("test.jpeg")
+            steps += 1
+            if not record_video: # Faster, but you can as well call env.render() every time to play full window.
+                env.render()
+            if done or restart: break
+    env.close()
--- a/src/gym/envs/box2d/lunar_lander.py
+++ b/src/gym/envs/box2d/lunar_lander.py
@@ -0,0 +1,420 @@
+import sys, math
+import numpy as np
+
+import Box2D
+from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
+
+import gym
+from gym import spaces
+from gym.utils import seeding, EzPickle
+
+# Rocket trajectory optimization is a classic topic in Optimal Control.
+#
+# According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
+# turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
+#
+# Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
+# Reward for moving from the top of the screen to landing pad and zero speed is about 100..140 points.
+# If lander moves away from landing pad it loses reward back. Episode finishes if the lander crashes or
+# comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
+# engine is -0.3 points each frame. Solved is 200 points.
+#
+# Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
+# on its first attempt. Please see source code for details.
+#
+# Too see heuristic landing, run:
+#
+# python gym/envs/box2d/lunar_lander.py
+#
+# To play yourself, run:
+#
+# python examples/agents/keyboard_agent.py LunarLander-v2
+#
+# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+
+FPS    = 50
+SCALE  = 30.0   # affects how fast-paced the game is, forces should be adjusted as well
+
+MAIN_ENGINE_POWER  = 13.0
+SIDE_ENGINE_POWER  =  0.6
+
+INITIAL_RANDOM = 1000.0   # Set 1500 to make game harder
+
+LANDER_POLY =[
+    (-14,+17), (-17,0), (-17,-10),
+    (+17,-10), (+17,0), (+14,+17)
+    ]
+LEG_AWAY = 20
+LEG_DOWN = 18
+LEG_W, LEG_H = 2, 8
+LEG_SPRING_TORQUE = 40
+
+SIDE_ENGINE_HEIGHT = 14.0
+SIDE_ENGINE_AWAY   = 12.0
+
+VIEWPORT_W = 600
+VIEWPORT_H = 400
+
+class ContactDetector(contactListener):
+    def __init__(self, env):
+        contactListener.__init__(self)
+        self.env = env
+    def BeginContact(self, contact):
+        if self.env.lander==contact.fixtureA.body or self.env.lander==contact.fixtureB.body:
+            self.env.game_over = True
+        for i in range(2):
+            if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
+                self.env.legs[i].ground_contact = True
+    def EndContact(self, contact):
+        for i in range(2):
+            if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
+                self.env.legs[i].ground_contact = False
+
+class LunarLander(gym.Env, EzPickle):
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second' : FPS
+    }
+
+    continuous = False
+
+    def __init__(self):
+        EzPickle.__init__(self)
+        self.seed()
+        self.viewer = None
+
+        self.world = Box2D.b2World()
+        self.moon = None
+        self.lander = None
+        self.particles = []
+
+        self.prev_reward = None
+
+        # useful range is -1 .. +1, but spikes can be higher
+        self.observation_space = spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32)
+
+        if self.continuous:
+            # Action is two floats [main engine, left-right engines].
+            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
+            # Left-right:  -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
+            self.action_space = spaces.Box(-1, +1, (2,), dtype=np.float32)
+        else:
+            # Nop, fire left engine, main engine, right engine
+            self.action_space = spaces.Discrete(4)
+
+        self.reset()
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def _destroy(self):
+        if not self.moon: return
+        self.world.contactListener = None
+        self._clean_particles(True)
+        self.world.DestroyBody(self.moon)
+        self.moon = None
+        self.world.DestroyBody(self.lander)
+        self.lander = None
+        self.world.DestroyBody(self.legs[0])
+        self.world.DestroyBody(self.legs[1])
+
+    def reset(self):
+        self._destroy()
+        self.world.contactListener_keepref = ContactDetector(self)
+        self.world.contactListener = self.world.contactListener_keepref
+        self.game_over = False
+        self.prev_shaping = None
+
+        W = VIEWPORT_W/SCALE
+        H = VIEWPORT_H/SCALE
+
+        # terrain
+        CHUNKS = 11
+        height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,) )
+        chunk_x  = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
+        self.helipad_x1 = chunk_x[CHUNKS//2-1]
+        self.helipad_x2 = chunk_x[CHUNKS//2+1]
+        self.helipad_y  = H/4
+        height[CHUNKS//2-2] = self.helipad_y
+        height[CHUNKS//2-1] = self.helipad_y
+        height[CHUNKS//2+0] = self.helipad_y
+        height[CHUNKS//2+1] = self.helipad_y
+        height[CHUNKS//2+2] = self.helipad_y
+        smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)]
+
+        self.moon = self.world.CreateStaticBody( shapes=edgeShape(vertices=[(0, 0), (W, 0)]) )
+        self.sky_polys = []
+        for i in range(CHUNKS-1):
+            p1 = (chunk_x[i],   smooth_y[i])
+            p2 = (chunk_x[i+1], smooth_y[i+1])
+            self.moon.CreateEdgeFixture(
+                vertices=[p1,p2],
+                density=0,
+                friction=0.1)
+            self.sky_polys.append( [p1, p2, (p2[0],H), (p1[0],H)] )
+
+        self.moon.color1 = (0.0,0.0,0.0)
+        self.moon.color2 = (0.0,0.0,0.0)
+
+        initial_y = VIEWPORT_H/SCALE
+        self.lander = self.world.CreateDynamicBody(
+            position = (VIEWPORT_W/SCALE/2, initial_y),
+            angle=0.0,
+            fixtures = fixtureDef(
+                shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in LANDER_POLY ]),
+                density=5.0,
+                friction=0.1,
+                categoryBits=0x0010,
+                maskBits=0x001,  # collide only with ground
+                restitution=0.0) # 0.99 bouncy
+                )
+        self.lander.color1 = (0.5,0.4,0.9)
+        self.lander.color2 = (0.3,0.3,0.5)
+        self.lander.ApplyForceToCenter( (
+            self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
+            self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
+            ), True)
+
+        self.legs = []
+        for i in [-1,+1]:
+            leg = self.world.CreateDynamicBody(
+                position = (VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
+                angle = (i*0.05),
+                fixtures = fixtureDef(
+                    shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
+                    density=1.0,
+                    restitution=0.0,
+                    categoryBits=0x0020,
+                    maskBits=0x001)
+                )
+            leg.ground_contact = False
+            leg.color1 = (0.5,0.4,0.9)
+            leg.color2 = (0.3,0.3,0.5)
+            rjd = revoluteJointDef(
+                bodyA=self.lander,
+                bodyB=leg,
+                localAnchorA=(0, 0),
+                localAnchorB=(i*LEG_AWAY/SCALE, LEG_DOWN/SCALE),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=LEG_SPRING_TORQUE,
+                motorSpeed=+0.3*i  # low enough not to jump back into the sky
+                )
+            if i==-1:
+                rjd.lowerAngle = +0.9 - 0.5  # Yes, the most esoteric numbers here, angles legs have freedom to travel within
+                rjd.upperAngle = +0.9
+            else:
+                rjd.lowerAngle = -0.9
+                rjd.upperAngle = -0.9 + 0.5
+            leg.joint = self.world.CreateJoint(rjd)
+            self.legs.append(leg)
+
+        self.drawlist = [self.lander] + self.legs
+
+        return self.step(np.array([0,0]) if self.continuous else 0)[0]
+
+    def _create_particle(self, mass, x, y, ttl):
+        p = self.world.CreateDynamicBody(
+            position = (x,y),
+            angle=0.0,
+            fixtures = fixtureDef(
+                shape=circleShape(radius=2/SCALE, pos=(0,0)),
+                density=mass,
+                friction=0.1,
+                categoryBits=0x0100,
+                maskBits=0x001,  # collide only with ground
+                restitution=0.3)
+                )
+        p.ttl = ttl
+        self.particles.append(p)
+        self._clean_particles(False)
+        return p
+
+    def _clean_particles(self, all):
+        while self.particles and (all or self.particles[0].ttl<0):
+            self.world.DestroyBody(self.particles.pop(0))
+
+    def step(self, action):
+        if self.continuous:
+            action = np.clip(action, -1, +1).astype(np.float32)
+        else:
+            assert self.action_space.contains(action), "%r (%s) invalid " % (action, type(action))
+
+        # Engines
+        tip  = (math.sin(self.lander.angle), math.cos(self.lander.angle))
+        side = (-tip[1], tip[0]);
+        dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
+
+        m_power = 0.0
+        if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2):
+            # Main engine
+            if self.continuous:
+                m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5   # 0.5..1.0
+                assert m_power>=0.5 and m_power <= 1.0
+            else:
+                m_power = 1.0
+            ox =  tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1]   # 4 is move a bit downwards, +-2 for randomness
+            oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
+            impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
+            p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power)    # particles are just a decoration, 3.5 is here to make particle speed adequate
+            p.ApplyLinearImpulse(           ( ox*MAIN_ENGINE_POWER*m_power,  oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
+            self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER*m_power, -oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
+
+        s_power = 0.0
+        if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]):
+            # Orientation engines
+            if self.continuous:
+                direction = np.sign(action[1])
+                s_power = np.clip(np.abs(action[1]), 0.5,1.0)
+                assert s_power>=0.5 and s_power <= 1.0
+            else:
+                direction = action-2
+                s_power = 1.0
+            ox =  tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
+            oy = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
+            impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
+            p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
+            p.ApplyLinearImpulse(           ( ox*SIDE_ENGINE_POWER*s_power,  oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
+            self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER*s_power, -oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
+
+        self.world.Step(1.0/FPS, 6*30, 2*30)
+
+        pos = self.lander.position
+        vel = self.lander.linearVelocity
+        state = [
+            (pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
+            (pos.y - (self.helipad_y+LEG_DOWN/SCALE)) / (VIEWPORT_H/SCALE/2),
+            vel.x*(VIEWPORT_W/SCALE/2)/FPS,
+            vel.y*(VIEWPORT_H/SCALE/2)/FPS,
+            self.lander.angle,
+            20.0*self.lander.angularVelocity/FPS,
+            1.0 if self.legs[0].ground_contact else 0.0,
+            1.0 if self.legs[1].ground_contact else 0.0
+            ]
+        assert len(state)==8
+
+        reward = 0
+        shaping = \
+            - 100*np.sqrt(state[0]*state[0] + state[1]*state[1]) \
+            - 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \
+            - 100*abs(state[4]) + 10*state[6] + 10*state[7]   # And ten points for legs contact, the idea is if you
+                                                              # lose contact again after landing, you get negative reward
+        if self.prev_shaping is not None:
+            reward = shaping - self.prev_shaping
+        self.prev_shaping = shaping
+
+        reward -= m_power*0.30  # less fuel spent is better, about -30 for heurisic landing
+        reward -= s_power*0.03
+
+        done = False
+        if self.game_over or abs(state[0]) >= 1.0:
+            done   = True
+            reward = -100
+        if not self.lander.awake:
+            done   = True
+            reward = +100
+        return np.array(state, dtype=np.float32), reward, done, {}
+
+    def render(self, mode='human'):
+        from gym.envs.classic_control import rendering
+        if self.viewer is None:
+            self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
+            self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE)
+
+        for obj in self.particles:
+            obj.ttl -= 0.15
+            obj.color1 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
+            obj.color2 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
+
+        self._clean_particles(False)
+
+        for p in self.sky_polys:
+            self.viewer.draw_polygon(p, color=(0,0,0))
+
+        for obj in self.particles + self.drawlist:
+            for f in obj.fixtures:
+                trans = f.body.transform
+                if type(f.shape) is circleShape:
+                    t = rendering.Transform(translation=trans*f.shape.pos)
+                    self.viewer.draw_circle(f.shape.radius, 20, color=obj.color1).add_attr(t)
+                    self.viewer.draw_circle(f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2).add_attr(t)
+                else:
+                    path = [trans*v for v in f.shape.vertices]
+                    self.viewer.draw_polygon(path, color=obj.color1)
+                    path.append(path[0])
+                    self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
+
+        for x in [self.helipad_x1, self.helipad_x2]:
+            flagy1 = self.helipad_y
+            flagy2 = flagy1 + 50/SCALE
+            self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(1,1,1) )
+            self.viewer.draw_polygon( [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)], color=(0.8,0.8,0) )
+
+        return self.viewer.render(return_rgb_array = mode=='rgb_array')
+
+    def close(self):
+        if self.viewer is not None:
+            self.viewer.close()
+            self.viewer = None
+
+class LunarLanderContinuous(LunarLander):
+    continuous = True
+
+def heuristic(env, s):
+    # Heuristic for:
+    # 1. Testing. 
+    # 2. Demonstration rollout.
+    angle_targ = s[0]*0.5 + s[2]*1.0         # angle should point towards center (s[0] is horizontal coordinate, s[2] hor speed)
+    if angle_targ >  0.4: angle_targ =  0.4  # more than 0.4 radians (22 degrees) is bad
+    if angle_targ < -0.4: angle_targ = -0.4
+    hover_targ = 0.55*np.abs(s[0])           # target y should be proporional to horizontal offset
+
+    # PID controller: s[4] angle, s[5] angularSpeed
+    angle_todo = (angle_targ - s[4])*0.5 - (s[5])*1.0
+    #print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo))
+
+    # PID controller: s[1] vertical coordinate s[3] vertical speed
+    hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
+    #print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo))
+
+    if s[6] or s[7]: # legs have contact
+        angle_todo = 0
+        hover_todo = -(s[3])*0.5  # override to reduce fall speed, that's all we need after contact
+
+    if env.continuous:
+        a = np.array( [hover_todo*20 - 1, -angle_todo*20] )
+        a = np.clip(a, -1, +1)
+    else:
+        a = 0
+        if hover_todo > np.abs(angle_todo) and hover_todo > 0.05: a = 2
+        elif angle_todo < -0.05: a = 3
+        elif angle_todo > +0.05: a = 1
+    return a
+
+def demo_heuristic_lander(env, seed=None, render=False):
+    env.seed(seed)
+    total_reward = 0
+    steps = 0
+    s = env.reset()
+    while True:
+        a = heuristic(env, s)
+        s, r, done, info = env.step(a)
+        total_reward += r
+
+        if render:
+            still_open = env.render()
+            if still_open == False: break
+
+        if steps % 20 == 0 or done:
+            print("observations:", " ".join(["{:+0.2f}".format(x) for x in s]))
+            print("step {} total_reward {:+0.2f}".format(steps, total_reward))
+        steps += 1
+        if done: break
+    return total_reward
+
+
+if __name__ == '__main__':
+    demo_heuristic_lander(LunarLander(), render=True)
+    
+    
--- a/src/gym/envs/box2d/test_lunar_lander.py
+++ b/src/gym/envs/box2d/test_lunar_lander.py
@@ -0,0 +1,13 @@
+from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander
+
+def test_lunar_lander():
+    _test_lander(LunarLander(), seed=0)
+
+def test_lunar_lander_continuous():
+    _test_lander(LunarLanderContinuous(), seed=0)
+
+def _test_lander(env, seed=None, render=False):
+    total_reward = demo_heuristic_lander(env, seed=seed, render=render)
+    assert total_reward > 100
+
+
--- a/src/gym/envs/classic_control/init.py
+++ b/src/gym/envs/classic_control/init.py
@@ -0,0 +1,6 @@
+from gym.envs.classic_control.cartpole import CartPoleEnv
+from gym.envs.classic_control.mountain_car import MountainCarEnv
+from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
+from gym.envs.classic_control.pendulum import PendulumEnv
+from gym.envs.classic_control.acrobot import AcrobotEnv
+
--- a/src/gym/envs/classic_control/acrobot.py
+++ b/src/gym/envs/classic_control/acrobot.py
@@ -0,0 +1,304 @@
+"""classic Acrobot task"""
+from gym import core, spaces
+from gym.utils import seeding
+import numpy as np
+from numpy import sin, cos, pi
+
+__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
+__credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
+               "William Dabney", "Jonathan P. How"]
+__license__ = "BSD 3-Clause"
+__author__ = "Christoph Dann <cdann@cdann.de>"
+
+# SOURCE:
+# https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
+
+class AcrobotEnv(core.Env):
+
+    """
+    Acrobot is a 2-link pendulum with only the second joint actuated
+    Intitially, both links point downwards. The goal is to swing the
+    end-effector at a height at least the length of one link above the base.
+    Both links can swing freely and can pass by each other, i.e., they don't
+    collide when they have the same angle.
+    **STATE:**
+    The state consists of the sin() and cos() of the two rotational joint
+    angles and the joint angular velocities :
+    [cos(theta1) sin(theta1) cos(theta2) sin(theta2) thetaDot1 thetaDot2].
+    For the first link, an angle of 0 corresponds to the link pointing downwards.
+    The angle of the second link is relative to the angle of the first link.
+    An angle of 0 corresponds to having the same angle between the two links.
+    A state of [1, 0, 1, 0, ..., ...] means that both links point downwards.
+    **ACTIONS:**
+    The action is either applying +1, 0 or -1 torque on the joint between
+    the two pendulum links.
+    .. note::
+        The dynamics equations were missing some terms in the NIPS paper which
+        are present in the book. R. Sutton confirmed in personal correspondance
+        that the experimental results shown in the paper and the book were
+        generated with the equations shown in the book.
+        However, there is the option to run the domain with the paper equations
+        by setting book_or_nips = 'nips'
+    **REFERENCE:**
+    .. seealso::
+        R. Sutton: Generalization in Reinforcement Learning:
+        Successful Examples Using Sparse Coarse Coding (NIPS 1996)
+    .. seealso::
+        R. Sutton and A. G. Barto:
+        Reinforcement learning: An introduction.
+        Cambridge: MIT press, 1998.
+    .. warning::
+        This version of the domain uses the Runge-Kutta method for integrating
+        the system dynamics and is more realistic, but also considerably harder
+        than the original version which employs Euler integration,
+        see the AcrobotLegacy class.
+    """
+
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second' : 15
+    }
+
+    dt = .2
+
+    LINK_LENGTH_1 = 1.  # [m]
+    LINK_LENGTH_2 = 1.  # [m]
+    LINK_MASS_1 = 1.  #: [kg] mass of link 1
+    LINK_MASS_2 = 1.  #: [kg] mass of link 2
+    LINK_COM_POS_1 = 0.5  #: [m] position of the center of mass of link 1
+    LINK_COM_POS_2 = 0.5  #: [m] position of the center of mass of link 2
+    LINK_MOI = 1.  #: moments of inertia for both links
+
+    MAX_VEL_1 = 4 * np.pi
+    MAX_VEL_2 = 9 * np.pi
+
+    AVAIL_TORQUE = [-1., 0., +1]
+
+    torque_noise_max = 0.
+
+    #: use dynamics equations from the nips paper or the book
+    book_or_nips = "book"
+    action_arrow = None
+    domain_fig = None
+    actions_num = 3
+
+    def __init__(self):
+        self.viewer = None
+        high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
+        low = -high
+        self.observation_space = spaces.Box(low=low, high=high)
+        self.action_space = spaces.Discrete(3)
+        self.state = None
+        self.seed()
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def reset(self):
+        self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
+        return self._get_ob()
+
+    def step(self, a):
+        s = self.state
+        torque = self.AVAIL_TORQUE[a]
+
+        # Add noise to the force action
+        if self.torque_noise_max > 0:
+            torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)
+
+        # Now, augment the state with our force action so it can be passed to
+        # _dsdt
+        s_augmented = np.append(s, torque)
+
+        ns = rk4(self._dsdt, s_augmented, [0, self.dt])
+        # only care about final timestep of integration returned by integrator
+        ns = ns[-1]
+        ns = ns[:4]  # omit action
+        # ODEINT IS TOO SLOW!
+        # ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
+        # self.s_continuous = ns_continuous[-1] # We only care about the state
+        # at the ''final timestep'', self.dt
+
+        ns[0] = wrap(ns[0], -pi, pi)
+        ns[1] = wrap(ns[1], -pi, pi)
+        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
+        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
+        self.state = ns
+        terminal = self._terminal()
+        reward = -1. if not terminal else 0.
+        return (self._get_ob(), reward, terminal, {})
+
+    def _get_ob(self):
+        s = self.state
+        return np.array([cos(s[0]), np.sin(s[0]), cos(s[1]), sin(s[1]), s[2], s[3]])
+
+    def _terminal(self):
+        s = self.state
+        return bool(-np.cos(s[0]) - np.cos(s[1] + s[0]) > 1.)
+
+    def _dsdt(self, s_augmented, t):
+        m1 = self.LINK_MASS_1
+        m2 = self.LINK_MASS_2
+        l1 = self.LINK_LENGTH_1
+        lc1 = self.LINK_COM_POS_1
+        lc2 = self.LINK_COM_POS_2
+        I1 = self.LINK_MOI
+        I2 = self.LINK_MOI
+        g = 9.8
+        a = s_augmented[-1]
+        s = s_augmented[:-1]
+        theta1 = s[0]
+        theta2 = s[1]
+        dtheta1 = s[2]
+        dtheta2 = s[3]
+        d1 = m1 * lc1 ** 2 + m2 * \
+            (l1 ** 2 + lc2 ** 2 + 2 * l1 * lc2 * np.cos(theta2)) + I1 + I2
+        d2 = m2 * (lc2 ** 2 + l1 * lc2 * np.cos(theta2)) + I2
+        phi2 = m2 * lc2 * g * np.cos(theta1 + theta2 - np.pi / 2.)
+        phi1 = - m2 * l1 * lc2 * dtheta2 ** 2 * np.sin(theta2) \
+               - 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * np.sin(theta2)  \
+            + (m1 * lc1 + m2 * l1) * g * np.cos(theta1 - np.pi / 2) + phi2
+        if self.book_or_nips == "nips":
+            # the following line is consistent with the description in the
+            # paper
+            ddtheta2 = (a + d2 / d1 * phi1 - phi2) / \
+                (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
+        else:
+            # the following line is consistent with the java implementation and the
+            # book
+            ddtheta2 = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1 ** 2 * np.sin(theta2) - phi2) \
+                / (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
+        ddtheta1 = -(d2 * ddtheta2 + phi1) / d1
+        return (dtheta1, dtheta2, ddtheta1, ddtheta2, 0.)
+
+    def render(self, mode='human'):
+        from gym.envs.classic_control import rendering
+
+        s = self.state
+
+        if self.viewer is None:
+            self.viewer = rendering.Viewer(500,500)
+            bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2  # 2.2 for default
+            self.viewer.set_bounds(-bound,bound,-bound,bound)
+
+        if s is None: return None
+
+        p1 = [-self.LINK_LENGTH_1 *
+              np.cos(s[0]), self.LINK_LENGTH_1 * np.sin(s[0])]
+
+        p2 = [p1[0] - self.LINK_LENGTH_2 * np.cos(s[0] + s[1]),
+              p1[1] + self.LINK_LENGTH_2 * np.sin(s[0] + s[1])]
+
+        xys = np.array([[0,0], p1, p2])[:,::-1]
+        thetas = [s[0]-np.pi/2, s[0]+s[1]-np.pi/2]
+        link_lengths = [self.LINK_LENGTH_1, self.LINK_LENGTH_2]
+
+        self.viewer.draw_line((-2.2, 1), (2.2, 1))
+        for ((x,y),th,llen) in zip(xys, thetas, link_lengths):
+            l,r,t,b = 0, llen, .1, -.1
+            jtransform = rendering.Transform(rotation=th, translation=(x,y))
+            link = self.viewer.draw_polygon([(l,b), (l,t), (r,t), (r,b)])
+            link.add_attr(jtransform)
+            link.set_color(0,.8, .8)
+            circ = self.viewer.draw_circle(.1)
+            circ.set_color(.8, .8, 0)
+            circ.add_attr(jtransform)
+
+        return self.viewer.render(return_rgb_array = mode=='rgb_array')
+
+    def close(self):
+        if self.viewer:
+            self.viewer.close()
+            self.viewer = None
+
+def wrap(x, m, M):
+    """
+    :param x: a scalar
+    :param m: minimum possible value in range
+    :param M: maximum possible value in range
+    Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
+    truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
+    For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
+    """
+    diff = M - m
+    while x > M:
+        x = x - diff
+    while x < m:
+        x = x + diff
+    return x
+
+def bound(x, m, M=None):
+    """
+    :param x: scalar
+    Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
+    have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
+    """
+    if M is None:
+        M = m[1]
+        m = m[0]
+    # bound x between min (m) and Max (M)
+    return min(max(x, m), M)
+
+
+def rk4(derivs, y0, t, *args, **kwargs):
+    """
+    Integrate 1D or ND system of ODEs using 4-th order Runge-Kutta.
+    This is a toy implementation which may be useful if you find
+    yourself stranded on a system w/o scipy.  Otherwise use
+    :func:`scipy.integrate`.
+    *y0*
+        initial state vector
+    *t*
+        sample times
+    *derivs*
+        returns the derivative of the system and has the
+        signature ``dy = derivs(yi, ti)``
+    *args*
+        additional arguments passed to the derivative function
+    *kwargs*
+        additional keyword arguments passed to the derivative function
+    Example 1 ::
+        ## 2D system
+        def derivs6(x,t):
+            d1 =  x[0] + 2*x[1]
+            d2 =  -3*x[0] + 4*x[1]
+            return (d1, d2)
+        dt = 0.0005
+        t = arange(0.0, 2.0, dt)
+        y0 = (1,2)
+        yout = rk4(derivs6, y0, t)
+    Example 2::
+        ## 1D system
+        alpha = 2
+        def derivs(x,t):
+            return -alpha*x + exp(-t)
+        y0 = 1
+        yout = rk4(derivs, y0, t)
+    If you have access to scipy, you should probably be using the
+    scipy.integrate tools rather than this function.
+    """
+
+    try:
+        Ny = len(y0)
+    except TypeError:
+        yout = np.zeros((len(t),), np.float_)
+    else:
+        yout = np.zeros((len(t), Ny), np.float_)
+
+    yout[0] = y0
+
+
+    for i in np.arange(len(t) - 1):
+
+        thist = t[i]
+        dt = t[i + 1] - thist
+        dt2 = dt / 2.0
+        y0 = yout[i]
+
+        k1 = np.asarray(derivs(y0, thist, *args, **kwargs))
+        k2 = np.asarray(derivs(y0 + dt2 * k1, thist + dt2, *args, **kwargs))
+        k3 = np.asarray(derivs(y0 + dt2 * k2, thist + dt2, *args, **kwargs))
+        k4 = np.asarray(derivs(y0 + dt * k3, thist + dt, *args, **kwargs))
+        yout[i + 1] = y0 + dt / 6.0 * (k1 + 2 * k2 + 2 * k3 + k4)
+    return yout
--- a/src/gym/envs/classic_control/assets/clockwise.png
+++ b/src/gym/envs/classic_control/assets/clockwise.png
--- a/src/gym/envs/classic_control/cartpole.py
+++ b/src/gym/envs/classic_control/cartpole.py
@@ -0,0 +1,193 @@
+"""
+Classic cart-pole system implemented by Rich Sutton et al.
+Copied from http://incompleteideas.net/sutton/book/code/pole.c
+permalink: https://perma.cc/C9ZM-652R
+"""
+
+import math
+import gym
+from gym import spaces, logger
+from gym.utils import seeding
+import numpy as np
+
+class CartPoleEnv(gym.Env):
+    """
+    Description:
+        A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.
+
+    Source:
+        This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson
+
+    Observation: 
+        Type: Box(4)
+        Num	Observation                 Min         Max
+        0	Cart Position             -4.8            4.8
+        1	Cart Velocity             -Inf            Inf
+        2	Pole Angle                 -24°           24°
+        3	Pole Velocity At Tip      -Inf            Inf
+        
+    Actions:
+        Type: Discrete(2)
+        Num	Action
+        0	Push cart to the left
+        1	Push cart to the right
+        
+        Note: The amount the velocity is reduced or increased is not fixed as it depends on the angle the pole is pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it
+
+    Reward:
+        Reward is 1 for every step taken, including the termination step
+
+    Starting State:
+        All observations are assigned a uniform random value between ±0.05
+
+    Episode Termination:
+        Pole Angle is more than ±12°
+        Cart Position is more than ±2.4 (center of the cart reaches the edge of the display)
+        Episode length is greater than 200
+        Solved Requirements
+        Considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.
+    """
+    
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second' : 50
+    }
+
+    def __init__(self):
+        self.gravity = 9.8
+        self.masscart = 1.0
+        self.masspole = 0.1
+        self.total_mass = (self.masspole + self.masscart)
+        self.length = 0.5 # actually half the pole's length
+        self.polemass_length = (self.masspole * self.length)
+        self.force_mag = 10.0
+        self.tau = 0.02  # seconds between state updates
+        self.kinematics_integrator = 'euler'
+
+        # Angle at which to fail the episode
+        self.theta_threshold_radians = 12 * 2 * math.pi / 360
+        self.x_threshold = 2.4
+
+        # Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
+        high = np.array([
+            self.x_threshold * 2,
+            np.finfo(np.float32).max,
+            self.theta_threshold_radians * 2,
+            np.finfo(np.float32).max])
+
+        self.action_space = spaces.Discrete(2)
+        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
+
+        self.seed()
+        self.viewer = None
+        self.state = None
+
+        self.steps_beyond_done = None
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def step(self, action):
+        assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
+        state = self.state
+        x, x_dot, theta, theta_dot = state
+        force = self.force_mag if action==1 else -self.force_mag
+        costheta = math.cos(theta)
+        sintheta = math.sin(theta)
+        temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
+        thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
+        xacc  = temp - self.polemass_length * thetaacc * costheta / self.total_mass
+        if self.kinematics_integrator == 'euler':
+            x  = x + self.tau * x_dot
+            x_dot = x_dot + self.tau * xacc
+            theta = theta + self.tau * theta_dot
+            theta_dot = theta_dot + self.tau * thetaacc
+        else: # semi-implicit euler
+            x_dot = x_dot + self.tau * xacc
+            x  = x + self.tau * x_dot
+            theta_dot = theta_dot + self.tau * thetaacc
+            theta = theta + self.tau * theta_dot
+        self.state = (x,x_dot,theta,theta_dot)
+        done =  x < -self.x_threshold \
+                or x > self.x_threshold \
+                or theta < -self.theta_threshold_radians \
+                or theta > self.theta_threshold_radians
+        done = bool(done)
+
+        if not done:
+            reward = 1.0
+        elif self.steps_beyond_done is None:
+            # Pole just fell!
+            self.steps_beyond_done = 0
+            reward = 1.0
+        else:
+            if self.steps_beyond_done == 0:
+                logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
+            self.steps_beyond_done += 1
+            reward = 0.0
+
+        return np.array(self.state), reward, done, {}
+
+    def reset(self):
+        self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
+        self.steps_beyond_done = None
+        return np.array(self.state)
+
+    def render(self, mode='human'):
+        screen_width = 600
+        screen_height = 400
+
+        world_width = self.x_threshold*2
+        scale = screen_width/world_width
+        carty = 100 # TOP OF CART
+        polewidth = 10.0
+        polelen = scale * (2 * self.length)
+        cartwidth = 50.0
+        cartheight = 30.0
+
+        if self.viewer is None:
+            from gym.envs.classic_control import rendering
+            self.viewer = rendering.Viewer(screen_width, screen_height)
+            l,r,t,b = -cartwidth/2, cartwidth/2, cartheight/2, -cartheight/2
+            axleoffset =cartheight/4.0
+            cart = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
+            self.carttrans = rendering.Transform()
+            cart.add_attr(self.carttrans)
+            self.viewer.add_geom(cart)
+            l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2
+            pole = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
+            pole.set_color(.8,.6,.4)
+            self.poletrans = rendering.Transform(translation=(0, axleoffset))
+            pole.add_attr(self.poletrans)
+            pole.add_attr(self.carttrans)
+            self.viewer.add_geom(pole)
+            self.axle = rendering.make_circle(polewidth/2)
+            self.axle.add_attr(self.poletrans)
+            self.axle.add_attr(self.carttrans)
+            self.axle.set_color(.5,.5,.8)
+            self.viewer.add_geom(self.axle)
+            self.track = rendering.Line((0,carty), (screen_width,carty))
+            self.track.set_color(0,0,0)
+            self.viewer.add_geom(self.track)
+
+            self._pole_geom = pole
+
+        if self.state is None: return None
+
+        # Edit the pole polygon vertex
+        pole = self._pole_geom
+        l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2
+        pole.v = [(l,b), (l,t), (r,t), (r,b)]
+
+        x = self.state
+        cartx = x[0]*scale+screen_width/2.0 # MIDDLE OF CART
+        self.carttrans.set_translation(cartx, carty)
+        self.poletrans.set_rotation(-x[2])
+
+        return self.viewer.render(return_rgb_array = mode=='rgb_array')
+
+    def close(self):
+        if self.viewer:
+            self.viewer.close()
+            self.viewer = None
--- a/src/gym/envs/classic_control/continuous_mountain_car.py
+++ b/src/gym/envs/classic_control/continuous_mountain_car.py
@@ -0,0 +1,144 @@
+# -*- coding: utf-8 -*-
+"""
+@author: Olivier Sigaud
+
+A merge between two sources:
+
+* Adaptation of the MountainCar Environment from the "FAReinforcement" library
+of Jose Antonio Martin H. (version 1.0), adapted by  'Tom Schaul, tom@idsia.ch'
+and then modified by Arnaud de Broissia
+
+* the OpenAI/gym MountainCar environment
+itself from 
+http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
+permalink: https://perma.cc/6Z2N-PFWC
+"""
+
+import math
+import gym
+from gym import spaces
+from gym.utils import seeding
+import numpy as np
+
+class Continuous_MountainCarEnv(gym.Env):
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second': 30
+    }
+
+    def __init__(self):
+        self.min_action = -1.0
+        self.max_action = 1.0
+        self.min_position = -1.2
+        self.max_position = 0.6
+        self.max_speed = 0.07
+        self.goal_position = 0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
+        self.power = 0.0015
+
+        self.low_state = np.array([self.min_position, -self.max_speed])
+        self.high_state = np.array([self.max_position, self.max_speed])
+
+        self.viewer = None
+
+        self.action_space = spaces.Box(low=self.min_action, high=self.max_action, shape=(1,))
+        self.observation_space = spaces.Box(low=self.low_state, high=self.high_state)
+
+        self.seed()
+        self.reset()
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def step(self, action):
+
+        position = self.state[0]
+        velocity = self.state[1]
+        force = min(max(action[0], -1.0), 1.0)
+
+        velocity += force*self.power -0.0025 * math.cos(3*position)
+        if (velocity > self.max_speed): velocity = self.max_speed
+        if (velocity < -self.max_speed): velocity = -self.max_speed
+        position += velocity
+        if (position > self.max_position): position = self.max_position
+        if (position < self.min_position): position = self.min_position
+        if (position==self.min_position and velocity<0): velocity = 0
+
+        done = bool(position >= self.goal_position)
+
+        reward = 0
+        if done:
+            reward = 100.0
+        reward-= math.pow(action[0],2)*0.1
+
+        self.state = np.array([position, velocity])
+        return self.state, reward, done, {}
+
+    def reset(self):
+        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
+        return np.array(self.state)
+
+#    def get_state(self):
+#        return self.state
+
+    def _height(self, xs):
+        return np.sin(3 * xs)*.45+.55
+
+    def render(self, mode='human'):
+        screen_width = 600
+        screen_height = 400
+
+        world_width = self.max_position - self.min_position
+        scale = screen_width/world_width
+        carwidth=40
+        carheight=20
+
+
+        if self.viewer is None:
+            from gym.envs.classic_control import rendering
+            self.viewer = rendering.Viewer(screen_width, screen_height)
+            xs = np.linspace(self.min_position, self.max_position, 100)
+            ys = self._height(xs)
+            xys = list(zip((xs-self.min_position)*scale, ys*scale))
+
+            self.track = rendering.make_polyline(xys)
+            self.track.set_linewidth(4)
+            self.viewer.add_geom(self.track)
+
+            clearance = 10
+
+            l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
+            car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
+            car.add_attr(rendering.Transform(translation=(0, clearance)))
+            self.cartrans = rendering.Transform()
+            car.add_attr(self.cartrans)
+            self.viewer.add_geom(car)
+            frontwheel = rendering.make_circle(carheight/2.5)
+            frontwheel.set_color(.5, .5, .5)
+            frontwheel.add_attr(rendering.Transform(translation=(carwidth/4,clearance)))
+            frontwheel.add_attr(self.cartrans)
+            self.viewer.add_geom(frontwheel)
+            backwheel = rendering.make_circle(carheight/2.5)
+            backwheel.add_attr(rendering.Transform(translation=(-carwidth/4,clearance)))
+            backwheel.add_attr(self.cartrans)
+            backwheel.set_color(.5, .5, .5)
+            self.viewer.add_geom(backwheel)
+            flagx = (self.goal_position-self.min_position)*scale
+            flagy1 = self._height(self.goal_position)*scale
+            flagy2 = flagy1 + 50
+            flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
+            self.viewer.add_geom(flagpole)
+            flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10), (flagx+25, flagy2-5)])
+            flag.set_color(.8,.8,0)
+            self.viewer.add_geom(flag)
+
+        pos = self.state[0]
+        self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
+        self.cartrans.set_rotation(math.cos(3 * pos))
+
+        return self.viewer.render(return_rgb_array = mode=='rgb_array')
+
+    def close(self):
+        if self.viewer:
+            self.viewer.close()
+            self.viewer = None
--- a/src/gym/envs/classic_control/mountain_car.py
+++ b/src/gym/envs/classic_control/mountain_car.py
@@ -0,0 +1,119 @@
+"""
+http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
+permalink: https://perma.cc/6Z2N-PFWC
+"""
+
+import math
+import gym
+from gym import spaces
+from gym.utils import seeding
+import numpy as np
+
+class MountainCarEnv(gym.Env):
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second': 30
+    }
+
+    def __init__(self):
+        self.min_position = -1.2
+        self.max_position = 0.6
+        self.max_speed = 0.07
+        self.goal_position = 0.5
+
+        self.low = np.array([self.min_position, -self.max_speed])
+        self.high = np.array([self.max_position, self.max_speed])
+
+        self.viewer = None
+
+        self.action_space = spaces.Discrete(3)
+        self.observation_space = spaces.Box(self.low, self.high)
+
+        self.seed()
+        self.reset()
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def step(self, action):
+        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
+
+        position, velocity = self.state
+        velocity += (action-1)*0.001 + math.cos(3*position)*(-0.0025)
+        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
+        position += velocity
+        position = np.clip(position, self.min_position, self.max_position)
+        if (position==self.min_position and velocity<0): velocity = 0
+
+        done = bool(position >= self.goal_position)
+        reward = -1.0
+
+        self.state = (position, velocity)
+        return np.array(self.state), reward, done, {}
+
+    def reset(self):
+        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
+        return np.array(self.state)
+
+    def _height(self, xs):
+        return np.sin(3 * xs)*.45+.55
+
+    def render(self, mode='human'):
+        screen_width = 600
+        screen_height = 400
+
+        world_width = self.max_position - self.min_position
+        scale = screen_width/world_width
+        carwidth=40
+        carheight=20
+
+
+        if self.viewer is None:
+            from gym.envs.classic_control import rendering
+            self.viewer = rendering.Viewer(screen_width, screen_height)
+            xs = np.linspace(self.min_position, self.max_position, 100)
+            ys = self._height(xs)
+            xys = list(zip((xs-self.min_position)*scale, ys*scale))
+
+            self.track = rendering.make_polyline(xys)
+            self.track.set_linewidth(4)
+            self.viewer.add_geom(self.track)
+
+            clearance = 10
+
+            l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
+            car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
+            car.add_attr(rendering.Transform(translation=(0, clearance)))
+            self.cartrans = rendering.Transform()
+            car.add_attr(self.cartrans)
+            self.viewer.add_geom(car)
+            frontwheel = rendering.make_circle(carheight/2.5)
+            frontwheel.set_color(.5, .5, .5)
+            frontwheel.add_attr(rendering.Transform(translation=(carwidth/4,clearance)))
+            frontwheel.add_attr(self.cartrans)
+            self.viewer.add_geom(frontwheel)
+            backwheel = rendering.make_circle(carheight/2.5)
+            backwheel.add_attr(rendering.Transform(translation=(-carwidth/4,clearance)))
+            backwheel.add_attr(self.cartrans)
+            backwheel.set_color(.5, .5, .5)
+            self.viewer.add_geom(backwheel)
+            flagx = (self.goal_position-self.min_position)*scale
+            flagy1 = self._height(self.goal_position)*scale
+            flagy2 = flagy1 + 50
+            flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
+            self.viewer.add_geom(flagpole)
+            flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10), (flagx+25, flagy2-5)])
+            flag.set_color(.8,.8,0)
+            self.viewer.add_geom(flag)
+
+        pos = self.state[0]
+        self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
+        self.cartrans.set_rotation(math.cos(3 * pos))
+
+        return self.viewer.render(return_rgb_array = mode=='rgb_array')
+
+    def close(self):
+        if self.viewer:
+            self.viewer.close()
+            self.viewer = None
--- a/src/gym/envs/classic_control/pendulum.py
+++ b/src/gym/envs/classic_control/pendulum.py
@@ -0,0 +1,90 @@
+import gym
+from gym import spaces
+from gym.utils import seeding
+import numpy as np
+from os import path
+
+class PendulumEnv(gym.Env):
+    metadata = {
+        'render.modes' : ['human', 'rgb_array'],
+        'video.frames_per_second' : 30
+    }
+
+    def __init__(self):
+        self.max_speed=8
+        self.max_torque=2.
+        self.dt=.05
+        self.viewer = None
+
+        high = np.array([1., 1., self.max_speed])
+        self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32)
+        self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)
+
+        self.seed()
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def step(self,u):
+        th, thdot = self.state # th := theta
+
+        g = 10.
+        m = 1.
+        l = 1.
+        dt = self.dt
+
+        u = np.clip(u, -self.max_torque, self.max_torque)[0]
+        self.last_u = u # for rendering
+        costs = angle_normalize(th)**2 + .1*thdot**2 + .001*(u**2)
+
+        newthdot = thdot + (-3*g/(2*l) * np.sin(th + np.pi) + 3./(m*l**2)*u) * dt
+        newth = th + newthdot*dt
+        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) #pylint: disable=E1111
+
+        self.state = np.array([newth, newthdot])
+        return self._get_obs(), -costs, False, {}
+
+    def reset(self):
+        high = np.array([np.pi, 1])
+        self.state = self.np_random.uniform(low=-high, high=high)
+        self.last_u = None
+        return self._get_obs()
+
+    def _get_obs(self):
+        theta, thetadot = self.state
+        return np.array([np.cos(theta), np.sin(theta), thetadot])
+
+    def render(self, mode='human'):
+
+        if self.viewer is None:
+            from gym.envs.classic_control import rendering
+            self.viewer = rendering.Viewer(500,500)
+            self.viewer.set_bounds(-2.2,2.2,-2.2,2.2)
+            rod = rendering.make_capsule(1, .2)
+            rod.set_color(.8, .3, .3)
+            self.pole_transform = rendering.Transform()
+            rod.add_attr(self.pole_transform)
+            self.viewer.add_geom(rod)
+            axle = rendering.make_circle(.05)
+            axle.set_color(0,0,0)
+            self.viewer.add_geom(axle)
+            fname = path.join(path.dirname(__file__), "assets/clockwise.png")
+            self.img = rendering.Image(fname, 1., 1.)
+            self.imgtrans = rendering.Transform()
+            self.img.add_attr(self.imgtrans)
+
+        self.viewer.add_onetime(self.img)
+        self.pole_transform.set_rotation(self.state[0] + np.pi/2)
+        if self.last_u:
+            self.imgtrans.scale = (-self.last_u/2, np.abs(self.last_u)/2)
+
+        return self.viewer.render(return_rgb_array = mode=='rgb_array')
+
+    def close(self):
+        if self.viewer:
+            self.viewer.close()
+            self.viewer = None
+
+def angle_normalize(x):
+    return (((x+np.pi) % (2*np.pi)) - np.pi)
--- a/src/gym/envs/classic_control/rendering.py
+++ b/src/gym/envs/classic_control/rendering.py
@@ -0,0 +1,359 @@
+"""
+2D rendering framework
+"""
+from __future__ import division
+import os
+import six
+import sys
+
+if "Apple" in sys.version:
+    if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
+        os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
+        # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
+
+from gym.utils import reraise
+from gym import error
+
+try:
+    import pyglet
+except ImportError as e:
+    reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
+
+try:
+    from pyglet.gl import *
+except ImportError as e:
+    reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'")
+
+import math
+import numpy as np
+
+RAD2DEG = 57.29577951308232
+
+def get_display(spec):
+    """Convert a display specification (such as :0) into an actual Display
+    object.
+
+    Pyglet only supports multiple Displays on Linux.
+    """
+    if spec is None:
+        return None
+    elif isinstance(spec, six.string_types):
+        return pyglet.canvas.Display(spec)
+    else:
+        raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
+
+class Viewer(object):
+    def __init__(self, width, height, display=None):
+        display = get_display(display)
+
+        self.width = width
+        self.height = height
+        self.window = pyglet.window.Window(width=width, height=height, display=display)
+        self.window.on_close = self.window_closed_by_user
+        self.isopen = True
+        self.geoms = []
+        self.onetime_geoms = []
+        self.transform = Transform()
+
+        glEnable(GL_BLEND)
+        glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
+
+    def close(self):
+        self.window.close()
+
+    def window_closed_by_user(self):
+        self.isopen = False
+
+    def set_bounds(self, left, right, bottom, top):
+        assert right > left and top > bottom
+        scalex = self.width/(right-left)
+        scaley = self.height/(top-bottom)
+        self.transform = Transform(
+            translation=(-left*scalex, -bottom*scaley),
+            scale=(scalex, scaley))
+
+    def add_geom(self, geom):
+        self.geoms.append(geom)
+
+    def add_onetime(self, geom):
+        self.onetime_geoms.append(geom)
+
+    def render(self, return_rgb_array=False):
+        glClearColor(1,1,1,1)
+        self.window.clear()
+        self.window.switch_to()
+        self.window.dispatch_events()
+        self.transform.enable()
+        for geom in self.geoms:
+            geom.render()
+        for geom in self.onetime_geoms:
+            geom.render()
+        self.transform.disable()
+        arr = None
+        if return_rgb_array:
+            buffer = pyglet.image.get_buffer_manager().get_color_buffer()
+            image_data = buffer.get_image_data()
+            arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
+            # In https://github.com/openai/gym-http-api/issues/2, we
+            # discovered that someone using Xmonad on Arch was having
+            # a window of size 598 x 398, though a 600 x 400 window
+            # was requested. (Guess Xmonad was preserving a pixel for
+            # the boundary.) So we use the buffer height/width rather
+            # than the requested one.
+            arr = arr.reshape(buffer.height, buffer.width, 4)
+            arr = arr[::-1,:,0:3]
+        self.window.flip()
+        self.onetime_geoms = []
+        return arr if return_rgb_array else self.isopen
+
+    # Convenience
+    def draw_circle(self, radius=10, res=30, filled=True, **attrs):
+        geom = make_circle(radius=radius, res=res, filled=filled)
+        _add_attrs(geom, attrs)
+        self.add_onetime(geom)
+        return geom
+
+    def draw_polygon(self, v, filled=True, **attrs):
+        geom = make_polygon(v=v, filled=filled)
+        _add_attrs(geom, attrs)
+        self.add_onetime(geom)
+        return geom
+
+    def draw_polyline(self, v, **attrs):
+        geom = make_polyline(v=v)
+        _add_attrs(geom, attrs)
+        self.add_onetime(geom)
+        return geom
+
+    def draw_line(self, start, end, **attrs):
+        geom = Line(start, end)
+        _add_attrs(geom, attrs)
+        self.add_onetime(geom)
+        return geom
+
+    def get_array(self):
+        self.window.flip()
+        image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
+        self.window.flip()
+        arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
+        arr = arr.reshape(self.height, self.width, 4)
+        return arr[::-1,:,0:3]
+
+    def __del__(self):
+        self.close()
+
+def _add_attrs(geom, attrs):
+    if "color" in attrs:
+        geom.set_color(*attrs["color"])
+    if "linewidth" in attrs:
+        geom.set_linewidth(attrs["linewidth"])
+
+class Geom(object):
+    def __init__(self):
+        self._color=Color((0, 0, 0, 1.0))
+        self.attrs = [self._color]
+    def render(self):
+        for attr in reversed(self.attrs):
+            attr.enable()
+        self.render1()
+        for attr in self.attrs:
+            attr.disable()
+    def render1(self):
+        raise NotImplementedError
+    def add_attr(self, attr):
+        self.attrs.append(attr)
+    def set_color(self, r, g, b):
+        self._color.vec4 = (r, g, b, 1)
+
+class Attr(object):
+    def enable(self):
+        raise NotImplementedError
+    def disable(self):
+        pass
+
+class Transform(Attr):
+    def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
+        self.set_translation(*translation)
+        self.set_rotation(rotation)
+        self.set_scale(*scale)
+    def enable(self):
+        glPushMatrix()
+        glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc ppint
+        glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
+        glScalef(self.scale[0], self.scale[1], 1)
+    def disable(self):
+        glPopMatrix()
+    def set_translation(self, newx, newy):
+        self.translation = (float(newx), float(newy))
+    def set_rotation(self, new):
+        self.rotation = float(new)
+    def set_scale(self, newx, newy):
+        self.scale = (float(newx), float(newy))
+
+class Color(Attr):
+    def __init__(self, vec4):
+        self.vec4 = vec4
+    def enable(self):
+        glColor4f(*self.vec4)
+
+class LineStyle(Attr):
+    def __init__(self, style):
+        self.style = style
+    def enable(self):
+        glEnable(GL_LINE_STIPPLE)
+        glLineStipple(1, self.style)
+    def disable(self):
+        glDisable(GL_LINE_STIPPLE)
+
+class LineWidth(Attr):
+    def __init__(self, stroke):
+        self.stroke = stroke
+    def enable(self):
+        glLineWidth(self.stroke)
+
+class Point(Geom):
+    def __init__(self):
+        Geom.__init__(self)
+    def render1(self):
+        glBegin(GL_POINTS) # draw point
+        glVertex3f(0.0, 0.0, 0.0)
+        glEnd()
+
+class FilledPolygon(Geom):
+    def __init__(self, v):
+        Geom.__init__(self)
+        self.v = v
+    def render1(self):
+        if   len(self.v) == 4 : glBegin(GL_QUADS)
+        elif len(self.v)  > 4 : glBegin(GL_POLYGON)
+        else: glBegin(GL_TRIANGLES)
+        for p in self.v:
+            glVertex3f(p[0], p[1],0)  # draw each vertex
+        glEnd()
+
+def make_circle(radius=10, res=30, filled=True):
+    points = []
+    for i in range(res):
+        ang = 2*math.pi*i / res
+        points.append((math.cos(ang)*radius, math.sin(ang)*radius))
+    if filled:
+        return FilledPolygon(points)
+    else:
+        return PolyLine(points, True)
+
+def make_polygon(v, filled=True):
+    if filled: return FilledPolygon(v)
+    else: return PolyLine(v, True)
+
+def make_polyline(v):
+    return PolyLine(v, False)
+
+def make_capsule(length, width):
+    l, r, t, b = 0, length, width/2, -width/2
+    box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
+    circ0 = make_circle(width/2)
+    circ1 = make_circle(width/2)
+    circ1.add_attr(Transform(translation=(length, 0)))
+    geom = Compound([box, circ0, circ1])
+    return geom
+
+class Compound(Geom):
+    def __init__(self, gs):
+        Geom.__init__(self)
+        self.gs = gs
+        for g in self.gs:
+            g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
+    def render1(self):
+        for g in self.gs:
+            g.render()
+
+class PolyLine(Geom):
+    def __init__(self, v, close):
+        Geom.__init__(self)
+        self.v = v
+        self.close = close
+        self.linewidth = LineWidth(1)
+        self.add_attr(self.linewidth)
+    def render1(self):
+        glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
+        for p in self.v:
+            glVertex3f(p[0], p[1],0)  # draw each vertex
+        glEnd()
+    def set_linewidth(self, x):
+        self.linewidth.stroke = x
+
+class Line(Geom):
+    def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
+        Geom.__init__(self)
+        self.start = start
+        self.end = end
+        self.linewidth = LineWidth(1)
+        self.add_attr(self.linewidth)
+
+    def render1(self):
+        glBegin(GL_LINES)
+        glVertex2f(*self.start)
+        glVertex2f(*self.end)
+        glEnd()
+
+class Image(Geom):
+    def __init__(self, fname, width, height):
+        Geom.__init__(self)
+        self.width = width
+        self.height = height
+        img = pyglet.image.load(fname)
+        self.img = img
+        self.flip = False
+    def render1(self):
+        self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
+
+# ================================================================
+
+class SimpleImageViewer(object):
+    def __init__(self, display=None, maxwidth=500):
+        self.window = None
+        self.isopen = False
+        self.display = display
+        self.maxwidth = maxwidth
+    def imshow(self, arr):
+        if self.window is None:
+            height, width, _channels = arr.shape
+            if width > self.maxwidth:
+                scale = self.maxwidth / width
+                width = int(scale * width)
+                height = int(scale * height)
+            self.window = pyglet.window.Window(width=width, height=height, 
+                display=self.display, vsync=False, resizable=True)            
+            self.width = width
+            self.height = height
+            self.isopen = True
+
+            @self.window.event
+            def on_resize(width, height):
+                self.width = width
+                self.height = height
+
+            @self.window.event
+            def on_close():
+                self.isopen = False
+
+        assert len(arr.shape) == 3, "You passed in an image with the wrong number shape"
+        image = pyglet.image.ImageData(arr.shape[1], arr.shape[0], 
+            'RGB', arr.tobytes(), pitch=arr.shape[1]*-3)
+        gl.glTexParameteri(gl.GL_TEXTURE_2D, 
+            gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST)
+        texture = image.get_texture()
+        texture.width = self.width
+        texture.height = self.height
+        self.window.clear()
+        self.window.switch_to()
+        self.window.dispatch_events()
+        texture.blit(0, 0) # draw
+        self.window.flip()
+    def close(self):
+        if self.isopen:
+            self.window.close()
+            self.isopen = False
+
+    def __del__(self):
+        self.close()
--- a/src/gym/envs/mujoco/init.py
+++ b/src/gym/envs/mujoco/init.py
@@ -0,0 +1,16 @@
+from gym.envs.mujoco.mujoco_env import MujocoEnv
+# ^^^^^ so that user gets the correct error
+# message if mujoco is not installed correctly
+from gym.envs.mujoco.ant import AntEnv
+from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
+from gym.envs.mujoco.hopper import HopperEnv
+from gym.envs.mujoco.walker2d import Walker2dEnv
+from gym.envs.mujoco.humanoid import HumanoidEnv
+from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
+from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
+from gym.envs.mujoco.reacher import ReacherEnv
+from gym.envs.mujoco.swimmer import SwimmerEnv
+from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
+from gym.envs.mujoco.pusher import PusherEnv
+from gym.envs.mujoco.thrower import ThrowerEnv
+from gym.envs.mujoco.striker import StrikerEnv
--- a/src/gym/envs/mujoco/ant.py
+++ b/src/gym/envs/mujoco/ant.py
@@ -0,0 +1,45 @@
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
+        utils.EzPickle.__init__(self)
+
+    def step(self, a):
+        xposbefore = self.get_body_com("torso")[0]
+        self.do_simulation(a, self.frame_skip)
+        xposafter = self.get_body_com("torso")[0]
+        forward_reward = (xposafter - xposbefore)/self.dt
+        ctrl_cost = .5 * np.square(a).sum()
+        contact_cost = 0.5 * 1e-3 * np.sum(
+            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
+        survive_reward = 1.0
+        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
+        state = self.state_vector()
+        notdone = np.isfinite(state).all() \
+            and state[2] >= 0.2 and state[2] <= 1.0
+        done = not notdone
+        ob = self._get_obs()
+        return ob, reward, done, dict(
+            reward_forward=forward_reward,
+            reward_ctrl=-ctrl_cost,
+            reward_contact=-contact_cost,
+            reward_survive=survive_reward)
+
+    def _get_obs(self):
+        return np.concatenate([
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
+            np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
+        ])
+
+    def reset_model(self):
+        qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
+        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        self.viewer.cam.distance = self.model.stat.extent * 0.5
--- a/src/gym/envs/mujoco/assets/ant.xml
+++ b/src/gym/envs/mujoco/assets/ant.xml
@@ -0,0 +1,81 @@
+<mujoco model="ant">
+  <compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
+  <option integrator="RK4" timestep="0.01"/>
+  <custom>
+    <numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
+  </custom>
+  <default>
+    <joint armature="1" damping="1" limited="true"/>
+    <geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
+  </default>
+  <asset>
+    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
+    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+    <material name="geom" texture="texgeom" texuniform="true"/>
+  </asset>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
+    <body name="torso" pos="0 0 0.75">
+      <camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
+      <geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
+      <joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
+      <body name="front_left_leg" pos="0 0 0">
+        <geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
+        <body name="aux_1" pos="0.2 0.2 0">
+          <joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
+          <geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
+          <body pos="0.2 0.2 0">
+            <joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
+            <geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
+          </body>
+        </body>
+      </body>
+      <body name="front_right_leg" pos="0 0 0">
+        <geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
+        <body name="aux_2" pos="-0.2 0.2 0">
+          <joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
+          <geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
+          <body pos="-0.2 0.2 0">
+            <joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
+            <geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
+          </body>
+        </body>
+      </body>
+      <body name="back_leg" pos="0 0 0">
+        <geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
+        <body name="aux_3" pos="-0.2 -0.2 0">
+          <joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
+          <geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
+          <body pos="-0.2 -0.2 0">
+            <joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
+            <geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
+          </body>
+        </body>
+      </body>
+      <body name="right_back_leg" pos="0 0 0">
+        <geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
+        <body name="aux_4" pos="0.2 -0.2 0">
+          <joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
+          <geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
+          <body pos="0.2 -0.2 0">
+            <joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
+            <geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <actuator>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
+  </actuator>
+</mujoco>
--- a/src/gym/envs/mujoco/assets/half_cheetah.xml
+++ b/src/gym/envs/mujoco/assets/half_cheetah.xml
@@ -0,0 +1,96 @@
+<!-- Cheetah Model
+
+    The state space is populated with joints in the order that they are
+    defined in this file. The actuators also operate on joints.
+
+    State-Space (name/joint/parameter):
+        - rootx     slider      position (m)
+        - rootz     slider      position (m)
+        - rooty     hinge       angle (rad)
+        - bthigh    hinge       angle (rad)
+        - bshin     hinge       angle (rad)
+        - bfoot     hinge       angle (rad)
+        - fthigh    hinge       angle (rad)
+        - fshin     hinge       angle (rad)
+        - ffoot     hinge       angle (rad)
+        - rootx     slider      velocity (m/s)
+        - rootz     slider      velocity (m/s)
+        - rooty     hinge       angular velocity (rad/s)
+        - bthigh    hinge       angular velocity (rad/s)
+        - bshin     hinge       angular velocity (rad/s)
+        - bfoot     hinge       angular velocity (rad/s)
+        - fthigh    hinge       angular velocity (rad/s)
+        - fshin     hinge       angular velocity (rad/s)
+        - ffoot     hinge       angular velocity (rad/s)
+
+    Actuators (name/actuator/parameter):
+        - bthigh    hinge       torque (N m)
+        - bshin     hinge       torque (N m)
+        - bfoot     hinge       torque (N m)
+        - fthigh    hinge       torque (N m)
+        - fshin     hinge       torque (N m)
+        - ffoot     hinge       torque (N m)
+
+-->
+<mujoco model="cheetah">
+  <compiler angle="radian" coordinate="local" inertiafromgeom="true" settotalmass="14"/>
+  <default>
+    <joint armature=".1" damping=".01" limited="true" solimplimit="0 .8 .03" solreflimit=".02 1" stiffness="8"/>
+    <geom conaffinity="0" condim="3" contype="1" friction=".4 .1 .1" rgba="0.8 0.6 .4 1" solimp="0.0 0.8 0.01" solref="0.02 1"/>
+    <motor ctrllimited="true" ctrlrange="-1 1"/>
+  </default>
+  <size nstack="300000" nuser_geom="1"/>
+  <option gravity="0 0 -9.81" timestep="0.01"/>
+  <asset>
+    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
+    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+    <material name="geom" texture="texgeom" texuniform="true"/>
+  </asset>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
+    <body name="torso" pos="0 0 .7">
+      <camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
+      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
+      <geom fromto="-.5 0 0 .5 0 0" name="torso" size="0.046" type="capsule"/>
+      <geom axisangle="0 1 0 .87" name="head" pos=".6 0 .1" size="0.046 .15" type="capsule"/>
+      <!-- <site name='tip'  pos='.15 0 .11'/>-->
+      <body name="bthigh" pos="-.5 0 0">
+        <joint axis="0 1 0" damping="6" name="bthigh" pos="0 0 0" range="-.52 1.05" stiffness="240" type="hinge"/>
+        <geom axisangle="0 1 0 -3.8" name="bthigh" pos=".1 0 -.13" size="0.046 .145" type="capsule"/>
+        <body name="bshin" pos=".16 0 -.25">
+          <joint axis="0 1 0" damping="4.5" name="bshin" pos="0 0 0" range="-.785 .785" stiffness="180" type="hinge"/>
+          <geom axisangle="0 1 0 -2.03" name="bshin" pos="-.14 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .15" type="capsule"/>
+          <body name="bfoot" pos="-.28 0 -.14">
+            <joint axis="0 1 0" damping="3" name="bfoot" pos="0 0 0" range="-.4 .785" stiffness="120" type="hinge"/>
+            <geom axisangle="0 1 0 -.27" name="bfoot" pos=".03 0 -.097" rgba="0.9 0.6 0.6 1" size="0.046 .094" type="capsule"/>
+          </body>
+        </body>
+      </body>
+      <body name="fthigh" pos=".5 0 0">
+        <joint axis="0 1 0" damping="4.5" name="fthigh" pos="0 0 0" range="-1 .7" stiffness="180" type="hinge"/>
+        <geom axisangle="0 1 0 .52" name="fthigh" pos="-.07 0 -.12" size="0.046 .133" type="capsule"/>
+        <body name="fshin" pos="-.14 0 -.24">
+          <joint axis="0 1 0" damping="3" name="fshin" pos="0 0 0" range="-1.2 .87" stiffness="120" type="hinge"/>
+          <geom axisangle="0 1 0 -.6" name="fshin" pos=".065 0 -.09" rgba="0.9 0.6 0.6 1" size="0.046 .106" type="capsule"/>
+          <body name="ffoot" pos=".13 0 -.18">
+            <joint axis="0 1 0" damping="1.5" name="ffoot" pos="0 0 0" range="-.5 .5" stiffness="60" type="hinge"/>
+            <geom axisangle="0 1 0 -.6" name="ffoot" pos=".045 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .07" type="capsule"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <actuator>
+    <motor gear="120" joint="bthigh" name="bthigh"/>
+    <motor gear="90" joint="bshin" name="bshin"/>
+    <motor gear="60" joint="bfoot" name="bfoot"/>
+    <motor gear="120" joint="fthigh" name="fthigh"/>
+    <motor gear="60" joint="fshin" name="fshin"/>
+    <motor gear="30" joint="ffoot" name="ffoot"/>
+  </actuator>
+</mujoco>
--- a/src/gym/envs/mujoco/assets/hopper.xml
+++ b/src/gym/envs/mujoco/assets/hopper.xml
@@ -0,0 +1,48 @@
+<mujoco model="hopper">
+  <compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
+  <default>
+    <joint armature="1" damping="1" limited="true"/>
+    <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
+    <motor ctrllimited="true" ctrlrange="-.4 .4"/>
+  </default>
+  <option integrator="RK4" timestep="0.002"/>
+  <visual>
+    <map znear="0.02"/>
+  </visual>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
+    <body name="torso" pos="0 0 1.25">
+      <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
+      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
+      <geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
+      <body name="thigh" pos="0 0 1.05">
+        <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
+        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
+        <body name="leg" pos="0 0 0.35">
+          <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
+          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
+          <body name="foot" pos="0.13/2 0 0.1">
+            <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
+            <geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <actuator>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
+  </actuator>
+    <asset>
+        <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
+            width="100" height="100"/>
+        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+        <material name="geom" texture="texgeom" texuniform="true"/>
+    </asset>
+</mujoco>
--- a/src/gym/envs/mujoco/assets/humanoid.xml
+++ b/src/gym/envs/mujoco/assets/humanoid.xml
@@ -0,0 +1,121 @@
+<mujoco model="humanoid">
+    <compiler angle="degree" inertiafromgeom="true"/>
+    <default>
+        <joint armature="1" damping="1" limited="true"/>
+        <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
+        <motor ctrllimited="true" ctrlrange="-.4 .4"/>
+    </default>
+    <option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
+        <!-- <flags solverstat="enable" energy="enable"/>-->
+    </option>
+    <size nkey="5" nuser_geom="1"/>
+    <visual>
+        <map fogend="5" fogstart="3"/>
+    </visual>
+    <asset>
+        <texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
+        <!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
+        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+        <material name="geom" texture="texgeom" texuniform="true"/>
+    </asset>
+    <worldbody>
+        <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+        <geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
+        <!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
+        <body name="torso" pos="0 0 1.4">
+            <camera name="track" mode="trackcom" pos="0 -4 0" xyaxes="1 0 0 0 0 1"/>
+            <joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
+            <geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
+            <geom name="head" pos="0 0 .19" size=".09" type="sphere" user="258"/>
+            <geom fromto="-.01 -.06 -.12 -.01 .06 -.12" name="uwaist" size="0.06" type="capsule"/>
+            <body name="lwaist" pos="-.01 0 -0.260" quat="1.000 0 -0.002 0">
+                <geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
+                <joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
+                <joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
+                <body name="pelvis" pos="0 0 -0.165" quat="1.000 0 -0.002 0">
+                    <joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
+                    <geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
+                    <body name="right_thigh" pos="0 -0.1 -0.04">
+                        <joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
+                        <joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
+                        <geom fromto="0 0 0 0 0.01 -.34" name="right_thigh1" size="0.06" type="capsule"/>
+                        <body name="right_shin" pos="0 0.01 -0.403">
+                            <joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
+                            <geom fromto="0 0 0 0 0 -.3" name="right_shin1" size="0.049" type="capsule"/>
+                            <body name="right_foot" pos="0 0 -0.45">
+                                <geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
+                            </body>
+                        </body>
+                    </body>
+                    <body name="left_thigh" pos="0 0.1 -0.04">
+                        <joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
+                        <geom fromto="0 0 0 0 -0.01 -.34" name="left_thigh1" size="0.06" type="capsule"/>
+                        <body name="left_shin" pos="0 -0.01 -0.403">
+                            <joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
+                            <geom fromto="0 0 0 0 0 -.3" name="left_shin1" size="0.049" type="capsule"/>
+                            <body name="left_foot" pos="0 0 -0.45">
+                                <geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+            <body name="right_upper_arm" pos="0 -0.17 0.06">
+                <joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
+                <joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
+                <geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
+                <body name="right_lower_arm" pos=".18 -.18 -.18">
+                    <joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
+                    <geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
+                    <geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
+                    <camera pos="0 0 0"/>
+                </body>
+            </body>
+            <body name="left_upper_arm" pos="0 0.17 0.06">
+                <joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
+                <joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
+                <geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
+                <body name="left_lower_arm" pos=".18 .18 -.18">
+                    <joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
+                    <geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
+                    <geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
+                </body>
+            </body>
+        </body>
+    </worldbody>
+    <tendon>
+        <fixed name="left_hipknee">
+            <joint coef="-1" joint="left_hip_y"/>
+            <joint coef="1" joint="left_knee"/>
+        </fixed>
+        <fixed name="right_hipknee">
+            <joint coef="-1" joint="right_hip_y"/>
+            <joint coef="1" joint="right_knee"/>
+        </fixed>
+    </tendon>
+
+    <actuator>
+        <motor gear="100" joint="abdomen_y" name="abdomen_y"/>
+        <motor gear="100" joint="abdomen_z" name="abdomen_z"/>
+        <motor gear="100" joint="abdomen_x" name="abdomen_x"/>
+        <motor gear="100" joint="right_hip_x" name="right_hip_x"/>
+        <motor gear="100" joint="right_hip_z" name="right_hip_z"/>
+        <motor gear="300" joint="right_hip_y" name="right_hip_y"/>
+        <motor gear="200" joint="right_knee" name="right_knee"/>
+        <motor gear="100" joint="left_hip_x" name="left_hip_x"/>
+        <motor gear="100" joint="left_hip_z" name="left_hip_z"/>
+        <motor gear="300" joint="left_hip_y" name="left_hip_y"/>
+        <motor gear="200" joint="left_knee" name="left_knee"/>
+        <motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
+        <motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
+        <motor gear="25" joint="right_elbow" name="right_elbow"/>
+        <motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
+        <motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
+        <motor gear="25" joint="left_elbow" name="left_elbow"/>
+    </actuator>
+</mujoco>
--- a/src/gym/envs/mujoco/assets/humanoidstandup.xml
+++ b/src/gym/envs/mujoco/assets/humanoidstandup.xml
@@ -0,0 +1,121 @@
+<mujoco model="humanoidstandup">
+    <compiler angle="degree" inertiafromgeom="true"/>
+    <default>
+        <joint armature="1" damping="1" limited="true"/>
+        <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
+        <motor ctrllimited="true" ctrlrange="-.4 .4"/>
+    </default>
+    <option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
+        <!-- <flags solverstat="enable" energy="enable"/>-->
+    </option>
+    <size nkey="5" nuser_geom="1"/>
+    <visual>
+        <map fogend="5" fogstart="3"/>
+    </visual>
+    <asset>
+        <texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
+        <!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
+        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+        <material name="geom" texture="texgeom" texuniform="true"/>
+    </asset>
+    <worldbody>
+        <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+        <geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
+        <!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
+        <body name="torso" pos="0 0 .105">
+            <camera name="track" mode="trackcom" pos="0 -3 .5" xyaxes="1 0 0 0 0 1"/>
+            <joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
+            <geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
+            <geom name="head" pos="-.15 0 0" size=".09" type="sphere" user="258"/>
+            <geom fromto=".11 -.06 0 .11 .06 0" name="uwaist" size="0.06" type="capsule"/>
+            <body name="lwaist" pos=".21 0 0" quat="1.000 0 -0.002 0">
+                <geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
+                <joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
+                <joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
+                <body name="pelvis" pos="0.165 0 0" quat="1.000 0 -0.002 0">
+                    <joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
+                    <geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
+                    <body name="right_thigh" pos="0 -0.1 0">
+                        <joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
+                        <joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
+                        <geom fromto="0 0 0 0.34 0.01 0" name="right_thigh1" size="0.06" type="capsule"/>
+                        <body name="right_shin" pos="0.403 0.01 0">
+                            <joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
+                            <geom fromto="0 0 0 0.3 0 0" name="right_shin1" size="0.049" type="capsule"/>
+                            <body name="right_foot" pos="0.35 0 -.10">
+                                <geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
+                            </body>
+                        </body>
+                    </body>
+                    <body name="left_thigh" pos="0 0.1 0">
+                        <joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
+                        <joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
+                        <geom fromto="0 0 0 0.34 -0.01 0" name="left_thigh1" size="0.06" type="capsule"/>
+                        <body name="left_shin" pos="0.403 -0.01 0">
+                            <joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
+                            <geom fromto="0 0 0 0.3 0 0" name="left_shin1" size="0.049" type="capsule"/>
+                            <body name="left_foot" pos="0.35 0 -.1">
+                                <geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+            <body name="right_upper_arm" pos="0 -0.17 0.06">
+                <joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
+                <joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
+                <geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
+                <body name="right_lower_arm" pos=".18 -.18 -.18">
+                    <joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
+                    <geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
+                    <geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
+                    <camera pos="0 0 0"/>
+                </body>
+            </body>
+            <body name="left_upper_arm" pos="0 0.17 0.06">
+                <joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
+                <joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
+                <geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
+                <body name="left_lower_arm" pos=".18 .18 -.18">
+                    <joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
+                    <geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
+                    <geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
+                </body>
+            </body>
+        </body>
+    </worldbody>
+    <tendon>
+        <fixed name="left_hipknee">
+            <joint coef="-1" joint="left_hip_y"/>
+            <joint coef="1" joint="left_knee"/>
+        </fixed>
+        <fixed name="right_hipknee">
+            <joint coef="-1" joint="right_hip_y"/>
+            <joint coef="1" joint="right_knee"/>
+        </fixed>
+    </tendon>
+
+    <actuator>
+        <motor gear="100" joint="abdomen_y" name="abdomen_y"/>
+        <motor gear="100" joint="abdomen_z" name="abdomen_z"/>
+        <motor gear="100" joint="abdomen_x" name="abdomen_x"/>
+        <motor gear="100" joint="right_hip_x" name="right_hip_x"/>
+        <motor gear="100" joint="right_hip_z" name="right_hip_z"/>
+        <motor gear="300" joint="right_hip_y" name="right_hip_y"/>
+        <motor gear="200" joint="right_knee" name="right_knee"/>
+        <motor gear="100" joint="left_hip_x" name="left_hip_x"/>
+        <motor gear="100" joint="left_hip_z" name="left_hip_z"/>
+        <motor gear="300" joint="left_hip_y" name="left_hip_y"/>
+        <motor gear="200" joint="left_knee" name="left_knee"/>
+        <motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
+        <motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
+        <motor gear="25" joint="right_elbow" name="right_elbow"/>
+        <motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
+        <motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
+        <motor gear="25" joint="left_elbow" name="left_elbow"/>
+    </actuator>
+</mujoco>
--- a/src/gym/envs/mujoco/assets/inverted_double_pendulum.xml
+++ b/src/gym/envs/mujoco/assets/inverted_double_pendulum.xml
@@ -0,0 +1,47 @@
+<!-- Cartpole Model
+
+    The state space is populated with joints in the order that they are
+    defined in this file. The actuators also operate on joints.
+
+    State-Space (name/joint/parameter):
+        - cart      slider      position (m)
+        - pole      hinge       angle (rad)
+        - cart      slider      velocity (m/s)
+        - pole      hinge       angular velocity (rad/s)
+
+    Actuators (name/actuator/parameter):
+        - cart      motor       force x (N)
+
+-->
+<mujoco model="cartpole">
+  <compiler coordinate="local" inertiafromgeom="true"/>
+  <custom>
+    <numeric data="2" name="frame_skip"/>
+  </custom>
+  <default>
+    <joint damping="0.05"/>
+    <geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
+  </default>
+  <option gravity="1e-5 0 -9.81" integrator="RK4" timestep="0.01"/>
+  <size nstack="3000"/>
+  <worldbody>
+    <geom name="floor" pos="0 0 -3.0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
+    <geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
+    <body name="cart" pos="0 0 0">
+      <joint axis="1 0 0" limited="true" margin="0.01" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
+      <geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
+      <body name="pole" pos="0 0 0">
+        <joint axis="0 1 0" name="hinge" pos="0 0 0" type="hinge"/>
+        <geom fromto="0 0 0 0 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
+        <body name="pole2" pos="0 0 0.6">
+          <joint axis="0 1 0" name="hinge2" pos="0 0 0" type="hinge"/>
+          <geom fromto="0 0 0 0 0 0.6" name="cpole2" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
+          <site name="tip" pos="0 0 .6" size="0.01 0.01"/>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <actuator>
+    <motor ctrllimited="true" ctrlrange="-1 1" gear="500" joint="slider" name="slide"/>
+  </actuator>
+</mujoco>
--- a/src/gym/envs/mujoco/assets/inverted_pendulum.xml
+++ b/src/gym/envs/mujoco/assets/inverted_pendulum.xml
@@ -0,0 +1,27 @@
+<mujoco model="inverted pendulum">
+	<compiler inertiafromgeom="true"/>
+	<default>
+		<joint armature="0" damping="1" limited="true"/>
+		<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
+		<tendon/>
+		<motor ctrlrange="-3 3"/>
+	</default>
+	<option gravity="0 0 -9.81" integrator="RK4" timestep="0.02"/>
+	<size nstack="3000"/>
+	<worldbody>
+		<!--geom name="ground" type="plane" pos="0 0 0" /-->
+		<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
+		<body name="cart" pos="0 0 0">
+			<joint axis="1 0 0" limited="true" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
+			<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
+			<body name="pole" pos="0 0 0">
+				<joint axis="0 1 0" name="hinge" pos="0 0 0" range="-90 90" type="hinge"/>
+				<geom fromto="0 0 0 0.001 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.049 0.3" type="capsule"/>
+				<!--                 <body name="pole2" pos="0.001 0 0.6"><joint name="hinge2" type="hinge" pos="0 0 0" axis="0 1 0"/><geom name="cpole2" type="capsule" fromto="0 0 0 0 0 0.6" size="0.05 0.3" rgba="0.7 0 0.7 1"/><site name="tip2" pos="0 0 .6"/></body>-->
+			</body>
+		</body>
+	</worldbody>
+	<actuator>
+		<motor gear="100" joint="slider" name="slide"/>
+	</actuator>
+</mujoco>
--- a/src/gym/envs/mujoco/assets/point.xml
+++ b/src/gym/envs/mujoco/assets/point.xml
@@ -0,0 +1,31 @@
+<mujoco>
+  <compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
+  <option integrator="RK4" timestep="0.02"/>
+  <default>
+    <joint armature="0" damping="0" limited="false"/>
+    <geom conaffinity="0" condim="3" density="100" friction="1 0.5 0.5" margin="0" rgba="0.8 0.6 0.4 1"/>
+  </default>
+  <asset>
+    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
+    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
+    <material name="geom" texture="texgeom" texuniform="true"/>
+  </asset>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
+    <body name="torso" pos="0 0 0">
+      <geom name="pointbody" pos="0 0 0.5" size="0.5" type="sphere"/>
+      <geom name="pointarrow" pos="0.6 0 0.5" size="0.5 0.1 0.1" type="box"/>
+      <joint axis="1 0 0" name="ballx" pos="0 0 0" type="slide"/>
+      <joint axis="0 1 0" name="bally" pos="0 0 0" type="slide"/>
+      <joint axis="0 0 1" limited="false" name="rot" pos="0 0 0" type="hinge"/>
+    </body>
+  </worldbody>
+  <actuator>
+    <!-- Those are just dummy actuators for providing ranges -->
+    <motor ctrllimited="true" ctrlrange="-1 1" joint="ballx"/>
+    <motor ctrllimited="true" ctrlrange="-0.25 0.25" joint="rot"/>
+  </actuator>
+</mujoco>
--- a/src/gym/envs/mujoco/assets/pusher.xml
+++ b/src/gym/envs/mujoco/assets/pusher.xml
@@ -0,0 +1,91 @@
+<mujoco model="arm3d">
+  <compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
+  <option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler" />
+  
+  <default>
+    <joint armature='0.04' damping="1" limited="true"/>
+    <geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
+  </default>
+  
+  <worldbody>
+    <light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
+    <geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
+
+    <body name="r_shoulder_pan_link" pos="0 -0.6 0">
+      <geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" />
+      <geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" />
+      <geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" />
+      <geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" />
+      <geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
+      <joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
+
+      <body name="r_shoulder_lift_link" pos="0.1 0 0">
+        <geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
+        <joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
+
+        <body name="r_upper_arm_roll_link" pos="0 0 0">
+          <geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
+          <joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
+
+          <body name="r_upper_arm_link" pos="0 0 0">
+            <geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
+
+            <body name="r_elbow_flex_link" pos="0.4 0 0">
+              <geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
+              <joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
+
+              <body name="r_forearm_roll_link" pos="0 0 0">
+                <geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
+                <joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
+
+                <body name="r_forearm_link" pos="0 0 0">
+                  <geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
+
+                  <body name="r_wrist_flex_link" pos="0.321 0 0">
+                    <geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
+                    <joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
+
+                    <body name="r_wrist_roll_link" pos="0 0 0">
+                      <joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
+                      <body name="tips_arm" pos="0 0 0">
+                        <geom name="tip_arml" type="sphere" pos="0.1 -0.1 0." size="0.01" />
+                        <geom name="tip_armr" type="sphere" pos="0.1 0.1 0." size="0.01" />
+                      </body>
+                      <geom type="capsule" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" contype="1" conaffinity="1" />
+                      <geom type="capsule" fromto="0 -0.1 0. 0.1 -0.1 0" size="0.02" contype="1" conaffinity="1" />
+                      <geom type="capsule" fromto="0 +0.1 0. 0.1 +0.1 0." size="0.02" contype="1" conaffinity="1" />
+                    </body>
+                  </body>
+                </body>
+              </body>
+            </body>
+          </body>
+        </body>
+      </body>
+    </body>
+
+    <!--<body name="object" pos="0.55 -0.3 -0.275" >-->
+    <body name="object" pos="0.45 -0.05 -0.275" >
+      <geom rgba="1 1 1 0" type="sphere" size="0.05 0.05 0.05" density="0.00001" conaffinity="0"/>
+      <geom rgba="1 1 1 1" type="cylinder" size="0.05 0.05 0.05" density="0.00001" contype="1" conaffinity="0"/>
+      <joint name="obj_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
+      <joint name="obj_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
+    </body>
+
+    <body name="goal" pos="0.45 -0.05 -0.3230">
+      <geom rgba="1 0 0 1" type="cylinder" size="0.08 0.001 0.1" density='0.00001' contype="0" conaffinity="0"/>
+      <joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
+      <joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/> 
+    </body>
+  </worldbody>
+
+  <actuator>
+    <motor joint="r_shoulder_pan_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_shoulder_lift_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_upper_arm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_elbow_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_forearm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_wrist_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
+    <motor joint="r_wrist_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true"/>
+  </actuator>
+</mujoco>
--- a/src/gym/envs/mujoco/assets/reacher.xml
+++ b/src/gym/envs/mujoco/assets/reacher.xml
@@ -0,0 +1,39 @@
+<mujoco model="reacher">
+	<compiler angle="radian" inertiafromgeom="true"/>
+	<default>
+		<joint armature="1" damping="1" limited="true"/>
+		<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
+	</default>
+	<option gravity="0 0 -9.81" integrator="RK4" timestep="0.01"/>
+	<worldbody>
+		<!-- Arena -->
+		<geom conaffinity="0" contype="0" name="ground" pos="0 0 0" rgba="0.9 0.9 0.9 1" size="1 1 10" type="plane"/>
+		<geom conaffinity="0" fromto="-.3 -.3 .01 .3 -.3 .01" name="sideS" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
+		<geom conaffinity="0" fromto=" .3 -.3 .01 .3  .3 .01" name="sideE" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
+		<geom conaffinity="0" fromto="-.3  .3 .01 .3  .3 .01" name="sideN" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
+		<geom conaffinity="0" fromto="-.3 -.3 .01 -.3 .3 .01" name="sideW" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
+		<!-- Arm -->
+		<geom conaffinity="0" contype="0" fromto="0 0 0 0 0 0.02" name="root" rgba="0.9 0.4 0.6 1" size=".011" type="cylinder"/>
+		<body name="body0" pos="0 0 .01">
+			<geom fromto="0 0 0 0.1 0 0" name="link0" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
+			<joint axis="0 0 1" limited="false" name="joint0" pos="0 0 0" type="hinge"/>
+			<body name="body1" pos="0.1 0 0">
+				<joint axis="0 0 1" limited="true" name="joint1" pos="0 0 0" range="-3.0 3.0" type="hinge"/>
+				<geom fromto="0 0 0 0.1 0 0" name="link1" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
+				<body name="fingertip" pos="0.11 0 0">
+					<geom contype="0" name="fingertip" pos="0 0 0" rgba="0.0 0.8 0.6 1" size=".01" type="sphere"/>
+				</body>
+			</body>
+		</body>
+		<!-- Target -->
+		<body name="target" pos=".1 -.1 .01">
+			<joint armature="0" axis="1 0 0" damping="0" limited="true" name="target_x" pos="0 0 0" range="-.27 .27" ref=".1" stiffness="0" type="slide"/>
+			<joint armature="0" axis="0 1 0" damping="0" limited="true" name="target_y" pos="0 0 0" range="-.27 .27" ref="-.1" stiffness="0" type="slide"/>
+			<geom conaffinity="0" contype="0" name="target" pos="0 0 0" rgba="0.9 0.2 0.2 1" size=".009" type="sphere"/>
+		</body>
+	</worldbody>
+	<actuator>
+		<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint0"/>
+		<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint1"/>
+	</actuator>
+</mujoco>
--- a/src/gym/envs/mujoco/assets/striker.xml
+++ b/src/gym/envs/mujoco/assets/striker.xml
@@ -0,0 +1,101 @@
+<mujoco model="arm3d">
+  <compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
+  <option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler"/>
+
+  <default>
+    <joint armature='0.04' damping="1" limited="true"/>
+    <geom friction=".0 .0 .0" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
+  </default>
+
+  <worldbody>
+    <light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
+    <geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
+
+    <body name="r_shoulder_pan_link" pos="0 -0.6 0">
+      <geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05"/>
+      <geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05"/>
+      <geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03"/>
+      <geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03"/>
+      <geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
+      <joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
+
+      <body name="r_shoulder_lift_link" pos="0.1 0 0">
+        <geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
+        <joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
+
+        <body name="r_upper_arm_roll_link" pos="0 0 0">
+          <geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
+          <joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
+
+          <body name="r_upper_arm_link" pos="0 0 0">
+            <geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
+
+            <body name="r_elbow_flex_link" pos="0.4 0 0">
+              <geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
+              <joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
+
+              <body name="r_forearm_roll_link" pos="0 0 0">
+                <geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
+                <joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
+
+                <body name="r_forearm_link" pos="0 0 0">
+                  <geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
+
+                  <body name="r_wrist_flex_link" pos="0.321 0 0">
+                    <geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
+                    <joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
+
+                    <body name="r_wrist_roll_link" pos="0 0 0">
+                      <joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
+
+
+                      <body name="tips_arm" pos="0 0 0">
+                        <geom conaffinity="1" contype="1" name="tip_arml" pos="0.017 0 0" size="0.003 0.12 0.05" type="box" />
+
+                      </body>
+                      <geom conaffinity="1" contype="1" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" type="capsule" />
+                    </body>
+                  </body>
+                </body>
+              </body>
+            </body>
+          </body>
+        </body>
+      </body>
+    </body>
+
+    <body name="object" pos="0 0 -0.270" >
+      <geom type="sphere" rgba="1 1 1 1" pos="0 0 0" size="0.05 0.05 0.05" contype="1" conaffinity="0"/>
+      <joint name="obj_slidey" armature="0.1" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
+      <joint name="obj_slidex" armature="0.1" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
+    </body>
+
+    <body name="goal" pos="0.0 0.0 -0.3230">
+      <geom rgba="1. 1. 1. 0" pos="0 0 0" type="box" size="0.01 0.01 0.01" contype="0" conaffinity="0"/>
+      <body pos="0 0 0">
+        <geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
+      </body>
+      <body name="coaster" pos="0 0 0">
+        <geom rgba="1. 1. 1. 1" type="cylinder" size="0.08 0.001 0.1" density='1000000' contype="0" conaffinity="0"/>
+      </body>
+      <body pos="0 0 0" axisangle="0 0 1 0.785">
+        <geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
+      </body>
+      <body pos="0 0 0" axisangle="0 0 1 -0.785">
+        <geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
+      </body>
+      <joint name="goal_free" type="free" pos="0 0 0" limited="false" damping="0"/>
+    </body>
+  </worldbody>
+
+  <actuator>
+    <motor joint="r_shoulder_pan_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
+    <motor joint="r_shoulder_lift_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
+    <motor joint="r_upper_arm_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
+    <motor joint="r_elbow_flex_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
+    <motor joint="r_forearm_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
+    <motor joint="r_wrist_flex_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
+    <motor joint="r_wrist_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true"/>
+  </actuator>
+
+</mujoco>
--- a/src/gym/envs/mujoco/assets/swimmer.xml
+++ b/src/gym/envs/mujoco/assets/swimmer.xml
@@ -0,0 +1,38 @@
+<mujoco model="swimmer">
+  <compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
+  <option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1"/>
+  <default>
+    <geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1"/>
+    <joint armature='0.1'  />
+  </default>
+  <asset>
+    <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
+    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
+    <material name="geom" texture="texgeom" texuniform="true"/>
+  </asset>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane"/>
+    <!--  ================= SWIMMER ================= /-->
+    <body name="torso" pos="0 0 0">
+      <geom density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule"/>
+      <joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide"/>
+      <joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide"/>
+      <joint axis="0 0 1" name="rot" pos="0 0 0" type="hinge"/>
+      <body name="mid" pos="0.5 0 0">
+        <geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
+        <joint axis="0 0 1" limited="true" name="rot2" pos="0 0 0" range="-100 100" type="hinge"/>
+        <body name="back" pos="-1 0 0">
+          <geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
+          <joint axis="0 0 1" limited="true" name="rot3" pos="0 0 0" range="-100 100" type="hinge"/>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <actuator>
+    <motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot2"/>
+    <motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot3"/>
+  </actuator>
+</mujoco>
--- a/src/gym/envs/mujoco/assets/thrower.xml
+++ b/src/gym/envs/mujoco/assets/thrower.xml
@@ -0,0 +1,127 @@
+<mujoco model="arm3d">
+  <compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
+  <option timestep="0.01" gravity="0 0 -9.81" iterations="20" integrator="Euler"/>
+  <default>
+    <joint armature='0.75' damping="1" limited="true"/>
+    <geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
+  </default>
+
+  <worldbody>
+    <light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
+
+    <geom type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
+
+    <body name="r_shoulder_pan_link" pos="0 -0.6 0">
+      <geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" density="0.0001"/>
+      <geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" density="0.0001"/>
+      <geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" density="0.0001"/>
+      <geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" density="0.0001"/>
+      <geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" density="1"/>
+      <joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-0.4854 1.214602" damping="1.0"/>
+
+      <body name="r_shoulder_lift_link" pos="0.1 0 0">
+        <geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" density="0.0001"/>
+        <joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 0.7963" damping="1.0"/>
+
+        <body name="r_upper_arm_roll_link" pos="0 0 0">
+          <geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
+          <joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="1.0"/>
+
+          <body name="r_upper_arm_link" pos="0 0 0">
+            <geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" density="0.0001"/>
+
+            <body name="r_elbow_flex_link" pos="0.4 0 0">
+              <geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" density="0.0001"/>
+              <joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.7 0.7" damping="1.0"/>
+
+              <body name="r_forearm_roll_link" pos="0 0 0">
+                <geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
+                <joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping="1.0" range="-1.5 1.5"/>
+
+                <body name="r_forearm_link" pos="0 0 0" axisangle="1 0 0 0.392">
+                  <geom name="fa" type="capsule" fromto="0 0 0 0 0 0.291" size="0.05" density="0.0001"/>
+
+                  <body name="r_wrist_flex_link" pos="0 0 0.321" axisangle="0 0 1 1.57">
+                    <geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" density="0.0001"/>
+                    <joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-1.0 1.0" damping="1.0" />
+
+                    <body name="r_wrist_roll_link" pos="0 0 0" axisangle="0 1 0 -1.178">
+                      <joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="0 1 0" damping="1.0" range="0 2.25"/>
+                      <geom type="capsule" fromto="0 -0.05 0 0 0.05 0" size="0.01" contype="1" conaffinity="1" density="0.0001"/>
+                      <body pos="0 0 0" axisangle="0 0 1 0.392">
+                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density=".0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                      </body>
+                      <body pos="0 0 0" axisangle="0 0 1 1.57">
+                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                      </body>
+                      <body pos="0 0 0" axisangle="0 0 1 1.178">
+                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                      </body>
+                      <body pos="0 0 0" axisangle="0 0 1 0.785">
+                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                      </body>
+                      <body pos="0 0 0" axisangle="0 0 1 1.96">
+                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                      </body>
+                      <body pos="0 0 0" axisangle="0 0 1 2.355">
+                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                      </body>
+                      <body pos="0 0 0" axisangle="0 0 1 2.74">
+                        <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                        <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
+                      </body>
+                    </body>
+                  </body>
+                </body>
+              </body>
+            </body>
+          </body>
+        </body>
+      </body>
+    </body>
+
+    <body name="goal" pos="0.575 0.5 -0.328">
+      <geom rgba="1 1 1 1" type="box" pos="0 0 0.005" size="0.075 0.075 0.001" contype="1" conaffinity="1" density="1000"/>
+      <geom rgba="1 1 1 1" type="box" pos="0.0 0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
+      <geom rgba="1 1 1 1" type="box" pos="0.0 -0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
+      <geom rgba="1 1 1 1" type="box" pos="0.075 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
+      <geom rgba="1 1 1 1" type="box" pos="-0.076 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
+      <geom rgba="1 1 1 1" type="capsule" fromto="0.073 0.073 0.0075 0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
+      <geom rgba="1 1 1 1" type="capsule" fromto="0.073 -0.073 0.0075 0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
+      <geom rgba="1 1 1 1" type="capsule" fromto="-0.073 0.073 0.0075 -0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
+      <geom rgba="1 1 1 1" type="capsule" fromto="-0.073 -0.073 0.0075 -0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
+      <joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="1.0"/>
+      <joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="1.0"/>
+    </body>
+
+    <body name="ball" pos="0.5 -0.8 0.275">
+      <geom rgba="1. 1. 1. 1" type="sphere" size="0.03 0.03 0.1" density='25' contype="1" conaffinity="1"/>
+      <joint name="ball_free" type="free" armature='0' damping="0" limited="false"/>
+    </body>
+
+  </worldbody>
+
+  <actuator>
+    <motor joint="r_shoulder_pan_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
+    <motor joint="r_shoulder_lift_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
+    <motor joint="r_upper_arm_roll_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
+    <motor joint="r_elbow_flex_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
+    <motor joint="r_forearm_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
+    <motor joint="r_wrist_flex_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
+    <motor joint="r_wrist_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true"/>
+  </actuator>
+
+</mujoco>
--- a/src/gym/envs/mujoco/assets/walker2d.xml
+++ b/src/gym/envs/mujoco/assets/walker2d.xml
@@ -0,0 +1,62 @@
+<mujoco model="walker2d">
+  <compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
+  <default>
+    <joint armature="0.01" damping=".1" limited="true"/>
+    <geom conaffinity="0" condim="3" contype="1" density="1000" friction=".7 .1 .1" rgba="0.8 0.6 .4 1"/>
+  </default>
+  <option integrator="RK4" timestep="0.002"/>
+  <worldbody>
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
+    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane" material="MatPlane"/>
+    <body name="torso" pos="0 0 1.25">
+      <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
+      <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
+      <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
+      <geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
+      <body name="thigh" pos="0 0 1.05">
+        <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
+        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
+        <body name="leg" pos="0 0 0.35">
+          <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
+          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
+          <body name="foot" pos="0.2/2 0 0.1">
+            <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
+            <geom friction="0.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
+          </body>
+        </body>
+      </body>
+      <!-- copied and then replace thigh->thigh_left, leg->leg_left, foot->foot_right -->
+      <body name="thigh_left" pos="0 0 1.05">
+        <joint axis="0 -1 0" name="thigh_left_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
+        <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_left_geom" rgba=".7 .3 .6 1" size="0.05" type="capsule"/>
+        <body name="leg_left" pos="0 0 0.35">
+          <joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
+          <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.04" type="capsule"/>
+          <body name="foot_left" pos="0.2/2 0 0.1">
+            <joint axis="0 -1 0" name="foot_left_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
+            <geom friction="1.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.06" type="capsule"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+  <actuator>
+    <!-- <motor joint="torso_joint" ctrlrange="-100.0 100.0" isctrllimited="true"/>-->
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_left_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_left_joint"/>
+    <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_left_joint"/>
+    <!-- <motor joint="finger2_rot" ctrlrange="-20.0 20.0" isctrllimited="true"/>-->
+  </actuator>
+    <asset>
+        <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
+            width="100" height="100"/>
+        <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
+        <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
+        <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
+        <material name="geom" texture="texgeom" texuniform="true"/>
+    </asset>
+</mujoco>
--- a/src/gym/envs/mujoco/half_cheetah.py
+++ b/src/gym/envs/mujoco/half_cheetah.py
@@ -0,0 +1,34 @@
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
+        utils.EzPickle.__init__(self)
+
+    def step(self, action):
+        xposbefore = self.sim.data.qpos[0]
+        self.do_simulation(action, self.frame_skip)
+        xposafter = self.sim.data.qpos[0]
+        ob = self._get_obs()
+        reward_ctrl = - 0.1 * np.square(action).sum()
+        reward_run = (xposafter - xposbefore)/self.dt
+        reward = reward_ctrl + reward_run
+        done = False
+        return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
+
+    def _get_obs(self):
+        return np.concatenate([
+            self.sim.data.qpos.flat[1:],
+            self.sim.data.qvel.flat,
+        ])
+
+    def reset_model(self):
+        qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
+        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        self.viewer.cam.distance = self.model.stat.extent * 0.5
--- a/src/gym/envs/mujoco/hopper.py
+++ b/src/gym/envs/mujoco/hopper.py
@@ -0,0 +1,40 @@
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4)
+        utils.EzPickle.__init__(self)
+
+    def step(self, a):
+        posbefore = self.sim.data.qpos[0]
+        self.do_simulation(a, self.frame_skip)
+        posafter, height, ang = self.sim.data.qpos[0:3]
+        alive_bonus = 1.0
+        reward = (posafter - posbefore) / self.dt
+        reward += alive_bonus
+        reward -= 1e-3 * np.square(a).sum()
+        s = self.state_vector()
+        done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and
+                    (height > .7) and (abs(ang) < .2))
+        ob = self._get_obs()
+        return ob, reward, done, {}
+
+    def _get_obs(self):
+        return np.concatenate([
+            self.sim.data.qpos.flat[1:],
+            np.clip(self.sim.data.qvel.flat, -10, 10)
+        ])
+
+    def reset_model(self):
+        qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
+        qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = 2
+        self.viewer.cam.distance = self.model.stat.extent * 0.75
+        self.viewer.cam.lookat[2] = 1.15
+        self.viewer.cam.elevation = -20
--- a/src/gym/envs/mujoco/humanoid.py
+++ b/src/gym/envs/mujoco/humanoid.py
@@ -0,0 +1,51 @@
+import numpy as np
+from gym.envs.mujoco import mujoco_env
+from gym import utils
+
+def mass_center(model, sim):
+    mass = np.expand_dims(model.body_mass, 1)
+    xpos = sim.data.xipos
+    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
+
+class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5)
+        utils.EzPickle.__init__(self)
+
+    def _get_obs(self):
+        data = self.sim.data
+        return np.concatenate([data.qpos.flat[2:],
+                               data.qvel.flat,
+                               data.cinert.flat,
+                               data.cvel.flat,
+                               data.qfrc_actuator.flat,
+                               data.cfrc_ext.flat])
+
+    def step(self, a):
+        pos_before = mass_center(self.model, self.sim)
+        self.do_simulation(a, self.frame_skip)
+        pos_after = mass_center(self.model, self.sim)
+        alive_bonus = 5.0
+        data = self.sim.data
+        lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
+        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
+        quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
+        quad_impact_cost = min(quad_impact_cost, 10)
+        reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
+        qpos = self.sim.data.qpos
+        done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
+        return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
+
+    def reset_model(self):
+        c = 0.01
+        self.set_state(
+            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
+            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
+        )
+        return self._get_obs()
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = 1
+        self.viewer.cam.distance = self.model.stat.extent * 1.0
+        self.viewer.cam.lookat[2] = 2.0
+        self.viewer.cam.elevation = -20
--- a/src/gym/envs/mujoco/humanoidstandup.py
+++ b/src/gym/envs/mujoco/humanoidstandup.py
@@ -0,0 +1,45 @@
+from gym.envs.mujoco import mujoco_env
+from gym import utils
+import numpy as np
+
+class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
+        utils.EzPickle.__init__(self)
+
+    def _get_obs(self):
+        data = self.sim.data
+        return np.concatenate([data.qpos.flat[2:],
+                               data.qvel.flat,
+                               data.cinert.flat,
+                               data.cvel.flat,
+                               data.qfrc_actuator.flat,
+                               data.cfrc_ext.flat])
+
+    def step(self, a):
+        self.do_simulation(a, self.frame_skip)
+        pos_after = self.sim.data.qpos[2]
+        data = self.sim.data
+        uph_cost = (pos_after - 0) / self.model.opt.timestep
+
+        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
+        quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
+        quad_impact_cost = min(quad_impact_cost, 10)
+        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
+
+        done = bool(False)
+        return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost)
+
+    def reset_model(self):
+        c = 0.01
+        self.set_state(
+            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
+            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
+        )
+        return self._get_obs()
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = 1
+        self.viewer.cam.distance = self.model.stat.extent * 1.0
+        self.viewer.cam.lookat[2] = 0.8925
+        self.viewer.cam.elevation = -20
--- a/src/gym/envs/mujoco/inverted_double_pendulum.py
+++ b/src/gym/envs/mujoco/inverted_double_pendulum.py
@@ -0,0 +1,43 @@
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+
+    def __init__(self):
+        mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5)
+        utils.EzPickle.__init__(self)
+
+    def step(self, action):
+        self.do_simulation(action, self.frame_skip)
+        ob = self._get_obs()
+        x, _, y = self.sim.data.site_xpos[0]
+        dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
+        v1, v2 = self.sim.data.qvel[1:3]
+        vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
+        alive_bonus = 10
+        r = alive_bonus - dist_penalty - vel_penalty
+        done = bool(y <= 1)
+        return ob, r, done, {}
+
+    def _get_obs(self):
+        return np.concatenate([
+            self.sim.data.qpos[:1],  # cart x pos
+            np.sin(self.sim.data.qpos[1:]),  # link angles
+            np.cos(self.sim.data.qpos[1:]),
+            np.clip(self.sim.data.qvel, -10, 10),
+            np.clip(self.sim.data.qfrc_constraint, -10, 10)
+        ]).ravel()
+
+    def reset_model(self):
+        self.set_state(
+            self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
+            self.init_qvel + self.np_random.randn(self.model.nv) * .1
+        )
+        return self._get_obs()
+
+    def viewer_setup(self):
+        v = self.viewer
+        v.cam.trackbodyid = 0
+        v.cam.distance = self.model.stat.extent * 0.5
+        v.cam.lookat[2] = 0.12250000000000005  # v.model.stat.center[2]
--- a/src/gym/envs/mujoco/inverted_pendulum.py
+++ b/src/gym/envs/mujoco/inverted_pendulum.py
@@ -0,0 +1,30 @@
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        utils.EzPickle.__init__(self)
+        mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
+
+    def step(self, a):
+        reward = 1.0
+        self.do_simulation(a, self.frame_skip)
+        ob = self._get_obs()
+        notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
+        done = not notdone
+        return ob, reward, done, {}
+
+    def reset_model(self):
+        qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
+        qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
+
+    def viewer_setup(self):
+        v = self.viewer
+        v.cam.trackbodyid = 0
+        v.cam.distance = self.model.stat.extent
--- a/src/gym/envs/mujoco/mujoco_env.py
+++ b/src/gym/envs/mujoco/mujoco_env.py
@@ -0,0 +1,145 @@
+import os
+
+from gym import error, spaces
+from gym.utils import seeding
+import numpy as np
+from os import path
+import gym
+import six
+
+try:
+    import mujoco_py
+except ImportError as e:
+    raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
+
+DEFAULT_SIZE = 500
+
+class MujocoEnv(gym.Env):
+    """Superclass for all MuJoCo environments.
+    """
+
+    def __init__(self, model_path, frame_skip):
+        if model_path.startswith("/"):
+            fullpath = model_path
+        else:
+            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
+        if not path.exists(fullpath):
+            raise IOError("File %s does not exist" % fullpath)
+        self.frame_skip = frame_skip
+        self.model = mujoco_py.load_model_from_path(fullpath)
+        self.sim = mujoco_py.MjSim(self.model)
+        self.data = self.sim.data
+        self.viewer = None
+        self._viewers = {}
+
+        self.metadata = {
+            'render.modes': ['human', 'rgb_array', 'depth_array'],
+            'video.frames_per_second': int(np.round(1.0 / self.dt))
+        }
+
+        self.init_qpos = self.sim.data.qpos.ravel().copy()
+        self.init_qvel = self.sim.data.qvel.ravel().copy()
+        observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
+        assert not done
+        self.obs_dim = observation.size
+
+        bounds = self.model.actuator_ctrlrange.copy()
+        low = bounds[:, 0]
+        high = bounds[:, 1]
+        self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
+
+        high = np.inf*np.ones(self.obs_dim)
+        low = -high
+        self.observation_space = spaces.Box(low, high, dtype=np.float32)
+
+        self.seed()
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    # methods to override:
+    # ----------------------------
+
+    def reset_model(self):
+        """
+        Reset the robot degrees of freedom (qpos and qvel).
+        Implement this in each subclass.
+        """
+        raise NotImplementedError
+
+    def viewer_setup(self):
+        """
+        This method is called when the viewer is initialized.
+        Optionally implement this method, if you need to tinker with camera position
+        and so forth.
+        """
+        pass
+
+    # -----------------------------
+
+    def reset(self):
+        self.sim.reset()
+        ob = self.reset_model()
+        return ob
+
+    def set_state(self, qpos, qvel):
+        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
+        old_state = self.sim.get_state()
+        new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
+                                         old_state.act, old_state.udd_state)
+        self.sim.set_state(new_state)
+        self.sim.forward()
+
+    @property
+    def dt(self):
+        return self.model.opt.timestep * self.frame_skip
+
+    def do_simulation(self, ctrl, n_frames):
+        self.sim.data.ctrl[:] = ctrl
+        for _ in range(n_frames):
+            self.sim.step()
+
+    def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE):
+        if mode == 'rgb_array':
+            self._get_viewer(mode).render(width, height)
+            # window size used for old mujoco-py:
+            data = self._get_viewer(mode).read_pixels(width, height, depth=False)
+            # original image is upside-down, so flip it
+            return data[::-1, :, :]
+        elif mode == 'depth_array':
+            self._get_viewer(mode).render(width, height)
+            # window size used for old mujoco-py:
+            # Extract depth part of the read_pixels() tuple
+            data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1]
+            # original image is upside-down, so flip it
+            return data[::-1, :]
+        elif mode == 'human':
+            self._get_viewer(mode).render()
+
+    def close(self):
+        if self.viewer is not None:
+            # self.viewer.finish()
+            self.viewer = None
+            self._viewers = {}
+
+    def _get_viewer(self, mode):
+        self.viewer = self._viewers.get(mode)
+        if self.viewer is None:
+            if mode == 'human':
+                self.viewer = mujoco_py.MjViewer(self.sim)
+            elif mode == 'rgb_array' or mode == 'depth_array':
+                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
+                
+            self.viewer_setup()
+            self._viewers[mode] = self.viewer
+        return self.viewer
+
+    def get_body_com(self, body_name):
+        return self.data.get_body_xpos(body_name)
+
+    def state_vector(self):
+        return np.concatenate([
+            self.sim.data.qpos.flat,
+            self.sim.data.qvel.flat
+        ])
--- a/src/gym/envs/mujoco/pusher.py
+++ b/src/gym/envs/mujoco/pusher.py
@@ -0,0 +1,57 @@
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+import mujoco_py
+
+class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        utils.EzPickle.__init__(self)
+        mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5)
+
+    def step(self, a):
+        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
+        vec_2 = self.get_body_com("object") - self.get_body_com("goal")
+
+        reward_near = - np.linalg.norm(vec_1)
+        reward_dist = - np.linalg.norm(vec_2)
+        reward_ctrl = - np.square(a).sum()
+        reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
+
+        self.do_simulation(a, self.frame_skip)
+        ob = self._get_obs()
+        done = False
+        return ob, reward, done, dict(reward_dist=reward_dist,
+                reward_ctrl=reward_ctrl)
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = -1
+        self.viewer.cam.distance = 4.0
+
+    def reset_model(self):
+        qpos = self.init_qpos
+
+        self.goal_pos = np.asarray([0, 0])
+        while True:
+            self.cylinder_pos = np.concatenate([
+                    self.np_random.uniform(low=-0.3, high=0, size=1),
+                    self.np_random.uniform(low=-0.2, high=0.2, size=1)])
+            if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
+                break
+
+        qpos[-4:-2] = self.cylinder_pos
+        qpos[-2:] = self.goal_pos
+        qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
+                high=0.005, size=self.model.nv)
+        qvel[-4:] = 0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        return np.concatenate([
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
+            self.get_body_com("tips_arm"),
+            self.get_body_com("object"),
+            self.get_body_com("goal"),
+        ])
--- a/src/gym/envs/mujoco/reacher.py
+++ b/src/gym/envs/mujoco/reacher.py
@@ -0,0 +1,43 @@
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        utils.EzPickle.__init__(self)
+        mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2)
+
+    def step(self, a):
+        vec = self.get_body_com("fingertip")-self.get_body_com("target")
+        reward_dist = - np.linalg.norm(vec)
+        reward_ctrl = - np.square(a).sum()
+        reward = reward_dist + reward_ctrl
+        self.do_simulation(a, self.frame_skip)
+        ob = self._get_obs()
+        done = False
+        return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = 0
+
+    def reset_model(self):
+        qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
+        while True:
+            self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
+            if np.linalg.norm(self.goal) < 2:
+                break
+        qpos[-2:] = self.goal
+        qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
+        qvel[-2:] = 0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        theta = self.sim.data.qpos.flat[:2]
+        return np.concatenate([
+            np.cos(theta),
+            np.sin(theta),
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat[:2],
+            self.get_body_com("fingertip") - self.get_body_com("target")
+        ])
--- a/src/gym/envs/mujoco/striker.py
+++ b/src/gym/envs/mujoco/striker.py
@@ -0,0 +1,75 @@
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        utils.EzPickle.__init__(self)
+        self._striked = False
+        self._min_strike_dist = np.inf
+        self.strike_threshold = 0.1
+        mujoco_env.MujocoEnv.__init__(self, 'striker.xml', 5)
+
+    def step(self, a):
+        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
+        vec_2 = self.get_body_com("object") - self.get_body_com("goal")
+        self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(vec_2))
+
+        if np.linalg.norm(vec_1) < self.strike_threshold:
+            self._striked = True
+            self._strike_pos = self.get_body_com("tips_arm")
+
+        if self._striked:
+            vec_3 = self.get_body_com("object") - self._strike_pos
+            reward_near = - np.linalg.norm(vec_3)
+        else:
+            reward_near = - np.linalg.norm(vec_1)
+
+        reward_dist = - np.linalg.norm(self._min_strike_dist)
+        reward_ctrl = - np.square(a).sum()
+        reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
+
+        self.do_simulation(a, self.frame_skip)
+        ob = self._get_obs()
+        done = False
+        return ob, reward, done, dict(reward_dist=reward_dist,
+                reward_ctrl=reward_ctrl)
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = 0
+        self.viewer.cam.distance = 4.0
+
+    def reset_model(self):
+        self._min_strike_dist = np.inf
+        self._striked = False
+        self._strike_pos = None
+
+        qpos = self.init_qpos
+
+        self.ball = np.array([0.5, -0.175])
+        while True:
+            self.goal = np.concatenate([
+                    self.np_random.uniform(low=0.15, high=0.7, size=1),
+                    self.np_random.uniform(low=0.1, high=1.0, size=1)])
+            if np.linalg.norm(self.ball - self.goal) > 0.17:
+                break
+
+        qpos[-9:-7] = [self.ball[1], self.ball[0]]
+        qpos[-7:-5] = self.goal
+        diff = self.ball - self.goal
+        angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
+        qpos[-1] = angle / 3.14
+        qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1,
+                size=self.model.nv)
+        qvel[7:] = 0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        return np.concatenate([
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
+            self.get_body_com("tips_arm"),
+            self.get_body_com("object"),
+            self.get_body_com("goal"),
+        ])
--- a/src/gym/envs/mujoco/swimmer.py
+++ b/src/gym/envs/mujoco/swimmer.py
@@ -0,0 +1,31 @@
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4)
+        utils.EzPickle.__init__(self)
+
+    def step(self, a):
+        ctrl_cost_coeff = 0.0001
+        xposbefore = self.sim.data.qpos[0]
+        self.do_simulation(a, self.frame_skip)
+        xposafter = self.sim.data.qpos[0]
+        reward_fwd = (xposafter - xposbefore) / self.dt
+        reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
+        reward = reward_fwd + reward_ctrl
+        ob = self._get_obs()
+        return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
+
+    def _get_obs(self):
+        qpos = self.sim.data.qpos
+        qvel = self.sim.data.qvel
+        return np.concatenate([qpos.flat[2:], qvel.flat])
+
+    def reset_model(self):
+        self.set_state(
+            self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
+            self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
+        )
+        return self._get_obs()
--- a/src/gym/envs/mujoco/thrower.py
+++ b/src/gym/envs/mujoco/thrower.py
@@ -0,0 +1,60 @@
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        utils.EzPickle.__init__(self)
+        self._ball_hit_ground = False
+        self._ball_hit_location = None
+        mujoco_env.MujocoEnv.__init__(self, 'thrower.xml', 5)
+
+    def step(self, a):
+        ball_xy = self.get_body_com("ball")[:2]
+        goal_xy = self.get_body_com("goal")[:2]
+
+        if not self._ball_hit_ground and self.get_body_com("ball")[2] < -0.25:
+            self._ball_hit_ground = True
+            self._ball_hit_location = self.get_body_com("ball")
+
+        if self._ball_hit_ground:
+            ball_hit_xy = self._ball_hit_location[:2]
+            reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy)
+        else:
+            reward_dist = -np.linalg.norm(ball_xy - goal_xy)
+        reward_ctrl = - np.square(a).sum()
+
+        reward = reward_dist + 0.002 * reward_ctrl
+        self.do_simulation(a, self.frame_skip)
+        ob = self._get_obs()
+        done = False
+        return ob, reward, done, dict(reward_dist=reward_dist,
+                reward_ctrl=reward_ctrl)
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = 0
+        self.viewer.cam.distance = 4.0
+
+    def reset_model(self):
+        self._ball_hit_ground = False
+        self._ball_hit_location = None
+
+        qpos = self.init_qpos
+        self.goal = np.array([self.np_random.uniform(low=-0.3, high=0.3),
+                              self.np_random.uniform(low=-0.3, high=0.3)])
+
+        qpos[-9:-7] = self.goal
+        qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
+                high=0.005, size=self.model.nv)
+        qvel[7:] = 0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        return np.concatenate([
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
+            self.get_body_com("r_wrist_roll_link"),
+            self.get_body_com("ball"),
+            self.get_body_com("goal"),
+        ])
--- a/src/gym/envs/mujoco/walker2d.py
+++ b/src/gym/envs/mujoco/walker2d.py
@@ -0,0 +1,40 @@
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+
+    def __init__(self):
+        mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4)
+        utils.EzPickle.__init__(self)
+
+    def step(self, a):
+        posbefore = self.sim.data.qpos[0]
+        self.do_simulation(a, self.frame_skip)
+        posafter, height, ang = self.sim.data.qpos[0:3]
+        alive_bonus = 1.0
+        reward = ((posafter - posbefore) / self.dt)
+        reward += alive_bonus
+        reward -= 1e-3 * np.square(a).sum()
+        done = not (height > 0.8 and height < 2.0 and
+                    ang > -1.0 and ang < 1.0)
+        ob = self._get_obs()
+        return ob, reward, done, {}
+
+    def _get_obs(self):
+        qpos = self.sim.data.qpos
+        qvel = self.sim.data.qvel
+        return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
+
+    def reset_model(self):
+        self.set_state(
+            self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq),
+            self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
+        )
+        return self._get_obs()
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = 2
+        self.viewer.cam.distance = self.model.stat.extent * 0.5
+        self.viewer.cam.lookat[2] = 1.15
+        self.viewer.cam.elevation = -20
--- a/src/gym/envs/pilesos/init.py
+++ b/src/gym/envs/pilesos/init.py
@@ -0,0 +1 @@
+from gym.envs.pilesos.pilesos import PilesosEnv
--- a/src/gym/envs/pilesos/pycache/init.cpython-37.pyc
+++ b/src/gym/envs/pilesos/pycache/init.cpython-37.pyc
--- a/src/gym/envs/pilesos/pycache/pilesos.cpython-37.pyc
+++ b/src/gym/envs/pilesos/pycache/pilesos.cpython-37.pyc
--- a/src/gym/envs/pilesos/pilesos.py
+++ b/src/gym/envs/pilesos/pilesos.py
@@ -0,0 +1,46 @@
+import numpy as np
+
+from gym import core, spaces
+from pilesos.mapstate import MapState
+
+
+class PilesosEnv(core.Env):
+  metadata = {}
+
+  IS_HOME = True
+  DIRT = False
+  ACTIONS = ['F', 'R', 'L', 'S', 'T']
+
+  state: MapState
+
+  def __init__(self):
+    self.viewer = None
+    low = np.array([0, 0, 0])
+    high = np.array([1, 1, 1])
+    self.observation_space = spaces.Box(low=low, high=high, dtype=int)
+    self.action_space = spaces.Discrete(5)
+
+    self.state = MapState.default()
+    self.current_obs = self.state.get_current_obs()
+
+  def reset(self):
+    self.state = MapState.default()
+    return self._get_ob()
+
+  def step(self, action):
+    action_char = self.ACTIONS[action]
+
+    self.current_obs = self.state.act(action_char)
+
+    has_finished = action_char == 'T'
+
+    return self._get_ob(), self.state.last_reward, has_finished, {}
+
+  def render(self, mode='human'):
+    raise NotImplementedError()
+
+  def close(self):
+    pass
+
+  def _get_ob(self):
+    return np.array([int(self.current_obs.has_bumped), int(self.current_obs.has_dirt), int(self.current_obs.is_home)])
--- a/src/gym/envs/registration.py
+++ b/src/gym/envs/registration.py
@@ -0,0 +1,191 @@
+import re
+from gym import error, logger
+
+# This format is true today, but it's *not* an official spec.
+# [username/](env-name)-v(version)    env-name is group 1, version is group 2
+#
+# 2016-10-31: We're experimentally expanding the environment ID format
+# to include an optional username.
+env_id_re = re.compile(r'^(?:[\w:-]+\/)?([\w:.-]+)-v(\d+)$')
+
+def load(name):
+    import pkg_resources # takes ~400ms to load, so we import it lazily
+    entry_point = pkg_resources.EntryPoint.parse('x={}'.format(name))
+    result = entry_point.load(False)
+    return result
+
+class EnvSpec(object):
+    """A specification for a particular instance of the environment. Used
+    to register the parameters for official evaluations.
+
+    Args:
+        id (str): The official environment ID
+        entry_point (Optional[str]): The Python entrypoint of the environment class (e.g. module.name:Class)
+        trials (int): The number of trials to average reward over
+        reward_threshold (Optional[int]): The reward threshold before the task is considered solved
+        local_only: True iff the environment is to be used only on the local machine (e.g. debugging envs)
+        kwargs (dict): The kwargs to pass to the environment class
+        nondeterministic (bool): Whether this environment is non-deterministic even after seeding
+        tags (dict[str:any]): A set of arbitrary key-value tags on this environment, including simple property=True tags
+
+    Attributes:
+        id (str): The official environment ID
+        trials (int): The number of trials run in official evaluation
+    """
+
+    def __init__(self, id, entry_point=None, trials=100, reward_threshold=None, local_only=False, kwargs=None, nondeterministic=False, tags=None, max_episode_steps=None, max_episode_seconds=None, timestep_limit=None):
+        self.id = id
+        # Evaluation parameters
+        self.trials = trials
+        self.reward_threshold = reward_threshold
+        # Environment properties
+        self.nondeterministic = nondeterministic
+
+        if tags is None:
+            tags = {}
+        self.tags = tags
+
+        # BACKWARDS COMPAT 2017/1/18
+        if tags.get('wrapper_config.TimeLimit.max_episode_steps'):
+            max_episode_steps = tags.get('wrapper_config.TimeLimit.max_episode_steps')
+            # TODO: Add the following deprecation warning after 2017/02/18
+            # warnings.warn("DEPRECATION WARNING wrapper_config.TimeLimit has been deprecated. Replace any calls to `register(tags={'wrapper_config.TimeLimit.max_episode_steps': 200)}` with `register(max_episode_steps=200)`. This change was made 2017/1/31 and is included in gym version 0.8.0. If you are getting many of these warnings, you may need to update universe past version 0.21.3")
+
+        tags['wrapper_config.TimeLimit.max_episode_steps'] = max_episode_steps
+        ######
+
+        # BACKWARDS COMPAT 2017/1/31
+        if timestep_limit is not None:
+            max_episode_steps = timestep_limit
+            # TODO: Add the following deprecation warning after 2017/03/01
+            # warnings.warn("register(timestep_limit={}) is deprecated. Use register(max_episode_steps={}) instead.".format(timestep_limit, timestep_limit))
+        ######
+
+        self.max_episode_steps = max_episode_steps
+        self.max_episode_seconds = max_episode_seconds
+
+        # We may make some of these other parameters public if they're
+        # useful.
+        match = env_id_re.search(id)
+        if not match:
+            raise error.Error('Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id, env_id_re.pattern))
+        self._env_name = match.group(1)
+        self._entry_point = entry_point
+        self._local_only = local_only
+        self._kwargs = {} if kwargs is None else kwargs
+
+    def make(self):
+        """Instantiates an instance of the environment with appropriate kwargs"""
+        if self._entry_point is None:
+            raise error.Error('Attempting to make deprecated env {}. (HINT: is there a newer registered version of this env?)'.format(self.id))
+
+        elif callable(self._entry_point):
+            env = self._entry_point(**self._kwargs)
+        else:
+            cls = load(self._entry_point)
+            env = cls(**self._kwargs)
+
+        # Make the enviroment aware of which spec it came from.
+        env.unwrapped.spec = self
+
+        return env
+
+    def __repr__(self):
+        return "EnvSpec({})".format(self.id)
+
+    @property
+    def timestep_limit(self):
+        return self.max_episode_steps
+
+    @timestep_limit.setter
+    def timestep_limit(self, value):
+        self.max_episode_steps = value
+
+
+class EnvRegistry(object):
+    """Register an env by ID. IDs remain stable over time and are
+    guaranteed to resolve to the same environment dynamics (or be
+    desupported). The goal is that results on a particular environment
+    should always be comparable, and not depend on the version of the
+    code that was running.
+    """
+
+    def __init__(self):
+        self.env_specs = {}
+
+    def make(self, id):
+        logger.info('Making new env: %s', id)
+        spec = self.spec(id)
+        env = spec.make()
+        # We used to have people override _reset/_step rather than
+        # reset/step. Set _gym_disable_underscore_compat = True on
+        # your environment if you use these methods and don't want
+        # compatibility code to be invoked.
+        if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(env, "_gym_disable_underscore_compat", False):
+            patch_deprecated_methods(env)
+        if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
+            from gym.wrappers.time_limit import TimeLimit
+            env = TimeLimit(env,
+                            max_episode_steps=env.spec.max_episode_steps,
+                            max_episode_seconds=env.spec.max_episode_seconds)
+        return env
+
+
+    def all(self):
+        return self.env_specs.values()
+
+    def spec(self, id):
+        match = env_id_re.search(id)
+        if not match:
+            raise error.Error('Attempted to look up malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id.encode('utf-8'), env_id_re.pattern))
+
+        try:
+            return self.env_specs[id]
+        except KeyError:
+            # Parse the env name and check to see if it matches the non-version
+            # part of a valid env (could also check the exact number here)
+            env_name = match.group(1)
+            matching_envs = [valid_env_name for valid_env_name, valid_env_spec in self.env_specs.items()
+                             if env_name == valid_env_spec._env_name]
+            if matching_envs:
+                raise error.DeprecatedEnv('Env {} not found (valid versions include {})'.format(id, matching_envs))
+            else:
+                raise error.UnregisteredEnv('No registered env with id: {}'.format(id))
+
+    def register(self, id, **kwargs):
+        if id in self.env_specs:
+            raise error.Error('Cannot re-register id: {}'.format(id))
+        self.env_specs[id] = EnvSpec(id, **kwargs)
+
+# Have a global registry
+registry = EnvRegistry()
+
+def register(id, **kwargs):
+    return registry.register(id, **kwargs)
+
+def make(id):
+    return registry.make(id)
+
+def spec(id):
+    return registry.spec(id)
+
+warn_once = True
+
+def patch_deprecated_methods(env):
+    """
+    Methods renamed from '_method' to 'method', render() no longer has 'close' parameter, close is a separate method.
+    For backward compatibility, this makes it possible to work with unmodified environments.
+    """
+    global warn_once
+    if warn_once:
+        logger.warn("Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior." % str(type(env)))
+        warn_once = False
+    env.reset = env._reset
+    env.step  = env._step
+    env.seed  = env._seed
+    def render(mode):
+        return env._render(mode, close=False)
+    def close():
+        env._render("human", close=True)
+    env.render = render
+    env.close = close
--- a/src/gym/envs/robotics/README.md
+++ b/src/gym/envs/robotics/README.md
@@ -0,0 +1,54 @@
+# Robotics environments
+
+Details and documentation on these robotics environments are available in our [blog post](https://blog.openai.com/ingredients-for-robotics-research/), the accompanying [technical report](https://arxiv.org/abs/1802.09464), and the [Gym website](https://gym.openai.com/envs/#robotics).
+
+If you use these environments, please cite the following paper:
+
+```
+@misc{1802.09464,
+  Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba},
+  Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research},
+  Year = {2018},
+  Eprint = {arXiv:1802.09464},
+}
+```
+
+## Fetch environments
+<img src="https://blog.openai.com/content/images/2018/02/fetch-reach.png" width="500">
+
+[FetchReach-v0](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position.
+
+
+<img src="https://blog.openai.com/content/images/2018/02/fetch-slide.png" width="500">
+
+[FetchSlide-v0](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
+
+
+<img src="https://blog.openai.com/content/images/2018/02/fetch-push.png" width="500">
+
+[FetchPush-v0](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position.
+
+
+<img src="https://blog.openai.com/content/images/2018/02/fetch-pickandplace.png" width="500">
+
+[FetchPickAndPlace-v0](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
+
+## Shadow Dexterous Hand environments
+<img src="https://blog.openai.com/content/images/2018/02/hand-reach.png" width="500">
+
+[HandReach-v0](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
+
+
+<img src="https://blog.openai.com/content/images/2018/02/hand-block.png" width="500">
+
+[HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation.
+
+
+<img src="https://blog.openai.com/content/images/2018/02/hand-egg.png" width="500">
+
+[HandManipulateEgg-v0](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation.
+
+
+<img src="https://blog.openai.com/content/images/2018/02/hand-pen.png" width="500">
+
+[HandManipulatePen-v0](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation.
--- a/src/gym/envs/robotics/init.py
+++ b/src/gym/envs/robotics/init.py
@@ -0,0 +1,10 @@
+from gym.envs.robotics.fetch_env import FetchEnv
+from gym.envs.robotics.fetch.slide import FetchSlideEnv
+from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv
+from gym.envs.robotics.fetch.push import FetchPushEnv
+from gym.envs.robotics.fetch.reach import FetchReachEnv
+
+from gym.envs.robotics.hand.reach import HandReachEnv
+from gym.envs.robotics.hand.manipulate import HandBlockEnv
+from gym.envs.robotics.hand.manipulate import HandEggEnv
+from gym.envs.robotics.hand.manipulate import HandPenEnv
--- a/src/gym/envs/robotics/assets/LICENSE.md
+++ b/src/gym/envs/robotics/assets/LICENSE.md
@@ -0,0 +1,222 @@
+# Fetch Robotics
+The model of the [Fetch](http://fetchrobotics.com/platforms-research-development/) is based on [models provided by Fetch](https://github.com/fetchrobotics/fetch_ros/tree/indigo-devel/fetch_description). It was adapted and refined by OpenAI.
+
+# ShadowHand
+The model of the [ShadowHand](https://www.shadowrobot.com/products/dexterous-hand/) is based on [models provided by ShadowRobot](https://github.com/shadow-robot/sr_common/tree/kinetic-devel/sr_description/hand/model), and on code used under the following license:
+
+(C) Vikash Kumar, CSE, UW. Licensed under Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
+
+	                                 Apache License
+	                           Version 2.0, January 2004
+	                        http://www.apache.org/licenses/
+
+	   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+	   1. Definitions.
+
+	      "License" shall mean the terms and conditions for use, reproduction,
+	      and distribution as defined by Sections 1 through 9 of this document.
+
+	      "Licensor" shall mean the copyright owner or entity authorized by
+	      the copyright owner that is granting the License.
+
+	      "Legal Entity" shall mean the union of the acting entity and all
+	      other entities that control, are controlled by, or are under common
+	      control with that entity. For the purposes of this definition,
+	      "control" means (i) the power, direct or indirect, to cause the
+	      direction or management of such entity, whether by contract or
+	      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+	      outstanding shares, or (iii) beneficial ownership of such entity.
+
+	      "You" (or "Your") shall mean an individual or Legal Entity
+	      exercising permissions granted by this License.
+
+	      "Source" form shall mean the preferred form for making modifications,
+	      including but not limited to software source code, documentation
+	      source, and configuration files.
+
+	      "Object" form shall mean any form resulting from mechanical
+	      transformation or translation of a Source form, including but
+	      not limited to compiled object code, generated documentation,
+	      and conversions to other media types.
+
+	      "Work" shall mean the work of authorship, whether in Source or
+	      Object form, made available under the License, as indicated by a
+	      copyright notice that is included in or attached to the work
+	      (an example is provided in the Appendix below).
+
+	      "Derivative Works" shall mean any work, whether in Source or Object
+	      form, that is based on (or derived from) the Work and for which the
+	      editorial revisions, annotations, elaborations, or other modifications
+	      represent, as a whole, an original work of authorship. For the purposes
+	      of this License, Derivative Works shall not include works that remain
+	      separable from, or merely link (or bind by name) to the interfaces of,
+	      the Work and Derivative Works thereof.
+
+	      "Contribution" shall mean any work of authorship, including
+	      the original version of the Work and any modifications or additions
+	      to that Work or Derivative Works thereof, that is intentionally
+	      submitted to Licensor for inclusion in the Work by the copyright owner
+	      or by an individual or Legal Entity authorized to submit on behalf of
+	      the copyright owner. For the purposes of this definition, "submitted"
+	      means any form of electronic, verbal, or written communication sent
+	      to the Licensor or its representatives, including but not limited to
+	      communication on electronic mailing lists, source code control systems,
+	      and issue tracking systems that are managed by, or on behalf of, the
+	      Licensor for the purpose of discussing and improving the Work, but
+	      excluding communication that is conspicuously marked or otherwise
+	      designated in writing by the copyright owner as "Not a Contribution."
+
+	      "Contributor" shall mean Licensor and any individual or Legal Entity
+	      on behalf of whom a Contribution has been received by Licensor and
+	      subsequently incorporated within the Work.
+
+	   2. Grant of Copyright License. Subject to the terms and conditions of
+	      this License, each Contributor hereby grants to You a perpetual,
+	      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+	      copyright license to reproduce, prepare Derivative Works of,
+	      publicly display, publicly perform, sublicense, and distribute the
+	      Work and such Derivative Works in Source or Object form.
+
+	   3. Grant of Patent License. Subject to the terms and conditions of
+	      this License, each Contributor hereby grants to You a perpetual,
+	      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+	      (except as stated in this section) patent license to make, have made,
+	      use, offer to sell, sell, import, and otherwise transfer the Work,
+	      where such license applies only to those patent claims licensable
+	      by such Contributor that are necessarily infringed by their
+	      Contribution(s) alone or by combination of their Contribution(s)
+	      with the Work to which such Contribution(s) was submitted. If You
+	      institute patent litigation against any entity (including a
+	      cross-claim or counterclaim in a lawsuit) alleging that the Work
+	      or a Contribution incorporated within the Work constitutes direct
+	      or contributory patent infringement, then any patent licenses
+	      granted to You under this License for that Work shall terminate
+	      as of the date such litigation is filed.
+
+	   4. Redistribution. You may reproduce and distribute copies of the
+	      Work or Derivative Works thereof in any medium, with or without
+	      modifications, and in Source or Object form, provided that You
+	      meet the following conditions:
+
+	      (a) You must give any other recipients of the Work or
+	          Derivative Works a copy of this License; and
+
+	      (b) You must cause any modified files to carry prominent notices
+	          stating that You changed the files; and
+
+	      (c) You must retain, in the Source form of any Derivative Works
+	          that You distribute, all copyright, patent, trademark, and
+	          attribution notices from the Source form of the Work,
+	          excluding those notices that do not pertain to any part of
+	          the Derivative Works; and
+
+	      (d) If the Work includes a "NOTICE" text file as part of its
+	          distribution, then any Derivative Works that You distribute must
+	          include a readable copy of the attribution notices contained
+	          within such NOTICE file, excluding those notices that do not
+	          pertain to any part of the Derivative Works, in at least one
+	          of the following places: within a NOTICE text file distributed
+	          as part of the Derivative Works; within the Source form or
+	          documentation, if provided along with the Derivative Works; or,
+	          within a display generated by the Derivative Works, if and
+	          wherever such third-party notices normally appear. The contents
+	          of the NOTICE file are for informational purposes only and
+	          do not modify the License. You may add Your own attribution
+	          notices within Derivative Works that You distribute, alongside
+	          or as an addendum to the NOTICE text from the Work, provided
+	          that such additional attribution notices cannot be construed
+	          as modifying the License.
+
+	      You may add Your own copyright statement to Your modifications and
+	      may provide additional or different license terms and conditions
+	      for use, reproduction, or distribution of Your modifications, or
+	      for any such Derivative Works as a whole, provided Your use,
+	      reproduction, and distribution of the Work otherwise complies with
+	      the conditions stated in this License.
+
+	   5. Submission of Contributions. Unless You explicitly state otherwise,
+	      any Contribution intentionally submitted for inclusion in the Work
+	      by You to the Licensor shall be under the terms and conditions of
+	      this License, without any additional terms or conditions.
+	      Notwithstanding the above, nothing herein shall supersede or modify
+	      the terms of any separate license agreement you may have executed
+	      with Licensor regarding such Contributions.
+
+	   6. Trademarks. This License does not grant permission to use the trade
+	      names, trademarks, service marks, or product names of the Licensor,
+	      except as required for reasonable and customary use in describing the
+	      origin of the Work and reproducing the content of the NOTICE file.
+
+	   7. Disclaimer of Warranty. Unless required by applicable law or
+	      agreed to in writing, Licensor provides the Work (and each
+	      Contributor provides its Contributions) on an "AS IS" BASIS,
+	      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+	      implied, including, without limitation, any warranties or conditions
+	      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+	      PARTICULAR PURPOSE. You are solely responsible for determining the
+	      appropriateness of using or redistributing the Work and assume any
+	      risks associated with Your exercise of permissions under this License.
+
+	   8. Limitation of Liability. In no event and under no legal theory,
+	      whether in tort (including negligence), contract, or otherwise,
+	      unless required by applicable law (such as deliberate and grossly
+	      negligent acts) or agreed to in writing, shall any Contributor be
+	      liable to You for damages, including any direct, indirect, special,
+	      incidental, or consequential damages of any character arising as a
+	      result of this License or out of the use or inability to use the
+	      Work (including but not limited to damages for loss of goodwill,
+	      work stoppage, computer failure or malfunction, or any and all
+	      other commercial damages or losses), even if such Contributor
+	      has been advised of the possibility of such damages.
+
+	   9. Accepting Warranty or Additional Liability. While redistributing
+	      the Work or Derivative Works thereof, You may choose to offer,
+	      and charge a fee for, acceptance of support, warranty, indemnity,
+	      or other liability obligations and/or rights consistent with this
+	      License. However, in accepting such obligations, You may act only
+	      on Your own behalf and on Your sole responsibility, not on behalf
+	      of any other Contributor, and only if You agree to indemnify,
+	      defend, and hold each Contributor harmless for any liability
+	      incurred by, or claims asserted against, such Contributor by reason
+	      of your accepting any such warranty or additional liability.
+
+	   END OF TERMS AND CONDITIONS
+
+	   APPENDIX: How to apply the Apache License to your work.
+
+	      To apply the Apache License to your work, attach the following
+	      boilerplate notice, with the fields enclosed by brackets "[]"
+	      replaced with your own identifying information. (Don't include
+	      the brackets!)  The text should be enclosed in the appropriate
+	      comment syntax for the file format. We also recommend that a
+	      file or class name and description of purpose be included on the
+	      same "printed page" as the copyright notice for easier
+	      identification within third-party archives.
+
+	   Copyright [yyyy] [name of copyright owner]
+
+	   Licensed under the Apache License, Version 2.0 (the "License");
+	   you may not use this file except in compliance with the License.
+	   You may obtain a copy of the License at
+
+	       http://www.apache.org/licenses/LICENSE-2.0
+
+	   Unless required by applicable law or agreed to in writing, software
+	   distributed under the License is distributed on an "AS IS" BASIS,
+	   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	   See the License for the specific language governing permissions and
+	   limitations under the License.
+
+Additional license notices:
+
+	Sources		: 1) Manipulator and Manipulation in High Dimensional Spaces. Vikash Kumar, Ph.D. Thesis, CSE, Univ. of Washington. 2016.
+
+	Mujoco		:: Advanced physics simulation engine
+		Source		: www.roboti.us
+		Version		: 1.40
+		Released 	: 17Jan'17
+
+	Author		:: Vikash Kumar
+		Contacts 	: vikash@openai.com
+		Last edits 	: 3Apr'17
--- a/src/gym/envs/robotics/assets/fetch/pick_and_place.xml
+++ b/src/gym/envs/robotics/assets/fetch/pick_and_place.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="utf-8"?>
+<mujoco>
+	<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
+	<option timestep="0.002">
+		<flag warmstart="enable"></flag>
+	</option>
+
+	<include file="shared.xml"></include>
+	
+	<worldbody>
+		<geom name="floor0" pos="0.8 0.75 0" size="0.85 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
+		<body name="floor0" pos="0.8 0.75 0">
+			<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
+		</body>
+
+		<include file="robot.xml"></include>
+		
+		<body pos="1.3 0.75 0.2" name="table0">
+			<geom size="0.25 0.35 0.2" type="box" mass="2000" material="table_mat"></geom>
+		</body>
+		
+		<body name="object0" pos="0.025 0.025 0.025">
+			<joint name="object0:joint" type="free" damping="0.01"></joint>
+			<geom size="0.025 0.025 0.025" type="box" condim="3" name="object0" material="block_mat" mass="2"></geom>
+			<site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
+		</body>
+
+		<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
+	</worldbody>
+
+	<actuator>
+		<position ctrllimited="true" ctrlrange="0 0.2" joint="robot0:l_gripper_finger_joint" kp="30000" name="robot0:l_gripper_finger_joint" user="1"></position>
+		<position ctrllimited="true" ctrlrange="0 0.2" joint="robot0:r_gripper_finger_joint" kp="30000" name="robot0:r_gripper_finger_joint" user="1"></position>
+	</actuator>
+</mujoco>
--- a/src/gym/envs/robotics/assets/fetch/push.xml
+++ b/src/gym/envs/robotics/assets/fetch/push.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="utf-8"?>
+<mujoco>
+	<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
+	<option timestep="0.002">
+		<flag warmstart="enable"></flag>
+	</option>
+
+	<include file="shared.xml"></include>
+	
+	<worldbody>
+		<geom name="floor0" pos="0.8 0.75 0" size="0.85 0.70 1" type="plane" condim="3" material="floor_mat"></geom>
+		<body name="floor0" pos="0.8 0.75 0">
+			<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
+		</body>
+
+		<include file="robot.xml"></include>
+
+		<body pos="1.3 0.75 0.2" name="table0">
+			<geom size="0.25 0.35 0.2" type="box" mass="2000" material="table_mat"></geom>
+		</body>
+		
+		<body name="object0" pos="0.025 0.025 0.025">
+			<joint name="object0:joint" type="free" damping="0.01"></joint>
+			<geom size="0.025 0.025 0.025" type="box" condim="3" name="object0" material="block_mat" mass="2"></geom>
+			<site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
+		</body>
+
+		<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
+	</worldbody>
+	
+	<actuator></actuator>
+</mujoco>
--- a/src/gym/envs/robotics/assets/fetch/reach.xml
+++ b/src/gym/envs/robotics/assets/fetch/reach.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="utf-8"?>
+<mujoco>
+	<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
+	<option timestep="0.002">
+		<flag warmstart="enable"></flag>
+	</option>
+
+	<include file="shared.xml"></include>
+	
+	<worldbody>
+		<geom name="floor0" pos="0.8 0.75 0" size="0.85 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
+		<body name="floor0" pos="0.8 0.75 0">
+			<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
+		</body>
+
+		<include file="robot.xml"></include>
+		
+		<body pos="1.3 0.75 0.2" name="table0">
+			<geom size="0.25 0.35 0.2" type="box" mass="2000" material="table_mat"></geom>
+		</body>
+
+		<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
+	</worldbody>
+	
+	<actuator></actuator>
+</mujoco>
--- a/src/gym/envs/robotics/assets/fetch/robot.xml
+++ b/src/gym/envs/robotics/assets/fetch/robot.xml
@@ -0,0 +1,123 @@
+<mujoco>
+	<body mocap="true" name="robot0:mocap" pos="0 0 0">
+		<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.7" size="0.005 0.005 0.005" type="box"></geom>
+		<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="1 0.005 0.005" type="box"></geom>
+		<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="0.005 1 0.001" type="box"></geom>
+		<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="0.005 0.005 1" type="box"></geom>
+	</body>
+	<body childclass="robot0:fetch" name="robot0:base_link" pos="0.2869 0.2641 0">
+		<joint armature="0.0001" axis="1 0 0" damping="1e+11" name="robot0:slide0" pos="0 0 0" type="slide"></joint>
+		<joint armature="0.0001" axis="0 1 0" damping="1e+11" name="robot0:slide1" pos="0 0 0" type="slide"></joint>
+		<joint armature="0.0001" axis="0 0 1" damping="1e+11" name="robot0:slide2" pos="0 0 0" type="slide"></joint>
+		<inertial diaginertia="1.2869 1.2236 0.9868" mass="70.1294" pos="-0.0036 0 0.0014" quat="0.7605 -0.0133 -0.0061 0.6491"></inertial>
+		<geom mesh="robot0:base_link" name="robot0:base_link" material="robot0:base_mat" class="robot0:grey"></geom>
+		<body name="robot0:torso_lift_link" pos="-0.0869 0 0.3774">
+			<inertial diaginertia="0.3365 0.3354 0.0943" mass="10.7796" pos="-0.0013 -0.0009 0.2935" quat="0.9993 -0.0006 0.0336 0.0185"></inertial>
+			<joint axis="0 0 1" damping="1e+07" name="robot0:torso_lift_joint" range="0.0386 0.3861" type="slide"></joint>
+			<geom mesh="robot0:torso_lift_link" name="robot0:torso_lift_link" material="robot0:torso_mat"></geom>
+			<body name="robot0:head_pan_link" pos="0.0531 0 0.603">
+				<inertial diaginertia="0.0185 0.0128 0.0095" mass="2.2556" pos="0.0321 0.0161 0.039" quat="0.5148 0.5451 -0.453 0.4823"></inertial>
+				<joint axis="0 0 1" name="robot0:head_pan_joint" range="-1.57 1.57"></joint>
+				<geom mesh="robot0:head_pan_link" name="robot0:head_pan_link" material="robot0:head_mat" class="robot0:grey"></geom>
+				<body name="robot0:head_tilt_link" pos="0.1425 0 0.058">
+					<inertial diaginertia="0.0063 0.0059 0.0014" mass="0.9087" pos="0.0081 0.0025 0.0113" quat="0.6458 0.66 -0.274 0.2689"></inertial>
+					<joint axis="0 1 0" damping="1000" name="robot0:head_tilt_joint" range="-0.76 1.45" ref="0.06"></joint>
+					<geom mesh="robot0:head_tilt_link" name="robot0:head_tilt_link" material="robot0:head_mat" class="robot0:blue"></geom>
+					<body name="robot0:head_camera_link" pos="0.055 0 0.0225">
+						<inertial diaginertia="0 0 0" mass="0" pos="0.055 0 0.0225"></inertial>
+						<body name="robot0:head_camera_rgb_frame" pos="0 0.02 0">
+							<inertial diaginertia="0 0 0" mass="0" pos="0 0.02 0"></inertial>
+							<body name="robot0:head_camera_rgb_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5">
+								<inertial diaginertia="0 0 0" mass="0" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5"></inertial>
+								<camera euler="3.1415 0 0" fovy="50" name="head_camera_rgb" pos="0 0 0"></camera>
+							</body>
+						</body>
+						<body name="robot0:head_camera_depth_frame" pos="0 0.045 0">
+							<inertial diaginertia="0 0 0" mass="0" pos="0 0.045 0"></inertial>
+							<body name="robot0:head_camera_depth_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5">
+								<inertial diaginertia="0 0 0" mass="0" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5"></inertial>
+							</body>
+						</body>
+					</body>
+				</body>
+			</body>
+			<body name="robot0:shoulder_pan_link" pos="0.1195 0 0.3486">
+				<inertial diaginertia="0.009 0.0086 0.0041" mass="2.5587" pos="0.0927 -0.0056 0.0564" quat="-0.1364 0.7624 -0.1562 0.613"></inertial>
+				<joint axis="0 0 1" name="robot0:shoulder_pan_joint" range="-1.6056 1.6056"></joint>
+				<geom mesh="robot0:shoulder_pan_link" name="robot0:shoulder_pan_link" material="robot0:arm_mat"></geom>
+				<body name="robot0:shoulder_lift_link" pos="0.117 0 0.06">
+					<inertial diaginertia="0.0116 0.0112 0.0023" mass="2.6615" pos="0.1432 0.0072 -0.0001" quat="0.4382 0.4382 0.555 0.555"></inertial>
+					<joint axis="0 1 0" name="robot0:shoulder_lift_joint" range="-1.221 1.518"></joint>
+					<geom mesh="robot0:shoulder_lift_link" name="robot0:shoulder_lift_link" material="robot0:arm_mat" class="robot0:blue"></geom>
+					<body name="robot0:upperarm_roll_link" pos="0.219 0 0">
+						<inertial diaginertia="0.0047 0.0045 0.0019" mass="2.3311" pos="0.1165 0.0014 0" quat="-0.0136 0.707 0.0136 0.707"></inertial>
+						<joint axis="1 0 0" limited="false" name="robot0:upperarm_roll_joint"></joint>
+						<geom mesh="robot0:upperarm_roll_link" name="robot0:upperarm_roll_link" material="robot0:arm_mat"></geom>
+						<body name="robot0:elbow_flex_link" pos="0.133 0 0">
+							<inertial diaginertia="0.0086 0.0084 0.002" mass="2.1299" pos="0.1279 0.0073 0" quat="0.4332 0.4332 0.5589 0.5589"></inertial>
+							<joint axis="0 1 0" name="robot0:elbow_flex_joint" range="-2.251 2.251"></joint>
+							<geom mesh="robot0:elbow_flex_link" name="robot0:elbow_flex_link" material="robot0:arm_mat" class="robot0:blue"></geom>
+							<body name="robot0:forearm_roll_link" pos="0.197 0 0">
+								<inertial diaginertia="0.0035 0.0031 0.0015" mass="1.6563" pos="0.1097 -0.0266 0" quat="-0.0715 0.7035 0.0715 0.7035"></inertial>
+								<joint armature="2.7538" axis="1 0 0" damping="3.5247" frictionloss="0" limited="false" name="robot0:forearm_roll_joint" stiffness="10"></joint>
+								<geom mesh="robot0:forearm_roll_link" name="robot0:forearm_roll_link" material="robot0:arm_mat"></geom>
+								<body name="robot0:wrist_flex_link" pos="0.1245 0 0">
+									<inertial diaginertia="0.0042 0.0042 0.0018" mass="1.725" pos="0.0882 0.0009 -0.0001" quat="0.4895 0.4895 0.5103 0.5103"></inertial>
+									<joint axis="0 1 0" name="robot0:wrist_flex_joint" range="-2.16 2.16"></joint>
+									<geom mesh="robot0:wrist_flex_link" name="robot0:wrist_flex_link" material="robot0:arm_mat" class="robot0:blue"></geom>
+									<body name="robot0:wrist_roll_link" pos="0.1385 0 0">
+										<inertial diaginertia="0.0001 0.0001 0.0001" mass="0.1354" pos="0.0095 0.0004 -0.0002"></inertial>
+										<joint axis="1 0 0" limited="false" name="robot0:wrist_roll_joint"></joint>
+										<geom mesh="robot0:wrist_roll_link" name="robot0:wrist_roll_link" material="robot0:arm_mat"></geom>
+										<body euler="0 0 0" name="robot0:gripper_link" pos="0.1664 0 0">
+											<inertial diaginertia="0.0024 0.0019 0.0013" mass="1.5175" pos="-0.09 -0.0001 -0.0017" quat="0 0.7071 0 0.7071"></inertial>
+											<geom mesh="robot0:gripper_link" name="robot0:gripper_link" material="robot0:gripper_mat"></geom>
+											<body name="robot0:gipper_camera_link" pos="0.055 0 0.0225">
+												<body name="robot0:gripper_camera_rgb_frame" pos="0 0.02 0">
+													<body name="robot0:gripper_camera_rgb_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5">
+														<camera euler="3.1415 0 0" fovy="50" name="gripper_camera_rgb" pos="0 0 0"></camera>
+													</body>
+												</body>
+												<body name="robot0:gripper_camera_depth_frame" pos="0 0.045 0">
+													<body name="robot0:gripper_camera_depth_optical_frame" pos="0 0 0" quat="0.5 -0.5 0.5 -0.5"></body>
+												</body>
+											</body>
+
+											<body childclass="robot0:fetchGripper" name="robot0:r_gripper_finger_link" pos="0 0.0159 0">
+												<inertial diaginertia="0.1 0.1 0.1" mass="4" pos="-0.01 0 0"></inertial>
+												<joint axis="0 1 0" name="robot0:r_gripper_finger_joint" range="0 0.05"></joint>
+												<geom pos="0 -0.008 0" size="0.0385 0.007 0.0135" type="box" name="robot0:r_gripper_finger_link" material="robot0:gripper_finger_mat" condim="4" friction="1 0.05 0.01"></geom>
+											</body>
+											<body childclass="robot0:fetchGripper" name="robot0:l_gripper_finger_link" pos="0 -0.0159 0">
+												<inertial diaginertia="0.1 0.1 0.1" mass="4" pos="-0.01 0 0"></inertial>
+												<joint axis="0 -1 0" name="robot0:l_gripper_finger_joint" range="0 0.05"></joint>
+												<geom pos="0 0.008 0" size="0.0385 0.007 0.0135" type="box" name="robot0:l_gripper_finger_link" material="robot0:gripper_finger_mat" condim="4" friction="1 0.05 0.01"></geom>
+											</body>
+											<site name="robot0:grip" pos="0.02 0 0" rgba="0 0 0 0" size="0.02 0.02 0.02"></site>
+										</body>
+									</body>
+								</body>
+							</body>
+						</body>
+					</body>
+				</body>
+			</body>
+		</body>
+		<body name="robot0:estop_link" pos="-0.1246 0.2389 0.3113" quat="0.7071 0.7071 0 0">
+			<inertial diaginertia="0 0 0" mass="0.002" pos="0.0024 -0.0033 0.0067" quat="0.3774 -0.1814 0.1375 0.8977"></inertial>
+			<geom mesh="robot0:estop_link" rgba="0.8 0 0 1" name="robot0:estop_link"></geom>
+		</body>
+		<body name="robot0:laser_link" pos="0.235 0 0.2878" quat="0 1 0 0">
+			<inertial diaginertia="0 0 0" mass="0.0083" pos="-0.0306 0.0007 0.0552" quat="0.5878 0.5378 -0.4578 0.3945"></inertial>
+			<geom mesh="robot0:laser_link" rgba="0.7922 0.8196 0.9333 1" name="robot0:laser_link"></geom>
+			<camera euler="1.55 -1.55 3.14" fovy="25" name="lidar" pos="0 0 0.02"></camera>
+		</body>
+		<body name="robot0:torso_fixed_link" pos="-0.0869 0 0.3774">
+			<inertial diaginertia="0.3865 0.3394 0.1009" mass="13.2775" pos="-0.0722 0.0057 0.2656" quat="0.9995 0.0249 0.0177 0.011"></inertial>
+			<geom mesh="robot0:torso_fixed_link" name="robot0:torso_fixed_link" class="robot0:blue"></geom>
+		</body>
+		<body name="robot0:external_camera_body_0" pos="0 0 0">
+			<camera euler="0 0.75 1.57" fovy="43.3" name="external_camera_0" pos="1.3 0 1.2"></camera>
+		</body>
+	</body>
+</mujoco>
--- a/src/gym/envs/robotics/assets/fetch/shared.xml
+++ b/src/gym/envs/robotics/assets/fetch/shared.xml
@@ -0,0 +1,66 @@
+<mujoco>
+    <asset>
+        <texture type="skybox" builtin="gradient" rgb1="0.44 0.85 0.56" rgb2="0.46 0.87 0.58" width="32" height="32"></texture>
+        <texture name="texture_block" file="block.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
+
+        <material name="floor_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 1"></material>
+        <material name="table_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.93 0.93 0.93 1"></material>
+        <material name="block_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 1"></material>
+        <material name="puck_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 1"></material>
+        <material name="robot0:geomMat" shininess="0.03" specular="0.4"></material>
+        <material name="robot0:gripper_finger_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
+        <material name="robot0:gripper_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
+        <material name="robot0:arm_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
+        <material name="robot0:head_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
+        <material name="robot0:torso_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
+        <material name="robot0:base_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
+        
+        <mesh file="base_link_collision.stl" name="robot0:base_link"></mesh>
+        <mesh file="bellows_link_collision.stl" name="robot0:bellows_link"></mesh>
+        <mesh file="elbow_flex_link_collision.stl" name="robot0:elbow_flex_link"></mesh>
+        <mesh file="estop_link.stl" name="robot0:estop_link"></mesh>
+        <mesh file="forearm_roll_link_collision.stl" name="robot0:forearm_roll_link"></mesh>
+        <mesh file="gripper_link.stl" name="robot0:gripper_link"></mesh>
+        <mesh file="head_pan_link_collision.stl" name="robot0:head_pan_link"></mesh>
+        <mesh file="head_tilt_link_collision.stl" name="robot0:head_tilt_link"></mesh>
+        <mesh file="l_wheel_link_collision.stl" name="robot0:l_wheel_link"></mesh>
+        <mesh file="laser_link.stl" name="robot0:laser_link"></mesh>
+        <mesh file="r_wheel_link_collision.stl" name="robot0:r_wheel_link"></mesh>
+        <mesh file="torso_lift_link_collision.stl" name="robot0:torso_lift_link"></mesh>
+        <mesh file="shoulder_pan_link_collision.stl" name="robot0:shoulder_pan_link"></mesh>
+        <mesh file="shoulder_lift_link_collision.stl" name="robot0:shoulder_lift_link"></mesh>
+        <mesh file="upperarm_roll_link_collision.stl" name="robot0:upperarm_roll_link"></mesh>
+        <mesh file="wrist_flex_link_collision.stl" name="robot0:wrist_flex_link"></mesh>
+        <mesh file="wrist_roll_link_collision.stl" name="robot0:wrist_roll_link"></mesh>
+        <mesh file="torso_fixed_link.stl" name="robot0:torso_fixed_link"></mesh>
+    </asset>
+
+    <equality>
+        <weld body1="robot0:mocap" body2="robot0:gripper_link" solimp="0.9 0.95 0.001" solref="0.02 1"></weld>
+    </equality>
+    
+    <contact>
+        <exclude body1="robot0:r_gripper_finger_link" body2="robot0:l_gripper_finger_link"></exclude>
+        <exclude body1="robot0:torso_lift_link" body2="robot0:torso_fixed_link"></exclude>
+        <exclude body1="robot0:torso_lift_link" body2="robot0:shoulder_pan_link"></exclude>
+    </contact>
+    
+    <default>
+        <default class="robot0:fetch">
+            <geom margin="0.001" material="robot0:geomMat" rgba="1 1 1 1" solimp="0.99 0.99 0.01" solref="0.01 1" type="mesh" user="0"></geom>
+            <joint armature="1" damping="50" frictionloss="0" stiffness="0"></joint>
+            
+            <default class="robot0:fetchGripper">
+                <geom condim="4" margin="0.001" type="box" user="0" rgba="0.356 0.361 0.376 1.0"></geom>
+                <joint armature="100" damping="1000" limited="true" solimplimit="0.99 0.999 0.01" solreflimit="0.01 1" type="slide"></joint>
+            </default>
+
+            <default class="robot0:grey">
+                <geom rgba="0.356 0.361 0.376 1.0"></geom>
+            </default>
+            <default class="robot0:blue">
+                <geom rgba="0.086 0.506 0.767 1.0"></geom>
+            </default>
+        </default>
+    </default>
+</mujoco>
--- a/src/gym/envs/robotics/assets/fetch/slide.xml
+++ b/src/gym/envs/robotics/assets/fetch/slide.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="utf-8"?>
+<mujoco>
+	<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
+	<option timestep="0.002">
+		<flag warmstart="enable"></flag>
+	</option>
+
+	<include file="shared.xml"></include>
+	
+	<worldbody>
+		<geom name="floor0" pos="1 0.75 0" size="1.05 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
+		<body name="floor0" pos="1 0.75 0">
+			<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
+		</body>
+
+		<include file="robot.xml"></include>
+		
+		<body name="table0" pos="1.32441906 0.75018422 0.2">
+			<geom size="0.625 0.45 0.2" type="box" condim="3" name="table0" material="table_mat" mass="2000" friction="0.1 0.005 0.0001"></geom>
+		</body>
+
+		<body name="object0" pos="0.025 0.025 0.02">
+			<joint name="object0:joint" type="free" damping="0.01"></joint>
+			<geom size="0.025 0.02" type="cylinder" condim="3" name="object0" material="puck_mat" friction="0.1 0.005 0.0001" mass="2"></geom>
+			<site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
+		</body>
+
+		<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
+	</worldbody>
+
+	<actuator></actuator>
+</mujoco>
--- a/src/gym/envs/robotics/assets/hand/manipulate_block.xml
+++ b/src/gym/envs/robotics/assets/hand/manipulate_block.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="utf-8"?>
+<mujoco>
+    <compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
+    <option timestep="0.002" iterations="20" apirate="200">
+        <flag warmstart="enable"></flag>
+    </option>
+
+    <include file="shared.xml"></include>
+
+    <asset>
+        <include file="shared_asset.xml"></include>
+
+        <texture name="texture:object" file="block.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
+        <texture name="texture:hidden" file="block_hidden.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
+
+        <material name="material:object" texture="texture:object" specular="1" shininess="0.3" reflectance="0"></material>
+        <material name="material:hidden" texture="texture:hidden" specular="1" shininess="0.3" reflectance="0"></material>
+        <material name="material:target" texture="texture:object" specular="1" shininess="0.3" reflectance="0" rgba="1 1 1 0.5"></material>
+    </asset>
+
+    <worldbody>
+        <geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
+        <body name="floor0" pos="1 1 0"></body>
+
+        <include file="robot.xml"></include>
+
+        <body name="object" pos="1 0.87 0.2">
+            <geom name="object" type="box" size="0.025 0.025 0.025" material="material:object" condim="4" density="567"></geom>
+            <geom name="object_hidden" type="box" size="0.024 0.024 0.024" material="material:hidden" condim="4" contype="0" conaffinity="0" mass="0"></geom>
+            <site name="object:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
+            <joint name="object:joint" type="free" damping="0.01"></joint>
+        </body>
+        <body name="target" pos="1 0.87 0.2">
+            <geom name="target" type="box" size="0.025 0.025 0.025" material="material:target" condim="4" group="2" contype="0" conaffinity="0"></geom>
+            <site name="target:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
+            <joint name="target:joint" type="free" damping="0.01"></joint>
+        </body>
+        
+        <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 1 4" dir="0 0 -1" name="light0"></light>
+    </worldbody>
+</mujoco>
--- a/src/gym/envs/robotics/assets/hand/manipulate_egg.xml
+++ b/src/gym/envs/robotics/assets/hand/manipulate_egg.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="utf-8"?>
+<mujoco>
+    <compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
+    <option timestep="0.002" iterations="20" apirate="200">
+        <flag warmstart="enable"></flag>
+    </option>
+
+    <include file="shared.xml"></include>
+
+    <asset>
+        <include file="shared_asset.xml"></include>
+
+        <texture name="texture:object" file="block.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
+        <texture name="texture:hidden" file="block_hidden.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
+        <material name="material:object" texture="texture:object" specular="1" shininess="0.3" reflectance="0"></material>
+        <material name="material:hidden" texture="texture:hidden" specular="1" shininess="0.3" reflectance="0"></material>
+        <material name="material:target" texture="texture:object" specular="1" shininess="0.3" reflectance="0" rgba="1 1 1 0.5"></material>
+    </asset>
+
+    <worldbody>
+        <geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
+        <body name="floor0" pos="1 1 0"></body>
+
+        <include file="robot.xml"></include>
+
+        <body name="object" pos="1 0.87 0.2">
+            <geom name="object" type="ellipsoid" size="0.03 0.03 0.04" material="material:object" condim="4"></geom>
+            <geom name="object_hidden" type="ellipsoid" size="0.029 0.029 0.03" material="material:hidden" condim="4" contype="0" conaffinity="0" mass="0"></geom>
+            <site name="object:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
+            <joint name="object:joint" type="free" damping="0.01"></joint>
+        </body>
+        <body name="target" pos="1 0.87 0.2">
+            <geom name="target" type="ellipsoid" size="0.03 0.03 0.04" material="material:target" condim="4" group="2" contype="0" conaffinity="0"></geom>
+            <site name="target:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
+            <joint name="target:joint" type="free" damping="0.01"></joint>
+        </body>
+        
+        <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 1 4" dir="0 0 -1" name="light0"></light>
+    </worldbody>
+</mujoco>
--- a/src/gym/envs/robotics/assets/hand/manipulate_pen.xml
+++ b/src/gym/envs/robotics/assets/hand/manipulate_pen.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="utf-8"?>
+<mujoco>
+    <compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
+    <option timestep="0.002" iterations="20" apirate="200">
+        <flag warmstart="enable"></flag>
+    </option>
+
+    <include file="shared.xml"></include>
+
+    <asset>
+        <include file="shared_asset.xml"></include>
+
+        <material name="material:object" specular="0" shininess="0.5" reflectance="0.0" rgba="0.46 0.81 0.88 1.0"></material>
+        <material name="material:target" specular="0" shininess="0.5" reflectance="0.0" rgba="0.46 0.81 0.88 0.5"></material>
+    </asset>
+
+    <worldbody>
+        <geom name="floor0" pos="1 1 -0.2" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
+        <body name="floor0" pos="1 1 0"></body>
+
+        <include file="robot.xml"></include>
+
+        <body name="object" pos="1 0.87 0.2" euler="-1 1 0">
+            <geom name="object" type="capsule" size="0.008 0.1" material="material:object" condim="4"></geom>
+            <site name="object:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
+            <site name="object:top" pos="0 0 0.1" rgba="1 0 0 1" size="0.0081"></site>
+            <site name="object:bottom" pos="0 0 -0.1" rgba="0 1 0 1" size="0.0081"></site>
+            <joint name="object:joint" type="free" damping="0.01"></joint>
+        </body>
+        <body name="target" pos="1 0.87 0.2" euler="-1 1 0">
+            <geom name="target" type="capsule" size="0.008 0.1" material="material:target" condim="4" group="2" contype="0" conaffinity="0"></geom>
+            <site name="target:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
+            <site name="target:top" pos="0 0 0.1" rgba="1 0 0 0.5" size="0.0081"></site>
+            <site name="target:bottom" pos="0 0 -0.1" rgba="0 1 0 0.5" size="0.0081"></site>
+            <joint name="target:joint" type="free" damping="0.01"></joint>
+        </body>
+        
+        <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 1 4" dir="0 0 -1" name="light0"></light>
+    </worldbody>
+</mujoco>
--- a/src/gym/envs/robotics/assets/hand/reach.xml
+++ b/src/gym/envs/robotics/assets/hand/reach.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<mujoco>
+    <compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
+    <option timestep="0.002" iterations="20" apirate="200">
+        <flag warmstart="enable"></flag>
+    </option>
+
+    <include file="shared.xml"></include>
+
+    <asset>
+        <include file="shared_asset.xml"></include>
+    </asset>
+
+    <worldbody>
+        <geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
+        <body name="floor0" pos="1 1 0">
+            <site name="target0" pos="0 0 0" size="0.005" rgba="1 0 0 1" type="sphere"></site>
+            <site name="target1" pos="0 0 0" size="0.005" rgba="0 1 0 1" type="sphere"></site>
+            <site name="target2" pos="0 0 0" size="0.005" rgba="0 0 1 1" type="sphere"></site>
+            <site name="target3" pos="0 0 0" size="0.005" rgba="1 1 0 1" type="sphere"></site>
+            <site name="target4" pos="0 0 0" size="0.005" rgba="1 0 1 1" type="sphere"></site>
+
+            <site name="finger0" pos="0 0 0" size="0.01" rgba="1 0 0 0.2" type="sphere"></site>
+            <site name="finger1" pos="0 0 0" size="0.01" rgba="0 1 0 0.2" type="sphere"></site>
+            <site name="finger2" pos="0 0 0" size="0.01" rgba="0 0 1 0.2" type="sphere"></site>
+            <site name="finger3" pos="0 0 0" size="0.01" rgba="1 1 0 0.2" type="sphere"></site>
+            <site name="finger4" pos="0 0 0" size="0.01" rgba="1 0 1 0.2" type="sphere"></site>
+        </body>
+
+        <include file="robot.xml"></include>
+        
+        <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
+    </worldbody>
+</mujoco>
--- a/src/gym/envs/robotics/assets/hand/robot.xml
+++ b/src/gym/envs/robotics/assets/hand/robot.xml
@@ -0,0 +1,160 @@
+<!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
+<mujoco>
+    <body name="robot0:hand mount" pos="1 1.25 0.15" euler="1.5708 0 3.14159">
+        <inertial mass="0.1" pos="0 0 0" diaginertia="0.001 0.001 0.001"></inertial>
+        <body childclass="robot0:asset_class" name="robot0:forearm" pos="0 0.01 0" euler="0 0 0">
+            <inertial pos="0.001 -0.002 0.29" quat="0.982 -0.016 0 -0.188" mass="4" diaginertia="0.01 0.01 0.0075"></inertial>
+            <geom class="robot0:D_Vizual" pos="0 0.01 0.04" name="robot0:V_forearm" mesh="robot0:forearm" euler="0 0 1.57"></geom>
+            <geom class="robot0:DC_Hand" name="robot0:C_forearm" type="mesh" mesh="robot0:forearm_cvx" pos="0 0.01 0.04" euler="0 0 1.57" rgba="0.4 0.5 0.6 0.7"></geom>
+            <body name="robot0:wrist" pos="0 0 0.256">
+                <inertial pos="0.003 0 0.016" quat="0.504 0.496 0.495 0.504" mass="0.3" diaginertia="0.001 0.001 0.001"></inertial>
+                <joint name="robot0:WRJ1" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.489 0.14" damping="0.5" armature="0.005" user="1123"></joint>
+                <geom class="robot0:D_Vizual" name="robot0:V_wrist" mesh="robot0:wrist"></geom>
+                <geom class="robot0:DC_Hand" name="robot0:C_wrist" type="capsule" pos="0 0 0" quat="0.707 0.707 0 0" size="0.015 0.01" rgba="0.4 0.5 0.6 0.1"></geom>
+                <body name="robot0:palm" pos="0 0 0.034">
+                    <inertial pos="0.006 0 0.036" quat="0.716 0.044 0.075 0.693" mass="0.3" diaginertia="0.001 0.001 0.001"></inertial>
+                    <joint name="robot0:WRJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="-0.698 0.489" damping="0.5" armature="0.005" user="1122"></joint>
+                    <geom class="robot0:D_Vizual" name="robot0:V_palm" mesh="robot0:palm"></geom>
+                    <geom class="robot0:DC_Hand" name="robot0:C_palm0" type="box" pos="0.011 0 0.038" size="0.032 0.0111 0.049" rgba="0.4 0.5 0.6 0.1"></geom>
+                    <geom class="robot0:DC_Hand" name="robot0:C_palm1" type="box" pos="-0.032 0 0.014" size="0.011 0.0111 0.025" rgba="0.4 0.5 0.6 0.1"></geom>
+                    <body name="robot0:ffknuckle" pos="0.033 0 0.095">
+                        <inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                        <joint name="robot0:FFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1103"></joint>
+                        <geom class="robot0:D_Vizual" name="robot0:V_ffknuckle" mesh="robot0:knuckle"></geom>
+                        <body name="robot0:ffproximal" pos="0 0 0">
+                            <inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                            <joint name="robot0:FFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1102"></joint>
+                            <geom class="robot0:D_Vizual" name="robot0:V_ffproximal" mesh="robot0:F3"></geom>
+                            <geom class="robot0:DC_Hand" name="robot0:C_ffproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
+                            <body name="robot0:ffmiddle" pos="0 0 0.045">
+                                <inertial pos="0 0 0.011" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                <joint name="robot0:FFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1101"></joint>
+                                <geom class="robot0:D_Vizual" name="robot0:V_ffmiddle" mesh="robot0:F2"></geom>
+                                <geom class="robot0:DC_Hand" name="robot0:C_ffmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
+                                <body name="robot0:ffdistal" pos="0 0 0.025">
+                                    <inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                    <joint name="robot0:FFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1100"></joint>
+                                    <geom class="robot0:D_Vizual" name="robot0:V_ffdistal" pos="0 0 0.001" mesh="robot0:F1"></geom>
+                                    <geom class="robot0:DC_Hand" name="robot0:C_ffdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
+                                    <site name="robot0:S_fftip" pos="0 0 0.026" group="3"></site>
+                                    <site class="robot0:D_Touch" name="robot0:Tch_fftip"></site>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                    <body name="robot0:mfknuckle" pos="0.011 0 0.099">
+                        <inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                        <joint name="robot0:MFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1107"></joint>
+                        <geom class="robot0:D_Vizual" name="robot0:V_mfknuckle" mesh="robot0:knuckle"></geom>
+                        <body name="robot0:mfproximal" pos="0 0 0">
+                            <inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                            <joint name="robot0:MFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1106"></joint>
+                            <geom class="robot0:D_Vizual" name="robot0:V_mfproximal" mesh="robot0:F3"></geom>
+                            <geom class="robot0:DC_Hand" name="robot0:C_mfproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
+                            <body name="robot0:mfmiddle" pos="0 0 0.045">
+                                <inertial pos="0 0 0.012" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                <joint name="robot0:MFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1105"></joint>
+                                <geom class="robot0:D_Vizual" name="robot0:V_mfmiddle" mesh="robot0:F2"></geom>
+                                <geom class="robot0:DC_Hand" name="robot0:C_mfmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
+                                <body name="robot0:mfdistal" pos="0 0 0.025">
+                                    <inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                    <joint name="robot0:MFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1104"></joint>
+                                    <geom class="robot0:D_Vizual" name="robot0:V_mfdistal" mesh="robot0:F1"></geom>
+                                    <geom class="robot0:DC_Hand" name="robot0:C_mfdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
+                                    <site name="robot0:S_mftip" pos="0 0 0.026" group="3"></site>
+                                    <site class="robot0:D_Touch" name="robot0:Tch_mftip"></site>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                    <body name="robot0:rfknuckle" pos="-0.011 0 0.095">
+                        <inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                        <joint name="robot0:RFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1111"></joint>
+                        <geom class="robot0:D_Vizual" name="robot0:V_rfknuckle" mesh="robot0:knuckle"></geom>
+                        <body name="robot0:rfproximal" pos="0 0 0">
+                            <inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                            <joint name="robot0:RFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1110"></joint>
+                            <geom class="robot0:D_Vizual" name="robot0:V_rfproximal" mesh="robot0:F3"></geom>
+                            <geom class="robot0:DC_Hand" name="robot0:C_rfproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
+                            <body name="robot0:rfmiddle" pos="0 0 0.045">
+                                <inertial pos="0 0 0.012" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                <joint name="robot0:RFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1109"></joint>
+                                <geom class="robot0:D_Vizual" name="robot0:V_rfmiddle" mesh="robot0:F2"></geom>
+                                <geom class="robot0:DC_Hand" name="robot0:C_rfmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
+                                <body name="robot0:rfdistal" pos="0 0 0.025">
+                                    <inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                    <joint name="robot0:RFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1108"></joint>
+                                    <geom class="robot0:D_Vizual" name="robot0:V_rfdistal" mesh="robot0:F1" pos="0 0 0.001"></geom>
+                                    <geom class="robot0:DC_Hand" name="robot0:C_rfdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
+                                    <site name="robot0:S_rftip" pos="0 0 0.026" group="3"></site>
+                                    <site class="robot0:D_Touch" name="robot0:Tch_rftip"></site>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                    <body name="robot0:lfmetacarpal" pos="-0.017 0 0.044">
+                        <inertial pos="-0.014 0.001 0.014" quat="0.709 -0.092 -0.063 0.696" mass="0.075" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                        <joint name="robot0:LFJ4" type="hinge" pos="0 0 0" axis="0.571 0 0.821" range="0 0.785" user="1116"></joint>
+                        <geom class="robot0:D_Vizual" name="robot0:V_lfmetacarpal" pos="-0.016 0 -0.023" mesh="robot0:lfmetacarpal"></geom>
+                        <geom class="robot0:DC_Hand" name="robot0:C_lfmetacarpal" type="box" pos="-0.0165 0 0.01" size="0.0095 0.0111 0.025" rgba="0.4 0.5 0.6 0.2"></geom>
+                        <body name="robot0:lfknuckle" pos="-0.017 0 0.044">
+                            <inertial pos="0 0 0" quat="0.52 0.854 0.006 -0.003" mass="0.008" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                            <joint name="robot0:LFJ3" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.349 0.349" user="1115"></joint>
+                            <geom class="robot0:D_Vizual" name="robot0:V_lfknuckle" mesh="robot0:knuckle"></geom>
+                            <body name="robot0:lfproximal" pos="0 0 0">
+                                <inertial pos="0 0 0.023" quat="0.707 -0.004 0.004 0.707" mass="0.014" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                <joint name="robot0:LFJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1114"></joint>
+                                <geom class="robot0:D_Vizual" name="robot0:V_lfproximal" mesh="robot0:F3"></geom>
+                                <geom class="robot0:DC_Hand" name="robot0:C_lfproximal" type="capsule" pos="0 0 0.0225" size="0.01 0.0225"></geom>
+                                <body name="robot0:lfmiddle" pos="0 0 0.045">
+                                    <inertial pos="0 0 0.012" quat="0.707 0 0 0.707" mass="0.012" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                    <joint name="robot0:LFJ1" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1113"></joint>
+                                    <geom class="robot0:D_Vizual" name="robot0:V_lfmiddle" mesh="robot0:F2"></geom>
+                                    <geom class="robot0:DC_Hand" name="robot0:C_lfmiddle" type="capsule" pos="0 0 0.0125" size="0.00805 0.0125"></geom>
+                                    <body name="robot0:lfdistal" pos="0 0 0.025">
+                                        <inertial pos="0 0 0.015" quat="0.707 -0.003 0.003 0.707" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                        <joint name="robot0:LFJ0" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.571" user="1112"></joint>
+                                        <geom class="robot0:D_Vizual" name="robot0:V_lfdistal" mesh="robot0:F1" pos="0 0 0.001"></geom>
+                                        <geom class="robot0:DC_Hand" name="robot0:C_lfdistal" type="capsule" pos="0 0 0.012" size="0.00705 0.012" condim="4"></geom>
+                                        <site name="robot0:S_lftip" pos="0 0 0.026" group="3"></site>
+                                        <site class="robot0:D_Touch" name="robot0:Tch_lftip"></site>
+                                    </body>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                    <body name="robot0:thbase" pos="0.034 -0.009 0.029" axisangle="0 1 0 0.785">
+                        <inertial pos="0 0 0" mass="0.01" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                        <joint name="robot0:THJ4" type="hinge" pos="0 0 0" axis="0 0 -1" range="-1.047 1.047" user="1121"></joint>
+                        <geom name="robot0:V_thbase" type="box" group="1" pos="0 0 0" size="0.001 0.001 0.001"></geom>
+                        <body name="robot0:thproximal" pos="0 0 0">
+                            <inertial pos="0 0 0.017" quat="0.982 0 0.001 0.191" mass="0.016" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                            <joint name="robot0:THJ3" type="hinge" pos="0 0 0" axis="1 0 0" range="0 1.222" user="1120"></joint>
+                            <geom class="robot0:D_Vizual" name="robot0:V_thproximal" mesh="robot0:TH3_z"></geom>
+                            <geom class="robot0:DC_Hand" name="robot0:C_thproximal" type="capsule" pos="0 0 0.019" size="0.013 0.019" rgba="0.4 0.5 0.6 0.1"></geom>
+                            <body name="robot0:thhub" pos="0 0 0.038">
+                                <inertial pos="0 0 0" mass="0.002" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                <joint name="robot0:THJ2" type="hinge" pos="0 0 0" axis="1 0 0" range="-0.209 0.209" user="1119"></joint>
+                                <geom name="robot0:V_thhub" type="box" group="1" pos="0 0 0" size="0.001 0.001 0.001"></geom>
+                                <body name="robot0:thmiddle" pos="0 0 0">
+                                    <inertial pos="0 0 0.016" quat="1 -0.001 -0.007 0.003" mass="0.016" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                    <joint name="robot0:THJ1" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.524 0.524" user="1118"></joint>
+                                    <geom class="robot0:D_Vizual" name="robot0:V_thmiddle" mesh="robot0:TH2_z"></geom>
+                                    <geom class="robot0:DC_Hand" name="robot0:C_thmiddle" type="capsule" pos="0 0 0.016" size="0.011 0.016"></geom>
+                                    <body name="robot0:thdistal" pos="0 0 0.032">
+                                        <inertial pos="0 0 0.016" quat="0.999 -0.005 -0.047 0.005" mass="0.016" diaginertia="1e-05 1e-05 1e-05"></inertial>
+                                        <joint name="robot0:THJ0" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.571 0" user="1117"></joint>
+                                        <geom class="robot0:D_Vizual" name="robot0:V_thdistal" mesh="robot0:TH1_z"></geom>
+                                        <geom class="robot0:DC_Hand" name="robot0:C_thdistal" type="capsule" pos="0 0 0.013" size="0.00918 0.013" condim="4"></geom>
+                                        <site name="robot0:S_thtip" pos="0 0 0.0275" group="3"></site>
+                                        <site class="robot0:D_Touch" name="robot0:Tch_thtip" size="0.005 0.011 0.016" pos="-0.005 0 0.02"></site>
+                                    </body>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+        </body>
+    </body>
+</mujoco>
--- a/src/gym/envs/robotics/assets/hand/shared.xml
+++ b/src/gym/envs/robotics/assets/hand/shared.xml
@@ -0,0 +1,254 @@
+<!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
+<mujoco>
+    <size njmax="500" nconmax="100" nuser_jnt="1" nuser_site="1" nuser_tendon="1" nuser_sensor="1" nuser_actuator="16" nstack="600000"></size>
+
+    <visual>
+        <map fogstart="3" fogend="5" force="0.1"></map>
+        <quality shadowsize="4096"></quality>
+    </visual>
+
+    <default>
+        <default class="robot0:asset_class">
+            <geom friction="1 0.005 0.001" condim="3" margin="0.0005" contype="1" conaffinity="1"></geom>
+            <joint limited="true" damping="0.1" armature="0.001" margin="0.01" frictionloss="0.001"></joint>
+            <site size="0.005" rgba="0.4 0.9 0.4 1"></site>
+            <general ctrllimited="true" forcelimited="true"></general>
+        </default>
+        <default class="robot0:D_Touch">
+            <site type="box" size="0.009 0.004 0.013" pos="0 -0.004 0.018" rgba="0.8 0.8 0.8 0.15" group="4"></site>
+        </default>
+        <default class="robot0:DC_Hand">
+            <geom material="robot0:MatColl" contype="1" conaffinity="0" group="4"></geom>
+        </default>
+        <default class="robot0:D_Vizual">
+            <geom material="robot0:MatViz" contype="0" conaffinity="0" group="1" type="mesh"></geom>
+        </default>
+        <default class="robot0:free">
+            <joint type="free" damping="0" armature="0" limited="false"></joint>
+        </default>
+    </default>
+
+    <contact>
+        <pair geom1="robot0:C_ffdistal" geom2="robot0:C_thdistal" condim="1"></pair>
+        <pair geom1="robot0:C_ffmiddle" geom2="robot0:C_thdistal" condim="1"></pair>
+        <pair geom1="robot0:C_ffproximal" geom2="robot0:C_thdistal" condim="1"></pair>
+        <pair geom1="robot0:C_mfproximal" geom2="robot0:C_thdistal" condim="1"></pair>
+        <pair geom1="robot0:C_mfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
+        <pair geom1="robot0:C_rfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
+        <pair geom1="robot0:C_lfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
+        <pair geom1="robot0:C_palm0" geom2="robot0:C_thdistal" condim="1"></pair>
+        <pair geom1="robot0:C_mfdistal" geom2="robot0:C_ffdistal" condim="1"></pair>
+        <pair geom1="robot0:C_rfdistal" geom2="robot0:C_mfdistal" condim="1"></pair>
+        <pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfdistal" condim="1"></pair>
+        <pair geom1="robot0:C_mfproximal" geom2="robot0:C_ffproximal" condim="1"></pair>
+        <pair geom1="robot0:C_rfproximal" geom2="robot0:C_mfproximal" condim="1"></pair>
+        <pair geom1="robot0:C_lfproximal" geom2="robot0:C_rfproximal" condim="1"></pair>
+        <pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfdistal" condim="1"></pair>
+        <pair geom1="robot0:C_lfdistal" geom2="robot0:C_mfdistal" condim="1"></pair>
+        <pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfmiddle" condim="1"></pair>
+        <pair geom1="robot0:C_lfmiddle" geom2="robot0:C_rfdistal" condim="1"></pair>
+        <pair geom1="robot0:C_lfmiddle" geom2="robot0:C_rfmiddle" condim="1"></pair>
+    </contact>
+
+    <tendon>
+        <fixed name="robot0:T_WRJ1r" limited="true" range="-0.032 0.032" user="1236">
+            <joint joint="robot0:WRJ1" coef="0.0325"></joint>
+        </fixed>
+        <fixed name="robot0:T_WRJ1l" limited="true" range="-0.032 0.032" user="1237">
+            <joint joint="robot0:WRJ1" coef="-0.0325"></joint>
+        </fixed>
+        <fixed name="robot0:T_WRJ0u" limited="true" range="-0.032 0.032" user="1236">
+            <joint joint="robot0:WRJ0" coef="0.0175"></joint>
+        </fixed>
+        <fixed name="robot0:T_WRJ0d" limited="true" range="-0.032 0.032" user="1237">
+            <joint joint="robot0:WRJ0" coef="-0.0175"></joint>
+        </fixed>
+        <fixed name="robot0:T_FFJ3r" limited="true" range="-0.018 0.018" user="1204">
+            <joint joint="robot0:FFJ3" coef="0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_FFJ3l" limited="true" range="-0.018 0.018" user="1205">
+            <joint joint="robot0:FFJ3" coef="-0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_FFJ2u" limited="true" range="-0.007 0.03" user="1202">
+            <joint joint="robot0:FFJ2" coef="0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_FFJ2d" limited="true" range="-0.03 0.007" user="1203">
+            <joint joint="robot0:FFJ2" coef="-0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_FFJ1c" limited="true" range="-0.001 0.001">
+            <joint joint="robot0:FFJ0" coef="0.00705"></joint>
+            <joint joint="robot0:FFJ1" coef="-0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_FFJ1u" limited="true" range="-0.007 0.03" user="1200">
+            <joint joint="robot0:FFJ0" coef="0.00705"></joint>
+            <joint joint="robot0:FFJ1" coef="0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_FFJ1d" limited="true" range="-0.03 0.007" user="1201">
+            <joint joint="robot0:FFJ0" coef="-0.00705"></joint>
+            <joint joint="robot0:FFJ1" coef="-0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_MFJ3r" limited="true" range="-0.018 0.018" user="1210">
+            <joint joint="robot0:MFJ3" coef="0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_MFJ3l" limited="true" range="-0.018 0.018" user="1211">
+            <joint joint="robot0:MFJ3" coef="-0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_MFJ2u" limited="true" range="-0.007 0.03" user="1208">
+            <joint joint="robot0:MFJ2" coef="0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_MFJ2d" limited="true" range="-0.03 0.007" user="1209">
+            <joint joint="robot0:MFJ2" coef="-0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_MFJ1c" limited="true" range="-0.001 0.001">
+            <joint joint="robot0:MFJ0" coef="0.00705"></joint>
+            <joint joint="robot0:MFJ1" coef="-0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_MFJ1u" limited="true" range="-0.007 0.03" user="1206">
+            <joint joint="robot0:MFJ0" coef="0.00705"></joint>
+            <joint joint="robot0:MFJ1" coef="0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_MFJ1d" limited="true" range="-0.03 0.007" user="1207">
+            <joint joint="robot0:MFJ0" coef="-0.00705"></joint>
+            <joint joint="robot0:MFJ1" coef="-0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_RFJ3r" limited="true" range="-0.018 0.018" user="1216">
+            <joint joint="robot0:RFJ3" coef="0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_RFJ3l" limited="true" range="-0.018 0.018" user="1217">
+            <joint joint="robot0:RFJ3" coef="-0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_RFJ2u" limited="true" range="-0.007 0.03" user="1214">
+            <joint joint="robot0:RFJ2" coef="0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_RFJ2d" limited="true" range="-0.03 0.007" user="1215">
+            <joint joint="robot0:RFJ2" coef="-0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_RFJ1c" limited="true" range="-0.001 0.001">
+            <joint joint="robot0:RFJ0" coef="0.00705"></joint>
+            <joint joint="robot0:RFJ1" coef="-0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_RFJ1u" limited="true" range="-0.007 0.03" user="1212">
+            <joint joint="robot0:RFJ0" coef="0.00705"></joint>
+            <joint joint="robot0:RFJ1" coef="0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_RFJ1d" limited="true" range="-0.03 0.007" user="1213">
+            <joint joint="robot0:RFJ0" coef="-0.00705"></joint>
+            <joint joint="robot0:RFJ1" coef="-0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_LFJ4u" limited="true" range="-0.007 0.03" user="1224">
+            <joint joint="robot0:LFJ4" coef="0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_LFJ4d" limited="true" range="-0.03 0.007" user="1225">
+            <joint joint="robot0:LFJ4" coef="-0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_LFJ3r" limited="true" range="-0.018 0.018" user="1222">
+            <joint joint="robot0:LFJ3" coef="0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_LFJ3l" limited="true" range="-0.018 0.018" user="1223">
+            <joint joint="robot0:LFJ3" coef="-0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_LFJ2u" limited="true" range="-0.007 0.03" user="1220">
+            <joint joint="robot0:LFJ2" coef="0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_LFJ2d" limited="true" range="-0.03 0.007" user="1221">
+            <joint joint="robot0:LFJ2" coef="-0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_LFJ1c" limited="true" range="-0.001 0.001">
+            <joint joint="robot0:LFJ0" coef="0.00705"></joint>
+            <joint joint="robot0:LFJ1" coef="-0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_LFJ1u" limited="true" range="-0.007 0.03" user="1218">
+            <joint joint="robot0:LFJ0" coef="0.00705"></joint>
+            <joint joint="robot0:LFJ1" coef="0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_LFJ1d" limited="true" range="-0.03 0.007" user="1219">
+            <joint joint="robot0:LFJ0" coef="-0.00705"></joint>
+            <joint joint="robot0:LFJ1" coef="-0.00805"></joint>
+        </fixed>
+        <fixed name="robot0:T_THJ4a" limited="true" range="-0.018 0.018" user="1234">
+            <joint joint="robot0:THJ4" coef="0.01636"></joint>
+        </fixed>
+        <fixed name="robot0:T_THJ4c" limited="true" range="-0.018 0.018" user="1235">
+            <joint joint="robot0:THJ4" coef="-0.01636"></joint>
+        </fixed>
+        <fixed name="robot0:T_THJ3u" limited="true" range="-0.007 0.03" user="1232">
+            <joint joint="robot0:THJ3" coef="0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_THJ3d" limited="true" range="-0.03 0.007" user="1233">
+            <joint joint="robot0:THJ3" coef="-0.01"></joint>
+        </fixed>
+        <fixed name="robot0:T_THJ2u" limited="true" range="-0.018 0.018" user="1230">
+            <joint joint="robot0:THJ2" coef="0.011"></joint>
+        </fixed>
+        <fixed name="robot0:T_THJ2d" limited="true" range="-0.018 0.018" user="1231">
+            <joint joint="robot0:THJ2" coef="-0.011"></joint>
+        </fixed>
+        <fixed name="robot0:T_THJ1r" limited="true" range="-0.018 0.018" user="1228">
+            <joint joint="robot0:THJ1" coef="0.011"></joint>
+        </fixed>
+        <fixed name="robot0:T_THJ1l" limited="true" range="-0.018 0.018" user="1229">
+            <joint joint="robot0:THJ1" coef="-0.011"></joint>
+        </fixed>
+        <fixed name="robot0:T_THJ0r" limited="true" range="-0.03 0.007" user="1226">
+            <joint joint="robot0:THJ0" coef="0.009"></joint>
+        </fixed>
+        <fixed name="robot0:T_THJ0l" limited="true" range="-0.007 0.03" user="1227">
+            <joint joint="robot0:THJ0" coef="-0.009"></joint>
+        </fixed>
+    </tendon>
+
+    <sensor>
+        <jointpos name="robot0:Sjp_WRJ1" joint="robot0:WRJ1"></jointpos>
+        <jointpos name="robot0:Sjp_WRJ0" joint="robot0:WRJ0"></jointpos>
+        <jointpos name="robot0:Sjp_FFJ3" joint="robot0:FFJ3"></jointpos>
+        <jointpos name="robot0:Sjp_FFJ2" joint="robot0:FFJ2"></jointpos>
+        <jointpos name="robot0:Sjp_FFJ1" joint="robot0:FFJ1"></jointpos>
+        <jointpos name="robot0:Sjp_FFJ0" joint="robot0:FFJ0"></jointpos>
+        <jointpos name="robot0:Sjp_MFJ3" joint="robot0:MFJ3"></jointpos>
+        <jointpos name="robot0:Sjp_MFJ2" joint="robot0:MFJ2"></jointpos>
+        <jointpos name="robot0:Sjp_MFJ1" joint="robot0:MFJ1"></jointpos>
+        <jointpos name="robot0:Sjp_MFJ0" joint="robot0:MFJ0"></jointpos>
+        <jointpos name="robot0:Sjp_RFJ3" joint="robot0:RFJ3"></jointpos>
+        <jointpos name="robot0:Sjp_RFJ2" joint="robot0:RFJ2"></jointpos>
+        <jointpos name="robot0:Sjp_RFJ1" joint="robot0:RFJ1"></jointpos>
+        <jointpos name="robot0:Sjp_RFJ0" joint="robot0:RFJ0"></jointpos>
+        <jointpos name="robot0:Sjp_LFJ4" joint="robot0:LFJ4"></jointpos>
+        <jointpos name="robot0:Sjp_LFJ3" joint="robot0:LFJ3"></jointpos>
+        <jointpos name="robot0:Sjp_LFJ2" joint="robot0:LFJ2"></jointpos>
+        <jointpos name="robot0:Sjp_LFJ1" joint="robot0:LFJ1"></jointpos>
+        <jointpos name="robot0:Sjp_LFJ0" joint="robot0:LFJ0"></jointpos>
+        <jointpos name="robot0:Sjp_THJ4" joint="robot0:THJ4"></jointpos>
+        <jointpos name="robot0:Sjp_THJ3" joint="robot0:THJ3"></jointpos>
+        <jointpos name="robot0:Sjp_THJ2" joint="robot0:THJ2"></jointpos>
+        <jointpos name="robot0:Sjp_THJ1" joint="robot0:THJ1"></jointpos>
+        <jointpos name="robot0:Sjp_THJ0" joint="robot0:THJ0"></jointpos>
+        <touch name="robot0:ST_Tch_fftip" site="robot0:Tch_fftip"></touch>
+        <touch name="robot0:ST_Tch_mftip" site="robot0:Tch_mftip"></touch>
+        <touch name="robot0:ST_Tch_rftip" site="robot0:Tch_rftip"></touch>
+        <touch name="robot0:ST_Tch_lftip" site="robot0:Tch_lftip"></touch>
+        <touch name="robot0:ST_Tch_thtip" site="robot0:Tch_thtip"></touch>
+    </sensor>
+
+    <actuator>
+        <position name="robot0:A_WRJ1" class="robot0:asset_class" user="2038" joint="robot0:WRJ1" ctrlrange="-0.489 0.14" kp="5" forcerange="-4.785 4.785"></position>
+        <position name="robot0:A_WRJ0" class="robot0:asset_class" user="2036" joint="robot0:WRJ0" ctrlrange="-0.698 0.489" kp="5" forcerange="-2.175 2.175"></position>
+        <position name="robot0:A_FFJ3" class="robot0:asset_class" user="2004" joint="robot0:FFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
+        <position name="robot0:A_FFJ2" class="robot0:asset_class" user="2002" joint="robot0:FFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
+        <position name="robot0:A_FFJ1" class="robot0:asset_class" user="2000" joint="robot0:FFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
+        <position name="robot0:A_MFJ3" class="robot0:asset_class" user="2010" joint="robot0:MFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
+        <position name="robot0:A_MFJ2" class="robot0:asset_class" user="2008" joint="robot0:MFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
+        <position name="robot0:A_MFJ1" class="robot0:asset_class" user="2006" joint="robot0:MFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
+        <position name="robot0:A_RFJ3" class="robot0:asset_class" user="2016" joint="robot0:RFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
+        <position name="robot0:A_RFJ2" class="robot0:asset_class" user="2014" joint="robot0:RFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
+        <position name="robot0:A_RFJ1" class="robot0:asset_class" user="2012" joint="robot0:RFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
+        <position name="robot0:A_LFJ4" class="robot0:asset_class" user="2024" joint="robot0:LFJ4" ctrlrange="0 0.785" kp="1" forcerange="-0.9 0.9"></position>
+        <position name="robot0:A_LFJ3" class="robot0:asset_class" user="2022" joint="robot0:LFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
+        <position name="robot0:A_LFJ2" class="robot0:asset_class" user="2020" joint="robot0:LFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
+        <position name="robot0:A_LFJ1" class="robot0:asset_class" user="2018" joint="robot0:LFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
+        <position name="robot0:A_THJ4" class="robot0:asset_class" user="2034" joint="robot0:THJ4" ctrlrange="-1.047 1.047" kp="1" forcerange="-2.3722 2.3722"></position>
+        <position name="robot0:A_THJ3" class="robot0:asset_class" user="2032" joint="robot0:THJ3" ctrlrange="0 1.222" kp="1" forcerange="-1.45 1.45"></position>
+        <position name="robot0:A_THJ2" class="robot0:asset_class" user="2030" joint="robot0:THJ2" ctrlrange="-0.209 0.209" kp="1" forcerange="-0.99 0.99"></position>
+        <position name="robot0:A_THJ1" class="robot0:asset_class" user="2028" joint="robot0:THJ1" ctrlrange="-0.524 0.524" kp="1" forcerange="-0.99 0.99"></position>
+        <position name="robot0:A_THJ0" class="robot0:asset_class" user="2026" joint="robot0:THJ0" ctrlrange="-1.571 0" kp="1" forcerange="-0.81 0.81"></position>
+    </actuator>
+</mujoco>
--- a/src/gym/envs/robotics/assets/hand/shared_asset.xml
+++ b/src/gym/envs/robotics/assets/hand/shared_asset.xml
@@ -0,0 +1,26 @@
+<!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
+<mujoco>
+    <texture type="skybox" builtin="gradient" rgb1="0.44 0.85 0.56" rgb2="0.46 0.87 0.58" width="32" height="32"></texture>
+
+    <texture name="robot0:texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.15 0.2" width="512" height="512"></texture>
+    <texture name="robot0:texgeom" type="cube" builtin="flat" mark="cross" width="127" height="127" rgb1="0.3 0.6 0.5" rgb2="0.3 0.6 0.5" markrgb="0 0 0" random="0.01"></texture>
+
+    <material name="robot0:MatGnd" reflectance="0.5" texture="robot0:texplane" texrepeat="1 1" texuniform="true"></material>
+    <material name="robot0:MatColl" specular="1" shininess="0.3" reflectance="0.5" rgba="0.4 0.5 0.6 1"></material>
+    <material name="robot0:MatViz" specular="0.75" shininess="0.1" reflectance="0.5" rgba="0.93 0.93 0.93 1"></material>
+    <material name="robot0:object" texture="robot0:texgeom" texuniform="false"></material>
+    <material name="floor_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 0"></material>
+
+    <mesh name="robot0:forearm" file="forearm_electric.stl"></mesh>
+    <mesh name="robot0:forearm_cvx" file="forearm_electric_cvx.stl"></mesh>
+    <mesh name="robot0:wrist" scale="0.001 0.001 0.001" file="wrist.stl"></mesh>
+    <mesh name="robot0:palm" scale="0.001 0.001 0.001" file="palm.stl"></mesh>
+    <mesh name="robot0:knuckle" scale="0.001 0.001 0.001" file="knuckle.stl"></mesh>
+    <mesh name="robot0:F3" scale="0.001 0.001 0.001" file="F3.stl"></mesh>
+    <mesh name="robot0:F2" scale="0.001 0.001 0.001" file="F2.stl"></mesh>
+    <mesh name="robot0:F1" scale="0.001 0.001 0.001" file="F1.stl"></mesh>
+    <mesh name="robot0:lfmetacarpal" scale="0.001 0.001 0.001" file="lfmetacarpal.stl"></mesh>
+    <mesh name="robot0:TH3_z" scale="0.001 0.001 0.001" file="TH3_z.stl"></mesh>
+    <mesh name="robot0:TH2_z" scale="0.001 0.001 0.001" file="TH2_z.stl"></mesh>
+    <mesh name="robot0:TH1_z" scale="0.001 0.001 0.001" file="TH1_z.stl"></mesh>
+</mujoco>
--- a/src/gym/envs/robotics/assets/stls/.get
+++ b/src/gym/envs/robotics/assets/stls/.get
--- a/src/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/estop_link.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/estop_link.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/gripper_link.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/gripper_link.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl
--- a/src/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl
+++ b/src/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`from gym.envs.atari.atari_env import AtariEnv`
				`@@ -0,0 +1 @@`
				`from gym.envs.pilesos.pilesos import PilesosEnv`