一、目标
接下来要做的一个游戏是找金币:如下图所示,机器人在地图的空白位置随机产生,通过上下左右移动,到达金币位置即胜利,踩到两个陷阱中的任意一个则失败。本文构建了该地图,并将其注册到gym中,方便以后使用标准方法调用。
二、构建过程
首先定义状态空间、动作空间、回报函数与状态转移概率
# State space: eight cells, numbered 1-8.
self.states = [1, 2, 3, 4, 5, 6, 7, 8]

# Screen coordinates for each state; entry i belongs to state i + 1.
self.x = [140, 220, 300, 380, 460, 140, 300, 460]
self.y = [250, 250, 250, 250, 250, 150, 150, 150]

# Terminal states, kept as a dict and looked up by key.
self.terminate_states = {6: 1, 7: 1, 8: 1}

# Actions available to the robot.
self.actions = ['n', 'e', 's', 'w']

# Reward table keyed by "<state>_<action>".
self.rewards = {
    '1_s': -1.0,
    '3_s': 1.0,
    '5_s': -1.0,
}

# Transition table keyed by "<state>_<action>"; the value is the next state.
self.t = {
    '1_s': 6,
    '1_e': 2,
    '2_w': 1,
    '2_e': 3,
    '3_s': 7,
    '3_w': 2,
    '3_e': 4,
    '4_w': 3,
    '4_e': 5,
    '5_s': 8,
    '5_w': 4,
}

# Discount factor.
self.gamma = 0.8

self.viewer = None
self.state = None
讯享网
下边要定义三个标准调用函数:step、reset、render:
step是根据当前机器人的位置和动作action,计算下一状态
def _step(self, action):
    """Advance the environment by one action.

    Args:
        action: Action label; it is combined with the current state into a
            "<state>_<action>" key for the transition and reward tables.

    Returns:
        A tuple ``(next_state, reward, is_terminal, info)`` where ``info``
        is always an empty dict.
    """
    # Current state of the system.
    state = self.state

    # Once a terminal state is reached nothing changes any more.
    if state in self.terminate_states:
        return state, 0, True, {}

    # Combine state and action into the lookup key.
    key = "%d_%s" % (state, action)

    # State transition: a move missing from the table leaves the robot in place.
    next_state = self.t.get(key, state)
    self.state = next_state

    is_terminal = next_state in self.terminate_states

    # Any (state, action) pair missing from the reward table yields 0.0.
    r = self.rewards.get(key, 0.0)

    return next_state, r, is_terminal, {}
reset是初始化
def _reset(self):
    """Put the robot on a uniformly random state and return that state.

    NOTE(review): the draw covers every entry of ``self.states``; if that
    list contains terminal states the episode can start already finished.
    Confirm whether that is intended.
    """
    index = int(random.random() * len(self.states))
    self.state = self.states[index]
    return self.state
render是更新界面,里边很长的代码是用来画界面的,用线和圆圈来代表地图和机器人、陷阱、金币

def render(self, mode='human', close=False):
    """Draw the grid world and place the robot on its current cell.

    Args:
        mode: Passing 'rgb_array' asks the viewer for an RGB array
            (forwarded as ``return_rgb_array``); any other value does not.
        close: When true, close the viewer if one exists and return.

    Returns:
        None when ``close`` is true or when no state has been set yet,
        otherwise whatever ``self.viewer.render(...)`` returns.
    """
    if close:
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
        return

    screen_width = 600
    screen_height = 400

    if self.viewer is None:
        # Imported lazily so the rendering backend is only needed when drawing.
        from gym.envs.classic_control import rendering
        self.viewer = rendering.Viewer(screen_width, screen_height)

        # Grid lines, stored as self.line1 .. self.line11 and drawn in black.
        segments = [
            ((100, 300), (500, 300)),
            ((100, 200), (500, 200)),
            ((100, 300), (100, 100)),
            ((180, 300), (180, 100)),
            ((260, 300), (260, 100)),
            ((340, 300), (340, 100)),
            ((420, 300), (420, 100)),
            ((500, 300), (500, 100)),
            ((100, 100), (180, 100)),
            ((260, 100), (340, 100)),
            ((420, 100), (500, 100)),
        ]
        for index, (start, end) in enumerate(segments, start=1):
            line = rendering.Line(start, end)
            line.set_color(0, 0, 0)
            setattr(self, "line%d" % index, line)
            self.viewer.add_geom(line)

        # Fixed markers: two skulls (black) and the gold (yellow).
        markers = [
            ("kulo1", (140, 150), (0, 0, 0)),
            ("kulo2", (460, 150), (0, 0, 0)),
            ("gold", (300, 150), (1, 0.9, 0)),
        ]
        for attr_name, centre, colour in markers:
            marker = rendering.make_circle(40)
            self.circletrans = rendering.Transform(translation=centre)
            marker.add_attr(self.circletrans)
            marker.set_color(*colour)
            setattr(self, attr_name, marker)
            self.viewer.add_geom(marker)

        # The robot: its transform is kept so it can be moved on every frame.
        self.robot = rendering.make_circle(30)
        self.robotrans = rendering.Transform()
        self.robot.add_attr(self.robotrans)
        self.robot.set_color(0.8, 0.6, 0.4)
        self.viewer.add_geom(self.robot)

    if self.state is None:
        return None

    # States are 1-based, the coordinate lists are 0-based.
    self.robotrans.set_translation(self.x[self.state - 1], self.y[self.state - 1])

    return self.viewer.render(return_rgb_array=mode == 'rgb_array')
这样就完成了构建的主体,全部代码可以看链接:https://paste.ubuntu.com/p/XP8xbz6kJv/
三、环境注册
注册是为了将构建好的游戏地图保存并以gym的标准调用方式调用:
1、将上述代码所在的.py文件拷到gym安装目录/gym/gym/envs/classic_control文件夹下
2、在该文件夹的__init__.py末尾加入下面这一行,其中XXX是上述.py文件的文件名(不含.py后缀):
from gym.envs.classic_control.XXX import GridEnv
3、在/gym/gym/envs中,打开__init__.py文件,添加代码:
# Register the grid world so it can be created with gym.make('GridWorld-v0').
# NOTE(review): reward_threshold is 100.0 - confirm it is reachable with this
# environment's reward scale.
register(
    id='GridWorld-v0',
    entry_point='gym.envs.classic_control:GridEnv',
    max_episode_steps=200,
    reward_threshold=100.0,
)
第一个参数id就是调用gym.make('GridWorld-v0')时传入的环境名称,可以根据情况修改。这样就完成了注册。

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/19992.html