ladybird clappybird( 三 ) _「广东龙网」

<= epsilon:print("----------Random Action----------")action_index = random.randrange(ACTIONS)a_t[random.randrange(ACTIONS)] = 1else:action_index = np.argmax(readout_t)a_t[action_index] = 1else:a_t[0] = 1# do nothing# scale down epsilon# 缩小 epsilonif epsilon > FINAL_EPSILON and t > OBSERVE:epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE# 其次，执行选择的动作，并保存返回的状态、得分。x_t1_colored, r_t, terminal = game_state.frame_step(a_t)x_t1 = cv2.cvtColor(cv2.resize(x_t1_colored, (80, 80)), cv2.COLOR_BGR2GRAY)ret, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)x_t1 = np.reshape(x_t1, (80, 80, 1))s_t1 = np.append(x_t1, s_t[:, :, :3], axis=2)# 经验池保存的是以一个马尔科夫序列于D中D.append((s_t, a_t, r_t, s_t1, terminal))# (s_t, a_t, r_t, s_t1, terminal)分别表示# t时的状态s_t ， # 执行的动作a_t ， # 得到的反馈r_t ， # 得到的下一步的状态s_t1# 游戏是否结束的标志terminal# 如果经验池超过最大长度则弹出最早的经验数据if len(D) > REPLAY_MEMORY:D.popleft()# 过了一段时间之后， t 是计数器if t > OBSERVE:minibatch = random.sample(D, BATCH)# 从经验池D中随机提取马尔科夫序列s_j_batch = [d[0] for d in minibatch]a_batch = [d[1] for d in minibatch]r_batch = [d[2] for d in minibatch]s_j1_batch = [d[3] for d in minibatch]y_batch = []readout_j1_batch = readout.eval(feed_dict={s: s_j1_batch})for i in range(0, len(minibatch)):terminal = minibatch[i][4]if terminal:y_batch.append(r_batch[i])else:y_batch.append(r_batch[i] + GAMMA * np.max(readout_j1_batch[i]))train_step.run(feed_dict={y: y_batch,a: a_batch,s: s_j_batch})s_t = s_t1t += 1# save progress every 10000 iterationsif t % 10000 == 0:saver.save(sess, \\'saved_networks/\\' + GAME + \\'-dqn\\', global_step=t)# print infostate = ""if t <= OBSERVE:state = "observe"elif t > OBSERVE and t <= OBSERVE + EXPLORE:state = "explore"else:state = "train"print("TIMESTEP", t, "/ STATE", state, \"/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, \"/ Q_MAX %e" % np.max(readout_t))# write info to files\\'\\'\\'if t % 10000 <= 100:a_file.write(",".join([str(x) for x in readout_t]) + \\'\n\\')h_file.write(",".join([str(x) for x in h_fc1.eval(feed_dict={s:[s_t]})[0]]) + \\'\n\\')cv2.imwrite("logs_tetris/frame" + str(t) + ".png", x_t1)\\'\\'\\'def playGame():sess = tf.InteractiveSession()s, readout, h_fc1 = createNetwork()trainNetwork(s, readout, h_fc1, sess)def main():playGame()if __name__ == "__main__":main()

特别声明：本站内容均来自网友提供或互联网，仅供参考，请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系，我们将在24小时内删除。