a = tf.placeholder("float", [None, ACTIONS])
y = tf.placeholder("float", [None])
readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices=1)
cost = tf.reduce_mean(tf.square(y - readout_action))
train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)
# perform gradient step
train_step.run(feed_dict={y: y_batch,a: a_batch,s: s_j_batch})

综上，这个模型的主要框架即是如此。
#!/usr/bin/env pythonfrom __future__ import print_functionimport tensorflow as tfimport cv2import syssys.path.append("game/")import wrapped_flappy_bird as gameimport randomimport numpy as npfrom collections import dequeGAME = \\'bird\\'# the name of the game being played for log filesACTIONS = 2# number of valid actionsGAMMA = 0.99# decay rate of past observationsOBSERVE = 10000.# timesteps to observe before trainingEXPLORE = 2000000.# frames over which to anneal epsilonFINAL_EPSILON = 0.0001# final value of epsilonINITIAL_EPSILON = 0.0001# starting value of epsilonREPLAY_MEMORY = 50000# number of previous transitions to rememberBATCH = 32# size of minibatchFRAME_PER_ACTION = 1# CNN 模型# 权重def weight_variable(shape):initial = tf.truncated_normal(shape, stddev=0.01)return tf.Variable(initial)# 偏置def bias_variable(shape):initial = tf.constant(0.01, shape=shape)return tf.Variable(initial)# 卷积函数def conv2d(x, W, stride):return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding="SAME")# 池化 核 2*2 步长2def max_pool_2x2(x):return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")# 创建网络def createNetwork():# network weightsW_conv1 = weight_variable([8, 8, 4, 32])b_conv1 = bias_variable([32])W_conv2 = weight_variable([4, 4, 32, 64])b_conv2 = bias_variable([64])W_conv3 = weight_variable([3, 3, 64, 64])b_conv3 = bias_variable([64])W_fc1 = weight_variable([1600, 512])b_fc1 = bias_variable([512])W_fc2 = weight_variable([512, ACTIONS])b_fc2 = bias_variable([ACTIONS])# 输入层 输入向量为80*80*4# input layers = tf.placeholder("float", [None, 80, 80, 4])# hidden layers# 第一个隐藏层+一个池化层h_conv1 = tf.nn.relu(conv2d(s, W_conv1, 4) + b_conv1)h_pool1 = max_pool_2x2(h_conv1)# 第二个隐藏层h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2, 2) + b_conv2)# h_pool2 = max_pool_2x2(h_conv2)# 第三个隐藏层h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3, 1) + b_conv3)# h_pool3 = max_pool_2x2(h_conv3)# 展平# h_pool3_flat = tf.reshape(h_pool3, [-1, 256])h_conv3_flat = tf.reshape(h_conv3, [-1, 1600])# 
第一个全连接层h_fc1 = tf.nn.relu(tf.matmul(h_conv3_flat, W_fc1) + b_fc1)# 输出层# readout layerreadout = tf.matmul(h_fc1, W_fc2) + b_fc2return s, readout, h_fc1def trainNetwork(s, readout, h_fc1, sess):# 定义损失函数# define the cost functiona = tf.placeholder("float", [None, ACTIONS])y = tf.placeholder("float", [None])readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices=1)cost = tf.reduce_mean(tf.square(y - readout_action))train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)# open up a game state to communicate with emulatorgame_state = game.GameState()# store the previous observations in replay memoryD = deque()# printinga_file = open("logs_" + GAME + "/readout.txt", \\'w\\')h_file = open("logs_" + GAME + "/hidden.txt", \\'w\\')# 初始化# 将图像转化为80*80*4 的矩阵do_nothing = np.zeros(ACTIONS)do_nothing[0] = 1x_t, r_0, terminal = game_state.frame_step(do_nothing)# 将图像转换成80*80 , 并进行灰度化x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)# 对图像进行二值化ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)# 将图像处理成4通道s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)# 保存和载入网络saver = tf.train.Saver()sess.run(tf.initialize_all_variables())checkpoint = tf.train.get_checkpoint_state("saved_networks")if checkpoint and checkpoint.model_checkpoint_path:saver.restore(sess, checkpoint.model_checkpoint_path)print("Successfully loaded:", checkpoint.model_checkpoint_path)else:print("Could not find old network weights")# 开始训练epsilon = INITIAL_EPSILONt = 0while "flappy bird" != "angry bird":# choose an action epsilon greedily# 将当前环境输入到CNN网络中readout_t = readout.eval(feed_dict={s: [s_t]})[0]a_t = np.zeros([ACTIONS])action_index = 0if t % FRAME_PER_ACTION == 0:if random.random()
- 古墓丽影3详尽攻略 古墓丽影3怎么过
- 注册公司记账报税流程图
- 教师演讲题目大全新颖 教师演讲稿题目
- 那一天,我真难忘 我看风水那些年
- 割双眼皮的风水化解 双眼皮的风水讲究
- 吃啥可以丰胸 吃什么东西可以丰胸?
- 儿童泳装秀 儿童泳衣品牌推荐
- 孩子11岁离婚 与小11岁文章离婚
- 我想考研究生怎么入手 研究生考试条件
- 网页基础知识 网站入门知识
特别声明：本站内容均来自网友提供或互联网，仅供参考，请勿用于商业和其他非法用途。如果侵犯了您的权益，请与我们联系，我们将在24小时内删除。
