本文共 8480 字,大约阅读时间需要 28 分钟。
记录一下tensorflow实现的mnist对抗样本生成。总共实现了两个版本,FGSM和迭代版本的FGSM。
具体的细节介绍可以参考相关文章(原文此处的链接在转载时已丢失)。
mnist.py
先运行这个文件,生成模型。
# coding: utf-8
"""Train a small convolutional network on MNIST (TensorFlow 1.x graph API).

Running this file as a script trains the network for a fixed number of
steps and writes a checkpoint to ``./net/model-<steps>``.  The helper
functions are importable so other scripts can rebuild the same graph and
restore that checkpoint.
"""
import os

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


def weight_variable(shape):
    """Return a variable of `shape` initialised from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """Return a variable of `shape` initialised to the constant 0.1."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
    """Apply a stride-1, SAME-padded 2-D convolution of `x` with filter `W`."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    """Apply 2x2 max pooling with stride 2 and SAME padding."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')


def inference(x, keep_prob):
    """Build the CNN and return its unnormalised class scores (logits).

    Args:
        x: input tensor; it is reshaped to ``[-1, 28, 28, 1]``, so each row
            is treated as a flattened 28x28 single-channel image.
        keep_prob: keep probability passed to the dropout layer.

    Returns:
        The logits tensor produced by the final ``[1024, 10]`` layer.
    """
    # Restore each 784-dim vector to a 28x28 single-channel image.
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # First convolutional layer: 5x5 kernels, 1 -> 32 channels.
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # Second convolutional layer: 5x5 kernels, 32 -> 64 channels.
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer producing a 1024-dim vector.
    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Map the 1024-dim vector to 10 scores, one per class.
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])
    logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return logits


def loss(logits, labels):
    """Return the mean softmax cross-entropy between `logits` and `labels`."""
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))


def evaluate(logits, y_):
    """Return ``(correct_prediction, accuracy)`` tensors.

    ``correct_prediction`` is the element-wise equality of the arg-max of
    `logits` and of `y_` along axis 1; ``accuracy`` is its mean as float32.
    """
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return correct_prediction, accuracy


if __name__ == '__main__':
    # Load the data set with one-hot labels.
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    # Placeholders for the images and their labels.
    x = tf.placeholder(tf.float32, [None, 784], name="x")
    y_ = tf.placeholder(tf.float32, [None, 10], name="y_")
    # Dropout keep probability: 0.5 while training, 1.0 while evaluating.
    keep_prob = tf.placeholder(tf.float32)

    logits = inference(x, keep_prob=keep_prob)
    cross_entropy = loss(logits, y_)
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction, accuracy = evaluate(logits, y_)

    # The saver is created after the optimizer, so the checkpoint also
    # contains the optimizer's variables.
    saver = tf.train.Saver()

    # Make sure the checkpoint directory exists before training starts, so
    # a long run cannot fail at the very end on a missing directory.
    checkpoint_prefix = "./net/model"
    checkpoint_dir = os.path.dirname(checkpoint_prefix)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Number of training steps (mini-batches), not passes over the data.
    num_steps = 10000

    # The context manager installs the session as the default one (needed by
    # `.eval()` / `.run()`) and closes it when the block exits.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(num_steps):
            batch = mnist.train.next_batch(50)
            # Every 100 steps report accuracy on the current training batch.
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0], y_: batch[1], keep_prob: 1.0})
                print("step %d, training accuracy %g" % (i, train_accuracy))
            train_step.run(
                feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

        # Report accuracy on the test set once training has finished.
        print("test accuracy %g" % accuracy.eval(feed_dict={
            x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

        saver.save(sess, checkpoint_prefix, global_step=num_steps)
fgsm.py
运行完 mnist.py 生成模型之后,再运行 fgsm.py 生成对抗样本。
"""Generate FGSM adversarial examples for the CNN trained by mnist.py.

For each of the first `n_sample` test images the perturbation size epsilon
is increased in fixed increments until the network misclassifies
``original + epsilon * sign(gradient)`` (or the epsilon cap is reached).
Each adversarial image is saved as a JPEG.
"""
import os

import scipy.misc
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

from mnist import evaluate, inference, loss

if __name__ == '__main__':
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    # Placeholders for the images and their labels.
    x = tf.placeholder(tf.float32, [None, 784], name="x")
    y_ = tf.placeholder(tf.float32, [None, 10], name="y_")
    # Dropout keep probability; always fed 1.0 here (inference only).
    keep_prob = tf.placeholder(tf.float32)

    logits = inference(x, keep_prob=keep_prob)
    cross_entropy = loss(logits, y_)
    correct_prediction, accuracy = evaluate(logits, y_)

    # Build the attack op ONCE, with epsilon as a placeholder, so the graph
    # does not grow on every loop iteration.  tf.gradients returns a list
    # with one entry per requested input, hence the [0].
    epsilon_ph = tf.placeholder(tf.float32, shape=[], name="epsilon")
    grad = tf.gradients(cross_entropy, x)[0]
    # Clip so the result stays inside the [0, 1] range that is also used
    # when the image is written to disk below.
    adv_img = tf.clip_by_value(x + epsilon_ph * tf.sign(grad), 0.0, 1.0)

    # No optimizer is built in this script, so the saver only restores the
    # model variables from the checkpoint.
    saver = tf.train.Saver()

    n_sample = 10
    epsilon_step = 0.07
    # With inputs and outputs both clipped to [0, 1], an epsilon of 1 already
    # saturates every pixel that has a non-zero gradient sign, so larger
    # values cannot change the result.  The cap guarantees termination.
    max_epsilon = 1.0

    # Work on copies so the data set's own arrays are never modified.
    adv_imgs = mnist.test.images[:n_sample].copy()
    labels = mnist.test.labels[:n_sample]

    save_dir = "adversiral_samples/"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    with tf.Session() as sess:
        module_file = "./net/model-10000"
        saver.restore(sess, module_file)

        print("test accuracy %g" % accuracy.eval(feed_dict={
            x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

        for i in range(n_sample):
            # The unperturbed image: every attempt starts from this point.
            original = mnist.test.images[i].reshape(1, 784)
            label = labels[i].reshape(1, 10)

            epsilon = 0.0
            still_correct = True
            # Grow epsilon until the prediction flips (adversarial example
            # found) or the cap is reached.
            while still_correct and epsilon < max_epsilon:
                epsilon += epsilon_step
                candidate = sess.run(adv_img, feed_dict={
                    x: original, y_: label,
                    keep_prob: 1.0, epsilon_ph: epsilon})
                still_correct = sess.run(correct_prediction, feed_dict={
                    x: candidate, y_: label, keep_prob: 1.0})[0]

            adv_imgs[i] = candidate[0]
            # `epsilon` is the value that produced the stored image.
            print("sample {}, eposion = {}".format(i, epsilon))

            image_array = adv_imgs[i].reshape(28, 28)
            filename = save_dir + 'adv_img%d.jpg' % i
            # NOTE(review): scipy.misc.toimage was removed in SciPy 1.2; this
            # call needs an older SciPy (with Pillow installed).
            scipy.misc.toimage(image_array, cmin=0.0, cmax=1.0).save(filename)

        # adv_imgs has shape (n_sample, 784), matching the `x` placeholder.
        print("adversiral sample accuracy = ", sess.run(accuracy, feed_dict={
            x: adv_imgs, y_: labels, keep_prob: 1.0}))
iter_fgsm.py
迭代法生成对抗样本。
"""Generate adversarial examples with an iterative gradient attack.

For each of the first `n_sample` test images, the image is repeatedly
moved one L2-normalised gradient step in the direction that increases the
cross-entropy loss, until the network trained by mnist.py misclassifies it
(or the iteration cap is reached).  Each result is saved as a JPEG.
"""
import os

import scipy.misc
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

from mnist import evaluate, inference, loss

if __name__ == '__main__':
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    # Placeholders for the images and their labels.
    x = tf.placeholder(tf.float32, [None, 784], name="x")
    y_ = tf.placeholder(tf.float32, [None, 10], name="y_")
    # Dropout keep probability; always fed 1.0 here (inference only).
    keep_prob = tf.placeholder(tf.float32)

    logits = inference(x, keep_prob=keep_prob)
    cross_entropy = loss(logits, y_)
    correct_prediction, accuracy = evaluate(logits, y_)

    # Length of each update step, measured in L2 norm.
    step_size = 1.0
    # Upper bound on updates per image so the loop always terminates.
    max_iter = 100

    # tf.gradients returns a list with one entry per requested input.
    grad = tf.gradients(cross_entropy, x)[0]
    # The small constant avoids a division by zero (NaN pixels) when the
    # gradient vanishes.
    unit_grad = grad / (tf.norm(grad, ord='euclidean') + 1e-12)
    # One attack step applied directly to the image fed through `x`; the
    # result is clipped to the [0, 1] range used when saving below.
    adv_img = tf.clip_by_value(x + step_size * unit_grad, 0.0, 1.0)

    # No optimizer is built in this script, so the saver only restores the
    # model variables from the checkpoint.
    saver = tf.train.Saver()

    n_sample = 10
    # Work on copies so the data set's own arrays are never modified.
    adv_imgs = mnist.test.images[:n_sample].reshape(-1, 784).copy()
    labels = mnist.test.labels[:n_sample]

    save_dir = "adv_samples/"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    with tf.Session() as sess:
        module_file = "./net/model-10000"
        saver.restore(sess, module_file)

        print("test accuracy %g" % accuracy.eval(feed_dict={
            x: mnist.test.images.reshape(-1, 784),
            y_: mnist.test.labels, keep_prob: 1.0}))

        for i in range(n_sample):
            current = adv_imgs[i].reshape(1, 784)
            label = labels[i].reshape(1, 10)

            still_correct = True
            steps = 0
            # Keep stepping from the latest image until the prediction flips
            # or the iteration cap is reached.
            while still_correct and steps < max_iter:
                current = sess.run(adv_img, feed_dict={
                    x: current, y_: label, keep_prob: 1.0})
                still_correct = sess.run(correct_prediction, feed_dict={
                    x: current, y_: label, keep_prob: 1.0})[0]
                steps += 1

            adv_imgs[i] = current[0]
            print("sample {}, steps = {}".format(i, steps))

            image_array = adv_imgs[i].reshape(28, 28)
            filename = save_dir + 'adv_img%d.jpg' % i
            # NOTE(review): scipy.misc.toimage was removed in SciPy 1.2; this
            # call needs an older SciPy (with Pillow installed).
            scipy.misc.toimage(image_array, cmin=0.0, cmax=1.0).save(filename)

        print("adversiral sample accuracy ", sess.run(accuracy, feed_dict={
            x: adv_imgs, y_: labels, keep_prob: 1.0}))
import numpy as np
from keras import backend, losses


def fgsm(model, image, y_true, eps=0.1, clip_min=0, clip_max=255):
    """Return an FGSM adversarial version of `image` for a Keras model.

    The result is ``image + eps * sign(d loss / d input)``, clipped to
    ``[clip_min, clip_max]``, where the loss is the categorical
    cross-entropy between `y_true` and the model's output.

    Args:
        model: Keras model; its ``input`` and ``output`` tensors are used.
        image: value fed to ``model.input`` and perturbed.
        y_true: true target values, passed as the first argument of
            ``losses.categorical_crossentropy``.
        eps: perturbation magnitude applied to each element.
        clip_min: lower bound of the valid pixel range.
        clip_max: upper bound of the valid pixel range.  The defaults
            (0, 255) keep the original behaviour; pass ``clip_max=1`` for
            images scaled to [0, 1].

    Returns:
        The adversarial example as returned by ``np.clip``.

    NOTE: every call adds new gradient ops to the backend graph.
    """
    y_pred = model.output
    loss = losses.categorical_crossentropy(y_true, y_pred)
    # backend.gradients returns a list; take the entry for model.input.
    gradient = backend.gradients(loss, model.input)[0]
    adv = image + backend.sign(gradient) * eps  # the FGSM update
    sess = backend.get_session()
    # The image must also be fed, because the gradient depends on the input.
    adv = sess.run(adv, feed_dict={model.input: image})
    # The perturbation can push pixels outside the valid range.
    return np.clip(adv, clip_min, clip_max)
转载地址:http://rdzvi.baihongyu.com/