nfmcclure / tensorflow_cookbook

Code for the TensorFlow Machine Learning Cookbook

Home Page: https://www.packtpub.com/big-data-and-business-intelligence/tensorflow-machine-learning-cookbook-second-edition

I ran into a problem with TensorFlow + SVM + MNIST after studying your code

ArrowYL opened this issue

Sorry, my English is not great, but I really want to solve this problem, so I am coming to you for help.
I have studied your code in tensorflow_cookbook/04_Support_Vector_Machines/06_Implementing_Multiclass_SVMs/.

I am new to TensorFlow.
Recently I have been trying to separate the MNIST classes with TensorFlow + SVM.
Here is my code:

```python
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import LoadMNIST  # custom module; not used below

from tensorflow.python.framework import ops
ops.reset_default_graph()

import tensorflow.examples.tutorials.mnist.input_data as input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=False)
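# With one_hot=False, mnist.train.labels / mnist.test.labels are integer class ids 0-9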

x_vals = mnist.train.images
y_vals0 = np.array([1 if y == 0 else -1 for y in mnist.train.labels])
y_vals1 = np.array([1 if y == 1 else -1 for y in mnist.train.labels])
y_vals2 = np.array([1 if y == 2 else -1 for y in mnist.train.labels])
y_vals3 = np.array([1 if y == 3 else -1 for y in mnist.train.labels])
y_vals4 = np.array([1 if y == 4 else -1 for y in mnist.train.labels])
y_vals5 = np.array([1 if y == 5 else -1 for y in mnist.train.labels])
y_vals6 = np.array([1 if y == 6 else -1 for y in mnist.train.labels])
y_vals7 = np.array([1 if y == 7 else -1 for y in mnist.train.labels])
y_vals8 = np.array([1 if y == 8 else -1 for y in mnist.train.labels])
y_vals9 = np.array([1 if y == 9 else -1 for y in mnist.train.labels])
y_vals = np.array([
    y_vals0, y_vals1, y_vals2, y_vals3, y_vals4,
    y_vals5, y_vals6, y_vals7, y_vals8, y_vals9,
])
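# Shape [10, num_train]: row k is +1 where the digit is k and -1 otherwise (one-vs-all targets);
# yt_vals below is built the same way from the test labels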

yt_vals0 = np.array([1 if y == 0 else -1 for y in mnist.test.labels])
yt_vals1 = np.array([1 if y == 1 else -1 for y in mnist.test.labels])
yt_vals2 = np.array([1 if y == 2 else -1 for y in mnist.test.labels])
yt_vals3 = np.array([1 if y == 3 else -1 for y in mnist.test.labels])
yt_vals4 = np.array([1 if y == 4 else -1 for y in mnist.test.labels])
yt_vals5 = np.array([1 if y == 5 else -1 for y in mnist.test.labels])
yt_vals6 = np.array([1 if y == 6 else -1 for y in mnist.test.labels])
yt_vals7 = np.array([1 if y == 7 else -1 for y in mnist.test.labels])
yt_vals8 = np.array([1 if y == 8 else -1 for y in mnist.test.labels])
yt_vals9 = np.array([1 if y == 9 else -1 for y in mnist.test.labels])
yt_vals = np.array([
    yt_vals0, yt_vals1, yt_vals2, yt_vals3, yt_vals4,
    yt_vals5, yt_vals6, yt_vals7, yt_vals8, yt_vals9,
])

x_data = tf.placeholder(tf.float32, [None, 784])
y_target = tf.placeholder(tf.float32, shape=[10, None])
prediction_grid = tf.placeholder(shape=[None, 784], dtype=tf.float32)

batch_size = 100

# Create variables for SVM

b = tf.Variable(tf.random_normal(shape=[10, batch_size]))
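# b holds one trainable weight per class per batch example (it plays the role of the SVM dual coefficients)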

# Gaussian (RBF) kernel

gamma = tf.constant(-10.0)
dist = tf.reduce_sum(tf.square(x_data), 1)
dist = tf.reshape(dist, [-1, 1])
sq_dists = tf.multiply(2., tf.matmul(x_data, tf.transpose(x_data)))
my_kernel = tf.exp(tf.multiply(gamma, tf.abs(sq_dists)))
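# my_kernel has shape [batch_size, batch_size]; only the 2 * x_i . x_j cross term
# enters the exponent here (dist computed above is not used)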

def reshape_matmul(mat):
    # Per-class outer product of the label rows: output shape [10, batch_size, batch_size]
    v1 = tf.expand_dims(mat, 1)
    v2 = tf.reshape(v1, [10, batch_size, 1])
    return tf.matmul(v2, v1)

# Compute SVM model

first_term = tf.reduce_sum(b)
b_vec_cross = tf.matmul(tf.transpose(b), b)
y_target_cross = reshape_matmul(y_target)

second_term = tf.reduce_sum(
    tf.multiply(my_kernel, tf.multiply(b_vec_cross, y_target_cross)), [1, 2])
loss = tf.reduce_sum(tf.negative(tf.subtract(first_term, second_term)))
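# loss is the negative of the (simplified) SVM dual objective, summed over the 10 one-vs-all classifiers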

# Gaussian (RBF) prediction kernel

rA = tf.reshape(tf.reduce_sum(tf.square(x_data), 1), [-1, 1])
rB = tf.reshape(tf.reduce_sum(tf.square(prediction_grid), 1), [-1, 1])
pred_sq_dist = tf.add(
    tf.subtract(
        rA, tf.multiply(2., tf.matmul(x_data, tf.transpose(prediction_grid)))),
    tf.transpose(rB))
pred_kernel = tf.exp(tf.multiply(gamma, tf.abs(pred_sq_dist)))

prediction_output = tf.matmul(tf.multiply(y_target, b), pred_kernel)
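# Note that the kernel votes are weighted by y_target * b, i.e. by whatever labels are fed through y_target at prediction time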
prediction = tf.argmax(
    prediction_output - tf.expand_dims(
        tf.reduce_mean(prediction_output, 1), 1), 0)
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(prediction, tf.argmax(y_target, 0)), tf.float32))
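# prediction picks, for each prediction_grid row, the class with the largest mean-centered kernel score;
# accuracy compares it with the true class index (the +1 row of y_target)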

train_step = tf.train.GradientDescentOptimizer(0.002).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Training loop

loss_vec = []
batch_accuracy = []
test_accuracy = []
for i in range(500):
    rand_index = np.random.choice(len(x_vals), size=batch_size)
    rand_x = x_vals[rand_index]
    rand_y = y_vals[:, rand_index]

    sess.run(train_step, feed_dict={x_data: rand_x, y_target: rand_y})
    temp_loss = sess.run(loss, feed_dict={x_data: rand_x, y_target: rand_y})
    loss_vec.append(temp_loss)

    acc_temp = sess.run(
        accuracy,
        feed_dict={
            x_data: rand_x,
            y_target: rand_y,
            prediction_grid: rand_x
        })
    batch_accuracy.append(acc_temp)

    # test accuracy
    if (i + 1) % 500 == 0:
        for a in range(100):
            randt_index = np.random.choice(
                len(mnist.test.images), size=batch_size)
            # randt_index1 = np.random.choice(
            #     len(mnist.test.images), size=batch_size)

            randt_x = mnist.test.images[randt_index]
            randt_y = yt_vals[:, randt_index]
            # randtt_y = yt_vals[:, randt_index1]
            test_temp = sess.run(
                accuracy,
                feed_dict={
                    x_data: randt_x,
                    y_target: randt_y,
                    prediction_grid: randt_x
                })

            test_accuracy.append(test_temp)

            # one_pic_arr = np.reshape(mnist.test.images[1], (28, 28))
            # pic_matrix = np.matrix(one_pic_arr, dtype="float")
            # plt.imshow(pic_matrix)
            # plt.show()

# Plot batch accuracy

plt.plot(batch_accuracy, 'k-', label='Accuracy')
plt.title('Batch Accuracy')
plt.xlabel('Generation')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()

# Plot test accuracy

plt.plot(test_accuracy, 'k-', label='Accuracy')
plt.title('Test Accuracy')
plt.xlabel('Generation')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()

# Plot loss over time

plt.plot(loss_vec, 'k-')
plt.title('Loss per Generation')
plt.xlabel('Generation')
plt.ylabel('Loss')
plt.show()
```

The problem is that the accuracy results are completely wrong.
(two screenshots of the accuracy output were attached here)

When I print the prediction in the test-accuracy section, I ran two checks:
First: feed x_data, the true labels for rand_index, and prediction_grid. Result: 100% accuracy.
Second: feed x_data, false labels for rand_index, and prediction_grid. Result: still close to 100% accuracy (it should be very low).
I don't know how to fix this error; I have been struggling with it for almost three days. Could you please help me?
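For clarity, here is roughly what the second check looks like, reusing the session and tensor names from the code above (the random permutation is just one illustrative way to produce "false" labels):

```python
# Inside the test loop above: evaluate accuracy with deliberately mismatched labels.
perm = np.random.permutation(batch_size)
wrong_y = randt_y[:, perm]  # label columns no longer line up with randt_x

acc_wrong = sess.run(
    accuracy,
    feed_dict={
        x_data: randt_x,
        y_target: wrong_y,
        prediction_grid: randt_x
    })
print('accuracy with true labels:    ', test_temp)
print('accuracy with permuted labels:', acc_wrong)  # still comes out near 100%
```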
Thanks a lot.

Hi @ArrowYL ,

I was able to replicate this issue. Unfortunately, I'm quite busy over the next month. After things calm down I'll look into the details. Let me know if you manage to fix it in the meantime.

Thank you for the reply. If I solve it one day, I will share the details with you, and I will keep following your GitHub.
Thank you again!

I have encountered the same problem, with a test accuracy of 100%. Have you solved it?