갈루아의 반서재

Deep MNIST for Experts 튜토리얼 실행시 발생할 수 있는 텐서플로우 코드 에러


ResourceExhaustedError 가 발생하는 경우



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
step 18500, training accuracy 1
step 18600, training accuracy 1
step 18700, training accuracy 1
step 18800, training accuracy 1
step 18900, training accuracy 1
step 19000, training accuracy 1
step 19100, training accuracy 1
step 19200, training accuracy 1
step 19300, training accuracy 1
step 19400, training accuracy 1
step 19500, training accuracy 1
step 19600, training accuracy 0.98
step 19700, training accuracy 0.98
step 19800, training accuracy 1
step 19900, training accuracy 0.98
 
print "test accuracy %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})
 
-------------------------------------------------------------------------
ResourceExhaustedError                  Traceback (most recent call last)
<ipython-input-22-b0ca1bd76417> in <module>()
----> 1 print "test accuracy %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})
 
/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in eval(self, feed_dict, session)
    553 
    554     """
--> 555     return _eval_using_default_session(self, feed_dict, self.graph, session)
    556 
    557 
/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in _eval_using_default_session(tensors, feed_dict, graph, session)
   3496                        "the tensor's graph is different from the session'"
   3497                        "graph.")
-> 3498   return session.run(tensors, feed_dict)
   3499 
   3500 
/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    370     try:
    371       result = self._run(None, fetches, feed_dict, options_ptr,
--> 372                          run_metadata_ptr)
    373       if run_metadata:
    374         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
    634     try:
    635       results = self._do_run(handle, target_list, unique_fetches,
--> 636                              feed_dict_string, options, run_metadata)
    637     finally:
    638       # The movers are no longer used. Delete them.
/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
    706     if handle is None:
    707       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
--> 708                            target_list, options, run_metadata)
    709     else:
    710       return self._do_call(_prun_fn, self._session, handle, feed_dict,
/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
    726         except KeyError:
    727           pass
--> 728       raise type(e)(node_def, op, message)
    729 
    730   def _extend_graph(self):
ResourceExhaustedError: OOM when allocating tensor with shape[10000,28,28,32]
     [[Node: Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/cpu:0"](Reshape, Variable_2/read)]]
Caused by op u'Conv2D', defined at:
  File "/root/anaconda/envs/tensorflow/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-2d359ddc58b5>", line 1, in <module>
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
  File "<ipython-input-5-63d0af69b544>", line 3, in conv2d
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 394, in conv2d
    data_format=data_format, name=name)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
    op_def=op_def)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2260, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/root/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1230, in __init__
    self._traceback = _extract_stack()
cs


accurary evaluation 하는 동안 out of memory 가 발생했다는 내용이다. 따라서 전체 테스트 데이터셋에 대해서 실행하지 말고, 다음 포스팅에서 언급되듯이 배치 단위로 실행하면 되겠다.


How to read data into TensorFlow batches from example queue?

http://stackoverflow.com/questions/37126108/how-to-read-data-into-tensorflow-batches-from-example-queue


마지막 라인

1
print "test accuracy %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})
cs

다음의 코드로 교체한다.

1
2
3
for i in xrange(10):
    testSet = mnist.test.next_batch(50)
    print("test accuracy %g"%accuracy.eval(feed_dict={ x: testSet[0], y_: testSet[1], keep_prob: 1.0}))
cs


결과는 다음과 같다.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
for i in range(1000):
    batch = mnist.train.next_batch(50)
    if i%100 == 0:
        train_accuracy = accuracy.eval(session=sess, feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})
        print "step %d, training accuracy %g" % (i, train_accuracy)
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
 
step 0, training accuracy 0.12
step 100, training accuracy 0.8
step 200, training accuracy 0.92
step 300, training accuracy 0.86
step 400, training accuracy 1
step 500, training accuracy 0.94
step 600, training accuracy 1
step 700, training accuracy 1
step 800, training accuracy 0.96
step 900, training accuracy 1
 
for i in xrange(10):
    testSet = mnist.test.next_batch(50)
    print("test accuracy %g"%accuracy.eval(feed_dict={ x: testSet[0], y_: testSet[1], keep_prob: 1.0}))
 
test accuracy 0.98
test accuracy 0.96
test accuracy 1
test accuracy 0.98
test accuracy 0.96
test accuracy 0.98
test accuracy 0.9
test accuracy 0.98
test accuracy 0.92
test accuracy 0.98
cs