3장_주택가격

주택가격 예측

1
import tensorflow as tf
1
2
import keras
keras.__version__
Using TensorFlow backend.





'2.1.5'

데이터셋 로드하기

1
2
3
from keras.datasets import boston_housing

# Load the Boston housing dataset, already split into training and test
# feature arrays with their matching target arrays.
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()
1
train_data.shape
(404, 13)
1
test_data.shape
(102, 13)

404개 훈련 102개 테스트 샘플
피처 수: 13
피처 종류

  1. Per capita crime rate.
  2. Proportion of residential land zoned for lots over 25,000 square feet.
  3. Proportion of non-retail business acres per town.
  4. Charles River dummy variable (= 1 if tract bounds river; 0 otherwise).
  5. Nitric oxides concentration (parts per 10 million).
  6. Average number of rooms per dwelling.
  7. Proportion of owner-occupied units built prior to 1940.
  8. Weighted distances to five Boston employment centres.
  9. Index of accessibility to radial highways.
  10. Full-value property-tax rate per $10,000.
  11. Pupil-teacher ratio by town.
  12. 1000 * (Bk - 0.63) ** 2 where Bk is the proportion of Black people by town.
  13. % lower status of the population.

데이터 준비하기

피처마다 스케일이 다르면 신경망 학습에 문제가 됨.
따라서 피처별 정규화가 필요하다.

정규화 공식:
Z = (X - mean) / std

1
2
3
4
5
6
7
# Standardise every feature column: subtract the per-feature mean and divide
# by the per-feature standard deviation (both computed along axis 0).
mean = train_data.mean(axis=0)
train_data -= mean
# std is taken after centring; both arrays are modified in place.
std = train_data.std(axis=0)
train_data /= std

# The test set is scaled with the statistics computed from the training set
# above, not with statistics of its own.
test_data -= mean
test_data /= std

모델 구성

스칼라 값을 예측하는 회귀 문제이므로, 마지막 층은 활성화 함수가 없는 선형 층(유닛 1개)으로 두어 출력 값의 범위가 제한되지 않도록 함.

1
2
3
4
5
6
7
8
9
10
11
12
from keras import models
from keras import layers

def build_model():
    """Build and compile a fresh regression network.

    Wrapped in a function because the same model is created several times
    (once per cross-validation fold).

    The network is two 64-unit ReLU hidden layers followed by a single output
    unit with no activation, so the prediction is a plain linear value. The
    input width is read from the module-level ``train_data``.

    Returns:
        The compiled ``Sequential`` model (``rmsprop`` optimizer, ``mse``
        loss, ``mae`` metric).
    """
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu',
                           input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    # No activation on the last layer: the output is an unconstrained scalar.
    model.add(layers.Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model

K-fold 검증

데이터를 k개의 폴드로 분할한 뒤, 매번 폴드 하나를 검증 데이터로, 나머지 k-1개를 훈련 데이터로 사용하여 k번 훈련·평가하는 방법.
최종 검증 점수는 k개 검증 점수의 평균으로 구함.

1
!nvidia-smi
Wed Jan  2 13:59:27 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.69                 Driver Version: 384.69                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  GeForce GTX 980 Ti  Off  | 00000000:01:00.0 Off |                  N/A |
| 20%   43C    P8    17W / 260W |   6076MiB /  6077MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 980 Ti  Off  | 00000000:02:00.0 Off |                  N/A |
| 20%   45C    P8    20W / 260W |   6058MiB /  6078MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID  Type  Process name                               Usage      |
|=============================================================================|
|    0      5484    C   ...nv/versions/3.5.3/envs/env_3.5/bin/python   231MiB |
|    0     19790    C   /home/nerlab/python3-devel/bin/python3        5832MiB |
|    1      5484    C   ...nv/versions/3.5.3/envs/env_3.5/bin/python   278MiB |
|    1     19790    C   /home/nerlab/python3-devel/bin/python3        5767MiB |
+-----------------------------------------------------------------------------+
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import numpy as np

# K-fold cross-validation: train k independent models, each validated on a
# different contiguous slice of the training data, and collect one validation
# MAE per fold in `all_scores`.
k = 4
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []
for i in range(k):
    print('Fold : ', i)
    val_start = i * num_val_samples
    val_end = (i + 1) * num_val_samples

    # Validation data: the i-th partition
    val_data = train_data[val_start:val_end]
    val_targets = train_targets[val_start:val_end]

    # Training data: every other partition, joined back together
    partial_train_data = np.concatenate(
        [train_data[:val_start], train_data[val_end:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:val_start], train_targets[val_end:]],
        axis=0)

    # Build a fresh (already compiled) Keras model for this fold
    model = build_model()
    # Train silently (verbose=0 suppresses the progress output)
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, batch_size=1, verbose=0)
    # Evaluate on the held-out partition and keep its MAE
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)
Fold :  0



---------------------------------------------------------------------------

InternalError                             Traceback (most recent call last)

~/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1326     try:
-> 1327       return fn(*args)
   1328     except errors.OpError as e:


~/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1305                                    feed_dict, fetch_list, target_list,
-> 1306                                    status, run_metadata)
   1307 


~/.pyenv/versions/3.5.3/lib/python3.5/contextlib.py in __exit__(self, type, value, traceback)
     65             try:
---> 66                 next(self.gen)
     67             except StopIteration:


~/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py in raise_exception_on_not_ok_status()
    465           compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 466           pywrap_tensorflow.TF_GetCode(status))
    467   finally:


InternalError: Blas GEMM launch failed : a.shape=(1, 13), b.shape=(13, 64), m=1, n=64, k=13
     [[Node: dense_1/MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](_arg_dense_1_input_0_0/_43, dense_1/kernel/read)]]


During handling of the above exception, another exception occurred:


InternalError                             Traceback (most recent call last)

<ipython-input-10-c7a92978087a> in <module>()
     25     # 모델 훈련(verbose=0 이므로 훈련 과정이 출력되지 않습니다)
     26     model.fit(partial_train_data, partial_train_targets,
---> 27               epochs=num_epochs, batch_size=1, verbose=0)
     28     # 검증 세트로 모델 평가
     29     val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)


~/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/keras/models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
    961                               initial_epoch=initial_epoch,
    962                               steps_per_epoch=steps_per_epoch,
--> 963                               validation_steps=validation_steps)
    964 
    965     def evaluate(self, x=None, y=None,


~/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
   1703                               initial_epoch=initial_epoch,
   1704                               steps_per_epoch=steps_per_epoch,
-> 1705                               validation_steps=validation_steps)
   1706 
   1707     def evaluate(self, x=None, y=None,


~/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/keras/engine/training.py in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
   1233                         ins_batch[i] = ins_batch[i].toarray()
   1234 
-> 1235                     outs = f(ins_batch)
   1236                     if not isinstance(outs, list):
   1237                         outs = [outs]


~/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   2476         session = get_session()
   2477         updated = session.run(fetches=fetches, feed_dict=feed_dict,
-> 2478                               **self.session_kwargs)
   2479         return updated[:len(self.outputs)]
   2480 


~/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    893     try:
    894       result = self._run(None, fetches, feed_dict, options_ptr,
--> 895                          run_metadata_ptr)
    896       if run_metadata:
    897         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)


~/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1122     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1123       results = self._do_run(handle, final_targets, final_fetches,
-> 1124                              feed_dict_tensor, options, run_metadata)
   1125     else:
   1126       results = []


~/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1319     if handle is None:
   1320       return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1321                            options, run_metadata)
   1322     else:
   1323       return self._do_call(_prun_fn, self._session, handle, feeds, fetches)


~/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1338         except KeyError:
   1339           pass
-> 1340       raise type(e)(node_def, op, message)
   1341 
   1342   def _extend_graph(self):


InternalError: Blas GEMM launch failed : a.shape=(1, 13), b.shape=(13, 64), m=1, n=64, k=13
     [[Node: dense_1/MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](_arg_dense_1_input_0_0/_43, dense_1/kernel/read)]]

Caused by op 'dense_1/MatMul', defined at:
  File "/home/skc8957/.pyenv/versions/3.5.3/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/skc8957/.pyenv/versions/3.5.3/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 497, in start
    self.io_loop.start()
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/home/skc8957/.pyenv/versions/3.5.3/lib/python3.5/asyncio/base_events.py", line 421, in run_forever
    self._run_once()
  File "/home/skc8957/.pyenv/versions/3.5.3/lib/python3.5/asyncio/base_events.py", line 1424, in _run_once
    handle._run()
  File "/home/skc8957/.pyenv/versions/3.5.3/lib/python3.5/asyncio/events.py", line 126, in _run
    self._callback(*self._args)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 122, in _handle_events
    handler_func(fileobj, events)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-10-c7a92978087a>", line 24, in <module>
    model = build_model()
  File "<ipython-input-9-d9f7195d2712>", line 8, in build_model
    input_shape=(train_data.shape[1],)))
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/keras/models.py", line 467, in add
    layer(x)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/keras/engine/topology.py", line 619, in __call__
    output = self.call(inputs, **kwargs)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/keras/layers/core.py", line 855, in call
    output = K.dot(inputs, self.kernel)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 1075, in dot
    out = tf.matmul(x, y)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 1844, in matmul
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py", line 1289, in _mat_mul
    transpose_b=transpose_b, name=name)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/skc8957/.pyenv/versions/3.5.3/envs/env_3.5/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InternalError (see above for traceback): Blas GEMM launch failed : a.shape=(1, 13), b.shape=(13, 64), m=1, n=64, k=13
     [[Node: dense_1/MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](_arg_dense_1_input_0_0/_43, dense_1/kernel/read)]]

GPU 장비 관련 이슈(위 셀의 `Blas GEMM launch failed` 오류)로 코드를 실행하지 못해, 이후 내용은 실행 결과 없이 글로만 학습함.

1
all_scores
1
np.mean(all_scores)
1
2
3
4
5

from keras import backend as K

# Free memory by clearing the Keras backend session
K.clear_session()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Repeat the K-fold run with more epochs, this time recording the validation
# MAE after every epoch so the per-epoch curves can be averaged across folds.
# (Was misspelt `um_epochs`, which left `num_epochs` at its earlier value.)
num_epochs = 500
all_mae_histories = []
for i in range(k):
    print('처리중인 폴드 #', i)
    val_start = i * num_val_samples
    val_end = (i + 1) * num_val_samples

    # Validation data: the i-th partition
    val_data = train_data[val_start:val_end]
    val_targets = train_targets[val_start:val_end]

    # Training data: every other partition, joined back together
    partial_train_data = np.concatenate(
        [train_data[:val_start], train_data[val_end:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:val_start], train_targets[val_end:]],
        axis=0)

    # Build a fresh (already compiled) Keras model for this fold
    model = build_model()
    # Train silently (verbose=0), validating on the held-out partition after
    # each epoch
    history = model.fit(partial_train_data, partial_train_targets,
                        validation_data=(val_data, val_targets),
                        epochs=num_epochs, batch_size=1, verbose=0)
    # NOTE(review): this key matches the Keras 2.1.5 used in this notebook;
    # newer Keras releases may report the metric as 'val_mae' - confirm
    # before upgrading.
    mae_history = history.history['val_mean_absolute_error']
    all_mae_histories.append(mae_history)
1
2
# Average the validation MAE across all folds, one value per epoch.
average_mae_history = []
for i in range(num_epochs):
    maes_at_epoch = [x[i] for x in all_mae_histories]
    average_mae_history.append(np.mean(maes_at_epoch))

그래프 그리기

1
2
3
4
5
6
7
8
9
10
import matplotlib.pyplot as plt


# Plot the fold-averaged validation MAE against the 1-based epoch number.
epochs = range(1, len(average_mae_history) + 1)
plt.plot(epochs, average_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()



그래프가 보기 힘들다. 다른 방식으로 그려 보도록 하자.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
def smooth_curve(points, factor=0.9):
    """Return an exponentially smoothed copy of *points*.

    The first point is kept as is. Every later point is replaced by
    ``previous * factor + point * (1 - factor)``, where ``previous`` is the
    last smoothed value.

    Args:
        points: Iterable of numeric values to smooth.
        factor: Weight given to the previous smoothed value; the current
            point gets ``1 - factor``.

    Returns:
        A new list with one smoothed value per input point (empty if
        *points* is empty).
    """
    smoothed_points = []
    for point in points:
        if smoothed_points:
            previous = smoothed_points[-1]
            smoothed_points.append(previous * factor + point * (1 - factor))
        else:
            # Nothing to blend with yet: the first point seeds the series.
            smoothed_points.append(point)
    return smoothed_points

# Skip the first 10 entries of the averaged history, smooth the rest, and
# plot the smoothed curve against a 1-based index.
smooth_mae_history = smooth_curve(average_mae_history[10:])

epochs = range(1, len(smooth_mae_history) + 1)
plt.plot(epochs, smooth_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()

80번째 에포크 이후에는 검증 MAE가 줄어드는 것을 멈춤.

회귀는 분류에서 사용했던 것과는 다른 손실 함수를 사용.
평균 제곱 오차(MSE)는 회귀에서 자주 사용되는 손실 함수.
비슷하게 회귀에서 사용되는 평가 지표는 분류와 다름.

당연히 정확도 개념은 회귀에 적용되지 않음.

일반적인 회귀 지표는 평균 절대 오차(MAE)입니다.
입력 데이터의 특성이 서로 다른 범위를 가지면 전처리 단계에서 각 특성을 개별적으로 스케일 조정해야 합니다.
가용한 데이터가 적다면 K-겹 검증을 사용하는 것이 모델을 신뢰성 있게 평가하는 방법입니다.
가용한 훈련 데이터가 적다면 과대적합을 피하기 위해 은닉층의 수를 줄인 모델이 좋습니다(일반적으로 하나 또는 두 개).

이 과정을 마치며..

  • 이진 분류
  • 단일 레이블 다중 분류
  • 스칼라 회귀
1
!git status
fatal: (현재 폴더 또는 상위 폴더가) 깃 저장소가 아닙니다: .git
Author: NaYa[Na]
Link: http://yoursite.com/2019/01/13/3장-주택가격/
Copyright Notice: All articles in this blog are licensed under naya's_blog unless stating additionally.