Deep Learning Tutorials: Logistic Regression
Hello everyone.
How are you? I'm doing well.
Theano is hard to use, isn't it? And the Deep Learning Tutorials are hard too.
So, partly as a memo to myself, I'd like to walk through them here.
If I get anything wrong, please point it out.
This time I'll take the Logistic Regression tutorial as the subject:
http://deeplearning.net/tutorial/logreg.html
The Model
Multiply the input by the weights, then add the bias parameter b.
Finally, apply the softmax function.
The softmax function is given by

P(Y=i \mid x, W, b) = \mathrm{softmax}_i(Wx + b) = \frac{e^{W_i x + b_i}}{\sum_j e^{W_j x + b_j}}

Incidentally, when predicting the class we use

y_{pred} = \operatorname{argmax}_i P(Y=i \mid x, W, b)

In other words, the class with the highest probability is taken as the prediction.
# generate symbolic variables for input (x and y represent a minibatch)
x = T.fmatrix('x')
y = T.lvector('y')

# b = bias, W = weights
b = theano.shared(numpy.zeros((10,)), name='b')
W = theano.shared(numpy.zeros((784, 10)), name='W')

# W: weights, x: input, b: bias
p_y_given_x = T.nnet.softmax(T.dot(x, W) + b)

# compile as a function
get_p_y_given_x = theano.function(inputs=[x], outputs=p_y_given_x)

# run the logistic regression function and print the result
# (i and x_value are placeholders for a class index and an input minibatch)
print 'Probability that x is of class %i is %f' % (i, get_p_y_given_x(x_value)[i])

# symbolic expression for the actual predicted class
y_pred = T.argmax(p_y_given_x, axis=1)

# compile as a function
classify = theano.function(inputs=[x], outputs=y_pred)
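Just to make the shapes concrete, here is a minimal sketch of my own (not from the tutorial) of how the compiled functions can be called. x_value is a dummy minibatch I made up; with W and b initialized to zeros, every class comes out at probability 0.1.

import numpy
import theano
import theano.tensor as T

x = T.fmatrix('x')
b = theano.shared(numpy.zeros((10,)), name='b')
W = theano.shared(numpy.zeros((784, 10)), name='W')
p_y_given_x = T.nnet.softmax(T.dot(x, W) + b)
get_p_y_given_x = theano.function(inputs=[x], outputs=p_y_given_x)
classify = theano.function(inputs=[x], outputs=T.argmax(p_y_given_x, axis=1))

# a dummy minibatch: one 28x28 image flattened to 784 values
x_value = numpy.random.rand(1, 784).astype('float32')
print get_p_y_given_x(x_value)[0]  # ten probabilities, all 0.1 here
print classify(x_value)            # index of the most probable class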
Defining a Loss Function
The loss function ℓ is defined as follows. First, the log-likelihood of the data:

\mathcal{L}(\theta=\{W,b\}, \mathcal{D}) = \sum_{i=0}^{|\mathcal{D}|} \log P(Y=y^{(i)} \mid x^{(i)}, W, b)

The loss is its negation, the negative log-likelihood \ell(\theta, \mathcal{D}) = -\mathcal{L}(\theta, \mathcal{D}); minimizing it amounts to maximum likelihood estimation.
# the loss function: mean negative log-likelihood over the minibatch
loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
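The indexing here is the subtle part: T.arange(y.shape[0]) pairs each row of the minibatch with its correct label, so exactly one log-probability is picked per example. A small plain-NumPy sketch of the same trick, with made-up numbers:

import numpy

# log-probabilities for a minibatch of 3 examples over 4 classes (made-up values)
log_p = numpy.log(numpy.array([[0.70, 0.10, 0.10, 0.10],
                               [0.20, 0.50, 0.20, 0.10],
                               [0.25, 0.25, 0.25, 0.25]]))
y = numpy.array([0, 1, 3])  # correct class of each example

# picks log_p[0, 0], log_p[1, 1], log_p[2, 3]: one entry per row
picked = log_p[numpy.arange(y.shape[0]), y]
loss = -picked.mean()
print loss  # about 0.812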
The parameters are updated by differentiating this loss function.
As for the intuition, it's enough to think of the loss as roughly the gap between the predictions and the correct answers.
Updating the parameters by following this gradient is called gradient descent.
Learning the Model
Gradient descent is used here.
The update looks like this:

W \leftarrow W - \text{lrate} \cdot \frac{\partial \ell}{\partial W}, \qquad b \leftarrow b - \text{lrate} \cdot \frac{\partial \ell}{\partial b}

lrate: the learning rate
We multiply the gradient of the loss ℓ by the learning rate (which controls how big a step to take) and subtract the result from the current weights.
# differentiate the loss function with respect to each parameter
g_W = T.grad(cost=cost, wrt=classifier.W)
g_b = T.grad(cost=cost, wrt=classifier.b)

# every call to the compiled function updates the weight and bias parameters
updates = [(classifier.W, classifier.W - learning_rate * g_W),
           (classifier.b, classifier.b - learning_rate * g_b)]

# the training model: applies the updates derived from the loss function
# outputs = expression to compute
# updates = parameters to update and their new values
# givens  = substitutions for variables inside the graph
train_model = theano.function(inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})
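To see what updates is doing without Theano's machinery, here is a hedged plain-NumPy sketch of mine of a single SGD step; the gradient formula is the standard softmax-regression one, and all the array values are made up:

import numpy

def softmax(z):
    # subtract the row max for numerical stability
    e = numpy.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

learning_rate = 0.13
batch_size = 4
W = numpy.zeros((784, 10))
b = numpy.zeros(10)

# a made-up minibatch
x = numpy.random.rand(batch_size, 784)
y = numpy.array([3, 1, 4, 1])

p = softmax(numpy.dot(x, W) + b)   # (batch_size, 10) class probabilities
onehot = numpy.eye(10)[y]          # (batch_size, 10) one-hot labels

# gradient of the mean negative log-likelihood
g_W = numpy.dot(x.T, p - onehot) / batch_size
g_b = (p - onehot).mean(axis=0)

# the same update rule Theano applies on every call to train_model
W -= learning_rate * g_W
b -= learning_rate * g_b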
Putting it All Together
Well, you can get by just copy-pasting this.
Basically, the formulas covered above are all you need.
MNIST is used as the training data.
There are still a lot of parts in the second half, the training loop, that I don't really understand. It's probably the training procedure, though...
Once I've read through it, I'll write another explanatory post.
import cPickle
import gzip
import os
import sys
import time

import numpy

import theano
import theano.tensor as T


class LogisticRegression(object):
    """Multi-class Logistic Regression Class"""

    def __init__(self, input, n_in, n_out):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie
        """
        # the weights W
        self.W = theano.shared(value=numpy.zeros((n_in, n_out),
                                                 dtype=theano.config.floatX),
                               name='W', borrow=True)
        # the bias b
        self.b = theano.shared(value=numpy.zeros((n_out,),
                                                 dtype=theano.config.floatX),
                               name='b', borrow=True)

        # the softmax function
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # predict
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        # parameters of the model
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        """Define the loss (mean negative log-likelihood)."""
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        """Return the error rate over the minibatch."""
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()


def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    #############
    # LOAD DATA #
    #############

    # download MNIST if it is not present locally
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(os.path.split(__file__)[0], "..", "data",
                                dataset)
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        import urllib
        origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        print 'Downloading data from %s' % origin
        urllib.urlretrieve(origin, dataset)

    print '... loading data'

    # load the data
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    def shared_dataset(data_xy, borrow=True):
        """Load the data into shared variables."""
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval


def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of minibatches for each set
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    # construct the logistic regression class; 28 * 28 is the MNIST image size
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost to minimize during training
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # gradients
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # updates
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # the training model
    train_model = theano.function(inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many minibatches before
                                  # checking the network on the validation
                                  # set; in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('    epoch %i, minibatch %i/%i, test error of best'
                           ' model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           ' with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))


if __name__ == '__main__':
    sgd_optimization_mnist()
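Since everything hangs off sgd_optimization_mnist, you can also call it directly with different hyperparameters, for example a short run as a quick sanity check. The values below are my own choices, not the tutorial's:

# a quick sanity-check run with a smaller epoch budget
sgd_optimization_mnist(learning_rate=0.13, n_epochs=10,
                       dataset='mnist.pkl.gz', batch_size=600)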