{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#np.random.seed(7)\n", "plt.set_cmap('jet')\n", "\n", "#Create the data from Task 1\n", "n=40\n", "nhalf = int(n/2)\n", "\n", "train_a = np.random.exponential(0.25,(nhalf, 2))\n", "train_b = np.random.exponential(2,(nhalf, 2))\n", "\n", "plt.scatter(train_a[:,0],train_a[:,1])\n", "plt.scatter(train_b[:,0],train_b[:,1])\n", "plt.show()\n", "\n", "inputData = np.append(train_a,train_b,axis=0)\n", "inputLabels = np.append(-1*np.ones(nhalf),np.ones(nhalf))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "thresh = 10e-12\n", "\n", "def evalF(beta,t,x,y,b,kernel=None):\n", " #Evaluates the function $f = \\sum_{i=1}^n \\beta_i \\mathbf{y}_i \\langle \\mathbf{t}, \\mathbf{x}_i \\rangle + b\n", " if kernel is None:\n", " innerProducts = np.matmul(a=x,b=t)\n", " else:\n", " innerProducts = kernel(x,np.transpose(t))\n", " \n", " betay = np.multiply(beta,y)\n", " summer = np.matmul(np.transpose(innerProducts),betay)\n", " return summer + b * np.ones(summer.shape)\n", "\n", "# def computeEs(beta,x,y,b,C):\n", "# #Computes the discrepancy values eh and el which can be used to estimate b\n", " \n", "# #Get the updated values of f\n", "# fvals = evalF(beta=beta,t=np.transpose(x),x=x,y=y,b=b)\n", "# #Determine Index sets\n", "# I0 = (beta > thresh) & (beta < C - thresh)\n", "# I0P = (beta <= thresh) & (y == 1)\n", "# I0N = (beta <= thresh) & (y == -1)\n", "# ICP = (beta >= C - thresh) & (y == 1)\n", "# ICN = (beta >= C - thresh) & (y == -1)\n", "# #Determine max and mins\n", "# ehInd = np.argmin(fvals[I0 | I0P | ICN] - y[I0 | I0P | ICN])\n", "# elInd = np.argmax(fvals[I0 | I0N | ICP] - y[I0 | I0N | ICP])\n", "# eh = fvals[ehInd] - 
y[ehInd]\n", "# el = fvals[elInd] - y[elInd]\n", "# return (eh,el)\n", "\n", "def OneStep(beta, x, y, i, j, b, C,kernel=None):\n", " #One step of the SMO algorithm for indices i and j\n", " #with coefficients beta and data $(x_i,y_i)$\n", " if i == j:\n", " return (beta,b)\n", "\n", " delta = np.asscalar(y[i] * ((evalF(beta=beta,x=x,t=x[j,:],y=y,b=b,kernel=kernel) - y[j]) - (evalF(beta=beta,x=x,t=x[i,:],y=y,b=b,kernel=kernel) - y[i])))\n", " s = np.asscalar(y[i] * y[j])\n", " if kernel is None:\n", " chi = np.asscalar(np.dot(x[i,:],x[i,:]) + np.dot(x[j,:],x[j,:]) - 2* np.dot(x[j,:],x[i,:]))\n", " else:\n", " chi = np.asscalar(kernel(x[i,:],x[i,:]) + kernel(x[j,:],x[j,:]) - 2* kernel(x[j,:],x[i,:]) ) \n", " gamma = np.asscalar(s * beta[i] + beta[j])\n", " \n", " #Determine Upper and Lower bounds on the coefficients\n", " L = 0\n", " H = 0\n", " if (s > 0):\n", " L = max(0,gamma - C)\n", " H = min(gamma,C)\n", " else:\n", " L = max(0,-gamma)\n", " H = min(C,C-gamma)\n", " if L > H - thresh:\n", " return (beta,b)\n", " \n", " #Determine new coefficients \n", " betatmp = 0\n", " if (chi > thresh):\n", " betatmp = np.asscalar(beta[i] + delta/chi)\n", " elif (delta > 0):\n", " betatmp = L\n", " else:\n", " betatmp = H\n", " \n", " betai_old = beta[i]\n", " betaj_old = beta[j]\n", " beta[i] = min(max(betatmp,L),H)\n", " beta[j] = gamma - s * beta[i]\n", " \n", " #Update b \n", " #(eh,el) = computeEs(beta=beta,x=x,y=y,b=b,C=C)\n", " #b = b - 0.5 * (eh + el)\n", " \n", " #Update f\n", " fi = evalF(beta=beta,x=x,t=x[i,:],y=y,b=b,kernel=kernel)\n", " fj = evalF(beta=beta,x=x,t=x[j,:],y=y,b=b,kernel=kernel)\n", " #Update b \n", " b = b - 0.5 * (fi - y[i] + fj - y[j])\n", " \n", " return (beta,b)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def HeavisideThresh(x):\n", " if x <= thresh:\n", " return 0\n", " else:\n", " return 1\n", " \n", "def KKT(betai,yi,fi,C):\n", " #return HeavisideThresh(betai) * 
max(0,yi*fi - 1) + HeavisideThresh(C - betai) * max(0,1 - yi*fi)\n", " return betai * max(0,yi*fi - 1) + (C - betai) * max(0,1 - yi*fi)\n", "\n", "def SMO(x,y,C,rand=True,maxStep=10000,kernel=None):\n", "\n", " #Calculates the sequential minimal optimization solution for data (x_i,y_i) and\n", " #upper coefficient bound C (C-SVM)\n", " itStep = 0\n", " #Sanity-Check\n", " if (x.shape[0] != y.shape[0]):\n", " return -np.inf\n", " \n", " beta = np.zeros(y.shape[0])\n", " b = 0\n", " i = 0\n", " j = 0\n", " if rand:\n", " #Choose i and j randomly\n", " while (itStep < maxStep):\n", " \n", " ij = np.random.choice(x.shape[0], 2)\n", " i = np.asscalar(ij[0])\n", " j = np.asscalar(ij[1])\n", " (beta, b) = OneStep(beta=beta, x=x, y=y, i=i, j=j, b=b, C=C, kernel=kernel)\n", " itStep += 1\n", " else:\n", " #Loop over i and check KKT conditions\n", " while (itStep < maxStep):\n", " fvals = evalF(beta=beta,t=np.transpose(x),x=x,y=y,b=b,kernel=kernel)\n", " KKTbreak = True\n", " for i in range(x.shape[0]):\n", " if (KKT(betai = np.asscalar(beta[i]),yi = np.asscalar(y[i]), fi = np.asscalar(fvals[i]), C = C) > thresh):\n", " KKTbreak = False\n", " I0 = np.argwhere(np.all([beta < C - thresh, beta > thresh],axis=0)).flatten()\n", " if ((I0.size == 0) or ((I0.size == 1) and (I0[0] == i))):\n", " I0 = np.arange(beta.shape[0])\n", " #for j in I0:\n", " # if (i != j):\n", " # (beta, b) = OneStep(beta=beta, x=x, y=y, i=i, j=j, b=b, C=C, kernel=kernel)\n", " # itStep += 1\n", " j = i\n", " while (j == i):\n", " j = np.random.choice(I0)\n", " (beta, b) = OneStep(beta=beta, x=x, y=y, i=i, j=j, b=b, C=C, kernel=kernel)\n", " itStep += 1\n", " fvals = evalF(beta=beta,t=np.transpose(x),x=x,y=y,b=b,kernel=kernel)\n", " if KKTbreak:\n", " break\n", " \n", " #Do a final update of b but this time using all support vectors instead of just the two vectors in the oneStep function\n", " mask = (beta > thresh)\n", " if (np.any(mask)):\n", " fsv = 
evalF(beta=beta,t=np.transpose(x[mask,:]),x=x,y=y,b=b,kernel=kernel)\n", " El = fsv - y[mask]\n", " b = b - np.median(El)\n", "\n", " return (beta,b)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#LLS code\n", "def buildDataMatrix(inputData):\n", " augData = np.concatenate((np.ones((inputData.shape[0],1)), inputData), axis=1)\n", " return augData\n", "\n", "def LLS(inputData,inputLabels):\n", " #Linear least squares algorithm\n", " augData = buildDataMatrix(inputData=inputData)\n", "\n", " #Build equation system\n", " LabelsTrafo = np.dot(augData.T,inputLabels)\n", " InnerProdMat = np.dot(augData.T,augData)\n", " \n", " #Solve\n", " alpha = np.linalg.solve(InnerProdMat, LabelsTrafo)\n", " return alpha\n", "\n", "def PlotSeperatorLLS(value,inputData,inputLabels):\n", " #This plots the hyperplane alpha_0 + alpha_1 x_1 + ... + alpha_d x_d = value\n", " \n", " samplenum = 1000\n", " minx = np.amin(inputData[:,0])\n", " maxx = np.amax(inputData[:,0])\n", " miny = np.amin(inputData[:,1])\n", " maxy = np.amax(inputData[:,1])\n", " xrange = np.arange(minx-1, maxx+1, (maxx-minx+2)/samplenum)\n", " yrange = np.arange(miny-1, maxy+1, (maxy-miny+2)/samplenum)\n", " X, Y = np.meshgrid(xrange,yrange)\n", " alpha = LLS(inputData=inputData,inputLabels=inputLabels)\n", " Z = alpha[0] * np.ones(X.shape) + alpha[1] * X + alpha[2]*Y\n", " Z = np.where(Z > value, 1, -1)\n", " \n", " plt.xlim(minx - 1, maxx + 1)\n", " plt.ylim(miny - 1, maxy + 1)\n", " #plt.contour(X, Y, Z, alpha=0.2,linestyles='dashed',linewidths=2)\n", " plt.contourf(X, Y, Z, alpha=0.2)\n", "\n", " return alpha" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def PlotSeperatorSVM(value,inputData,inputLabels,C=1,featureMap=None,kernel=None):\n", " #This plots the hyperplane alpha_0 + alpha_1 x_1 + ... 
+ alpha_d x_d = value\n", " \n", " if (featureMap is not None) and (kernel is not None):\n", " print(\"You have provided a featureMap and a kernel. This does not work!\")\n", " return(-np.inf,-np.inf)\n", " \n", " if featureMap is None:\n", " features = inputData\n", " else:\n", " features = featureMap(inputData)\n", " \n", " samplenum = 1000\n", " minx = np.amin(inputData[:,0])\n", " maxx = np.amax(inputData[:,0])\n", " miny = np.amin(inputData[:,1])\n", " maxy = np.amax(inputData[:,1])\n", " xrange = np.arange(minx-1, maxx+1, (maxx-minx+2)/samplenum)\n", " yrange = np.arange(miny-1, maxy+1, (maxy-miny+2)/samplenum)\n", " X, Y = np.meshgrid(xrange,yrange)\n", " inpArray = np.array([X.flatten(),Y.flatten()]).T\n", " (beta,b) = SMO(x=features,y=inputLabels,C=C,rand=False,maxStep=10000,kernel=kernel)\n", " if featureMap is None:\n", " testGrid = inpArray\n", " else:\n", " testGrid = featureMap(inpArray)\n", " Z = evalF(beta=beta,t=np.transpose(testGrid),x=features,y=inputLabels,b=b,kernel=kernel)\n", " \n", " Z = np.where(Z > value, 1, -1)\n", " Z = np.reshape(Z,X.shape)\n", " \n", " plt.xlim(minx - 1, maxx + 1)\n", " plt.ylim(miny - 1, maxy + 1)\n", " #plt.contour(X, Y, Z, alpha=0.2, linestyles='solid', linewidths=2)\n", " plt.contourf(X, Y, Z, alpha=0.2)\n", " \n", " for c in np.unique(inputLabels):\n", " mask = inputLabels == c\n", " plt.scatter(inputData[mask,0], inputData[mask,1],label='Class ' + str(int(c)))\n", " mask = np.all([beta > thresh, beta < C - thresh], axis=0)\n", " plt.scatter(inputData[beta > thresh,0], inputData[beta > thresh,1],label='Support Vector',alpha=0.3,marker='D',color='black')\n", " plt.scatter(inputData[mask,0], inputData[mask,1],label='Margin Vectors',alpha=1,marker='x',color='black')\n", " plt.xlabel(r'$t_1$')\n", " plt.ylabel(r'$t_2$')\n", " plt.title('SVM Classifier for C=' + str(C))\n", " plt.legend()\n", " plt.show()\n", " plt.gcf().clear()\n", " #print('Coefficients: ' + str(beta))\n", " print('#Support Vectors: ' + str((beta > 
thresh).sum()))\n", " print('#Margin Vectors: ' + str(mask.sum()))\n", " return (beta,b)\n", "\n", "#Plot the LLS fit\n", "alpha = PlotSeperatorLLS(value=0,inputData=inputData,inputLabels=inputLabels)\n", "for c in np.unique(inputLabels):\n", " mask = inputLabels == c\n", " plt.scatter(inputData[mask,0], inputData[mask,1],label='Class ' + str(int(c)))\n", "plt.xlabel(r'$t_1$')\n", "plt.ylabel(r'$t_2$')\n", "plt.title('LLS Classifier')\n", "plt.legend()\n", "plt.show()\n", "plt.gcf().clear()\n", "\n", "CoeffsSVM = dict()\n", " \n", "#Plot the SVM fit for different C\n", "for C in [0.01,1,100]:\n", " (beta,b) = PlotSeperatorSVM(value=0,inputData=inputData,inputLabels=inputLabels,C=C)\n", " CoeffsSVM[C] = (beta,b)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Create test data\n", "ntest = 2000\n", "ntesthalf = int(ntest/2)\n", "\n", "test_a = np.random.exponential(0.25,(ntesthalf, 2))\n", "test_b = np.random.exponential(2,(ntesthalf, 2))\n", "\n", "# plt.scatter(test_a[:,0],test_a[:,1])\n", "# plt.scatter(test_b[:,0],test_b[:,1])\n", "# plt.show()\n", "\n", "testData = np.append(test_a,test_b,axis=0)\n", "testLabels = np.append(-1*np.ones(ntesthalf),np.ones(ntesthalf))\n", "\n", "#Evaluate learned models on TestData \n", "testValuesLLS = alpha[0] * np.ones(testData.shape[0]) + alpha[1] * testData[:,0] + alpha[2]* testData[:,1]\n", "classRateLLS = (np.multiply(testLabels,testValuesLLS) > 0).sum() / ntest\n", "print(\"Classification Rate for LLS: \" + str(classRateLLS * 100) + \"%\")\n", "\n", "for (C, params) in CoeffsSVM.items():\n", " testValuesSVM = evalF(beta=params[0],t=np.transpose(testData),x=inputData,y=inputLabels,b=params[1])\n", " classRateSVM = (np.multiply(testLabels,testValuesSVM) > 0).sum() / ntest\n", " print(\"Classification Rate for SVM with C = \" + str(C) + \": \" + str(classRateSVM * 100) + \"%\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ 
"#Create the data from Task 2.5\n", "n=100\n", "nhalf = int(n/2)\n", "\n", "train_a = np.array([]).reshape(0,2)\n", "train_b = np.array([]).reshape(0,2)\n", "\n", "while(train_a.shape[0] < 50):\n", " tmp = np.random.uniform(-1,1,(1,2))\n", " if np.linalg.norm(tmp) < 1:\n", " train_a = np.append(train_a,tmp,axis=0)\n", " \n", "while(train_b.shape[0] < 50):\n", " tmp = np.random.uniform(-2,2,(1,2))\n", " if (np.linalg.norm(tmp) < 2) and (np.linalg.norm(tmp) > 1):\n", " train_b = np.append(train_b,tmp,axis=0)\n", "\n", "plt.scatter(train_a[:,0],train_a[:,1])\n", "plt.scatter(train_b[:,0],train_b[:,1])\n", "plt.show()\n", "\n", "inputData = np.append(train_a,train_b,axis=0)\n", "inputLabels = np.append(-1*np.ones(nhalf),np.ones(nhalf))\n", "\n", "#Fit a linear SVM\n", "print(\"Without Feature Map: \")\n", "(_,_) = PlotSeperatorSVM(value=0,inputData=inputData,inputLabels=inputLabels,C=10)\n", "\n", "#Fit a nonlinear SVM with feature map $\\phi(t) = t_1^2 + t_2^2$\n", "def sampleFeatureMap(t):\n", " if (t.shape[1] != 2):\n", " return -np.inf\n", " #return np.transpose(np.array([t[:,0], t[:,1], t[:,0]*t[:,0] + t[:,1]*t[:,1]]))\n", " return np.transpose(np.array([t[:,0]*t[:,0] + t[:,1]*t[:,1]]))\n", "\n", "print(\"\\nWith quadratic feature map: \")\n", "(_,_) = PlotSeperatorSVM(value=0,inputData=inputData,inputLabels=inputLabels,C=10,featureMap = sampleFeatureMap)\n", "\n", "#Fit nonlinear SVM with Gaussian kernel\n", "import scipy.spatial.distance as dist\n", "\n", "def gaussKernel(x,t,sigma=1):\n", " pairDist = dist.cdist(XA=np.atleast_2d(x), XB=np.atleast_2d(t), metric='euclidean')\n", " #Gaussian RBF: exp(-||x-t||^2 / (2 sigma^2)); parentheses are required here,\n", " #otherwise /2*np.square(sigma) divides by 2 and then MULTIPLIES by sigma^2\n", " return np.exp(- np.square(pairDist)/(2*np.square(sigma)))\n", "\n", "print(\"\\nWith Gaussian kernel: \")\n", "(_,_) = PlotSeperatorSVM(value=0,inputData=inputData,inputLabels=inputLabels,C=10,kernel = gaussKernel)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Load MNIST Data\n", "import os\n", "import gzip\n", "from 
urllib.request import urlretrieve\n", "\n", "def download(filename, source='http://yann.lecun.com/exdb/mnist/'):\n", " print(\"Downloading %s\" % filename)\n", " urlretrieve(source + filename, filename)\n", "\n", "# We define functions for loading MNIST images and labels.\n", "def load_mnist_images(filename):\n", " if not os.path.exists(filename):\n", " download(filename)\n", "\n", " with gzip.open(filename, 'rb') as f:\n", " data = np.frombuffer(f.read(), np.uint8, offset=16)\n", " # The inputs are vectors, we reshape them to monochrome 2D images,\n", " # following the shape convention: (examples, rows, columns)\n", " data = data.reshape(-1, 28, 28)\n", " # The inputs come as bytes, we convert them to float32 in range [0,1].\n", " return data / np.float32(256)\n", "\n", "def load_mnist_labels(filename):\n", " if not os.path.exists(filename):\n", " download(filename)\n", "\n", " with gzip.open(filename, 'rb') as f:\n", " data = np.frombuffer(f.read(), np.uint8, offset=8)\n", " return data\n", "\n", "X_train = load_mnist_images('train-images-idx3-ubyte.gz')\n", "y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')\n", "X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')\n", "y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')\n", "\n", "print(\"First four training labels: \" + str(y_train[0:4]))\n", "print(\"First four training images: \")\n", "for i in range(4):\n", " plt.subplot(1,4,i+1)\n", " plt.imshow(X_train[i,:,:], cmap=plt.cm.gray_r, interpolation='nearest')\n", " plt.axis('off')\n", "#Show once, after the loop, so all four digits appear in ONE 1x4 figure\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn import svm, metrics\n", "from sklearn.model_selection import GridSearchCV\n", "\n", "#Reshape to get a data matrix for learning\n", "X_train = X_train.reshape(-1,28*28)\n", "X_test = X_test.reshape(-1,28*28)\n", "\n", "#Shuffle training data by random permutation\n", "perm = np.random.permutation(X_train.shape[0])\n", "\n", "param_grid = 
[{\"C\": [1, 10, 100], \"gamma\": [0.1, 0.01, 0.001], \"kernel\": [\"rbf\"]}]\n", "\n", "svmclass = GridSearchCV(estimator=svm.SVC(), param_grid=param_grid, cv=5, scoring=\"accuracy\", refit=False)\n", "svmclass.fit(X_train[perm[:500]], y_train[perm[:500]])\n", "\n", "for mean, params in zip(svmclass.cv_results_['mean_test_score'], svmclass.cv_results_['params']):\n", " print(\"Accuracy: %0.2f%% for parameters %r\" % (mean*100, params))\n", "print(\"\\nOptimal parameter set determined by CV: \" + str(svmclass.best_params_) + \"\\n\")\n", "\n", "print(\"Relearning on 2000 random training points for optimal CV parameters.\")\n", "bestsvmclass = svm.SVC(**svmclass.best_params_)\n", "perm = np.random.permutation(X_train.shape[0])\n", "bestsvmclass.fit(X_train[perm[0:2000]], y_train[perm[0:2000]])\n", "pred = bestsvmclass.predict(X_test)\n", "print(\"Confusion matrix: \")\n", "print(metrics.confusion_matrix(y_test, pred))\n", "print(\"Accuracy: \" + str(100*metrics.accuracy_score(y_test, pred)) + \"%\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Just for picture generation stuff\n", "\n", "np.random.seed(12)\n", "mean_a = np.array([0, 0])\n", "mean_b = np.array([5, 0])\n", "cov = np.eye(2,2)\n", "\n", "a = np.random.multivariate_normal(mean_a,cov,10)\n", "b = np.random.multivariate_normal(mean_b,2*cov,10)\n", "inputData = np.append(a,b,axis=0)\n", "inputLabels = np.append(-1*np.ones(a.shape[0]),np.ones(b.shape[0]))\n", "\n", "_ = PlotSeperatorLLS(value=0,inputData=inputData,inputLabels=inputLabels)\n", "plt.scatter(a[:,0],a[:,1],c='tab:blue')\n", "plt.scatter(b[:,0],b[:,1],c='tab:orange')\n", "(_,_) = PlotSeperatorSVM(value=0,inputData=inputData,inputLabels=inputLabels,C=10000000)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", 
"name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }