
Coursera: Machine Learning by Andrew Ng (6) - SVM


SVM mainly focuses on the samples that lie close to the decision boundary, while logistic regression takes all samples into account and tries to push them far away from the decision boundary. There are two kinds of SVM:

  • SVM without kernel (linear SVM)
    • when \(\theta^T x \ge 0\), predict y = 1; otherwise, y = 0
    • classifies the training set with a linear boundary.
  • SVM with kernel (typically the Gaussian kernel)
    • used to classify the training set with a nonlinear boundary
    • the key is to choose the best C (\(\frac{1}{\lambda}\)) and sigma (\(\sigma\)); see the objective sketched after this list
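
For reference, here is the SVM objective in the course's notation; the regularization strength appears as the factor C on the misclassification cost (instead of \(\lambda\) on the weights), which is why C plays the role of \(\frac{1}{\lambda}\): a large C means little regularization.

\[
\min_\theta \; C \sum_{i=1}^{m} \left[ y^{(i)} \, \text{cost}_1(\theta^T x^{(i)}) + (1 - y^{(i)}) \, \text{cost}_0(\theta^T x^{(i)}) \right] + \frac{1}{2} \sum_{j=1}^{n} \theta_j^2
\]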

load the data and visualization

    %matplotlib inline
    
    import numpy as np
    import scipy.io # used to import mat data
    import matplotlib.pyplot as plt # used to plot the data to visualize
    from sklearn import svm # the svm lib

load the data

    # load the data
    filename = 'ex6/data/ex6data1.mat'
    mat = scipy.io.loadmat(filename)
    X = mat['X']
    y = mat['y']
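
As a quick sanity check (the .mat file stores X as an m x 2 feature matrix and y as an m x 1 label column, which is why y.flatten() is needed later):

    # inspect the shapes of the loaded arrays
    print(X.shape)
    print(y.shape)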

visualization

    # visualization
    def plotData(myX, myY):
        """
        plot the data myX with labels in myY
        """
        plt.figure(figsize=(8,6)) # create a new figure
        myY = myY.flatten()
        pos = myY==1 # boolean mask of the positive examples
        neg = myY==0 # boolean mask of the negative examples
        posX = myX[pos]
        negX = myX[neg]
        plt.plot(posX[:,0],posX[:,1],'k+') # positives as black crosses
        plt.plot(negX[:,0],negX[:,1],'yo') # negatives as yellow circles
    
    plotData(X,y)     

(figure: scatter plot of the ex6data1 training set)

SVM without kernel to classify the data

An SVM without a kernel is simply a linear classifier.

use svm.SVC to train

  • with C=1.0
    train = svm.SVC(C=1.0,kernel='linear')
    train.fit(X,y.flatten())
    print(train)

Here is the description of the model:

    SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
      decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
      max_iter=-1, probability=False, random_state=None, shrinking=True,
      tol=0.001, verbose=False)
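
Before plotting, it helps to peek at the fitted parameters; coef_ and intercept_ hold the weights and bias that plotBoundary below relies on:

    # the learned linear decision function is intercept_ + coef_ . x = 0
    print(train.intercept_)
    print(train.coef_)
    print(train.score(X, y.flatten())) # mean accuracy on the training set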

plot the boundary

    def plotBoundary(train,X):
        b = train.intercept_[0] # the bias
        theta = train.coef_[0] # the weights
        # prepend a column of ones so the bias can be folded into theta
        x1 = np.insert(X[:,:-1],0,1,axis=1)
        theta = np.insert(theta,0,b)
        # solve theta0 + theta1*x1 + theta2*x2 = 0 for x2
        x2 = np.dot(x1, theta[:-1].T) / -theta[-1]
        plt.plot(x1[:,1],x2,'b-')
    
    plotData(X,y)
    plotBoundary(train,X)

(figure: linear decision boundary with C=1.0)

  • with C=100
    train = svm.SVC(C=100,kernel='linear')
    train.fit(X,y.flatten())
    
    plotData(X,y)
    plotBoundary(train,X)

(figure: linear decision boundary with C=100)

    def plotBoundary2(mySVM, xmin, xmax, ymin, ymax):
        """
        Function to plot the decision boundary for a trained SVM on 2-feature data
        """
        xvals = np.linspace(xmin, xmax,100)
        yvals = np.linspace(ymin, ymax,100)
        zvals = np.zeros((len(xvals),len(yvals)))
        # predict the class of every point on a 100x100 grid
        for i in range(len(xvals)):
            for j in range(len(yvals)):
                zvals[i][j] = float(mySVM.predict(np.array([[xvals[i],yvals[j]]]))[0])
        zvals = zvals.transpose()
        # draw the contour separating the two predicted regions
        plt.contour(xvals,yvals,zvals,[0])
    plotData(X,y)
    plotBoundary2(train,0,4.5,1.5,5)
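
As a design note, the double loop above is easy to read but slow, since it calls predict once per grid point; the same grid can be predicted in a single vectorized call. A sketch (the helper name plotBoundary2_vec is mine):

    def plotBoundary2_vec(mySVM, xmin, xmax, ymin, ymax):
        # build the same 100x100 grid, but predict all points at once
        u, v = np.meshgrid(np.linspace(xmin, xmax, 100),
                           np.linspace(ymin, ymax, 100))
        grid = np.c_[u.ravel(), v.ravel()] # one (x1,x2) row per grid point
        zvals = mySVM.predict(grid).reshape(u.shape)
        plt.contour(u, v, zvals, [0])
    
    # plotBoundary2_vec(train, 0, 4.5, 1.5, 5) would reproduce the plot above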

(figure: decision boundary drawn with plotBoundary2)

Gaussian Kernels

An SVM without a kernel is a linear classifier; when we want a nonlinear classifier, we need a kernel to help.

the formula is \(K(x_1, x_2) = \exp\left(-\frac{\|x_1 - x_2\|^2}{2\sigma^2}\right)\), implemented below:

    def gaussianKernel(x1,x2, sigma):
        """
        compute the gaussian kernel (similarity) between x1 and x2
        """
        t = x1 - x2
        return np.exp(-np.dot(t.T,t) / (2.0 * np.power(sigma,2)))
    
    x1 = np.array([1, 2, 1])
    x2 = np.array([0, 4, -1])
    sigma = 2
    
    print("the expected result should be:0.324652, and the real output is:%0.6f" % gaussianKernel(x1,x2,sigma))

Here we get the following result:

    the expected result should be:0.324652, and the real output is:0.324652
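
As a cross-check, sklearn ships the same kernel as rbf_kernel; note that its gamma parameter corresponds to \(\frac{1}{2\sigma^2}\):

    from sklearn.metrics.pairwise import rbf_kernel
    
    # sklearn computes exp(-gamma * ||x1 - x2||^2), so gamma = 1 / (2 * sigma^2)
    print(rbf_kernel(x1.reshape(1,-1), x2.reshape(1,-1),
                     gamma=1.0/(2.0*sigma**2))[0,0]) # ~0.324652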

load the data and visualization

    filename = 'ex6/data/ex6data2.mat'
    mat = scipy.io.loadmat(filename)
    X = mat['X']
    y = mat['y']

    plotData(X,y)

(figure: scatter plot of the ex6data2 training set)

use rbf to train the model

    C,sigma = 1,0.1
    train = svm.SVC(C=C,kernel='rbf',gamma=100.0) # gamma = sigma**-2, the convention used below
    train.fit(X,y.flatten())
    print(train)

here is the description of the model:

    SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
      decision_function_shape=None, degree=3, gamma=100.0, kernel='rbf',
      max_iter=-1, probability=False, random_state=None, shrinking=True,
      tol=0.001, verbose=False)

plot the boundary

    plotData(X,y)
    plotBoundary2(train,0,1.0,0.4,1.0)

(figure: nonlinear decision boundary on ex6data2)
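
How tightly the boundary fits depends on gamma; a small sketch (hypothetical gamma values, same data) shows the trade-off:

    # hypothetical comparison: smaller gamma = smoother, looser boundary
    for g in (10.0, 100.0, 1000.0):
        m = svm.SVC(C=1.0, kernel='rbf', gamma=g).fit(X, y.flatten())
        print("gamma=%g -> training accuracy=%.3f" % (g, m.score(X, y.flatten())))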

Cross-validation to choose the best C and sigma

load ex6data3

    filename = 'ex6/data/ex6data3.mat'
    mat = scipy.io.loadmat(filename)
    X = mat['X']
    y = mat['y']
    Xval = mat['Xval']
    yval = mat['yval']

choose C and sigma

    def evaluateSVM(X,y,Xval,yval):
        """
        function used to compute the best C and gamma for the SVC model, using <X,y> as the training set
        and <Xval,yval> as the validation set
        """
        T = [0.01,0.03,0.1,0.3,1,3,10,30] # the candidate values for both C and sigma
        score = 0.0
        C,gamma = 0.0,0.0
        
        for c in T:
            for sigma in T:
                g = np.power(sigma,-2.0) # convert sigma to SVC's gamma parameter
                model = svm.SVC(C=c,kernel='rbf',gamma=g)
                model.fit(X,y.flatten())
                
                # accuracy on the validation set
                ypred = model.predict(Xval)
                sc = float(np.sum(yval.flatten()==ypred)) / len(yval)
                if (sc > score):
                    score = sc
                    C = c
                    gamma = g
        
        return C,gamma
                    
    C,gamma = evaluateSVM(X,y,Xval,yval)
    print("Best C=%f, Best gamma=%f" % (C,gamma))

Now we get the result:

    Best C=0.300000, Best gamma=100.000000
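
For comparison, the same grid search can be written more compactly with itertools.product and SVC.score; a sketch (tie-breaking may differ from the loop above):

    from itertools import product
    
    T = [0.01,0.03,0.1,0.3,1,3,10,30]
    # score every (C, sigma) pair on the validation set and keep the best
    best = max((svm.SVC(C=c, kernel='rbf', gamma=s**-2.0).fit(X, y.flatten())
                   .score(Xval, yval), c, s**-2.0)
               for c, s in product(T, T))
    print("Best C=%f, Best gamma=%f (validation accuracy=%f)" % (best[1], best[2], best[0]))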

    model = svm.SVC(C=C,kernel='rbf',gamma=gamma)
    model.fit(X,y.flatten())
    plotData(X,y)
    plotBoundary2(model,-0.5,0.3,-0.8,0.6)

(figure: decision boundary with the best C and gamma on ex6data3)

