#kbo_svm.py # # by Joe Hahn, jhahn@spacescience.org, 23 January 2014. # # Train the SVM algorithm to classify KBO orbits. #to execute in ipython: > ipython --pylab In [1]: %run kbo_svm.py #import modules used below import numpy as np import pandas as pd import matplotlib.pyplot as plt from plot_kbo import * import sklearn as sk #read kbo dataframe plus orbit element ranges and plot kb = pd.read_pickle('kb.pkl') npl = np.load('select_kbos.npz') a_neptune = npl['a_neptune'] a_rng = npl['a_rng'] e_rng = npl['e_rng'] i_rng = npl['i_rng'] q_rng = npl['q_rng'] clsses = npl['clsses'] clss_values = npl['clss_values'] ax1, ax2, ax3 = plot_kbo(kb, a_neptune, a_rng, e_rng, i_rng, q_rng) #extract training data from kb x_train = kb[['a', 'e', 'Incl', 'q']].values[kb.clss.values != 'None'] y_train = kb['clss_value'].values[kb.clss.values != 'None'] #accuracy of linear fit to training data from sklearn.svm import SVC clf = SVC(kernel='linear') clf.fit(x_train, y_train) print 'accuracy of linear fit to training data = ', clf.score(x_train, y_train) #accuracy of rbf fit to training data from sklearn.svm import SVC clf = SVC(kernel='rbf') clf.fit(x_train, y_train) print 'accuracy of rbf fit to training data = ', clf.score(x_train, y_train) #accuracy of polynomial fit to training data...use this one! from sklearn.svm import SVC clf = SVC(kernel='poly', degree=3) clf.fit(x_train, y_train) print 'accuracy of degree=3 polynomial fit to training data = ', clf.score(x_train, y_train) #apply the fitted SVM classifier to all the kbo data, and insert classifications into new columns in the kb dataframe x = kb[['a', 'e', 'Incl', 'q']] y = clf.predict(x) kb['clss_value_pred'] = 0 kb['clss_pred'] = '' for j, yj in enumerate(y): kb.clss_value_pred.values[j] = yj kb.clss_pred.values[j] = clsses[clss_values == yj] #this plot shows how SVM classified all KBOs clr = ['burlywood', 'gray', 'green', 'yellow', 'purple', 'orange', 'cyan', 'magenta', 'black', 'red', 'blue'] for j in range(1, len(clsses)): a = kb.a[kb.clss_pred == clsses[j]] e = kb.e[kb.clss_pred == clsses[j]] Incl = kb.Incl[kb.clss_pred == clsses[j]] q = kb.q[kb.clss_pred == clsses[j]] #ax1, ax2, ax3 = plot_kbo(kb, a_neptune, a_rng, e_rng, i_rng, q_rng) ax1.plot(a, e, marker='o', markersize=4, linestyle='None', markeredgewidth=1.0, color=clr[j], label=clsses[j]) ax2.plot(a, Incl, marker='o', markersize=4, linestyle='None', markeredgewidth=1.0, color=clr[j]) ax3.plot(a, q, marker='o', markersize=4, linestyle='None', markeredgewidth=1.0, color=clr[j]) plt.show(block=False) plt.draw() plt.savefig('kbo_svm.png', dpi=200) print 'total number of kbos in dataset = ', len(kb) print 'total number of kbos in training set = ', len(kb[kb.clss != 'None']) print 'number of misclassifications = ', 35