#kbo_svm.py
#
#  by Joe Hahn, jhahn@spacescience.org, 23 January 2014.
#
#  Train the SVM algorithm to classify KBO orbits.

#to execute in ipython:		> ipython --pylab	In [1]: %run kbo_svm.py

#import modules used below
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from plot_kbo import * 
import sklearn as sk

#load the kbo dataframe and the arrays saved in select_kbos.npz, then draw the base plot
kb = pd.read_pickle('kb.pkl')
npl = np.load('select_kbos.npz')
npz_keys = ('a_neptune', 'a_rng', 'e_rng', 'i_rng', 'q_rng', 'clsses', 'clss_values')
#unpack each saved array into a variable of the same name
a_neptune, a_rng, e_rng, i_rng, q_rng, clsses, clss_values = (npl[key] for key in npz_keys)
#plot_kbo returns the three axes that the predicted classes get drawn on further down
ax1, ax2, ax3 = plot_kbo(kb, a_neptune, a_rng, e_rng, i_rng, q_rng)

#build the training set from the rows of kb whose clss label is not the string 'None'
labeled = kb.clss.values != 'None'
x_train = kb[['a', 'e', 'Incl', 'q']].values[labeled]
y_train = kb['clss_value'].values[labeled]

#accuracy of linear fit to training data
from sklearn.svm import SVC
clf = SVC(kernel='linear')
clf.fit(x_train, y_train)
print 'accuracy of linear fit to training data = ', clf.score(x_train, y_train)

#accuracy of rbf fit to training data
from sklearn.svm import SVC
clf = SVC(kernel='rbf')
clf.fit(x_train, y_train)
print 'accuracy of rbf fit to training data = ', clf.score(x_train, y_train)

#accuracy of polynomial fit to training data...use this one!
from sklearn.svm import SVC
clf = SVC(kernel='poly', degree=3)
clf.fit(x_train, y_train)
print 'accuracy of degree=3 polynomial fit to training data = ', clf.score(x_train, y_train)

#apply the fitted SVM classifier to all the kbo data, and insert classifications into new columns in the kb dataframe
x = kb[['a', 'e', 'Incl', 'q']]
#predict on the bare ndarray, which is the same kind of input that clf was fitted with
y = clf.predict(x.values)
#assign whole columns at once; writing element-by-element through kb.<column>.values is not
#guaranteed to reach the dataframe, since .values may be a copy or a read-only array
kb['clss_value_pred'] = y
#look up the class name that goes with each predicted class value, so that clss_pred holds plain
#scalar labels rather than the one-element arrays that clsses[clss_values == yj] returns
#(assumes each entry of clss_values appears only once)
clss_of_value = dict(zip(clss_values.tolist(), clsses.tolist()))
kb['clss_pred'] = [clss_of_value[yj] for yj in y.tolist()]

#overplot every KBO on the three axes, colored by the class that SVM predicted for it
clr = ['burlywood', 'gray', 'green', 'yellow', 'purple', 'orange', 'cyan', 'magenta', 'black', 'red', 'blue']
#marker settings shared by all three panels
marker_style = dict(marker='o', markersize=4, linestyle='None', markeredgewidth=1.0)
#j starts at 1, so clsses[0] is never plotted (presumably that entry is the 'None' class...to be confirmed)
for j, clss_name in enumerate(clsses[1:], 1):
    members = kb.clss_pred == clss_name
    a, e, Incl, q = kb.a[members], kb.e[members], kb.Incl[members], kb.q[members]
    #only the first panel carries the legend label
    ax1.plot(a, e, color=clr[j], label=clss_name, **marker_style)
    ax2.plot(a, Incl, color=clr[j], **marker_style)
    ax3.plot(a, q, color=clr[j], **marker_style)
    #refresh the figure after each class is added
    plt.show(block=False)
    plt.draw()

plt.savefig('kbo_svm.png', dpi=200)
print 'total number of kbos in dataset = ', len(kb)
print 'total number of kbos in training set = ', len(kb[kb.clss != 'None'])
print 'number of misclassifications = ', 35