#kbo_tree.py
#
#by Joe Hahn, jhahn@spacescience.org, 28 January 2014.
#
#Train the decision tree algorithm to classify KBO orbits,
#and show the decision tree's logic via a flowchart.
#
#to execute in ipython:
#    > ipython --pylab
#    In [1]: %run kbo_tree.py
#
#Inputs read from the working directory: kb.pkl, select_kbos.npz
#Outputs written to the working directory: kbo_tree.png, kbo_dec_tree.png

#import modules used below
import numpy as np
import pylab as pl
import pandas as pd
import matplotlib.pyplot as plt
from plot_kbo import *
import sklearn as sk
from sklearn import tree

#read kbo dataframe plus orbit element ranges and plot
#NOTE(review): unpickling can execute arbitrary code, so only load a kb.pkl
#that you generated yourself
kb = pd.read_pickle('kb.pkl')
#the with-block closes the npz archive once every array has been pulled out
with np.load('select_kbos.npz') as npl:
    a_neptune = npl['a_neptune']
    a_rng = npl['a_rng']
    e_rng = npl['e_rng']
    i_rng = npl['i_rng']
    q_rng = npl['q_rng']
    clsses = npl['clsses']
    clss_values = npl['clss_values']
ax1, ax2, ax3 = plot_kbo(kb, a_neptune, a_rng, e_rng, i_rng, q_rng)

#extract training data from kb; rows whose clss is the string 'None' are
#excluded from training
features = ['a', 'e', 'Incl', 'q']
is_labeled = kb.clss.values != 'None'
x_train = kb[features].values[is_labeled]
y_train = kb['clss_value'].values[is_labeled]

#train decision tree classifier
clf = tree.DecisionTreeClassifier(max_depth=7, min_samples_leaf=1)
clf.fit(x_train, y_train)
#scored on the same rows the tree was trained on, so this reports quality of
#fit rather than performance on unseen data
#(single-argument print(...) produces identical output under Python 2 and 3;
#the doubled space matches what the old two-item print statement emitted)
print('accuracy of decision tree fit to training data =  %s'
      % clf.score(x_train, y_train))

#apply the fitted decision tree to all the kbo data, and insert
#classifications into new columns in the kb dataframe; predict on a plain
#array, the same kind of input the tree was fitted on
y = clf.predict(kb[features].values)
kb['clss_value_pred'] = y
#translate each predicted class value into its class name; a value that is
#missing from clss_values keeps the empty-string placeholder
name_of_value = dict(zip(clss_values.tolist(), clsses.tolist()))
kb['clss_pred'] = [name_of_value.get(yj, '') for yj in y.tolist()]

#this plot shows how the decision tree classified all KBOs
clr = ['burlywood', 'gray', 'green', 'yellow', 'cyan', 'orange', 'purple',
       'magenta', 'black', 'red', 'blue']
marker_style = dict(marker='o', markersize=4, linestyle='None',
                    markeredgewidth=1.0)
#the loop starts at 1, so clsses[0] and clr[0] are never drawn
#NOTE(review): presumably clsses[0] is the unclassified 'None' class - confirm
for j in range(1, len(clsses)):
    members = kb[kb.clss_pred == clsses[j]]
    a = members['a']
    #only the (a, e) panel carries labels, so a legend lists each class once
    ax1.plot(a, members['e'], color=clr[j], label=clsses[j], **marker_style)
    ax2.plot(a, members['Incl'], color=clr[j], **marker_style)
    ax3.plot(a, members['q'], color=clr[j], **marker_style)
plt.show(block=False)
plt.draw()
plt.savefig('kbo_tree.png', dpi=200)
print('total number of kbos in dataset =  %s' % len(kb))
print('total number of kbos in training set =  %s' % int(is_labeled.sum()))

#Make decision tree graphic
#pydot and IPython are imported here rather than at the top so that the
#classification plot above is still produced when they are not installed
import pydot
try:
    from StringIO import StringIO    #Python 2
except ImportError:
    from io import StringIO          #Python 3
dot_data = StringIO()
tree.export_graphviz(clf, out_file=dot_data, feature_names=features)
tree_graphic = pydot.graph_from_dot_data(dot_data.getvalue())
#newer pydot releases return a list of graphs, older ones a single graph
if isinstance(tree_graphic, (list, tuple)):
    tree_graphic = tree_graphic[0]
tree_graphic.write_png('kbo_dec_tree.png')
#a bare Image(...) expression shows nothing when this file is executed as a
#script, so hand the image to display() explicitly
from IPython.display import Image, display
display(Image(filename='kbo_dec_tree.png'))