#select_kbos.py # # by Joe Hahn, jhahn@spacescience.org, 21 January 2014. # # Read the output generated by make_kbo_data_file.py, and then select 10% or so KBOs # in a quasi-random way, and then classify them by hand as members of the Main Belt, # the 3:2 population, 2:1, etc. This will be the 'training' population that will # be used to train the SVM algorithm so that it can automatically classify the remaining # 90% of the KBO population. #to execute in ipython: > ipython --pylab In [1]: %run select_kbos.py #import modules used below import numpy as np import pandas as pd import matplotlib.pyplot as plt import random from plot_kbo import * #set plotting limits and Neptune's orbit a_neptune = 30.07 a_rng = [28, 55] e_rng = [0.0, 0.6] i_rng = [0.0, 50.0] q_rng = [15.0, 50.0] #allowed kbo classes clsses = np.array(['None', 'sd', '5:4', '4:3', '3:2', '5:3', '7:4', '2:1', 'esd', 'mbh', 'mbc']) #read kbo orbit data and randomize their order in the dataframe kb = pd.DataFrame.from_csv('kbos.csv') kb['rn'] = np.random.random_sample(size=len(kb)) kb.sort(['rn'], inplace=True) #delete from kbo list any objects observed less than two oppositions kb['n_opps'] = 0 for j in range(kb.Opps.size): o = kb.Opps[j] if (o.find('d') < 0): kb.n_opps[j] = int(o) kb = kb[kb.n_opps > 1] kbs = kb.sort(['rn']) #also delete any objects outside the above plotting limits kb = kb[(kb.a > a_rng[0]) & (kb.a < a_rng[1]) & (kb.e > e_rng[0]) & (kb.e < e_rng[1]) & (kb.Incl > i_rng[0]) & (kb.Incl < i_rng[1]) & (kb.q > q_rng[0]) & (kb.q < q_rng[1])] kb.index = range(len(kb)) #plot kbo e versus a plt.close() plt.figure(figsize=(6, 5)) plt.rcParams.update({'font.size': 11}) ax = plt.subplot(1, 1, 1) ax.set_xlabel('semimajor axis a (AU)') ax.set_ylabel('eccentricity e') ax.set_xlim(a_rng) ax.set_ylim(e_rng) ax.plot(kb.a.values, kb.e.values, marker='o', markersize=3.0, color='blue', linestyle='None') a_axis = np.arange(a_rng[0], a_rng[1], 0.1) e_crossing = 1 - a_neptune/a_axis ax.plot(a_axis, e_crossing, color='orange') epsilon = -1.0 res = np.array([[2, 1], [3, 2], [4, 3], [7, 4], [5, 3], [5, 4]]) for r in res: m = 1.0*r[0] + epsilon k = 1.0*r[1] - m ar = a_neptune*((1. - epsilon/m)/(1. + k/m))**(2./3.) ax.plot([ar, ar], e_rng, color='black', linestyle=':') if ((ar > a_rng[0]) and (ar < a_rng[1])): plt.text(ar-0.4, e_rng[1]+0.025, str(r[0]) + ':' + str(r[1]), rotation=90) plt.show(block=False) plt.draw() plt.savefig('kbos_ea.png', dpi=200) #number density of observed kbos across a-e orbit element space Nbins = 31 density, a_axis, e_axis = np.histogram2d(kb.a.values, kb.e.values, bins=(Nbins, Nbins), range=[a_rng, e_rng]) density = density/density.sum() plt.close() plt.figure(figsize=(6, 5.0)) ax = plt.subplot(1, 1, 1) ax.imshow(density.T, origin='lower') plt.axis('off') plt.show(block=False) plt.draw() plt.savefig('kbos_density.png', dpi=200) #select quasi-random subset of kbos, selected so that their density is uniform in this plot factor = 14.0 random.seed(8) dsu_min = density[density > 0].min() dsu_max = density.max() prob = np.zeros_like(density) prob[density>0] = factor*dsu_min*dsu_max/density[density>0] ax.imshow(prob.T, origin='lower') plt.draw() kb['s'] = 0 for i in range(len(kb)): j = np.where(a_axis < kb.a.values[i])[0].max() if (j == Nbins): j = Nbins - 1 k = np.where(e_axis < kb.e.values[i])[0].max() if (k == Nbins): k = Nbins - 1 if (random.random() < prob[j, k]): kb['s'].values[i] = 1 #show plot of Kuiper Belt print 'number of selected and total kbos = ', len(kb[kb.s == 1]), len(kb) ax1, ax2, ax3 = plot_kbo(kb, a_neptune, a_rng, e_rng, i_rng, q_rng) plt.show(block=False) plt.draw() plt.savefig('kbos.png', dpi=200) #show Kuiper Belt and selected KBOs kbs = kb[kb.s == 1] ax1.plot(kbs.a, kbs.e, marker='o', markersize=6.0, color='red', linestyle='None', markeredgewidth=1.0) ax2.plot(kbs.a, kbs.Incl, marker='o', markersize=6.0, color='red', linestyle='None', markeredgewidth=1.0) ax3.plot(kbs.a, kbs.q, marker='o', markersize=6.0, color='red', linestyle='None', markeredgewidth=1.0) plt.show(block=False) plt.draw() #Select random KBOs, classify them by hand, and assign a numerical value to each kbo class refresh_kb = False clss_values = np.arange(clsses.size) if (refresh_kb == True): kb['clss'] = 'None' kb['clss_value'] = 0 for j in range(len(kb)): if (kb.s.values[j] == 1): a_rng1 = kb.a.values[j] + [-7.5, 7.5] e_rng1 = kb.e.values[j] + [-0.15, 0.15] i_rng1 = kb.Incl.values[j] + [-7.5, 7.5] q_rng1 = kb.q.values[j] + [-7.5, 7.5] ax1, ax2, ax3 = plot_kbo(kb, a_neptune, a_rng1, e_rng1, i_rng1, q_rng1) ax1.plot(kb.a.values[j], kb.e.values[j], marker='+', markersize=10.0, color='red', linestyle='None', markeredgewidth=1.0) ax2.plot(kb.a.values[j], kb.Incl.values[j], marker='+', markersize=10.0, color='red', linestyle='None', markeredgewidth=1.0) ax3.plot(kb.a.values[j], kb.q.values[j], marker='+', markersize=10.0, color='red', linestyle='None', markeredgewidth=1.0) plt.show(block=False) plt.draw() kb.clss.values[j] = raw_input(str(clsses) + ': ') kb.clss_value.values[j] = clss_values[clsses == kb.clss.values[j]] kb.to_pickle('kb.pkl') #the plot checks the hand-classification of selected KBOS ax1, ax2, ax3 = plot_kbo(kb, a_neptune, a_rng, e_rng, i_rng, q_rng) kb = pd.read_pickle('kb.pkl') kbo_classes = kb.clss.unique() clr = ['blue', 'red', 'green', 'yellow', 'purple', 'orange', 'cyan', 'magenta', 'black', 'burlywood', 'gray'] for j in range(len(kbo_classes)): a = kb.a[kb.clss == kbo_classes[j]] e = kb.e[kb.clss == kbo_classes[j]] Incl = kb.Incl[kb.clss == kbo_classes[j]] q = kb.q[kb.clss == kbo_classes[j]] msize = 6.0 if (kbo_classes[j] == 'None'): msize = 3.0 #ax1, ax2, ax3 = plot_kbo(kb, a_neptune, a_rng, e_rng, i_rng, q_rng) ax1.plot(a, e, marker='o', markersize=msize, linestyle='None', markeredgewidth=1.0, color=clr[j]) ax2.plot(a, Incl, marker='o', markersize=msize, linestyle='None', markeredgewidth=1.0, color=clr[j]) ax3.plot(a, q, marker='o', markersize=msize, linestyle='None', markeredgewidth=1.0, color=clr[j]) plt.show(block=False) plt.draw() #save results np.savez('select_kbos.npz', a_neptune=a_neptune, a_rng=a_rng, e_rng=e_rng, i_rng=i_rng, q_rng=q_rng, clsses=clsses, clss_values=clss_values)