# -*- coding: utf-8 -*-
#
# mc_model.ipynb
#
# by Joe Hahn, jhahn@spacescience.org, 14 January 2014.
#
# A Monte Carlo model of a (genericized) client business: the client produces
# 'widgets' at random times t_widgets, and customers later order each widget
# with a probability per unit time
#
#     lambda(t - t_widget) = lambda_0 * ((t/tau)**n) * exp(-t/tau)
#
# where lambda_0 sets the peak order probability, tau is the timescale over
# which customer interest wanes, and n is a power-law exponent. The Monte
# Carlo draws below generate the widget-creation times and the resulting
# order times; fit.ipynb later fits (lambda_0, n, tau) to this synthetic data.
#
# Time t is in days throughout, but any unit works.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --- Generate random widget-creation times t_widgets over 0 < t < t_max ----
#
# Nw_total = total number of widgets created; fw controls how strongly the
# production rate ramps up with time (fw = 0 means a uniform rate).
Nw_total = 100000
t_max = 300.0
fw = 0.4
r = np.random.random(Nw_total)
if fw > 0:
    # Invert the cumulative distribution of a linearly growing production
    # rate so uniform deviates r map onto times with the desired density.
    t_widgets = t_max*(np.sqrt(1.0 + 4.0*fw*r*(1.0 + fw)) - 1.0)/(2.0*fw)
else:
    # fw = 0: production rate is constant, so times are simply uniform.
    t_widgets = r*t_max
t_widgets.sort()

# --- Histogram widget production rate vs time ------------------------------
#
# Nt = number of time samples; bins=Nt-1 means np.histogram returns Nt bin
# edges (stored in t) and Nt-1 counts. One extrapolated point is appended to
# n_widgets_hist so all arrays share length Nt. n_widgets_hist is then the
# rate (per day) at which widgets are produced.
# NOTE: t_max + 1 must be an integer here — np.histogram's bins argument and
# np.random.random's size argument both require ints.
Nt = int(t_max) + 1
n_widgets_hist, t = np.histogram(t_widgets, range=(0.0, t_max), bins=Nt - 1)
# Linearly extrapolate one extra sample so n_widgets_hist has Nt elements.
n_widgets_hist = np.append(
    n_widgets_hist, (n_widgets_hist[-1] - n_widgets_hist[-2]) + n_widgets_hist[-1])

# --- Plot widget production rate vs time -----------------------------------
plt.close()
plt.figure(figsize=(7, 5))
plt.rcParams.update({'font.size': 12})
ax = plt.subplot(1, 1, 1)
ax.set_xlabel('time t (days)')
ax.set_ylabel('widgets produced per day')
ax.set_title('widget production rate')
ax.set_ylim(0, 1.2*n_widgets_hist.max())
ax.plot(t, n_widgets_hist, label='widgets', drawstyle='steps-mid')
plt.savefig('widgets.png', dpi=200)

# --- Per-bin order probability delta_prob ----------------------------------
#
# delta_prob(t) = lambda_0 * ((t/tau)**n) * exp(-t/tau) * ds is the
# probability that one widget generates an order within a time bin of width
# ds, t days after the widget's creation. lambda_0 is roughly the peak order
# probability (to within a factor ~2), tau is the decay timescale, and n is
# the power-law exponent — the model's three free parameters.
lambda_0 = 0.01   # units: probability per day
tau = 15.0        # units: days
n = 2.0           # dimensionless power-law exponent
ds = t[1] - t[0]  # time-bin width (days)
x = t/tau
delta_prob = lambda_0*(x**n)*np.exp(-x)*ds

plt.close()
plt.figure(figsize=(6.5, 4.5))
ax = plt.subplot(1, 1, 1)
ax.set_title('order probability')
ax.set_xlabel('time t since widget creation (days)')
# Raw string: \D in a plain string is an invalid escape sequence.
ax.set_ylabel(r'$\Delta$p (probability/day)')
plt.rcParams.update({'font.size': 12})
ax.plot(t, delta_prob)
plt.savefig('delta_prob.png', dpi=200)

# --- Monte Carlo draw of the order times t_orders --------------------------
#
# For each widget created at time tw, compute the per-bin order probability
# at every later time sample, draw one uniform random number per bin, and
# record an order wherever the draw falls below that probability. Collect
# the per-widget order times in a list and concatenate once at the end
# (appending to an array inside the loop would be quadratic).
orders = []
for tw in t_widgets:
    dt = t - tw
    x = dt/tau
    delta_prob = lambda_0*(x**n)*np.exp(-x)*ds
    # No orders can occur before the widget exists.
    delta_prob[x < 0] = 0.0
    r = np.random.random(Nt)
    orders.append(t[r < delta_prob])
t_orders = np.concatenate(orders) if orders else np.array([], dtype=float)

# --- Histogram orders vs time ----------------------------------------------
#
# Halve the final bin (it is only half-sampled at the edge) and append a
# duplicate data point so n_orders_hist also has length Nt.
n_orders_hist, to_axis = np.histogram(t_orders, range=(0.0, t_max), bins=Nt - 1)
n_orders_hist[-1] = n_orders_hist[-1]//2
n_orders_hist = np.append(n_orders_hist, n_orders_hist[-1])

# --- Plot widget and order rates together ----------------------------------
plt.close()
plt.figure(figsize=(6.5, 4.5))
ax = plt.subplot(1, 1, 1)
ax.set_title('widget & orders production rate')
ax.set_xlabel('time t (days)')
ax.set_ylabel('number of orders and widgets per day')
ax.plot(t, n_widgets_hist, drawstyle='steps-mid', label='widgets')
ax.plot(t, n_orders_hist, drawstyle='steps-mid', label='orders')
ax.legend(loc='best')
plt.savefig('order_histogram.png', dpi=200)

print('number of orders = ', t_orders.size)
print('fraction of widgets that produce orders = ',
      float(t_orders.size)/t_widgets.size)

# --- Save results for fit.ipynb --------------------------------------------
np.savez('mc_model.npz', t=t, n_widgets_hist=n_widgets_hist,
         n_orders_hist=n_orders_hist, t_orders=t_orders,
         t_widgets=t_widgets, lambda_0=lambda_0, n=n, tau=tau)