Overview

This notebook serves two purposes: to test your installation and to give you a rough idea of what this tutorial will cover.

Basic Python

hello world

In [1]:
print 'hello world'
hello world

And other basics

In [2]:
1+1
Out[2]:
2
In [3]:
for i in range(3):
    print 'loop', i
loop 0
loop 1
loop 2

Reading ROOT files

In [4]:
from root_numpy import root2rec
In [5]:
bb = root2rec('data/B*.root')  # yep, that simple
cc = root2rec('data/cc-BtoDpi-all.root')
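
root2rec returns a plain numpy record array, so each branch in the tree becomes an attribute. A minimal sketch for poking at what was loaded (this assumes the trees contain the R2All branch used below):

print bb.dtype.names  # branch names become field names
print bb.R2All[:5]    # each branch is an ordinary numpy array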

And plotting

In [6]:
hist([bb.R2All, cc.R2All], bins=100, histtype='stepfilled', 
     color=['red','green'], alpha=0.5, label=[r'$B\bar{B}$',r'$c\bar{c}$']);
legend().get_frame().set_alpha(0.5)
title(r'My Amazing plot of R2All $\alpha$ $\beta$', fontsize='xx-large' )
xlabel('R2All')
Out[6]:
<matplotlib.text.Text at 0x103f3c710>
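
The bare hist, legend, title, and xlabel calls work because this notebook is assumed to run under %pylab inline, which pulls matplotlib and numpy (as np) into the global namespace. A minimal sketch of the same plot with explicit imports, if you would rather not rely on pylab:

import matplotlib.pyplot as plt
plt.hist([bb.R2All, cc.R2All], bins=100, histtype='stepfilled',
         color=['red', 'green'], alpha=0.5, label=[r'$B\bar{B}$', r'$c\bar{c}$'])
plt.legend().get_frame().set_alpha(0.5)
plt.title(r'My Amazing plot of R2All $\alpha$ $\beta$', fontsize='xx-large')
plt.xlabel('R2All')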

Multivariate analysis

In [7]:
from sklearn import tree
In [8]:
feature_sig = np.random.multivariate_normal(mean=[0.0, 0.0], cov=[[1, 0.5], [0.5, 1]], size=1000)
feature_bkg = (np.random.rand(1000, 2) - 0.5) * 10
scatter(feature_bkg[:,0], feature_bkg[:,1], color='b', alpha=0.4, label='bkg')
scatter(feature_sig[:,0], feature_sig[:,1], color='g', alpha=0.4, label='sig')
legend().get_frame().set_alpha(0.8) # I submitted a patch to do legend(framealpha=0.5)
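
The toy features above are drawn fresh on every run, so your scatter plot will differ slightly from the one shown. For reproducible toys, seed numpy's generator before the cell above (42 is an arbitrary choice):

np.random.seed(42)  # any fixed seed makes the toys reproducible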
In [9]:
# prepare data for training: labels must follow the concatenation order (sig first)
features = np.concatenate([feature_sig, feature_bkg])
classes = np.array([1]*len(feature_sig) + [0]*len(feature_bkg))
In [10]:
clf = tree.DecisionTreeClassifier(min_samples_leaf=10)
In [11]:
clf.fit(features, classes)
Out[11]:
DecisionTreeClassifier(compute_importances=False, criterion='gini',
            max_depth=None, max_features=None, min_density=0.1,
            min_samples_leaf=10, min_samples_split=20,
            random_state=<mtrand.RandomState object at 0x1002ab4e0>)
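
A quick figure of merit is the tree's mean accuracy. Note that scoring on the training sample itself, as here, gives an optimistic estimate:

print clf.score(features, classes)  # fraction of training points classified correctly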
In [12]:
prediction = clf.predict(features)
print prediction.shape
print features[:,0].shape
(2000,)
(2000,)
In [13]:
sig = features[prediction==1]
bg = features[prediction==0]
figure(figsize=(12,6))
subplot(121)
x = linspace(-5,5,100)
y = linspace(-5,5,100)
xx, yy = np.meshgrid((x[1:]+x[:-1])/2.0, (y[1:]+y[:-1])/2.0)
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
pcolormesh(xx,yy,Z,cmap='Pastel2')
scatter(sig[:,0], sig[:,1], color='g', alpha=0.4)
scatter(bg[:,0], bg[:,1], color='b', alpha=0.4)
title('Prediction');
subplot(122)
scatter(feature_bkg[:,0], feature_bkg[:,1], color='b', alpha=0.4, label='bkg')
scatter(feature_sig[:,0], feature_sig[:,1], color='g', alpha=0.4, label='sig')
legend().get_frame().set_alpha(0.8)
title('Original')
Out[13]:
<matplotlib.text.Text at 0x109d388d0>
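
Predicting on the very events used for training, as above, overstates the performance. A fairer check holds out a test sample; a minimal sketch (train_test_split lived in sklearn.cross_validation in this era of scikit-learn, and has since moved to sklearn.model_selection):

from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features, classes, test_size=0.3)
clf2 = tree.DecisionTreeClassifier(min_samples_leaf=10)
clf2.fit(X_train, y_train)
print clf2.score(X_test, y_test)  # accuracy on events the tree has never seen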

Fitting

In [14]:
from iminuit import Minuit
from probfit import UnbinnedLH, gaussian
In [15]:
data = randn(10000)
hist(data, bins=100, histtype='step');
In [16]:
ulh = UnbinnedLH(gaussian, data)
ulh.draw(args=dict(mean=1.2, sigma=0.7))
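
Under the hood, UnbinnedLH simply evaluates the negative log-likelihood of the data for a given set of parameters. A hand-rolled sketch of the same quantity, assuming gaussian is the usual normalized normal density:

def my_nll(mean, sigma):
    # -sum_i log f(x_i; mean, sigma) for a normalized gaussian pdf
    pdf = np.exp(-0.5 * ((data - mean) / sigma)**2) / (sigma * np.sqrt(2 * np.pi))
    return -np.sum(np.log(pdf))
print my_nll(1.2, 0.7)  # comparable to the FCN Minuit minimizes below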
In [17]:
m = Minuit(ulh, mean=1.2, sigma=0.7)
-c:1: InitialParamWarning: Parameter mean is floating but does not have initial step size. Assume 1.
-c:1: InitialParamWarning: Parameter sigma is floating but does not have initial step size. Assume 1.
In [18]:
m.migrad()

FCN = 14253.4400285    NFCN = 58    NCALLS = 58
EDM = 2.25861962361e-08    GOAL EDM = 5e-06    UP = 0.5

Valid    Valid Param    Accurate Covar    PosDef    Made PosDef
True     True           True              True      False

Hesse Fail    HasCov    Above EDM    Reach calllim
False         True      False        False

     Name     Value         Parab Error    Minos Error-    Minos Error+    Limit-    Limit+    FIXED
  1  mean     1.468257e-02  1.006428e-02   0.000000e+00    0.000000e+00
  2  sigma    1.006427e+00  7.116531e-03   0.000000e+00    0.000000e+00

Out[18]:
({'hesse_failed': False, 'has_reached_call_limit': False, 'has_accurate_covar': True, 'has_posdef_covar': True, 'up': 0.5, 'edm': 2.258619623612016e-08, 'is_valid': True, 'is_above_max_edm': False, 'has_covariance': True, 'has_made_posdef_covar': False, 'has_valid_parameters': True, 'fval': 14253.440028460127, 'nfcn': 58},
 [{'is_const': False, 'name': 'mean', 'has_limits': False, 'value': 0.014682570532315777, 'number': 0L, 'has_lower_limit': False, 'upper_limit': 0.0, 'lower_limit': 0.0, 'has_upper_limit': False, 'error': 0.010064276469830966, 'is_fixed': False},
  {'is_const': False, 'name': 'sigma', 'has_limits': False, 'value': 1.0064274701983684, 'number': 1L, 'has_lower_limit': False, 'upper_limit': 0.0, 'lower_limit': 0.0, 'has_upper_limit': False, 'error': 0.007116530650577966, 'is_fixed': False}])
In [19]:
print m.values
print m.errors
ulh.draw(m)
{'sigma': 1.0064274701983684, 'mean': 0.014682570532315777}
{'sigma': 0.007116530650577966, 'mean': 0.010064276469830966}
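
Since randn samples a unit gaussian, the fit should recover mean ≈ 0 and sigma ≈ 1 within the quoted errors. A quick sanity check (the 3-sigma tolerance is an arbitrary choice):

print abs(m.values['mean'] - 0.0) < 3 * m.errors['mean']    # expect True
print abs(m.values['sigma'] - 1.0) < 3 * m.errors['sigma']  # expect True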