Source code for rootpy.math.stats.qqplot

"""
Taken from exmaple by Zhiyi Liu, zhiyil@fnal.gov
here: http://root.cern.ch/phpBB3/viewtopic.php?f=3&t=6865
and converted into Python
"""

import ROOT
from math import sqrt
from array import array
from rootpy.plotting import Graph, Hist, Canvas


[docs]def qqplot(h1, h2, quantiles=None): """ Return a Graph of a QQ plot and confidence band """ if quantiles is None: quantiles = max(min(len(h1), len(h2)) / 2, 1) nq = quantiles xq = array('d', [0.] * nq) # position where to compute the quantiles in [0,1] yq1 = array('d', [0.] * nq) # array to contain the quantiles yq2 = array('d', [0.] * nq) # array to contain the quantiles for i in xrange(nq): xq[i] = float(i + 1) / nq h1.GetQuantiles(nq, yq1, xq) h2.GetQuantiles(nq, yq2, xq) xq_plus = array('d', [0.] * nq) xq_minus = array('d', [0.] * nq) yq2_plus = array('d', [0.] * nq) yq2_minus = array('d', [0.] * nq) """ KS_cv: KS critical value 1.36 KS_cv = ----------- sqrt( N ) Where 1.36 is for alpha = 0.05 (confidence level 1-5%=95%, about 2 sigma) For 1 sigma (alpha=0.32, CL=68%), the value in the nominator is 0.9561, it is gotten by GetCriticalValue(1, 1 - 0.68). NOTE: * For 1-sample KS test (data and theoretic), N should be n * For 2-sample KS test (2 data set), N should be sqrt(m*n/(m+n))! Here is the case m or n (size of samples) should be effective size for a histogram * Critical value here is valid for only for sample size >= 80 (some references say 35) which means, for example, for a unweighted histogram, it must have more than 80 (or 35) entries filled and then confidence band is reliable. """ esum1 = effective_sample_size(h1) esum2 = effective_sample_size(h2) # one sigma band KS_cv = (critical_value(1, 1 - 0.68) / sqrt((esum1 * esum2) / (esum1 + esum2))) for i in xrange(nq): xq_plus[i] = float(xq[i] + KS_cv) # upper limit xq_minus[i] = float(xq[i] - KS_cv) # lower limit h2.GetQuantiles(nq, yq2_plus, xq_plus) h2.GetQuantiles(nq, yq2_minus, xq_minus) yq2_err_plus = array('d', [0.] * nq) yq2_err_minus = array('d', [0.] * nq) for i in xrange(nq): yq2_err_plus[i] = yq2_plus[i] - yq2[i] yq2_err_minus[i] = yq2[i] - yq2_minus[i] #forget the last point, so number of points: (nq-1) gr = Graph(nq - 1) for i in xrange(nq - 1): gr[i] = (yq1[i], yq2[i]) # confidence level band gr.SetPointEYlow(i, yq2_err_minus[i]) gr.SetPointEYhigh(i, yq2_err_plus[i]) return gr
[docs]def effective_sample_size(h): """ calculate effective sample size for a histogram same way as ROOT does. """ axis = h.GetXaxis() last = axis.GetNbins() sum = 0 ew = 0 w = 0 for bin in xrange(1, last + 1): sum += h.GetBinContent(bin) ew = h.GetBinError(bin) w += ew * ew esum = sum * sum / w return esum
[docs]def critical_value(n, p): """ the routine is used to calculate critical value given n and p, confidential level = 1 - p. Original Reference: http://velveeta.che.wisc.edu/octave/lists/archive//octave-sources.2003/msg00031.html I just checked it, but it is not available now... """ dn = 1 delta = 0.5 res = ROOT.TMath.KolmogorovProb(dn * sqrt(n)) while res > 1.0001 * p or res < 0.9999 * p: if (res > 1.0001 * p): dn = dn + delta if (res < 0.9999 * p): dn = dn - delta delta = delta / 2. res = ROOT.TMath.KolmogorovProb(dn * sqrt(n)) return dn
if __name__ == '__main__': """ this is an example of drawing a quantile-quantile plot with confidential level (CL) band by Zhiyi Liu, zhiyil@fnal.gov """ ROOT.gROOT.SetStyle("Plain") ROOT.gStyle.SetOptStat(0) can = Canvas(name="can", title="can", width=600, height=450) rand = ROOT.TRandom3() h1 = Hist(100, -5, 5, name="h1", title="Histogram 1") h1.Sumw2() h1.SetLineColor(ROOT.kRed) h2 = Hist(100, -5, 5, name="h2", title="Histogram 2") h2.SetLineColor(ROOT.kBlue) for ievt in xrange(10000): #some test histograms: #1. let 2 histograms screwed #h1.Fill(rand.Gaus(0.5, 0.8)) #h2.Fill(rand.Gaus(0, 1)) #2. long tail and short tail h1.Fill(rand.Gaus(0, 0.8)) h2.Fill(rand.Gaus(0, 1)) #hs = ROOT.THStack("hs", "2 example distributions") #hs.Add(h1) #hs.Add(h2) #hs.Draw("nostack") h1.Draw() h2.Draw('same') #draw legend leg = ROOT.TLegend(0.7, 0.7, 0.89, 0.89) leg.SetFillColor(0) leg.AddEntry(h1, h1.GetTitle(), "pl") leg.AddEntry(h2, h2.GetTitle(), "pl") leg.Draw() can.Modified() can.Update() gr = qqplot(h1, h2) gr.SetLineColor(ROOT.kRed + 2) gr.SetMarkerColor(ROOT.kRed + 2) gr.SetMarkerStyle(20) gr.SetTitle("QQ with CL") gr.GetXaxis().SetTitle(h1.GetTitle()) gr.GetYaxis().SetTitle(h2.GetTitle()) gr.SetFillColor(17) gr.SetFillStyle(1001) c = Canvas(name="c", title="QQ with CL", width=600, height=450) gr.Draw("ap") x_min = gr.GetXaxis().GetXmin() x_max = gr.GetXaxis().GetXmax() y_min = gr.GetXaxis().GetXmin() y_max = gr.GetXaxis().GetXmax() c.Clear() gr.Draw('a3') gr.Draw('Xp same') # a straight line y=x to be a reference f_dia = ROOT.TF1("f_dia", "x", h1.GetXaxis().GetXmin(), h1.GetXaxis().GetXmax()) f_dia.SetLineColor(9) f_dia.SetLineWidth(2) f_dia.SetLineStyle(2) f_dia.Draw("same") leg = ROOT.TLegend(0.52, 0.15, 0.87, 0.35) leg.SetFillColor(0) leg.SetShadowColor(17) leg.SetBorderSize(3) leg.AddEntry(gr, "QQ points", "p") leg.AddEntry(gr, "68% CL band", "f") leg.AddEntry(f_dia, "Diagonal line", "l") leg.Draw() c.Modified() c.Update() c.OwnMembers() c.Draw() raw_input()