////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// #include #include #include #include "TStopwatch.h" #include "TFile.h" #include "TH1D.h" #include "TProfile.h" #include "TCanvas.h" #include "TString.h" #include "TStyle.h" #include "TChain.h" #include "TH2.h" #include "TH1.h" #include "TF1.h" #include "TTree.h" #include "TKey.h" #include "TMath.h" // For the likelihood #include "TMinuit.h" #include "TROOT.h" // Others #include "TRandom.h" #include "TRandom3.h" #include "Riostream.h" #include "TVirtualFitter.h" #include #include #include "Math/GoFTest.h" using namespace std; #define NVAR 6 // ====================== Main routine void kNN (const int NB=50000, const int NS=50000, const int Ntest=10000, double bgrfrac=0.8, int imask=63, double Kernel_size=0.1) { // Pass parameters: // ================ // NB = number of training background // NS = number of training signal // Ntest = number of test data // bgrfrac = fraction of background in the test dataset. // imask = mask bit for variables to use or not // Kernel_size = factor defining how to weight distances in NN calculation // ------------------------------------------------------------------------------------------ const int Nb = NB; const int Ns = NS; const int Nt = Ntest; const int Nbins = 50; // Range of data // ------------- double xmin[NVAR] = {-1.,-1.,-1.,-1.,-1.,-1.}; double xmax[NVAR] = {1.,1.,1.,1.,1.,1.}; // Masking inactive variables // -------------------------- bool * mask = new bool[NVAR]; for (int i=0; i=0; i--) { if (imask>=pow(2,i)) { imask-=pow(2,i); mask[i]=false; Nvar_active++; } } for (int i=0; iSetParameters(0.,0.1); TF1 * G2 = new TF1 ("G2","exp(-0.5*pow((x-[0])/[1],2))", (xmin[1]+xmin[2])/2., (xmax[1]+xmax[2])/2.); G2->SetParameters(0.,0.1); TF1 * G3 = new TF1 ("G3","exp(-0.5*pow((x-[0])/[1],2))", (xmin[2]+xmin[0])/2., (xmax[2]+xmax[0])/2.); G3->SetParameters(0.,0.3); // Generate background training sample // ----------------------------------- for (int i=0; iUniform(xmin[0],xmax[0]); double x2 = gRandom->Uniform(xmin[1],xmax[1]); double x3 = gRandom->Uniform(xmin[2],xmax[2]); double x4 = gRandom->Uniform(xmin[3],xmax[3]); double x5 = gRandom->Uniform(xmin[4],xmax[4]); double x6 = gRandom->Uniform(xmin[5],xmax[5]); b[NVAR*i] =x1; b[NVAR*i+1]=x2; b[NVAR*i+2]=x3; b[NVAR*i+3]=x4; b[NVAR*i+4]=x5; b[NVAR*i+5]=x6; for (int j=0; jFill(b[NVAR*i+j]); sumx[j]+=b[NVAR*i+j]; // Also get stuff to compute variance of each variable sumx2[j]+=pow(b[NVAR*i+j],2); } } // Calculate variances (only do this for train bgr sample, // it does not matter which we use for this since it is just a normalization factor) // --------------------------------------------------------------------------------- double var[NVAR]; for (int j=0; jUniform(xmin[0],xmax[0]); double x2 = gRandom->Uniform(xmin[1],xmax[1]); double x3 = gRandom->Uniform(xmin[2],xmax[2]); double x4 = gRandom->Uniform(xmin[3],xmax[3]); double x5 = gRandom->Uniform(xmin[4],xmax[4]); double x6 = gRandom->Uniform(xmin[5],xmax[5]); x[NVAR*i] =x1; x[NVAR*i+1]=x2; x[NVAR*i+2]=x3; x[NVAR*i+3]=x4; x[NVAR*i+4]=x5; x[NVAR*i+5]=x6; for (int j=0; jFill(x[NVAR*i+j]); } } // Generate signal training sample // ------------------------------- double flatfrac0 = 0.2; double flatfrac1 = 0.4; double flatfrac2 = 0.6; double corr_coeff_1=0.3; double corr_coeff_2=0.4; for (int i=0; iUniform(xmin[0],xmax[0]); double x2 = gRandom->Uniform(xmin[1],xmax[1]); double x3 = gRandom->Uniform(xmin[2],xmax[2]); double x4 = gRandom->Uniform(xmin[3],xmax[3]); double x5 = gRandom->Uniform(xmin[4],xmax[4]); double x6 = gRandom->Uniform(xmin[5],xmax[5]); // Force the density to be Gaussian in three of the variables // ---------------------------------------------------------- double pdf= (flatfrac0+(1-flatfrac0)*G1->Eval(corr_coeff_1*x1+(1-corr_coeff_1)*x2))* (flatfrac1+(1-flatfrac1)*G2->Eval(corr_coeff_2*x3+(1-corr_coeff_2)*x4))* (flatfrac2+(1-flatfrac2)*G3->Eval(x5)); double r = gRandom->Uniform(); if (rFill(s[NVAR*i+j]); } s12->Fill(x1,x2); s13->Fill(x1,x3); s14->Fill(x1,x4); s15->Fill(x1,x5); s16->Fill(x1,x6); s23->Fill(x2,x3); s24->Fill(x2,x4); s25->Fill(x2,x5); s26->Fill(x2,x6); s34->Fill(x3,x4); s35->Fill(x3,x5); s36->Fill(x3,x6); s45->Fill(x4,x5); s46->Fill(x4,x6); s56->Fill(x5,x6); i++; } } // Generate signal in test sample // ------------------------------ for (int i=Nt*bgrfrac; iUniform(xmin[0],xmax[0]); double x2 = gRandom->Uniform(xmin[1],xmax[1]); double x3 = gRandom->Uniform(xmin[2],xmax[2]); double x4 = gRandom->Uniform(xmin[3],xmax[3]); double x5 = gRandom->Uniform(xmin[4],xmax[4]); double x6 = gRandom->Uniform(xmin[5],xmax[5]); // Force the density to be Gaussian in three of the variables // ---------------------------------------------------------- double pdf= (flatfrac0+(1-flatfrac0)*G1->Eval(corr_coeff_1*x1+(1-corr_coeff_1)*x2))* (flatfrac1+(1-flatfrac1)*G2->Eval(corr_coeff_2*x3+(1-corr_coeff_2)*x4))* (flatfrac2+(1-flatfrac2)*G3->Eval(x5)); double r = gRandom->Uniform(); if (rFill(x[NVAR*i+j]); } i++; } } cout << endl; cout << "Now computing LR discriminant... " << endl; // Compute likelihood ratio discriminant // ------------------------------------- double * DiscrLR = new double[Nt]; for (int i=0; iEval(corr_coeff_1*x[NVAR*i]+(1-corr_coeff_1)*x[NVAR*i+1]))); DiscrLR[i]+=log(flatfrac1+(1-flatfrac1)*(G2->Eval(corr_coeff_2*x[NVAR*i+2]+(1-corr_coeff_2)*x[NVAR*i+3]))); DiscrLR[i]+=log(flatfrac2+(1-flatfrac2)*(G3->Eval(x[NVAR*i+4]))); // the sixth variable is Uniform distributed as in bgr events, so it does not affect the LR } cout << "Now computing RL discriminant... " << endl; // Compute relative likelihood discriminant // ---------------------------------------- double * DiscrRL = new double[Nt]; for (int i=0; iGetBinContent(ibin); double conts = s_[j]->GetBinContent(ibin); if (contb==0) contb=1; // avoid zero-entry bins by artificially setting them to 1 if (conts==0) conts=1; // same for signal distributions DiscrRL[i] += log(conts) - log(contb); } } } cout << "Now computing NN discriminant..." << endl; // Compute kNN estimator for test events // ------------------------------------- double * DiscrNN = new double[Nt]; double suminvd_b; double suminvd_s; double d2; double factor_exp = -0.5/(Kernel_size*Nvar_active); for (int i=0; iDiscrLR[i]) minDiscrLR=DiscrLR[i]; if (maxDiscrLRDiscrRL[i]) minDiscrRL=DiscrRL[i]; if (maxDiscrRLDiscrNN[i]) minDiscrNN=DiscrNN[i]; if (maxDiscrNNFill(DiscrLR[i]); DiscrRLB->Fill(DiscrRL[i]); DiscrNNB->Fill(DiscrNN[i]); } else { DiscrLRS->Fill(DiscrLR[i]); DiscrRLS->Fill(DiscrRL[i]); DiscrNNS->Fill(DiscrNN[i]); } } // Order events by LR, or by RL, // or by distance in multi-D space (ordL[]) // ---------------------------------------- cout << endl; cout << "Ordering events... " << endl; int * ordLR = new int[Nt]; int * ordRL = new int[Nt]; int * ordNN = new int[Nt]; for (int i=0; i0; j--) { if (DiscrLR[ordLR[j]]=Nt*bgrfrac) NsigLR--; if (ordLR[i]0) purLR = NsigLR/(NsigLR+NbgrLR); EvsPLR->Fill(effLR,purLR); EvsELR->Fill(effLR,effbLR); if (NsigLRtmp>NsigLR && NsigLRtmp>=10) { // avoid considering the zero-eff tail of the curve NsigLRtmp=NsigLR; IntEPLR+=purLR; NintLR++; } // compute integral of purity curve for rel lik now if (ordRL[i]>=Nt*bgrfrac) NsigRL--; if (ordRL[i]0) purRL = NsigRL/(NsigRL+NbgrRL); EvsPRL->Fill(effRL,purRL); EvsERL->Fill(effRL,effbRL); if (NsigRLtmp>NsigRL && NsigRLtmp>=10) { // avoid considering flukes in the zero-eff tail NsigRLtmp=NsigRL; IntEPRL+=purRL; NintRL++; } // compute integral of purity curve for Nearest-neighbor if (ordNN[i]>=Nt*bgrfrac) NsigNN--; if (ordNN[i]0) purNN = NsigNN/(NsigNN+NbgrNN); EvsPNN->Fill(effNN,purNN); EvsENN->Fill(effNN,effbNN); if (NsigNNtmp>NsigNN && NsigNNtmp>=10) { // avoid considering flukes in the zero-eff tail NsigNNtmp=NsigNN; IntEPNN+=purNN; NintNN++; } } // Compute integrated purity // (global estimator of separation) // -------------------------------- IntEPLR=IntEPLR/NintLR; IntEPRL=IntEPRL/NintRL; IntEPNN=IntEPNN/NintNN; } // Fill 2d plot of lnLev vs ord // ---------------------------- for (int i=0; i=0 && jindRL>=0 && jindNN>=0) break; } if (iFill((double)jindLR,(double)jindRL); LvsObgrBN->Fill((double)jindLR,(double)jindNN); LvsObgrNL->Fill((double)jindNN,(double)jindRL); } else { LvsOsigBL->Fill((double)jindLR,(double)jindRL); LvsOsigBN->Fill((double)jindLR,(double)jindNN); LvsOsigNL->Fill((double)jindNN,(double)jindRL); } } // Dump results on video // --------------------- cout << endl; cout << "============================================================================= " << endl; cout << endl; cout << " Results: " << endl; cout << " -------- " << endl; cout << endl; cout << "Parametric multi-D likelihood ratio: " << endl; cout << "Integrated purity = " << IntEPLR << endl; cout << endl; cout << "Histogram-based single-D relative likelihood: " << endl; cout << "Integrated purity = " << IntEPRL << endl; cout << endl; cout << "Integrated nearest-neighbor multi-D distance: " << endl; cout << "Integrated purity = " << IntEPNN << endl; cout << endl; cout << endl; // Plot results // ------------ TCanvas * SB = new TCanvas ("SB","Distributions", 500, 500); SB->Divide(2,3); for (int i=0; icd(i+1); s_[i]->Scale((double)Nb/(double)Ns); s_[i]->SetLineColor(kRed); s_[i]->SetMinimum(0); s_[i]->Draw(); b_[i]->Draw("SAME"); } TCanvas * Data = new TCanvas ("Data","Distributions", 500, 500); Data->Divide(2,3); for (int i=0; icd(i+1); x_[i]->SetMinimum(0); x_[i]->Draw(); } /* TCanvas * TMP2 = new TCanvas ("TMP2","Correlations", 500, 500); */ /* TMP2->Divide(2,3); */ /* TMP2->cd(1); */ /* LvsObgrBL->Draw(); */ /* TMP2->cd(2); */ /* LvsObgrBN->Draw(); */ /* TMP2->cd(3); */ /* LvsObgrNL->Draw(); */ /* TMP2->cd(4); */ /* LvsOsigBL->Draw(); */ /* TMP2->cd(5); */ /* LvsOsigBN->Draw(); */ /* TMP2->cd(6); */ /* LvsOsigNL->Draw(); */ TCanvas * C = new TCanvas ("C", "C", 600, 600); C->Divide(1,3); C->cd(1); C->GetPad(1)->SetLogx(); EvsPLR->SetMarkerStyle(20); EvsPLR->SetMarkerSize(0.4); EvsPLR->Draw(); C->cd(2); C->GetPad(2)->SetLogx(); EvsPRL->SetMarkerStyle(20); EvsPRL->SetMarkerSize(0.4); EvsPRL->Draw(); C->cd(3); C->GetPad(3)->SetLogx(); EvsPNN->SetMarkerStyle(20); EvsPNN->SetMarkerSize(0.4); EvsPNN->Draw(); TCanvas * CF = new TCanvas ("CF", "CF", 500, 500); CF->Divide(2,2); CF->cd(1); EvsPLR->SetMarkerColor(kRed); EvsPLR->Draw(); EvsPRL->Draw("SAME"); EvsPNN->SetMarkerColor(kBlue); EvsPNN->Draw("SAME"); CF->cd(2); DiscrLRB->Draw(); DiscrLRS->SetLineColor(kRed); if (bgrfrac<1 && bgrfrac>0) DiscrLRS->Scale(bgrfrac/(1.-bgrfrac)); DiscrLRS->Draw("SAME"); CF->cd(3); DiscrRLB->Draw(); DiscrRLS->SetLineColor(kRed); if (bgrfrac<1 && bgrfrac>0) DiscrRLS->Scale(bgrfrac/(1.-bgrfrac)); DiscrRLS->Draw("SAME"); CF->cd(4); DiscrNNB->Draw(); DiscrNNS->SetLineColor(kRed); if (bgrfrac<1 && bgrfrac>0) DiscrNNS->Scale(bgrfrac/(1.-bgrfrac)); DiscrNNS->Draw("SAME"); TCanvas * C2b = new TCanvas ("C2b", "C2b", 600,600); C2b->Divide(1,2); C2b->cd(1); C2b->GetPad(1)->SetLogx(); C2b->GetPad(1)->SetLogy(); EvsELR->SetMarkerStyle(20); EvsELR->SetMarkerSize(0.4); EvsERL->SetMarkerStyle(20); EvsERL->SetMarkerSize(0.4); EvsENN->SetMarkerStyle(20); EvsENN->SetMarkerSize(0.4); EvsELR->SetMarkerColor(kRed); EvsELR->Draw(); EvsERL->Draw("SAME"); EvsENN->SetMarkerColor(kBlue); EvsENN->Draw("SAME"); C2b->cd(2); EvsPLR->Draw(); EvsPRL->Draw("SAME"); EvsPNN->Draw("SAME"); TCanvas * SC = new TCanvas ("SC", "Correlation scatterplots for signal", 500, 500); SC->Divide(3,5); SC->cd(1); s12->Draw(); SC->cd(2); s13->Draw(); SC->cd(3); s14->Draw(); SC->cd(4); s15->Draw(); SC->cd(5); s16->Draw(); SC->cd(6); s23->Draw(); SC->cd(7); s24->Draw(); SC->cd(8); s25->Draw(); SC->cd(9); s26->Draw(); SC->cd(10); s34->Draw(); SC->cd(11); s35->Draw(); SC->cd(12); s36->Draw(); SC->cd(13); s45->Draw(); SC->cd(14); s46->Draw(); SC->cd(15); s56->Draw(); // Save histos to root file // ------------------------ char name2[100]; int kNb=Nb/1000; int kNs=Ns/1000; int kNt=Nt/1000; sprintf (name2, "C:/root/macros/Bootstrap/Compare_LR_RL_NN_parametric_Nb%dk_Ns%dk_Nt%dk_F%f_%d_kernel%f.root", kNb, kNs, kNt, bgrfrac, imask_copy, Kernel_size); TFile * out = new TFile (name2,"RECREATE"); out->cd(); LvsOsigBL->Write(); LvsObgrBL->Write(); LvsOsigBN->Write(); LvsObgrBN->Write(); LvsOsigNL->Write(); LvsObgrNL->Write(); EvsPLR->Write(); EvsPRL->Write(); EvsPNN->Write(); EvsELR->Write(); EvsERL->Write(); EvsENN->Write(); for (int j=0; jWrite(); s_[j]->Write(); x_[j]->Write(); } out->Close(); // Printout of results on ascii file // --------------------------------- char outfilename[100]; sprintf (outfilename,"C:/root/macros/Bootstrap/Compare_LR_RL_NN_parametric.asc"); ofstream outfile (outfilename,ios::app); outfile << endl; outfile << "Parametric test 3 gaussians 3 flat" << endl; outfile << "Nb = " << Nb << "; Ns = " << Ns << "Ntest = " << Nt << endl; outfile << "Fs = " << bgrfrac << "; Mask = " << imask_copy << "; kernel size = " << Kernel_size << endl; outfile << "Lik. Ratio: Avg purity = " << IntEPLR << endl; outfile << "Relat. Lik.: Avg purity = " << IntEPRL << endl; outfile << "N. Neigh.: Avg purity = " << IntEPNN << endl; outfile.close(); // End of program // -------------- cout << endl; cout << "Goodbye." << endl; }