Skip to content

Commit 2f142fc

Browse files
committed
[hist] Fix computation of number of entries in ResetStats
When computing the number of entries in TH1::ResetStats always include underflow/overflow. Also, in case of weighted histogram, fEntries is set to the effective entries inclusing underflow/overflow. Note that GetEffectiveEntries() will return the underflow or overflow depending on the flag used for computing the statistics. Fix a bug in the computation of GetAllSumOfWeights() and improve the function to return (as an optional parameter) the total sum of weight square (including underflow/overflows). The bug was happening for dimensions < 2. (see ROOT-21253) Remove in UHI indexing test the check that number of effective entries is equal to number of entries. With the current changes the number of entries will contain underflows/overflows while number of effective entries not, because it follows the convention used for the statistics (mean,std, etc..) including what is in the current histogram range In stressHistogram add test to check sumofallweights including underflow/overflow and compare with GetEntries for unweighted histograms (not for TProfile's which are different)
1 parent ef81d2b commit 2f142fc

File tree

5 files changed

+87
-21
lines changed

5 files changed

+87
-21
lines changed

bindings/pyroot/pythonizations/test/uhi_indexing.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,6 @@ def test_statistics_slice(self, hist_setup):
296296
sliced_hist_full = hist_setup[...]
297297

298298
assert hist_setup.GetEffectiveEntries() == pytest.approx(sliced_hist_full.GetEffectiveEntries())
299-
assert sliced_hist_full.GetEntries() == pytest.approx(sliced_hist_full.GetEffectiveEntries())
300299
assert hist_setup.Integral() == pytest.approx(sliced_hist_full.Integral())
301300

302301
# Check if slicing over a range updates the statistics
@@ -307,7 +306,6 @@ def test_statistics_slice(self, hist_setup):
307306

308307
assert hist_setup.Integral() == sliced_hist.Integral()
309308
assert hist_setup.GetEffectiveEntries() == pytest.approx(sliced_hist.GetEffectiveEntries())
310-
assert sliced_hist.GetEntries() == pytest.approx(sliced_hist.GetEffectiveEntries())
311309
assert hist_setup.GetStdDev() == pytest.approx(sliced_hist.GetStdDev(), rel=10e-5)
312310
assert hist_setup.GetMean() == pytest.approx(sliced_hist.GetMean(), rel=10e-5)
313311

hist/hist/inc/TH1.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,7 @@ class TH1 : public TNamed, public TAttLine, public TAttFill, public TAttMarker {
553553
virtual void GetStats(Double_t *stats) const;
554554
virtual Double_t GetStdDev(Int_t axis=1) const;
555555
virtual Double_t GetStdDevError(Int_t axis=1) const;
556-
Double_t GetSumOfAllWeights(const bool includeOverflow) const;
556+
Double_t GetSumOfAllWeights(const bool includeOverflow, Double_t * sumWeightSquare = nullptr) const;
557557
/// Return the sum of weights across all bins excluding under/overflows.
558558
/// \see TH1::GetSumOfAllWeights()
559559
virtual Double_t GetSumOfWeights() const { return GetSumOfAllWeights(false); }

hist/hist/src/TH1.cxx

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -973,11 +973,13 @@ Bool_t TH1::Add(const TH1 *h1, Double_t c1)
973973
return (iret >= 0);
974974
}
975975

976-
// Create Sumw2 if h1 has Sumw2 set
976+
// Create Sumw2 if h1 has Sumw2 set
977977
if (fSumw2.fN == 0 && h1->GetSumw2N() != 0) Sumw2();
978+
// In addition, create Sumw2 if is not a simple addition, otherwise errors will not be correctly computed
979+
if (fSumw2.fN == 0 && c1 != 1.0) Sumw2();
978980

979-
// - Add statistics
980-
Double_t entries = TMath::Abs( GetEntries() + c1 * h1->GetEntries() );
981+
// - Add statistics (for c1=1)
982+
Double_t entries = GetEntries() + h1->GetEntries();
981983

982984
// statistics can be preserved only in case of positive coefficients
983985
// otherwise with negative c1 (histogram subtraction) one risks to get negative variances
@@ -1054,7 +1056,15 @@ Bool_t TH1::Add(const TH1 *h1, Double_t c1)
10541056
else s1[i] += c1*s2[i];
10551057
}
10561058
PutStats(s1);
1057-
SetEntries(entries);
1059+
if (c1 == 1.0)
1060+
SetEntries(entries);
1061+
else {
1062+
// compute entries as effective entries in case of
1063+
// weights different than 1
1064+
double sumw2 = 0;
1065+
double sumw = GetSumOfAllWeights(true, &sumw2);
1066+
if (sumw2 > 0) SetEntries( sumw*sumw/sumw2);
1067+
}
10581068
}
10591069
return kTRUE;
10601070
}
@@ -1139,7 +1149,8 @@ Bool_t TH1::Add(const TH1 *h1, const TH1 *h2, Double_t c1, Double_t c2)
11391149

11401150
// Create Sumw2 if h1 or h2 have Sumw2 set
11411151
if (fSumw2.fN == 0 && (h1->GetSumw2N() != 0 || h2->GetSumw2N() != 0)) Sumw2();
1142-
1152+
// Create also Sumw2 if not a simple addition (c1 = 1, c2 = 1)
1153+
if (fSumw2.fN == 0 && (c1 != 1.0 || c2 != 1.0)) Sumw2();
11431154
// - Add statistics
11441155
Double_t nEntries = TMath::Abs( c1*h1->GetEntries() + c2*h2->GetEntries() );
11451156

@@ -1246,9 +1257,18 @@ Bool_t TH1::Add(const TH1 *h1, const TH1 *h2, Double_t c1, Double_t c2)
12461257
ResetStats();
12471258
}
12481259
else {
1249-
// update statistics (do here to avoid changes by SetBinContent) FIXME remove???
1260+
// update statistics
12501261
PutStats(s3);
1251-
SetEntries(nEntries);
1262+
// previous entries are correct only if c1=1 and c2=1
1263+
if (c1 == 1.0 && c2 == 1.0)
1264+
SetEntries(nEntries);
1265+
else {
1266+
// compute entries as effective entries in case of
1267+
// weights different than 1
1268+
double sumw2 = 0;
1269+
double sumw = GetSumOfAllWeights(true, &sumw2);
1270+
if (sumw2 > 0) SetEntries( sumw*sumw/sumw2);
1271+
}
12521272
}
12531273

12541274
return kTRUE;
@@ -7981,33 +8001,48 @@ void TH1::ResetStats()
79818001
fEntries = 1; // to force re-calculation of the statistics in TH1::GetStats
79828002
GetStats(stats);
79838003
PutStats(stats);
7984-
fEntries = TMath::Abs(fTsumw);
7985-
// use effective entries for weighted histograms: (sum_w) ^2 / sum_w2
7986-
if (fSumw2.fN > 0 && fTsumw > 0 && stats[1] > 0 ) fEntries = stats[0]*stats[0]/ stats[1];
8004+
// histogram entries should include always underflows and overflows
8005+
if (GetStatOverflowsBehaviour() && !fXaxis.TestBit(TAxis::kAxisRange))
8006+
fEntries = TMath::Abs(fTsumw);
8007+
else {
8008+
Double_t sumw2 = 0;
8009+
Double_t * p_sumw2 = (fSumw2.fN > 0) ? &sumw2 : nullptr;
8010+
fEntries = GetSumOfAllWeights(true, p_sumw2);
8011+
// use effective entries for weighted histograms: (sum_w) ^2 / sum_w2
8012+
if (p_sumw2 && sumw2 > 0) fEntries = fEntries*fEntries/ sumw2;
8013+
}
79878014
}
79888015

79898016
////////////////////////////////////////////////////////////////////////////////
7990-
/// Return the sum of all weights
8017+
/// Return the sum of all weights and optionally also the sum of weight squares
79918018
/// \param includeOverflow true to include under/overflows bins, false to exclude those.
79928019
/// \note Different from TH1::GetSumOfWeights, that always excludes those
79938020

7994-
Double_t TH1::GetSumOfAllWeights(const bool includeOverflow) const
8021+
Double_t TH1::GetSumOfAllWeights(const bool includeOverflow, Double_t * sumWeightSquare) const
79958022
{
79968023
if (fBuffer) const_cast<TH1*>(this)->BufferEmpty();
79978024

79988025
const Int_t start = (includeOverflow ? 0 : 1);
79998026
const Int_t lastX = fXaxis.GetNbins() + (includeOverflow ? 1 : 0);
8000-
const Int_t lastY = fYaxis.GetNbins() + (includeOverflow ? 1 : 0);
8001-
const Int_t lastZ = fZaxis.GetNbins() + (includeOverflow ? 1 : 0);
8027+
const Int_t lastY = (fDimension > 1) ? (fYaxis.GetNbins() + (includeOverflow ? 1 : 0)) : start;
8028+
const Int_t lastZ = (fDimension > 2) ? (fZaxis.GetNbins() + (includeOverflow ? 1 : 0)) : start;
80028029
Double_t sum =0;
8030+
Double_t sum2 = 0;
80038031
for(auto binz = start; binz <= lastZ; binz++) {
80048032
for(auto biny = start; biny <= lastY; biny++) {
80058033
for(auto binx = start; binx <= lastX; binx++) {
80068034
const auto bin = GetBin(binx, biny, binz);
8007-
sum += RetrieveBinContent(bin);
8035+
sum += RetrieveBinContent(bin);
8036+
if (sumWeightSquare && fSumw2.fN > 0) sum2 += GetBinErrorSqUnchecked(bin);
80088037
}
80098038
}
80108039
}
8040+
if (sumWeightSquare) {
8041+
if (fSumw2.fN > 0)
8042+
*sumWeightSquare = sum2;
8043+
else
8044+
*sumWeightSquare = sum;
8045+
}
80118046
return sum;
80128047
}
80138048

hist/hist/src/TProfileHelper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ Bool_t TProfileHelper::Add(T* p, const TH1 *h1, const TH1 *h2, Double_t c1, Dou
108108

109109
// create sumw2 per bin if not set
110110
if (p->fBinSumw2.fN == 0 && (p1->fBinSumw2.fN != 0 || p2->fBinSumw2.fN != 0)) p->Sumw2();
111+
// create sumw2 also for the case where coefficients are not equal to 1
112+
if (p->fSumw2.fN == 0 && (c1 != 1.0 || c2 != 1.0)) p->Sumw2();
111113

112114
// Make the loop over the bins to calculate the Addition
113115
Double_t *cu1 = p1->GetW(); Double_t *cu2 = p2->GetW();

test/stressHistogram.cxx

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ enum compareOptions {
127127
int defaultEqualOptions = 0; //cmpOptPrint;
128128
//int defaultEqualOptions = cmpOptDebug;
129129

130-
Bool_t cleanHistos = kTRUE; // delete histogram after testing (swicth off in case of debugging)
130+
Bool_t cleanHistos = kTRUE; // delete histogram after testing (switch off in case of debugging)
131131

132132
const double defaultErrorLimit = 1.E-10;
133133

@@ -142,7 +142,7 @@ const char* refFileName = "./stressHistogram.5.18.00.root";
142142

143143
TRandom2 r;
144144
// set to zero if want to run different numbers every time
145-
const int initialSeed = 0;
145+
int initialSeed = 0;
146146

147147

148148

@@ -181,7 +181,6 @@ bool testAdd1()
181181
TH1D* h2 = new TH1D("t1D1_h2", "h2-Title", numberOfBins, minRange, maxRange);
182182
TH1D* h3 = new TH1D("t1D1_h3", "h3=c1*h1+c2*h2", numberOfBins, minRange, maxRange);
183183

184-
h1->Sumw2();h2->Sumw2();h3->Sumw2();
185184

186185
FillHistograms(h1, h3, 1.0, c1);
187186
FillHistograms(h2, h3, 1.0, c2);
@@ -11106,6 +11105,11 @@ int stressHistogram(int testNumber = 0)
1110611105

1110711106
// Test 7
1110811107
// Add Tests
11108+
// for adding we do not need to set by default Sumw2
11109+
// we test that automatically is set
11110+
bool globalSumw2 = TH1::GetDefaultSumw2();
11111+
if (globalSumw2) TH1::SetDefaultSumw2(false);
11112+
1110911113
const unsigned int numberOfAdds = 22;
1111011114
pointer2Test addTestPointer[numberOfAdds] = { testAdd1, testAddProfile1,
1111111115
testAdd2, testAddProfile2,
@@ -11126,6 +11130,8 @@ int stressHistogram(int testNumber = 0)
1112611130
"Add tests for 1D, 2D and 3D Histograms and Profiles..............",
1112711131
addTestPointer };
1112811132

11133+
if (globalSumw2) TH1::SetDefaultSumw2(true);
11134+
1112911135
// Test 8
1113011136
// Multiply Tests
1113111137
const unsigned int numberOfMultiply = 20;
@@ -11911,6 +11917,21 @@ int compareStatistics( TH1* h1, TH1* h2, bool debug, double ERRORLIMIT)
1191111917
<< " | " << fabs( stats1[0] - stats2[0] )
1191211918
<< " " << differents
1191311919
<< std::endl;
11920+
// check the sum of weights (excluding underflows/overflows)
11921+
differents += (bool) equals( h1->GetSumOfWeights(), h2->GetSumOfWeights(), 100*ERRORLIMIT);
11922+
if ( debug )
11923+
std::cout << "Sum Of Weigths(2): " << h1->GetSumOfWeights() << " " << h2->GetSumOfWeights()
11924+
<< " | " << fabs( h2->GetSumOfWeights() - h1->GetSumOfWeights() )
11925+
<< " " << differents
11926+
<< std::endl;
11927+
// check the sum of all weights (including underflows/overflows)
11928+
differents += (bool) equals( h1->GetSumOfAllWeights(true), h2->GetSumOfAllWeights(true), 100*ERRORLIMIT);
11929+
if ( debug )
11930+
std::cout << "Sum Of All Weigths (with u/o): " << h1->GetSumOfAllWeights(true) << " " << h2->GetSumOfAllWeights(true)
11931+
<< " | " << fabs( h2->GetSumOfAllWeights(true) - h1->GetSumOfAllWeights(true) )
11932+
<< " " << differents
11933+
<< std::endl;
11934+
1191411935

1191511936
if (TMath::AreEqualRel(stats1[0], h1->GetEffectiveEntries() , 1.E-12) ) {
1191611937
// unweighted histograms - check also number of entries
@@ -11920,6 +11941,16 @@ int compareStatistics( TH1* h1, TH1* h2, bool debug, double ERRORLIMIT)
1192011941
<< " | " << fabs( h1->GetEntries() - h2->GetEntries() )
1192111942
<< " " << differents
1192211943
<< std::endl;
11944+
11945+
// check that the number of entries is equal to sum of all weights
11946+
// this is not valid for TProfiles
11947+
if (!h1->InheritsFrom(TProfile::Class()) && !h1->InheritsFrom(TProfile2D::Class()) && !h1->InheritsFrom(TProfile3D::Class())) {
11948+
differents += (bool) equals( h1->GetEntries(), h2->GetSumOfAllWeights(true), 100*ERRORLIMIT);
11949+
if (debug)
11950+
std::cout << "Entries vs Sum of All Weights: " << h1->GetEntries() << " " << h2->GetSumOfAllWeights(true)
11951+
<< " | " << fabs( h1->GetEntries() - h2->GetSumOfAllWeights(true) )
11952+
<< " " << differents << std::endl;
11953+
}
1192311954
}
1192411955

1192511956
// Number of Effective Entries

0 commit comments

Comments
 (0)