Skip to content

Commit 5e283da

Browse files
committed
Aggregations: Return the sum of the doc counts of other buckets.
This commit adds a new field to the response of the terms aggregation called `sum_other_doc_count`, which is equal to the sum of the doc counts of the buckets that did not make it to the list of top buckets. It is typically useful to have a sector called e.g. `other` when using terms aggregations to build pie charts.

Example query and response:

```json
GET test/_search?search_type=count
{
  "aggs": {
    "colors": {
      "terms": {
        "field": "color",
        "size": 3
      }
    }
  }
}
```

```json
{
  [...],
  "aggregations": {
    "colors": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 4,
      "buckets": [
        { "key": "blue", "doc_count": 65 },
        { "key": "red", "doc_count": 14 },
        { "key": "brown", "doc_count": 3 }
      ]
    }
  }
}
```

Close elastic#8213
1 parent 2319e73 commit 5e283da

18 files changed

+187
-43
lines changed

docs/reference/search/aggregations/bucket/terms-aggregation.asciidoc

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ Response:
2525
2626
"aggregations" : {
2727
"genders" : {
28-
"buckets" : [
28+
"doc_count_error_upper_bound": 0, <1>
29+
"sum_other_doc_count": 0, <2>
30+
"buckets" : [ <3>
2931
{
3032
"key" : "male",
3133
"doc_count" : 10
@@ -40,6 +42,10 @@ Response:
4042
}
4143
--------------------------------------------------
4244

45+
<1> an upper bound of the error on the document counts for each term; see <<search-aggregations-bucket-terms-aggregation-approximate-counts,below>>
46+
<2> when there are lots of unique terms, Elasticsearch only returns the top terms; this number is the sum of the document counts for all buckets that are not part of the response
47+
<3> the list of the top buckets, the meaning of `top` being defined by the <<search-aggregations-bucket-terms-aggregation-order,order>>
48+
4349
By default, the `terms` aggregation will return the buckets for the top ten terms ordered by the `doc_count`. One can
4450
change this default behaviour by setting the `size` parameter.
4551

@@ -52,6 +58,7 @@ This means that if the number of unique terms is greater than `size`, the return
5258
(it could be that the term counts are slightly off and it could even be that a term that should have been in the top
5359
size buckets was not returned). If set to `0`, the `size` will be set to `Integer.MAX_VALUE`.
5460

61+
[[search-aggregations-bucket-terms-aggregation-approximate-counts]]
5562
==== Document counts are approximate
5663

5764
As described above, the document counts (and the results of any sub aggregations) in the terms aggregation are not always
@@ -226,6 +233,7 @@ does not return a particular term which appears in the results from another shar
226233
aggregation is either sorted by a sub aggregation or in order of ascending document count, the error in the document counts cannot be
227234
determined and is given a value of -1 to indicate this.
228235

236+
[[search-aggregations-bucket-terms-aggregation-order]]
229237
==== Order
230238

231239
The order of the buckets can be customized by setting the `order` parameter. By default, the buckets are ordered by

src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public boolean shouldCollect() {
4545

4646
@Override
4747
public InternalAggregation buildEmptyAggregation() {
48-
return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList(), showTermDocCountError, 0);
48+
return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList(), showTermDocCountError, 0, 0);
4949
}
5050

5151
}

src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ Bucket newBucket(long docCount, InternalAggregations aggs, long docCountError) {
9898

9999
DoubleTerms() {} // for serialization
100100

101-
public DoubleTerms(String name, InternalOrder order, @Nullable ValueFormatter formatter, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
102-
super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
101+
public DoubleTerms(String name, InternalOrder order, @Nullable ValueFormatter formatter, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
102+
super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
103103
this.formatter = formatter;
104104
}
105105

@@ -109,8 +109,8 @@ public Type type() {
109109
}
110110

111111
@Override
112-
protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
113-
return new DoubleTerms(name, order, formatter, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
112+
protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
113+
return new DoubleTerms(name, order, formatter, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
114114
}
115115

116116
@Override
@@ -132,6 +132,9 @@ public void readFrom(StreamInput in) throws IOException {
132132
this.showTermDocCountError = false;
133133
}
134134
this.minDocCount = in.readVLong();
135+
if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
136+
this.otherDocCount = in.readVLong();
137+
}
135138
int size = in.readVInt();
136139
List<InternalTerms.Bucket> buckets = new ArrayList<>(size);
137140
for (int i = 0; i < size; i++) {
@@ -162,6 +165,9 @@ public void writeTo(StreamOutput out) throws IOException {
162165
out.writeBoolean(showTermDocCountError);
163166
}
164167
out.writeVLong(minDocCount);
168+
if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
169+
out.writeVLong(otherDocCount);
170+
}
165171
out.writeVInt(buckets.size());
166172
for (InternalTerms.Bucket bucket : buckets) {
167173
out.writeDouble(((Bucket) bucket).term);
@@ -176,6 +182,7 @@ public void writeTo(StreamOutput out) throws IOException {
176182
@Override
177183
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
178184
builder.field(InternalTerms.DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME, docCountError);
185+
builder.field(SUM_OF_OTHER_DOC_COUNTS, otherDocCount);
179186
builder.startArray(CommonFields.BUCKETS);
180187
for (InternalTerms.Bucket bucket : buckets) {
181188
builder.startObject();

src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTermsAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ private static DoubleTerms convertToDouble(LongTerms terms) {
6969
for (int i = 0; i < buckets.length; ++i) {
7070
buckets[i] = convertToDouble(buckets[i]);
7171
}
72-
return new DoubleTerms(terms.getName(), terms.order, terms.formatter, terms.requiredSize, terms.shardSize, terms.minDocCount, Arrays.asList(buckets), terms.showTermDocCountError, terms.docCountError);
72+
return new DoubleTerms(terms.getName(), terms.order, terms.formatter, terms.requiredSize, terms.shardSize, terms.minDocCount, Arrays.asList(buckets), terms.showTermDocCountError, terms.docCountError, terms.otherDocCount);
7373
}
7474

7575
}

src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) {
150150
} else {
151151
size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
152152
}
153+
long otherDocCount = 0;
153154
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
154155
OrdBucket spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0);
155156
for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
@@ -161,6 +162,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) {
161162
if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
162163
continue;
163164
}
165+
otherDocCount += bucketDocCount;
164166
spare.globalOrd = globalTermOrd;
165167
spare.bucketOrd = bucketOrd;
166168
spare.docCount = bucketDocCount;
@@ -182,6 +184,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) {
182184
copy(globalOrds.lookupOrd(bucket.globalOrd), scratch);
183185
list[i] = new StringTerms.Bucket(scratch, bucket.docCount, null, showTermDocCountError, 0);
184186
list[i].bucketOrd = bucket.bucketOrd;
187+
otherDocCount -= list[i].docCount;
185188
}
186189
//replay any deferred collections
187190
runDeferredCollections(survivingBucketOrds);
@@ -193,7 +196,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) {
193196
bucket.docCountError = 0;
194197
}
195198

196-
return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0);
199+
return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0, otherDocCount);
197200
}
198201

199202
/** This is used internally only, for comparing buckets in the PQ by global ordinal instead of by term bytes */

src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalTerms.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
public abstract class InternalTerms extends InternalAggregation implements Terms, ToXContent, Streamable {
3838

3939
protected static final String DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME = "doc_count_error_upper_bound";
40+
protected static final String SUM_OF_OTHER_DOC_COUNTS = "sum_other_doc_count";
4041

4142
public static abstract class Bucket extends Terms.Bucket {
4243

@@ -104,10 +105,11 @@ public Bucket reduce(List<? extends Bucket> buckets, ReduceContext context) {
104105
protected Map<String, Bucket> bucketMap;
105106
protected long docCountError;
106107
protected boolean showTermDocCountError;
108+
protected long otherDocCount;
107109

108110
protected InternalTerms() {} // for serialization
109111

110-
protected InternalTerms(String name, InternalOrder order, int requiredSize, int shardSize, long minDocCount, List<Bucket> buckets, boolean showTermDocCountError, long docCountError) {
112+
protected InternalTerms(String name, InternalOrder order, int requiredSize, int shardSize, long minDocCount, List<Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
111113
super(name);
112114
this.order = order;
113115
this.requiredSize = requiredSize;
@@ -116,6 +118,7 @@ protected InternalTerms(String name, InternalOrder order, int requiredSize, int
116118
this.buckets = buckets;
117119
this.showTermDocCountError = showTermDocCountError;
118120
this.docCountError = docCountError;
121+
this.otherDocCount = otherDocCount;
119122
}
120123

121124
@Override
@@ -139,14 +142,21 @@ public long getDocCountError() {
139142
return docCountError;
140143
}
141144

145+
@Override
146+
public long getSumOfOtherDocCounts() {
147+
return otherDocCount;
148+
}
149+
142150
@Override
143151
public InternalAggregation reduce(ReduceContext reduceContext) {
144152
List<InternalAggregation> aggregations = reduceContext.aggregations();
145153

146154
Multimap<Object, InternalTerms.Bucket> buckets = ArrayListMultimap.create();
147155
long sumDocCountError = 0;
156+
long otherDocCount = 0;
148157
for (InternalAggregation aggregation : aggregations) {
149158
InternalTerms terms = (InternalTerms) aggregation;
159+
otherDocCount += terms.getSumOfOtherDocCounts();
150160
final long thisAggDocCountError;
151161
if (terms.buckets.size() < this.shardSize || this.order == InternalOrder.TERM_ASC || this.order == InternalOrder.TERM_DESC) {
152162
thisAggDocCountError = 0;
@@ -182,7 +192,10 @@ public InternalAggregation reduce(ReduceContext reduceContext) {
182192
}
183193
}
184194
if (b.docCount >= minDocCount) {
185-
ordered.insertWithOverflow(b);
195+
Terms.Bucket removed = ordered.insertWithOverflow(b);
196+
if (removed != null) {
197+
otherDocCount += removed.getDocCount();
198+
}
186199
}
187200
}
188201
Bucket[] list = new Bucket[ordered.size()];
@@ -195,9 +208,9 @@ public InternalAggregation reduce(ReduceContext reduceContext) {
195208
} else {
196209
docCountError = aggregations.size() == 1 ? 0 : sumDocCountError;
197210
}
198-
return newAggregation(name, Arrays.asList(list), showTermDocCountError, docCountError);
211+
return newAggregation(name, Arrays.asList(list), showTermDocCountError, docCountError, otherDocCount);
199212
}
200213

201-
protected abstract InternalTerms newAggregation(String name, List<Bucket> buckets, boolean showTermDocCountError, long docCountError);
214+
protected abstract InternalTerms newAggregation(String name, List<Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount);
202215

203216
}

src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ Bucket newBucket(long docCount, InternalAggregations aggs, long docCountError) {
9999

100100
LongTerms() {} // for serialization
101101

102-
public LongTerms(String name, InternalOrder order, @Nullable ValueFormatter formatter, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
103-
super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
102+
public LongTerms(String name, InternalOrder order, @Nullable ValueFormatter formatter, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
103+
super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
104104
this.formatter = formatter;
105105
}
106106

@@ -110,8 +110,8 @@ public Type type() {
110110
}
111111

112112
@Override
113-
protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
114-
return new LongTerms(name, order, formatter, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
113+
protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
114+
return new LongTerms(name, order, formatter, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
115115
}
116116

117117
@Override
@@ -133,6 +133,9 @@ public void readFrom(StreamInput in) throws IOException {
133133
this.showTermDocCountError = false;
134134
}
135135
this.minDocCount = in.readVLong();
136+
if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
137+
this.otherDocCount = in.readVLong();
138+
}
136139
int size = in.readVInt();
137140
List<InternalTerms.Bucket> buckets = new ArrayList<>(size);
138141
for (int i = 0; i < size; i++) {
@@ -163,6 +166,9 @@ public void writeTo(StreamOutput out) throws IOException {
163166
out.writeBoolean(showTermDocCountError);
164167
}
165168
out.writeVLong(minDocCount);
169+
if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
170+
out.writeVLong(otherDocCount);
171+
}
166172
out.writeVInt(buckets.size());
167173
for (InternalTerms.Bucket bucket : buckets) {
168174
out.writeLong(((Bucket) bucket).term);
@@ -177,6 +183,7 @@ public void writeTo(StreamOutput out) throws IOException {
177183
@Override
178184
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
179185
builder.field(InternalTerms.DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME, docCountError);
186+
builder.field(SUM_OF_OTHER_DOC_COUNTS, otherDocCount);
180187
builder.startArray(CommonFields.BUCKETS);
181188
for (InternalTerms.Bucket bucket : buckets) {
182189
builder.startObject();

src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTermsAggregator.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) {
115115

116116
final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
117117

118+
long otherDocCount = 0;
118119
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
119120
LongTerms.Bucket spare = null;
120121
for (long i = 0; i < bucketOrds.size(); i++) {
@@ -123,6 +124,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) {
123124
}
124125
spare.term = bucketOrds.get(i);
125126
spare.docCount = bucketDocCount(i);
127+
otherDocCount += spare.docCount;
126128
spare.bucketOrd = i;
127129
if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
128130
spare = (LongTerms.Bucket) ordered.insertWithOverflow(spare);
@@ -136,6 +138,7 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) {
136138
final LongTerms.Bucket bucket = (LongTerms.Bucket) ordered.pop();
137139
survivingBucketOrds[i] = bucket.bucketOrd;
138140
list[i] = bucket;
141+
otherDocCount -= bucket.docCount;
139142
}
140143

141144
runDeferredCollections(survivingBucketOrds);
@@ -146,13 +149,13 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) {
146149
list[i].docCountError = 0;
147150
}
148151

149-
return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0);
152+
return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0, otherDocCount);
150153
}
151154

152155

153156
@Override
154157
public InternalAggregation buildEmptyAggregation() {
155-
return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList(), showTermDocCountError, 0);
158+
return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList(), showTermDocCountError, 0, 0);
156159
}
157160

158161
@Override

src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTerms.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ Bucket newBucket(long docCount, InternalAggregations aggs, long docCountError) {
9898

9999
StringTerms() {} // for serialization
100100

101-
public StringTerms(String name, InternalOrder order, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
102-
super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
101+
public StringTerms(String name, InternalOrder order, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
102+
super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
103103
}
104104

105105
@Override
@@ -108,8 +108,8 @@ public Type type() {
108108
}
109109

110110
@Override
111-
protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
112-
return new StringTerms(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
111+
protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
112+
return new StringTerms(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
113113
}
114114

115115
@Override
@@ -130,6 +130,9 @@ public void readFrom(StreamInput in) throws IOException {
130130
this.showTermDocCountError = false;
131131
}
132132
this.minDocCount = in.readVLong();
133+
if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
134+
this.otherDocCount = in.readVLong();
135+
}
133136
int size = in.readVInt();
134137
List<InternalTerms.Bucket> buckets = new ArrayList<>(size);
135138
for (int i = 0; i < size; i++) {
@@ -159,6 +162,9 @@ public void writeTo(StreamOutput out) throws IOException {
159162
out.writeBoolean(showTermDocCountError);
160163
}
161164
out.writeVLong(minDocCount);
165+
if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
166+
out.writeVLong(otherDocCount);
167+
}
162168
out.writeVInt(buckets.size());
163169
for (InternalTerms.Bucket bucket : buckets) {
164170
out.writeBytesRef(((Bucket) bucket).termBytes);
@@ -173,6 +179,7 @@ public void writeTo(StreamOutput out) throws IOException {
173179
@Override
174180
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
175181
builder.field(InternalTerms.DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME, docCountError);
182+
builder.field(SUM_OF_OTHER_DOC_COUNTS, otherDocCount);
176183
builder.startArray(CommonFields.BUCKETS);
177184
for (InternalTerms.Bucket bucket : buckets) {
178185
builder.startObject();

0 commit comments

Comments
 (0)