Skip to content

Commit be45be9

Browse files
committed
Implement GROUP BY DISTINCT
With grouping sets, it's possible that some of the grouping sets are duplicate. This is especially common with CUBE and ROLLUP clauses. For example GROUP BY CUBE (a,b), CUBE (b,c) is equivalent to GROUP BY GROUPING SETS ( (a, b, c), (a, b, c), (a, b, c), (a, b), (a, b), (a, b), (a), (a), (a), (c, a), (c, a), (c, a), (c), (b, c), (b), () ) Some of the grouping sets are calculated multiple times, which is mostly unnecessary. This commit implements a new GROUP BY DISTINCT feature, as defined in the SQL standard, which eliminates the duplicate sets. Author: Vik Fearing Reviewed-by: Erik Rijkers, Georgios Kokolatos, Tomas Vondra Discussion: https://postgr.es/m/bf3805a8-d7d1-ae61-fece-761b7ff41ecc@postgresfriends.org
1 parent cd91de0 commit be45be9

File tree

18 files changed

+333
-27
lines changed

18 files changed

+333
-27
lines changed

doc/src/sgml/queries.sgml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1372,6 +1372,55 @@ GROUP BY GROUPING SETS (
13721372
</programlisting>
13731373
</para>
13741374

1375+
<para>
1376+
<indexterm zone="queries-grouping-sets">
1377+
<primary>ALL</primary>
1378+
<secondary>GROUP BY ALL</secondary>
1379+
</indexterm>
1380+
<indexterm zone="queries-grouping-sets">
1381+
<primary>DISTINCT</primary>
1382+
<secondary>GROUP BY DISTINCT</secondary>
1383+
</indexterm>
1384+
When specifying multiple grouping items together, the final set of grouping
1385+
sets might contain duplicates. For example:
1386+
<programlisting>
1387+
GROUP BY ROLLUP (a, b), ROLLUP (a, c)
1388+
</programlisting>
1389+
is equivalent to
1390+
<programlisting>
1391+
GROUP BY GROUPING SETS (
1392+
(a, b, c),
1393+
(a, b),
1394+
(a, b),
1395+
(a, c),
1396+
(a),
1397+
(a),
1398+
(a, c),
1399+
(a),
1400+
()
1401+
)
1402+
</programlisting>
1403+
If these duplicates are undesirable, they can be removed using the
1404+
<literal>DISTINCT</literal> clause directly on the <literal>GROUP BY</literal>.
1405+
Therefore:
1406+
<programlisting>
1407+
GROUP BY <emphasis>DISTINCT</emphasis> ROLLUP (a, b), ROLLUP (a, c)
1408+
</programlisting>
1409+
is equivalent to
1410+
<programlisting>
1411+
GROUP BY GROUPING SETS (
1412+
(a, b, c),
1413+
(a, b),
1414+
(a, c),
1415+
(a),
1416+
()
1417+
)
1418+
</programlisting>
1419+
This is not the same as using <literal>SELECT DISTINCT</literal> because the output
1420+
rows may still contain duplicates. If any of the ungrouped columns contains NULL,
1421+
it will be indistinguishable from the NULL used when that same column is grouped.
1422+
</para>
1423+
13751424
<note>
13761425
<para>
13771426
The construct <literal>(a, b)</literal> is normally recognized in expressions as
@@ -1560,8 +1609,13 @@ SELECT a "from", b + c AS sum FROM ...
15601609
<sect2 id="queries-distinct">
15611610
<title><literal>DISTINCT</literal></title>
15621611

1612+
<indexterm zone="queries-distinct">
1613+
<primary>ALL</primary>
1614+
<secondary>SELECT ALL</secondary>
1615+
</indexterm>
15631616
<indexterm zone="queries-distinct">
15641617
<primary>DISTINCT</primary>
1618+
<secondary>SELECT DISTINCT</secondary>
15651619
</indexterm>
15661620

15671621
<indexterm zone="queries-distinct">

doc/src/sgml/ref/select.sgml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
3737
[ * | <replaceable class="parameter">expression</replaceable> [ [ AS ] <replaceable class="parameter">output_name</replaceable> ] [, ...] ]
3838
[ FROM <replaceable class="parameter">from_item</replaceable> [, ...] ]
3939
[ WHERE <replaceable class="parameter">condition</replaceable> ]
40-
[ GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...] ]
40+
[ GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...] ]
4141
[ HAVING <replaceable class="parameter">condition</replaceable> ]
4242
[ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] ]
4343
[ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] <replaceable class="parameter">select</replaceable> ]
@@ -778,7 +778,7 @@ WHERE <replaceable class="parameter">condition</replaceable>
778778
<para>
779779
The optional <literal>GROUP BY</literal> clause has the general form
780780
<synopsis>
781-
GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...]
781+
GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...]
782782
</synopsis>
783783
</para>
784784

@@ -802,7 +802,10 @@ GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...]
802802
independent <replaceable>grouping sets</replaceable>. The effect of this is
803803
equivalent to constructing a <literal>UNION ALL</literal> between
804804
subqueries with the individual grouping sets as their
805-
<literal>GROUP BY</literal> clauses. For further details on the handling
805+
<literal>GROUP BY</literal> clauses. The optional <literal>DISTINCT</literal>
806+
clause removes duplicate sets before processing; it does <emphasis>not</emphasis>
807+
transform the <literal>UNION ALL</literal> into a <literal>UNION DISTINCT</literal>.
808+
For further details on the handling
806809
of grouping sets see <xref linkend="queries-grouping-sets"/>.
807810
</para>
808811

src/backend/catalog/sql_features.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ T351 Bracketed SQL comments (/*...*/ comments) YES
482482
T431 Extended grouping capabilities YES
483483
T432 Nested and concatenated GROUPING SETS YES
484484
T433 Multiargument GROUPING function YES
485-
T434 GROUP BY DISTINCT NO
485+
T434 GROUP BY DISTINCT YES
486486
T441 ABS and MOD functions YES
487487
T461 Symmetric BETWEEN predicate YES
488488
T471 Result sets return value NO

src/backend/nodes/copyfuncs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3135,6 +3135,7 @@ _copyQuery(const Query *from)
31353135
COPY_NODE_FIELD(onConflict);
31363136
COPY_NODE_FIELD(returningList);
31373137
COPY_NODE_FIELD(groupClause);
3138+
COPY_SCALAR_FIELD(groupDistinct);
31383139
COPY_NODE_FIELD(groupingSets);
31393140
COPY_NODE_FIELD(havingQual);
31403141
COPY_NODE_FIELD(windowClause);
@@ -3221,6 +3222,7 @@ _copySelectStmt(const SelectStmt *from)
32213222
COPY_NODE_FIELD(fromClause);
32223223
COPY_NODE_FIELD(whereClause);
32233224
COPY_NODE_FIELD(groupClause);
3225+
COPY_SCALAR_FIELD(groupDistinct);
32243226
COPY_NODE_FIELD(havingClause);
32253227
COPY_NODE_FIELD(windowClause);
32263228
COPY_NODE_FIELD(valuesLists);

src/backend/nodes/equalfuncs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,7 @@ _equalQuery(const Query *a, const Query *b)
977977
COMPARE_NODE_FIELD(onConflict);
978978
COMPARE_NODE_FIELD(returningList);
979979
COMPARE_NODE_FIELD(groupClause);
980+
COMPARE_SCALAR_FIELD(groupDistinct);
980981
COMPARE_NODE_FIELD(groupingSets);
981982
COMPARE_NODE_FIELD(havingQual);
982983
COMPARE_NODE_FIELD(windowClause);
@@ -1053,6 +1054,7 @@ _equalSelectStmt(const SelectStmt *a, const SelectStmt *b)
10531054
COMPARE_NODE_FIELD(fromClause);
10541055
COMPARE_NODE_FIELD(whereClause);
10551056
COMPARE_NODE_FIELD(groupClause);
1057+
COMPARE_SCALAR_FIELD(groupDistinct);
10561058
COMPARE_NODE_FIELD(havingClause);
10571059
COMPARE_NODE_FIELD(windowClause);
10581060
COMPARE_NODE_FIELD(valuesLists);

src/backend/nodes/list.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,6 +1506,22 @@ list_sort(List *list, list_sort_comparator cmp)
15061506
qsort(list->elements, len, sizeof(ListCell), (qsort_comparator) cmp);
15071507
}
15081508

1509+
/*
1510+
* list_sort comparator for sorting a list into ascending int order.
1511+
*/
1512+
int
1513+
list_int_cmp(const ListCell *p1, const ListCell *p2)
1514+
{
1515+
int v1 = lfirst_int(p1);
1516+
int v2 = lfirst_int(p2);
1517+
1518+
if (v1 < v2)
1519+
return -1;
1520+
if (v1 > v2)
1521+
return 1;
1522+
return 0;
1523+
}
1524+
15091525
/*
15101526
* list_sort comparator for sorting a list into ascending OID order.
15111527
*/

src/backend/nodes/outfuncs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2771,6 +2771,7 @@ _outSelectStmt(StringInfo str, const SelectStmt *node)
27712771
WRITE_NODE_FIELD(fromClause);
27722772
WRITE_NODE_FIELD(whereClause);
27732773
WRITE_NODE_FIELD(groupClause);
2774+
WRITE_BOOL_FIELD(groupDistinct);
27742775
WRITE_NODE_FIELD(havingClause);
27752776
WRITE_NODE_FIELD(windowClause);
27762777
WRITE_NODE_FIELD(valuesLists);
@@ -2996,6 +2997,7 @@ _outQuery(StringInfo str, const Query *node)
29962997
WRITE_NODE_FIELD(onConflict);
29972998
WRITE_NODE_FIELD(returningList);
29982999
WRITE_NODE_FIELD(groupClause);
3000+
WRITE_BOOL_FIELD(groupDistinct);
29993001
WRITE_NODE_FIELD(groupingSets);
30003002
WRITE_NODE_FIELD(havingQual);
30013003
WRITE_NODE_FIELD(windowClause);

src/backend/nodes/readfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ _readQuery(void)
271271
READ_NODE_FIELD(onConflict);
272272
READ_NODE_FIELD(returningList);
273273
READ_NODE_FIELD(groupClause);
274+
READ_BOOL_FIELD(groupDistinct);
274275
READ_NODE_FIELD(groupingSets);
275276
READ_NODE_FIELD(havingQual);
276277
READ_NODE_FIELD(windowClause);

src/backend/optimizer/plan/planner.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2442,7 +2442,7 @@ preprocess_grouping_sets(PlannerInfo *root)
24422442
ListCell *lc_set;
24432443
grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
24442444

2445-
parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
2445+
parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
24462446

24472447
gd->any_hashable = false;
24482448
gd->unhashable_refs = NULL;

src/backend/parser/analyze.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,6 +1264,7 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
12641264
qry->sortClause,
12651265
EXPR_KIND_GROUP_BY,
12661266
false /* allow SQL92 rules */ );
1267+
qry->groupDistinct = stmt->groupDistinct;
12671268

12681269
if (stmt->distinctClause == NIL)
12691270
{

0 commit comments

Comments
 (0)