Skip to content

Commit 5df33e2

Browse files
author
Nikita Glukhov
committed
Sort jsonb object values by length
1 parent 8c8ced1 commit 5df33e2

File tree

2 files changed

+173
-29
lines changed

2 files changed

+173
-29
lines changed

src/backend/utils/adt/jsonb_util.c

Lines changed: 164 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
#include "utils/memutils.h"
3131
#include "utils/varlena.h"
3232

33+
#define JSONB_SORTED_VALUES 1
34+
3335
/*
3436
* Maximum number of elements in an array (or key/value pairs in an object).
3537
* This is limited by two things: the size of the JEntry array must fit
@@ -82,6 +84,7 @@ typedef struct jsonbIterator
8284
const JEntry *children; /* JEntrys for child nodes */
8385
/* Data proper. This points to the beginning of the variable-length data */
8486
char *dataProper;
87+
uint32 *kvMap;
8588

8689
/* Current item in buffer (up to nElems) */
8790
int curIndex;
@@ -556,6 +559,8 @@ jsonbFindKeyInObject(JsonContainer *jsc, const char *keyVal, int keyLen,
556559
const JEntry *children = container->children;
557560
int count = JsonContainerSize(jsc);
558561
char *baseAddr;
562+
bool sorted_values = (container->header & JBC_TMASK) == JBC_TOBJECT_SORTED;
563+
const uint32 *kvmap;
559564
uint32 stopLow,
560565
stopHigh;
561566

@@ -569,7 +574,16 @@ jsonbFindKeyInObject(JsonContainer *jsc, const char *keyVal, int keyLen,
569574
* Binary search the container. Since we know this is an object, account
570575
* for *Pairs* of Jentrys
571576
*/
572-
baseAddr = (char *) (children + count * 2);
577+
if (sorted_values)
578+
{
579+
kvmap = &children[count * 2];
580+
baseAddr = (char *) &kvmap[count];
581+
}
582+
else
583+
{
584+
kvmap = NULL;
585+
baseAddr = (char *) (children + count * 2);
586+
}
573587
stopLow = 0;
574588
stopHigh = count;
575589
while (stopLow < stopHigh)
@@ -590,7 +604,7 @@ jsonbFindKeyInObject(JsonContainer *jsc, const char *keyVal, int keyLen,
590604
if (difference == 0)
591605
{
592606
/* Found our key, return corresponding value */
593-
int index = stopMiddle + count;
607+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
594608

595609
if (!res)
596610
res = palloc(sizeof(JsonbValue));
@@ -1203,14 +1217,18 @@ jsonbIteratorNext(JsonIterator **jsit, JsonbValue *val, bool skipNested)
12031217
(*it)->state = JBI_OBJECT_KEY;
12041218

12051219
fillCompressedJsonbValue((*it)->compressed, (*it)->container,
1206-
(*it)->curIndex + (*it)->nElems,
1207-
(*it)->dataProper, (*it)->curValueOffset,
1220+
((*it)->kvMap ? (*it)->kvMap[(*it)->curIndex] : (*it)->curIndex) + (*it)->nElems,
1221+
(*it)->dataProper,
1222+
(*it)->kvMap ?
1223+
getJsonbOffset((*it)->container, (*it)->kvMap[(*it)->curIndex] + (*it)->nElems) :
1224+
(*it)->curValueOffset,
12081225
val);
12091226

12101227
JBE_ADVANCE_OFFSET((*it)->curDataOffset,
12111228
(*it)->children[(*it)->curIndex]);
1212-
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1213-
(*it)->children[(*it)->curIndex + (*it)->nElems]);
1229+
if (!(*it)->kvMap)
1230+
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1231+
(*it)->children[(*it)->curIndex + (*it)->nElems]);
12141232
(*it)->curIndex++;
12151233

12161234
/*
@@ -1263,24 +1281,34 @@ jsonbIteratorInitExt(JsonContainer *cont,
12631281
/* Array starts just after header */
12641282
it->children = container->children;
12651283

1266-
switch (container->header & (JBC_FARRAY | JBC_FOBJECT))
1284+
switch (container->header & JBC_TMASK)
12671285
{
1268-
case JBC_FARRAY:
1286+
case JBC_TSCALAR:
1287+
it->isScalar = true;
1288+
/* FALLTHROUGH */
1289+
case JBC_TARRAY:
12691290
it->dataProper =
12701291
(char *) it->children + it->nElems * sizeof(JEntry);
1271-
it->isScalar = (container->header & JBC_FSCALAR) != 0;
12721292
/* This is either a "raw scalar", or an array */
12731293
Assert(!it->isScalar || it->nElems == 1);
12741294

12751295
it->state = JBI_ARRAY_START;
12761296
break;
12771297

1278-
case JBC_FOBJECT:
1298+
case JBC_TOBJECT:
1299+
it->kvMap = NULL;
12791300
it->dataProper =
12801301
(char *) it->children + it->nElems * sizeof(JEntry) * 2;
12811302
it->state = JBI_OBJECT_START;
12821303
break;
12831304

1305+
case JBC_TOBJECT_SORTED:
1306+
it->kvMap = (uint32 *)
1307+
((char *) it->children + it->nElems * sizeof(JEntry) * 2);
1308+
it->dataProper = (char *) &it->kvMap[it->nElems];
1309+
it->state = JBI_OBJECT_START;
1310+
break;
1311+
12841312
default:
12851313
elog(ERROR, "unknown type of jsonb container");
12861314
}
@@ -1887,13 +1915,14 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
18871915
* Construct the header Jentry and store it in the beginning of the
18881916
* variable-length payload.
18891917
*/
1890-
header = nElems | JBC_FARRAY;
18911918
if (val->val.array.rawScalar)
18921919
{
18931920
Assert(nElems == 1);
18941921
Assert(level == 0);
1895-
header |= JBC_FSCALAR;
1922+
header = nElems | JBC_TSCALAR;
18961923
}
1924+
else
1925+
header = nElems | JBC_TARRAY;
18971926

18981927
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
18991928

@@ -1951,6 +1980,48 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
19511980
*pheader = JENTRY_ISCONTAINER | totallen;
19521981
}
19531982

1983+
static int
1984+
int_cmp(const void *a, const void *b)
1985+
{
1986+
int x = *(const int *) a;
1987+
int y = *(const int *) b;
1988+
1989+
return x == y ? 0 : x > y ? 1 : -1;
1990+
}
1991+
1992+
static int
1993+
estimateJsonbValueSize(const JsonbValue *jbv)
1994+
{
1995+
int size;
1996+
1997+
switch (jbv->type)
1998+
{
1999+
case jbvNull:
2000+
case jbvBool:
2001+
return 0;
2002+
case jbvString:
2003+
return jbv->val.string.len;
2004+
case jbvNumeric:
2005+
return VARSIZE_ANY(jbv->val.numeric);
2006+
case jbvArray:
2007+
size = offsetof(JsonbContainerHeader, children[jbv->val.array.nElems]);
2008+
for (int i = 0; i < jbv->val.array.nElems; i++)
2009+
size += estimateJsonbValueSize(&jbv->val.array.elems[i]);
2010+
return size;
2011+
case jbvObject:
2012+
size = offsetof(JsonbContainerHeader, children[jbv->val.object.nPairs * 2]);
2013+
for (int i = 0; i < jbv->val.object.nPairs; i++)
2014+
{
2015+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].key);
2016+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].value);
2017+
}
2018+
return size;
2019+
default:
2020+
elog(ERROR, "invalid jsonb value type: %d", jbv->type);
2021+
return 0;
2022+
}
2023+
}
2024+
19542025
static void
19552026
convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int level)
19562027
{
@@ -1960,9 +2031,39 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19602031
int totallen;
19612032
uint32 header;
19622033
int nPairs = val->val.object.nPairs;
2034+
int reserved_size;
2035+
bool sorted_values = JSONB_SORTED_VALUES && nPairs > 1;
2036+
struct
2037+
{
2038+
int size;
2039+
int32 index;
2040+
} *values = sorted_values ? palloc(sizeof(*values) * nPairs) : NULL;
19632041

19642042
Assert(nPairs >= 0);
19652043

2044+
if (sorted_values)
2045+
{
2046+
for (i = 0; i < nPairs; i++)
2047+
{
2048+
values[i].index = i;
2049+
values[i].size = estimateJsonbValueSize(&val->val.object.pairs[i].value);
2050+
}
2051+
2052+
qsort(values, nPairs, sizeof(*values), int_cmp);
2053+
2054+
/* check if keys were really moved */
2055+
sorted_values = false;
2056+
2057+
for (i = 0; i < nPairs; i++)
2058+
{
2059+
if (values[i].index != i)
2060+
{
2061+
sorted_values = true;
2062+
break;
2063+
}
2064+
}
2065+
}
2066+
19662067
/* Remember where in the buffer this object starts. */
19672068
base_offset = buffer->len;
19682069

@@ -1973,17 +2074,30 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19732074
* Construct the header Jentry and store it in the beginning of the
19742075
* variable-length payload.
19752076
*/
1976-
header = nPairs | JBC_FOBJECT;
2077+
header = nPairs | (sorted_values ? JBC_TOBJECT_SORTED : JBC_TOBJECT);
19772078
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
19782079

19792080
/* Reserve space for the JEntries of the keys and values. */
1980-
jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
2081+
reserved_size = sizeof(JEntry) * nPairs * 2;
2082+
if (sorted_values)
2083+
reserved_size += sizeof(int32) * nPairs;
2084+
2085+
jentry_offset = reserveFromBuffer(buffer, reserved_size);
2086+
2087+
/* Write key-value map */
2088+
if (sorted_values)
2089+
{
2090+
for (i = 0; i < nPairs; i++)
2091+
copyToBuffer(buffer, jentry_offset + sizeof(JEntry) * nPairs * 2 + values[i].index * sizeof(int32),
2092+
(void *) &i, sizeof(int32));
2093+
}
19812094

19822095
/*
19832096
* Iterate over the keys, then over the values, since that is the ordering
19842097
* we want in the on-disk representation.
19852098
*/
19862099
totallen = 0;
2100+
19872101
for (i = 0; i < nPairs; i++)
19882102
{
19892103
JsonbPair *pair = &val->val.object.pairs[i];
@@ -2019,9 +2133,11 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
20192133
copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
20202134
jentry_offset += sizeof(JEntry);
20212135
}
2136+
20222137
for (i = 0; i < nPairs; i++)
20232138
{
2024-
JsonbPair *pair = &val->val.object.pairs[i];
2139+
int val_index = sorted_values ? values[i].index : i;
2140+
JsonbPair *pair = &val->val.object.pairs[val_index];
20252141
int len;
20262142
JEntry meta;
20272143

@@ -2055,6 +2171,9 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
20552171
jentry_offset += sizeof(JEntry);
20562172
}
20572173

2174+
if (values)
2175+
pfree(values);
2176+
20582177
/* Total data size is everything we've appended to buffer */
20592178
totallen = buffer->len - base_offset;
20602179

@@ -2280,17 +2399,36 @@ uniqueifyJsonbObject(JsonbValue *object, bool unique_keys, bool skip_nulls)
22802399
}
22812400
}
22822401

2402+
static void
2403+
jsonbInitContainerFromHeader(JsonContainerData *jc, JsonbContainerHeader *jbc)
2404+
{
2405+
jc->size = jbc->header & JBC_CMASK;
2406+
switch (jbc->header & JBC_TMASK)
2407+
{
2408+
case JBC_TOBJECT:
2409+
case JBC_TOBJECT_SORTED:
2410+
jc->type = jbvObject;
2411+
break;
2412+
case JBC_TARRAY:
2413+
jc->type = jbvArray;
2414+
break;
2415+
case JBC_TSCALAR:
2416+
jc->type = jbvArray | jbvScalar;
2417+
break;
2418+
default:
2419+
elog(ERROR, "invalid jsonb container type: %d",
2420+
jbc->header & JBC_TMASK);
2421+
}
2422+
}
2423+
22832424
static void
22842425
jsonbInitContainer(JsonContainerData *jc, JsonbContainerHeader *jbc, int len)
22852426
{
22862427
jc->ops = &jsonbContainerOps;
22872428
JsonContainerDataPtr(jc) = jbc;
22882429
jc->len = len;
22892430
jc->toasterid = InvalidOid;
2290-
jc->size = jbc->header & JBC_CMASK;
2291-
jc->type = jbc->header & JBC_FOBJECT ? jbvObject :
2292-
jbc->header & JBC_FSCALAR ? jbvArray | jbvScalar :
2293-
jbvArray;
2431+
jsonbInitContainerFromHeader(jc, jbc);
22942432
}
22952433

22962434
static void
@@ -2406,10 +2544,7 @@ jsonbzInitContainer(JsonContainerData *jc, CompressedJsonb *cjb, int len)
24062544

24072545
jc->ops = &jsonbzContainerOps;
24082546
jc->len = len;
2409-
jc->size = jbc->header & JBC_CMASK;
2410-
jc->type = jbc->header & JBC_FOBJECT ? jbvObject :
2411-
jbc->header & JBC_FSCALAR ? jbvArray | jbvScalar :
2412-
jbvArray;
2547+
jsonbInitContainerFromHeader(jc, jbc);
24132548
}
24142549

24152550
static JsonbContainer *
@@ -2479,12 +2614,15 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
24792614
JEntry *children = container->children;
24802615
int count = container->header & JBC_CMASK;
24812616
/* Since this is an object, account for *Pairs* of Jentrys */
2482-
char *base_addr = (char *) (children + count * 2);
2617+
bool sorted_values = (container->header & JBC_TMASK) == JBC_TOBJECT_SORTED;
2618+
char *base_addr = (char *) (children + count * 2) + (sorted_values ? sizeof(uint32) * count : 0);
2619+
uint32 *kvmap = sorted_values ? &container->children[count * 2] : NULL;
24832620
Size base_offset = base_addr - (char *) jb;
24842621
uint32 stopLow = 0,
24852622
stopHigh = count;
24862623

2487-
Assert(jb->root.header & JB_FOBJECT);
2624+
Assert((jb->root.header & JBC_TMASK) == JBC_TOBJECT ||
2625+
(jb->root.header & JBC_TMASK) == JBC_TOBJECT_SORTED);
24882626

24892627
/* Quick out if object/array is empty */
24902628
if (count <= 0)
@@ -2521,7 +2659,7 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
25212659
if (difference == 0)
25222660
{
25232661
/* Found our key, return corresponding value */
2524-
int index = stopMiddle + count;
2662+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
25252663

25262664
if (!res)
25272665
res = palloc(sizeof(*res));

src/include/utils/jsonb_internals.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,15 @@ typedef struct JsonbContainerHeader
144144

145145
/* flags for the header-field in JsonbContainer */
146146
#define JBC_CMASK 0x0FFFFFFF /* mask for count field */
147-
#define JBC_FSCALAR 0x10000000 /* flag bits */
148-
#define JBC_FOBJECT 0x20000000
149-
#define JBC_FARRAY 0x40000000
147+
#define JBC_TMASK 0x70000000 /* mask for container type */
148+
/* container types */
149+
#define JBC_TOBJECT 0x20000000 /* object with key-value pairs
150+
* sorted by key length-alpha */
151+
#define JBC_TOBJECT_SORTED 0x30000000 /* object with keys sorted by
152+
* length-alpha; values sorted by
153+
* length */
154+
#define JBC_TARRAY 0x40000000 /* array */
155+
#define JBC_TSCALAR 0x50000000 /* scalar pseudo-array */
150156

151157
/* The top-level on-disk format for a jsonb datum. */
152158
typedef struct

0 commit comments

Comments
 (0)