Skip to content

Commit dcabf52

Browse files
feodordmpgpro
authored andcommitted
Change predecence of phrase operator.
<-> operator now have higher predecence than & (AND) operator. This change was motivated by unexpected difference of similar queries: 'a & b <-> c'::tsquery and 'b <-> c & a'. Before first query means (a & b) <-> c and second one - '(b <-> c) & a', now phrase operator evaluates first. Per suggestion from Tom Lane 32260.1465402409@sss.pgh.pa.us Conflicts: src/backend/utils/adt/tsquery.c src/include/tsearch/ts_type.h
1 parent 4eecbb0 commit dcabf52

File tree

6 files changed

+130
-143
lines changed

6 files changed

+130
-143
lines changed

src/backend/utils/adt/tsquery.c

Lines changed: 61 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@
2727
/* FTS operator priorities, see ts_type.h */
2828
const int tsearch_op_priority[OP_COUNT] =
2929
{
30-
3, /* OP_NOT */
31-
2, /* OP_AND */
32-
1, /* OP_OR */
33-
4 /* OP_PHRASE */
30+
4, /* OP_NOT */
31+
2, /* OP_AND */
32+
1, /* OP_OR */
33+
3 /* OP_PHRASE */
3434
};
3535

3636
struct TSQueryParserStateData
@@ -431,6 +431,40 @@ pushStop(TSQueryParserState state)
431431

432432
#define STACKDEPTH 32
433433

434+
typedef struct OperatorElement {
435+
int8 op;
436+
int16 distance;
437+
} OperatorElement;
438+
439+
static void
440+
pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
441+
{
442+
if (*lenstack == STACKDEPTH) /* internal error */
443+
elog(ERROR, "tsquery stack too small");
444+
445+
stack[*lenstack].op = op;
446+
stack[*lenstack].distance = distance;
447+
448+
(*lenstack)++;
449+
}
450+
451+
static void
452+
cleanOpStack(TSQueryParserState state,
453+
OperatorElement *stack, int *lenstack, int8 op)
454+
{
455+
int opPriority = OP_PRIORITY(op);
456+
457+
while(*lenstack)
458+
{
459+
if (opPriority > OP_PRIORITY(stack[*lenstack - 1].op))
460+
break;
461+
462+
(*lenstack)--;
463+
pushOperator(state, stack[*lenstack].op,
464+
stack[*lenstack].distance);
465+
}
466+
}
467+
434468
/*
435469
* Make polish (prefix) notation of query.
436470
*
@@ -441,18 +475,14 @@ makepol(TSQueryParserState state,
441475
PushFunction pushval,
442476
Datum opaque)
443477
{
444-
int8 operator = 0;
445-
ts_tokentype type;
446-
int lenval = 0;
447-
char *strval = NULL;
448-
struct
449-
{
450-
int8 op;
451-
int16 distance;
452-
} opstack[STACKDEPTH];
453-
int lenstack = 0;
454-
int16 weight = 0;
455-
bool prefix;
478+
int8 operator = 0;
479+
ts_tokentype type;
480+
int lenval = 0;
481+
char *strval = NULL;
482+
OperatorElement opstack[STACKDEPTH];
483+
int lenstack = 0;
484+
int16 weight = 0;
485+
bool prefix;
456486

457487
/* since this function recurses, it could be driven to stack overflow */
458488
check_stack_depth();
@@ -463,49 +493,16 @@ makepol(TSQueryParserState state,
463493
{
464494
case PT_VAL:
465495
pushval(opaque, state, strval, lenval, weight, prefix);
466-
while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
467-
opstack[lenstack - 1].op == OP_PHRASE ||
468-
opstack[lenstack - 1].op == OP_NOT))
469-
{
470-
lenstack--;
471-
pushOperator(state,
472-
opstack[lenstack].op,
473-
opstack[lenstack].distance);
474-
}
475496
break;
476497
case PT_OPR:
477-
if (lenstack && operator == OP_OR)
478-
pushOperator(state, OP_OR, 0);
479-
else
480-
{
481-
if (lenstack == STACKDEPTH) /* internal error */
482-
elog(ERROR, "tsquery stack too small");
483-
opstack[lenstack].op = operator;
484-
opstack[lenstack].distance = weight;
485-
lenstack++;
486-
}
498+
cleanOpStack(state, opstack, &lenstack, operator);
499+
pushOpStack(opstack, &lenstack, operator, weight);
487500
break;
488501
case PT_OPEN:
489502
makepol(state, pushval, opaque);
490-
491-
while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
492-
opstack[lenstack - 1].op == OP_PHRASE ||
493-
opstack[lenstack - 1].op == OP_NOT))
494-
{
495-
lenstack--;
496-
pushOperator(state,
497-
opstack[lenstack].op,
498-
opstack[lenstack].distance);
499-
}
500503
break;
501504
case PT_CLOSE:
502-
while (lenstack)
503-
{
504-
lenstack--;
505-
pushOperator(state,
506-
opstack[lenstack].op,
507-
opstack[lenstack].distance);
508-
};
505+
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */);
509506
return;
510507
case PT_ERR:
511508
default:
@@ -515,13 +512,8 @@ makepol(TSQueryParserState state,
515512
state->buffer)));
516513
}
517514
}
518-
while (lenstack)
519-
{
520-
lenstack--;
521-
pushOperator(state,
522-
opstack[lenstack].op,
523-
opstack[lenstack].distance);
524-
}
515+
516+
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */);
525517
}
526518

527519
static void
@@ -751,7 +743,7 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
751743
* print it in infix (human-readable) form
752744
*/
753745
static void
754-
infix(INFIX *in, int parentPriority)
746+
infix(INFIX *in, int parentPriority, bool rightPhraseOp)
755747
{
756748
/* since this function recurses, it could be driven to stack overflow. */
757749
check_stack_depth();
@@ -820,7 +812,7 @@ infix(INFIX *in, int parentPriority)
820812
}
821813
else if (in->curpol->qoperator.oper == OP_NOT)
822814
{
823-
int priority = PRINT_PRIORITY(in->curpol);
815+
int priority = QO_PRIORITY(in->curpol);
824816

825817
if (priority < parentPriority)
826818
{
@@ -834,7 +826,7 @@ infix(INFIX *in, int parentPriority)
834826
*(in->cur) = '\0';
835827
in->curpol++;
836828

837-
infix(in, priority);
829+
infix(in, priority, false);
838830
if (priority < parentPriority)
839831
{
840832
RESIZEBUF(in, 2);
@@ -845,16 +837,15 @@ infix(INFIX *in, int parentPriority)
845837
else
846838
{
847839
int8 op = in->curpol->qoperator.oper;
848-
int priority = PRINT_PRIORITY(in->curpol);
840+
int priority = QO_PRIORITY(in->curpol);
849841
int16 distance = in->curpol->qoperator.distance;
850842
INFIX nrm;
851843
bool needParenthesis = false;
852844

853845
in->curpol++;
854846
if (priority < parentPriority ||
855-
(op == OP_PHRASE &&
856-
(priority == parentPriority || /* phrases are not commutative! */
857-
parentPriority == OP_PRIORITY(OP_AND))))
847+
/* phrase operator depends on order */
848+
(op == OP_PHRASE && rightPhraseOp))
858849
{
859850
needParenthesis = true;
860851
RESIZEBUF(in, 2);
@@ -868,11 +859,11 @@ infix(INFIX *in, int parentPriority)
868859
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
869860

870861
/* get right operand */
871-
infix(&nrm, priority);
862+
infix(&nrm, priority, (op == OP_PHRASE));
872863

873864
/* get & print left operand */
874865
in->curpol = nrm.curpol;
875-
infix(in, priority);
866+
infix(in, priority, false);
876867

877868
/* print operator & right operand */
878869
RESIZEBUF(in, 3 + (2 + 10 /* distance */) + (nrm.cur - nrm.buf));
@@ -924,7 +915,7 @@ tsqueryout(PG_FUNCTION_ARGS)
924915
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
925916
*(nrm.cur) = '\0';
926917
nrm.op = GETOPERAND(query);
927-
infix(&nrm, -1 /* lowest priority */);
918+
infix(&nrm, -1 /* lowest priority */, false);
928919

929920
PG_FREE_IF_COPY(query, 0);
930921
PG_RETURN_CSTRING(nrm.buf);
@@ -1151,7 +1142,7 @@ tsquerytree(PG_FUNCTION_ARGS)
11511142
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
11521143
*(nrm.cur) = '\0';
11531144
nrm.op = GETOPERAND(query);
1154-
infix(&nrm, true);
1145+
infix(&nrm, -1, false);
11551146
res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
11561147
pfree(q);
11571148
}

src/backend/utils/adt/tsquery_cleanup.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,18 @@ typedef struct NODE
2626
QueryItem *valnode;
2727
} NODE;
2828

29-
/* Non-operator nodes have fake (but highest) priority */
29+
/*
30+
* To simplify walking on query tree and pushing down of phrase operator
31+
* we define some fake priority here: phrase operator has highest priority
32+
* of any other operators (and we believe here that OP_PHRASE is a highest
33+
* code of operations) and value node has ever highest priority.
34+
* Priority values of other operations don't matter until they are less than
35+
* phrase operator and value node.
36+
*/
37+
#define VALUE_PRIORITY (OP_COUNT + 1)
3038
#define NODE_PRIORITY(x) \
3139
( ((x)->valnode->qoperator.type == QI_OPR) ? \
32-
QO_PRIORITY((x)->valnode) : \
33-
TOP_PRIORITY )
40+
(x)->valnode->qoperator.oper : VALUE_PRIORITY )
3441

3542
/*
3643
* make query tree from plain view of query
@@ -413,6 +420,10 @@ normalize_phrase_tree(NODE *node)
413420
node->left = normalize_phrase_tree(node->left);
414421
node->right = normalize_phrase_tree(node->right);
415422

423+
/*
424+
* if subtree contains only nodes with higher "priority" then
425+
* we are done. See comment near NODE_PRIORITY()
426+
*/
416427
if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) &&
417428
NODE_PRIORITY(node) <= NODE_PRIORITY(node->left))
418429
return node;

src/include/tsearch/ts_type.h

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -211,34 +211,19 @@ typedef struct
211211

212212
/*
213213
* Legal values for QueryOperator.operator.
214-
* They should be ordered by priority! We assume that phrase
215-
* has highest priority, but this agreement is only
216-
* for query transformation! That's need to simplify
217-
* algorithm of query transformation.
218214
*/
219215
#define OP_NOT 1
220216
#define OP_AND 2
221217
#define OP_OR 3
222-
#define OP_PHRASE 4
218+
#define OP_PHRASE 4 /* highest code, tsquery_cleanup.c */
223219
#define OP_COUNT 4
224220

225221
extern const int tsearch_op_priority[OP_COUNT];
226222

227-
#define NOT_PHRASE_P 5 /*
228-
* OP_PHRASE negation operations must have greater
229-
* priority in order to force infix() to surround
230-
* the whole OP_PHRASE expression with parentheses.
231-
*/
232-
233-
#define TOP_PRIORITY 6 /* highest priority for val nodes */
234-
235223
/* get operation priority by its code*/
236224
#define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] )
237225
/* get QueryOperator priority */
238226
#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
239-
/* special case: get QueryOperator priority for correct printing !(a <-> b>) */
240-
#define PRINT_PRIORITY(x) \
241-
( (((QueryOperator *) (x))->oper == OP_NOT) ? NOT_PHRASE_P : QO_PRIORITY(x) )
242227

243228
typedef struct
244229
{

src/test/regress/expected/tsdicts.out

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -470,15 +470,15 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
470470
(1 row)
471471

472472
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
473-
to_tsquery
474-
-----------------------------------------------------------------------------
475-
( 'foot':B <-> 'sky' ) & ( 'ball':B <-> 'sky' ) & ( 'klubber':B <-> 'sky' )
473+
to_tsquery
474+
-----------------------------------------------------------------
475+
'foot':B <-> 'sky' & 'ball':B <-> 'sky' & 'klubber':B <-> 'sky'
476476
(1 row)
477477

478478
SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
479-
phraseto_tsquery
480-
-----------------------------------------------------------------------
481-
( 'foot' <-> 'sky' ) & ( 'ball' <-> 'sky' ) & ( 'klubber' <-> 'sky' )
479+
phraseto_tsquery
480+
-----------------------------------------------------------
481+
'foot' <-> 'sky' & 'ball' <-> 'sky' & 'klubber' <-> 'sky'
482482
(1 row)
483483

484484
-- Test ispell dictionary with hunspell affix with FLAG long in configuration

src/test/regress/expected/tsearch.out

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -772,9 +772,9 @@ SELECT to_tsquery('english', 'foo <-> a <-> the <-> bar');
772772
(1 row)
773773

774774
SELECT phraseto_tsquery('english', 'PostgreSQL can be extended by the user in many ways');
775-
phraseto_tsquery
776-
-----------------------------------------------------------------------
777-
( ( ( 'postgresql' <3> 'extend' ) <3> 'user' ) <2> 'mani' ) <-> 'way'
775+
phraseto_tsquery
776+
-----------------------------------------------------------
777+
'postgresql' <3> 'extend' <3> 'user' <2> 'mani' <-> 'way'
778778
(1 row)
779779

780780
SELECT ts_rank_cd(to_tsvector('english', '
@@ -1209,15 +1209,15 @@ SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::t
12091209
(1 row)
12101210

12111211
SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
1212-
ts_rewrite
1213-
-----------------------
1214-
'1' & ( '2' <2> '3' )
1212+
ts_rewrite
1213+
-------------------
1214+
'1' & '2' <2> '3'
12151215
(1 row)
12161216

12171217
SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
1218-
ts_rewrite
1219-
-----------------------------------------------
1220-
( '5' <-> '1' ) & ( '5' <-> ( '2' <-> '3' ) )
1218+
ts_rewrite
1219+
---------------------------------------
1220+
'5' <-> '1' & '5' <-> ( '2' <-> '3' )
12211221
(1 row)
12221222

12231223
SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );

0 commit comments

Comments
 (0)