Skip to content

Commit 85b41b0

Browse files
authored
Refactor the IN operator to use '= ANY()' syntax (#1236)
This change reduces the use of internal function calls in the IN implementation to improve performance. It also changes the behavior of IN so that it correctly returns NULL when the list being checked contains a NULL element and no other element matches. Regression tests were added and corrected. Co-authored-by: Josh Innis <[email protected]>
1 parent c171573 commit 85b41b0

File tree

4 files changed

+213
-19
lines changed

4 files changed

+213
-19
lines changed

regress/expected/expr.out

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,28 @@ $$RETURN {bool: true, int: 1} IN ['str', 1, 1.0, true, null, {bool: true, int: 1
198198
t
199199
(1 row)
200200

201+
SELECT * FROM cypher('expr',
202+
$$RETURN 1 IN [1.0, [NULL]]$$) AS r(c boolean);
203+
c
204+
---
205+
t
206+
(1 row)
207+
208+
SELECT * FROM cypher('expr',
209+
$$RETURN [NULL] IN [1.0, [NULL]]$$) AS r(c boolean);
210+
c
211+
---
212+
t
213+
(1 row)
214+
201215
-- should return SQL null, nothing
216+
SELECT * FROM cypher('expr',
217+
$$RETURN true IN NULL $$) AS r(c boolean);
218+
c
219+
---
220+
221+
(1 row)
222+
202223
SELECT * FROM cypher('expr',
203224
$$RETURN null IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
204225
c
@@ -220,49 +241,95 @@ $$RETURN 'str' IN null $$) AS r(c boolean);
220241

221242
(1 row)
222243

223-
-- should all return false
224244
SELECT * FROM cypher('expr',
225245
$$RETURN 0 IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
226246
c
227247
---
228-
f
248+
229249
(1 row)
230250

231251
SELECT * FROM cypher('expr',
232252
$$RETURN 1.1 IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
233253
c
234254
---
235-
f
255+
236256
(1 row)
237257

238258
SELECT * FROM cypher('expr',
239259
$$RETURN 'Str' IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
240260
c
241261
---
242-
f
262+
243263
(1 row)
244264

245265
SELECT * FROM cypher('expr',
246266
$$RETURN [1,3,5,[2,4,5]] IN ['str', 1, 1.0, true, null, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
247267
c
248268
---
249-
f
269+
250270
(1 row)
251271

252272
SELECT * FROM cypher('expr',
253273
$$RETURN {bool: true, int: 2} IN ['str', 1, 1.0, true, null, {bool: true, int: 1}, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
254274
c
255275
---
276+
277+
(1 row)
278+
279+
-- should return false
280+
SELECT * FROM cypher('expr',
281+
$$RETURN 'str' IN ['StR', 1, true]$$) AS r(c boolean);
282+
c
283+
---
284+
f
285+
(1 row)
286+
287+
SELECT * FROM cypher('expr',
288+
$$RETURN 2 IN ['StR', 1, true]$$) AS r(c boolean);
289+
c
290+
---
291+
f
292+
(1 row)
293+
294+
SELECT * FROM cypher('expr',
295+
$$RETURN false IN ['StR', 1, true]$$) AS r(c boolean);
296+
c
297+
---
298+
f
299+
(1 row)
300+
301+
SELECT * FROM cypher('expr',
302+
$$RETURN [1,2] IN ['StR', 1, 2, true]$$) AS r(c boolean);
303+
c
304+
---
305+
f
306+
(1 row)
307+
308+
SELECT * FROM cypher('expr',
309+
$$RETURN 1 in [[1]]$$) AS r(c boolean);
310+
c
311+
---
312+
f
313+
(1 row)
314+
315+
SELECT * FROM cypher('expr',
316+
$$RETURN 1 IN [[null]]$$) AS r(c boolean);
317+
c
318+
---
256319
f
257320
(1 row)
258321

259322
-- should error - ERROR: object of IN must be a list
260323
SELECT * FROM cypher('expr',
261324
$$RETURN null IN 'str' $$) AS r(c boolean);
262325
ERROR: object of IN must be a list
326+
LINE 2: $$RETURN null IN 'str' $$) AS r(c boolean);
327+
^
263328
SELECT * FROM cypher('expr',
264329
$$RETURN 'str' IN 'str' $$) AS r(c boolean);
265330
ERROR: object of IN must be a list
331+
LINE 2: $$RETURN 'str' IN 'str' $$) AS r(c boolean);
332+
^
266333
-- list access
267334
SELECT * FROM cypher('expr',
268335
$$RETURN [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10][0]$$) AS r(c agtype);

regress/sql/expr.sql

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,14 +121,19 @@ SELECT * FROM cypher('expr',
121121
$$RETURN [1,3,5,[2,4,6]] IN ['str', 1, 1.0, true, null, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
122122
SELECT * FROM cypher('expr',
123123
$$RETURN {bool: true, int: 1} IN ['str', 1, 1.0, true, null, {bool: true, int: 1}, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
124+
SELECT * FROM cypher('expr',
125+
$$RETURN 1 IN [1.0, [NULL]]$$) AS r(c boolean);
126+
SELECT * FROM cypher('expr',
127+
$$RETURN [NULL] IN [1.0, [NULL]]$$) AS r(c boolean);
124128
-- should return SQL null, nothing
125129
SELECT * FROM cypher('expr',
130+
$$RETURN true IN NULL $$) AS r(c boolean);
131+
SELECT * FROM cypher('expr',
126132
$$RETURN null IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
127133
SELECT * FROM cypher('expr',
128134
$$RETURN null IN ['str', 1, 1.0, true]$$) AS r(c boolean);
129135
SELECT * FROM cypher('expr',
130136
$$RETURN 'str' IN null $$) AS r(c boolean);
131-
-- should all return false
132137
SELECT * FROM cypher('expr',
133138
$$RETURN 0 IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
134139
SELECT * FROM cypher('expr',
@@ -139,6 +144,19 @@ SELECT * FROM cypher('expr',
139144
$$RETURN [1,3,5,[2,4,5]] IN ['str', 1, 1.0, true, null, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
140145
SELECT * FROM cypher('expr',
141146
$$RETURN {bool: true, int: 2} IN ['str', 1, 1.0, true, null, {bool: true, int: 1}, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
147+
-- should return false
148+
SELECT * FROM cypher('expr',
149+
$$RETURN 'str' IN ['StR', 1, true]$$) AS r(c boolean);
150+
SELECT * FROM cypher('expr',
151+
$$RETURN 2 IN ['StR', 1, true]$$) AS r(c boolean);
152+
SELECT * FROM cypher('expr',
153+
$$RETURN false IN ['StR', 1, true]$$) AS r(c boolean);
154+
SELECT * FROM cypher('expr',
155+
$$RETURN [1,2] IN ['StR', 1, 2, true]$$) AS r(c boolean);
156+
SELECT * FROM cypher('expr',
157+
$$RETURN 1 in [[1]]$$) AS r(c boolean);
158+
SELECT * FROM cypher('expr',
159+
$$RETURN 1 IN [[null]]$$) AS r(c boolean);
142160
-- should error - ERROR: object of IN must be a list
143161
SELECT * FROM cypher('expr',
144162
$$RETURN null IN 'str' $$) AS r(c boolean);

src/backend/parser/cypher_expr.c

Lines changed: 121 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -491,23 +491,132 @@ static Node *transform_AEXPR_OP(cypher_parsestate *cpstate, A_Expr *a)
491491

492492
static Node *transform_AEXPR_IN(cypher_parsestate *cpstate, A_Expr *a)
493493
{
494-
Oid func_in_oid;
495-
FuncExpr *result;
496-
List *args = NIL;
494+
ParseState *pstate = (ParseState *)cpstate;
495+
cypher_list *rexpr;
496+
Node *result = NULL;
497+
Node *lexpr;
498+
List *rexprs;
499+
List *rvars;
500+
List *rnonvars;
501+
bool useOr;
502+
ListCell *l;
503+
504+
/* If the right operand is not a list, return NULL when it is a NULL constant; otherwise raise an error. */
505+
if (!is_ag_node(a->rexpr, cypher_list))
506+
{
507+
if (nodeTag(a->rexpr) == T_A_Const)
508+
{
509+
A_Const *r_a_const = (A_Const*)a->rexpr;
510+
if (r_a_const->isnull == true)
511+
{
512+
return (Node *)makeConst(AGTYPEOID, -1, InvalidOid, -1, (Datum)NULL, true, false);
513+
}
514+
}
497515

498-
args = lappend(args, transform_cypher_expr_recurse(cpstate, a->rexpr));
499-
args = lappend(args, transform_cypher_expr_recurse(cpstate, a->lexpr));
516+
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
517+
errmsg("object of IN must be a list")));
518+
}
500519

501-
/* get the agtype_access_slice function */
502-
func_in_oid = get_ag_func_oid("agtype_in_operator", 2, AGTYPEOID,
503-
AGTYPEOID);
520+
Assert(is_ag_node(a->rexpr, cypher_list));
521+
522+
// If the operator is <>, combine with AND not OR.
523+
if (strcmp(strVal(linitial(a->name)), "<>") == 0)
524+
{
525+
useOr = false;
526+
}
527+
else
528+
{
529+
useOr = true;
530+
}
531+
532+
lexpr = transform_cypher_expr_recurse(cpstate, a->lexpr);
533+
534+
rexprs = rvars = rnonvars = NIL;
535+
536+
rexpr = (cypher_list *)a->rexpr;
537+
538+
foreach(l, (List *) rexpr->elems)
539+
{
540+
Node *rexpr = transform_cypher_expr_recurse(cpstate, lfirst(l));
504541

505-
result = makeFuncExpr(func_in_oid, AGTYPEOID, args, InvalidOid, InvalidOid,
506-
COERCE_EXPLICIT_CALL);
542+
rexprs = lappend(rexprs, rexpr);
543+
if (contain_vars_of_level(rexpr, 0))
544+
{
545+
rvars = lappend(rvars, rexpr);
546+
}
547+
else
548+
{
549+
rnonvars = lappend(rnonvars, rexpr);
550+
}
551+
}
507552

508-
result->location = exprLocation(a->lexpr);
509553

510-
return (Node *)result;
554+
/*
555+
* ScalarArrayOpExpr is only going to be useful if there's more than one
556+
* non-Var righthand item.
557+
*/
558+
if (list_length(rnonvars) > 1)
559+
{
560+
List *allexprs;
561+
Oid scalar_type;
562+
List *aexprs;
563+
ArrayExpr *newa;
564+
565+
allexprs = list_concat(list_make1(lexpr), rnonvars);
566+
567+
scalar_type = AGTYPEOID;
568+
569+
Assert (verify_common_type(scalar_type, allexprs));
570+
/*
571+
* coerce all the right-hand non-Var inputs to the common type
572+
* and build an ArrayExpr for them.
573+
*/
574+
575+
aexprs = NIL;
576+
foreach(l, rnonvars)
577+
{
578+
Node *rexpr = (Node *) lfirst(l);
579+
580+
rexpr = coerce_to_common_type(pstate, rexpr, AGTYPEOID, "IN");
581+
aexprs = lappend(aexprs, rexpr);
582+
}
583+
newa = makeNode(ArrayExpr);
584+
newa->array_typeid = get_array_type(AGTYPEOID);
585+
/* array_collid will be set by parse_collate.c */
586+
newa->element_typeid = AGTYPEOID;
587+
newa->elements = aexprs;
588+
newa->multidims = false;
589+
result = (Node *) make_scalar_array_op(pstate, a->name, useOr,
590+
lexpr, (Node *) newa, a->location);
591+
592+
/* Consider only the Vars (if any) in the loop below */
593+
rexprs = rvars;
594+
}
595+
596+
// Must do it the hard way, with a boolean expression tree.
597+
foreach(l, rexprs)
598+
{
599+
Node *rexpr = (Node *) lfirst(l);
600+
Node *cmp;
601+
602+
// Ordinary scalar operator
603+
cmp = (Node *) make_op(pstate, a->name, copyObject(lexpr), rexpr,
604+
pstate->p_last_srf, a->location);
605+
606+
cmp = coerce_to_boolean(pstate, cmp, "IN");
607+
if (result == NULL)
608+
{
609+
result = cmp;
610+
}
611+
else
612+
{
613+
result = (Node *) makeBoolExpr(useOr ? OR_EXPR : AND_EXPR,
614+
list_make2(result, cmp),
615+
a->location);
616+
}
617+
}
618+
619+
return result;
511620
}
512621

513622
static Node *transform_BoolExpr(cypher_parsestate *cpstate, BoolExpr *expr)

src/backend/utils/adt/agtype_ops.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1160,7 +1160,7 @@ Datum agtype_exists_all_agtype(PG_FUNCTION_ARGS)
11601160

11611161
PG_FUNCTION_INFO_V1(agtype_contains);
11621162
/*
1163-
* <@ operator for agtype. Returns true if the right agtype path/value entries
1163+
* @> operator for agtype. Returns true if the right agtype path/value entries
11641164
* are contained at the top level within the left agtype value
11651165
*/
11661166
Datum agtype_contains(PG_FUNCTION_ARGS)

0 commit comments

Comments
 (0)