Skip to content

Commit d2761b6

Browse files
committed
Fix timing issue with ALTER TABLE's validate constraint
An ALTER TABLE to validate a foreign key in which another subcommand already caused a pending table rewrite could fail due to ALTER TABLE attempting to validate the foreign key before the actual table rewrite takes place. This situation could result in an error such as: ERROR: could not read block 0 in file "base/nnnnn/nnnnn": read only 0 of 8192 bytes The failure here was due to the SPI call which validates the foreign key trying to access an index which is yet to be rebuilt. Similarly, we also incorrectly tried to validate CHECK constraints before the heap had been rewritten. The fix for both is to delay constraint validation until phase 3, after the table has been rewritten. For CHECK constraints this means a slight behavioral change. Previously ALTER TABLE VALIDATE CONSTRAINT on inheritance tables would be validated from the bottom up. This was different from the order of evaluation when a new CHECK constraint was added. The changes made here aligns the VALIDATE CONSTRAINT evaluation order for inheritance tables to be the same as ADD CONSTRAINT, which is generally top-down. Reported-by: Nazli Ugur Koyluoglu, using SQLancer Discussion: https://postgr.es/m/CAApHDvp%3DZXv8wiRyk_0rWr00skhGkt8vXDrHJYXRMft3TjkxCA%40mail.gmail.com Backpatch-through: 9.5 (all supported versions)
1 parent 3753df8 commit d2761b6

File tree

3 files changed

+95
-111
lines changed

3 files changed

+95
-111
lines changed

src/backend/commands/tablecmds.c

Lines changed: 52 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -322,8 +322,9 @@ static void AlterSeqNamespaces(Relation classRel, Relation rel,
322322
LOCKMODE lockmode);
323323
static ObjectAddress ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd,
324324
bool recurse, bool recursing, LOCKMODE lockmode);
325-
static ObjectAddress ATExecValidateConstraint(Relation rel, char *constrName,
326-
bool recurse, bool recursing, LOCKMODE lockmode);
325+
static ObjectAddress ATExecValidateConstraint(List **wqueue, Relation rel,
326+
char *constrName, bool recurse, bool recursing,
327+
LOCKMODE lockmode);
327328
static int transformColumnNameList(Oid relId, List *colList,
328329
int16 *attnums, Oid *atttypids);
329330
static int transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid,
@@ -336,7 +337,6 @@ static Oid transformFkeyCheckAttrs(Relation pkrel,
336337
static void checkFkeyPermissions(Relation rel, int16 *attnums, int natts);
337338
static CoercionPathType findFkeyCast(Oid targetTypeId, Oid sourceTypeId,
338339
Oid *funcid);
339-
static void validateCheckConstraint(Relation rel, HeapTuple constrtup);
340340
static void validateForeignKeyConstraint(char *conname,
341341
Relation rel, Relation pkrel,
342342
Oid pkindOid, Oid constraintOid);
@@ -4187,13 +4187,13 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
41874187
address = ATExecAlterConstraint(rel, cmd, false, false, lockmode);
41884188
break;
41894189
case AT_ValidateConstraint: /* VALIDATE CONSTRAINT */
4190-
address = ATExecValidateConstraint(rel, cmd->name, false, false,
4191-
lockmode);
4190+
address = ATExecValidateConstraint(wqueue, rel, cmd->name, false,
4191+
false, lockmode);
41924192
break;
41934193
case AT_ValidateConstraintRecurse: /* VALIDATE CONSTRAINT with
41944194
* recursion */
4195-
address = ATExecValidateConstraint(rel, cmd->name, true, false,
4196-
lockmode);
4195+
address = ATExecValidateConstraint(wqueue, rel, cmd->name, true,
4196+
false, lockmode);
41974197
break;
41984198
case AT_DropConstraint: /* DROP CONSTRAINT */
41994199
ATExecDropConstraint(rel, cmd->name, cmd->behavior,
@@ -8474,8 +8474,8 @@ ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd,
84748474
* was already validated, InvalidObjectAddress is returned.
84758475
*/
84768476
static ObjectAddress
8477-
ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
8478-
bool recursing, LOCKMODE lockmode)
8477+
ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
8478+
bool recurse, bool recursing, LOCKMODE lockmode)
84798479
{
84808480
Relation conrel;
84818481
SysScanDesc scan;
@@ -8521,27 +8521,31 @@ ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
85218521

85228522
if (!con->convalidated)
85238523
{
8524+
AlteredTableInfo *tab;
85248525
HeapTuple copyTuple;
85258526
Form_pg_constraint copy_con;
85268527

85278528
if (con->contype == CONSTRAINT_FOREIGN)
85288529
{
8529-
Relation refrel;
8530+
NewConstraint *newcon;
8531+
Constraint *fkconstraint;
85308532

8531-
/*
8532-
* Triggers are already in place on both tables, so a concurrent
8533-
* write that alters the result here is not possible. Normally we
8534-
* can run a query here to do the validation, which would only
8535-
* require AccessShareLock. In some cases, it is possible that we
8536-
* might need to fire triggers to perform the check, so we take a
8537-
* lock at RowShareLock level just in case.
8538-
*/
8539-
refrel = heap_open(con->confrelid, RowShareLock);
8533+
/* Queue validation for phase 3 */
8534+
fkconstraint = makeNode(Constraint);
8535+
/* for now this is all we need */
8536+
fkconstraint->conname = constrName;
8537+
8538+
newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
8539+
newcon->name = constrName;
8540+
newcon->contype = CONSTR_FOREIGN;
8541+
newcon->refrelid = con->confrelid;
8542+
newcon->refindid = con->conindid;
8543+
newcon->conid = HeapTupleGetOid(tuple);
8544+
newcon->qual = (Node *) fkconstraint;
85408545

8541-
validateForeignKeyConstraint(constrName, rel, refrel,
8542-
con->conindid,
8543-
HeapTupleGetOid(tuple));
8544-
heap_close(refrel, NoLock);
8546+
/* Find or create work queue entry for this table */
8547+
tab = ATGetQueueEntry(wqueue, rel);
8548+
tab->constraints = lappend(tab->constraints, newcon);
85458549

85468550
/*
85478551
* We disallow creating invalid foreign keys to or from
@@ -8552,6 +8556,10 @@ ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
85528556
{
85538557
List *children = NIL;
85548558
ListCell *child;
8559+
NewConstraint *newcon;
8560+
bool isnull;
8561+
Datum val;
8562+
char *conbin;
85558563

85568564
/*
85578565
* If we're recursing, the parent has already done this, so skip
@@ -8591,12 +8599,31 @@ ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
85918599
/* find_all_inheritors already got lock */
85928600
childrel = heap_open(childoid, NoLock);
85938601

8594-
ATExecValidateConstraint(childrel, constrName, false,
8602+
ATExecValidateConstraint(wqueue, childrel, constrName, false,
85958603
true, lockmode);
85968604
heap_close(childrel, NoLock);
85978605
}
85988606

8599-
validateCheckConstraint(rel, tuple);
8607+
/* Queue validation for phase 3 */
8608+
newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
8609+
newcon->name = constrName;
8610+
newcon->contype = CONSTR_CHECK;
8611+
newcon->refrelid = InvalidOid;
8612+
newcon->refindid = InvalidOid;
8613+
newcon->conid = HeapTupleGetOid(tuple);
8614+
8615+
val = SysCacheGetAttr(CONSTROID, tuple,
8616+
Anum_pg_constraint_conbin, &isnull);
8617+
if (isnull)
8618+
elog(ERROR, "null conbin for constraint %u",
8619+
HeapTupleGetOid(tuple));
8620+
8621+
conbin = TextDatumGetCString(val);
8622+
newcon->qual = (Node *) stringToNode(conbin);
8623+
8624+
/* Find or create work queue entry for this table */
8625+
tab = ATGetQueueEntry(wqueue, rel);
8626+
tab->constraints = lappend(tab->constraints, newcon);
86008627

86018628
/*
86028629
* Invalidate relcache so that others see the new validated
@@ -8972,91 +8999,6 @@ checkFkeyPermissions(Relation rel, int16 *attnums, int natts)
89728999
}
89739000
}
89749001

8975-
/*
8976-
* Scan the existing rows in a table to verify they meet a proposed
8977-
* CHECK constraint.
8978-
*
8979-
* The caller must have opened and locked the relation appropriately.
8980-
*/
8981-
static void
8982-
validateCheckConstraint(Relation rel, HeapTuple constrtup)
8983-
{
8984-
EState *estate;
8985-
Datum val;
8986-
char *conbin;
8987-
Expr *origexpr;
8988-
ExprState *exprstate;
8989-
TupleDesc tupdesc;
8990-
HeapScanDesc scan;
8991-
HeapTuple tuple;
8992-
ExprContext *econtext;
8993-
MemoryContext oldcxt;
8994-
TupleTableSlot *slot;
8995-
Form_pg_constraint constrForm;
8996-
bool isnull;
8997-
Snapshot snapshot;
8998-
8999-
/*
9000-
* VALIDATE CONSTRAINT is a no-op for foreign tables and partitioned
9001-
* tables.
9002-
*/
9003-
if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
9004-
rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
9005-
return;
9006-
9007-
constrForm = (Form_pg_constraint) GETSTRUCT(constrtup);
9008-
9009-
estate = CreateExecutorState();
9010-
9011-
/*
9012-
* XXX this tuple doesn't really come from a syscache, but this doesn't
9013-
* matter to SysCacheGetAttr, because it only wants to be able to fetch
9014-
* the tupdesc
9015-
*/
9016-
val = SysCacheGetAttr(CONSTROID, constrtup, Anum_pg_constraint_conbin,
9017-
&isnull);
9018-
if (isnull)
9019-
elog(ERROR, "null conbin for constraint %u",
9020-
HeapTupleGetOid(constrtup));
9021-
conbin = TextDatumGetCString(val);
9022-
origexpr = (Expr *) stringToNode(conbin);
9023-
exprstate = ExecPrepareExpr(origexpr, estate);
9024-
9025-
econtext = GetPerTupleExprContext(estate);
9026-
tupdesc = RelationGetDescr(rel);
9027-
slot = MakeSingleTupleTableSlot(tupdesc);
9028-
econtext->ecxt_scantuple = slot;
9029-
9030-
snapshot = RegisterSnapshot(GetLatestSnapshot());
9031-
scan = heap_beginscan(rel, snapshot, 0, NULL);
9032-
9033-
/*
9034-
* Switch to per-tuple memory context and reset it for each tuple
9035-
* produced, so we don't leak memory.
9036-
*/
9037-
oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
9038-
9039-
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
9040-
{
9041-
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
9042-
9043-
if (!ExecCheck(exprstate, econtext))
9044-
ereport(ERROR,
9045-
(errcode(ERRCODE_CHECK_VIOLATION),
9046-
errmsg("check constraint \"%s\" is violated by some row",
9047-
NameStr(constrForm->conname)),
9048-
errtableconstraint(rel, NameStr(constrForm->conname))));
9049-
9050-
ResetExprContext(econtext);
9051-
}
9052-
9053-
MemoryContextSwitchTo(oldcxt);
9054-
heap_endscan(scan);
9055-
UnregisterSnapshot(snapshot);
9056-
ExecDropSingleTupleTableSlot(slot);
9057-
FreeExecutorState(estate);
9058-
}
9059-
90609002
/*
90619003
* Scan the existing rows in a table to verify they meet a proposed FK
90629004
* constraint.

src/test/regress/expected/alter_table.out

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -493,8 +493,8 @@ NOTICE: boo: 18
493493
ALTER TABLE attmp3 ADD CONSTRAINT IDENTITY check (b = boo(b)) NOT VALID;
494494
NOTICE: merging constraint "identity" with inherited definition
495495
ALTER TABLE attmp3 VALIDATE CONSTRAINT identity;
496-
NOTICE: boo: 16
497496
NOTICE: boo: 20
497+
NOTICE: boo: 16
498498
-- A NO INHERIT constraint should not be looked for in children during VALIDATE CONSTRAINT
499499
create table parent_noinh_convalid (a int);
500500
create table child_noinh_convalid () inherits (parent_noinh_convalid);
@@ -998,6 +998,26 @@ ERROR: column "test2" contains null values
998998
-- now add a primary key column with a default (succeeds).
999999
alter table atacc1 add column test2 int default 0 primary key;
10001000
drop table atacc1;
1001+
-- additionally, we've seen issues with foreign key validation not being
1002+
-- properly delayed until after a table rewrite. Check that works ok.
1003+
create table atacc1 (a int primary key);
1004+
alter table atacc1 add constraint atacc1_fkey foreign key (a) references atacc1 (a) not valid;
1005+
alter table atacc1 validate constraint atacc1_fkey, alter a type bigint;
1006+
drop table atacc1;
1007+
-- we've also seen issues with check constraints being validated at the wrong
1008+
-- time when there's a pending table rewrite.
1009+
create table atacc1 (a bigint, b int);
1010+
insert into atacc1 values(1,1);
1011+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
1012+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
1013+
drop table atacc1;
1014+
-- same as above, but ensure the constraint violation is detected
1015+
create table atacc1 (a bigint, b int);
1016+
insert into atacc1 values(1,2);
1017+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
1018+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
1019+
ERROR: check constraint "atacc1_chk" is violated by some row
1020+
drop table atacc1;
10011021
-- something a little more complicated
10021022
create table atacc1 ( test int, test2 int);
10031023
-- add a primary key constraint

src/test/regress/sql/alter_table.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,28 @@ alter table atacc1 add column test2 int primary key;
763763
alter table atacc1 add column test2 int default 0 primary key;
764764
drop table atacc1;
765765

766+
-- additionally, we've seen issues with foreign key validation not being
767+
-- properly delayed until after a table rewrite. Check that works ok.
768+
create table atacc1 (a int primary key);
769+
alter table atacc1 add constraint atacc1_fkey foreign key (a) references atacc1 (a) not valid;
770+
alter table atacc1 validate constraint atacc1_fkey, alter a type bigint;
771+
drop table atacc1;
772+
773+
-- we've also seen issues with check constraints being validated at the wrong
774+
-- time when there's a pending table rewrite.
775+
create table atacc1 (a bigint, b int);
776+
insert into atacc1 values(1,1);
777+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
778+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
779+
drop table atacc1;
780+
781+
-- same as above, but ensure the constraint violation is detected
782+
create table atacc1 (a bigint, b int);
783+
insert into atacc1 values(1,2);
784+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
785+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
786+
drop table atacc1;
787+
766788
-- something a little more complicated
767789
create table atacc1 ( test int, test2 int);
768790
-- add a primary key constraint

0 commit comments

Comments
 (0)