Skip to content

Commit 47ba0fb

Browse files
committed
Be more careful to not lose sync in the FE/BE protocol.
If any error occurred while we were in the middle of reading a protocol message from the client, we could lose sync, and incorrectly try to interpret a part of another message as a new protocol message. That will usually lead to an "invalid frontend message" error that terminates the connection. However, this is a security issue because an attacker might be able to deliberately cause an error, inject a Query message in what's supposed to be just user data, and have the server execute it. We were quite careful to not have CHECK_FOR_INTERRUPTS() calls or other operations that could ereport(ERROR) in the middle of processing a message, but a query cancel interrupt or statement timeout could nevertheless cause it to happen. Also, the V2 fastpath and COPY handling were not so careful. It's very difficult to recover in the V2 COPY protocol, so we will just terminate the connection on error. In practice, that's what happened previously anyway, as we lost protocol sync. To fix, add a new variable in pqcomm.c, PqCommReadingMsg, that is set whenever we're in the middle of reading a message. When it's set, we cannot safely ERROR out and continue running, because we might've read only part of a message. PqCommReadingMsg acts somewhat similarly to critical sections in that if an error occurs while it's set, the error handler will force the connection to be terminated, as if the error was FATAL. It's not implemented by promoting ERROR to FATAL in elog.c, like ERROR is promoted to PANIC in critical sections, because we want to be able to use PG_TRY/CATCH to recover and regain protocol sync. pq_getmessage() takes advantage of that to prevent an OOM error from terminating the connection. To prevent unnecessary connection terminations, add a holdoff mechanism similar to HOLD/RESUME_INTERRUPTS() that can be used hold off query cancel interrupts, but still allow die interrupts. The rules on which interrupts are processed when are now a bit more complicated, so refactor ProcessInterrupts() and the calls to it in signal handlers so that the signal handlers always call it if ImmediateInterruptOK is set, and ProcessInterrupts() can decide to not do anything if the other conditions are not met. Reported by Emil Lenngren. Patch reviewed by Noah Misch and Andres Freund. Backpatch to all supported versions. Security: CVE-2015-0244
1 parent 0a3ee8a commit 47ba0fb

File tree

13 files changed

+225
-80
lines changed

13 files changed

+225
-80
lines changed

src/backend/commands/copy.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,8 @@ ReceiveCopyBegin(CopyState cstate)
357357
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
358358
errmsg("COPY BINARY is not supported to stdout or from stdin")));
359359
pq_putemptymessage('G');
360+
/* any error in old protocol will make us lose sync */
361+
pq_startmsgread();
360362
cstate->copy_dest = COPY_OLD_FE;
361363
}
362364
else
@@ -367,6 +369,8 @@ ReceiveCopyBegin(CopyState cstate)
367369
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
368370
errmsg("COPY BINARY is not supported to stdout or from stdin")));
369371
pq_putemptymessage('D');
372+
/* any error in old protocol will make us lose sync */
373+
pq_startmsgread();
370374
cstate->copy_dest = COPY_OLD_FE;
371375
}
372376
/* We *must* flush here to ensure FE knows it can send. */
@@ -527,6 +531,8 @@ CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
527531
int mtype;
528532

529533
readmessage:
534+
HOLD_CANCEL_INTERRUPTS();
535+
pq_startmsgread();
530536
mtype = pq_getbyte();
531537
if (mtype == EOF)
532538
ereport(ERROR,
@@ -536,6 +542,7 @@ CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
536542
ereport(ERROR,
537543
(errcode(ERRCODE_CONNECTION_FAILURE),
538544
errmsg("unexpected EOF on client connection")));
545+
RESUME_CANCEL_INTERRUPTS();
539546
switch (mtype)
540547
{
541548
case 'd': /* CopyData */
@@ -2196,6 +2203,13 @@ CopyFrom(CopyState cstate)
21962203

21972204
MemoryContextSwitchTo(oldcontext);
21982205

2206+
/*
2207+
* In the old protocol, tell pqcomm that we can process normal protocol
2208+
* messages again.
2209+
*/
2210+
if (cstate->copy_dest == COPY_OLD_FE)
2211+
pq_endmsgread();
2212+
21992213
/* Execute AFTER STATEMENT insertion triggers */
22002214
ExecASInsertTriggers(estate, resultRelInfo);
22012215

src/backend/libpq/auth.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,7 @@ recv_password_packet(Port *port)
640640
{
641641
StringInfoData buf;
642642

643+
pq_startmsgread();
643644
if (PG_PROTOCOL_MAJOR(port->proto) >= 3)
644645
{
645646
/* Expect 'p' message type */
@@ -1046,6 +1047,7 @@ pg_GSS_recvauth(Port *port)
10461047
*/
10471048
do
10481049
{
1050+
pq_startmsgread();
10491051
mtype = pq_getbyte();
10501052
if (mtype != 'p')
10511053
{
@@ -1284,6 +1286,7 @@ pg_SSPI_recvauth(Port *port)
12841286
*/
12851287
do
12861288
{
1289+
pq_startmsgread();
12871290
mtype = pq_getbyte();
12881291
if (mtype != 'p')
12891292
{

src/backend/libpq/pqcomm.c

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,9 @@ static int PqRecvLength; /* End of data available in PqRecvBuffer */
120120
/*
121121
* Message status
122122
*/
123-
static bool PqCommBusy;
124-
static bool DoingCopyOut;
123+
static bool PqCommBusy; /* busy sending data to the client */
124+
static bool PqCommReadingMsg; /* in the middle of reading a message */
125+
static bool DoingCopyOut; /* in old-protocol COPY OUT processing */
125126

126127

127128
/* Internal functions */
@@ -144,6 +145,7 @@ pq_init(void)
144145
{
145146
PqSendPointer = PqRecvPointer = PqRecvLength = 0;
146147
PqCommBusy = false;
148+
PqCommReadingMsg = false;
147149
DoingCopyOut = false;
148150
on_proc_exit(pq_close, 0);
149151
}
@@ -808,6 +810,8 @@ pq_recvbuf(void)
808810
int
809811
pq_getbyte(void)
810812
{
813+
Assert(PqCommReadingMsg);
814+
811815
while (PqRecvPointer >= PqRecvLength)
812816
{
813817
if (pq_recvbuf()) /* If nothing in buffer, then recv some */
@@ -847,6 +851,8 @@ pq_getbyte_if_available(unsigned char *c)
847851
{
848852
int r;
849853

854+
Assert(PqCommReadingMsg);
855+
850856
if (PqRecvPointer < PqRecvLength)
851857
{
852858
*c = PqRecvBuffer[PqRecvPointer++];
@@ -934,6 +940,8 @@ pq_getbytes(char *s, size_t len)
934940
{
935941
size_t amount;
936942

943+
Assert(PqCommReadingMsg);
944+
937945
while (len > 0)
938946
{
939947
while (PqRecvPointer >= PqRecvLength)
@@ -966,6 +974,8 @@ pq_discardbytes(size_t len)
966974
{
967975
size_t amount;
968976

977+
Assert(PqCommReadingMsg);
978+
969979
while (len > 0)
970980
{
971981
while (PqRecvPointer >= PqRecvLength)
@@ -1002,6 +1012,8 @@ pq_getstring(StringInfo s)
10021012
{
10031013
int i;
10041014

1015+
Assert(PqCommReadingMsg);
1016+
10051017
resetStringInfo(s);
10061018

10071019
/* Read until we get the terminating '\0' */
@@ -1033,6 +1045,58 @@ pq_getstring(StringInfo s)
10331045
}
10341046

10351047

1048+
/* --------------------------------
1049+
* pq_startmsgread - begin reading a message from the client.
1050+
*
1051+
* This must be called before any of the pq_get* functions.
1052+
* --------------------------------
1053+
*/
1054+
void
1055+
pq_startmsgread(void)
1056+
{
1057+
/*
1058+
* There shouldn't be a read active already, but let's check just to be
1059+
* sure.
1060+
*/
1061+
if (PqCommReadingMsg)
1062+
ereport(FATAL,
1063+
(errcode(ERRCODE_PROTOCOL_VIOLATION),
1064+
errmsg("terminating connection because protocol sync was lost")));
1065+
1066+
PqCommReadingMsg = true;
1067+
}
1068+
1069+
1070+
/* --------------------------------
1071+
* pq_endmsgread - finish reading message.
1072+
*
1073+
* This must be called after reading a V2 protocol message with
1074+
* pq_getstring() and friends, to indicate that we have read the whole
1075+
* message. In V3 protocol, pq_getmessage() does this implicitly.
1076+
* --------------------------------
1077+
*/
1078+
void
1079+
pq_endmsgread(void)
1080+
{
1081+
Assert(PqCommReadingMsg);
1082+
1083+
PqCommReadingMsg = false;
1084+
}
1085+
1086+
/* --------------------------------
1087+
* pq_is_reading_msg - are we currently reading a message?
1088+
*
1089+
* This is used in error recovery at the outer idle loop to detect if we have
1090+
* lost protocol sync, and need to terminate the connection. pq_startmsgread()
1091+
* will check for that too, but it's nicer to detect it earlier.
1092+
* --------------------------------
1093+
*/
1094+
bool
1095+
pq_is_reading_msg(void)
1096+
{
1097+
return PqCommReadingMsg;
1098+
}
1099+
10361100
/* --------------------------------
10371101
* pq_getmessage - get a message with length word from connection
10381102
*
@@ -1054,6 +1118,8 @@ pq_getmessage(StringInfo s, int maxlen)
10541118
{
10551119
int32 len;
10561120

1121+
Assert(PqCommReadingMsg);
1122+
10571123
resetStringInfo(s);
10581124

10591125
/* Read message length word */
@@ -1095,6 +1161,9 @@ pq_getmessage(StringInfo s, int maxlen)
10951161
ereport(COMMERROR,
10961162
(errcode(ERRCODE_PROTOCOL_VIOLATION),
10971163
errmsg("incomplete message from client")));
1164+
1165+
/* we discarded the rest of the message so we're back in sync. */
1166+
PqCommReadingMsg = false;
10981167
PG_RE_THROW();
10991168
}
11001169
PG_END_TRY();
@@ -1112,6 +1181,9 @@ pq_getmessage(StringInfo s, int maxlen)
11121181
s->data[len] = '\0';
11131182
}
11141183

1184+
/* finished reading the message. */
1185+
PqCommReadingMsg = false;
1186+
11151187
return 0;
11161188
}
11171189

src/backend/postmaster/postmaster.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1588,6 +1588,7 @@ ProcessStartupPacket(Port *port, bool SSLdone)
15881588
ProtocolVersion proto;
15891589
MemoryContext oldcontext;
15901590

1591+
pq_startmsgread();
15911592
if (pq_getbytes((char *) &len, 4) == EOF)
15921593
{
15931594
/*
@@ -1632,6 +1633,7 @@ ProcessStartupPacket(Port *port, bool SSLdone)
16321633
errmsg("incomplete startup packet")));
16331634
return STATUS_ERROR;
16341635
}
1636+
pq_endmsgread();
16351637

16361638
/*
16371639
* The first field is either a protocol version number or a special

src/backend/replication/walsender.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,16 @@ WalSndHandshake(void)
164164
int firstchar;
165165

166166
/* Wait for a command to arrive */
167+
pq_startmsgread();
167168
firstchar = pq_getbyte();
168169

170+
/* Read the message contents */
171+
if (firstchar != EOF)
172+
{
173+
if (pq_getmessage(&input_message, 0))
174+
firstchar = EOF; /* suitable message already logged */
175+
}
176+
169177
/*
170178
* Emergency bailout if postmaster has died. This is to avoid the
171179
* necessity for manual cleanup of all postmaster children.
@@ -183,16 +191,6 @@ WalSndHandshake(void)
183191
ProcessConfigFile(PGC_SIGHUP);
184192
}
185193

186-
if (firstchar != EOF)
187-
{
188-
/*
189-
* Read the message contents. This is expected to be done without
190-
* blocking because we've been able to get message type code.
191-
*/
192-
if (pq_getmessage(&input_message, 0))
193-
firstchar = EOF; /* suitable message already logged */
194-
}
195-
196194
/* Handle the very limited subset of commands expected in this phase */
197195
switch (firstchar)
198196
{
@@ -331,6 +329,7 @@ CheckClosedConnection(void)
331329
unsigned char firstchar;
332330
int r;
333331

332+
pq_startmsgread();
334333
r = pq_getbyte_if_available(&firstchar);
335334
if (r < 0)
336335
{
@@ -343,6 +342,7 @@ CheckClosedConnection(void)
343342
if (r == 0)
344343
{
345344
/* no data available without blocking */
345+
pq_endmsgread();
346346
return;
347347
}
348348

src/backend/storage/lmgr/proc.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,9 +571,14 @@ LockWaitCancel(void)
571571
{
572572
LWLockId partitionLock;
573573

574+
HOLD_INTERRUPTS();
575+
574576
/* Nothing to do if we weren't waiting for a lock */
575577
if (lockAwaited == NULL)
578+
{
579+
RESUME_INTERRUPTS();
576580
return;
581+
}
577582

578583
/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
579584
disable_sig_alarm(false);
@@ -612,6 +617,8 @@ LockWaitCancel(void)
612617
* wakeup signal isn't harmful, and it seems not worth expending cycles to
613618
* get rid of a signal that most likely isn't there.
614619
*/
620+
621+
RESUME_INTERRUPTS();
615622
}
616623

617624

src/backend/tcop/fastpath.c

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ static int16 parse_fcall_arguments_20(StringInfo msgBuf, struct fp_info * fip,
7474
* The caller should already have initialized buf to empty.
7575
* ----------------
7676
*/
77-
static int
77+
int
7878
GetOldFunctionMessage(StringInfo buf)
7979
{
8080
int32 ibuf;
@@ -279,20 +279,6 @@ HandleFunctionRequest(StringInfo msgBuf)
279279
bool was_logged = false;
280280
char msec_str[32];
281281

282-
/*
283-
* Read message contents if not already done.
284-
*/
285-
if (PG_PROTOCOL_MAJOR(FrontendProtocol) < 3)
286-
{
287-
if (GetOldFunctionMessage(msgBuf))
288-
{
289-
ereport(COMMERROR,
290-
(errcode(ERRCODE_PROTOCOL_VIOLATION),
291-
errmsg("unexpected EOF on client connection")));
292-
return EOF;
293-
}
294-
}
295-
296282
/*
297283
* Now that we've eaten the input message, check to see if we actually
298284
* want to do the function call or not. It's now safe to ereport(); we

0 commit comments

Comments
 (0)