Skip to content

Commit bb1b8f6

Browse files
committed
De-reserve most statement-introducing keywords in plpgsql.
Add a bit of context sensitivity to plpgsql_yylex() so that it can recognize when the word it is looking at is the first word of a new statement, and if so whether it is the target of an assignment statement. When we are at start of statement and it's not an assignment, we can prefer recognizing unreserved keywords over recognizing variable names, thereby allowing most statements' initial keywords to be demoted from reserved to unreserved status. This is rather useful already (there are 15 such words that get demoted here), and what's more to the point is that future patches proposing to add new plpgsql statements can avoid objections about having to add new reserved words.

The keywords BEGIN, DECLARE, FOR, FOREACH, LOOP, and WHILE need to remain reserved because they can be preceded by block labels, and the logic added here doesn't understand block labels. In principle we could probably fix that, but it would take more than one token of lookback and the benefit doesn't seem worth the extra complexity.

Also note I didn't de-reserve EXECUTE, because it is used in more places than just statement start. It's possible it could be de-reserved with more work, but that would be an independent fix.

In passing, also de-reserve COLLATE and DEFAULT, which shouldn't have been reserved in the first place since they only need to be recognized within DECLARE sections.
1 parent bac2739 commit bb1b8f6

File tree

4 files changed

+147
-38
lines changed

4 files changed

+147
-38
lines changed

src/pl/plpgsql/src/pl_gram.y

+16
Original file line numberDiff line numberDiff line change
@@ -2315,32 +2315,46 @@ unreserved_keyword :
23152315
| K_ALIAS
23162316
| K_ARRAY
23172317
| K_BACKWARD
2318+
| K_CLOSE
2319+
| K_COLLATE
23182320
| K_COLUMN
23192321
| K_COLUMN_NAME
23202322
| K_CONSTANT
23212323
| K_CONSTRAINT
23222324
| K_CONSTRAINT_NAME
2325+
| K_CONTINUE
23232326
| K_CURRENT
23242327
| K_CURSOR
23252328
| K_DATATYPE
23262329
| K_DEBUG
2330+
| K_DEFAULT
23272331
| K_DETAIL
2332+
| K_DIAGNOSTICS
23282333
| K_DUMP
2334+
| K_ELSIF
23292335
| K_ERRCODE
23302336
| K_ERROR
2337+
| K_EXCEPTION
2338+
| K_EXIT
2339+
| K_FETCH
23312340
| K_FIRST
23322341
| K_FORWARD
2342+
| K_GET
23332343
| K_HINT
23342344
| K_INFO
2345+
| K_INSERT
23352346
| K_IS
23362347
| K_LAST
23372348
| K_LOG
23382349
| K_MESSAGE
23392350
| K_MESSAGE_TEXT
2351+
| K_MOVE
23402352
| K_NEXT
23412353
| K_NO
23422354
| K_NOTICE
2355+
| K_OPEN
23432356
| K_OPTION
2357+
| K_PERFORM
23442358
| K_PG_CONTEXT
23452359
| K_PG_DATATYPE_NAME
23462360
| K_PG_EXCEPTION_CONTEXT
@@ -2349,8 +2363,10 @@ unreserved_keyword :
23492363
| K_PRINT_STRICT_PARAMS
23502364
| K_PRIOR
23512365
| K_QUERY
2366+
| K_RAISE
23522367
| K_RELATIVE
23532368
| K_RESULT_OID
2369+
| K_RETURN
23542370
| K_RETURNED_SQLSTATE
23552371
| K_REVERSE
23562372
| K_ROW_COUNT

src/pl/plpgsql/src/pl_scanner.c

+106-38
Original file line numberDiff line numberDiff line change
@@ -31,21 +31,28 @@ IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
3131
*
3232
* We keep reserved and unreserved keywords in separate arrays. The
3333
* reserved keywords are passed to the core scanner, so they will be
34-
* recognized before (and instead of) any variable name. Unreserved
35-
* words are checked for separately, after determining that the identifier
34+
* recognized before (and instead of) any variable name. Unreserved words
35+
* are checked for separately, usually after determining that the identifier
3636
* isn't a known variable name. If plpgsql_IdentifierLookup is DECLARE then
3737
* no variable names will be recognized, so the unreserved words always work.
3838
* (Note in particular that this helps us avoid reserving keywords that are
3939
* only needed in DECLARE sections.)
4040
*
4141
* In certain contexts it is desirable to prefer recognizing an unreserved
42-
* keyword over recognizing a variable name. Those cases are handled in
43-
* pl_gram.y using tok_is_keyword().
42+
* keyword over recognizing a variable name. In particular, at the start
43+
* of a statement we should prefer unreserved keywords unless the statement
44+
* looks like an assignment (i.e., first token is followed by ':=', '=' or '[').
45+
* This rule allows most statement-introducing keywords to be kept unreserved.
46+
* (We still have to reserve initial keywords that might follow a block
47+
* label, unfortunately, since the method used to determine if we are at
48+
* start of statement doesn't recognize such cases. We'd also have to
49+
* reserve any keyword that could legitimately be followed by ':=', '=' or '['.)
50+
* Some additional cases are handled in pl_gram.y using tok_is_keyword().
4451
*
45-
* For the most part, the reserved keywords are those that start a PL/pgSQL
46-
* statement (and so would conflict with an assignment to a variable of the
47-
* same name). We also don't sweat it much about reserving keywords that
48-
* are reserved in the core grammar. Try to avoid reserving other words.
52+
* We try to avoid reserving more keywords than we have to; but there's
53+
* little point in not reserving a word if it's reserved in the core grammar.
54+
* Currently, the following words are reserved here but not in the core:
55+
* BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE
4956
*/
5057

5158
/*
@@ -63,37 +70,20 @@ static const ScanKeyword reserved_keywords[] = {
6370
PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
6471
PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
6572
PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
66-
PG_KEYWORD("close", K_CLOSE, RESERVED_KEYWORD)
67-
PG_KEYWORD("collate", K_COLLATE, RESERVED_KEYWORD)
68-
PG_KEYWORD("continue", K_CONTINUE, RESERVED_KEYWORD)
6973
PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
70-
PG_KEYWORD("default", K_DEFAULT, RESERVED_KEYWORD)
71-
PG_KEYWORD("diagnostics", K_DIAGNOSTICS, RESERVED_KEYWORD)
7274
PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
73-
PG_KEYWORD("elseif", K_ELSIF, RESERVED_KEYWORD)
74-
PG_KEYWORD("elsif", K_ELSIF, RESERVED_KEYWORD)
7575
PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
76-
PG_KEYWORD("exception", K_EXCEPTION, RESERVED_KEYWORD)
7776
PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
78-
PG_KEYWORD("exit", K_EXIT, RESERVED_KEYWORD)
79-
PG_KEYWORD("fetch", K_FETCH, RESERVED_KEYWORD)
8077
PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
8178
PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD)
8279
PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
83-
PG_KEYWORD("get", K_GET, RESERVED_KEYWORD)
8480
PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
8581
PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
86-
PG_KEYWORD("insert", K_INSERT, RESERVED_KEYWORD)
8782
PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
8883
PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
89-
PG_KEYWORD("move", K_MOVE, RESERVED_KEYWORD)
9084
PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
9185
PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
92-
PG_KEYWORD("open", K_OPEN, RESERVED_KEYWORD)
9386
PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
94-
PG_KEYWORD("perform", K_PERFORM, RESERVED_KEYWORD)
95-
PG_KEYWORD("raise", K_RAISE, RESERVED_KEYWORD)
96-
PG_KEYWORD("return", K_RETURN, RESERVED_KEYWORD)
9787
PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
9888
PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
9989
PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
@@ -109,32 +99,47 @@ static const ScanKeyword unreserved_keywords[] = {
10999
PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
110100
PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
111101
PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
102+
PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
103+
PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
112104
PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
113105
PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
114106
PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
115107
PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
116108
PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
109+
PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
117110
PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
118111
PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
119112
PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
120113
PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
114+
PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
121115
PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
116+
PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
122117
PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
118+
PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
119+
PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
123120
PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
124121
PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
122+
PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
123+
PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
124+
PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
125125
PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
126126
PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
127+
PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
127128
PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
128129
PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
130+
PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
129131
PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
130132
PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
131133
PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
132134
PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
133135
PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
136+
PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
134137
PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
135138
PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
136139
PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
140+
PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
137141
PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
142+
PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
138143
PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
139144
PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
140145
PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
@@ -143,8 +148,10 @@ static const ScanKeyword unreserved_keywords[] = {
143148
PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
144149
PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
145150
PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
151+
PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
146152
PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
147153
PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD)
154+
PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
148155
PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
149156
PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
150157
PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
@@ -166,6 +173,19 @@ static const ScanKeyword unreserved_keywords[] = {
166173

167174
static const int num_unreserved_keywords = lengthof(unreserved_keywords);
168175

176+
/*
177+
* This macro must recognize all tokens that can immediately precede a
178+
* PL/pgSQL executable statement (that is, proc_sect or proc_stmt in the
179+
* grammar). Fortunately, there are not very many, so hard-coding in this
180+
* fashion seems sufficient.
181+
*/
182+
#define AT_STMT_START(prev_token) \
183+
((prev_token) == ';' || \
184+
(prev_token) == K_BEGIN || \
185+
(prev_token) == K_THEN || \
186+
(prev_token) == K_ELSE || \
187+
(prev_token) == K_LOOP)
188+
169189

170190
/* Auxiliary data about a token (other than the token type) */
171191
typedef struct
@@ -192,6 +212,9 @@ static const char *scanorig;
192212
/* Current token's length (corresponds to plpgsql_yylval and plpgsql_yylloc) */
193213
static int plpgsql_yyleng;
194214

215+
/* Current token's code (corresponds to plpgsql_yylval and plpgsql_yylloc) */
216+
static int plpgsql_yytoken;
217+
195218
/* Token pushback stack */
196219
#define MAX_PUSHBACKS 4
197220

@@ -315,31 +338,75 @@ plpgsql_yylex(void)
315338
{
316339
/* not A.B, so just process A */
317340
push_back_token(tok2, &aux2);
318-
if (plpgsql_parse_word(aux1.lval.str,
319-
core_yy.scanbuf + aux1.lloc,
320-
&aux1.lval.wdatum,
321-
&aux1.lval.word))
322-
tok1 = T_DATUM;
323-
else if (!aux1.lval.word.quoted &&
324-
(kw = ScanKeywordLookup(aux1.lval.word.ident,
325-
unreserved_keywords,
326-
num_unreserved_keywords)))
341+
342+
/*
343+
* If we are at start of statement, prefer unreserved keywords
344+
* over variable names, unless the next token is assignment or
345+
* '[', in which case prefer variable names. (Note we need not
346+
* consider '.' as the next token; that case was handled above,
347+
* and we always prefer variable names in that case.) If we are
348+
* not at start of statement, always prefer variable names over
349+
* unreserved keywords.
350+
*/
351+
if (AT_STMT_START(plpgsql_yytoken) &&
352+
!(tok2 == '=' || tok2 == COLON_EQUALS || tok2 == '['))
327353
{
328-
aux1.lval.keyword = kw->name;
329-
tok1 = kw->value;
354+
/* try for unreserved keyword, then for variable name */
355+
if (core_yy.scanbuf[aux1.lloc] != '"' &&
356+
(kw = ScanKeywordLookup(aux1.lval.str,
357+
unreserved_keywords,
358+
num_unreserved_keywords)))
359+
{
360+
aux1.lval.keyword = kw->name;
361+
tok1 = kw->value;
362+
}
363+
else if (plpgsql_parse_word(aux1.lval.str,
364+
core_yy.scanbuf + aux1.lloc,
365+
&aux1.lval.wdatum,
366+
&aux1.lval.word))
367+
tok1 = T_DATUM;
368+
else
369+
tok1 = T_WORD;
330370
}
331371
else
332-
tok1 = T_WORD;
372+
{
373+
/* try for variable name, then for unreserved keyword */
374+
if (plpgsql_parse_word(aux1.lval.str,
375+
core_yy.scanbuf + aux1.lloc,
376+
&aux1.lval.wdatum,
377+
&aux1.lval.word))
378+
tok1 = T_DATUM;
379+
else if (!aux1.lval.word.quoted &&
380+
(kw = ScanKeywordLookup(aux1.lval.word.ident,
381+
unreserved_keywords,
382+
num_unreserved_keywords)))
383+
{
384+
aux1.lval.keyword = kw->name;
385+
tok1 = kw->value;
386+
}
387+
else
388+
tok1 = T_WORD;
389+
}
333390
}
334391
}
335392
else
336393
{
337-
/* Not a potential plpgsql variable name, just return the data */
394+
/*
395+
* Not a potential plpgsql variable name, just return the data.
396+
*
397+
* Note that we also come through here if the grammar pushed back a
398+
* T_DATUM, T_CWORD, T_WORD, or unreserved-keyword token returned by a
399+
* previous lookup cycle; thus, pushbacks do not incur extra lookup
400+
* work, since we'll never do the above code twice for the same token.
401+
* This property also makes it safe to rely on the old value of
402+
* plpgsql_yytoken in the is-this-start-of-statement test above.
403+
*/
338404
}
339405

340406
plpgsql_yylval = aux1.lval;
341407
plpgsql_yylloc = aux1.lloc;
342408
plpgsql_yyleng = aux1.leng;
409+
plpgsql_yytoken = tok1;
343410
return tok1;
344411
}
345412

@@ -645,6 +712,7 @@ plpgsql_scanner_init(const char *str)
645712

646713
/* Other setup */
647714
plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
715+
plpgsql_yytoken = 0;
648716

649717
num_pushbacks = 0;
650718

src/test/regress/expected/plpgsql.out

+14
Original file line numberDiff line numberDiff line change
@@ -4906,6 +4906,20 @@ select unreserved_test();
49064906
42
49074907
(1 row)
49084908

4909+
create or replace function unreserved_test() returns int as $$
4910+
declare
4911+
return int := 42;
4912+
begin
4913+
return := return + 1;
4914+
return return;
4915+
end
4916+
$$ language plpgsql;
4917+
select unreserved_test();
4918+
unreserved_test
4919+
-----------------
4920+
43
4921+
(1 row)
4922+
49094923
drop function unreserved_test();
49104924
--
49114925
-- Test FOREACH over arrays

src/test/regress/sql/plpgsql.sql

+11
Original file line numberDiff line numberDiff line change
@@ -3940,6 +3940,17 @@ $$ language plpgsql;
39403940
39413941
select unreserved_test();
39423942
3943+
create or replace function unreserved_test() returns int as $$
3944+
declare
3945+
return int := 42;
3946+
begin
3947+
return := return + 1;
3948+
return return;
3949+
end
3950+
$$ language plpgsql;
3951+
3952+
select unreserved_test();
3953+
39433954
drop function unreserved_test();
39443955
39453956
--

0 commit comments

Comments
 (0)