tzparser.c source code [PostgreSQL/src/backend/utils/misc/tzparser.c]

/*-------------------------------------------------------------------------
 *
 * tzparser.c
 *	  Functions for parsing timezone offset files
 *
 * Note: this code is invoked from the check_hook for the GUC variable
 * timezone_abbreviations.  Therefore, it should report problems using
 * GUC_check_errmsg() and related functions, and try to avoid throwing
 * elog(ERROR).  This is not completely bulletproof at present --- in
 * particular out-of-memory will throw an error.  Could probably fix with
 * PG_TRY if necessary.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/utils/misc/tzparser.c
 *
 *-------------------------------------------------------------------------
 */
22
23	#include "postgres.h"
24
25	#include <ctype.h>
26
27	#include "miscadmin.h"
28	#include "storage/fd.h"
29	#include "utils/guc.h"
30	#include "utils/memutils.h"
31	#include "utils/tzparser.h"
32
33
34	#define WHITESPACE " \t\n\r"
35
36	static bool validateTzEntry(tzEntry *tzentry);
37	static bool splitTzLine(const char filename, int* lineno,
38	char line, tzEntry tzentry);
39	static int addToArray(tzEntry *base, int* arraysize, int* n,
40	tzEntry *entry, bool override);
41	static int ParseTzFile(const char filename, int* depth,
42	tzEntry *base, int* arraysize, int* n);
43
44
45	/*
46	* Apply additional validation checks to a tzEntry
47	*
48	* Returns true if OK, else false
49	*/
50	static bool
51	validateTzEntry(tzEntry *tzentry)
52	{
53	unsigned char *p;
54
55	/*
56	* Check restrictions imposed by datetkntbl storage format (see
57	* datetime.c)
58	*/
59	if (strlen(tzentry->abbrev) > TOKMAXLEN)
60	{
61	GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
62	tzentry->abbrev, TOKMAXLEN,
63	tzentry->filename, tzentry->lineno);
64	return false;
65	}
66
67	/*
68	* Sanity-check the offset: shouldn't exceed 14 hours
69	*/
70	if (tzentry->offset > `14` * `60` * `60` \|\|
71	tzentry->offset < -`14` * `60` * `60`)
72	{
73	GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
74	tzentry->offset,
75	tzentry->filename, tzentry->lineno);
76	return false;
77	}
78
79	/*
80	* Convert abbrev to lowercase (must match datetime.c's conversion)
81	*/
82	for (p = (unsigned char ) tzentry->abbrev; p; p++)
83	p = pg_tolower(p);
84
85	return true;
86	}
87
88	/*
89	* Attempt to parse the line as a timezone abbrev spec
90	*
91	* Valid formats are:
92	* name zone
93	* name offset dst
94	*
95	* Returns true if OK, else false; data is stored in *tzentry
96	*/
97	static bool
98	splitTzLine(const char filename, int* lineno, char line, tzEntry tzentry)
99	{
100	char *abbrev;
101	char *offset;
102	char *offset_endptr;
103	char *remain;
104	char *is_dst;
105
106	tzentry->lineno = lineno;
107	tzentry->filename = filename;
108
109	abbrev = strtok(line, WHITESPACE);
110	if (!abbrev)
111	{
112	GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
113	filename, lineno);
114	return false;
115	}
116	tzentry->abbrev = pstrdup(abbrev);
117
118	offset = strtok(NULL, WHITESPACE);
119	if (!offset)
120	{
121	GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
122	filename, lineno);
123	return false;
124	}
125
126	/ We assume zone names don't begin with a digit or sign /
127	if (isdigit((unsigned char) offset) \|\| offset == `'+'` \|\| *offset == `'-'`)
128	{
129	tzentry->zone = NULL;
130	tzentry->offset = strtol(offset, &offset_endptr, `10`);
131	if (offset_endptr == offset \|\| *offset_endptr != `'\0'`)
132	{
133	GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
134	filename, lineno);
135	return false;
136	}
137
138	is_dst = strtok(NULL, WHITESPACE);
139	if (is_dst && pg_strcasecmp(is_dst, "D") == `0`)
140	{
141	tzentry->is_dst = true;
142	remain = strtok(NULL, WHITESPACE);
143	}
144	else
145	{
146	/ there was no 'D' dst specifier /
147	tzentry->is_dst = false;
148	remain = is_dst;
149	}
150	}
151	else
152	{
153	/*
154	* Assume entry is a zone name. We do not try to validate it by
155	* looking up the zone, because that would force loading of a lot of
156	* zones that probably will never be used in the current session.
157	*/
158	tzentry->zone = pstrdup(offset);
159	tzentry->offset = `0`;
160	tzentry->is_dst = false;
161	remain = strtok(NULL, WHITESPACE);
162	}
163
164	if (!remain) / no more non-whitespace chars /
165	return true;
166
167	if (remain[`0`] != `'#'`) / must be a comment /
168	{
169	GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
170	filename, lineno);
171	return false;
172	}
173	return true;
174	}
175
176	/*
177	* Insert entry into sorted array
178	*
179	* *base: base address of array (changeable if must enlarge array)
180	* *arraysize: allocated length of array (changeable if must enlarge array)
181	* n: current number of valid elements in array
182	* entry: new data to insert
183	* override: true if OK to override
184	*
185	* Returns the new array length (new value for n), or -1 if error
186	*/
187	static int
188	addToArray(tzEntry *base, int* arraysize, int* n,
189	tzEntry *entry, bool override)
190	{
191	tzEntry *arrayptr;
192	int low;
193	int high;
194
195	/*
196	* Search the array for a duplicate; as a useful side effect, the array is
197	* maintained in sorted order. We use strcmp() to ensure we match the
198	* sort order datetime.c expects.
199	*/
200	arrayptr = *base;
201	low = `0`;
202	high = n - `1`;
203	while (low <= high)
204	{
205	int mid = (low + high) >> `1`;
206	tzEntry *midptr = arrayptr + mid;
207	int cmp;
208
209	cmp = strcmp(entry->abbrev, midptr->abbrev);
210	if (cmp < `0`)
211	high = mid - `1`;
212	else if (cmp > `0`)
213	low = mid + `1`;
214	else
215	{
216	/*
217	* Found a duplicate entry; complain unless it's the same.
218	*/
219	if ((midptr->zone == NULL && entry->zone == NULL &&
220	midptr->offset == entry->offset &&
221	midptr->is_dst == entry->is_dst) \|\|
222	(midptr->zone != NULL && entry->zone != NULL &&
223	strcmp(midptr->zone, entry->zone) == `0`))
224	{
225	/ return unchanged array /
226	return n;
227	}
228	if (override)
229	{
230	/ same abbrev but something is different, override /
231	midptr->zone = entry->zone;
232	midptr->offset = entry->offset;
233	midptr->is_dst = entry->is_dst;
234	return n;
235	}
236	/ same abbrev but something is different, complain /
237	GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
238	entry->abbrev);
239	GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
240	midptr->filename, midptr->lineno,
241	entry->filename, entry->lineno);
242	return -`1`;
243	}
244	}
245
246	/*
247	* No match, insert at position "low".
248	*/
249	if (n >= *arraysize)
250	{
251	arraysize = `2`;
252	base = (tzEntry ) repalloc(base, arraysize * sizeof(tzEntry));
253	}
254
255	arrayptr = *base + low;
256
257	memmove(arrayptr + `1`, arrayptr, (n - low) * sizeof(tzEntry));
258
259	memcpy(arrayptr, entry, sizeof(tzEntry));
260
261	return n + `1`;
262	}
263
264	/*
265	* Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
266	*
267	* filename: user-specified file name (does not include path)
268	* depth: current recursion depth
269	* *base: array for results (changeable if must enlarge array)
270	* *arraysize: allocated length of array (changeable if must enlarge array)
271	* n: current number of valid elements in array
272	*
273	* Returns the new array length (new value for n), or -1 if error
274	*/
275	static int
276	ParseTzFile(const char filename, int* depth,
277	tzEntry *base, int* arraysize, int* n)
278	{
279	char share_path[MAXPGPATH];
280	char file_path[MAXPGPATH];
281	FILE *tzFile;
282	char tzbuf[`1024`];
283	char *line;
284	tzEntry tzentry;
285	int lineno = `0`;
286	bool override = false;
287	const char *p;
288
289	/*
290	* We enforce that the filename is all alpha characters. This may be
291	* overly restrictive, but we don't want to allow access to anything
292	* outside the timezonesets directory, so for instance '/' must be
293	* rejected.
294	*/
295	for (p = filename; *p; p++)
296	{
297	if (!isalpha((unsigned char) *p))
298	{
299	/ at level 0, just use guc.c's regular "invalid value" message /
300	if (depth > `0`)
301	GUC_check_errmsg("invalid time zone file name \"%s\"",
302	filename);
303	return -`1`;
304	}
305	}
306
307	/*
308	* The maximal recursion depth is a pretty arbitrary setting. It is hard
309	* to imagine that someone needs more than 3 levels so stick with this
310	* conservative setting until someone complains.
311	*/
312	if (depth > `3`)
313	{
314	GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
315	filename);
316	return -`1`;
317	}
318
319	get_share_path(my_exec_path, share_path);
320	snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
321	share_path, filename);
322	tzFile = AllocateFile(file_path, "r");
323	if (!tzFile)
324	{
325	/*
326	* Check to see if the problem is not the filename but the directory.
327	* This is worth troubling over because if the installation share/
328	* directory is missing or unreadable, this is likely to be the first
329	* place we notice a problem during postmaster startup.
330	*/
331	int save_errno = errno;
332	DIR *tzdir;
333
334	snprintf(file_path, sizeof(file_path), "%s/timezonesets",
335	share_path);
336	tzdir = AllocateDir(file_path);
337	if (tzdir == NULL)
338	{
339	GUC_check_errmsg("could not open directory \"%s\": %m",
340	file_path);
341	GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
342	my_exec_path);
343	return -`1`;
344	}
345	FreeDir(tzdir);
346	errno = save_errno;
347
348	/*
349	* otherwise, if file doesn't exist and it's level 0, guc.c's
350	* complaint is enough
351	*/
352	if (errno != ENOENT \|\| depth > `0`)
353	GUC_check_errmsg("could not read time zone file \"%s\": %m",
354	filename);
355
356	return -`1`;
357	}
358
359	while (!feof(tzFile))
360	{
361	lineno++;
362	if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
363	{
364	if (ferror(tzFile))
365	{
366	GUC_check_errmsg("could not read time zone file \"%s\": %m",
367	filename);
368	return -`1`;
369	}
370	/ else we're at EOF after all /
371	break;
372	}
373	if (strlen(tzbuf) == sizeof(tzbuf) - `1`)
374	{
375	/ the line is too long for tzbuf /
376	GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
377	filename, lineno);
378	return -`1`;
379	}
380
381	/ skip over whitespace /
382	line = tzbuf;
383	while (line && isspace((unsigned* char) *line))
384	line++;
385
386	if (line == `'\0'`) /* empty line /
387	continue;
388	if (line == `'#'`) /* comment line /
389	continue;
390
391	if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == `0`)
392	{
393	/ pstrdup so we can use filename in result data structure /
394	char *includeFile = pstrdup(line + strlen("@INCLUDE"));
395
396	includeFile = strtok(includeFile, WHITESPACE);
397	if (!includeFile \|\| !*includeFile)
398	{
399	GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
400	filename, lineno);
401	return -`1`;
402	}
403	n = ParseTzFile(includeFile, depth + `1`,
404	base, arraysize, n);
405	if (n < `0`)
406	return -`1`;
407	continue;
408	}
409
410	if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == `0`)
411	{
412	override = true;
413	continue;
414	}
415
416	if (!splitTzLine(filename, lineno, line, &tzentry))
417	return -`1`;
418	if (!validateTzEntry(&tzentry))
419	return -`1`;
420	n = addToArray(base, arraysize, n, &tzentry, override);
421	if (n < `0`)
422	return -`1`;
423	}
424
425	FreeFile(tzFile);
426
427	return n;
428	}
429
430	/*
431	* load_tzoffsets --- read and parse the specified timezone offset file
432	*
433	* On success, return a filled-in TimeZoneAbbrevTable, which must have been
434	* malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
435	* and friends to give details of the problem.
436	*/
437	TimeZoneAbbrevTable *
438	load_tzoffsets(const char *filename)
439	{
440	TimeZoneAbbrevTable *result = NULL;
441	MemoryContext tmpContext;
442	MemoryContext oldContext;
443	tzEntry *array;
444	int arraysize;
445	int n;
446
447	/*
448	* Create a temp memory context to work in. This makes it easy to clean
449	* up afterwards.
450	*/
451	tmpContext = AllocSetContextCreate(CurrentMemoryContext,
452	"TZParserMemory",
453	ALLOCSET_SMALL_SIZES);
454	oldContext = MemoryContextSwitchTo(tmpContext);
455
456	/ Initialize array at a reasonable size /
457	arraysize = `128`;
458	array = (tzEntry ) palloc(arraysize sizeof(tzEntry));
459
460	/ Parse the file(s) /
461	n = ParseTzFile(filename, `0`, &array, &arraysize, `0`);
462
463	/ If no errors so far, let datetime.c allocate memory & convert format /
464	if (n >= `0`)
465	{
466	result = ConvertTimeZoneAbbrevs(array, n);
467	if (!result)
468	GUC_check_errmsg("out of memory");
469	}
470
471	/ Clean up /
472	MemoryContextSwitchTo(oldContext);
473	MemoryContextDelete(tmpContext);
474
475	return result;
476	}
477

Browse the source code of PostgreSQL/src/backend/utils/misc/tzparser.c