| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * tzparser.c |
| 4 | * Functions for parsing timezone offset files |
| 5 | * |
| 6 | * Note: this code is invoked from the check_hook for the GUC variable |
| 7 | * timezone_abbreviations. Therefore, it should report problems using |
| 8 | * GUC_check_errmsg() and related functions, and try to avoid throwing |
| 9 | * elog(ERROR). This is not completely bulletproof at present --- in |
| 10 | * particular out-of-memory will throw an error. Could probably fix with |
| 11 | * PG_TRY if necessary. |
| 12 | * |
| 13 | * |
| 14 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 15 | * Portions Copyright (c) 1994, Regents of the University of California |
| 16 | * |
| 17 | * IDENTIFICATION |
| 18 | * src/backend/utils/misc/tzparser.c |
| 19 | * |
| 20 | *------------------------------------------------------------------------- |
| 21 | */ |
| 22 | |
#include "postgres.h"

#include <ctype.h>
#include <limits.h>

#include "miscadmin.h"
#include "storage/fd.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/tzparser.h"
| 32 | |
| 33 | |
| 34 | #define WHITESPACE " \t\n\r" |
| 35 | |
| 36 | static bool validateTzEntry(tzEntry *tzentry); |
| 37 | static bool splitTzLine(const char *filename, int lineno, |
| 38 | char *line, tzEntry *tzentry); |
| 39 | static int addToArray(tzEntry **base, int *arraysize, int n, |
| 40 | tzEntry *entry, bool override); |
| 41 | static int ParseTzFile(const char *filename, int depth, |
| 42 | tzEntry **base, int *arraysize, int n); |
| 43 | |
| 44 | |
| 45 | /* |
| 46 | * Apply additional validation checks to a tzEntry |
| 47 | * |
| 48 | * Returns true if OK, else false |
| 49 | */ |
| 50 | static bool |
| 51 | validateTzEntry(tzEntry *tzentry) |
| 52 | { |
| 53 | unsigned char *p; |
| 54 | |
| 55 | /* |
| 56 | * Check restrictions imposed by datetkntbl storage format (see |
| 57 | * datetime.c) |
| 58 | */ |
| 59 | if (strlen(tzentry->abbrev) > TOKMAXLEN) |
| 60 | { |
| 61 | GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d" , |
| 62 | tzentry->abbrev, TOKMAXLEN, |
| 63 | tzentry->filename, tzentry->lineno); |
| 64 | return false; |
| 65 | } |
| 66 | |
| 67 | /* |
| 68 | * Sanity-check the offset: shouldn't exceed 14 hours |
| 69 | */ |
| 70 | if (tzentry->offset > 14 * 60 * 60 || |
| 71 | tzentry->offset < -14 * 60 * 60) |
| 72 | { |
| 73 | GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d" , |
| 74 | tzentry->offset, |
| 75 | tzentry->filename, tzentry->lineno); |
| 76 | return false; |
| 77 | } |
| 78 | |
| 79 | /* |
| 80 | * Convert abbrev to lowercase (must match datetime.c's conversion) |
| 81 | */ |
| 82 | for (p = (unsigned char *) tzentry->abbrev; *p; p++) |
| 83 | *p = pg_tolower(*p); |
| 84 | |
| 85 | return true; |
| 86 | } |
| 87 | |
| 88 | /* |
| 89 | * Attempt to parse the line as a timezone abbrev spec |
| 90 | * |
| 91 | * Valid formats are: |
| 92 | * name zone |
| 93 | * name offset dst |
| 94 | * |
| 95 | * Returns true if OK, else false; data is stored in *tzentry |
| 96 | */ |
| 97 | static bool |
| 98 | splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry) |
| 99 | { |
| 100 | char *abbrev; |
| 101 | char *offset; |
| 102 | char *offset_endptr; |
| 103 | char *remain; |
| 104 | char *is_dst; |
| 105 | |
| 106 | tzentry->lineno = lineno; |
| 107 | tzentry->filename = filename; |
| 108 | |
| 109 | abbrev = strtok(line, WHITESPACE); |
| 110 | if (!abbrev) |
| 111 | { |
| 112 | GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d" , |
| 113 | filename, lineno); |
| 114 | return false; |
| 115 | } |
| 116 | tzentry->abbrev = pstrdup(abbrev); |
| 117 | |
| 118 | offset = strtok(NULL, WHITESPACE); |
| 119 | if (!offset) |
| 120 | { |
| 121 | GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d" , |
| 122 | filename, lineno); |
| 123 | return false; |
| 124 | } |
| 125 | |
| 126 | /* We assume zone names don't begin with a digit or sign */ |
| 127 | if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-') |
| 128 | { |
| 129 | tzentry->zone = NULL; |
| 130 | tzentry->offset = strtol(offset, &offset_endptr, 10); |
| 131 | if (offset_endptr == offset || *offset_endptr != '\0') |
| 132 | { |
| 133 | GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d" , |
| 134 | filename, lineno); |
| 135 | return false; |
| 136 | } |
| 137 | |
| 138 | is_dst = strtok(NULL, WHITESPACE); |
| 139 | if (is_dst && pg_strcasecmp(is_dst, "D" ) == 0) |
| 140 | { |
| 141 | tzentry->is_dst = true; |
| 142 | remain = strtok(NULL, WHITESPACE); |
| 143 | } |
| 144 | else |
| 145 | { |
| 146 | /* there was no 'D' dst specifier */ |
| 147 | tzentry->is_dst = false; |
| 148 | remain = is_dst; |
| 149 | } |
| 150 | } |
| 151 | else |
| 152 | { |
| 153 | /* |
| 154 | * Assume entry is a zone name. We do not try to validate it by |
| 155 | * looking up the zone, because that would force loading of a lot of |
| 156 | * zones that probably will never be used in the current session. |
| 157 | */ |
| 158 | tzentry->zone = pstrdup(offset); |
| 159 | tzentry->offset = 0; |
| 160 | tzentry->is_dst = false; |
| 161 | remain = strtok(NULL, WHITESPACE); |
| 162 | } |
| 163 | |
| 164 | if (!remain) /* no more non-whitespace chars */ |
| 165 | return true; |
| 166 | |
| 167 | if (remain[0] != '#') /* must be a comment */ |
| 168 | { |
| 169 | GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d" , |
| 170 | filename, lineno); |
| 171 | return false; |
| 172 | } |
| 173 | return true; |
| 174 | } |
| 175 | |
| 176 | /* |
| 177 | * Insert entry into sorted array |
| 178 | * |
| 179 | * *base: base address of array (changeable if must enlarge array) |
| 180 | * *arraysize: allocated length of array (changeable if must enlarge array) |
| 181 | * n: current number of valid elements in array |
| 182 | * entry: new data to insert |
| 183 | * override: true if OK to override |
| 184 | * |
| 185 | * Returns the new array length (new value for n), or -1 if error |
| 186 | */ |
| 187 | static int |
| 188 | addToArray(tzEntry **base, int *arraysize, int n, |
| 189 | tzEntry *entry, bool override) |
| 190 | { |
| 191 | tzEntry *arrayptr; |
| 192 | int low; |
| 193 | int high; |
| 194 | |
| 195 | /* |
| 196 | * Search the array for a duplicate; as a useful side effect, the array is |
| 197 | * maintained in sorted order. We use strcmp() to ensure we match the |
| 198 | * sort order datetime.c expects. |
| 199 | */ |
| 200 | arrayptr = *base; |
| 201 | low = 0; |
| 202 | high = n - 1; |
| 203 | while (low <= high) |
| 204 | { |
| 205 | int mid = (low + high) >> 1; |
| 206 | tzEntry *midptr = arrayptr + mid; |
| 207 | int cmp; |
| 208 | |
| 209 | cmp = strcmp(entry->abbrev, midptr->abbrev); |
| 210 | if (cmp < 0) |
| 211 | high = mid - 1; |
| 212 | else if (cmp > 0) |
| 213 | low = mid + 1; |
| 214 | else |
| 215 | { |
| 216 | /* |
| 217 | * Found a duplicate entry; complain unless it's the same. |
| 218 | */ |
| 219 | if ((midptr->zone == NULL && entry->zone == NULL && |
| 220 | midptr->offset == entry->offset && |
| 221 | midptr->is_dst == entry->is_dst) || |
| 222 | (midptr->zone != NULL && entry->zone != NULL && |
| 223 | strcmp(midptr->zone, entry->zone) == 0)) |
| 224 | { |
| 225 | /* return unchanged array */ |
| 226 | return n; |
| 227 | } |
| 228 | if (override) |
| 229 | { |
| 230 | /* same abbrev but something is different, override */ |
| 231 | midptr->zone = entry->zone; |
| 232 | midptr->offset = entry->offset; |
| 233 | midptr->is_dst = entry->is_dst; |
| 234 | return n; |
| 235 | } |
| 236 | /* same abbrev but something is different, complain */ |
| 237 | GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined" , |
| 238 | entry->abbrev); |
| 239 | GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d." , |
| 240 | midptr->filename, midptr->lineno, |
| 241 | entry->filename, entry->lineno); |
| 242 | return -1; |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | /* |
| 247 | * No match, insert at position "low". |
| 248 | */ |
| 249 | if (n >= *arraysize) |
| 250 | { |
| 251 | *arraysize *= 2; |
| 252 | *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry)); |
| 253 | } |
| 254 | |
| 255 | arrayptr = *base + low; |
| 256 | |
| 257 | memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry)); |
| 258 | |
| 259 | memcpy(arrayptr, entry, sizeof(tzEntry)); |
| 260 | |
| 261 | return n + 1; |
| 262 | } |
| 263 | |
| 264 | /* |
| 265 | * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE |
| 266 | * |
| 267 | * filename: user-specified file name (does not include path) |
| 268 | * depth: current recursion depth |
| 269 | * *base: array for results (changeable if must enlarge array) |
| 270 | * *arraysize: allocated length of array (changeable if must enlarge array) |
| 271 | * n: current number of valid elements in array |
| 272 | * |
| 273 | * Returns the new array length (new value for n), or -1 if error |
| 274 | */ |
| 275 | static int |
| 276 | ParseTzFile(const char *filename, int depth, |
| 277 | tzEntry **base, int *arraysize, int n) |
| 278 | { |
| 279 | char share_path[MAXPGPATH]; |
| 280 | char file_path[MAXPGPATH]; |
| 281 | FILE *tzFile; |
| 282 | char tzbuf[1024]; |
| 283 | char *line; |
| 284 | tzEntry tzentry; |
| 285 | int lineno = 0; |
| 286 | bool override = false; |
| 287 | const char *p; |
| 288 | |
| 289 | /* |
| 290 | * We enforce that the filename is all alpha characters. This may be |
| 291 | * overly restrictive, but we don't want to allow access to anything |
| 292 | * outside the timezonesets directory, so for instance '/' *must* be |
| 293 | * rejected. |
| 294 | */ |
| 295 | for (p = filename; *p; p++) |
| 296 | { |
| 297 | if (!isalpha((unsigned char) *p)) |
| 298 | { |
| 299 | /* at level 0, just use guc.c's regular "invalid value" message */ |
| 300 | if (depth > 0) |
| 301 | GUC_check_errmsg("invalid time zone file name \"%s\"" , |
| 302 | filename); |
| 303 | return -1; |
| 304 | } |
| 305 | } |
| 306 | |
| 307 | /* |
| 308 | * The maximal recursion depth is a pretty arbitrary setting. It is hard |
| 309 | * to imagine that someone needs more than 3 levels so stick with this |
| 310 | * conservative setting until someone complains. |
| 311 | */ |
| 312 | if (depth > 3) |
| 313 | { |
| 314 | GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"" , |
| 315 | filename); |
| 316 | return -1; |
| 317 | } |
| 318 | |
| 319 | get_share_path(my_exec_path, share_path); |
| 320 | snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s" , |
| 321 | share_path, filename); |
| 322 | tzFile = AllocateFile(file_path, "r" ); |
| 323 | if (!tzFile) |
| 324 | { |
| 325 | /* |
| 326 | * Check to see if the problem is not the filename but the directory. |
| 327 | * This is worth troubling over because if the installation share/ |
| 328 | * directory is missing or unreadable, this is likely to be the first |
| 329 | * place we notice a problem during postmaster startup. |
| 330 | */ |
| 331 | int save_errno = errno; |
| 332 | DIR *tzdir; |
| 333 | |
| 334 | snprintf(file_path, sizeof(file_path), "%s/timezonesets" , |
| 335 | share_path); |
| 336 | tzdir = AllocateDir(file_path); |
| 337 | if (tzdir == NULL) |
| 338 | { |
| 339 | GUC_check_errmsg("could not open directory \"%s\": %m" , |
| 340 | file_path); |
| 341 | GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location." , |
| 342 | my_exec_path); |
| 343 | return -1; |
| 344 | } |
| 345 | FreeDir(tzdir); |
| 346 | errno = save_errno; |
| 347 | |
| 348 | /* |
| 349 | * otherwise, if file doesn't exist and it's level 0, guc.c's |
| 350 | * complaint is enough |
| 351 | */ |
| 352 | if (errno != ENOENT || depth > 0) |
| 353 | GUC_check_errmsg("could not read time zone file \"%s\": %m" , |
| 354 | filename); |
| 355 | |
| 356 | return -1; |
| 357 | } |
| 358 | |
| 359 | while (!feof(tzFile)) |
| 360 | { |
| 361 | lineno++; |
| 362 | if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL) |
| 363 | { |
| 364 | if (ferror(tzFile)) |
| 365 | { |
| 366 | GUC_check_errmsg("could not read time zone file \"%s\": %m" , |
| 367 | filename); |
| 368 | return -1; |
| 369 | } |
| 370 | /* else we're at EOF after all */ |
| 371 | break; |
| 372 | } |
| 373 | if (strlen(tzbuf) == sizeof(tzbuf) - 1) |
| 374 | { |
| 375 | /* the line is too long for tzbuf */ |
| 376 | GUC_check_errmsg("line is too long in time zone file \"%s\", line %d" , |
| 377 | filename, lineno); |
| 378 | return -1; |
| 379 | } |
| 380 | |
| 381 | /* skip over whitespace */ |
| 382 | line = tzbuf; |
| 383 | while (*line && isspace((unsigned char) *line)) |
| 384 | line++; |
| 385 | |
| 386 | if (*line == '\0') /* empty line */ |
| 387 | continue; |
| 388 | if (*line == '#') /* comment line */ |
| 389 | continue; |
| 390 | |
| 391 | if (pg_strncasecmp(line, "@INCLUDE" , strlen("@INCLUDE" )) == 0) |
| 392 | { |
| 393 | /* pstrdup so we can use filename in result data structure */ |
| 394 | char *includeFile = pstrdup(line + strlen("@INCLUDE" )); |
| 395 | |
| 396 | includeFile = strtok(includeFile, WHITESPACE); |
| 397 | if (!includeFile || !*includeFile) |
| 398 | { |
| 399 | GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d" , |
| 400 | filename, lineno); |
| 401 | return -1; |
| 402 | } |
| 403 | n = ParseTzFile(includeFile, depth + 1, |
| 404 | base, arraysize, n); |
| 405 | if (n < 0) |
| 406 | return -1; |
| 407 | continue; |
| 408 | } |
| 409 | |
| 410 | if (pg_strncasecmp(line, "@OVERRIDE" , strlen("@OVERRIDE" )) == 0) |
| 411 | { |
| 412 | override = true; |
| 413 | continue; |
| 414 | } |
| 415 | |
| 416 | if (!splitTzLine(filename, lineno, line, &tzentry)) |
| 417 | return -1; |
| 418 | if (!validateTzEntry(&tzentry)) |
| 419 | return -1; |
| 420 | n = addToArray(base, arraysize, n, &tzentry, override); |
| 421 | if (n < 0) |
| 422 | return -1; |
| 423 | } |
| 424 | |
| 425 | FreeFile(tzFile); |
| 426 | |
| 427 | return n; |
| 428 | } |
| 429 | |
| 430 | /* |
| 431 | * load_tzoffsets --- read and parse the specified timezone offset file |
| 432 | * |
| 433 | * On success, return a filled-in TimeZoneAbbrevTable, which must have been |
| 434 | * malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg |
| 435 | * and friends to give details of the problem. |
| 436 | */ |
| 437 | TimeZoneAbbrevTable * |
| 438 | load_tzoffsets(const char *filename) |
| 439 | { |
| 440 | TimeZoneAbbrevTable *result = NULL; |
| 441 | MemoryContext tmpContext; |
| 442 | MemoryContext oldContext; |
| 443 | tzEntry *array; |
| 444 | int arraysize; |
| 445 | int n; |
| 446 | |
| 447 | /* |
| 448 | * Create a temp memory context to work in. This makes it easy to clean |
| 449 | * up afterwards. |
| 450 | */ |
| 451 | tmpContext = AllocSetContextCreate(CurrentMemoryContext, |
| 452 | "TZParserMemory" , |
| 453 | ALLOCSET_SMALL_SIZES); |
| 454 | oldContext = MemoryContextSwitchTo(tmpContext); |
| 455 | |
| 456 | /* Initialize array at a reasonable size */ |
| 457 | arraysize = 128; |
| 458 | array = (tzEntry *) palloc(arraysize * sizeof(tzEntry)); |
| 459 | |
| 460 | /* Parse the file(s) */ |
| 461 | n = ParseTzFile(filename, 0, &array, &arraysize, 0); |
| 462 | |
| 463 | /* If no errors so far, let datetime.c allocate memory & convert format */ |
| 464 | if (n >= 0) |
| 465 | { |
| 466 | result = ConvertTimeZoneAbbrevs(array, n); |
| 467 | if (!result) |
| 468 | GUC_check_errmsg("out of memory" ); |
| 469 | } |
| 470 | |
| 471 | /* Clean up */ |
| 472 | MemoryContextSwitchTo(oldContext); |
| 473 | MemoryContextDelete(tmpContext); |
| 474 | |
| 475 | return result; |
| 476 | } |
| 477 | |