| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * like_support.c |
| 4 | * Planner support functions for LIKE, regex, and related operators. |
| 5 | * |
| 6 | * These routines handle special optimization of operators that can be |
| 7 | * used with index scans even though they are not known to the executor's |
| 8 | * indexscan machinery. The key idea is that these operators allow us |
| 9 | * to derive approximate indexscan qual clauses, such that any tuples |
| 10 | * that pass the operator clause itself must also satisfy the simpler |
| 11 | * indexscan condition(s). Then we can use the indexscan machinery |
| 12 | * to avoid scanning as much of the table as we'd otherwise have to, |
| 13 | * while applying the original operator as a qpqual condition to ensure |
| 14 | * we deliver only the tuples we want. (In essence, we're using a regular |
| 15 | * index as if it were a lossy index.) |
| 16 | * |
| 17 | * An example of what we're doing is |
| 18 | * textfield LIKE 'abc%def' |
| 19 | * from which we can generate the indexscanable conditions |
| 20 | * textfield >= 'abc' AND textfield < 'abd' |
| 21 | * which allow efficient scanning of an index on textfield. |
| 22 | * (In reality, character set and collation issues make the transformation |
| 23 | * from LIKE to indexscan limits rather harder than one might think ... |
| 24 | * but that's the basic idea.) |
| 25 | * |
| 26 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 27 | * Portions Copyright (c) 1994, Regents of the University of California |
| 28 | * |
| 29 | * |
| 30 | * IDENTIFICATION |
| 31 | * src/backend/utils/adt/like_support.c |
| 32 | * |
| 33 | *------------------------------------------------------------------------- |
| 34 | */ |
| 35 | #include "postgres.h" |
| 36 | |
| 37 | #include <math.h> |
| 38 | |
| 39 | #include "access/htup_details.h" |
| 40 | #include "access/stratnum.h" |
| 41 | #include "catalog/pg_collation.h" |
| 42 | #include "catalog/pg_opfamily.h" |
| 43 | #include "catalog/pg_statistic.h" |
| 44 | #include "catalog/pg_type.h" |
| 45 | #include "mb/pg_wchar.h" |
| 46 | #include "nodes/makefuncs.h" |
| 47 | #include "nodes/nodeFuncs.h" |
| 48 | #include "nodes/supportnodes.h" |
| 49 | #include "utils/builtins.h" |
| 50 | #include "utils/datum.h" |
| 51 | #include "utils/lsyscache.h" |
| 52 | #include "utils/pg_locale.h" |
| 53 | #include "utils/selfuncs.h" |
| 54 | #include "utils/varlena.h" |
| 55 | |
| 56 | |
| 57 | typedef enum |
| 58 | { |
| 59 | Pattern_Type_Like, |
| 60 | Pattern_Type_Like_IC, |
| 61 | Pattern_Type_Regex, |
| 62 | Pattern_Type_Regex_IC, |
| 63 | Pattern_Type_Prefix |
| 64 | } Pattern_Type; |
| 65 | |
| 66 | typedef enum |
| 67 | { |
| 68 | Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact |
| 69 | } Pattern_Prefix_Status; |
| 70 | |
| 71 | static Node *like_regex_support(Node *rawreq, Pattern_Type ptype); |
| 72 | static List *match_pattern_prefix(Node *leftop, |
| 73 | Node *rightop, |
| 74 | Pattern_Type ptype, |
| 75 | Oid expr_coll, |
| 76 | Oid opfamily, |
| 77 | Oid indexcollation); |
| 78 | static double patternsel_common(PlannerInfo *root, |
| 79 | Oid oprid, |
| 80 | Oid opfuncid, |
| 81 | List *args, |
| 82 | int varRelid, |
| 83 | Oid collation, |
| 84 | Pattern_Type ptype, |
| 85 | bool negate); |
| 86 | static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, |
| 87 | Pattern_Type ptype, |
| 88 | Oid collation, |
| 89 | Const **prefix, |
| 90 | Selectivity *rest_selec); |
| 91 | static Selectivity prefix_selectivity(PlannerInfo *root, |
| 92 | VariableStatData *vardata, |
| 93 | Oid vartype, Oid opfamily, Const *prefixcon); |
| 94 | static Selectivity like_selectivity(const char *patt, int pattlen, |
| 95 | bool case_insensitive); |
| 96 | static Selectivity regex_selectivity(const char *patt, int pattlen, |
| 97 | bool case_insensitive, |
| 98 | int fixed_prefix_len); |
| 99 | static int pattern_char_isalpha(char c, bool is_multibyte, |
| 100 | pg_locale_t locale, bool locale_is_c); |
| 101 | static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc, |
| 102 | Oid collation); |
| 103 | static Datum string_to_datum(const char *str, Oid datatype); |
| 104 | static Const *string_to_const(const char *str, Oid datatype); |
| 105 | static Const *string_to_bytea_const(const char *str, size_t str_len); |
| 106 | |
| 107 | |
| 108 | /* |
| 109 | * Planner support functions for LIKE, regex, and related operators |
| 110 | */ |
| 111 | Datum |
| 112 | textlike_support(PG_FUNCTION_ARGS) |
| 113 | { |
| 114 | Node *rawreq = (Node *) PG_GETARG_POINTER(0); |
| 115 | |
| 116 | PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like)); |
| 117 | } |
| 118 | |
| 119 | Datum |
| 120 | texticlike_support(PG_FUNCTION_ARGS) |
| 121 | { |
| 122 | Node *rawreq = (Node *) PG_GETARG_POINTER(0); |
| 123 | |
| 124 | PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC)); |
| 125 | } |
| 126 | |
| 127 | Datum |
| 128 | textregexeq_support(PG_FUNCTION_ARGS) |
| 129 | { |
| 130 | Node *rawreq = (Node *) PG_GETARG_POINTER(0); |
| 131 | |
| 132 | PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex)); |
| 133 | } |
| 134 | |
| 135 | Datum |
| 136 | texticregexeq_support(PG_FUNCTION_ARGS) |
| 137 | { |
| 138 | Node *rawreq = (Node *) PG_GETARG_POINTER(0); |
| 139 | |
| 140 | PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC)); |
| 141 | } |
| 142 | |
| 143 | /* Common code for the above */ |
| 144 | static Node * |
| 145 | like_regex_support(Node *rawreq, Pattern_Type ptype) |
| 146 | { |
| 147 | Node *ret = NULL; |
| 148 | |
| 149 | if (IsA(rawreq, SupportRequestSelectivity)) |
| 150 | { |
| 151 | /* |
| 152 | * Make a selectivity estimate for a function call, just as we'd do if |
| 153 | * the call was via the corresponding operator. |
| 154 | */ |
| 155 | SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq; |
| 156 | Selectivity s1; |
| 157 | |
| 158 | if (req->is_join) |
| 159 | { |
| 160 | /* |
| 161 | * For the moment we just punt. If patternjoinsel is ever |
| 162 | * improved to do better, this should be made to call it. |
| 163 | */ |
| 164 | s1 = DEFAULT_MATCH_SEL; |
| 165 | } |
| 166 | else |
| 167 | { |
| 168 | /* Share code with operator restriction selectivity functions */ |
| 169 | s1 = patternsel_common(req->root, |
| 170 | InvalidOid, |
| 171 | req->funcid, |
| 172 | req->args, |
| 173 | req->varRelid, |
| 174 | req->inputcollid, |
| 175 | ptype, |
| 176 | false); |
| 177 | } |
| 178 | req->selectivity = s1; |
| 179 | ret = (Node *) req; |
| 180 | } |
| 181 | else if (IsA(rawreq, SupportRequestIndexCondition)) |
| 182 | { |
| 183 | /* Try to convert operator/function call to index conditions */ |
| 184 | SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq; |
| 185 | |
| 186 | /* |
| 187 | * Currently we have no "reverse" match operators with the pattern on |
| 188 | * the left, so we only need consider cases with the indexkey on the |
| 189 | * left. |
| 190 | */ |
| 191 | if (req->indexarg != 0) |
| 192 | return NULL; |
| 193 | |
| 194 | if (is_opclause(req->node)) |
| 195 | { |
| 196 | OpExpr *clause = (OpExpr *) req->node; |
| 197 | |
| 198 | Assert(list_length(clause->args) == 2); |
| 199 | ret = (Node *) |
| 200 | match_pattern_prefix((Node *) linitial(clause->args), |
| 201 | (Node *) lsecond(clause->args), |
| 202 | ptype, |
| 203 | clause->inputcollid, |
| 204 | req->opfamily, |
| 205 | req->indexcollation); |
| 206 | } |
| 207 | else if (is_funcclause(req->node)) /* be paranoid */ |
| 208 | { |
| 209 | FuncExpr *clause = (FuncExpr *) req->node; |
| 210 | |
| 211 | Assert(list_length(clause->args) == 2); |
| 212 | ret = (Node *) |
| 213 | match_pattern_prefix((Node *) linitial(clause->args), |
| 214 | (Node *) lsecond(clause->args), |
| 215 | ptype, |
| 216 | clause->inputcollid, |
| 217 | req->opfamily, |
| 218 | req->indexcollation); |
| 219 | } |
| 220 | } |
| 221 | |
| 222 | return ret; |
| 223 | } |
| 224 | |
| 225 | /* |
| 226 | * match_pattern_prefix |
| 227 | * Try to generate an indexqual for a LIKE or regex operator. |
| 228 | */ |
| 229 | static List * |
| 230 | match_pattern_prefix(Node *leftop, |
| 231 | Node *rightop, |
| 232 | Pattern_Type ptype, |
| 233 | Oid expr_coll, |
| 234 | Oid opfamily, |
| 235 | Oid indexcollation) |
| 236 | { |
| 237 | List *result; |
| 238 | Const *patt; |
| 239 | Const *prefix; |
| 240 | Pattern_Prefix_Status pstatus; |
| 241 | Oid ldatatype; |
| 242 | Oid rdatatype; |
| 243 | Oid oproid; |
| 244 | Expr *expr; |
| 245 | FmgrInfo ltproc; |
| 246 | Const *greaterstr; |
| 247 | |
| 248 | /* |
| 249 | * Can't do anything with a non-constant or NULL pattern argument. |
| 250 | * |
| 251 | * Note that since we restrict ourselves to cases with a hard constant on |
| 252 | * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry |
| 253 | * about verifying that. |
| 254 | */ |
| 255 | if (!IsA(rightop, Const) || |
| 256 | ((Const *) rightop)->constisnull) |
| 257 | return NIL; |
| 258 | patt = (Const *) rightop; |
| 259 | |
| 260 | /* |
| 261 | * Not supported if the expression collation is nondeterministic. The |
| 262 | * optimized equality or prefix tests use bytewise comparisons, which is |
| 263 | * not consistent with nondeterministic collations. The actual |
| 264 | * pattern-matching implementation functions will later error out that |
| 265 | * pattern-matching is not supported with nondeterministic collations. (We |
| 266 | * could also error out here, but by doing it later we get more precise |
| 267 | * error messages.) (It should be possible to support at least |
| 268 | * Pattern_Prefix_Exact, but no point as along as the actual |
| 269 | * pattern-matching implementations don't support it.) |
| 270 | * |
| 271 | * expr_coll is not set for a non-collation-aware data type such as bytea. |
| 272 | */ |
| 273 | if (expr_coll && !get_collation_isdeterministic(expr_coll)) |
| 274 | return NIL; |
| 275 | |
| 276 | /* |
| 277 | * Try to extract a fixed prefix from the pattern. |
| 278 | */ |
| 279 | pstatus = pattern_fixed_prefix(patt, ptype, expr_coll, |
| 280 | &prefix, NULL); |
| 281 | |
| 282 | /* fail if no fixed prefix */ |
| 283 | if (pstatus == Pattern_Prefix_None) |
| 284 | return NIL; |
| 285 | |
| 286 | /* |
| 287 | * Must also check that index's opfamily supports the operators we will |
| 288 | * want to apply. (A hash index, for example, will not support ">=".) |
| 289 | * Currently, only btree and spgist support the operators we need. |
| 290 | * |
| 291 | * Note: actually, in the Pattern_Prefix_Exact case, we only need "=" so a |
| 292 | * hash index would work. Currently it doesn't seem worth checking for |
| 293 | * that, however. |
| 294 | * |
| 295 | * We insist on the opfamily being one of the specific ones we expect, |
| 296 | * else we'd do the wrong thing if someone were to make a reverse-sort |
| 297 | * opfamily with the same operators. |
| 298 | * |
| 299 | * The non-pattern opclasses will not sort the way we need in most non-C |
| 300 | * locales. We can use such an index anyway for an exact match (simple |
| 301 | * equality), but not for prefix-match cases. Note that here we are |
| 302 | * looking at the index's collation, not the expression's collation -- |
| 303 | * this test is *not* dependent on the LIKE/regex operator's collation. |
| 304 | * |
| 305 | * While we're at it, identify the type the comparison constant(s) should |
| 306 | * have, based on the opfamily. |
| 307 | */ |
| 308 | switch (opfamily) |
| 309 | { |
| 310 | case TEXT_BTREE_FAM_OID: |
| 311 | if (!(pstatus == Pattern_Prefix_Exact || |
| 312 | lc_collate_is_c(indexcollation))) |
| 313 | return NIL; |
| 314 | rdatatype = TEXTOID; |
| 315 | break; |
| 316 | |
| 317 | case TEXT_PATTERN_BTREE_FAM_OID: |
| 318 | case TEXT_SPGIST_FAM_OID: |
| 319 | rdatatype = TEXTOID; |
| 320 | break; |
| 321 | |
| 322 | case BPCHAR_BTREE_FAM_OID: |
| 323 | if (!(pstatus == Pattern_Prefix_Exact || |
| 324 | lc_collate_is_c(indexcollation))) |
| 325 | return NIL; |
| 326 | rdatatype = BPCHAROID; |
| 327 | break; |
| 328 | |
| 329 | case BPCHAR_PATTERN_BTREE_FAM_OID: |
| 330 | rdatatype = BPCHAROID; |
| 331 | break; |
| 332 | |
| 333 | case BYTEA_BTREE_FAM_OID: |
| 334 | rdatatype = BYTEAOID; |
| 335 | break; |
| 336 | |
| 337 | default: |
| 338 | return NIL; |
| 339 | } |
| 340 | |
| 341 | /* OK, prepare to create the indexqual(s) */ |
| 342 | ldatatype = exprType(leftop); |
| 343 | |
| 344 | /* |
| 345 | * If necessary, coerce the prefix constant to the right type. The given |
| 346 | * prefix constant is either text or bytea type, therefore the only case |
| 347 | * where we need to do anything is when converting text to bpchar. Those |
| 348 | * two types are binary-compatible, so relabeling the Const node is |
| 349 | * sufficient. |
| 350 | */ |
| 351 | if (prefix->consttype != rdatatype) |
| 352 | { |
| 353 | Assert(prefix->consttype == TEXTOID && |
| 354 | rdatatype == BPCHAROID); |
| 355 | prefix->consttype = rdatatype; |
| 356 | } |
| 357 | |
| 358 | /* |
| 359 | * If we found an exact-match pattern, generate an "=" indexqual. |
| 360 | */ |
| 361 | if (pstatus == Pattern_Prefix_Exact) |
| 362 | { |
| 363 | oproid = get_opfamily_member(opfamily, ldatatype, rdatatype, |
| 364 | BTEqualStrategyNumber); |
| 365 | if (oproid == InvalidOid) |
| 366 | elog(ERROR, "no = operator for opfamily %u" , opfamily); |
| 367 | expr = make_opclause(oproid, BOOLOID, false, |
| 368 | (Expr *) leftop, (Expr *) prefix, |
| 369 | InvalidOid, indexcollation); |
| 370 | result = list_make1(expr); |
| 371 | return result; |
| 372 | } |
| 373 | |
| 374 | /* |
| 375 | * Otherwise, we have a nonempty required prefix of the values. |
| 376 | * |
| 377 | * We can always say "x >= prefix". |
| 378 | */ |
| 379 | oproid = get_opfamily_member(opfamily, ldatatype, rdatatype, |
| 380 | BTGreaterEqualStrategyNumber); |
| 381 | if (oproid == InvalidOid) |
| 382 | elog(ERROR, "no >= operator for opfamily %u" , opfamily); |
| 383 | expr = make_opclause(oproid, BOOLOID, false, |
| 384 | (Expr *) leftop, (Expr *) prefix, |
| 385 | InvalidOid, indexcollation); |
| 386 | result = list_make1(expr); |
| 387 | |
| 388 | /*------- |
| 389 | * If we can create a string larger than the prefix, we can say |
| 390 | * "x < greaterstr". NB: we rely on make_greater_string() to generate |
| 391 | * a guaranteed-greater string, not just a probably-greater string. |
| 392 | * In general this is only guaranteed in C locale, so we'd better be |
| 393 | * using a C-locale index collation. |
| 394 | *------- |
| 395 | */ |
| 396 | oproid = get_opfamily_member(opfamily, ldatatype, rdatatype, |
| 397 | BTLessStrategyNumber); |
| 398 | if (oproid == InvalidOid) |
| 399 | elog(ERROR, "no < operator for opfamily %u" , opfamily); |
| 400 | fmgr_info(get_opcode(oproid), <proc); |
| 401 | greaterstr = make_greater_string(prefix, <proc, indexcollation); |
| 402 | if (greaterstr) |
| 403 | { |
| 404 | expr = make_opclause(oproid, BOOLOID, false, |
| 405 | (Expr *) leftop, (Expr *) greaterstr, |
| 406 | InvalidOid, indexcollation); |
| 407 | result = lappend(result, expr); |
| 408 | } |
| 409 | |
| 410 | return result; |
| 411 | } |
| 412 | |
| 413 | |
| 414 | /* |
| 415 | * patternsel_common - generic code for pattern-match restriction selectivity. |
| 416 | * |
| 417 | * To support using this from either the operator or function paths, caller |
| 418 | * may pass either operator OID or underlying function OID; we look up the |
| 419 | * latter from the former if needed. (We could just have patternsel() call |
| 420 | * get_opcode(), but the work would be wasted if we don't have a need to |
| 421 | * compare a fixed prefix to the pg_statistic data.) |
| 422 | * |
| 423 | * Note that oprid and/or opfuncid should be for the positive-match operator |
| 424 | * even when negate is true. |
| 425 | */ |
| 426 | static double |
| 427 | patternsel_common(PlannerInfo *root, |
| 428 | Oid oprid, |
| 429 | Oid opfuncid, |
| 430 | List *args, |
| 431 | int varRelid, |
| 432 | Oid collation, |
| 433 | Pattern_Type ptype, |
| 434 | bool negate) |
| 435 | { |
| 436 | VariableStatData vardata; |
| 437 | Node *other; |
| 438 | bool varonleft; |
| 439 | Datum constval; |
| 440 | Oid consttype; |
| 441 | Oid vartype; |
| 442 | Oid opfamily; |
| 443 | Pattern_Prefix_Status pstatus; |
| 444 | Const *patt; |
| 445 | Const *prefix = NULL; |
| 446 | Selectivity rest_selec = 0; |
| 447 | double nullfrac = 0.0; |
| 448 | double result; |
| 449 | |
| 450 | /* |
| 451 | * Initialize result to the appropriate default estimate depending on |
| 452 | * whether it's a match or not-match operator. |
| 453 | */ |
| 454 | if (negate) |
| 455 | result = 1.0 - DEFAULT_MATCH_SEL; |
| 456 | else |
| 457 | result = DEFAULT_MATCH_SEL; |
| 458 | |
| 459 | /* |
| 460 | * If expression is not variable op constant, then punt and return the |
| 461 | * default estimate. |
| 462 | */ |
| 463 | if (!get_restriction_variable(root, args, varRelid, |
| 464 | &vardata, &other, &varonleft)) |
| 465 | return result; |
| 466 | if (!varonleft || !IsA(other, Const)) |
| 467 | { |
| 468 | ReleaseVariableStats(vardata); |
| 469 | return result; |
| 470 | } |
| 471 | |
| 472 | /* |
| 473 | * If the constant is NULL, assume operator is strict and return zero, ie, |
| 474 | * operator will never return TRUE. (It's zero even for a negator op.) |
| 475 | */ |
| 476 | if (((Const *) other)->constisnull) |
| 477 | { |
| 478 | ReleaseVariableStats(vardata); |
| 479 | return 0.0; |
| 480 | } |
| 481 | constval = ((Const *) other)->constvalue; |
| 482 | consttype = ((Const *) other)->consttype; |
| 483 | |
| 484 | /* |
| 485 | * The right-hand const is type text or bytea for all supported operators. |
| 486 | * We do not expect to see binary-compatible types here, since |
| 487 | * const-folding should have relabeled the const to exactly match the |
| 488 | * operator's declared type. |
| 489 | */ |
| 490 | if (consttype != TEXTOID && consttype != BYTEAOID) |
| 491 | { |
| 492 | ReleaseVariableStats(vardata); |
| 493 | return result; |
| 494 | } |
| 495 | |
| 496 | /* |
| 497 | * Similarly, the exposed type of the left-hand side should be one of |
| 498 | * those we know. (Do not look at vardata.atttype, which might be |
| 499 | * something binary-compatible but different.) We can use it to choose |
| 500 | * the index opfamily from which we must draw the comparison operators. |
| 501 | * |
| 502 | * NOTE: It would be more correct to use the PATTERN opfamilies than the |
| 503 | * simple ones, but at the moment ANALYZE will not generate statistics for |
| 504 | * the PATTERN operators. But our results are so approximate anyway that |
| 505 | * it probably hardly matters. |
| 506 | */ |
| 507 | vartype = vardata.vartype; |
| 508 | |
| 509 | switch (vartype) |
| 510 | { |
| 511 | case TEXTOID: |
| 512 | case NAMEOID: |
| 513 | opfamily = TEXT_BTREE_FAM_OID; |
| 514 | break; |
| 515 | case BPCHAROID: |
| 516 | opfamily = BPCHAR_BTREE_FAM_OID; |
| 517 | break; |
| 518 | case BYTEAOID: |
| 519 | opfamily = BYTEA_BTREE_FAM_OID; |
| 520 | break; |
| 521 | default: |
| 522 | ReleaseVariableStats(vardata); |
| 523 | return result; |
| 524 | } |
| 525 | |
| 526 | /* |
| 527 | * Grab the nullfrac for use below. |
| 528 | */ |
| 529 | if (HeapTupleIsValid(vardata.statsTuple)) |
| 530 | { |
| 531 | Form_pg_statistic stats; |
| 532 | |
| 533 | stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); |
| 534 | nullfrac = stats->stanullfrac; |
| 535 | } |
| 536 | |
| 537 | /* |
| 538 | * Pull out any fixed prefix implied by the pattern, and estimate the |
| 539 | * fractional selectivity of the remainder of the pattern. Unlike many |
| 540 | * other selectivity estimators, we use the pattern operator's actual |
| 541 | * collation for this step. This is not because we expect the collation |
| 542 | * to make a big difference in the selectivity estimate (it seldom would), |
| 543 | * but because we want to be sure we cache compiled regexps under the |
| 544 | * right cache key, so that they can be re-used at runtime. |
| 545 | */ |
| 546 | patt = (Const *) other; |
| 547 | pstatus = pattern_fixed_prefix(patt, ptype, collation, |
| 548 | &prefix, &rest_selec); |
| 549 | |
| 550 | /* |
| 551 | * If necessary, coerce the prefix constant to the right type. |
| 552 | */ |
| 553 | if (prefix && prefix->consttype != vartype) |
| 554 | { |
| 555 | char *prefixstr; |
| 556 | |
| 557 | switch (prefix->consttype) |
| 558 | { |
| 559 | case TEXTOID: |
| 560 | prefixstr = TextDatumGetCString(prefix->constvalue); |
| 561 | break; |
| 562 | case BYTEAOID: |
| 563 | prefixstr = DatumGetCString(DirectFunctionCall1(byteaout, |
| 564 | prefix->constvalue)); |
| 565 | break; |
| 566 | default: |
| 567 | elog(ERROR, "unrecognized consttype: %u" , |
| 568 | prefix->consttype); |
| 569 | ReleaseVariableStats(vardata); |
| 570 | return result; |
| 571 | } |
| 572 | prefix = string_to_const(prefixstr, vartype); |
| 573 | pfree(prefixstr); |
| 574 | } |
| 575 | |
| 576 | if (pstatus == Pattern_Prefix_Exact) |
| 577 | { |
| 578 | /* |
| 579 | * Pattern specifies an exact match, so pretend operator is '=' |
| 580 | */ |
| 581 | Oid eqopr = get_opfamily_member(opfamily, vartype, vartype, |
| 582 | BTEqualStrategyNumber); |
| 583 | |
| 584 | if (eqopr == InvalidOid) |
| 585 | elog(ERROR, "no = operator for opfamily %u" , opfamily); |
| 586 | result = var_eq_const(&vardata, eqopr, prefix->constvalue, |
| 587 | false, true, false); |
| 588 | } |
| 589 | else |
| 590 | { |
| 591 | /* |
| 592 | * Not exact-match pattern. If we have a sufficiently large |
| 593 | * histogram, estimate selectivity for the histogram part of the |
| 594 | * population by counting matches in the histogram. If not, estimate |
| 595 | * selectivity of the fixed prefix and remainder of pattern |
| 596 | * separately, then combine the two to get an estimate of the |
| 597 | * selectivity for the part of the column population represented by |
| 598 | * the histogram. (For small histograms, we combine these |
| 599 | * approaches.) |
| 600 | * |
| 601 | * We then add up data for any most-common-values values; these are |
| 602 | * not in the histogram population, and we can get exact answers for |
| 603 | * them by applying the pattern operator, so there's no reason to |
| 604 | * approximate. (If the MCVs cover a significant part of the total |
| 605 | * population, this gives us a big leg up in accuracy.) |
| 606 | */ |
| 607 | Selectivity selec; |
| 608 | int hist_size; |
| 609 | FmgrInfo opproc; |
| 610 | double mcv_selec, |
| 611 | sumcommon; |
| 612 | |
| 613 | /* Try to use the histogram entries to get selectivity */ |
| 614 | if (!OidIsValid(opfuncid)) |
| 615 | opfuncid = get_opcode(oprid); |
| 616 | fmgr_info(opfuncid, &opproc); |
| 617 | |
| 618 | selec = histogram_selectivity(&vardata, &opproc, constval, true, |
| 619 | 10, 1, &hist_size); |
| 620 | |
| 621 | /* If not at least 100 entries, use the heuristic method */ |
| 622 | if (hist_size < 100) |
| 623 | { |
| 624 | Selectivity heursel; |
| 625 | Selectivity prefixsel; |
| 626 | |
| 627 | if (pstatus == Pattern_Prefix_Partial) |
| 628 | prefixsel = prefix_selectivity(root, &vardata, vartype, |
| 629 | opfamily, prefix); |
| 630 | else |
| 631 | prefixsel = 1.0; |
| 632 | heursel = prefixsel * rest_selec; |
| 633 | |
| 634 | if (selec < 0) /* fewer than 10 histogram entries? */ |
| 635 | selec = heursel; |
| 636 | else |
| 637 | { |
| 638 | /* |
| 639 | * For histogram sizes from 10 to 100, we combine the |
| 640 | * histogram and heuristic selectivities, putting increasingly |
| 641 | * more trust in the histogram for larger sizes. |
| 642 | */ |
| 643 | double hist_weight = hist_size / 100.0; |
| 644 | |
| 645 | selec = selec * hist_weight + heursel * (1.0 - hist_weight); |
| 646 | } |
| 647 | } |
| 648 | |
| 649 | /* In any case, don't believe extremely small or large estimates. */ |
| 650 | if (selec < 0.0001) |
| 651 | selec = 0.0001; |
| 652 | else if (selec > 0.9999) |
| 653 | selec = 0.9999; |
| 654 | |
| 655 | /* |
| 656 | * If we have most-common-values info, add up the fractions of the MCV |
| 657 | * entries that satisfy MCV OP PATTERN. These fractions contribute |
| 658 | * directly to the result selectivity. Also add up the total fraction |
| 659 | * represented by MCV entries. |
| 660 | */ |
| 661 | mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true, |
| 662 | &sumcommon); |
| 663 | |
| 664 | /* |
| 665 | * Now merge the results from the MCV and histogram calculations, |
| 666 | * realizing that the histogram covers only the non-null values that |
| 667 | * are not listed in MCV. |
| 668 | */ |
| 669 | selec *= 1.0 - nullfrac - sumcommon; |
| 670 | selec += mcv_selec; |
| 671 | result = selec; |
| 672 | } |
| 673 | |
| 674 | /* now adjust if we wanted not-match rather than match */ |
| 675 | if (negate) |
| 676 | result = 1.0 - result - nullfrac; |
| 677 | |
| 678 | /* result should be in range, but make sure... */ |
| 679 | CLAMP_PROBABILITY(result); |
| 680 | |
| 681 | if (prefix) |
| 682 | { |
| 683 | pfree(DatumGetPointer(prefix->constvalue)); |
| 684 | pfree(prefix); |
| 685 | } |
| 686 | |
| 687 | ReleaseVariableStats(vardata); |
| 688 | |
| 689 | return result; |
| 690 | } |
| 691 | |
| 692 | /* |
| 693 | * Fix impedance mismatch between SQL-callable functions and patternsel_common |
| 694 | */ |
| 695 | static double |
| 696 | patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) |
| 697 | { |
| 698 | PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); |
| 699 | Oid operator = PG_GETARG_OID(1); |
| 700 | List *args = (List *) PG_GETARG_POINTER(2); |
| 701 | int varRelid = PG_GETARG_INT32(3); |
| 702 | Oid collation = PG_GET_COLLATION(); |
| 703 | |
| 704 | /* |
| 705 | * If this is for a NOT LIKE or similar operator, get the corresponding |
| 706 | * positive-match operator and work with that. |
| 707 | */ |
| 708 | if (negate) |
| 709 | { |
| 710 | operator = get_negator(operator); |
| 711 | if (!OidIsValid(operator)) |
| 712 | elog(ERROR, "patternsel called for operator without a negator" ); |
| 713 | } |
| 714 | |
| 715 | return patternsel_common(root, |
| 716 | operator, |
| 717 | InvalidOid, |
| 718 | args, |
| 719 | varRelid, |
| 720 | collation, |
| 721 | ptype, |
| 722 | negate); |
| 723 | } |
| 724 | |
| 725 | /* |
| 726 | * regexeqsel - Selectivity of regular-expression pattern match. |
| 727 | */ |
| 728 | Datum |
| 729 | regexeqsel(PG_FUNCTION_ARGS) |
| 730 | { |
| 731 | PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false)); |
| 732 | } |
| 733 | |
| 734 | /* |
| 735 | * icregexeqsel - Selectivity of case-insensitive regex match. |
| 736 | */ |
| 737 | Datum |
| 738 | icregexeqsel(PG_FUNCTION_ARGS) |
| 739 | { |
| 740 | PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false)); |
| 741 | } |
| 742 | |
| 743 | /* |
| 744 | * likesel - Selectivity of LIKE pattern match. |
| 745 | */ |
| 746 | Datum |
| 747 | likesel(PG_FUNCTION_ARGS) |
| 748 | { |
| 749 | PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false)); |
| 750 | } |
| 751 | |
| 752 | /* |
| 753 | * prefixsel - selectivity of prefix operator |
| 754 | */ |
| 755 | Datum |
| 756 | prefixsel(PG_FUNCTION_ARGS) |
| 757 | { |
| 758 | PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false)); |
| 759 | } |
| 760 | |
| 761 | /* |
| 762 | * |
| 763 | * iclikesel - Selectivity of ILIKE pattern match. |
| 764 | */ |
| 765 | Datum |
| 766 | iclikesel(PG_FUNCTION_ARGS) |
| 767 | { |
| 768 | PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false)); |
| 769 | } |
| 770 | |
| 771 | /* |
| 772 | * regexnesel - Selectivity of regular-expression pattern non-match. |
| 773 | */ |
| 774 | Datum |
| 775 | regexnesel(PG_FUNCTION_ARGS) |
| 776 | { |
| 777 | PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true)); |
| 778 | } |
| 779 | |
| 780 | /* |
| 781 | * icregexnesel - Selectivity of case-insensitive regex non-match. |
| 782 | */ |
| 783 | Datum |
| 784 | icregexnesel(PG_FUNCTION_ARGS) |
| 785 | { |
| 786 | PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true)); |
| 787 | } |
| 788 | |
| 789 | /* |
| 790 | * nlikesel - Selectivity of LIKE pattern non-match. |
| 791 | */ |
| 792 | Datum |
| 793 | nlikesel(PG_FUNCTION_ARGS) |
| 794 | { |
| 795 | PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true)); |
| 796 | } |
| 797 | |
| 798 | /* |
| 799 | * icnlikesel - Selectivity of ILIKE pattern non-match. |
| 800 | */ |
| 801 | Datum |
| 802 | icnlikesel(PG_FUNCTION_ARGS) |
| 803 | { |
| 804 | PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true)); |
| 805 | } |
| 806 | |
| 807 | /* |
| 808 | * patternjoinsel - Generic code for pattern-match join selectivity. |
| 809 | */ |
| 810 | static double |
| 811 | patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) |
| 812 | { |
| 813 | /* For the moment we just punt. */ |
| 814 | return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL; |
| 815 | } |
| 816 | |
| 817 | /* |
| 818 | * regexeqjoinsel - Join selectivity of regular-expression pattern match. |
| 819 | */ |
| 820 | Datum |
| 821 | regexeqjoinsel(PG_FUNCTION_ARGS) |
| 822 | { |
| 823 | PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false)); |
| 824 | } |
| 825 | |
| 826 | /* |
| 827 | * icregexeqjoinsel - Join selectivity of case-insensitive regex match. |
| 828 | */ |
| 829 | Datum |
| 830 | icregexeqjoinsel(PG_FUNCTION_ARGS) |
| 831 | { |
| 832 | PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false)); |
| 833 | } |
| 834 | |
| 835 | /* |
| 836 | * likejoinsel - Join selectivity of LIKE pattern match. |
| 837 | */ |
| 838 | Datum |
| 839 | likejoinsel(PG_FUNCTION_ARGS) |
| 840 | { |
| 841 | PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false)); |
| 842 | } |
| 843 | |
| 844 | /* |
| 845 | * prefixjoinsel - Join selectivity of prefix operator |
| 846 | */ |
| 847 | Datum |
| 848 | prefixjoinsel(PG_FUNCTION_ARGS) |
| 849 | { |
| 850 | PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false)); |
| 851 | } |
| 852 | |
| 853 | /* |
| 854 | * iclikejoinsel - Join selectivity of ILIKE pattern match. |
| 855 | */ |
| 856 | Datum |
| 857 | iclikejoinsel(PG_FUNCTION_ARGS) |
| 858 | { |
| 859 | PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false)); |
| 860 | } |
| 861 | |
| 862 | /* |
| 863 | * regexnejoinsel - Join selectivity of regex non-match. |
| 864 | */ |
| 865 | Datum |
| 866 | regexnejoinsel(PG_FUNCTION_ARGS) |
| 867 | { |
| 868 | PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true)); |
| 869 | } |
| 870 | |
| 871 | /* |
| 872 | * icregexnejoinsel - Join selectivity of case-insensitive regex non-match. |
| 873 | */ |
| 874 | Datum |
| 875 | icregexnejoinsel(PG_FUNCTION_ARGS) |
| 876 | { |
| 877 | PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true)); |
| 878 | } |
| 879 | |
| 880 | /* |
| 881 | * nlikejoinsel - Join selectivity of LIKE pattern non-match. |
| 882 | */ |
| 883 | Datum |
| 884 | nlikejoinsel(PG_FUNCTION_ARGS) |
| 885 | { |
| 886 | PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true)); |
| 887 | } |
| 888 | |
| 889 | /* |
| 890 | * icnlikejoinsel - Join selectivity of ILIKE pattern non-match. |
| 891 | */ |
| 892 | Datum |
| 893 | icnlikejoinsel(PG_FUNCTION_ARGS) |
| 894 | { |
| 895 | PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true)); |
| 896 | } |
| 897 | |
| 898 | |
| 899 | /*------------------------------------------------------------------------- |
| 900 | * |
| 901 | * Pattern analysis functions |
| 902 | * |
| 903 | * These routines support analysis of LIKE and regular-expression patterns |
| 904 | * by the planner/optimizer. It's important that they agree with the |
| 905 | * regular-expression code in backend/regex/ and the LIKE code in |
| 906 | * backend/utils/adt/like.c. Also, the computation of the fixed prefix |
| 907 | * must be conservative: if we report a string longer than the true fixed |
| 908 | * prefix, the query may produce actually wrong answers, rather than just |
| 909 | * getting a bad selectivity estimate! |
| 910 | * |
| 911 | *------------------------------------------------------------------------- |
| 912 | */ |
| 913 | |
| 914 | /* |
| 915 | * Extract the fixed prefix, if any, for a pattern. |
| 916 | * |
| 917 | * *prefix is set to a palloc'd prefix string (in the form of a Const node), |
| 918 | * or to NULL if no fixed prefix exists for the pattern. |
| 919 | * If rest_selec is not NULL, *rest_selec is set to an estimate of the |
| 920 | * selectivity of the remainder of the pattern (without any fixed prefix). |
| 921 | * The prefix Const has the same type (TEXT or BYTEA) as the input pattern. |
| 922 | * |
| 923 | * The return value distinguishes no fixed prefix, a partial prefix, |
| 924 | * or an exact-match-only pattern. |
| 925 | */ |
| 926 | |
| 927 | static Pattern_Prefix_Status |
| 928 | like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, |
| 929 | Const **prefix_const, Selectivity *rest_selec) |
| 930 | { |
| 931 | char *match; |
| 932 | char *patt; |
| 933 | int pattlen; |
| 934 | Oid typeid = patt_const->consttype; |
| 935 | int pos, |
| 936 | match_pos; |
| 937 | bool is_multibyte = (pg_database_encoding_max_length() > 1); |
| 938 | pg_locale_t locale = 0; |
| 939 | bool locale_is_c = false; |
| 940 | |
| 941 | /* the right-hand const is type text or bytea */ |
| 942 | Assert(typeid == BYTEAOID || typeid == TEXTOID); |
| 943 | |
| 944 | if (case_insensitive) |
| 945 | { |
| 946 | if (typeid == BYTEAOID) |
| 947 | ereport(ERROR, |
| 948 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| 949 | errmsg("case insensitive matching not supported on type bytea" ))); |
| 950 | |
| 951 | /* If case-insensitive, we need locale info */ |
| 952 | if (lc_ctype_is_c(collation)) |
| 953 | locale_is_c = true; |
| 954 | else if (collation != DEFAULT_COLLATION_OID) |
| 955 | { |
| 956 | if (!OidIsValid(collation)) |
| 957 | { |
| 958 | /* |
| 959 | * This typically means that the parser could not resolve a |
| 960 | * conflict of implicit collations, so report it that way. |
| 961 | */ |
| 962 | ereport(ERROR, |
| 963 | (errcode(ERRCODE_INDETERMINATE_COLLATION), |
| 964 | errmsg("could not determine which collation to use for ILIKE" ), |
| 965 | errhint("Use the COLLATE clause to set the collation explicitly." ))); |
| 966 | } |
| 967 | locale = pg_newlocale_from_collation(collation); |
| 968 | } |
| 969 | } |
| 970 | |
| 971 | if (typeid != BYTEAOID) |
| 972 | { |
| 973 | patt = TextDatumGetCString(patt_const->constvalue); |
| 974 | pattlen = strlen(patt); |
| 975 | } |
| 976 | else |
| 977 | { |
| 978 | bytea *bstr = DatumGetByteaPP(patt_const->constvalue); |
| 979 | |
| 980 | pattlen = VARSIZE_ANY_EXHDR(bstr); |
| 981 | patt = (char *) palloc(pattlen); |
| 982 | memcpy(patt, VARDATA_ANY(bstr), pattlen); |
| 983 | Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue)); |
| 984 | } |
| 985 | |
| 986 | match = palloc(pattlen + 1); |
| 987 | match_pos = 0; |
| 988 | for (pos = 0; pos < pattlen; pos++) |
| 989 | { |
| 990 | /* % and _ are wildcard characters in LIKE */ |
| 991 | if (patt[pos] == '%' || |
| 992 | patt[pos] == '_') |
| 993 | break; |
| 994 | |
| 995 | /* Backslash escapes the next character */ |
| 996 | if (patt[pos] == '\\') |
| 997 | { |
| 998 | pos++; |
| 999 | if (pos >= pattlen) |
| 1000 | break; |
| 1001 | } |
| 1002 | |
| 1003 | /* Stop if case-varying character (it's sort of a wildcard) */ |
| 1004 | if (case_insensitive && |
| 1005 | pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c)) |
| 1006 | break; |
| 1007 | |
| 1008 | match[match_pos++] = patt[pos]; |
| 1009 | } |
| 1010 | |
| 1011 | match[match_pos] = '\0'; |
| 1012 | |
| 1013 | if (typeid != BYTEAOID) |
| 1014 | *prefix_const = string_to_const(match, typeid); |
| 1015 | else |
| 1016 | *prefix_const = string_to_bytea_const(match, match_pos); |
| 1017 | |
| 1018 | if (rest_selec != NULL) |
| 1019 | *rest_selec = like_selectivity(&patt[pos], pattlen - pos, |
| 1020 | case_insensitive); |
| 1021 | |
| 1022 | pfree(patt); |
| 1023 | pfree(match); |
| 1024 | |
| 1025 | /* in LIKE, an empty pattern is an exact match! */ |
| 1026 | if (pos == pattlen) |
| 1027 | return Pattern_Prefix_Exact; /* reached end of pattern, so exact */ |
| 1028 | |
| 1029 | if (match_pos > 0) |
| 1030 | return Pattern_Prefix_Partial; |
| 1031 | |
| 1032 | return Pattern_Prefix_None; |
| 1033 | } |
| 1034 | |
| 1035 | static Pattern_Prefix_Status |
| 1036 | regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, |
| 1037 | Const **prefix_const, Selectivity *rest_selec) |
| 1038 | { |
| 1039 | Oid typeid = patt_const->consttype; |
| 1040 | char *prefix; |
| 1041 | bool exact; |
| 1042 | |
| 1043 | /* |
| 1044 | * Should be unnecessary, there are no bytea regex operators defined. As |
| 1045 | * such, it should be noted that the rest of this function has *not* been |
| 1046 | * made safe for binary (possibly NULL containing) strings. |
| 1047 | */ |
| 1048 | if (typeid == BYTEAOID) |
| 1049 | ereport(ERROR, |
| 1050 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| 1051 | errmsg("regular-expression matching not supported on type bytea" ))); |
| 1052 | |
| 1053 | /* Use the regexp machinery to extract the prefix, if any */ |
| 1054 | prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue), |
| 1055 | case_insensitive, collation, |
| 1056 | &exact); |
| 1057 | |
| 1058 | if (prefix == NULL) |
| 1059 | { |
| 1060 | *prefix_const = NULL; |
| 1061 | |
| 1062 | if (rest_selec != NULL) |
| 1063 | { |
| 1064 | char *patt = TextDatumGetCString(patt_const->constvalue); |
| 1065 | |
| 1066 | *rest_selec = regex_selectivity(patt, strlen(patt), |
| 1067 | case_insensitive, |
| 1068 | 0); |
| 1069 | pfree(patt); |
| 1070 | } |
| 1071 | |
| 1072 | return Pattern_Prefix_None; |
| 1073 | } |
| 1074 | |
| 1075 | *prefix_const = string_to_const(prefix, typeid); |
| 1076 | |
| 1077 | if (rest_selec != NULL) |
| 1078 | { |
| 1079 | if (exact) |
| 1080 | { |
| 1081 | /* Exact match, so there's no additional selectivity */ |
| 1082 | *rest_selec = 1.0; |
| 1083 | } |
| 1084 | else |
| 1085 | { |
| 1086 | char *patt = TextDatumGetCString(patt_const->constvalue); |
| 1087 | |
| 1088 | *rest_selec = regex_selectivity(patt, strlen(patt), |
| 1089 | case_insensitive, |
| 1090 | strlen(prefix)); |
| 1091 | pfree(patt); |
| 1092 | } |
| 1093 | } |
| 1094 | |
| 1095 | pfree(prefix); |
| 1096 | |
| 1097 | if (exact) |
| 1098 | return Pattern_Prefix_Exact; /* pattern specifies exact match */ |
| 1099 | else |
| 1100 | return Pattern_Prefix_Partial; |
| 1101 | } |
| 1102 | |
| 1103 | static Pattern_Prefix_Status |
| 1104 | pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, |
| 1105 | Const **prefix, Selectivity *rest_selec) |
| 1106 | { |
| 1107 | Pattern_Prefix_Status result; |
| 1108 | |
| 1109 | switch (ptype) |
| 1110 | { |
| 1111 | case Pattern_Type_Like: |
| 1112 | result = like_fixed_prefix(patt, false, collation, |
| 1113 | prefix, rest_selec); |
| 1114 | break; |
| 1115 | case Pattern_Type_Like_IC: |
| 1116 | result = like_fixed_prefix(patt, true, collation, |
| 1117 | prefix, rest_selec); |
| 1118 | break; |
| 1119 | case Pattern_Type_Regex: |
| 1120 | result = regex_fixed_prefix(patt, false, collation, |
| 1121 | prefix, rest_selec); |
| 1122 | break; |
| 1123 | case Pattern_Type_Regex_IC: |
| 1124 | result = regex_fixed_prefix(patt, true, collation, |
| 1125 | prefix, rest_selec); |
| 1126 | break; |
| 1127 | case Pattern_Type_Prefix: |
| 1128 | /* Prefix type work is trivial. */ |
| 1129 | result = Pattern_Prefix_Partial; |
| 1130 | *rest_selec = 1.0; /* all */ |
| 1131 | *prefix = makeConst(patt->consttype, |
| 1132 | patt->consttypmod, |
| 1133 | patt->constcollid, |
| 1134 | patt->constlen, |
| 1135 | datumCopy(patt->constvalue, |
| 1136 | patt->constbyval, |
| 1137 | patt->constlen), |
| 1138 | patt->constisnull, |
| 1139 | patt->constbyval); |
| 1140 | break; |
| 1141 | default: |
| 1142 | elog(ERROR, "unrecognized ptype: %d" , (int) ptype); |
| 1143 | result = Pattern_Prefix_None; /* keep compiler quiet */ |
| 1144 | break; |
| 1145 | } |
| 1146 | return result; |
| 1147 | } |
| 1148 | |
| 1149 | /* |
| 1150 | * Estimate the selectivity of a fixed prefix for a pattern match. |
| 1151 | * |
| 1152 | * A fixed prefix "foo" is estimated as the selectivity of the expression |
| 1153 | * "variable >= 'foo' AND variable < 'fop'" (see also indxpath.c). |
| 1154 | * |
| 1155 | * The selectivity estimate is with respect to the portion of the column |
| 1156 | * population represented by the histogram --- the caller must fold this |
| 1157 | * together with info about MCVs and NULLs. |
| 1158 | * |
| 1159 | * We use the >= and < operators from the specified btree opfamily to do the |
| 1160 | * estimation. The given variable and Const must be of the associated |
| 1161 | * datatype. |
| 1162 | * |
| 1163 | * XXX Note: we make use of the upper bound to estimate operator selectivity |
| 1164 | * even if the locale is such that we cannot rely on the upper-bound string. |
| 1165 | * The selectivity only needs to be approximately right anyway, so it seems |
| 1166 | * more useful to use the upper-bound code than not. |
| 1167 | */ |
| 1168 | static Selectivity |
| 1169 | prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, |
| 1170 | Oid vartype, Oid opfamily, Const *prefixcon) |
| 1171 | { |
| 1172 | Selectivity prefixsel; |
| 1173 | Oid cmpopr; |
| 1174 | FmgrInfo opproc; |
| 1175 | AttStatsSlot sslot; |
| 1176 | Const *greaterstrcon; |
| 1177 | Selectivity eq_sel; |
| 1178 | |
| 1179 | cmpopr = get_opfamily_member(opfamily, vartype, vartype, |
| 1180 | BTGreaterEqualStrategyNumber); |
| 1181 | if (cmpopr == InvalidOid) |
| 1182 | elog(ERROR, "no >= operator for opfamily %u" , opfamily); |
| 1183 | fmgr_info(get_opcode(cmpopr), &opproc); |
| 1184 | |
| 1185 | prefixsel = ineq_histogram_selectivity(root, vardata, |
| 1186 | &opproc, true, true, |
| 1187 | prefixcon->constvalue, |
| 1188 | prefixcon->consttype); |
| 1189 | |
| 1190 | if (prefixsel < 0.0) |
| 1191 | { |
| 1192 | /* No histogram is present ... return a suitable default estimate */ |
| 1193 | return DEFAULT_MATCH_SEL; |
| 1194 | } |
| 1195 | |
| 1196 | /*------- |
| 1197 | * If we can create a string larger than the prefix, say |
| 1198 | * "x < greaterstr". We try to generate the string referencing the |
| 1199 | * collation of the var's statistics, but if that's not available, |
| 1200 | * use DEFAULT_COLLATION_OID. |
| 1201 | *------- |
| 1202 | */ |
| 1203 | if (HeapTupleIsValid(vardata->statsTuple) && |
| 1204 | get_attstatsslot(&sslot, vardata->statsTuple, |
| 1205 | STATISTIC_KIND_HISTOGRAM, InvalidOid, 0)) |
| 1206 | /* sslot.stacoll is set up */ ; |
| 1207 | else |
| 1208 | sslot.stacoll = DEFAULT_COLLATION_OID; |
| 1209 | cmpopr = get_opfamily_member(opfamily, vartype, vartype, |
| 1210 | BTLessStrategyNumber); |
| 1211 | if (cmpopr == InvalidOid) |
| 1212 | elog(ERROR, "no < operator for opfamily %u" , opfamily); |
| 1213 | fmgr_info(get_opcode(cmpopr), &opproc); |
| 1214 | greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll); |
| 1215 | if (greaterstrcon) |
| 1216 | { |
| 1217 | Selectivity topsel; |
| 1218 | |
| 1219 | topsel = ineq_histogram_selectivity(root, vardata, |
| 1220 | &opproc, false, false, |
| 1221 | greaterstrcon->constvalue, |
| 1222 | greaterstrcon->consttype); |
| 1223 | |
| 1224 | /* ineq_histogram_selectivity worked before, it shouldn't fail now */ |
| 1225 | Assert(topsel >= 0.0); |
| 1226 | |
| 1227 | /* |
| 1228 | * Merge the two selectivities in the same way as for a range query |
| 1229 | * (see clauselist_selectivity()). Note that we don't need to worry |
| 1230 | * about double-exclusion of nulls, since ineq_histogram_selectivity |
| 1231 | * doesn't count those anyway. |
| 1232 | */ |
| 1233 | prefixsel = topsel + prefixsel - 1.0; |
| 1234 | } |
| 1235 | |
| 1236 | /* |
| 1237 | * If the prefix is long then the two bounding values might be too close |
| 1238 | * together for the histogram to distinguish them usefully, resulting in a |
| 1239 | * zero estimate (plus or minus roundoff error). To avoid returning a |
| 1240 | * ridiculously small estimate, compute the estimated selectivity for |
| 1241 | * "variable = 'foo'", and clamp to that. (Obviously, the resultant |
| 1242 | * estimate should be at least that.) |
| 1243 | * |
| 1244 | * We apply this even if we couldn't make a greater string. That case |
| 1245 | * suggests that the prefix is near the maximum possible, and thus |
| 1246 | * probably off the end of the histogram, and thus we probably got a very |
| 1247 | * small estimate from the >= condition; so we still need to clamp. |
| 1248 | */ |
| 1249 | cmpopr = get_opfamily_member(opfamily, vartype, vartype, |
| 1250 | BTEqualStrategyNumber); |
| 1251 | if (cmpopr == InvalidOid) |
| 1252 | elog(ERROR, "no = operator for opfamily %u" , opfamily); |
| 1253 | eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue, |
| 1254 | false, true, false); |
| 1255 | |
| 1256 | prefixsel = Max(prefixsel, eq_sel); |
| 1257 | |
| 1258 | return prefixsel; |
| 1259 | } |
| 1260 | |
| 1261 | |
| 1262 | /* |
| 1263 | * Estimate the selectivity of a pattern of the specified type. |
| 1264 | * Note that any fixed prefix of the pattern will have been removed already, |
| 1265 | * so actually we may be looking at just a fragment of the pattern. |
| 1266 | * |
| 1267 | * For now, we use a very simplistic approach: fixed characters reduce the |
| 1268 | * selectivity a good deal, character ranges reduce it a little, |
| 1269 | * wildcards (such as % for LIKE or .* for regex) increase it. |
| 1270 | */ |
| 1271 | |
| 1272 | #define FIXED_CHAR_SEL 0.20 /* about 1/5 */ |
| 1273 | #define CHAR_RANGE_SEL 0.25 |
| 1274 | #define ANY_CHAR_SEL 0.9 /* not 1, since it won't match end-of-string */ |
| 1275 | #define FULL_WILDCARD_SEL 5.0 |
| 1276 | #define PARTIAL_WILDCARD_SEL 2.0 |
| 1277 | |
| 1278 | static Selectivity |
| 1279 | like_selectivity(const char *patt, int pattlen, bool case_insensitive) |
| 1280 | { |
| 1281 | Selectivity sel = 1.0; |
| 1282 | int pos; |
| 1283 | |
| 1284 | /* Skip any leading wildcard; it's already factored into initial sel */ |
| 1285 | for (pos = 0; pos < pattlen; pos++) |
| 1286 | { |
| 1287 | if (patt[pos] != '%' && patt[pos] != '_') |
| 1288 | break; |
| 1289 | } |
| 1290 | |
| 1291 | for (; pos < pattlen; pos++) |
| 1292 | { |
| 1293 | /* % and _ are wildcard characters in LIKE */ |
| 1294 | if (patt[pos] == '%') |
| 1295 | sel *= FULL_WILDCARD_SEL; |
| 1296 | else if (patt[pos] == '_') |
| 1297 | sel *= ANY_CHAR_SEL; |
| 1298 | else if (patt[pos] == '\\') |
| 1299 | { |
| 1300 | /* Backslash quotes the next character */ |
| 1301 | pos++; |
| 1302 | if (pos >= pattlen) |
| 1303 | break; |
| 1304 | sel *= FIXED_CHAR_SEL; |
| 1305 | } |
| 1306 | else |
| 1307 | sel *= FIXED_CHAR_SEL; |
| 1308 | } |
| 1309 | /* Could get sel > 1 if multiple wildcards */ |
| 1310 | if (sel > 1.0) |
| 1311 | sel = 1.0; |
| 1312 | return sel; |
| 1313 | } |
| 1314 | |
| 1315 | static Selectivity |
| 1316 | regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive) |
| 1317 | { |
| 1318 | Selectivity sel = 1.0; |
| 1319 | int paren_depth = 0; |
| 1320 | int paren_pos = 0; /* dummy init to keep compiler quiet */ |
| 1321 | int pos; |
| 1322 | |
| 1323 | for (pos = 0; pos < pattlen; pos++) |
| 1324 | { |
| 1325 | if (patt[pos] == '(') |
| 1326 | { |
| 1327 | if (paren_depth == 0) |
| 1328 | paren_pos = pos; /* remember start of parenthesized item */ |
| 1329 | paren_depth++; |
| 1330 | } |
| 1331 | else if (patt[pos] == ')' && paren_depth > 0) |
| 1332 | { |
| 1333 | paren_depth--; |
| 1334 | if (paren_depth == 0) |
| 1335 | sel *= regex_selectivity_sub(patt + (paren_pos + 1), |
| 1336 | pos - (paren_pos + 1), |
| 1337 | case_insensitive); |
| 1338 | } |
| 1339 | else if (patt[pos] == '|' && paren_depth == 0) |
| 1340 | { |
| 1341 | /* |
| 1342 | * If unquoted | is present at paren level 0 in pattern, we have |
| 1343 | * multiple alternatives; sum their probabilities. |
| 1344 | */ |
| 1345 | sel += regex_selectivity_sub(patt + (pos + 1), |
| 1346 | pattlen - (pos + 1), |
| 1347 | case_insensitive); |
| 1348 | break; /* rest of pattern is now processed */ |
| 1349 | } |
| 1350 | else if (patt[pos] == '[') |
| 1351 | { |
| 1352 | bool negclass = false; |
| 1353 | |
| 1354 | if (patt[++pos] == '^') |
| 1355 | { |
| 1356 | negclass = true; |
| 1357 | pos++; |
| 1358 | } |
| 1359 | if (patt[pos] == ']') /* ']' at start of class is not special */ |
| 1360 | pos++; |
| 1361 | while (pos < pattlen && patt[pos] != ']') |
| 1362 | pos++; |
| 1363 | if (paren_depth == 0) |
| 1364 | sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL); |
| 1365 | } |
| 1366 | else if (patt[pos] == '.') |
| 1367 | { |
| 1368 | if (paren_depth == 0) |
| 1369 | sel *= ANY_CHAR_SEL; |
| 1370 | } |
| 1371 | else if (patt[pos] == '*' || |
| 1372 | patt[pos] == '?' || |
| 1373 | patt[pos] == '+') |
| 1374 | { |
| 1375 | /* Ought to be smarter about quantifiers... */ |
| 1376 | if (paren_depth == 0) |
| 1377 | sel *= PARTIAL_WILDCARD_SEL; |
| 1378 | } |
| 1379 | else if (patt[pos] == '{') |
| 1380 | { |
| 1381 | while (pos < pattlen && patt[pos] != '}') |
| 1382 | pos++; |
| 1383 | if (paren_depth == 0) |
| 1384 | sel *= PARTIAL_WILDCARD_SEL; |
| 1385 | } |
| 1386 | else if (patt[pos] == '\\') |
| 1387 | { |
| 1388 | /* backslash quotes the next character */ |
| 1389 | pos++; |
| 1390 | if (pos >= pattlen) |
| 1391 | break; |
| 1392 | if (paren_depth == 0) |
| 1393 | sel *= FIXED_CHAR_SEL; |
| 1394 | } |
| 1395 | else |
| 1396 | { |
| 1397 | if (paren_depth == 0) |
| 1398 | sel *= FIXED_CHAR_SEL; |
| 1399 | } |
| 1400 | } |
| 1401 | /* Could get sel > 1 if multiple wildcards */ |
| 1402 | if (sel > 1.0) |
| 1403 | sel = 1.0; |
| 1404 | return sel; |
| 1405 | } |
| 1406 | |
| 1407 | static Selectivity |
| 1408 | regex_selectivity(const char *patt, int pattlen, bool case_insensitive, |
| 1409 | int fixed_prefix_len) |
| 1410 | { |
| 1411 | Selectivity sel; |
| 1412 | |
| 1413 | /* If patt doesn't end with $, consider it to have a trailing wildcard */ |
| 1414 | if (pattlen > 0 && patt[pattlen - 1] == '$' && |
| 1415 | (pattlen == 1 || patt[pattlen - 2] != '\\')) |
| 1416 | { |
| 1417 | /* has trailing $ */ |
| 1418 | sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive); |
| 1419 | } |
| 1420 | else |
| 1421 | { |
| 1422 | /* no trailing $ */ |
| 1423 | sel = regex_selectivity_sub(patt, pattlen, case_insensitive); |
| 1424 | sel *= FULL_WILDCARD_SEL; |
| 1425 | } |
| 1426 | |
| 1427 | /* If there's a fixed prefix, discount its selectivity */ |
| 1428 | if (fixed_prefix_len > 0) |
| 1429 | sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len); |
| 1430 | |
| 1431 | /* Make sure result stays in range */ |
| 1432 | CLAMP_PROBABILITY(sel); |
| 1433 | return sel; |
| 1434 | } |
| 1435 | |
| 1436 | /* |
| 1437 | * Check whether char is a letter (and, hence, subject to case-folding) |
| 1438 | * |
| 1439 | * In multibyte character sets or with ICU, we can't use isalpha, and it does |
| 1440 | * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha. |
| 1441 | * Instead, just assume any non-ASCII char is potentially case-varying, and |
| 1442 | * hard-wire knowledge of which ASCII chars are letters. |
| 1443 | */ |
| 1444 | static int |
| 1445 | pattern_char_isalpha(char c, bool is_multibyte, |
| 1446 | pg_locale_t locale, bool locale_is_c) |
| 1447 | { |
| 1448 | if (locale_is_c) |
| 1449 | return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); |
| 1450 | else if (is_multibyte && IS_HIGHBIT_SET(c)) |
| 1451 | return true; |
| 1452 | else if (locale && locale->provider == COLLPROVIDER_ICU) |
| 1453 | return IS_HIGHBIT_SET(c) || |
| 1454 | (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); |
| 1455 | #ifdef HAVE_LOCALE_T |
| 1456 | else if (locale && locale->provider == COLLPROVIDER_LIBC) |
| 1457 | return isalpha_l((unsigned char) c, locale->info.lt); |
| 1458 | #endif |
| 1459 | else |
| 1460 | return isalpha((unsigned char) c); |
| 1461 | } |
| 1462 | |
| 1463 | |
| 1464 | /* |
| 1465 | * For bytea, the increment function need only increment the current byte |
| 1466 | * (there are no multibyte characters to worry about). |
| 1467 | */ |
| 1468 | static bool |
| 1469 | byte_increment(unsigned char *ptr, int len) |
| 1470 | { |
| 1471 | if (*ptr >= 255) |
| 1472 | return false; |
| 1473 | (*ptr)++; |
| 1474 | return true; |
| 1475 | } |
| 1476 | |
| 1477 | /* |
| 1478 | * Try to generate a string greater than the given string or any |
| 1479 | * string it is a prefix of. If successful, return a palloc'd string |
| 1480 | * in the form of a Const node; else return NULL. |
| 1481 | * |
| 1482 | * The caller must provide the appropriate "less than" comparison function |
| 1483 | * for testing the strings, along with the collation to use. |
| 1484 | * |
| 1485 | * The key requirement here is that given a prefix string, say "foo", |
| 1486 | * we must be able to generate another string "fop" that is greater than |
| 1487 | * all strings "foobar" starting with "foo". We can test that we have |
| 1488 | * generated a string greater than the prefix string, but in non-C collations |
| 1489 | * that is not a bulletproof guarantee that an extension of the string might |
| 1490 | * not sort after it; an example is that "foo " is less than "foo!", but it |
| 1491 | * is not clear that a "dictionary" sort ordering will consider "foo!" less |
| 1492 | * than "foo bar". CAUTION: Therefore, this function should be used only for |
| 1493 | * estimation purposes when working in a non-C collation. |
| 1494 | * |
| 1495 | * To try to catch most cases where an extended string might otherwise sort |
| 1496 | * before the result value, we determine which of the strings "Z", "z", "y", |
| 1497 | * and "9" is seen as largest by the collation, and append that to the given |
| 1498 | * prefix before trying to find a string that compares as larger. |
| 1499 | * |
| 1500 | * To search for a greater string, we repeatedly "increment" the rightmost |
| 1501 | * character, using an encoding-specific character incrementer function. |
| 1502 | * When it's no longer possible to increment the last character, we truncate |
| 1503 | * off that character and start incrementing the next-to-rightmost. |
| 1504 | * For example, if "z" were the last character in the sort order, then we |
| 1505 | * could produce "foo" as a string greater than "fonz". |
| 1506 | * |
| 1507 | * This could be rather slow in the worst case, but in most cases we |
| 1508 | * won't have to try more than one or two strings before succeeding. |
| 1509 | * |
| 1510 | * Note that it's important for the character incrementer not to be too anal |
| 1511 | * about producing every possible character code, since in some cases the only |
| 1512 | * way to get a larger string is to increment a previous character position. |
| 1513 | * So we don't want to spend too much time trying every possible character |
| 1514 | * code at the last position. A good rule of thumb is to be sure that we |
| 1515 | * don't try more than 256*K values for a K-byte character (and definitely |
| 1516 | * not 256^K, which is what an exhaustive search would approach). |
| 1517 | */ |
| 1518 | static Const * |
| 1519 | make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation) |
| 1520 | { |
| 1521 | Oid datatype = str_const->consttype; |
| 1522 | char *workstr; |
| 1523 | int len; |
| 1524 | Datum cmpstr; |
| 1525 | char *cmptxt = NULL; |
| 1526 | mbcharacter_incrementer charinc; |
| 1527 | |
| 1528 | /* |
| 1529 | * Get a modifiable copy of the prefix string in C-string format, and set |
| 1530 | * up the string we will compare to as a Datum. In C locale this can just |
| 1531 | * be the given prefix string, otherwise we need to add a suffix. Type |
| 1532 | * BYTEA sorts bytewise so it never needs a suffix either. |
| 1533 | */ |
| 1534 | if (datatype == BYTEAOID) |
| 1535 | { |
| 1536 | bytea *bstr = DatumGetByteaPP(str_const->constvalue); |
| 1537 | |
| 1538 | len = VARSIZE_ANY_EXHDR(bstr); |
| 1539 | workstr = (char *) palloc(len); |
| 1540 | memcpy(workstr, VARDATA_ANY(bstr), len); |
| 1541 | Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue)); |
| 1542 | cmpstr = str_const->constvalue; |
| 1543 | } |
| 1544 | else |
| 1545 | { |
| 1546 | if (datatype == NAMEOID) |
| 1547 | workstr = DatumGetCString(DirectFunctionCall1(nameout, |
| 1548 | str_const->constvalue)); |
| 1549 | else |
| 1550 | workstr = TextDatumGetCString(str_const->constvalue); |
| 1551 | len = strlen(workstr); |
| 1552 | if (lc_collate_is_c(collation) || len == 0) |
| 1553 | cmpstr = str_const->constvalue; |
| 1554 | else |
| 1555 | { |
| 1556 | /* If first time through, determine the suffix to use */ |
| 1557 | static char suffixchar = 0; |
| 1558 | static Oid suffixcollation = 0; |
| 1559 | |
| 1560 | if (!suffixchar || suffixcollation != collation) |
| 1561 | { |
| 1562 | char *best; |
| 1563 | |
| 1564 | best = "Z" ; |
| 1565 | if (varstr_cmp(best, 1, "z" , 1, collation) < 0) |
| 1566 | best = "z" ; |
| 1567 | if (varstr_cmp(best, 1, "y" , 1, collation) < 0) |
| 1568 | best = "y" ; |
| 1569 | if (varstr_cmp(best, 1, "9" , 1, collation) < 0) |
| 1570 | best = "9" ; |
| 1571 | suffixchar = *best; |
| 1572 | suffixcollation = collation; |
| 1573 | } |
| 1574 | |
| 1575 | /* And build the string to compare to */ |
| 1576 | if (datatype == NAMEOID) |
| 1577 | { |
| 1578 | cmptxt = palloc(len + 2); |
| 1579 | memcpy(cmptxt, workstr, len); |
| 1580 | cmptxt[len] = suffixchar; |
| 1581 | cmptxt[len + 1] = '\0'; |
| 1582 | cmpstr = PointerGetDatum(cmptxt); |
| 1583 | } |
| 1584 | else |
| 1585 | { |
| 1586 | cmptxt = palloc(VARHDRSZ + len + 1); |
| 1587 | SET_VARSIZE(cmptxt, VARHDRSZ + len + 1); |
| 1588 | memcpy(VARDATA(cmptxt), workstr, len); |
| 1589 | *(VARDATA(cmptxt) + len) = suffixchar; |
| 1590 | cmpstr = PointerGetDatum(cmptxt); |
| 1591 | } |
| 1592 | } |
| 1593 | } |
| 1594 | |
| 1595 | /* Select appropriate character-incrementer function */ |
| 1596 | if (datatype == BYTEAOID) |
| 1597 | charinc = byte_increment; |
| 1598 | else |
| 1599 | charinc = pg_database_encoding_character_incrementer(); |
| 1600 | |
| 1601 | /* And search ... */ |
| 1602 | while (len > 0) |
| 1603 | { |
| 1604 | int charlen; |
| 1605 | unsigned char *lastchar; |
| 1606 | |
| 1607 | /* Identify the last character --- for bytea, just the last byte */ |
| 1608 | if (datatype == BYTEAOID) |
| 1609 | charlen = 1; |
| 1610 | else |
| 1611 | charlen = len - pg_mbcliplen(workstr, len, len - 1); |
| 1612 | lastchar = (unsigned char *) (workstr + len - charlen); |
| 1613 | |
| 1614 | /* |
| 1615 | * Try to generate a larger string by incrementing the last character |
| 1616 | * (for BYTEA, we treat each byte as a character). |
| 1617 | * |
| 1618 | * Note: the incrementer function is expected to return true if it's |
| 1619 | * generated a valid-per-the-encoding new character, otherwise false. |
| 1620 | * The contents of the character on false return are unspecified. |
| 1621 | */ |
| 1622 | while (charinc(lastchar, charlen)) |
| 1623 | { |
| 1624 | Const *workstr_const; |
| 1625 | |
| 1626 | if (datatype == BYTEAOID) |
| 1627 | workstr_const = string_to_bytea_const(workstr, len); |
| 1628 | else |
| 1629 | workstr_const = string_to_const(workstr, datatype); |
| 1630 | |
| 1631 | if (DatumGetBool(FunctionCall2Coll(ltproc, |
| 1632 | collation, |
| 1633 | cmpstr, |
| 1634 | workstr_const->constvalue))) |
| 1635 | { |
| 1636 | /* Successfully made a string larger than cmpstr */ |
| 1637 | if (cmptxt) |
| 1638 | pfree(cmptxt); |
| 1639 | pfree(workstr); |
| 1640 | return workstr_const; |
| 1641 | } |
| 1642 | |
| 1643 | /* No good, release unusable value and try again */ |
| 1644 | pfree(DatumGetPointer(workstr_const->constvalue)); |
| 1645 | pfree(workstr_const); |
| 1646 | } |
| 1647 | |
| 1648 | /* |
| 1649 | * No luck here, so truncate off the last character and try to |
| 1650 | * increment the next one. |
| 1651 | */ |
| 1652 | len -= charlen; |
| 1653 | workstr[len] = '\0'; |
| 1654 | } |
| 1655 | |
| 1656 | /* Failed... */ |
| 1657 | if (cmptxt) |
| 1658 | pfree(cmptxt); |
| 1659 | pfree(workstr); |
| 1660 | |
| 1661 | return NULL; |
| 1662 | } |
| 1663 | |
| 1664 | /* |
| 1665 | * Generate a Datum of the appropriate type from a C string. |
| 1666 | * Note that all of the supported types are pass-by-ref, so the |
| 1667 | * returned value should be pfree'd if no longer needed. |
| 1668 | */ |
| 1669 | static Datum |
| 1670 | string_to_datum(const char *str, Oid datatype) |
| 1671 | { |
| 1672 | Assert(str != NULL); |
| 1673 | |
| 1674 | /* |
| 1675 | * We cheat a little by assuming that CStringGetTextDatum() will do for |
| 1676 | * bpchar and varchar constants too... |
| 1677 | */ |
| 1678 | if (datatype == NAMEOID) |
| 1679 | return DirectFunctionCall1(namein, CStringGetDatum(str)); |
| 1680 | else if (datatype == BYTEAOID) |
| 1681 | return DirectFunctionCall1(byteain, CStringGetDatum(str)); |
| 1682 | else |
| 1683 | return CStringGetTextDatum(str); |
| 1684 | } |
| 1685 | |
| 1686 | /* |
| 1687 | * Generate a Const node of the appropriate type from a C string. |
| 1688 | */ |
| 1689 | static Const * |
| 1690 | string_to_const(const char *str, Oid datatype) |
| 1691 | { |
| 1692 | Datum conval = string_to_datum(str, datatype); |
| 1693 | Oid collation; |
| 1694 | int constlen; |
| 1695 | |
| 1696 | /* |
| 1697 | * We only need to support a few datatypes here, so hard-wire properties |
| 1698 | * instead of incurring the expense of catalog lookups. |
| 1699 | */ |
| 1700 | switch (datatype) |
| 1701 | { |
| 1702 | case TEXTOID: |
| 1703 | case VARCHAROID: |
| 1704 | case BPCHAROID: |
| 1705 | collation = DEFAULT_COLLATION_OID; |
| 1706 | constlen = -1; |
| 1707 | break; |
| 1708 | |
| 1709 | case NAMEOID: |
| 1710 | collation = C_COLLATION_OID; |
| 1711 | constlen = NAMEDATALEN; |
| 1712 | break; |
| 1713 | |
| 1714 | case BYTEAOID: |
| 1715 | collation = InvalidOid; |
| 1716 | constlen = -1; |
| 1717 | break; |
| 1718 | |
| 1719 | default: |
| 1720 | elog(ERROR, "unexpected datatype in string_to_const: %u" , |
| 1721 | datatype); |
| 1722 | return NULL; |
| 1723 | } |
| 1724 | |
| 1725 | return makeConst(datatype, -1, collation, constlen, |
| 1726 | conval, false, false); |
| 1727 | } |
| 1728 | |
| 1729 | /* |
| 1730 | * Generate a Const node of bytea type from a binary C string and a length. |
| 1731 | */ |
| 1732 | static Const * |
| 1733 | string_to_bytea_const(const char *str, size_t str_len) |
| 1734 | { |
| 1735 | bytea *bstr = palloc(VARHDRSZ + str_len); |
| 1736 | Datum conval; |
| 1737 | |
| 1738 | memcpy(VARDATA(bstr), str, str_len); |
| 1739 | SET_VARSIZE(bstr, VARHDRSZ + str_len); |
| 1740 | conval = PointerGetDatum(bstr); |
| 1741 | |
| 1742 | return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false); |
| 1743 | } |
| 1744 | |