| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * pgstrcasecmp.c |
| 4 | * Portable SQL-like case-independent comparisons and conversions. |
| 5 | * |
| 6 | * SQL99 specifies Unicode-aware case normalization, which we don't yet |
| 7 | * have the infrastructure for. Instead we use tolower() to provide a |
| 8 | * locale-aware translation. However, there are some locales where this |
| 9 | * is not right either (eg, Turkish may do strange things with 'i' and |
| 10 | * 'I'). Our current compromise is to use tolower() for characters with |
| 11 | * the high bit set, and use an ASCII-only downcasing for 7-bit |
| 12 | * characters. |
| 13 | * |
| 14 | * NB: this code should match downcase_truncate_identifier() in scansup.c. |
| 15 | * |
| 16 | * We also provide strict ASCII-only case conversion functions, which can |
| 17 | * be used to implement C/POSIX case folding semantics no matter what the |
| 18 | * C library thinks the locale is. |
| 19 | * |
| 20 | * |
| 21 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 22 | * |
| 23 | * src/port/pgstrcasecmp.c |
| 24 | * |
| 25 | *------------------------------------------------------------------------- |
| 26 | */ |
| 27 | #include "c.h" |
| 28 | |
| 29 | #include <ctype.h> |
| 30 | |
| 31 | |
/*
 * pg_strcasecmp
 *		Compare two null-terminated strings, ignoring case.
 *
 * Bytes are only case-folded when they differ as-is.  ASCII upper-case
 * letters are folded arithmetically; a byte with the high bit set is passed
 * to tolower() when isupper() says it is an upper-case letter.
 *
 * Returns zero if the strings match.  Otherwise returns the difference
 * between the first pair of folded bytes that do not match, with both bytes
 * taken as unsigned char values.
 */
int
pg_strcasecmp(const char *s1, const char *s2)
{
	unsigned char a;
	unsigned char b;

	do
	{
		a = (unsigned char) *s1++;
		b = (unsigned char) *s2++;

		/* Identical bytes need no folding; just check for end of string. */
		if (a == b)
			continue;

		/* Fold the byte taken from s1. */
		if (a >= 'A' && a <= 'Z')
			a = (unsigned char) (a + ('a' - 'A'));
		else if (IS_HIGHBIT_SET(a) && isupper(a))
			a = (unsigned char) tolower(a);

		/* Fold the byte taken from s2. */
		if (b >= 'A' && b <= 'Z')
			b = (unsigned char) (b + ('a' - 'A'));
		else if (IS_HIGHBIT_SET(b) && isupper(b))
			b = (unsigned char) tolower(b);

		/* Still different after folding: this decides the result. */
		if (a != b)
			return (int) a - (int) b;
	} while (a != 0);

	return 0;
}
| 63 | |
/*
 * pg_strncasecmp
 *		Compare two strings, ignoring case, looking at no more than n bytes
 *		of each.  The strings need not be null-terminated.
 *
 * The comparison stops early at a null byte that appears at the same
 * position in both strings.  Case folding follows the same rules as
 * pg_strcasecmp(): ASCII upper-case letters are folded arithmetically, and
 * a byte with the high bit set goes through tolower() when isupper()
 * accepts it.
 *
 * Returns zero if the examined bytes match (which is always the case when
 * n is zero).  Otherwise returns the difference between the first pair of
 * folded bytes that do not match, taken as unsigned char values.
 */
int
pg_strncasecmp(const char *s1, const char *s2, size_t n)
{
	size_t		remaining;

	for (remaining = n; remaining > 0; remaining--)
	{
		unsigned char a = (unsigned char) *s1++;
		unsigned char b = (unsigned char) *s2++;

		if (a == b)
		{
			/* A shared terminator means the strings are equal. */
			if (a == 0)
				return 0;
			continue;
		}

		/* The raw bytes differ, so compare their lower-case forms. */
		if (a >= 'A' && a <= 'Z')
			a = (unsigned char) (a + ('a' - 'A'));
		else if (IS_HIGHBIT_SET(a) && isupper(a))
			a = (unsigned char) tolower(a);

		if (b >= 'A' && b <= 'Z')
			b = (unsigned char) (b + ('a' - 'A'));
		else if (IS_HIGHBIT_SET(b) && isupper(b))
			b = (unsigned char) tolower(b);

		if (a != b)
			return (int) a - (int) b;

		/* Folded bytes agree; stop if that agreed value is a terminator. */
		if (a == 0)
			return 0;
	}

	return 0;
}
| 96 | |
/*
 * pg_toupper
 *		Convert one character to upper case.
 *
 * ASCII lower-case letters are converted arithmetically.  A character with
 * the high bit set is handed to toupper() only after islower() has accepted
 * it, so this is safe to call on characters that are not lower-case
 * letters, which is not true of every toupper() implementation.  Any other
 * character is returned unchanged.
 *
 * Since this works one byte at a time, the result is of doubtful value in
 * multibyte character sets.
 */
unsigned char
pg_toupper(unsigned char ch)
{
	/* 7-bit letters: fixed ASCII offset. */
	if (ch >= 'a' && ch <= 'z')
		return (unsigned char) (ch - ('a' - 'A'));

	/* High-bit characters: defer to the C library. */
	if (IS_HIGHBIT_SET(ch) && islower(ch))
		return (unsigned char) toupper(ch);

	return ch;
}
| 113 | |
/*
 * pg_tolower
 *		Convert one character to lower case.
 *
 * ASCII upper-case letters are converted arithmetically.  A character with
 * the high bit set is handed to tolower() only after isupper() has accepted
 * it, so this is safe to call on characters that are not upper-case
 * letters, which is not true of every tolower() implementation.  Any other
 * character is returned unchanged.
 *
 * Since this works one byte at a time, the result is of doubtful value in
 * multibyte character sets.
 */
unsigned char
pg_tolower(unsigned char ch)
{
	/* 7-bit letters: fixed ASCII offset. */
	if (ch >= 'A' && ch <= 'Z')
		return (unsigned char) (ch + ('a' - 'A'));

	/* High-bit characters: defer to the C library. */
	if (IS_HIGHBIT_SET(ch) && isupper(ch))
		return (unsigned char) tolower(ch);

	return ch;
}
| 130 | |
/*
 * pg_ascii_toupper
 *		Convert one character to upper case using C/POSIX locale rules.
 *
 * Only the letters 'a' through 'z' are changed; every other value,
 * including any with the high bit set, is returned as-is.  The C library
 * is never consulted.
 */
unsigned char
pg_ascii_toupper(unsigned char ch)
{
	return (ch >= 'a' && ch <= 'z') ? (unsigned char) (ch - ('a' - 'A')) : ch;
}
| 141 | |
/*
 * pg_ascii_tolower
 *		Convert one character to lower case using C/POSIX locale rules.
 *
 * Only the letters 'A' through 'Z' are changed; every other value,
 * including any with the high bit set, is returned as-is.  The C library
 * is never consulted.
 */
unsigned char
pg_ascii_tolower(unsigned char ch)
{
	return (ch >= 'A' && ch <= 'Z') ? (unsigned char) (ch + ('a' - 'A')) : ch;
}
| 152 | |