| 1 | /* Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. |
| 2 | |
| 3 | This program is free software; you can redistribute it and/or modify |
| 4 | it under the terms of the GNU General Public License as published by |
| 5 | the Free Software Foundation; version 2 of the License. |
| 6 | |
| 7 | This program is distributed in the hope that it will be useful, |
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | GNU General Public License for more details. |
| 11 | |
| 12 | You should have received a copy of the GNU General Public License |
| 13 | along with this program; if not, write to the Free Software |
| 14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ |
| 15 | |
| 16 | /* Some useful string utility functions used by the MySQL server */ |
| 17 | |
| 18 | #include "mariadb.h" |
| 19 | #include "sql_priv.h" |
| 20 | #include "unireg.h" |
| 21 | #include "strfunc.h" |
| 22 | #include "sql_class.h" |
| 23 | #include "typelib.h" // TYPELIB |
| 24 | #include "m_ctype.h" // my_charset_latin1 |
| 25 | #include "mysqld.h" // system_charset_info |
| 26 | |
| 27 | /* |
| 28 | Return bitmap for strings used in a set |
| 29 | |
| 30 | SYNOPSIS |
| 31 | find_set() |
| 32 | lib Strings in set |
| 33 | str Strings of set-strings separated by ',' |
| 34 | err_pos If error, set to point to start of wrong set string |
| 35 | err_len If error, set to the length of wrong set string |
| 36 | set_warning Set to 1 if some string in set couldn't be used |
| 37 | |
| 38 | NOTE |
| 39 | We delete all end space from str before comparison |
| 40 | |
| 41 | RETURN |
| 42 | bitmap of all sets found in x. |
| 43 | set_warning is set to 1 if there was any sets that couldn't be set |
| 44 | */ |
| 45 | |
| 46 | static const char field_separator=','; |
| 47 | |
| 48 | ulonglong find_set(TYPELIB *lib, const char *str, size_t length, CHARSET_INFO *cs, |
| 49 | char **err_pos, uint *err_len, bool *set_warning) |
| 50 | { |
| 51 | CHARSET_INFO *strip= cs ? cs : &my_charset_latin1; |
| 52 | const char *end= str + strip->cset->lengthsp(strip, str, length); |
| 53 | ulonglong found= 0; |
| 54 | *err_pos= 0; // No error yet |
| 55 | *err_len= 0; |
| 56 | if (str != end) |
| 57 | { |
| 58 | const char *start= str; |
| 59 | for (;;) |
| 60 | { |
| 61 | const char *pos= start; |
| 62 | uint var_len; |
| 63 | int mblen= 1; |
| 64 | |
| 65 | if (cs && cs->mbminlen > 1) |
| 66 | { |
| 67 | for ( ; pos < end; pos+= mblen) |
| 68 | { |
| 69 | my_wc_t wc; |
| 70 | if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos, |
| 71 | (const uchar *) end)) < 1) |
| 72 | mblen= 1; // Not to hang on a wrong multibyte sequence |
| 73 | if (wc == (my_wc_t) field_separator) |
| 74 | break; |
| 75 | } |
| 76 | } |
| 77 | else |
| 78 | for (; pos != end && *pos != field_separator; pos++) ; |
| 79 | var_len= (uint) (pos - start); |
| 80 | uint find= cs ? find_type2(lib, start, var_len, cs) : |
| 81 | find_type(lib, start, var_len, (bool) 0); |
| 82 | if (unlikely(!find && *err_len == 0)) |
| 83 | { |
| 84 | // report the first error with length > 0 |
| 85 | *err_pos= (char*) start; |
| 86 | *err_len= var_len; |
| 87 | *set_warning= 1; |
| 88 | } |
| 89 | else |
| 90 | found|= 1ULL << (find - 1); |
| 91 | if (pos >= end) |
| 92 | break; |
| 93 | start= pos + mblen; |
| 94 | } |
| 95 | } |
| 96 | return found; |
| 97 | } |
| 98 | |
| 99 | /* |
| 100 | Function to find a string in a TYPELIB |
| 101 | (similar to find_type() of mysys/typelib.c) |
| 102 | |
| 103 | SYNOPSIS |
| 104 | find_type() |
| 105 | lib TYPELIB (struct of pointer to values + count) |
| 106 | find String to find |
| 107 | length Length of string to find |
| 108 | part_match Allow part matching of value |
| 109 | |
| 110 | RETURN |
| 111 | 0 error |
| 112 | > 0 position in TYPELIB->type_names +1 |
| 113 | */ |
| 114 | |
| 115 | uint find_type(const TYPELIB *lib, const char *find, size_t length, |
| 116 | bool part_match) |
| 117 | { |
| 118 | uint found_count=0, found_pos=0; |
| 119 | const char *end= find+length; |
| 120 | const char *i; |
| 121 | const char *j; |
| 122 | for (uint pos=0 ; (j=lib->type_names[pos++]) ; ) |
| 123 | { |
| 124 | for (i=find ; i != end && |
| 125 | my_toupper(system_charset_info,*i) == |
| 126 | my_toupper(system_charset_info,*j) ; i++, j++) ; |
| 127 | if (i == end) |
| 128 | { |
| 129 | if (! *j) |
| 130 | return(pos); |
| 131 | found_count++; |
| 132 | found_pos= pos; |
| 133 | } |
| 134 | } |
| 135 | return(found_count == 1 && part_match ? found_pos : 0); |
| 136 | } |
| 137 | |
| 138 | |
| 139 | /* |
| 140 | Find a string in a list of strings according to collation |
| 141 | |
| 142 | SYNOPSIS |
| 143 | find_type2() |
| 144 | lib TYPELIB (struct of pointer to values + count) |
| 145 | x String to find |
| 146 | length String length |
| 147 | cs Character set + collation to use for comparison |
| 148 | |
| 149 | NOTES |
| 150 | |
| 151 | RETURN |
| 152 | 0 No matching value |
| 153 | >0 Offset+1 in typelib for matched string |
| 154 | */ |
| 155 | |
| 156 | uint find_type2(const TYPELIB *typelib, const char *x, size_t length, |
| 157 | CHARSET_INFO *cs) |
| 158 | { |
| 159 | int pos; |
| 160 | const char *j; |
| 161 | DBUG_ENTER("find_type2" ); |
| 162 | DBUG_PRINT("enter" ,("x: '%.*s' lib: %p" , (int)length, x, typelib)); |
| 163 | |
| 164 | if (!typelib->count) |
| 165 | { |
| 166 | DBUG_PRINT("exit" ,("no count" )); |
| 167 | DBUG_RETURN(0); |
| 168 | } |
| 169 | |
| 170 | for (pos=0 ; (j=typelib->type_names[pos]) ; pos++) |
| 171 | { |
| 172 | if (!my_strnncoll(cs, (const uchar*) x, length, |
| 173 | (const uchar*) j, typelib->type_lengths[pos])) |
| 174 | DBUG_RETURN(pos+1); |
| 175 | } |
| 176 | DBUG_PRINT("exit" ,("Couldn't find type" )); |
| 177 | DBUG_RETURN(0); |
| 178 | } /* find_type */ |
| 179 | |
| 180 | |
| 181 | /* |
| 182 | Un-hex all elements in a typelib |
| 183 | |
| 184 | SYNOPSIS |
| 185 | unhex_type2() |
| 186 | interval TYPELIB (struct of pointer to values + lengths + count) |
| 187 | |
| 188 | NOTES |
| 189 | |
| 190 | RETURN |
| 191 | N/A |
| 192 | */ |
| 193 | |
| 194 | void unhex_type2(TYPELIB *interval) |
| 195 | { |
| 196 | for (uint pos= 0; pos < interval->count; pos++) |
| 197 | { |
| 198 | char *from, *to; |
| 199 | for (from= to= (char*) interval->type_names[pos]; *from; ) |
| 200 | { |
| 201 | /* |
| 202 | Note, hexchar_to_int(*from++) doesn't work |
| 203 | one some compilers, e.g. IRIX. Looks like a compiler |
| 204 | bug in inline functions in combination with arguments |
| 205 | that have a side effect. So, let's use from[0] and from[1] |
| 206 | and increment 'from' by two later. |
| 207 | */ |
| 208 | |
| 209 | *to++= (char) (hexchar_to_int(from[0]) << 4) + |
| 210 | hexchar_to_int(from[1]); |
| 211 | from+= 2; |
| 212 | } |
| 213 | interval->type_lengths[pos] /= 2; |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | |
| 218 | /* |
| 219 | Check if the first word in a string is one of the ones in TYPELIB |
| 220 | |
| 221 | SYNOPSIS |
| 222 | check_word() |
| 223 | lib TYPELIB |
| 224 | val String to check |
| 225 | end End of input |
| 226 | end_of_word Store value of last used byte here if we found word |
| 227 | |
| 228 | RETURN |
| 229 | 0 No matching value |
| 230 | > 1 lib->type_names[#-1] matched |
| 231 | end_of_word will point to separator character/end in 'val' |
| 232 | */ |
| 233 | |
| 234 | uint check_word(TYPELIB *lib, const char *val, const char *end, |
| 235 | const char **end_of_word) |
| 236 | { |
| 237 | int res; |
| 238 | const char *ptr; |
| 239 | |
| 240 | /* Fiend end of word */ |
| 241 | for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++) |
| 242 | ; |
| 243 | if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0) |
| 244 | *end_of_word= ptr; |
| 245 | return res; |
| 246 | } |
| 247 | |
| 248 | |
| 249 | /* |
| 250 | Converts a string between character sets |
| 251 | |
| 252 | SYNOPSIS |
| 253 | strconvert() |
| 254 | from_cs source character set |
| 255 | from source, a null terminated string |
| 256 | to destination buffer |
| 257 | to_length destination buffer length |
| 258 | |
| 259 | NOTES |
| 260 | 'to' is always terminated with a '\0' character. |
| 261 | If there is no enough space to convert whole string, |
| 262 | only prefix is converted, and terminated with '\0'. |
| 263 | |
| 264 | RETURN VALUES |
| 265 | result string length |
| 266 | */ |
| 267 | |
| 268 | |
| 269 | uint strconvert(CHARSET_INFO *from_cs, const char *from, size_t from_length, |
| 270 | CHARSET_INFO *to_cs, char *to, size_t to_length, uint *errors) |
| 271 | { |
| 272 | int cnvres; |
| 273 | my_wc_t wc; |
| 274 | char *to_start= to; |
| 275 | uchar *to_end= (uchar*) to + to_length - 1; |
| 276 | const uchar *from_end= (const uchar*) from + from_length; |
| 277 | my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc; |
| 278 | my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb; |
| 279 | uint error_count= 0; |
| 280 | |
| 281 | while (1) |
| 282 | { |
| 283 | if ((cnvres= (*mb_wc)(from_cs, &wc, |
| 284 | (uchar*) from, from_end)) > 0) |
| 285 | { |
| 286 | if (!wc) |
| 287 | break; |
| 288 | from+= cnvres; |
| 289 | } |
| 290 | else if (cnvres == MY_CS_ILSEQ) |
| 291 | { |
| 292 | error_count++; |
| 293 | from++; |
| 294 | wc= '?'; |
| 295 | } |
| 296 | else |
| 297 | break; // Impossible char. |
| 298 | |
| 299 | outp: |
| 300 | |
| 301 | if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) |
| 302 | to+= cnvres; |
| 303 | else if (cnvres == MY_CS_ILUNI && wc != '?') |
| 304 | { |
| 305 | error_count++; |
| 306 | wc= '?'; |
| 307 | goto outp; |
| 308 | } |
| 309 | else |
| 310 | break; |
| 311 | } |
| 312 | *to= '\0'; |
| 313 | *errors= error_count; |
| 314 | return (uint32) (to - to_start); |
| 315 | |
| 316 | } |
| 317 | |
| 318 | |
| 319 | /* |
| 320 | Searches for a LEX_STRING in an LEX_STRING array. |
| 321 | |
| 322 | SYNOPSIS |
| 323 | find_string_in_array() |
| 324 | heap The array |
| 325 | needle The string to search for |
| 326 | |
| 327 | NOTE |
| 328 | The last LEX_STRING in the array should have str member set to NULL |
| 329 | |
| 330 | RETURN VALUES |
| 331 | -1 Not found |
| 332 | >=0 Ordinal position |
| 333 | */ |
| 334 | |
| 335 | int find_string_in_array(LEX_CSTRING * const haystack, LEX_CSTRING * const needle, |
| 336 | CHARSET_INFO * const cs) |
| 337 | { |
| 338 | const LEX_CSTRING *pos; |
| 339 | for (pos= haystack; pos->str; pos++) |
| 340 | if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length, |
| 341 | (uchar *) needle->str, needle->length)) |
| 342 | { |
| 343 | return (int)(pos - haystack); |
| 344 | } |
| 345 | return -1; |
| 346 | } |
| 347 | |
| 348 | |
| 349 | const char *set_to_string(THD *thd, LEX_CSTRING *result, ulonglong set, |
| 350 | const char *lib[]) |
| 351 | { |
| 352 | char buff[STRING_BUFFER_USUAL_SIZE*8]; |
| 353 | String tmp(buff, sizeof(buff), &my_charset_latin1); |
| 354 | LEX_CSTRING unused; |
| 355 | |
| 356 | if (!result) |
| 357 | result= &unused; |
| 358 | |
| 359 | tmp.length(0); |
| 360 | |
| 361 | for (uint i= 0; set; i++, set >>= 1) |
| 362 | if (set & 1) { |
| 363 | tmp.append(lib[i]); |
| 364 | tmp.append(','); |
| 365 | } |
| 366 | |
| 367 | if (tmp.length()) |
| 368 | { |
| 369 | result->str= thd->strmake(tmp.ptr(), tmp.length()-1); |
| 370 | result->length= tmp.length()-1; |
| 371 | } |
| 372 | else |
| 373 | { |
| 374 | result->str= const_cast<char*>("" ); |
| 375 | result->length= 0; |
| 376 | } |
| 377 | return result->str; |
| 378 | } |
| 379 | |
| 380 | const char *flagset_to_string(THD *thd, LEX_CSTRING *result, ulonglong set, |
| 381 | const char *lib[]) |
| 382 | { |
| 383 | char buff[STRING_BUFFER_USUAL_SIZE*8]; |
| 384 | String tmp(buff, sizeof(buff), &my_charset_latin1); |
| 385 | LEX_CSTRING unused; |
| 386 | |
| 387 | if (!result) result= &unused; |
| 388 | |
| 389 | tmp.length(0); |
| 390 | |
| 391 | // note that the last element is always "default", and it's ignored below |
| 392 | for (uint i= 0; lib[i+1]; i++, set >>= 1) |
| 393 | { |
| 394 | tmp.append(lib[i]); |
| 395 | tmp.append(set & 1 ? "=on," : "=off," ); |
| 396 | } |
| 397 | |
| 398 | result->str= thd->strmake(tmp.ptr(), tmp.length()-1); |
| 399 | result->length= tmp.length()-1; |
| 400 | |
| 401 | return result->str; |
| 402 | } |
| 403 | |
| 404 | |