strfunc.cc source code [MariaDB/sql/strfunc.cc]

/* Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
15
/* Some useful string utility functions used by the MySQL server */
17
18	#include "mariadb.h"
19	#include "sql_priv.h"
20	#include "unireg.h"
21	#include "strfunc.h"
22	#include "sql_class.h"
23	#include "typelib.h" // TYPELIB
24	#include "m_ctype.h" // my_charset_latin1
25	#include "mysqld.h" // system_charset_info
26
27	/*
28	Return bitmap for strings used in a set
29
30	SYNOPSIS
31	find_set()
32	lib Strings in set
33	str Strings of set-strings separated by ','
34	err_pos If error, set to point to start of wrong set string
35	err_len If error, set to the length of wrong set string
36	set_warning Set to 1 if some string in set couldn't be used
37
38	NOTE
39	We delete all end space from str before comparison
40
41	RETURN
42	bitmap of all sets found in x.
43	set_warning is set to 1 if there was any sets that couldn't be set
44	*/
45
46	static const char field_separator=`','`;
47
48	ulonglong find_set(TYPELIB lib, const* char str, size_t length, CHARSET_INFO cs,
49	char *err_pos, uint err_len, bool *set_warning)
50	{
51	CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
52	const char *end= str + strip->cset->lengthsp(strip, str, length);
53	ulonglong found= `0`;
54	err_pos= `0`; // No error yet*
55	*err_len= `0`;
56	if (str != end)
57	{
58	const char *start= str;
59	for (;;)
60	{
61	const char *pos= start;
62	uint var_len;
63	int mblen= `1`;
64
65	if (cs && cs->mbminlen > `1`)
66	{
67	for ( ; pos < end; pos+= mblen)
68	{
69	my_wc_t wc;
70	if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos,
71	(const uchar *) end)) < `1`)
72	mblen= `1`; // Not to hang on a wrong multibyte sequence
73	if (wc == (my_wc_t) field_separator)
74	break;
75	}
76	}
77	else
78	for (; pos != end && *pos != field_separator; pos++) ;
79	var_len= (uint) (pos - start);
80	uint find= cs ? find_type2(lib, start, var_len, cs) :
81	find_type(lib, start, var_len, (bool) `0`);
82	if (unlikely(!find && *err_len == `0`))
83	{
84	// report the first error with length > 0
85	err_pos= (char**) start;
86	*err_len= var_len;
87	*set_warning= `1`;
88	}
89	else
90	found\|= `1ULL` << (find - `1`);
91	if (pos >= end)
92	break;
93	start= pos + mblen;
94	}
95	}
96	return found;
97	}
98
99	/*
100	Function to find a string in a TYPELIB
101	(similar to find_type() of mysys/typelib.c)
102
103	SYNOPSIS
104	find_type()
105	lib TYPELIB (struct of pointer to values + count)
106	find String to find
107	length Length of string to find
108	part_match Allow part matching of value
109
110	RETURN
111	0 error
112	> 0 position in TYPELIB->type_names +1
113	*/
114
115	uint find_type(const TYPELIB lib, const* char *find, size_t length,
116	bool part_match)
117	{
118	uint found_count=`0`, found_pos=`0`;
119	const char *end= find+length;
120	const char *i;
121	const char *j;
122	for (uint pos=`0` ; (j=lib->type_names[pos++]) ; )
123	{
124	for (i=find ; i != end &&
125	my_toupper(system_charset_info,*i) ==
126	my_toupper(system_charset_info,*j) ; i++, j++) ;
127	if (i == end)
128	{
129	if (! *j)
130	return(pos);
131	found_count++;
132	found_pos= pos;
133	}
134	}
135	return(found_count == `1` && part_match ? found_pos : `0`);
136	}
137
138
139	/*
140	Find a string in a list of strings according to collation
141
142	SYNOPSIS
143	find_type2()
144	lib TYPELIB (struct of pointer to values + count)
145	x String to find
146	length String length
147	cs Character set + collation to use for comparison
148
149	NOTES
150
151	RETURN
152	0 No matching value
153	>0 Offset+1 in typelib for matched string
154	*/
155
156	uint find_type2(const TYPELIB typelib, const* char *x, size_t length,
157	CHARSET_INFO *cs)
158	{
159	int pos;
160	const char *j;
161	DBUG_ENTER("find_type2");
162	DBUG_PRINT("enter",("x: '%.s' lib: %p", (int*)length, x, typelib));
163
164	if (!typelib->count)
165	{
166	DBUG_PRINT("exit",("no count"));
167	DBUG_RETURN(`0`);
168	}
169
170	for (pos=`0` ; (j=typelib->type_names[pos]) ; pos++)
171	{
172	if (!my_strnncoll(cs, (const uchar*) x, length,
173	(const uchar*) j, typelib->type_lengths[pos]))
174	DBUG_RETURN(pos+`1`);
175	}
176	DBUG_PRINT("exit",("Couldn't find type"));
177	DBUG_RETURN(`0`);
178	} / find_type /
179
180
181	/*
182	Un-hex all elements in a typelib
183
184	SYNOPSIS
185	unhex_type2()
186	interval TYPELIB (struct of pointer to values + lengths + count)
187
188	NOTES
189
190	RETURN
191	N/A
192	*/
193
194	void unhex_type2(TYPELIB *interval)
195	{
196	for (uint pos= `0`; pos < interval->count; pos++)
197	{
198	char from, to;
199	for (from= to= (char) interval->type_names[pos]; from; )
200	{
201	/*
202	Note, hexchar_to_int(from++) doesn't work*
203	one some compilers, e.g. IRIX. Looks like a compiler
204	bug in inline functions in combination with arguments
205	that have a side effect. So, let's use from[0] and from[1]
206	and increment 'from' by two later.
207	*/
208
209	to++= (char*) (hexchar_to_int(from[`0`]) << `4`) +
210	hexchar_to_int(from[`1`]);
211	from+= `2`;
212	}
213	interval->type_lengths[pos] /= `2`;
214	}
215	}
216
217
218	/*
219	Check if the first word in a string is one of the ones in TYPELIB
220
221	SYNOPSIS
222	check_word()
223	lib TYPELIB
224	val String to check
225	end End of input
226	end_of_word Store value of last used byte here if we found word
227
228	RETURN
229	0 No matching value
230	> 1 lib->type_names[#-1] matched
231	end_of_word will point to separator character/end in 'val'
232	*/
233
234	uint check_word(TYPELIB lib, const* char val, const* char *end,
235	const char **end_of_word)
236	{
237	int res;
238	const char *ptr;
239
240	/ Fiend end of word /
241	for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
242	;
243	if ((res=find_type(lib, val, (uint) (ptr - val), `1`)) > `0`)
244	*end_of_word= ptr;
245	return res;
246	}
247
248
249	/*
250	Converts a string between character sets
251
252	SYNOPSIS
253	strconvert()
254	from_cs source character set
255	from source, a null terminated string
256	to destination buffer
257	to_length destination buffer length
258
259	NOTES
260	'to' is always terminated with a '\0' character.
261	If there is no enough space to convert whole string,
262	only prefix is converted, and terminated with '\0'.
263
264	RETURN VALUES
265	result string length
266	*/
267
268
269	uint strconvert(CHARSET_INFO from_cs, const* char *from, size_t from_length,
270	CHARSET_INFO to_cs, char* to, size_t to_length, uint errors)
271	{
272	int cnvres;
273	my_wc_t wc;
274	char *to_start= to;
275	uchar to_end= (uchar) to + to_length - `1`;
276	const uchar from_end= (const* uchar*) from + from_length;
277	my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
278	my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
279	uint error_count= `0`;
280
281	while (`1`)
282	{
283	if ((cnvres= (*mb_wc)(from_cs, &wc,
284	(uchar*) from, from_end)) > `0`)
285	{
286	if (!wc)
287	break;
288	from+= cnvres;
289	}
290	else if (cnvres == MY_CS_ILSEQ)
291	{
292	error_count++;
293	from++;
294	wc= `'?'`;
295	}
296	else
297	break; // Impossible char.
298
299	outp:
300
301	if ((cnvres= (wc_mb)(to_cs, wc, (uchar) to, to_end)) > `0`)
302	to+= cnvres;
303	else if (cnvres == MY_CS_ILUNI && wc != `'?'`)
304	{
305	error_count++;
306	wc= `'?'`;
307	goto outp;
308	}
309	else
310	break;
311	}
312	*to= `'\0'`;
313	*errors= error_count;
314	return (uint32) (to - to_start);
315
316	}
317
318
319	/*
320	Searches for a LEX_STRING in an LEX_STRING array.
321
322	SYNOPSIS
323	find_string_in_array()
324	heap The array
325	needle The string to search for
326
327	NOTE
328	The last LEX_STRING in the array should have str member set to NULL
329
330	RETURN VALUES
331	-1 Not found
332	>=0 Ordinal position
333	*/
334
335	int find_string_in_array(LEX_CSTRING * const haystack, LEX_CSTRING * const needle,
336	CHARSET_INFO * const cs)
337	{
338	const LEX_CSTRING *pos;
339	for (pos= haystack; pos->str; pos++)
340	if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
341	(uchar *) needle->str, needle->length))
342	{
343	return (int)(pos - haystack);
344	}
345	return -`1`;
346	}
347
348
349	const char set_to_string(THD thd, LEX_CSTRING *result, ulonglong set,
350	const char *lib[])
351	{
352	char buff[STRING_BUFFER_USUAL_SIZE*`8`];
353	String tmp(buff, sizeof(buff), &my_charset_latin1);
354	LEX_CSTRING unused;
355
356	if (!result)
357	result= &unused;
358
359	tmp.length(`0`);
360
361	for (uint i= `0`; set; i++, set >>= `1`)
362	if (set & `1`) {
363	tmp.append(lib[i]);
364	tmp.append(`','`);
365	}
366
367	if (tmp.length())
368	{
369	result->str= thd->strmake(tmp.ptr(), tmp.length()-`1`);
370	result->length= tmp.length()-`1`;
371	}
372	else
373	{
374	result->str= const_cast<char*>("");
375	result->length= `0`;
376	}
377	return result->str;
378	}
379
380	const char flagset_to_string(THD thd, LEX_CSTRING *result, ulonglong set,
381	const char *lib[])
382	{
383	char buff[STRING_BUFFER_USUAL_SIZE*`8`];
384	String tmp(buff, sizeof(buff), &my_charset_latin1);
385	LEX_CSTRING unused;
386
387	if (!result) result= &unused;
388
389	tmp.length(`0`);
390
391	// note that the last element is always "default", and it's ignored below
392	for (uint i= `0`; lib[i+`1`]; i++, set >>= `1`)
393	{
394	tmp.append(lib[i]);
395	tmp.append(set & `1` ? "=on," : "=off,");
396	}
397
398	result->str= thd->strmake(tmp.ptr(), tmp.length()-`1`);
399	result->length= tmp.length()-`1`;
400
401	return result->str;
402	}
403
404

Browse the source code of MariaDB/sql/strfunc.cc