| 1 | /**************************************************************************/ |
| 2 | /* script_iterator.cpp */ |
| 3 | /**************************************************************************/ |
| 4 | /* This file is part of: */ |
| 5 | /* GODOT ENGINE */ |
| 6 | /* https://godotengine.org */ |
| 7 | /**************************************************************************/ |
| 8 | /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ |
| 9 | /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ |
| 10 | /* */ |
| 11 | /* Permission is hereby granted, free of charge, to any person obtaining */ |
| 12 | /* a copy of this software and associated documentation files (the */ |
| 13 | /* "Software"), to deal in the Software without restriction, including */ |
| 14 | /* without limitation the rights to use, copy, modify, merge, publish, */ |
| 15 | /* distribute, sublicense, and/or sell copies of the Software, and to */ |
| 16 | /* permit persons to whom the Software is furnished to do so, subject to */ |
| 17 | /* the following conditions: */ |
| 18 | /* */ |
| 19 | /* The above copyright notice and this permission notice shall be */ |
| 20 | /* included in all copies or substantial portions of the Software. */ |
| 21 | /* */ |
| 22 | /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ |
| 23 | /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ |
| 24 | /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ |
| 25 | /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ |
| 26 | /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ |
| 27 | /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ |
| 28 | /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
| 29 | /**************************************************************************/ |
| 30 | |
| 31 | #include "script_iterator.h" |
| 32 | |
| 33 | // This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp |
| 34 | |
| 35 | bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) { |
| 36 | return p_script_one <= USCRIPT_INHERITED || p_script_two <= USCRIPT_INHERITED || p_script_one == p_script_two; |
| 37 | } |
| 38 | |
| 39 | ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length) { |
| 40 | struct ParenStackEntry { |
| 41 | int pair_index; |
| 42 | UScriptCode script_code; |
| 43 | }; |
| 44 | |
| 45 | if (p_start >= p_length) { |
| 46 | p_start = p_length - 1; |
| 47 | } |
| 48 | |
| 49 | if (p_start < 0) { |
| 50 | p_start = 0; |
| 51 | } |
| 52 | |
| 53 | int paren_size = PAREN_STACK_DEPTH; |
| 54 | ParenStackEntry *paren_stack = static_cast<ParenStackEntry *>(memalloc(paren_size * sizeof(ParenStackEntry))); |
| 55 | |
| 56 | int script_start; |
| 57 | int script_end = p_start; |
| 58 | UScriptCode script_code; |
| 59 | int paren_sp = -1; |
| 60 | int start_sp = paren_sp; |
| 61 | UErrorCode err = U_ZERO_ERROR; |
| 62 | const char32_t *str = p_string.ptr(); |
| 63 | |
| 64 | do { |
| 65 | script_code = USCRIPT_COMMON; |
| 66 | for (script_start = script_end; script_end < p_length; script_end++) { |
| 67 | UChar32 ch = str[script_end]; |
| 68 | UScriptCode sc = uscript_getScript(ch, &err); |
| 69 | if (U_FAILURE(err)) { |
| 70 | memfree(paren_stack); |
| 71 | ERR_FAIL_MSG(u_errorName(err)); |
| 72 | } |
| 73 | if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) != U_BPT_NONE) { |
| 74 | if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_OPEN) { |
| 75 | // If it's an open character, push it onto the stack. |
| 76 | paren_sp++; |
| 77 | if (unlikely(paren_sp >= paren_size)) { |
| 78 | // If the stack is full, allocate more space to handle deeply nested parentheses. This is unlikely to happen with any real text. |
| 79 | paren_size += PAREN_STACK_DEPTH; |
| 80 | paren_stack = static_cast<ParenStackEntry *>(memrealloc(paren_stack, paren_size * sizeof(ParenStackEntry))); |
| 81 | } |
| 82 | paren_stack[paren_sp].pair_index = ch; |
| 83 | paren_stack[paren_sp].script_code = script_code; |
| 84 | } else if (paren_sp >= 0) { |
| 85 | // If it's a close character, find the matching open on the stack, and use that script code. Any non-matching open characters above it on the stack will be popped. |
| 86 | UChar32 paired_ch = u_getBidiPairedBracket(ch); |
| 87 | while (paren_sp >= 0 && paren_stack[paren_sp].pair_index != paired_ch) { |
| 88 | paren_sp -= 1; |
| 89 | } |
| 90 | if (paren_sp < start_sp) { |
| 91 | start_sp = paren_sp; |
| 92 | } |
| 93 | if (paren_sp >= 0) { |
| 94 | sc = paren_stack[paren_sp].script_code; |
| 95 | } |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | if (same_script(script_code, sc)) { |
| 100 | if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) { |
| 101 | script_code = sc; |
| 102 | // Now that we have a final script code, fix any open characters we pushed before we knew the script code. |
| 103 | while (start_sp < paren_sp) { |
| 104 | paren_stack[++start_sp].script_code = script_code; |
| 105 | } |
| 106 | } |
| 107 | if ((u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_CLOSE) && paren_sp >= 0) { |
| 108 | // If this character is a close paired character pop the matching open character from the stack. |
| 109 | paren_sp -= 1; |
| 110 | if (start_sp >= 0) { |
| 111 | start_sp -= 1; |
| 112 | } |
| 113 | } |
| 114 | } else { |
| 115 | break; |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | ScriptRange rng; |
| 120 | rng.script = hb_icu_script_to_script(script_code); |
| 121 | rng.start = script_start; |
| 122 | rng.end = script_end; |
| 123 | |
| 124 | script_ranges.push_back(rng); |
| 125 | } while (script_end < p_length); |
| 126 | |
| 127 | memfree(paren_stack); |
| 128 | } |
| 129 | |