| 1 | #include <Columns/ColumnString.h> |
| 2 | #include <DataTypes/DataTypeString.h> |
| 3 | #include <Functions/FunctionFactory.h> |
| 4 | #include <Functions/FunctionHelpers.h> |
| 5 | #include <common/find_symbols.h> |
| 6 | |
| 7 | |
| 8 | namespace DB |
| 9 | { |
| 10 | |
/// Error codes referenced by this file. They are only declared here (`extern`);
/// the definitions live outside this translation unit.
namespace ErrorCodes
{
    /// Used by executeImpl when the argument column is not a ColumnString.
    extern const int ILLEGAL_COLUMN;
    /// Used by getReturnTypeImpl when the argument type is not String.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
| 16 | |
| 17 | class FunctionRegexpQuoteMeta : public IFunction |
| 18 | { |
| 19 | public: |
| 20 | static constexpr auto name = "regexpQuoteMeta" ; |
| 21 | |
| 22 | static FunctionPtr create(const Context &) |
| 23 | { |
| 24 | return std::make_shared<FunctionRegexpQuoteMeta>(); |
| 25 | } |
| 26 | |
| 27 | String getName() const override |
| 28 | { |
| 29 | return name; |
| 30 | } |
| 31 | |
| 32 | size_t getNumberOfArguments() const override |
| 33 | { |
| 34 | return 1; |
| 35 | } |
| 36 | |
| 37 | bool useDefaultImplementationForConstants() const override |
| 38 | { |
| 39 | return true; |
| 40 | } |
| 41 | |
| 42 | DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override |
| 43 | { |
| 44 | if (!WhichDataType(arguments[0].type).isString()) |
| 45 | throw Exception( |
| 46 | "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + ". Must be String." , |
| 47 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
| 48 | |
| 49 | return std::make_shared<DataTypeString>(); |
| 50 | } |
| 51 | |
| 52 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override |
| 53 | { |
| 54 | const ColumnPtr & column_string = block.getByPosition(arguments[0]).column; |
| 55 | const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get()); |
| 56 | |
| 57 | if (!input) |
| 58 | throw Exception( |
| 59 | "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), |
| 60 | ErrorCodes::ILLEGAL_COLUMN); |
| 61 | |
| 62 | auto dst_column = ColumnString::create(); |
| 63 | auto & dst_data = dst_column->getChars(); |
| 64 | auto & dst_offsets = dst_column->getOffsets(); |
| 65 | |
| 66 | dst_offsets.resize(input_rows_count); |
| 67 | |
| 68 | const ColumnString::Offsets & src_offsets = input->getOffsets(); |
| 69 | |
| 70 | auto src_begin = reinterpret_cast<const char *>(input->getChars().data()); |
| 71 | auto src_pos = src_begin; |
| 72 | |
| 73 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) |
| 74 | { |
| 75 | /// NOTE This implementation slightly differs from re2::RE2::QuoteMeta. |
| 76 | /// It escapes zero byte as \0 instead of \x00 |
| 77 | /// and it escapes only required characters. |
| 78 | /// This is Ok. Look at comments in re2.cc |
| 79 | |
| 80 | const char * src_end = src_begin + src_offsets[row_idx] - 1; |
| 81 | |
| 82 | while (true) |
| 83 | { |
| 84 | const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'>(src_pos, src_end); |
| 85 | |
| 86 | size_t bytes_to_copy = next_src_pos - src_pos; |
| 87 | size_t old_dst_size = dst_data.size(); |
| 88 | dst_data.resize(old_dst_size + bytes_to_copy); |
| 89 | memcpySmallAllowReadWriteOverflow15(dst_data.data() + old_dst_size, src_pos, bytes_to_copy); |
| 90 | src_pos = next_src_pos + 1; |
| 91 | |
| 92 | if (next_src_pos == src_end) |
| 93 | { |
| 94 | dst_data.emplace_back('\0'); |
| 95 | break; |
| 96 | } |
| 97 | |
| 98 | dst_data.emplace_back('\\'); |
| 99 | dst_data.emplace_back(*next_src_pos); |
| 100 | } |
| 101 | |
| 102 | dst_offsets[row_idx] = dst_data.size(); |
| 103 | } |
| 104 | |
| 105 | block.getByPosition(result).column = std::move(dst_column); |
| 106 | } |
| 107 | |
| 108 | }; |
| 109 | |
| 110 | void registerFunctionRegexpQuoteMeta(FunctionFactory & factory) |
| 111 | { |
| 112 | factory.registerFunction<FunctionRegexpQuoteMeta>(); |
| 113 | } |
| 114 | } |
| 115 | |