| 1 | #include <Columns/ColumnFixedString.h> |
| 2 | #include <Columns/ColumnsNumber.h> |
| 3 | #include <Columns/ColumnConst.h> |
| 4 | |
| 5 | #include <Formats/FormatSettings.h> |
| 6 | #include <Formats/ProtobufReader.h> |
| 7 | #include <Formats/ProtobufWriter.h> |
| 8 | #include <DataTypes/DataTypeFixedString.h> |
| 9 | #include <DataTypes/DataTypeFactory.h> |
| 10 | |
| 11 | #include <IO/WriteBuffer.h> |
| 12 | #include <IO/ReadHelpers.h> |
| 13 | #include <IO/WriteHelpers.h> |
| 14 | #include <IO/VarInt.h> |
| 15 | |
| 16 | #include <Parsers/IAST.h> |
| 17 | #include <Parsers/ASTLiteral.h> |
| 18 | |
| 19 | #include <Common/typeid_cast.h> |
| 20 | #include <Common/assert_cast.h> |
| 21 | |
| 22 | |
| 23 | namespace DB |
| 24 | { |
| 25 | |
/// Error codes thrown from this file. These are declarations only (extern);
/// the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int CANNOT_READ_ALL_DATA;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int TOO_LARGE_STRING_SIZE;
    extern const int UNEXPECTED_AST_STRUCTURE;
}
| 33 | |
| 34 | |
| 35 | std::string DataTypeFixedString::doGetName() const |
| 36 | { |
| 37 | return type_name + "(" + toString(n) + ")" ; |
| 38 | } |
| 39 | |
| 40 | |
| 41 | void DataTypeFixedString::serializeBinary(const Field & field, WriteBuffer & ostr) const |
| 42 | { |
| 43 | const String & s = get<const String &>(field); |
| 44 | ostr.write(s.data(), std::min(s.size(), n)); |
| 45 | if (s.size() < n) |
| 46 | for (size_t i = s.size(); i < n; ++i) |
| 47 | ostr.write(0); |
| 48 | } |
| 49 | |
| 50 | |
| 51 | void DataTypeFixedString::deserializeBinary(Field & field, ReadBuffer & istr) const |
| 52 | { |
| 53 | field = String(); |
| 54 | String & s = get<String &>(field); |
| 55 | s.resize(n); |
| 56 | istr.readStrict(s.data(), n); |
| 57 | } |
| 58 | |
| 59 | |
| 60 | void DataTypeFixedString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const |
| 61 | { |
| 62 | ostr.write(reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]), n); |
| 63 | } |
| 64 | |
| 65 | |
| 66 | void DataTypeFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr) const |
| 67 | { |
| 68 | ColumnFixedString::Chars & data = assert_cast<ColumnFixedString &>(column).getChars(); |
| 69 | size_t old_size = data.size(); |
| 70 | data.resize(old_size + n); |
| 71 | try |
| 72 | { |
| 73 | istr.readStrict(reinterpret_cast<char *>(data.data() + old_size), n); |
| 74 | } |
| 75 | catch (...) |
| 76 | { |
| 77 | data.resize_assume_reserved(old_size); |
| 78 | throw; |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | |
| 83 | void DataTypeFixedString::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const |
| 84 | { |
| 85 | const ColumnFixedString::Chars & data = typeid_cast<const ColumnFixedString &>(column).getChars(); |
| 86 | |
| 87 | size_t size = data.size() / n; |
| 88 | |
| 89 | if (limit == 0 || offset + limit > size) |
| 90 | limit = size - offset; |
| 91 | |
| 92 | if (limit) |
| 93 | ostr.write(reinterpret_cast<const char *>(&data[n * offset]), n * limit); |
| 94 | } |
| 95 | |
| 96 | |
| 97 | void DataTypeFixedString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const |
| 98 | { |
| 99 | ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars(); |
| 100 | |
| 101 | size_t initial_size = data.size(); |
| 102 | size_t max_bytes = limit * n; |
| 103 | data.resize(initial_size + max_bytes); |
| 104 | size_t read_bytes = istr.readBig(reinterpret_cast<char *>(&data[initial_size]), max_bytes); |
| 105 | |
| 106 | if (read_bytes % n != 0) |
| 107 | throw Exception("Cannot read all data of type FixedString. Bytes read:" + toString(read_bytes) + ". String size:" + toString(n) + "." , |
| 108 | ErrorCodes::CANNOT_READ_ALL_DATA); |
| 109 | |
| 110 | data.resize(initial_size + read_bytes); |
| 111 | } |
| 112 | |
| 113 | |
| 114 | void DataTypeFixedString::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const |
| 115 | { |
| 116 | writeString(reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]), n, ostr); |
| 117 | } |
| 118 | |
| 119 | |
| 120 | void DataTypeFixedString::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const |
| 121 | { |
| 122 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
| 123 | writeAnyEscapedString<'\''>(pos, pos + n, ostr); |
| 124 | } |
| 125 | |
| 126 | |
| 127 | static inline void alignStringLength(const DataTypeFixedString & type, |
| 128 | ColumnFixedString::Chars & data, |
| 129 | size_t string_start) |
| 130 | { |
| 131 | size_t length = data.size() - string_start; |
| 132 | if (length < type.getN()) |
| 133 | { |
| 134 | data.resize_fill(string_start + type.getN()); |
| 135 | } |
| 136 | else if (length > type.getN()) |
| 137 | { |
| 138 | data.resize_assume_reserved(string_start); |
| 139 | throw Exception("Too large value for " + type.getName(), ErrorCodes::TOO_LARGE_STRING_SIZE); |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | template <typename Reader> |
| 144 | static inline void read(const DataTypeFixedString & self, IColumn & column, Reader && reader) |
| 145 | { |
| 146 | ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars(); |
| 147 | size_t prev_size = data.size(); |
| 148 | try |
| 149 | { |
| 150 | reader(data); |
| 151 | alignStringLength(self, data, prev_size); |
| 152 | } |
| 153 | catch (...) |
| 154 | { |
| 155 | data.resize_assume_reserved(prev_size); |
| 156 | throw; |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | |
| 161 | void DataTypeFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const |
| 162 | { |
| 163 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto(data, istr); }); |
| 164 | } |
| 165 | |
| 166 | |
| 167 | void DataTypeFixedString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const |
| 168 | { |
| 169 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
| 170 | writeAnyQuotedString<'\''>(pos, pos + n, ostr); |
| 171 | } |
| 172 | |
| 173 | |
| 174 | void DataTypeFixedString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const |
| 175 | { |
| 176 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readQuotedStringInto<true>(data, istr); }); |
| 177 | } |
| 178 | |
| 179 | |
| 180 | void DataTypeFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const |
| 181 | { |
| 182 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readStringInto(data, istr); }); |
| 183 | } |
| 184 | |
| 185 | |
| 186 | void DataTypeFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
| 187 | { |
| 188 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
| 189 | writeJSONString(pos, pos + n, ostr, settings); |
| 190 | } |
| 191 | |
| 192 | |
| 193 | void DataTypeFixedString::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const |
| 194 | { |
| 195 | read(*this, column, [&istr](ColumnFixedString::Chars & data) { readJSONStringInto(data, istr); }); |
| 196 | } |
| 197 | |
| 198 | |
| 199 | void DataTypeFixedString::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const |
| 200 | { |
| 201 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
| 202 | writeXMLString(pos, pos + n, ostr); |
| 203 | } |
| 204 | |
| 205 | |
| 206 | void DataTypeFixedString::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const |
| 207 | { |
| 208 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
| 209 | writeCSVString(pos, pos + n, ostr); |
| 210 | } |
| 211 | |
| 212 | |
| 213 | void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
| 214 | { |
| 215 | read(*this, column, [&istr, &csv = settings.csv](ColumnFixedString::Chars & data) { readCSVStringInto(data, istr, csv); }); |
| 216 | } |
| 217 | |
| 218 | |
| 219 | void DataTypeFixedString::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const |
| 220 | { |
| 221 | if (value_index) |
| 222 | return; |
| 223 | const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]); |
| 224 | value_index = static_cast<bool>(protobuf.writeString(StringRef(pos, n))); |
| 225 | } |
| 226 | |
| 227 | |
| 228 | void DataTypeFixedString::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const |
| 229 | { |
| 230 | row_added = false; |
| 231 | auto & column_string = assert_cast<ColumnFixedString &>(column); |
| 232 | ColumnFixedString::Chars & data = column_string.getChars(); |
| 233 | size_t old_size = data.size(); |
| 234 | try |
| 235 | { |
| 236 | if (allow_add_row) |
| 237 | { |
| 238 | if (protobuf.readStringInto(data)) |
| 239 | { |
| 240 | alignStringLength(*this, data, old_size); |
| 241 | row_added = true; |
| 242 | } |
| 243 | else |
| 244 | data.resize_assume_reserved(old_size); |
| 245 | } |
| 246 | else |
| 247 | { |
| 248 | ColumnFixedString::Chars temp_data; |
| 249 | if (protobuf.readStringInto(temp_data)) |
| 250 | { |
| 251 | alignStringLength(*this, temp_data, 0); |
| 252 | column_string.popBack(1); |
| 253 | old_size = data.size(); |
| 254 | data.insertSmallAllowReadWriteOverflow15(temp_data.begin(), temp_data.end()); |
| 255 | } |
| 256 | } |
| 257 | } |
| 258 | catch (...) |
| 259 | { |
| 260 | data.resize_assume_reserved(old_size); |
| 261 | throw; |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | |
| 266 | MutableColumnPtr DataTypeFixedString::createColumn() const |
| 267 | { |
| 268 | return ColumnFixedString::create(n); |
| 269 | } |
| 270 | |
| 271 | Field DataTypeFixedString::getDefault() const |
| 272 | { |
| 273 | return String(); |
| 274 | } |
| 275 | |
| 276 | bool DataTypeFixedString::equals(const IDataType & rhs) const |
| 277 | { |
| 278 | return typeid(rhs) == typeid(*this) && n == static_cast<const DataTypeFixedString &>(rhs).n; |
| 279 | } |
| 280 | |
| 281 | |
| 282 | static DataTypePtr create(const String & type_name, const ASTPtr & arguments) |
| 283 | { |
| 284 | if (!arguments || arguments->children.size() != 1) |
| 285 | throw Exception("FixedString data type family must have exactly one argument - size in bytes" , ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); |
| 286 | |
| 287 | const auto * argument = arguments->children[0]->as<ASTLiteral>(); |
| 288 | if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get<UInt64>() == 0) |
| 289 | throw Exception("FixedString data type family must have a number (positive integer) as its argument" , ErrorCodes::UNEXPECTED_AST_STRUCTURE); |
| 290 | |
| 291 | return std::make_shared<DataTypeFixedString>(argument->value.get<UInt64>(), type_name); |
| 292 | } |
| 293 | |
| 294 | |
| 295 | void registerDataTypeFixedString(DataTypeFactory & factory) |
| 296 | { |
| 297 | factory.registerDataType("FixedString" , create); |
| 298 | |
| 299 | /// Compatibility alias. |
| 300 | factory.registerAlias("BINARY" , "FixedString" , DataTypeFactory::CaseInsensitive); |
| 301 | } |
| 302 | |
| 303 | } |
| 304 | |