// #define CATCH_CONFIG_RUNNER
#include "catch.hpp"

#include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp"
#include "duckdb/common/file_system.hpp"
#include "duckdb/common/value_operations/value_operations.hpp"
#include "compare_result.hpp"
#include "duckdb/main/query_result.hpp"
#include "test_helpers.hpp"
#include "duckdb/parser/parsed_data/copy_info.hpp"

#include <cmath>
#include <fstream>

using namespace std;

#define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir"

namespace duckdb {

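//! Reports a query failure on stderr instead of aborting the test; returns whether the
//! query succeeded, so callers that do want to fail the test can wrap it in REQUIRE(...).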
bool NO_FAIL(QueryResult &result) {
	if (!result.success) {
		fprintf(stderr, "Query failed with message: %s\n", result.error.c_str());
	}
	return result.success;
}

bool NO_FAIL(unique_ptr<QueryResult> result) {
	return NO_FAIL(*result);
}

void TestDeleteDirectory(string path) {
	FileSystem fs;
	if (fs.DirectoryExists(path)) {
		fs.RemoveDirectory(path);
	}
}

void TestDeleteFile(string path) {
	FileSystem fs;
	if (fs.FileExists(path)) {
		fs.RemoveFile(path);
	}
}

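//! Removes both the database file and its write-ahead log, so a test starts from a clean slate.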
void DeleteDatabase(string path) {
	TestDeleteFile(path);
	TestDeleteFile(path + ".wal");
}

void TestCreateDirectory(string path) {
	FileSystem fs;
	fs.CreateDirectory(path);
}

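//! Returns a path inside the shared testing directory, creating that directory on first use.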
string TestCreatePath(string suffix) {
	FileSystem fs;
	if (!fs.DirectoryExists(TESTING_DIRECTORY_NAME)) {
		fs.CreateDirectory(TESTING_DIRECTORY_NAME);
	}
	return fs.JoinPath(TESTING_DIRECTORY_NAME, suffix);
}

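//! Default configuration used by the tests; a checkpoint_wal_size of 0 makes the
//! write-ahead log eligible for checkpointing as eagerly as possible.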
unique_ptr<DBConfig> GetTestConfig() {
	auto result = make_unique<DBConfig>();
	result->checkpoint_wal_size = 0;
	return result;
}

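//! Returns an empty directory for CSV test files, removing any leftovers from a previous run.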
string GetCSVPath() {
	FileSystem fs;
	string csv_path = TestCreatePath("csv_files");
	if (fs.DirectoryExists(csv_path)) {
		fs.RemoveDirectory(csv_path);
	}
	fs.CreateDirectory(csv_path);
	return csv_path;
}

void WriteCSV(string path, const char *csv) {
	ofstream csv_writer(path);
	csv_writer << csv;
	csv_writer.close();
}

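//! Writes a raw byte buffer of the given length to a file.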
void WriteBinary(string path, const uint8_t *data, uint64_t length) {
	ofstream binary_writer(path, ios::binary);
	binary_writer.write((const char *)data, length);
	binary_writer.close();
}

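//! Verifies that the given column of the result matches the expected values exactly,
//! printing the result and returning false on any mismatch. Streaming results are
//! materialized first so their chunks can be scanned.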
bool CHECK_COLUMN(QueryResult &result_, size_t column_number, vector<duckdb::Value> values) {
	unique_ptr<MaterializedQueryResult> materialized;
	if (result_.type == QueryResultType::STREAM_RESULT) {
		materialized = ((StreamQueryResult &)result_).Materialize();
	}
	auto &result = materialized ? *materialized : (MaterializedQueryResult &)result_;
	if (!result.success) {
		fprintf(stderr, "Query failed with message: %s\n", result.error.c_str());
		return false;
	}
	if (result.names.size() != result.types.size()) {
		// the number of column names does not match the number of column types
		result.Print();
		return false;
	}
	if (values.size() == 0) {
		// no values expected: the result must be empty
		if (result.collection.count != 0) {
			result.Print();
			return false;
		}
		return true;
	}
	if (result.collection.count == 0) {
		// values expected, but the result is empty
		result.Print();
		return false;
	}
	if (column_number >= result.types.size()) {
		// column number out of range of the result
		result.Print();
		return false;
	}
	size_t chunk_index = 0;
	for (size_t i = 0; i < values.size();) {
		if (chunk_index >= result.collection.chunks.size()) {
			// ran out of chunks while expected values remain
			result.Print();
			return false;
		}
		// check the values of this chunk against the expected values
		auto &chunk = *result.collection.chunks[chunk_index];
		auto &vector = chunk.data[column_number];
		if (i + chunk.size() > values.size()) {
			// this chunk has more rows than there are expected values left
			result.Print();
			return false;
		}
		for (size_t j = 0; j < chunk.size(); j++) {
			// NULL <> NULL, hence special handling
			if (vector.GetValue(j).is_null && values[i + j].is_null) {
				continue;
			}
			if (!Value::ValuesAreEqual(vector.GetValue(j), values[i + j])) {
				// FAIL("Incorrect result! Got " + vector.GetValue(j).ToString() +
				//      " but expected " + values[i + j].ToString());
				result.Print();
				return false;
			}
		}
		chunk_index++;
		i += chunk.size();
	}
	return true;
}

bool CHECK_COLUMN(unique_ptr<duckdb::QueryResult> &result, size_t column_number, vector<duckdb::Value> values) {
	return CHECK_COLUMN(*result, column_number, values);
}

bool CHECK_COLUMN(unique_ptr<duckdb::MaterializedQueryResult> &result, size_t column_number,
                  vector<duckdb::Value> values) {
	return CHECK_COLUMN((QueryResult &)*result, column_number, values);
}

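// Typical usage of the helpers above from a Catch test case. This is a minimal sketch for
// illustration only: the TEST_CASE below is not part of this file, and assumes the usual
// DuckDB test setup of an in-memory DuckDB instance with a Connection issuing queries.
//
//   TEST_CASE("Example usage of the check helpers", "[helpers]") {
//       DuckDB db(nullptr);
//       Connection con(db);
//       auto result = con.Query("SELECT 42 AS i");
//       REQUIRE(NO_FAIL(*result));
//       REQUIRE(CHECK_COLUMN(result, 0, {Value::INTEGER(42)}));
//   }

//! Compares a materialized query result against a pipe-delimited CSV string; returns an
//! empty string if they match and an error message otherwise.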
string compare_csv(duckdb::QueryResult &result, string csv, bool header) {
	assert(result.type == QueryResultType::MATERIALIZED_RESULT);
	auto &materialized = (MaterializedQueryResult &)result;
	if (!materialized.success) {
		fprintf(stderr, "Query failed with message: %s\n", materialized.error.c_str());
		return materialized.error;
	}
	string error;
	if (!compare_result(csv, materialized.collection, materialized.sql_types, header, error)) {
		return error;
	}
	return "";
}

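//! Renders a human-readable diff between two chunks: per column, matching values are
//! shown as "_" and differing values are printed side by side.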
string show_diff(DataChunk &left, DataChunk &right) {
	if (left.column_count() != right.column_count()) {
		return StringUtil::Format("Different column counts: %d vs %d", (int)left.column_count(),
		                          (int)right.column_count());
	}
	if (left.size() != right.size()) {
		return StringUtil::Format("Different sizes: %zu vs %zu", left.size(), right.size());
	}
	string difference;
	for (size_t i = 0; i < left.column_count(); i++) {
		bool has_differences = false;
		auto &left_vector = left.data[i];
		auto &right_vector = right.data[i];
		string left_column = StringUtil::Format("Result\n------\n%s [", TypeIdToString(left_vector.type).c_str());
		string right_column = StringUtil::Format("Expect\n------\n%s [", TypeIdToString(right_vector.type).c_str());
		if (left_vector.type == right_vector.type) {
			for (size_t j = 0; j < left.size(); j++) {
				auto left_value = left_vector.GetValue(j);
				auto right_value = right_vector.GetValue(j);
				if (!Value::ValuesAreEqual(left_value, right_value)) {
					left_column += left_value.ToString() + ",";
					right_column += right_value.ToString() + ",";
					has_differences = true;
				} else {
					left_column += "_,";
					right_column += "_,";
				}
			}
		} else {
			left_column += "...";
			right_column += "...";
		}
		left_column += "]\n";
		right_column += "]\n";
		if (has_differences) {
			difference += StringUtil::Format("Difference in column %d:\n", (int)i);
			difference += left_column + "\n" + right_column + "\n";
		}
	}
	return difference;
}

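//! Returns true if both chunks have the same dimensions and equal values in every column
//! whose types match; columns with differing types are skipped.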
bool compare_chunk(DataChunk &left, DataChunk &right) {
	if (left.column_count() != right.column_count()) {
		return false;
	}
	if (left.size() != right.size()) {
		return false;
	}
	for (size_t i = 0; i < left.column_count(); i++) {
		auto &left_vector = left.data[i];
		auto &right_vector = right.data[i];
		if (left_vector.type == right_vector.type) {
			for (size_t j = 0; j < left.size(); j++) {
				auto left_value = left_vector.GetValue(j);
				auto right_value = right_vector.GetValue(j);
				if (!Value::ValuesAreEqual(left_value, right_value)) {
					return false;
				}
			}
		}
	}
	return true;
}

//! Compares the result of a pipe-delimited CSV with the given ChunkCollection
//! Returns true if they are equal, and stores an error message in error_message otherwise
bool compare_result(string csv, ChunkCollection &collection, vector<SQLType> sql_types, bool has_header,
                    string &error_message) {
	assert(collection.count == 0 || collection.types.size() == sql_types.size());

	// set up the CSV reader
	CopyInfo info;
	info.delimiter = "|";
	info.header = true;
	info.quote = "\"";
	info.escape = "\"";
	// set up the intermediate result chunk
	vector<TypeId> internal_types;
	for (auto &type : sql_types) {
		internal_types.push_back(GetInternalType(type));
	}
	DataChunk parsed_result;
	parsed_result.Initialize(internal_types);

	// convert the CSV string into a stringstream
	auto source = make_unique<istringstream>(csv);

	BufferedCSVReader reader(info, sql_types, move(source));
	idx_t collection_index = 0;
	idx_t tuple_count = 0;
	while (true) {
		// parse a chunk from the CSV
		try {
			parsed_result.Reset();
			reader.ParseCSV(parsed_result);
		} catch (Exception &ex) {
			error_message = "Could not parse CSV: " + string(ex.what());
			return false;
		}
		if (parsed_result.size() == 0) {
			// out of tuples in the CSV
			if (collection_index < collection.chunks.size()) {
				error_message = StringUtil::Format("Too many tuples in result! Found %llu tuples, but expected %llu",
				                                   collection.count, tuple_count);
				return false;
			}
			return true;
		}
		if (collection_index >= collection.chunks.size()) {
			// ran out of chunks in the collection, but there are still tuples in the result
			// keep parsing the CSV to get the total expected count
			while (parsed_result.size() > 0) {
				tuple_count += parsed_result.size();
				parsed_result.Reset();
				reader.ParseCSV(parsed_result);
			}
			error_message = StringUtil::Format("Too few tuples in result! Found %llu tuples, but expected %llu",
			                                   collection.count, tuple_count);
			return false;
		}
		// compare the tuples of this chunk against the parsed CSV chunk
		if (!compare_chunk(*collection.chunks[collection_index], parsed_result)) {
			error_message = show_diff(*collection.chunks[collection_index], parsed_result);
			return false;
		}

		collection_index++;
		tuple_count += parsed_result.size();
	}
}

} // namespace duckdb