| 1 | #include <iostream> |
| 2 | |
| 3 | #include <boost/program_options.hpp> |
| 4 | #include <boost/algorithm/string/predicate.hpp> |
| 5 | |
| 6 | #include <Compression/CompressedWriteBuffer.h> |
| 7 | #include <Compression/CompressedReadBuffer.h> |
| 8 | #include <IO/WriteHelpers.h> |
| 9 | #include <IO/Operators.h> |
| 10 | #include <IO/ReadBufferFromFile.h> |
| 11 | #include <IO/ReadHelpers.h> |
| 12 | #include <IO/WriteBufferFromFileDescriptor.h> |
| 13 | #include <Compression/CompressedReadBufferFromFile.h> |
| 14 | |
| 15 | |
/** This program checks the correctness of a .mrk or .mrk2 (marks) file against the corresponding compressed .bin file.
  */
| 18 | |
| 19 | |
/// Error codes used in this file; only declared here, the definitions live elsewhere.
namespace DB::ErrorCodes
{
    extern const int TOO_LARGE_SIZE_COMPRESSED;
}
| 27 | |
| 28 | |
| 29 | /// Read and check header of compressed block. Print size of decompressed and compressed data. |
| 30 | std::pair<UInt32, UInt32> stat(DB::ReadBuffer & in, DB::WriteBuffer & out) |
| 31 | { |
| 32 | if (in.eof()) |
| 33 | return {}; |
| 34 | |
| 35 | in.ignore(16); /// checksum |
| 36 | |
| 37 | char [COMPRESSED_BLOCK_HEADER_SIZE]; |
| 38 | in.readStrict(header, COMPRESSED_BLOCK_HEADER_SIZE); |
| 39 | |
| 40 | UInt32 size_compressed = unalignedLoad<UInt32>(&header[1]); |
| 41 | |
| 42 | if (size_compressed > DBMS_MAX_COMPRESSED_SIZE) |
| 43 | throw DB::Exception("Too large size_compressed. Most likely corrupted data." , DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED); |
| 44 | |
| 45 | UInt32 size_decompressed = unalignedLoad<UInt32>(&header[5]); |
| 46 | |
| 47 | return {size_compressed, size_decompressed}; |
| 48 | } |
| 49 | |
| 50 | |
| 51 | void (const std::string & mrk_path, const std::string & bin_path) |
| 52 | { |
| 53 | DB::ReadBufferFromFile mrk_in(mrk_path); |
| 54 | DB::ReadBufferFromFile bin_in(bin_path, 4096); /// Small buffer size just to check header of compressed block. |
| 55 | |
| 56 | DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); |
| 57 | |
| 58 | for (size_t mark_num = 0; !mrk_in.eof(); ++mark_num) |
| 59 | { |
| 60 | UInt64 offset_in_compressed_file = 0; |
| 61 | UInt64 offset_in_decompressed_block = 0; |
| 62 | |
| 63 | DB::readBinary(offset_in_compressed_file, mrk_in); |
| 64 | DB::readBinary(offset_in_decompressed_block, mrk_in); |
| 65 | |
| 66 | out << "Mark " << mark_num << ", points to " << offset_in_compressed_file << ", " << offset_in_decompressed_block << ". " ; |
| 67 | |
| 68 | bin_in.seek(offset_in_compressed_file); |
| 69 | auto sizes = stat(bin_in, out); |
| 70 | |
| 71 | out << "Block sizes: " << sizes.first << ", " << sizes.second << '\n' << DB::flush; |
| 72 | } |
| 73 | } |
| 74 | |
| 75 | |
| 76 | void checkByCompressedReadBuffer(const std::string & mrk_path, const std::string & bin_path) |
| 77 | { |
| 78 | DB::ReadBufferFromFile mrk_in(mrk_path); |
| 79 | DB::CompressedReadBufferFromFile bin_in(bin_path, 0, 0); |
| 80 | |
| 81 | DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); |
| 82 | bool mrk2_format = boost::algorithm::ends_with(mrk_path, ".mrk2" ); |
| 83 | |
| 84 | for (size_t mark_num = 0; !mrk_in.eof(); ++mark_num) |
| 85 | { |
| 86 | UInt64 offset_in_compressed_file = 0; |
| 87 | UInt64 offset_in_decompressed_block = 0; |
| 88 | UInt64 index_granularity_rows = 0; |
| 89 | |
| 90 | DB::readBinary(offset_in_compressed_file, mrk_in); |
| 91 | DB::readBinary(offset_in_decompressed_block, mrk_in); |
| 92 | |
| 93 | out << "Mark " << mark_num << ", points to " << offset_in_compressed_file << ", " << offset_in_decompressed_block; |
| 94 | |
| 95 | if (mrk2_format) |
| 96 | { |
| 97 | DB::readBinary(index_granularity_rows, mrk_in); |
| 98 | |
| 99 | out << ", has rows after " << index_granularity_rows; |
| 100 | } |
| 101 | |
| 102 | out << ".\n" << DB::flush; |
| 103 | |
| 104 | bin_in.seek(offset_in_compressed_file, offset_in_decompressed_block); |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | |
| 109 | int main(int argc, char ** argv) |
| 110 | { |
| 111 | boost::program_options::options_description desc("Allowed options" ); |
| 112 | desc.add_options() |
| 113 | ("help,h" , "produce help message" ) |
| 114 | ; |
| 115 | |
| 116 | boost::program_options::variables_map options; |
| 117 | boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); |
| 118 | |
| 119 | if (options.count("help" ) || argc != 3) |
| 120 | { |
| 121 | std::cout << "Usage: " << argv[0] << " file.mrk file.bin" << std::endl; |
| 122 | std::cout << desc << std::endl; |
| 123 | return 1; |
| 124 | } |
| 125 | |
| 126 | try |
| 127 | { |
| 128 | /// checkCompressedHeaders(argv[1], argv[2]); |
| 129 | checkByCompressedReadBuffer(argv[1], argv[2]); |
| 130 | } |
| 131 | catch (const DB::Exception & e) |
| 132 | { |
| 133 | std::cerr << e.what() << ", " << e.message() << std::endl |
| 134 | << std::endl |
| 135 | << "Stack trace:" << std::endl |
| 136 | << e.getStackTrace().toString() |
| 137 | << std::endl; |
| 138 | throw; |
| 139 | } |
| 140 | |
| 141 | return 0; |
| 142 | } |
| 143 | |