#include "duckdb/execution/operator/persistent/physical_export.hpp"

#include "duckdb/catalog/catalog.hpp"
#include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
#include "duckdb/common/file_system.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/parallel/meta_pipeline.hpp"
#include "duckdb/parallel/pipeline.hpp"
#include "duckdb/parser/keyword_helper.hpp"
#include "duckdb/transaction/transaction.hpp"

#include <algorithm>
#include <sstream>

namespace duckdb {

using std::stringstream;

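// Writes the SQL definition of every non-internal catalog entry in the list to the stream, one statement per line.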
static void WriteCatalogEntries(stringstream &ss, vector<reference<CatalogEntry>> &entries) {
	for (auto &entry : entries) {
		if (entry.get().internal) {
			continue;
		}
		ss << entry.get().ToSQL() << std::endl;
	}
	ss << std::endl;
}

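// Writes the contents of a stringstream to a newly created file at the given path, holding a write lock.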
static void WriteStringStreamToFile(FileSystem &fs, stringstream &ss, const string &path) {
	auto ss_string = ss.str();
	auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW,
	                          FileLockType::WRITE_LOCK);
	fs.Write(*handle, (void *)ss_string.c_str(), ss_string.size());
	handle.reset();
}

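// Writes a Value as a SQL literal: numeric values are written verbatim, everything else is single-quoted.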
static void WriteValueAsSQL(stringstream &ss, Value &val) {
	if (val.type().IsNumeric()) {
		ss << val.ToString();
	} else {
		ss << "'" << val.ToString() << "'";
	}
}

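// Writes a COPY ... FROM ... statement that reloads the exported data file of a single table,
// filling in default CSV options (header, delimiter, quote) when they were not specified explicitly.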
static void WriteCopyStatement(FileSystem &fs, stringstream &ss, CopyInfo &info, ExportedTableData &exported_table,
                               CopyFunction const &function) {
	ss << "COPY ";

	if (exported_table.schema_name != DEFAULT_SCHEMA) {
		ss << KeywordHelper::WriteOptionallyQuoted(exported_table.schema_name) << ".";
	}

	ss << StringUtil::Format("%s FROM %s (", SQLIdentifier(exported_table.table_name),
	                         SQLString(exported_table.file_path));

	// write the copy options
	ss << "FORMAT '" << info.format << "'";
	if (info.format == "csv") {
		// insert default csv options, if not specified
		if (info.options.find("header") == info.options.end()) {
			info.options["header"].push_back(Value::INTEGER(0));
		}
		if (info.options.find("delimiter") == info.options.end() && info.options.find("sep") == info.options.end() &&
		    info.options.find("delim") == info.options.end()) {
			info.options["delimiter"].push_back(Value(","));
		}
		if (info.options.find("quote") == info.options.end()) {
			info.options["quote"].push_back(Value("\""));
		}
	}
	for (auto &copy_option : info.options) {
		if (copy_option.first == "force_quote") {
			continue;
		}
		ss << ", " << copy_option.first << " ";
		if (copy_option.second.size() == 1) {
			WriteValueAsSQL(ss, copy_option.second[0]);
		} else {
			// FIXME handle multiple options
			throw NotImplementedException("FIXME: serialize list of options");
		}
	}
	ss << ");" << std::endl;
}

//===--------------------------------------------------------------------===//
// Source
//===--------------------------------------------------------------------===//
class ExportSourceState : public GlobalSourceState {
public:
	ExportSourceState() : finished(false) {
	}

	bool finished;
};

unique_ptr<GlobalSourceState> PhysicalExport::GetGlobalSourceState(ClientContext &context) const {
	return make_uniq<ExportSourceState>();
}

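// GetData produces no tuples; it writes the schema.sql and load.sql scripts that describe how to
// recreate the catalog entries and reload the exported data files.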
SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &chunk,
                                         OperatorSourceInput &input) const {
	auto &state = input.global_state.Cast<ExportSourceState>();
	if (state.finished) {
		return SourceResultType::FINISHED;
	}

	auto &ccontext = context.client;
	auto &fs = FileSystem::GetFileSystem(ccontext);

	// gather all catalog types to export
	vector<reference<CatalogEntry>> schemas;
	vector<reference<CatalogEntry>> custom_types;
	vector<reference<CatalogEntry>> sequences;
	vector<reference<CatalogEntry>> tables;
	vector<reference<CatalogEntry>> views;
	vector<reference<CatalogEntry>> indexes;
	vector<reference<CatalogEntry>> macros;

	auto schema_list = Catalog::GetSchemas(ccontext, info->catalog);
	for (auto &schema_p : schema_list) {
		auto &schema = schema_p.get();
		if (!schema.internal) {
			schemas.push_back(schema);
		}
		schema.Scan(context.client, CatalogType::TABLE_ENTRY, [&](CatalogEntry &entry) {
			if (entry.internal) {
				return;
			}
			if (entry.type != CatalogType::TABLE_ENTRY) {
				views.push_back(entry);
			}
		});
		schema.Scan(context.client, CatalogType::SEQUENCE_ENTRY,
		            [&](CatalogEntry &entry) { sequences.push_back(entry); });
		schema.Scan(context.client, CatalogType::TYPE_ENTRY,
		            [&](CatalogEntry &entry) { custom_types.push_back(entry); });
		schema.Scan(context.client, CatalogType::INDEX_ENTRY, [&](CatalogEntry &entry) { indexes.push_back(entry); });
		schema.Scan(context.client, CatalogType::MACRO_ENTRY, [&](CatalogEntry &entry) {
			if (!entry.internal && entry.type == CatalogType::MACRO_ENTRY) {
				macros.push_back(entry);
			}
		});
		schema.Scan(context.client, CatalogType::TABLE_MACRO_ENTRY, [&](CatalogEntry &entry) {
			if (!entry.internal && entry.type == CatalogType::TABLE_MACRO_ENTRY) {
				macros.push_back(entry);
			}
		});
	}

	// keep the order of the tables as given, because of foreign key constraints
	for (idx_t i = 0; i < exported_tables.data.size(); i++) {
		tables.push_back(exported_tables.data[i].entry);
	}

	// order macros by oid (creation order) so that nested macros are imported after the macros they reference
	sort(macros.begin(), macros.end(), [](const reference<CatalogEntry> &lhs, const reference<CatalogEntry> &rhs) {
		return lhs.get().oid < rhs.get().oid;
	});

	// write the schema.sql file
	// export order is SCHEMA -> SEQUENCE -> TABLE -> VIEW -> INDEX

	stringstream ss;
	WriteCatalogEntries(ss, schemas);
	WriteCatalogEntries(ss, custom_types);
	WriteCatalogEntries(ss, sequences);
	WriteCatalogEntries(ss, tables);
	WriteCatalogEntries(ss, views);
	WriteCatalogEntries(ss, indexes);
	WriteCatalogEntries(ss, macros);

	WriteStringStreamToFile(fs, ss, fs.JoinPath(info->file_path, "schema.sql"));

	// write the load.sql file
	// for every table, we write a COPY ... FROM statement with the specified options
	stringstream load_ss;
	for (idx_t i = 0; i < exported_tables.data.size(); i++) {
		auto exported_table_info = exported_tables.data[i].table_data;
		WriteCopyStatement(fs, load_ss, *info, exported_table_info, function);
	}
	WriteStringStreamToFile(fs, load_ss, fs.JoinPath(info->file_path, "load.sql"));
	state.finished = true;

	return SourceResultType::FINISHED;
}

//===--------------------------------------------------------------------===//
// Sink
//===--------------------------------------------------------------------===//
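// The sink is a no-op: any chunks produced by the optional child are discarded, since
// PhysicalExport itself only writes the schema.sql and load.sql scripts in GetData.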
SinkResultType PhysicalExport::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
	// nop
	return SinkResultType::NEED_MORE_INPUT;
}

//===--------------------------------------------------------------------===//
// Pipeline Construction
//===--------------------------------------------------------------------===//
void PhysicalExport::BuildPipelines(Pipeline &current, MetaPipeline &meta_pipeline) {
	// EXPORT has an optional child
	// we only need to schedule child pipelines if there is a child
	auto &state = meta_pipeline.GetState();
	state.SetPipelineSource(current, *this);
	if (children.empty()) {
		return;
	}
	PhysicalOperator::BuildPipelines(current, meta_pipeline);
}

vector<const_reference<PhysicalOperator>> PhysicalExport::GetSources() const {
	return {*this};
}

} // namespace duckdb