| 1 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
| 2 | #include "duckdb/execution/expression_executor.hpp" |
| 3 | #include "duckdb/planner/expression/bound_comparison_expression.hpp" |
| 4 | #include "duckdb/common/operator/comparison_operators.hpp" |
| 5 | #include "duckdb/common/vector_operations/binary_executor.hpp" |
| 6 | |
| 7 | #include <algorithm> |
| 8 | |
| 9 | namespace duckdb { |
| 10 | |
| 11 | unique_ptr<ExpressionState> ExpressionExecutor::InitializeState(const BoundComparisonExpression &expr, |
| 12 | ExpressionExecutorState &root) { |
| 13 | auto result = make_uniq<ExpressionState>(args: expr, args&: root); |
| 14 | result->AddChild(expr: expr.left.get()); |
| 15 | result->AddChild(expr: expr.right.get()); |
| 16 | result->Finalize(); |
| 17 | return result; |
| 18 | } |
| 19 | |
| 20 | void ExpressionExecutor::Execute(const BoundComparisonExpression &expr, ExpressionState *state, |
| 21 | const SelectionVector *sel, idx_t count, Vector &result) { |
| 22 | // resolve the children |
| 23 | state->intermediate_chunk.Reset(); |
| 24 | auto &left = state->intermediate_chunk.data[0]; |
| 25 | auto &right = state->intermediate_chunk.data[1]; |
| 26 | |
| 27 | Execute(expr: *expr.left, state: state->child_states[0].get(), sel, count, result&: left); |
| 28 | Execute(expr: *expr.right, state: state->child_states[1].get(), sel, count, result&: right); |
| 29 | |
| 30 | switch (expr.type) { |
| 31 | case ExpressionType::COMPARE_EQUAL: |
| 32 | VectorOperations::Equals(left, right, result, count); |
| 33 | break; |
| 34 | case ExpressionType::COMPARE_NOTEQUAL: |
| 35 | VectorOperations::NotEquals(left, right, result, count); |
| 36 | break; |
| 37 | case ExpressionType::COMPARE_LESSTHAN: |
| 38 | VectorOperations::LessThan(left, right, result, count); |
| 39 | break; |
| 40 | case ExpressionType::COMPARE_GREATERTHAN: |
| 41 | VectorOperations::GreaterThan(left, right, result, count); |
| 42 | break; |
| 43 | case ExpressionType::COMPARE_LESSTHANOREQUALTO: |
| 44 | VectorOperations::LessThanEquals(left, right, result, count); |
| 45 | break; |
| 46 | case ExpressionType::COMPARE_GREATERTHANOREQUALTO: |
| 47 | VectorOperations::GreaterThanEquals(left, right, result, count); |
| 48 | break; |
| 49 | case ExpressionType::COMPARE_DISTINCT_FROM: |
| 50 | VectorOperations::DistinctFrom(left, right, result, count); |
| 51 | break; |
| 52 | case ExpressionType::COMPARE_NOT_DISTINCT_FROM: |
| 53 | VectorOperations::NotDistinctFrom(left, right, result, count); |
| 54 | break; |
| 55 | default: |
| 56 | throw InternalException("Unknown comparison type!" ); |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | template <typename OP> |
| 61 | static idx_t NestedSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
| 62 | SelectionVector *true_sel, SelectionVector *false_sel); |
| 63 | |
| 64 | template <class OP> |
| 65 | static idx_t TemplatedSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
| 66 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 67 | // the inplace loops take the result as the last parameter |
| 68 | switch (left.GetType().InternalType()) { |
| 69 | case PhysicalType::BOOL: |
| 70 | case PhysicalType::INT8: |
| 71 | return BinaryExecutor::Select<int8_t, int8_t, OP>(left, right, sel, count, true_sel, false_sel); |
| 72 | case PhysicalType::INT16: |
| 73 | return BinaryExecutor::Select<int16_t, int16_t, OP>(left, right, sel, count, true_sel, false_sel); |
| 74 | case PhysicalType::INT32: |
| 75 | return BinaryExecutor::Select<int32_t, int32_t, OP>(left, right, sel, count, true_sel, false_sel); |
| 76 | case PhysicalType::INT64: |
| 77 | return BinaryExecutor::Select<int64_t, int64_t, OP>(left, right, sel, count, true_sel, false_sel); |
| 78 | case PhysicalType::UINT8: |
| 79 | return BinaryExecutor::Select<uint8_t, uint8_t, OP>(left, right, sel, count, true_sel, false_sel); |
| 80 | case PhysicalType::UINT16: |
| 81 | return BinaryExecutor::Select<uint16_t, uint16_t, OP>(left, right, sel, count, true_sel, false_sel); |
| 82 | case PhysicalType::UINT32: |
| 83 | return BinaryExecutor::Select<uint32_t, uint32_t, OP>(left, right, sel, count, true_sel, false_sel); |
| 84 | case PhysicalType::UINT64: |
| 85 | return BinaryExecutor::Select<uint64_t, uint64_t, OP>(left, right, sel, count, true_sel, false_sel); |
| 86 | case PhysicalType::INT128: |
| 87 | return BinaryExecutor::Select<hugeint_t, hugeint_t, OP>(left, right, sel, count, true_sel, false_sel); |
| 88 | case PhysicalType::FLOAT: |
| 89 | return BinaryExecutor::Select<float, float, OP>(left, right, sel, count, true_sel, false_sel); |
| 90 | case PhysicalType::DOUBLE: |
| 91 | return BinaryExecutor::Select<double, double, OP>(left, right, sel, count, true_sel, false_sel); |
| 92 | case PhysicalType::INTERVAL: |
| 93 | return BinaryExecutor::Select<interval_t, interval_t, OP>(left, right, sel, count, true_sel, false_sel); |
| 94 | case PhysicalType::VARCHAR: |
| 95 | return BinaryExecutor::Select<string_t, string_t, OP>(left, right, sel, count, true_sel, false_sel); |
| 96 | case PhysicalType::LIST: |
| 97 | case PhysicalType::STRUCT: |
| 98 | return NestedSelectOperation<OP>(left, right, sel, count, true_sel, false_sel); |
| 99 | default: |
| 100 | throw InternalException("Invalid type for comparison" ); |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | struct NestedSelector { |
| 105 | // Select the matching rows for the values of a nested type that are not both NULL. |
| 106 | // Those semantics are the same as the corresponding non-distinct comparator |
| 107 | template <typename OP> |
| 108 | static idx_t Select(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, SelectionVector *true_sel, |
| 109 | SelectionVector *false_sel) { |
| 110 | throw InvalidTypeException(left.GetType(), "Invalid operation for nested SELECT" ); |
| 111 | } |
| 112 | }; |
| 113 | |
| 114 | template <> |
| 115 | idx_t NestedSelector::Select<duckdb::Equals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
| 116 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 117 | return VectorOperations::NestedEquals(left, right, sel, count, true_sel, false_sel); |
| 118 | } |
| 119 | |
| 120 | template <> |
| 121 | idx_t NestedSelector::Select<duckdb::NotEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
| 122 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 123 | return VectorOperations::NestedNotEquals(left, right, sel, count, true_sel, false_sel); |
| 124 | } |
| 125 | |
| 126 | template <> |
| 127 | idx_t NestedSelector::Select<duckdb::LessThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
| 128 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 129 | return VectorOperations::DistinctLessThan(left, right, sel: &sel, count, true_sel, false_sel); |
| 130 | } |
| 131 | |
| 132 | template <> |
| 133 | idx_t NestedSelector::Select<duckdb::LessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, |
| 134 | idx_t count, SelectionVector *true_sel, |
| 135 | SelectionVector *false_sel) { |
| 136 | return VectorOperations::DistinctLessThanEquals(left, right, sel: &sel, count, true_sel, false_sel); |
| 137 | } |
| 138 | |
| 139 | template <> |
| 140 | idx_t NestedSelector::Select<duckdb::GreaterThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
| 141 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 142 | return VectorOperations::DistinctGreaterThan(left, right, sel: &sel, count, true_sel, false_sel); |
| 143 | } |
| 144 | |
| 145 | template <> |
| 146 | idx_t NestedSelector::Select<duckdb::GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, |
| 147 | idx_t count, SelectionVector *true_sel, |
| 148 | SelectionVector *false_sel) { |
| 149 | return VectorOperations::DistinctGreaterThanEquals(left, right, sel: &sel, count, true_sel, false_sel); |
| 150 | } |
| 151 | |
| 152 | static inline idx_t SelectNotNull(Vector &left, Vector &right, const idx_t count, const SelectionVector &sel, |
| 153 | SelectionVector &maybe_vec, OptionalSelection &false_opt) { |
| 154 | |
| 155 | UnifiedVectorFormat lvdata, rvdata; |
| 156 | left.ToUnifiedFormat(count, data&: lvdata); |
| 157 | right.ToUnifiedFormat(count, data&: rvdata); |
| 158 | |
| 159 | auto &lmask = lvdata.validity; |
| 160 | auto &rmask = rvdata.validity; |
| 161 | |
| 162 | // For top-level comparisons, NULL semantics are in effect, |
| 163 | // so filter out any NULLs |
| 164 | idx_t remaining = 0; |
| 165 | if (lmask.AllValid() && rmask.AllValid()) { |
| 166 | // None are NULL, distinguish values. |
| 167 | for (idx_t i = 0; i < count; ++i) { |
| 168 | const auto idx = sel.get_index(idx: i); |
| 169 | maybe_vec.set_index(idx: remaining++, loc: idx); |
| 170 | } |
| 171 | return remaining; |
| 172 | } |
| 173 | |
| 174 | // Slice the Vectors down to the rows that are not determined (i.e., neither is NULL) |
| 175 | SelectionVector slicer(count); |
| 176 | idx_t false_count = 0; |
| 177 | for (idx_t i = 0; i < count; ++i) { |
| 178 | const auto result_idx = sel.get_index(idx: i); |
| 179 | const auto lidx = lvdata.sel->get_index(idx: i); |
| 180 | const auto ridx = rvdata.sel->get_index(idx: i); |
| 181 | if (!lmask.RowIsValid(row_idx: lidx) || !rmask.RowIsValid(row_idx: ridx)) { |
| 182 | false_opt.Append(count&: false_count, idx: result_idx); |
| 183 | } else { |
| 184 | // Neither is NULL, distinguish values. |
| 185 | slicer.set_index(idx: remaining, loc: i); |
| 186 | maybe_vec.set_index(idx: remaining++, loc: result_idx); |
| 187 | } |
| 188 | } |
| 189 | false_opt.Advance(completed: false_count); |
| 190 | |
| 191 | if (remaining && remaining < count) { |
| 192 | left.Slice(sel: slicer, count: remaining); |
| 193 | right.Slice(sel: slicer, count: remaining); |
| 194 | } |
| 195 | |
| 196 | return remaining; |
| 197 | } |
| 198 | |
| 199 | static void ScatterSelection(SelectionVector *target, const idx_t count, const SelectionVector &dense_vec) { |
| 200 | if (target) { |
| 201 | for (idx_t i = 0; i < count; ++i) { |
| 202 | target->set_index(idx: i, loc: dense_vec.get_index(idx: i)); |
| 203 | } |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | template <typename OP> |
| 208 | static idx_t NestedSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
| 209 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 210 | // The Select operations all use a dense pair of input vectors to partition |
| 211 | // a selection vector in a single pass. But to implement progressive comparisons, |
| 212 | // we have to make multiple passes, so we need to keep track of the original input positions |
| 213 | // and then scatter the output selections when we are done. |
| 214 | if (!sel) { |
| 215 | sel = FlatVector::IncrementalSelectionVector(); |
| 216 | } |
| 217 | |
| 218 | // Make buffered selections for progressive comparisons |
| 219 | // TODO: Remove unnecessary allocations |
| 220 | SelectionVector true_vec(count); |
| 221 | OptionalSelection true_opt(&true_vec); |
| 222 | |
| 223 | SelectionVector false_vec(count); |
| 224 | OptionalSelection false_opt(&false_vec); |
| 225 | |
| 226 | SelectionVector maybe_vec(count); |
| 227 | |
| 228 | // Handle NULL nested values |
| 229 | Vector l_not_null(left); |
| 230 | Vector r_not_null(right); |
| 231 | |
| 232 | auto match_count = SelectNotNull(left&: l_not_null, right&: r_not_null, count, sel: *sel, maybe_vec, false_opt); |
| 233 | auto no_match_count = count - match_count; |
| 234 | count = match_count; |
| 235 | |
| 236 | // Now that we have handled the NULLs, we can use the recursive nested comparator for the rest. |
| 237 | match_count = NestedSelector::Select<OP>(l_not_null, r_not_null, maybe_vec, count, true_opt, false_opt); |
| 238 | no_match_count += (count - match_count); |
| 239 | |
| 240 | // Copy the buffered selections to the output selections |
| 241 | ScatterSelection(target: true_sel, count: match_count, dense_vec: true_vec); |
| 242 | ScatterSelection(target: false_sel, count: no_match_count, dense_vec: false_vec); |
| 243 | |
| 244 | return match_count; |
| 245 | } |
| 246 | |
| 247 | idx_t VectorOperations::Equals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
| 248 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 249 | return TemplatedSelectOperation<duckdb::Equals>(left, right, sel, count, true_sel, false_sel); |
| 250 | } |
| 251 | |
| 252 | idx_t VectorOperations::NotEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
| 253 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 254 | return TemplatedSelectOperation<duckdb::NotEquals>(left, right, sel, count, true_sel, false_sel); |
| 255 | } |
| 256 | |
| 257 | idx_t VectorOperations::GreaterThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
| 258 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 259 | return TemplatedSelectOperation<duckdb::GreaterThan>(left, right, sel, count, true_sel, false_sel); |
| 260 | } |
| 261 | |
| 262 | idx_t VectorOperations::GreaterThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
| 263 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 264 | return TemplatedSelectOperation<duckdb::GreaterThanEquals>(left, right, sel, count, true_sel, false_sel); |
| 265 | } |
| 266 | |
| 267 | idx_t VectorOperations::LessThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
| 268 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 269 | return TemplatedSelectOperation<duckdb::GreaterThan>(left&: right, right&: left, sel, count, true_sel, false_sel); |
| 270 | } |
| 271 | |
| 272 | idx_t VectorOperations::LessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
| 273 | SelectionVector *true_sel, SelectionVector *false_sel) { |
| 274 | return TemplatedSelectOperation<duckdb::GreaterThanEquals>(left&: right, right&: left, sel, count, true_sel, false_sel); |
| 275 | } |
| 276 | |
| 277 | idx_t ExpressionExecutor::Select(const BoundComparisonExpression &expr, ExpressionState *state, |
| 278 | const SelectionVector *sel, idx_t count, SelectionVector *true_sel, |
| 279 | SelectionVector *false_sel) { |
| 280 | // resolve the children |
| 281 | state->intermediate_chunk.Reset(); |
| 282 | auto &left = state->intermediate_chunk.data[0]; |
| 283 | auto &right = state->intermediate_chunk.data[1]; |
| 284 | |
| 285 | Execute(expr: *expr.left, state: state->child_states[0].get(), sel, count, result&: left); |
| 286 | Execute(expr: *expr.right, state: state->child_states[1].get(), sel, count, result&: right); |
| 287 | |
| 288 | switch (expr.type) { |
| 289 | case ExpressionType::COMPARE_EQUAL: |
| 290 | return VectorOperations::Equals(left, right, sel, count, true_sel, false_sel); |
| 291 | case ExpressionType::COMPARE_NOTEQUAL: |
| 292 | return VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel); |
| 293 | case ExpressionType::COMPARE_LESSTHAN: |
| 294 | return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel); |
| 295 | case ExpressionType::COMPARE_GREATERTHAN: |
| 296 | return VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel); |
| 297 | case ExpressionType::COMPARE_LESSTHANOREQUALTO: |
| 298 | return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel); |
| 299 | case ExpressionType::COMPARE_GREATERTHANOREQUALTO: |
| 300 | return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel); |
| 301 | case ExpressionType::COMPARE_DISTINCT_FROM: |
| 302 | return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, false_sel); |
| 303 | case ExpressionType::COMPARE_NOT_DISTINCT_FROM: |
| 304 | return VectorOperations::NotDistinctFrom(left, right, sel, count, true_sel, false_sel); |
| 305 | default: |
| 306 | throw InternalException("Unknown comparison type!" ); |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | } // namespace duckdb |
| 311 | |