/* Copyright 2018-2025 Stichting DuckDB Foundation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #pragma once #define DUCKDB_AMALGAMATION 1 #define DUCKDB_SOURCE_ID "d1dc88f950" #define DUCKDB_VERSION "v1.4.3" #define DUCKDB_MAJOR_VERSION 1 #define DUCKDB_MINOR_VERSION 4 #define DUCKDB_PATCH_VERSION "3" //===----------------------------------------------------------------------===// // DuckDB // // duckdb.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/connection.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/profiler_format.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/constants.hpp // // //===----------------------------------------------------------------------===// #include //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/string.hpp // // //===----------------------------------------------------------------------===// #include #define DUCKDB_BASE_STD namespace duckdb_base_std { using ::std::basic_stringstream; using ::std::stringstream; using ::std::wstringstream; } // namespace duckdb_base_std #include #include namespace duckdb { using std::string; } // namespace duckdb namespace duckdb { template , class Allocator = std::allocator> class basic_stringstream : public duckdb_base_std::basic_stringstream { public: using original = duckdb_base_std::basic_stringstream; explicit basic_stringstream(std::ios_base::openmode which = std::ios_base::out | std::ios_base::in) : original(which) { this->imbue(std::locale::classic()); } explicit basic_stringstream(const std::basic_string &s, std::ios_base::openmode which = std::ios_base::out | 
std::ios_base::in) : original(s, which) { this->imbue(std::locale::classic()); } basic_stringstream(const basic_stringstream &) = delete; basic_stringstream(basic_stringstream &&rhs) noexcept; }; typedef basic_stringstream stringstream; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/winapi.hpp // // //===----------------------------------------------------------------------===// #ifndef DUCKDB_API #if defined(_WIN32) && !defined(__MINGW32__) #ifdef DUCKDB_STATIC_BUILD #define DUCKDB_API #else #if defined(DUCKDB_BUILD_LIBRARY) && !defined(DUCKDB_BUILD_LOADABLE_EXTENSION) #define DUCKDB_API __declspec(dllexport) #else #define DUCKDB_API __declspec(dllimport) #endif #endif #else #define DUCKDB_API #endif #endif #ifndef DUCKDB_EXTENSION_API #ifdef _WIN32 #ifdef DUCKDB_STATIC_BUILD #define DUCKDB_EXTENSION_API #else #define DUCKDB_EXTENSION_API __declspec(dllexport) #endif #else #define DUCKDB_EXTENSION_API __attribute__((visibility("default"))) #endif #endif //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/exception.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/assert.hpp // // //===----------------------------------------------------------------------===// // clang-format off #if ( \ /* Not a debug build */ \ !defined(DEBUG) && \ /* FORCE_ASSERT is not set (enables assertions even on release mode when set to true) */ \ !defined(DUCKDB_FORCE_ASSERT) && \ /* The project is not compiled for Microsoft Visual Studio */ \ !defined(__MVS__) \ ) // clang-format on //! On most builds, NDEBUG is defined, turning the assert call into a NO-OP //! 
Only the 'else' condition is supposed to check the assertions #include #define D_ASSERT assert namespace duckdb { DUCKDB_API void DuckDBAssertInternal(bool condition, const char *condition_name, const char *file, int linenr); } #else namespace duckdb { DUCKDB_API void DuckDBAssertInternal(bool condition, const char *condition_name, const char *file, int linenr); } #define D_ASSERT(condition) duckdb::DuckDBAssertInternal(bool(condition), #condition, __FILE__, __LINE__) #define D_ASSERT_IS_ENABLED #endif //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/exception_format_value.hpp // // //===----------------------------------------------------------------------===// #include //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/typedefs.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { //! a saner size_t for loop indices etc typedef uint64_t idx_t; //! The type used for row identifiers typedef int64_t row_t; //! The type used for hashes typedef uint64_t hash_t; //! data pointers typedef uint8_t data_t; typedef data_t *data_ptr_t; typedef const data_t *const_data_ptr_t; //! Type used for the selection vector typedef uint32_t sel_t; //! Type used for transaction timestamps typedef idx_t transaction_t; //! Type used to identify connections typedef idx_t connection_t; //! Type used for column identifiers typedef idx_t column_t; //! 
Type used for storage (column) identifiers typedef idx_t storage_t; template data_ptr_t data_ptr_cast(SRC *src) { // NOLINT: naming return reinterpret_cast(src); } template const_data_ptr_t const_data_ptr_cast(const SRC *src) { // NOLINT: naming return reinterpret_cast(src); } template char *char_ptr_cast(SRC *src) { // NOLINT: naming return reinterpret_cast(src); } template const char *const_char_ptr_cast(const SRC *src) { // NOLINT: naming return reinterpret_cast(src); } template const unsigned char *const_uchar_ptr_cast(const SRC *src) { // NOLINT: naming return reinterpret_cast(src); } template uintptr_t CastPointerToValue(SRC *src) { return reinterpret_cast(src); } template uint64_t cast_pointer_to_uint64(SRC *src) { return static_cast(reinterpret_cast(src)); } template SRC *cast_uint64_to_pointer(uint64_t value) { return reinterpret_cast(static_cast(value)); } } // namespace duckdb namespace duckdb { // Forward declaration to allow conversion between hugeint and uhugeint struct uhugeint_t; // NOLINT: use numeric casing struct hugeint_t { // NOLINT: use numeric casing public: uint64_t lower; int64_t upper; public: hugeint_t() = default; DUCKDB_API hugeint_t(int64_t value); // NOLINT: Allow implicit conversion from `int64_t` constexpr hugeint_t(int64_t upper, uint64_t lower) : lower(lower), upper(upper) { } constexpr hugeint_t(const hugeint_t &rhs) = default; constexpr hugeint_t(hugeint_t &&rhs) = default; hugeint_t &operator=(const hugeint_t &rhs) = default; hugeint_t &operator=(hugeint_t &&rhs) = default; DUCKDB_API string ToString() const; // comparison operators DUCKDB_API bool operator==(const hugeint_t &rhs) const; DUCKDB_API bool operator!=(const hugeint_t &rhs) const; DUCKDB_API bool operator<=(const hugeint_t &rhs) const; DUCKDB_API bool operator<(const hugeint_t &rhs) const; DUCKDB_API bool operator>(const hugeint_t &rhs) const; DUCKDB_API bool operator>=(const hugeint_t &rhs) const; // arithmetic operators DUCKDB_API hugeint_t operator+(const 
hugeint_t &rhs) const; DUCKDB_API hugeint_t operator-(const hugeint_t &rhs) const; DUCKDB_API hugeint_t operator*(const hugeint_t &rhs) const; DUCKDB_API hugeint_t operator/(const hugeint_t &rhs) const; DUCKDB_API hugeint_t operator%(const hugeint_t &rhs) const; DUCKDB_API hugeint_t operator-() const; // bitwise operators DUCKDB_API hugeint_t operator>>(const hugeint_t &rhs) const; DUCKDB_API hugeint_t operator<<(const hugeint_t &rhs) const; DUCKDB_API hugeint_t operator&(const hugeint_t &rhs) const; DUCKDB_API hugeint_t operator|(const hugeint_t &rhs) const; DUCKDB_API hugeint_t operator^(const hugeint_t &rhs) const; DUCKDB_API hugeint_t operator~() const; // in-place operators DUCKDB_API hugeint_t &operator+=(const hugeint_t &rhs); DUCKDB_API hugeint_t &operator-=(const hugeint_t &rhs); DUCKDB_API hugeint_t &operator*=(const hugeint_t &rhs); DUCKDB_API hugeint_t &operator/=(const hugeint_t &rhs); DUCKDB_API hugeint_t &operator%=(const hugeint_t &rhs); DUCKDB_API hugeint_t &operator>>=(const hugeint_t &rhs); DUCKDB_API hugeint_t &operator<<=(const hugeint_t &rhs); DUCKDB_API hugeint_t &operator&=(const hugeint_t &rhs); DUCKDB_API hugeint_t &operator|=(const hugeint_t &rhs); DUCKDB_API hugeint_t &operator^=(const hugeint_t &rhs); // boolean operators DUCKDB_API explicit operator bool() const; DUCKDB_API bool operator!() const; // cast operators -- doesn't check bounds/overflow/underflow DUCKDB_API explicit operator uint8_t() const; DUCKDB_API explicit operator uint16_t() const; DUCKDB_API explicit operator uint32_t() const; DUCKDB_API explicit operator uint64_t() const; DUCKDB_API explicit operator int8_t() const; DUCKDB_API explicit operator int16_t() const; DUCKDB_API explicit operator int32_t() const; DUCKDB_API explicit operator int64_t() const; DUCKDB_API explicit operator uhugeint_t() const; }; } // namespace duckdb namespace std { template <> struct hash { size_t operator()(const duckdb::hugeint_t &val) const { using std::hash; return hash {}(val.upper) ^ 
hash {}(val.lower); } }; } // namespace std #include namespace duckdb { class String; // Helper class to support custom overloading // Escaping " and quoting the value with " class SQLIdentifier { public: explicit SQLIdentifier(const string &raw_string) : raw_string(raw_string) { } public: string raw_string; }; // Helper class to support custom overloading // Escaping ' and quoting the value with ' class SQLString { public: explicit SQLString(const string &raw_string) : raw_string(raw_string) { } public: string raw_string; }; enum class PhysicalType : uint8_t; struct LogicalType; enum class ExceptionFormatValueType : uint8_t { FORMAT_VALUE_TYPE_DOUBLE, FORMAT_VALUE_TYPE_INTEGER, FORMAT_VALUE_TYPE_STRING }; struct ExceptionFormatValue { DUCKDB_API ExceptionFormatValue(double dbl_val); // NOLINT DUCKDB_API ExceptionFormatValue(int64_t int_val); // NOLINT DUCKDB_API ExceptionFormatValue(idx_t uint_val); // NOLINT DUCKDB_API ExceptionFormatValue(string str_val); // NOLINT DUCKDB_API ExceptionFormatValue(String str_val); // NOLINT DUCKDB_API ExceptionFormatValue(hugeint_t hg_val); // NOLINT DUCKDB_API ExceptionFormatValue(uhugeint_t uhg_val); // NOLINT ExceptionFormatValueType type; double dbl_val = 0; hugeint_t int_val = 0; string str_val; public: template static ExceptionFormatValue CreateFormatValue(T value) { return int64_t(value); } static string Format(const string &msg, std::vector &values); }; template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(PhysicalType value); template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(SQLString value); template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(SQLIdentifier value); template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(LogicalType value); template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(float value); template <> DUCKDB_API ExceptionFormatValue 
ExceptionFormatValue::CreateFormatValue(double value); template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(string value); template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(String value); template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(const char *value); template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(char *value); template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(idx_t value); template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(hugeint_t value); template <> DUCKDB_API ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(uhugeint_t value); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/unordered_map.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::unordered_map; } #include #include namespace duckdb { enum class PhysicalType : uint8_t; struct LogicalType; class Expression; class ParsedExpression; class QueryErrorContext; class TableRef; struct hugeint_t; class optional_idx; // NOLINT: matching std style inline void AssertRestrictFunction(const void *left_start, const void *left_end, const void *right_start, const void *right_end, const char *fname, int linenr) { // assert that the two pointers do not overlap #ifdef DEBUG if (!(left_end <= right_start || right_end <= left_start)) { printf("ASSERT RESTRICT FAILED: %s:%d\n", fname, linenr); D_ASSERT(0); } #endif } #define ASSERT_RESTRICT(left_start, left_end, right_start, right_end) \ AssertRestrictFunction(left_start, left_end, right_start, right_end, __FILE__, __LINE__) //===--------------------------------------------------------------------===// // Exception Types //===--------------------------------------------------------------------===// 
enum class ExceptionType : uint8_t { INVALID = 0, // invalid type OUT_OF_RANGE = 1, // value out of range error CONVERSION = 2, // conversion/casting error UNKNOWN_TYPE = 3, // unknown type DECIMAL = 4, // decimal related MISMATCH_TYPE = 5, // type mismatch DIVIDE_BY_ZERO = 6, // divide by 0 OBJECT_SIZE = 7, // object size exceeded INVALID_TYPE = 8, // incompatible for operation SERIALIZATION = 9, // serialization TRANSACTION = 10, // transaction management NOT_IMPLEMENTED = 11, // method not implemented EXPRESSION = 12, // expression parsing CATALOG = 13, // catalog related PARSER = 14, // parser related PLANNER = 15, // planner related SCHEDULER = 16, // scheduler related EXECUTOR = 17, // executor related CONSTRAINT = 18, // constraint related INDEX = 19, // index related STAT = 20, // stat related CONNECTION = 21, // connection related SYNTAX = 22, // syntax related SETTINGS = 23, // settings related BINDER = 24, // binder related NETWORK = 25, // network related OPTIMIZER = 26, // optimizer related NULL_POINTER = 27, // nullptr exception IO = 28, // IO exception INTERRUPT = 29, // interrupt FATAL = 30, // Fatal exceptions are non-recoverable, and render the entire DB in an unusable state INTERNAL = 31, // Internal exceptions indicate something went wrong internally (i.e. bug in the code base) INVALID_INPUT = 32, // Input or arguments error OUT_OF_MEMORY = 33, // out of memory PERMISSION = 34, // insufficient permissions PARAMETER_NOT_RESOLVED = 35, // parameter types could not be resolved PARAMETER_NOT_ALLOWED = 36, // parameter types not allowed DEPENDENCY = 37, // dependency HTTP = 38, MISSING_EXTENSION = 39, // Thrown when an extension is used but not loaded AUTOLOAD = 40, // Thrown when an extension is used but not loaded SEQUENCE = 41, INVALID_CONFIGURATION = 42 // An invalid configuration was detected (e.g. 
a Secret param was missing, or a required setting not found) }; class Exception : public std::runtime_error { public: DUCKDB_API Exception(ExceptionType exception_type, const string &message); DUCKDB_API Exception(ExceptionType exception_type, const string &message, const unordered_map &extra_info); public: DUCKDB_API static string ExceptionTypeToString(ExceptionType type); DUCKDB_API static ExceptionType StringToExceptionType(const string &type); template static string ConstructMessage(const string &msg, ARGS... params) { const std::size_t num_args = sizeof...(ARGS); if (num_args == 0) { return msg; } std::vector values; return ConstructMessageRecursive(msg, values, params...); } DUCKDB_API static unordered_map InitializeExtraInfo(const Expression &expr); DUCKDB_API static unordered_map InitializeExtraInfo(const ParsedExpression &expr); DUCKDB_API static unordered_map InitializeExtraInfo(const QueryErrorContext &error_context); DUCKDB_API static unordered_map InitializeExtraInfo(const TableRef &ref); DUCKDB_API static unordered_map InitializeExtraInfo(optional_idx error_location); DUCKDB_API static unordered_map InitializeExtraInfo(const string &subtype, optional_idx error_location); //! Whether this exception type can occur during execution of a query DUCKDB_API static bool IsExecutionError(ExceptionType type); DUCKDB_API static string ToJSON(ExceptionType type, const string &message); DUCKDB_API static string ToJSON(ExceptionType type, const string &message, const unordered_map &extra_info); DUCKDB_API static bool InvalidatesTransaction(ExceptionType exception_type); DUCKDB_API static bool InvalidatesDatabase(ExceptionType exception_type); DUCKDB_API static string ConstructMessageRecursive(const string &msg, std::vector &values); template static string ConstructMessageRecursive(const string &msg, std::vector &values, T param, ARGS... 
params) { values.push_back(ExceptionFormatValue::CreateFormatValue(param)); return ConstructMessageRecursive(msg, values, params...); } DUCKDB_API static bool UncaughtException(); DUCKDB_API static string GetStackTrace(idx_t max_depth = 120); static string FormatStackTrace(const string &message = "") { return (message + "\n" + GetStackTrace()); } DUCKDB_API static void SetQueryLocation(optional_idx error_location, unordered_map &extra_info); }; //===--------------------------------------------------------------------===// // Exception derived classes //===--------------------------------------------------------------------===// class ConnectionException : public Exception { public: DUCKDB_API explicit ConnectionException(const string &msg); template explicit ConnectionException(const string &msg, ARGS... params) : ConnectionException(ConstructMessage(msg, params...)) { } }; class PermissionException : public Exception { public: DUCKDB_API explicit PermissionException(const string &msg); template explicit PermissionException(const string &msg, ARGS... params) : PermissionException(ConstructMessage(msg, params...)) { } }; class OutOfRangeException : public Exception { public: DUCKDB_API explicit OutOfRangeException(const string &msg); template explicit OutOfRangeException(const string &msg, ARGS... params) : OutOfRangeException(ConstructMessage(msg, params...)) { } DUCKDB_API OutOfRangeException(const int64_t value, const PhysicalType orig_type, const PhysicalType new_type); DUCKDB_API OutOfRangeException(const hugeint_t value, const PhysicalType orig_type, const PhysicalType new_type); DUCKDB_API OutOfRangeException(const double value, const PhysicalType orig_type, const PhysicalType new_type); DUCKDB_API OutOfRangeException(const PhysicalType var_type, const idx_t length); }; class OutOfMemoryException : public Exception { public: DUCKDB_API explicit OutOfMemoryException(const string &msg); template explicit OutOfMemoryException(const string &msg, ARGS... 
params) : OutOfMemoryException(ConstructMessage(msg, params...)) { } private: string ExtendOutOfMemoryError(const string &msg); }; class SyntaxException : public Exception { public: DUCKDB_API explicit SyntaxException(const string &msg); template explicit SyntaxException(const string &msg, ARGS... params) : SyntaxException(ConstructMessage(msg, params...)) { } }; class ConstraintException : public Exception { public: DUCKDB_API explicit ConstraintException(const string &msg); template explicit ConstraintException(const string &msg, ARGS... params) : ConstraintException(ConstructMessage(msg, params...)) { } }; class DependencyException : public Exception { public: DUCKDB_API explicit DependencyException(const string &msg); template explicit DependencyException(const string &msg, ARGS... params) : DependencyException(ConstructMessage(msg, params...)) { } }; class IOException : public Exception { public: DUCKDB_API explicit IOException(const string &msg); DUCKDB_API explicit IOException(const string &msg, const unordered_map &extra_info); explicit IOException(ExceptionType exception_type, const string &msg) : Exception(exception_type, msg) { } template explicit IOException(const string &msg, ARGS... params) : IOException(ConstructMessage(msg, params...)) { } template explicit IOException(const string &msg, const unordered_map &extra_info, ARGS... params) : IOException(ConstructMessage(msg, params...), extra_info) { } }; class MissingExtensionException : public Exception { public: DUCKDB_API explicit MissingExtensionException(const string &msg); template explicit MissingExtensionException(const string &msg, ARGS... params) : MissingExtensionException(ConstructMessage(msg, params...)) { } }; class NotImplementedException : public Exception { public: DUCKDB_API explicit NotImplementedException(const string &msg); template explicit NotImplementedException(const string &msg, ARGS... 
params) : NotImplementedException(ConstructMessage(msg, params...)) { } }; class AutoloadException : public Exception { public: DUCKDB_API explicit AutoloadException(const string &extension_name, const string &message); }; class SerializationException : public Exception { public: DUCKDB_API explicit SerializationException(const string &msg); template explicit SerializationException(const string &msg, ARGS... params) : SerializationException(ConstructMessage(msg, params...)) { } }; class SequenceException : public Exception { public: DUCKDB_API explicit SequenceException(const string &msg); template explicit SequenceException(const string &msg, ARGS... params) : SequenceException(ConstructMessage(msg, params...)) { } }; class InterruptException : public Exception { public: DUCKDB_API InterruptException(); }; class FatalException : public Exception { public: explicit FatalException(const string &msg) : FatalException(ExceptionType::FATAL, msg) { } template explicit FatalException(const string &msg, ARGS... params) : FatalException(ConstructMessage(msg, params...)) { } protected: DUCKDB_API explicit FatalException(ExceptionType type, const string &msg); template explicit FatalException(ExceptionType type, const string &msg, ARGS... params) : FatalException(type, ConstructMessage(msg, params...)) { } }; class InternalException : public Exception { public: DUCKDB_API explicit InternalException(const string &msg); template explicit InternalException(const string &msg, ARGS... params) : InternalException(ConstructMessage(msg, params...)) { } }; class InvalidInputException : public Exception { public: DUCKDB_API explicit InvalidInputException(const string &msg); DUCKDB_API explicit InvalidInputException(const string &msg, const unordered_map &extra_info); template explicit InvalidInputException(const string &msg, ARGS... 
params) : InvalidInputException(ConstructMessage(msg, params...)) { } template explicit InvalidInputException(const Expression &expr, const string &msg, ARGS... params) : InvalidInputException(ConstructMessage(msg, params...), Exception::InitializeExtraInfo(expr)) { } }; class ExecutorException : public Exception { public: DUCKDB_API explicit ExecutorException(const string &msg); template explicit ExecutorException(const string &msg, ARGS... params) : ExecutorException(ConstructMessage(msg, params...)) { } }; class InvalidConfigurationException : public Exception { public: DUCKDB_API explicit InvalidConfigurationException(const string &msg); DUCKDB_API explicit InvalidConfigurationException(const string &msg, const unordered_map &extra_info); template explicit InvalidConfigurationException(const string &msg, ARGS... params) : InvalidConfigurationException(ConstructMessage(msg, params...)) { } template explicit InvalidConfigurationException(const Expression &expr, const string &msg, ARGS... 
params) : InvalidConfigurationException(ConstructMessage(msg, params...), Exception::InitializeExtraInfo(expr)) { } }; class InvalidTypeException : public Exception { public: DUCKDB_API InvalidTypeException(PhysicalType type, const string &msg); DUCKDB_API InvalidTypeException(const LogicalType &type, const string &msg); DUCKDB_API explicit InvalidTypeException(const string &msg); }; class TypeMismatchException : public Exception { public: DUCKDB_API TypeMismatchException(const PhysicalType type_1, const PhysicalType type_2, const string &msg); DUCKDB_API TypeMismatchException(const LogicalType &type_1, const LogicalType &type_2, const string &msg); DUCKDB_API TypeMismatchException(optional_idx error_location, const LogicalType &type_1, const LogicalType &type_2, const string &msg); DUCKDB_API explicit TypeMismatchException(const string &msg); }; class ParameterNotAllowedException : public Exception { public: DUCKDB_API explicit ParameterNotAllowedException(const string &msg); template explicit ParameterNotAllowedException(const string &msg, ARGS... params) : ParameterNotAllowedException(ConstructMessage(msg, params...)) { } }; //! Special exception that should be thrown in the binder if parameter types could not be resolved //! This will cause prepared statements to be forcibly rebound with the actual parameter values //! This exception is fatal if thrown outside of the binder (i.e. 
it should never be thrown outside of the binder) class ParameterNotResolvedException : public Exception { public: DUCKDB_API explicit ParameterNotResolvedException(); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/likely.hpp // // //===----------------------------------------------------------------------===// #if __GNUC__ #define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (__builtin_expect(cond, expected_value)) #else #define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (cond) #endif #define DUCKDB_LIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 1) #define DUCKDB_UNLIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 0) namespace duckdb { template struct MemorySafety { #ifdef DEBUG // In DEBUG mode safety is always on static constexpr bool ENABLED = true; #else static constexpr bool ENABLED = IS_ENABLED; #endif }; } // namespace duckdb #include #define DUCKDB_BASE_STD namespace duckdb_base_std { using ::std::make_shared; using ::std::shared_ptr; using ::std::unique_ptr; // using ::std::make_unique; } // namespace duckdb_base_std #include namespace duckdb { template , bool SAFE = true> class unique_ptr : public duckdb_base_std::unique_ptr { // NOLINT: naming public: using original = duckdb_base_std::unique_ptr; using original::original; // NOLINT using pointer = typename original::pointer; private: static inline void AssertNotNull(const bool null) { #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY) return; #else if (DUCKDB_UNLIKELY(null)) { throw duckdb::InternalException("Attempted to dereference unique_ptr that is NULL!"); } #endif } public: typename std::add_lvalue_reference::type operator*() const { // NOLINT: hiding on purpose const auto ptr = original::get(); if (MemorySafety::ENABLED) { AssertNotNull(!ptr); } return *ptr; } typename original::pointer operator->() const { // NOLINT: hiding on purpose const auto ptr = original::get(); if 
(MemorySafety::ENABLED) { AssertNotNull(!ptr); } return ptr; } #ifdef DUCKDB_CLANG_TIDY // This is necessary to tell clang-tidy that it reinitializes the variable after a move [[clang::reinitializes]] #endif inline void reset(typename original::pointer ptr = typename original::pointer()) noexcept { // NOLINT: hiding on purpose original::reset(ptr); } }; template class unique_ptr : public duckdb_base_std::unique_ptr { public: using original = duckdb_base_std::unique_ptr; using original::original; private: static inline void AssertNotNull(const bool null) { #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY) return; #else if (DUCKDB_UNLIKELY(null)) { throw duckdb::InternalException("Attempted to dereference unique_ptr that is NULL!"); } #endif } public: typename std::add_lvalue_reference::type operator[](size_t __i) const { // NOLINT: hiding on purpose const auto ptr = original::get(); if (MemorySafety::ENABLED) { AssertNotNull(!ptr); } return ptr[__i]; } }; template class unique_ptr : public duckdb_base_std::unique_ptr { public: using original = duckdb_base_std::unique_ptr; using original::original; private: static inline void AssertNotNull(const bool null) { #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY) return; #else if (DUCKDB_UNLIKELY(null)) { throw duckdb::InternalException("Attempted to dereference unique_ptr that is NULL!"); } #endif } public: typename std::add_lvalue_reference::type operator[](size_t __i) const { // NOLINT: hiding on purpose const auto ptr = original::get(); if (MemorySafety::ENABLED) { AssertNotNull(!ptr); } return ptr[__i]; } }; template using unique_array = unique_ptr, true>; template using unsafe_unique_array = unique_ptr, false>; template using unsafe_unique_ptr = unique_ptr, false>; } // namespace duckdb namespace duckdb { class Serializer; class Deserializer; class BinarySerializer; class BinaryDeserializer; class WriteStream; class ReadStream; //! 
inline std directives that we use frequently #ifndef DUCKDB_DEBUG_MOVE using std::move; #endif // NOTE: there is a copy of this in the Postgres' parser grammar (gram.y) #define DEFAULT_SCHEMA "main" #define INVALID_SCHEMA "" #define INVALID_CATALOG "" #define SYSTEM_CATALOG "system" #define TEMP_CATALOG "temp" #define IN_MEMORY_PATH ":memory:" DUCKDB_API bool IsInvalidSchema(const string &str); DUCKDB_API bool IsInvalidCatalog(const string &str); //! Special value used to signify the ROW ID of a table DUCKDB_API extern const column_t COLUMN_IDENTIFIER_ROW_ID; //! Special value used to signify an empty column (used for e.g. COUNT(*)) DUCKDB_API extern const column_t COLUMN_IDENTIFIER_EMPTY; DUCKDB_API extern const column_t VIRTUAL_COLUMN_START; DUCKDB_API bool IsRowIdColumnId(column_t column_id); DUCKDB_API bool IsVirtualColumn(column_t column_id); //! The maximum row identifier used in tables extern const row_t MAX_ROW_ID; //! Transaction-local row IDs start at MAX_ROW_ID extern const row_t MAX_ROW_ID_LOCAL; extern const transaction_t TRANSACTION_ID_START; extern const transaction_t MAX_TRANSACTION_ID; extern const transaction_t MAXIMUM_QUERY_ID; extern const transaction_t NOT_DELETED_ID; extern const double PI; struct DConstants { //! The value used to signify an invalid index entry static constexpr const idx_t INVALID_INDEX = idx_t(-1); //! 
The total maximum vector size (128GB) static constexpr const idx_t MAX_VECTOR_SIZE = 1ULL << 37ULL; }; struct LogicalIndex { explicit LogicalIndex(idx_t index) : index(index) { } idx_t index; inline bool operator==(const LogicalIndex &rhs) const { return index == rhs.index; }; inline bool operator!=(const LogicalIndex &rhs) const { return index != rhs.index; }; inline bool operator<(const LogicalIndex &rhs) const { return index < rhs.index; }; bool IsValid() { return index != DConstants::INVALID_INDEX; } }; struct PhysicalIndex { explicit PhysicalIndex(idx_t index) : index(index) { } idx_t index; inline bool operator==(const PhysicalIndex &rhs) const { return index == rhs.index; }; inline bool operator!=(const PhysicalIndex &rhs) const { return index != rhs.index; }; inline bool operator<(const PhysicalIndex &rhs) const { return index < rhs.index; }; bool IsValid() { return index != DConstants::INVALID_INDEX; } }; DUCKDB_API bool IsPowerOfTwo(uint64_t v); DUCKDB_API uint64_t NextPowerOfTwo(uint64_t v); DUCKDB_API uint64_t PreviousPowerOfTwo(uint64_t v); } // namespace duckdb namespace duckdb { enum class ProfilerPrintFormat : uint8_t { QUERY_TREE, JSON, QUERY_TREE_OPTIMIZER, NO_OUTPUT, HTML, GRAPHVIZ }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/serializer/buffered_file_writer.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/serializer/write_stream.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // 
DuckDB // // duckdb/catalog/catalog_entry.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/common.hpp // // //===----------------------------------------------------------------------===// #ifdef _WIN32 #ifdef DUCKDB_MAIN_LIBRARY #if defined(_WIN32) #ifndef NOMINMAX #define NOMINMAX #endif #ifndef _WINSOCKAPI_ #define _WINSOCKAPI_ #endif #include #undef CreateDirectory #undef MoveFile #undef RemoveDirectory #endif #endif #endif //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/helper.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/shared_ptr.hpp // // //===----------------------------------------------------------------------===// #include #include namespace duckdb { // This implementation is taken from the llvm-project, at this commit hash: // https://github.com/llvm/llvm-project/blob/08bb121835be432ac52372f92845950628ce9a4a/libcxx/include/__memory/shared_ptr.h#353 // originally named '__compatible_with' #if _LIBCPP_STD_VER >= 17 template struct __bounded_convertible_to_unbounded : std::false_type {}; template struct __bounded_convertible_to_unbounded<_Up[_Np], T> : std::is_same, _Up[]> {}; template struct compatible_with_t : std::_Or, __bounded_convertible_to_unbounded> {}; #else template struct compatible_with_t : std::is_convertible {}; // NOLINT: invalid case style #endif // _LIBCPP_STD_VER >= 17 } // namespace duckdb namespace duckdb { template class weak_ptr; template class enable_shared_from_this; template class shared_ptr { // NOLINT: invalid case style public: using original = std::shared_ptr; using element_type = typename original::element_type; using weak_type = weak_ptr; private: static 
inline void AssertNotNull(const bool null) { #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY) return; #else if (DUCKDB_UNLIKELY(null)) { throw duckdb::InternalException("Attempted to dereference shared_ptr that is NULL!"); } #endif } private: template friend class weak_ptr; template friend class shared_ptr; template friend shared_ptr shared_ptr_cast(shared_ptr src); // NOLINT: invalid case style private: original internal; public: // Constructors shared_ptr() : internal() { } shared_ptr(std::nullptr_t) : internal(nullptr) { // NOLINT: not marked as explicit } // From raw pointer of type U convertible to T template ::value, int>::type = 0> explicit shared_ptr(U *ptr) : internal(ptr) { __enable_weak_this(internal.get(), internal.get()); } // From raw pointer of type T with custom DELETER template shared_ptr(T *ptr, DELETER deleter) : internal(ptr, deleter) { __enable_weak_this(internal.get(), internal.get()); } // Aliasing constructor: shares ownership information with ref but contains ptr instead // When the created shared_ptr goes out of scope, it will call the DELETER of ref, will not delete ptr template shared_ptr(const shared_ptr &ref, T *ptr) noexcept : internal(ref.internal, ptr) { } #if _LIBCPP_STD_VER >= 20 template shared_ptr(shared_ptr &&ref, T *ptr) noexcept : internal(std::move(ref.internal), ptr) { } #endif // Copy constructor, share ownership with ref template ::value, int>::type = 0> shared_ptr(const shared_ptr &ref) noexcept : internal(ref.internal) { // NOLINT: not marked as explicit } shared_ptr(const shared_ptr &other) : internal(other.internal) { // NOLINT: not marked as explicit } // Move constructor, share ownership with ref template ::value, int>::type = 0> #ifdef DUCKDB_CLANG_TIDY [[clang::reinitializes]] #endif shared_ptr(shared_ptr &&ref) noexcept // NOLINT: not marked as explicit : internal(std::move(ref.internal)) { } #ifdef DUCKDB_CLANG_TIDY [[clang::reinitializes]] #endif shared_ptr(shared_ptr &&other) // NOLINT: not 
marked as explicit : internal(std::move(other.internal)) { } // Construct from std::shared_ptr explicit shared_ptr(std::shared_ptr other) : internal(other) { // FIXME: should we __enable_weak_this here? // *our* enable_shared_from_this hasn't initialized yet, so I think so? __enable_weak_this(internal.get(), internal.get()); } // Construct from weak_ptr template explicit shared_ptr(weak_ptr other) : internal(other.internal) { } // Construct from unique_ptr, takes over ownership of the unique_ptr template ::value && std::is_convertible::pointer, T *>::value, int>::type = 0> #ifdef DUCKDB_CLANG_TIDY [[clang::reinitializes]] #endif shared_ptr(unique_ptr &&other) // NOLINT: not marked as explicit : internal(std::move(other)) { __enable_weak_this(internal.get(), internal.get()); } // Destructor ~shared_ptr() = default; // Assign from shared_ptr copy shared_ptr &operator=(const shared_ptr &other) noexcept { if (this == &other) { return *this; } // Create a new shared_ptr using the copy constructor, then swap out the ownership to *this shared_ptr(other).swap(*this); return *this; } template ::value, int>::type = 0> shared_ptr &operator=(const shared_ptr &other) { shared_ptr(other).swap(*this); return *this; } // Assign from moved shared_ptr shared_ptr &operator=(shared_ptr &&other) noexcept { // Create a new shared_ptr using the move constructor, then swap out the ownership to *this shared_ptr(std::move(other)).swap(*this); return *this; } template ::value, int>::type = 0> shared_ptr &operator=(shared_ptr &&other) { shared_ptr(std::move(other)).swap(*this); return *this; } // Assign from moved unique_ptr template ::value && std::is_convertible::pointer, T *>::value, int>::type = 0> shared_ptr &operator=(unique_ptr &&ref) { shared_ptr(std::move(ref)).swap(*this); return *this; } #ifdef DUCKDB_CLANG_TIDY [[clang::reinitializes]] #endif void reset() { // NOLINT: invalid case style internal.reset(); } template #ifdef DUCKDB_CLANG_TIDY [[clang::reinitializes]] #endif void 
reset(U *ptr) { // NOLINT: invalid case style internal.reset(ptr); } template #ifdef DUCKDB_CLANG_TIDY [[clang::reinitializes]] #endif void reset(U *ptr, DELETER deleter) { // NOLINT: invalid case style internal.reset(ptr, deleter); } void swap(shared_ptr &r) noexcept { // NOLINT: invalid case style internal.swap(r.internal); } T *get() const { // NOLINT: invalid case style return internal.get(); } long use_count() const { // NOLINT: invalid case style return internal.use_count(); } explicit operator bool() const noexcept { return internal.operator bool(); } typename std::add_lvalue_reference::type operator*() const { if (MemorySafety::ENABLED) { const auto ptr = internal.get(); AssertNotNull(!ptr); return *ptr; } else { return *internal; } } T *operator->() const { if (MemorySafety::ENABLED) { const auto ptr = internal.get(); AssertNotNull(!ptr); return ptr; } else { return internal.operator->(); } } // Relational operators template bool operator==(const shared_ptr &other) const noexcept { return internal == other.internal; } template bool operator!=(const shared_ptr &other) const noexcept { return internal != other.internal; } bool operator==(std::nullptr_t) const noexcept { return internal == nullptr; } bool operator!=(std::nullptr_t) const noexcept { return internal != nullptr; } template bool operator<(const shared_ptr &other) const noexcept { return internal < other.internal; } template bool operator<=(const shared_ptr &other) const noexcept { return internal <= other.internal; } template bool operator>(const shared_ptr &other) const noexcept { return internal > other.internal; } template bool operator>=(const shared_ptr &other) const noexcept { return internal >= other.internal; } private: // This overload is used when the class inherits from 'enable_shared_from_this' template *>::value, int>::type = 0> void __enable_weak_this(const enable_shared_from_this *object, // NOLINT: invalid case style V *ptr) noexcept { typedef typename std::remove_cv::type 
non_const_u_t; if (object && object->__weak_this_.expired()) { // __weak_this__ is the mutable variable returned by 'shared_from_this' // it is initialized here auto non_const = const_cast(static_cast(ptr)); // NOLINT: const cast object->__weak_this_ = shared_ptr(*this, non_const); } } void __enable_weak_this(...) noexcept { // NOLINT: invalid case style } }; } // namespace duckdb namespace duckdb { template class weak_ptr { // NOLINT: invalid case style public: using original = std::weak_ptr; using element_type = typename original::element_type; private: template friend class shared_ptr; private: original internal; public: // Constructors weak_ptr() : internal() { } // NOLINTBEGIN template ::value, int>::type = 0> weak_ptr(shared_ptr const &ptr) noexcept : internal(ptr.internal) { } weak_ptr(weak_ptr const &other) noexcept : internal(other.internal) { } template ::value, int>::type = 0> weak_ptr(weak_ptr const &ptr) noexcept : internal(ptr.internal) { } #ifdef DUCKDB_CLANG_TIDY [[clang::reinitializes]] #endif weak_ptr(weak_ptr &&ptr) noexcept : internal(std::move(ptr.internal)) { } template ::value, int>::type = 0> #ifdef DUCKDB_CLANG_TIDY [[clang::reinitializes]] #endif weak_ptr(weak_ptr &&ptr) noexcept : internal(std::move(ptr.internal)) { } // NOLINTEND // Destructor ~weak_ptr() = default; // Assignment operators weak_ptr &operator=(const weak_ptr &other) { if (this == &other) { return *this; } internal = other.internal; return *this; } template ::value, int>::type = 0> weak_ptr &operator=(const shared_ptr &ptr) { internal = ptr.internal; return *this; } // Modifiers #ifdef DUCKDB_CLANG_TIDY // This is necessary to tell clang-tidy that it reinitializes the variable after a move [[clang::reinitializes]] #endif void reset() { // NOLINT: invalid case style internal.reset(); } // Observers long use_count() const { // NOLINT: invalid case style return internal.use_count(); } bool expired() const { // NOLINT: invalid case style return internal.expired(); } shared_ptr 
lock() const { // NOLINT: invalid case style return shared_ptr(internal.lock()); } // Relational operators template bool operator==(const weak_ptr &other) const noexcept { return internal == other.internal; } template bool operator!=(const weak_ptr &other) const noexcept { return internal != other.internal; } template bool operator<(const weak_ptr &other) const noexcept { return internal < other.internal; } template bool operator<=(const weak_ptr &other) const noexcept { return internal <= other.internal; } template bool operator>(const weak_ptr &other) const noexcept { return internal > other.internal; } template bool operator>=(const weak_ptr &other) const noexcept { return internal >= other.internal; } }; } // namespace duckdb namespace duckdb { template class enable_shared_from_this { // NOLINT: invalid case style public: template friend class shared_ptr; private: mutable weak_ptr __weak_this_; // NOLINT: __weak_this_ is reserved protected: constexpr enable_shared_from_this() noexcept { } enable_shared_from_this(enable_shared_from_this const &) noexcept { // NOLINT: not marked as explicit } enable_shared_from_this &operator=(enable_shared_from_this const &) noexcept { return *this; } ~enable_shared_from_this() { } public: shared_ptr shared_from_this() { // NOLINT: invalid case style return shared_ptr(__weak_this_); } shared_ptr shared_from_this() const { // NOLINT: invalid case style return shared_ptr(__weak_this_); } #if _LIBCPP_STD_VER >= 17 weak_ptr weak_from_this() noexcept { // NOLINT: invalid case style return __weak_this_; } weak_ptr weak_from_this() const noexcept { // NOLINT: invalid case style return __weak_this_; } #endif // _LIBCPP_STD_VER >= 17 }; } // namespace duckdb namespace duckdb { template using unsafe_shared_ptr = shared_ptr; template using unsafe_weak_ptr = weak_ptr; } // namespace duckdb #include #include #ifdef _MSC_VER #define suint64_t int64_t #endif #if defined(_WIN32) || defined(_WIN64) #define DUCKDB_WINDOWS #elif defined(__unix__) 
|| defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) #define DUCKDB_POSIX #endif namespace duckdb { // explicit fallthrough for switch_statementss #ifndef __has_cpp_attribute // For backwards compatibility #define __has_cpp_attribute(x) 0 #endif #if __has_cpp_attribute(clang::fallthrough) #define DUCKDB_EXPLICIT_FALLTHROUGH [[clang::fallthrough]] #elif __has_cpp_attribute(gnu::fallthrough) #define DUCKDB_EXPLICIT_FALLTHROUGH [[gnu::fallthrough]] #else #define DUCKDB_EXPLICIT_FALLTHROUGH #endif template struct AlwaysFalse { static constexpr bool VALUE = false; }; template using reference = std::reference_wrapper; template struct TemplatedUniqueIf { typedef unique_ptr, SAFE> templated_unique_single_t; }; template struct TemplatedUniqueIf { typedef void TemplatedUniqueArrayKnownBound; // NOLINT: mimic std style }; template inline typename TemplatedUniqueIf::templated_unique_single_t make_uniq(ARGS&&... args) // NOLINT: mimic std style { return unique_ptr, true>(new DATA_TYPE(std::forward(args)...)); } template inline shared_ptr make_shared_ptr(ARGS&&... args) // NOLINT: mimic std style { return shared_ptr(duckdb_base_std::make_shared(std::forward(args)...)); } template inline typename TemplatedUniqueIf::templated_unique_single_t make_unsafe_uniq(ARGS&&... 
args) // NOLINT: mimic std style { return unique_ptr, false>(new DATA_TYPE(std::forward(args)...)); } template inline unique_ptr, true> make_uniq_array(size_t n) // NOLINT: mimic std style { return unique_ptr, true>(new DATA_TYPE[n]()); } template inline unique_ptr, true> make_uniq_array_uninitialized(size_t n) // NOLINT: mimic std style { return unique_ptr, true>(new DATA_TYPE[n]); } template inline unique_ptr, false> make_unsafe_uniq_array(size_t n) // NOLINT: mimic std style { return unique_ptr, false>(new DATA_TYPE[n]()); } template inline unique_ptr, false> make_unsafe_uniq_array_uninitialized(size_t n) // NOLINT: mimic std style { return unique_ptr, false>(new DATA_TYPE[n]); } template typename TemplatedUniqueIf::TemplatedUniqueArrayKnownBound make_uniq(ARGS&&...) = delete; // NOLINT: mimic std style template unique_ptr make_uniq_base(ARGS &&... args) { // NOLINT: mimic std style return unique_ptr(new T(std::forward(args)...)); } #ifdef DUCKDB_ENABLE_DEPRECATED_API template unique_ptr make_unique_base(Args &&... args) { return unique_ptr(new T(std::forward(args)...)); } #endif // DUCKDB_ENABLE_DEPRECATED_API template unique_ptr unique_ptr_cast(unique_ptr src) { // NOLINT: mimic std style return unique_ptr(static_cast(src.release())); } template shared_ptr shared_ptr_cast(shared_ptr src) { // NOLINT: mimic std style return shared_ptr(std::static_pointer_cast(src.internal)); } struct SharedConstructor { template static shared_ptr Create(ARGS &&...args) { return make_shared_ptr(std::forward(args)...); } }; struct UniqueConstructor { template static unique_ptr Create(ARGS &&...args) { return make_uniq(std::forward(args)...); } }; #ifdef DUCKDB_DEBUG_MOVE template typename std::remove_reference::type&& move(T&& t) noexcept { // the nonsensical sizeof check ensures this is never instantiated static_assert(sizeof(T) == 0, "Use std::move instead of unqualified move or duckdb::move"); } #endif template static duckdb::unique_ptr make_unique(ARGS&&... 
__args) { // NOLINT: mimic std style #ifndef DUCKDB_ENABLE_DEPRECATED_API static_assert(sizeof(T) == 0, "Use make_uniq instead of make_unique!"); #endif // DUCKDB_ENABLE_DEPRECATED_API return unique_ptr(new T(std::forward(__args)...)); } template static duckdb::shared_ptr make_shared(ARGS&&... __args) { // NOLINT: mimic std style #ifndef DUCKDB_ENABLE_DEPRECATED_API static_assert(sizeof(T) == 0, "Use make_shared_ptr instead of make_shared!"); #endif // DUCKDB_ENABLE_DEPRECATED_API return shared_ptr(new T(std::forward(__args)...)); } template constexpr T MaxValue(T a, T b) { return a > b ? a : b; } template constexpr T MinValue(T a, T b) { return a < b ? a : b; } //! Like std::clamp (C++17), returns v if within bounds, else nearest bound template constexpr T ClampValue(T v, T min, T max) { return MinValue(MaxValue(v, min), max); } template T AbsValue(T a) { return a < 0 ? -a : a; } //! Align value (ceiling) (not for pointer types) template::value>::type> static inline T AlignValue(T n) { return ((n + (val - 1)) / val) * val; } template static T AlignValue(T n, T val) { return ((n + (val - 1)) / val) * val; } template inline T *AlignPointer(T *addr) { static_assert((alignment & (alignment - 1)) == 0, "'alignment' has to be a power of 2"); return reinterpret_cast((reinterpret_cast(addr) + alignment - 1) & ~(alignment - 1)); } template constexpr inline T AlignValueFloor(T n) { return (n / val) * val; } template static inline bool ValueIsAligned(T n) { return (n % val) == 0; } template T SignValue(T a) { return a < 0 ? -1 : 1; } template const T Load(const_data_ptr_t ptr) { T ret; memcpy(&ret, ptr, sizeof(ret)); // NOLINT return ret; } template void Store(const T &val, data_ptr_t ptr) { memcpy(ptr, (void *)&val, sizeof(val)); // NOLINT } //! This assigns a shared pointer, but ONLY assigns if "target" is not equal to "source" //! If this is often the case, this manner of assignment is significantly faster (~20X faster) //! 
Since it avoids the need of an atomic incref/decref at the cost of a single pointer comparison //! Benchmark: https://gist.github.com/Mytherin/4db3faa8e233c4a9b874b21f62bb4b96 //! If the shared pointers are not the same, the penalty is very low (on the order of 1%~ slower) //! This method should always be preferred if there is a (reasonable) chance that the pointers are the same template void AssignSharedPointer(shared_ptr &target, const shared_ptr &source) { if (target.get() != source.get()) { target = source; } } template using const_reference = std::reference_wrapper; //! Returns whether or not two reference wrappers refer to the same object template bool RefersToSameObject(const reference &a, const reference &b) { return &a.get() == &b.get(); } template bool RefersToSameObject(const T &a, const T &b) { return &a == &b; } template void DynamicCastCheck(const SRC *source) { #ifndef __APPLE__ // Actual check is on the fact that dynamic_cast and reinterpret_cast are equivalent D_ASSERT(reinterpret_cast(source) == dynamic_cast(source)); #endif } //! 
Used to increment counters that need to be exception-proof template class PostIncrement { public: explicit PostIncrement(T &t) : t(t) { } ~PostIncrement() { ++t; } private: T &t; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/vector.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { template class vector : public std::vector> { // NOLINT: matching name of std public: using original = std::vector>; using original::original; using size_type = typename original::size_type; using const_reference = typename original::const_reference; using reference = typename original::reference; private: static inline void AssertIndexInBounds(idx_t index, idx_t size) { #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY) return; #else if (DUCKDB_UNLIKELY(index >= size)) { throw InternalException("Attempted to access index %ld within vector of size %ld", index, size); } #endif } public: #ifdef DUCKDB_CLANG_TIDY // This is necessary to tell clang-tidy that it reinitializes the variable after a move [[clang::reinitializes]] #endif inline void clear() noexcept { // NOLINT: hiding on purpose original::clear(); } // Because we create the other constructor, the implicitly created constructor // gets deleted, so we have to be explicit vector() = default; vector(original &&other) : original(std::move(other)) { // NOLINT: allow implicit conversion } template vector(vector &&other) : original(std::move(other)) { // NOLINT: allow implicit conversion } template inline typename original::reference get(typename original::size_type __n) { // NOLINT: hiding on purpose if (MemorySafety::ENABLED) { AssertIndexInBounds(__n, original::size()); } return original::operator[](__n); } template inline typename original::const_reference get(typename original::size_type __n) const { // NOLINT: hiding on purpose if (MemorySafety::ENABLED) { 
AssertIndexInBounds(__n, original::size()); } return original::operator[](__n); } typename original::reference operator[](typename original::size_type __n) { // NOLINT: hiding on purpose return get(__n); } typename original::const_reference operator[](typename original::size_type __n) const { // NOLINT: hiding on purpose return get(__n); } typename original::reference front() { // NOLINT: hiding on purpose return get(0); } typename original::const_reference front() const { // NOLINT: hiding on purpose return get(0); } typename original::reference back() { // NOLINT: hiding on purpose if (MemorySafety::ENABLED && original::empty()) { throw InternalException("'back' called on an empty vector!"); } return get(original::size() - 1); } typename original::const_reference back() const { // NOLINT: hiding on purpose if (MemorySafety::ENABLED && original::empty()) { throw InternalException("'back' called on an empty vector!"); } return get(original::size() - 1); } void unsafe_erase_at(idx_t idx) { // NOLINT: not using camelcase on purpose here original::erase(original::begin() + static_cast(idx)); } void erase_at(idx_t idx) { // NOLINT: not using camelcase on purpose here if (MemorySafety::ENABLED && idx > original::size()) { throw InternalException("Can't remove offset %d from vector of size %d", idx, original::size()); } unsafe_erase_at(idx); } }; template using unsafe_vector = vector; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/catalog_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Catalog Types //===--------------------------------------------------------------------===// enum class CatalogType : uint8_t { INVALID = 0, TABLE_ENTRY = 1, SCHEMA_ENTRY = 2, VIEW_ENTRY = 3, INDEX_ENTRY = 4, PREPARED_STATEMENT = 5, SEQUENCE_ENTRY = 6, 
COLLATION_ENTRY = 7, TYPE_ENTRY = 8, DATABASE_ENTRY = 9, // functions TABLE_FUNCTION_ENTRY = 25, SCALAR_FUNCTION_ENTRY = 26, AGGREGATE_FUNCTION_ENTRY = 27, PRAGMA_FUNCTION_ENTRY = 28, COPY_FUNCTION_ENTRY = 29, MACRO_ENTRY = 30, TABLE_MACRO_ENTRY = 31, // version info DELETED_ENTRY = 51, RENAMED_ENTRY = 52, // secrets SECRET_ENTRY = 71, SECRET_TYPE_ENTRY = 72, SECRET_FUNCTION_ENTRY = 73, // dependency info DEPENDENCY_ENTRY = 100 }; DUCKDB_API string CatalogTypeToString(CatalogType type); CatalogType CatalogTypeFromString(const string &type); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/atomic.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::atomic; //! NOTE: When repeatedly trying to atomically set a value in a loop, you can use as the loop condition: //! * std::atomic_compare_exchange_weak //! * std::atomic::compare_exchange_weak //! If not used as a loop condition, use: //! * std::atomic_compare_exchange_strong //! * std::atomic::compare_exchange_strong //! If this is not done correctly, we may get correctness issues when using older compiler versions (see: issue #14389) //! Performance may be optimized using std::memory_order, but NOT at the cost of correctness. //! 
For correct examples of this, see concurrentqueue.h } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/optional_ptr.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { template class optional_ptr { // NOLINT: mimic std casing public: optional_ptr() noexcept : ptr(nullptr) { } optional_ptr(T *ptr_p) : ptr(ptr_p) { // NOLINT: allow implicit creation from pointer } optional_ptr(T &ref) : ptr(&ref) { // NOLINT: allow implicit creation from reference } optional_ptr(const unique_ptr &ptr_p) : ptr(ptr_p.get()) { // NOLINT: allow implicit creation from unique pointer } optional_ptr(const shared_ptr &ptr_p) : ptr(ptr_p.get()) { // NOLINT: allow implicit creation from shared pointer } void CheckValid() const { if (MemorySafety::ENABLED) { if (!ptr) { throw InternalException("Attempting to dereference an optional pointer that is not set"); } } } operator bool() const { // NOLINT: allow implicit conversion to bool return ptr; } T &operator*() { CheckValid(); return *ptr; } const T &operator*() const { CheckValid(); return *ptr; } T *operator->() { CheckValid(); return ptr; } const T *operator->() const { CheckValid(); return ptr; } T *get() { // NOLINT: mimic std casing // CheckValid(); return ptr; } const T *get() const { // NOLINT: mimic std casing // CheckValid(); return ptr; } // this looks dirty - but this is the default behavior of raw pointers T *get_mutable() const { // NOLINT: mimic std casing // CheckValid(); return ptr; } bool operator==(const optional_ptr &rhs) const { return ptr == rhs.ptr; } bool operator!=(const optional_ptr &rhs) const { return ptr != rhs.ptr; } private: T *ptr; }; template using unsafe_optional_ptr = optional_ptr; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/exception/catalog_exception.hpp // // 
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/query_error_context.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/optional_idx.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class optional_idx { static constexpr const idx_t INVALID_INDEX = idx_t(-1); public: optional_idx() : index(INVALID_INDEX) { } optional_idx(idx_t index) : index(index) { // NOLINT: allow implicit conversion from idx_t if (index == INVALID_INDEX) { throw InternalException("optional_idx cannot be initialized with an invalid index"); } } static optional_idx Invalid() { return optional_idx(); } bool IsValid() const { return index != INVALID_INDEX; } void SetInvalid() { index = INVALID_INDEX; } idx_t GetIndex() const { if (index == INVALID_INDEX) { throw InternalException("Attempting to get the index of an optional_idx that is not set"); } return index; } inline bool operator==(const optional_idx &rhs) const { return index == rhs.index; } inline bool operator!=(const optional_idx &rhs) const { return index != rhs.index; } private: idx_t index; }; } // namespace duckdb namespace duckdb { class ParsedExpression; class QueryErrorContext { public: QueryErrorContext(const ParsedExpression &expr); // NOLINT: allow implicit conversion from expression explicit QueryErrorContext(optional_idx query_location_p = optional_idx()) : query_location(query_location_p) { } //! 
The location in which the error should be thrown optional_idx query_location; public: static string Format(const string &query, const string &error_message, optional_idx error_loc, bool add_line_indicator = true); }; } // namespace duckdb namespace duckdb { struct EntryLookupInfo; class CatalogException : public Exception { public: DUCKDB_API explicit CatalogException(const string &msg); DUCKDB_API explicit CatalogException(const string &msg, const unordered_map &extra_info); template explicit CatalogException(const string &msg, ARGS... params) : CatalogException(ConstructMessage(msg, params...)) { } template explicit CatalogException(QueryErrorContext error_context, const string &msg, ARGS... params) : CatalogException(ConstructMessage(msg, params...), Exception::InitializeExtraInfo(error_context)) { } static CatalogException MissingEntry(const EntryLookupInfo &lookup_info, const string &suggestion); static CatalogException MissingEntry(CatalogType type, const string &name, const string &suggestion, QueryErrorContext context = QueryErrorContext()); static CatalogException MissingEntry(const string &type, const string &name, const vector &suggestions, QueryErrorContext context = QueryErrorContext()); static CatalogException EntryAlreadyExists(CatalogType type, const string &name, QueryErrorContext context = QueryErrorContext()); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/value.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Serializer; class Deserializer; class Value; class TypeCatalogEntry; class Vector; class ClientContext; struct string_t; // NOLINT: mimic std casing template using child_list_t 
= vector>; //! FIXME: this should be a single_thread_ptr template using buffer_ptr = shared_ptr; template buffer_ptr make_buffer(ARGS &&...args) { // NOLINT: mimic std casing return make_shared_ptr(std::forward(args)...); } struct list_entry_t { // NOLINT: mimic std casing list_entry_t() = default; list_entry_t(uint64_t offset, uint64_t length) : offset(offset), length(length) { } inline constexpr bool operator!=(const list_entry_t &other) const { return !(*this == other); } inline constexpr bool operator==(const list_entry_t &other) const { return offset == other.offset && length == other.length; } uint64_t offset; uint64_t length; }; using union_tag_t = uint8_t; //===--------------------------------------------------------------------===// // Internal Types //===--------------------------------------------------------------------===// // taken from arrow's type.h enum class PhysicalType : uint8_t { ///// A NULL type having no physical storage // NA = 0, /// Boolean as 8 bit "bool" value BOOL = 1, /// Unsigned 8-bit little-endian integer UINT8 = 2, /// Signed 8-bit little-endian integer INT8 = 3, /// Unsigned 16-bit little-endian integer UINT16 = 4, /// Signed 16-bit little-endian integer INT16 = 5, /// Unsigned 32-bit little-endian integer UINT32 = 6, /// Signed 32-bit little-endian integer INT32 = 7, /// Unsigned 64-bit little-endian integer UINT64 = 8, /// Signed 64-bit little-endian integer INT64 = 9, ///// 2-byte floating point value // HALF_FLOAT = 10, /// 4-byte floating point value FLOAT = 11, /// 8-byte floating point value DOUBLE = 12, ///// UTF8 variable-length string as List // STRING = 13, ///// Variable-length bytes (no guarantee of UTF8-ness) // BINARY = 14, ///// Fixed-size binary. 
Each value occupies the same number of bytes // FIXED_SIZE_BINARY = 15, ///// int32_t days since the UNIX epoch // DATE32 = 16, ///// int64_t milliseconds since the UNIX epoch // DATE64 = 17, ///// Exact timestamp encoded with int64 since UNIX epoch ///// Default unit millisecond // TIMESTAMP = 18, ///// Time as signed 32-bit integer, representing either seconds or ///// milliseconds since midnight // TIME32 = 19, ///// Time as signed 64-bit integer, representing either microseconds or ///// nanoseconds since midnight // TIME64 = 20, /// YEAR_MONTH or DAY_TIME interval in SQL style INTERVAL = 21, /// Precision- and scale-based decimal type. Storage type depends on the /// parameters. // DECIMAL = 22, /// A list of some logical data type LIST = 23, /// Struct of logical types STRUCT = 24, ///// Unions of logical types // UNION = 25, ///// Dictionary-encoded type, also called "categorical" or "factor" ///// in other programming languages. Holds the dictionary value ///// type but not the dictionary itself, which is part of the ///// ArrayData struct // DICTIONARY = 26, ///// Custom data type, implemented by user // EXTENSION = 28, ///// Array with fixed length of some logical type (a fixed-size list) ARRAY = 29, ///// Measure of elapsed time in either seconds, milliseconds, microseconds ///// or nanoseconds. 
// DURATION = 30, ///// Like STRING, but with 64-bit offsets // LARGE_STRING = 31, ///// Like BINARY, but with 64-bit offsets // LARGE_BINARY = 32, ///// Like LIST, but with 64-bit offsets // LARGE_LIST = 33, /// DuckDB Extensions VARCHAR = 200, // our own string representation, different from STRING and LARGE_STRING above UINT128 = 203, // 128-bit unsigned integers INT128 = 204, // 128-bit integers UNKNOWN = 205, // Unknown physical type of user defined types /// Boolean as 1 bit, LSB bit-packed ordering BIT = 206, INVALID = 255 }; //===--------------------------------------------------------------------===// // SQL Types //===--------------------------------------------------------------------===// enum class LogicalTypeId : uint8_t { INVALID = 0, SQLNULL = 1, /* NULL type, used for constant NULL */ UNKNOWN = 2, /* unknown type, used for parameter expressions */ ANY = 3, /* ANY type, used for functions that accept any type as parameter */ USER = 4, /* A User Defined Type (e.g., ENUMs before the binder) */ // A "template" type functions as a "placeholder" type for function arguments and return types. // Templates only exist during the binding phase, in the scope of a function, and are replaced with concrete types // before execution. When defining a template, you provide a name to distinguish between different template types, // specifying to the binder that they dont need to resolve to the same concrete type. Two templates with the same // name are always resolved to the same concrete type. TEMPLATE = 5, BOOLEAN = 10, TINYINT = 11, SMALLINT = 12, INTEGER = 13, BIGINT = 14, DATE = 15, TIME = 16, TIMESTAMP_SEC = 17, TIMESTAMP_MS = 18, TIMESTAMP = 19, //! 
us TIMESTAMP_NS = 20, DECIMAL = 21, FLOAT = 22, DOUBLE = 23, CHAR = 24, VARCHAR = 25, BLOB = 26, INTERVAL = 27, UTINYINT = 28, USMALLINT = 29, UINTEGER = 30, UBIGINT = 31, TIMESTAMP_TZ = 32, TIME_TZ = 34, TIME_NS = 35, BIT = 36, STRING_LITERAL = 37, /* string literals, used for constant strings - only exists while binding */ INTEGER_LITERAL = 38,/* integer literals, used for constant integers - only exists while binding */ BIGNUM = 39, UHUGEINT = 49, HUGEINT = 50, POINTER = 51, VALIDITY = 53, UUID = 54, STRUCT = 100, LIST = 101, MAP = 102, TABLE = 103, ENUM = 104, AGGREGATE_STATE = 105, LAMBDA = 106, UNION = 107, ARRAY = 108, VARIANT = 109 }; struct ExtraTypeInfo; struct ExtensionTypeInfo; struct aggregate_state_t; // NOLINT: mimic std casing struct LogicalType { DUCKDB_API LogicalType(); DUCKDB_API LogicalType(LogicalTypeId id); // NOLINT: Allow implicit conversion from `LogicalTypeId` DUCKDB_API LogicalType(LogicalTypeId id, shared_ptr type_info); DUCKDB_API LogicalType(const LogicalType &other); DUCKDB_API LogicalType(LogicalType &&other) noexcept; DUCKDB_API ~LogicalType(); inline LogicalTypeId id() const { // NOLINT: mimic std casing return id_; } inline PhysicalType InternalType() const { return physical_type_; } inline const optional_ptr AuxInfo() const { return type_info_.get(); } inline bool IsNested() const { auto internal = InternalType(); if (internal == PhysicalType::STRUCT) { return true; } if (internal == PhysicalType::LIST) { return true; } if (internal == PhysicalType::ARRAY) { return true; } return false; } inline bool IsUnknown() const { return id_ == LogicalTypeId::UNKNOWN; } inline shared_ptr GetAuxInfoShrPtr() const { return type_info_; } //! Copies the logical type, making a new ExtraTypeInfo LogicalType Copy() const; //! 
DeepCopy() will make a unique copy of any nested ExtraTypeInfo as well LogicalType DeepCopy() const; inline void CopyAuxInfo(const LogicalType &other) { type_info_ = other.type_info_; } bool EqualTypeInfo(const LogicalType &rhs) const; // copy assignment inline LogicalType &operator=(const LogicalType &other) { if (this == &other) { return *this; } id_ = other.id_; physical_type_ = other.physical_type_; type_info_ = other.type_info_; return *this; } // move assignment inline LogicalType &operator=(LogicalType &&other) noexcept { id_ = other.id_; physical_type_ = other.physical_type_; std::swap(type_info_, other.type_info_); return *this; } DUCKDB_API bool operator==(const LogicalType &rhs) const; inline bool operator!=(const LogicalType &rhs) const { return !(*this == rhs); } DUCKDB_API void Serialize(Serializer &serializer) const; DUCKDB_API static LogicalType Deserialize(Deserializer &deserializer); static bool TypeIsTimestamp(LogicalTypeId id) { return (id == LogicalTypeId::TIMESTAMP || id == LogicalTypeId::TIMESTAMP_MS || id == LogicalTypeId::TIMESTAMP_NS || id == LogicalTypeId::TIMESTAMP_SEC || id == LogicalTypeId::TIMESTAMP_TZ); } static bool TypeIsTimestamp(const LogicalType &type) { return TypeIsTimestamp(type.id()); } DUCKDB_API string ToString() const; DUCKDB_API bool IsIntegral() const; DUCKDB_API bool IsFloating() const; DUCKDB_API bool IsNumeric() const; DUCKDB_API static bool IsNumeric(LogicalTypeId type); DUCKDB_API bool IsTemporal() const; DUCKDB_API hash_t Hash() const; DUCKDB_API void SetAlias(string alias); DUCKDB_API bool HasAlias() const; DUCKDB_API string GetAlias() const; DUCKDB_API bool HasExtensionInfo() const; DUCKDB_API optional_ptr GetExtensionInfo() const; DUCKDB_API optional_ptr GetExtensionInfo(); DUCKDB_API void SetExtensionInfo(unique_ptr info); //! 
Returns the maximum logical type when combining the two types - or throws an exception if combining is not possible DUCKDB_API static LogicalType MaxLogicalType(ClientContext &context, const LogicalType &left, const LogicalType &right); DUCKDB_API static bool TryGetMaxLogicalType(ClientContext &context, const LogicalType &left, const LogicalType &right, LogicalType &result); //! Forcibly returns a maximum logical type - similar to MaxLogicalType but never throws. As a fallback either left or right are returned. DUCKDB_API static LogicalType ForceMaxLogicalType(const LogicalType &left, const LogicalType &right); //! Normalize a type - removing literals DUCKDB_API static LogicalType NormalizeType(const LogicalType &type); //! Gets the decimal properties of a numeric type. Fails if the type is not numeric. DUCKDB_API bool GetDecimalProperties(uint8_t &width, uint8_t &scale) const; DUCKDB_API void Verify() const; DUCKDB_API bool IsSigned() const; DUCKDB_API bool IsUnsigned() const; DUCKDB_API bool IsValid() const; DUCKDB_API bool IsComplete() const; DUCKDB_API bool IsTemplated() const; //! True, if this type supports in-place updates. 
bool SupportsRegularUpdate() const; private: LogicalTypeId id_; // NOLINT: allow this naming for legacy reasons PhysicalType physical_type_; // NOLINT: allow this naming for legacy reasons shared_ptr type_info_; // NOLINT: allow this naming for legacy reasons private: PhysicalType GetInternalType(); public: static constexpr const LogicalTypeId SQLNULL = LogicalTypeId::SQLNULL; static constexpr const LogicalTypeId UNKNOWN = LogicalTypeId::UNKNOWN; static constexpr const LogicalTypeId BOOLEAN = LogicalTypeId::BOOLEAN; static constexpr const LogicalTypeId TINYINT = LogicalTypeId::TINYINT; static constexpr const LogicalTypeId UTINYINT = LogicalTypeId::UTINYINT; static constexpr const LogicalTypeId SMALLINT = LogicalTypeId::SMALLINT; static constexpr const LogicalTypeId USMALLINT = LogicalTypeId::USMALLINT; static constexpr const LogicalTypeId INTEGER = LogicalTypeId::INTEGER; static constexpr const LogicalTypeId UINTEGER = LogicalTypeId::UINTEGER; static constexpr const LogicalTypeId BIGINT = LogicalTypeId::BIGINT; static constexpr const LogicalTypeId UBIGINT = LogicalTypeId::UBIGINT; static constexpr const LogicalTypeId FLOAT = LogicalTypeId::FLOAT; static constexpr const LogicalTypeId DOUBLE = LogicalTypeId::DOUBLE; static constexpr const LogicalTypeId DATE = LogicalTypeId::DATE; static constexpr const LogicalTypeId TIMESTAMP = LogicalTypeId::TIMESTAMP; static constexpr const LogicalTypeId TIMESTAMP_S = LogicalTypeId::TIMESTAMP_SEC; static constexpr const LogicalTypeId TIMESTAMP_MS = LogicalTypeId::TIMESTAMP_MS; static constexpr const LogicalTypeId TIMESTAMP_NS = LogicalTypeId::TIMESTAMP_NS; static constexpr const LogicalTypeId TIME = LogicalTypeId::TIME; static constexpr const LogicalTypeId TIME_NS = LogicalTypeId::TIME_NS; static constexpr const LogicalTypeId TIMESTAMP_TZ = LogicalTypeId::TIMESTAMP_TZ; static constexpr const LogicalTypeId TIME_TZ = LogicalTypeId::TIME_TZ; static constexpr const LogicalTypeId VARCHAR = LogicalTypeId::VARCHAR; static constexpr const 
LogicalTypeId ANY = LogicalTypeId::ANY; static constexpr const LogicalTypeId BLOB = LogicalTypeId::BLOB; static constexpr const LogicalTypeId BIT = LogicalTypeId::BIT; static constexpr const LogicalTypeId BIGNUM = LogicalTypeId::BIGNUM; static constexpr const LogicalTypeId INTERVAL = LogicalTypeId::INTERVAL; static constexpr const LogicalTypeId HUGEINT = LogicalTypeId::HUGEINT; static constexpr const LogicalTypeId UHUGEINT = LogicalTypeId::UHUGEINT; static constexpr const LogicalTypeId UUID = LogicalTypeId::UUID; static constexpr const LogicalTypeId HASH = LogicalTypeId::UBIGINT; static constexpr const LogicalTypeId POINTER = LogicalTypeId::POINTER; static constexpr const LogicalTypeId TABLE = LogicalTypeId::TABLE; static constexpr const LogicalTypeId LAMBDA = LogicalTypeId::LAMBDA; static constexpr const LogicalTypeId INVALID = LogicalTypeId::INVALID; static constexpr const LogicalTypeId ROW_TYPE = LogicalTypeId::BIGINT; // explicitly allowing these functions to be capitalized to be in-line with the remaining functions DUCKDB_API static LogicalType DECIMAL(uint8_t width, uint8_t scale); // NOLINT DUCKDB_API static LogicalType VARCHAR_COLLATION(string collation); // NOLINT DUCKDB_API static LogicalType LIST(const LogicalType &child); // NOLINT DUCKDB_API static LogicalType STRUCT(child_list_t children); // NOLINT DUCKDB_API static LogicalType AGGREGATE_STATE(aggregate_state_t state_type); // NOLINT DUCKDB_API static LogicalType MAP(const LogicalType &child); // NOLINT DUCKDB_API static LogicalType MAP(LogicalType key, LogicalType value); // NOLINT DUCKDB_API static LogicalType UNION(child_list_t members); // NOLINT DUCKDB_API static LogicalType ARRAY(const LogicalType &child, optional_idx index); // NOLINT DUCKDB_API static LogicalType ENUM(Vector &ordered_data, idx_t size); // NOLINT // ANY but with special rules (default is LogicalType::ANY, 5) DUCKDB_API static LogicalType ANY_PARAMS(LogicalType target, idx_t cast_score = 5); // NOLINT DUCKDB_API static 
LogicalType TEMPLATE(const string &name); // NOLINT DUCKDB_API static LogicalType VARIANT(); // NOLINT //! Integer literal of the specified value DUCKDB_API static LogicalType INTEGER_LITERAL(const Value &constant); // NOLINT // DEPRECATED - provided for backwards compatibility DUCKDB_API static LogicalType ENUM(const string &enum_name, Vector &ordered_data, idx_t size); // NOLINT DUCKDB_API static LogicalType USER(const string &user_type_name); // NOLINT DUCKDB_API static LogicalType USER(const string &user_type_name, const vector &user_type_mods); // NOLINT DUCKDB_API static LogicalType USER(string catalog, string schema, string name, vector user_type_mods); // NOLINT //! A list of all NUMERIC types (integral and floating point types) DUCKDB_API static const vector Numeric(); //! A list of all INTEGRAL types DUCKDB_API static const vector Integral(); //! A list of all REAL types DUCKDB_API static const vector Real(); //! A list of ALL SQL types DUCKDB_API static const vector AllTypes(); public: //! 
The JSON type lives in the JSON extension, but we need to define this here for special handling static constexpr auto JSON_TYPE_NAME = "JSON"; DUCKDB_API static LogicalType JSON(); // NOLINT DUCKDB_API bool IsJSONType() const; }; struct DecimalType { DUCKDB_API static uint8_t GetWidth(const LogicalType &type); DUCKDB_API static uint8_t GetScale(const LogicalType &type); DUCKDB_API static uint8_t MaxWidth(); }; struct StringType { DUCKDB_API static string GetCollation(const LogicalType &type); }; struct ListType { DUCKDB_API static const LogicalType &GetChildType(const LogicalType &type); }; struct UserType { DUCKDB_API static const string &GetCatalog(const LogicalType &type); DUCKDB_API static const string &GetSchema(const LogicalType &type); DUCKDB_API static const string &GetTypeName(const LogicalType &type); DUCKDB_API static const vector &GetTypeModifiers(const LogicalType &type); DUCKDB_API static vector &GetTypeModifiers(LogicalType &type); }; struct EnumType { DUCKDB_API static int64_t GetPos(const LogicalType &type, const string_t &key); DUCKDB_API static const Vector &GetValuesInsertOrder(const LogicalType &type); DUCKDB_API static idx_t GetSize(const LogicalType &type); DUCKDB_API static const string GetValue(const Value &val); DUCKDB_API static PhysicalType GetPhysicalType(const LogicalType &type); DUCKDB_API static string_t GetString(const LogicalType &type, idx_t pos); }; struct StructType { DUCKDB_API static const child_list_t &GetChildTypes(const LogicalType &type); DUCKDB_API static const LogicalType &GetChildType(const LogicalType &type, idx_t index); DUCKDB_API static const string &GetChildName(const LogicalType &type, idx_t index); DUCKDB_API static idx_t GetChildIndexUnsafe(const LogicalType &type, const string &name); DUCKDB_API static idx_t GetChildCount(const LogicalType &type); DUCKDB_API static bool IsUnnamed(const LogicalType &type); }; struct MapType { DUCKDB_API static const LogicalType &KeyType(const LogicalType &type); DUCKDB_API 
static const LogicalType &ValueType(const LogicalType &type); }; struct UnionType { DUCKDB_API static const idx_t MAX_UNION_MEMBERS = 256; DUCKDB_API static idx_t GetMemberCount(const LogicalType &type); DUCKDB_API static const LogicalType &GetMemberType(const LogicalType &type, idx_t index); DUCKDB_API static const string &GetMemberName(const LogicalType &type, idx_t index); DUCKDB_API static const child_list_t CopyMemberTypes(const LogicalType &type); }; struct ArrayType { DUCKDB_API static const LogicalType &GetChildType(const LogicalType &type); DUCKDB_API static idx_t GetSize(const LogicalType &type); DUCKDB_API static bool IsAnySize(const LogicalType &type); DUCKDB_API static constexpr idx_t MAX_ARRAY_SIZE = 100000; // 100k for now //! Recursively replace all ARRAY types to LIST types within the given type DUCKDB_API static LogicalType ConvertToList(const LogicalType &type); }; struct AggregateStateType { DUCKDB_API static const string GetTypeName(const LogicalType &type); DUCKDB_API static const aggregate_state_t &GetStateType(const LogicalType &type); }; struct AnyType { DUCKDB_API static LogicalType GetTargetType(const LogicalType &type); DUCKDB_API static idx_t GetCastScore(const LogicalType &type); }; struct IntegerLiteral { //! Returns the type that this integer literal "prefers" DUCKDB_API static LogicalType GetType(const LogicalType &type); //! Whether or not the integer literal fits into the target numeric type DUCKDB_API static bool FitsInType(const LogicalType &type, const LogicalType &target); }; struct TemplateType { // Get the name of the template type DUCKDB_API static const string &GetName(const LogicalType &type); }; // **DEPRECATED**: Use EnumUtil directly instead. 
DUCKDB_API string LogicalTypeIdToString(LogicalTypeId type); DUCKDB_API LogicalTypeId TransformStringToLogicalTypeId(const string &str); DUCKDB_API LogicalType TransformStringToLogicalType(const string &str); DUCKDB_API LogicalType TransformStringToLogicalType(const string &str, ClientContext &context); //! The PhysicalType used by the row identifiers column extern const PhysicalType ROW_TYPE; DUCKDB_API string TypeIdToString(PhysicalType type); DUCKDB_API idx_t GetTypeIdSize(PhysicalType type); DUCKDB_API bool TypeIsConstantSize(PhysicalType type); DUCKDB_API bool TypeIsIntegral(PhysicalType type); DUCKDB_API bool TypeIsNumeric(PhysicalType type); DUCKDB_API bool TypeIsInteger(PhysicalType type); bool ApproxEqual(float l, float r); bool ApproxEqual(double l, double r); struct aggregate_state_t { aggregate_state_t() { } // NOLINTNEXTLINE: work around bug in clang-tidy aggregate_state_t(string function_name_p, LogicalType return_type_p, vector bound_argument_types_p) : function_name(std::move(function_name_p)), return_type(std::move(return_type_p)), bound_argument_types(std::move(bound_argument_types_p)) { } string function_name; LogicalType return_type; vector bound_argument_types; }; } // namespace duckdb #include namespace duckdb { // Forward declaration to allow conversion between hugeint and uhugeint struct hugeint_t; // NOLINT struct uhugeint_t { // NOLINT public: uint64_t lower; uint64_t upper; public: uhugeint_t() = default; DUCKDB_API uhugeint_t(uint64_t value); // NOLINT: Allow implicit conversion from `uint64_t` constexpr uhugeint_t(uint64_t upper, uint64_t lower) : lower(lower), upper(upper) { } constexpr uhugeint_t(const uhugeint_t &rhs) = default; constexpr uhugeint_t(uhugeint_t &&rhs) = default; uhugeint_t &operator=(const uhugeint_t &rhs) = default; uhugeint_t &operator=(uhugeint_t &&rhs) = default; DUCKDB_API string ToString() const; // comparison operators DUCKDB_API bool operator==(const uhugeint_t &rhs) const; DUCKDB_API bool operator!=(const 
uhugeint_t &rhs) const; DUCKDB_API bool operator<=(const uhugeint_t &rhs) const; DUCKDB_API bool operator<(const uhugeint_t &rhs) const; DUCKDB_API bool operator>(const uhugeint_t &rhs) const; DUCKDB_API bool operator>=(const uhugeint_t &rhs) const; // arithmetic operators DUCKDB_API uhugeint_t operator+(const uhugeint_t &rhs) const; DUCKDB_API uhugeint_t operator-(const uhugeint_t &rhs) const; DUCKDB_API uhugeint_t operator*(const uhugeint_t &rhs) const; DUCKDB_API uhugeint_t operator/(const uhugeint_t &rhs) const; DUCKDB_API uhugeint_t operator%(const uhugeint_t &rhs) const; DUCKDB_API uhugeint_t operator-() const; // bitwise operators DUCKDB_API uhugeint_t operator>>(const uhugeint_t &rhs) const; DUCKDB_API uhugeint_t operator<<(const uhugeint_t &rhs) const; DUCKDB_API uhugeint_t operator&(const uhugeint_t &rhs) const; DUCKDB_API uhugeint_t operator|(const uhugeint_t &rhs) const; DUCKDB_API uhugeint_t operator^(const uhugeint_t &rhs) const; DUCKDB_API uhugeint_t operator~() const; // in-place operators DUCKDB_API uhugeint_t &operator+=(const uhugeint_t &rhs); DUCKDB_API uhugeint_t &operator-=(const uhugeint_t &rhs); DUCKDB_API uhugeint_t &operator*=(const uhugeint_t &rhs); DUCKDB_API uhugeint_t &operator/=(const uhugeint_t &rhs); DUCKDB_API uhugeint_t &operator%=(const uhugeint_t &rhs); DUCKDB_API uhugeint_t &operator>>=(const uhugeint_t &rhs); DUCKDB_API uhugeint_t &operator<<=(const uhugeint_t &rhs); DUCKDB_API uhugeint_t &operator&=(const uhugeint_t &rhs); DUCKDB_API uhugeint_t &operator|=(const uhugeint_t &rhs); DUCKDB_API uhugeint_t &operator^=(const uhugeint_t &rhs); // boolean operators DUCKDB_API explicit operator bool() const; DUCKDB_API bool operator!() const; // cast operators -- doesn't check bounds/overflow/underflow DUCKDB_API explicit operator uint8_t() const; DUCKDB_API explicit operator uint16_t() const; DUCKDB_API explicit operator uint32_t() const; DUCKDB_API explicit operator uint64_t() const; DUCKDB_API explicit operator int8_t() const; 
DUCKDB_API explicit operator int16_t() const; DUCKDB_API explicit operator int32_t() const; DUCKDB_API explicit operator int64_t() const; DUCKDB_API operator hugeint_t() const; // NOLINT: Allow implicit conversion from `uhugeint_t` }; } // namespace duckdb namespace std { template <> struct hash { size_t operator()(const duckdb::uhugeint_t &val) const { using std::hash; return hash {}(val.upper) ^ hash {}(val.lower); } }; } // namespace std //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/timestamp.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/limits.hpp // // //===----------------------------------------------------------------------===// #include // Undef annoying windows macro #undef max #include namespace duckdb { template struct NumericLimits { static constexpr T Minimum() { return std::numeric_limits::has_infinity ? -std::numeric_limits::infinity() : std::numeric_limits::lowest(); } static constexpr T Maximum() { return std::numeric_limits::has_infinity ? 
std::numeric_limits::infinity() : std::numeric_limits::max(); } static constexpr bool IsSigned() { return std::is_signed::value; } static constexpr bool IsIntegral() { return std::is_integral::value || std::is_enum::value; } static constexpr idx_t Digits(); }; template <> struct NumericLimits { static constexpr hugeint_t Minimum() { return {std::numeric_limits::lowest(), 0}; }; static constexpr hugeint_t Maximum() { return {std::numeric_limits::max(), std::numeric_limits::max()}; }; static constexpr bool IsSigned() { return true; } static constexpr bool IsIntegral() { return true; } static constexpr idx_t Digits() { return 39; } }; template <> struct NumericLimits { static constexpr uhugeint_t Minimum() { return {0, 0}; }; static constexpr uhugeint_t Maximum() { return {std::numeric_limits::max(), std::numeric_limits::max()}; }; static constexpr bool IsSigned() { return false; } static constexpr bool IsIntegral() { return true; } static constexpr idx_t Digits() { return 39; } }; template <> constexpr idx_t NumericLimits::Digits() { return 3; } template <> constexpr idx_t NumericLimits::Digits() { return 5; } template <> constexpr idx_t NumericLimits::Digits() { return 10; } template <> constexpr idx_t NumericLimits::Digits() { return 19; } template <> constexpr idx_t NumericLimits::Digits() { return 3; } template <> constexpr idx_t NumericLimits::Digits() { return 5; } template <> constexpr idx_t NumericLimits::Digits() { return 10; } template <> constexpr idx_t NumericLimits::Digits() { return 20; } template <> constexpr idx_t NumericLimits::Digits() { return 127; } template <> constexpr idx_t NumericLimits::Digits() { return 250; } } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/string_util.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // 
duckdb/common/numeric_utils.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { template struct MakeSigned { using type = typename std::make_signed::type; }; template <> struct MakeSigned { using type = hugeint_t; }; template <> struct MakeSigned { using type = hugeint_t; }; template struct MakeUnsigned { using type = typename std::make_unsigned::type; }; template <> struct MakeUnsigned { using type = uhugeint_t; }; template <> struct MakeUnsigned { using type = uhugeint_t; }; template static void ThrowNumericCastError(FROM in, TO minval, TO maxval) { throw InternalException("Information loss on integer cast: value %d outside of target range [%d, %d]", in, minval, maxval); } template struct NumericCastImpl; template struct NumericCastImpl { static TO Convert(FROM val) { return static_cast(val); } }; template struct NumericCastImpl { static TO Convert(FROM val) { // some dance around signed-unsigned integer comparison below auto minval = NumericLimits::Minimum(); auto maxval = NumericLimits::Maximum(); auto unsigned_in = static_cast::type>(val); auto unsigned_min = static_cast::type>(minval); auto unsigned_max = static_cast::type>(maxval); auto signed_in = static_cast::type>(val); auto signed_min = static_cast::type>(minval); auto signed_max = static_cast::type>(maxval); if (!NumericLimits::IsSigned() && !NumericLimits::IsSigned() && (unsigned_in < unsigned_min || unsigned_in > unsigned_max)) { ThrowNumericCastError(val, static_cast(unsigned_min), static_cast(unsigned_max)); } if (NumericLimits::IsSigned() && NumericLimits::IsSigned() && (signed_in < signed_min || signed_in > signed_max)) { ThrowNumericCastError(val, static_cast(signed_min), static_cast(signed_max)); } if (NumericLimits::IsSigned() != NumericLimits::IsSigned() && (signed_in < signed_min || unsigned_in > unsigned_max)) { ThrowNumericCastError(val, static_cast(signed_min), static_cast(unsigned_max)); } return static_cast(val); } }; // 
NumericCast // When: between same types, or when both types are integral // Checks: perform checked casts on range template ::IsIntegral() && NumericLimits::IsIntegral()) || std::is_same::value>::type> TO NumericCast(FROM val) { return NumericCastImpl::value>::Convert(val); } // UnsafeNumericCast // When: between same types, or when both types are integral // Checks: perform checked casts on range (in DEBUG) otherwise no checks template ::IsIntegral() && NumericLimits::IsIntegral()) || std::is_same::value>::type> TO UnsafeNumericCast(FROM in) { #if defined(DEBUG) || defined(UNSAFE_NUMERIC_CAST) return NumericCast(in); #endif return static_cast(in); } // LossyNumericCast // When: between double/float to other convertible types // Checks: no checks performed (at the moment, to be improved adding range checks) template TO LossyNumericCast(FROM val) { return static_cast(val); } // ExactNumericCast // When: between double/float to other convertible types // Checks: perform checks that casts are invertible (in DEBUG) otherwise no checks template TO ExactNumericCast(double val) { auto res = LossyNumericCast(val); #if defined(DEBUG) || defined(UNSAFE_NUMERIC_CAST) if (val != double(res)) { throw InternalException("Information loss on double cast: value %lf outside of target range [%lf, %lf]", val, double(res), double(res)); } #endif return res; } template TO ExactNumericCast(float val) { auto res = LossyNumericCast(val); #if defined(DEBUG) || defined(UNSAFE_NUMERIC_CAST) if (val != float(res)) { throw InternalException("Information loss on float cast: value %f outside of target range [%f, %f]", val, float(res), float(res)); } #endif return res; } template struct NextUnsigned {}; template <> struct NextUnsigned { using type = uint16_t; }; template <> struct NextUnsigned { using type = uint32_t; }; template <> struct NextUnsigned { using type = uint64_t; }; template <> struct NextUnsigned { #if ((__GNUC__ >= 5) || defined(__clang__)) && defined(__SIZEOF_INT128__) using type 
= __uint128_t; #else using type = uhugeint_t; #endif }; template class FastMod { using NEXT_TYPE = typename NextUnsigned::type; static_assert(sizeof(NEXT_TYPE) != 0, "NextUnsigned not available for this type"); public: explicit FastMod(TYPE divisor_p) : divisor(divisor_p), multiplier((static_cast(-1) / divisor) + 1) { } TYPE Div(const TYPE &val) const { return static_cast((static_cast(val) * multiplier) >> (sizeof(TYPE) * 8)); // NOLINT } TYPE Mod(const TYPE &val, const TYPE "ient) const { return val - quotient * divisor; } TYPE Mod(const TYPE &val) const { return Mod(val, Div(val)); } const TYPE &GetDivisor() const { return divisor; } private: const TYPE divisor; const TYPE multiplier; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/pair.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::make_pair; using std::pair; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/set.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::multiset; using std::set; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/complex_json.hpp // // //===----------------------------------------------------------------------===// #include #include namespace duckdb { enum class ComplexJSONType : uint8_t { VALUE = 0, OBJECT = 1, ARRAY = 2 }; //! Custom struct to handle both strings and nested JSON objects struct ComplexJSON { //! Constructor for string values explicit ComplexJSON(const string &str); //! Basic empty constructor ComplexJSON(); //! Adds entry to the underlying map, also sets the type to OBJECT void AddObjectEntry(const string &key, unique_ptr object); //! 
Adds element to the underlying list, also sets the type to ARRAY void AddArrayElement(unique_ptr object); //! Gets a ComplexJSON object from the map ComplexJSON &GetObject(const string &key); //! Gets a ComplexJSON element from the list ComplexJSON &GetArrayElement(const idx_t &index); //! Gets a string version of the underlying ComplexJSON object from the map string GetValue(const string &key) const; //! Gets a string version of the underlying ComplexJSON array from the list string GetValue(const idx_t &index) const; //! Recursive function for GetValue static string GetValueRecursive(const ComplexJSON &child); //! Flattens this json to a top level key -> nested json unordered_map Flatten() const; private: //! Basic string value, in case this is the last value of a nested json string str_value; //! If this is a json object a map of key/value unordered_map> obj_value; //! If this is a json array a list of values vector> arr_value; //! If this json is an object (i.e., map or not) ComplexJSONType type; }; } // namespace duckdb #include namespace duckdb { #ifndef DUCKDB_QUOTE_DEFINE // Preprocessor trick to allow text to be converted to C-string / string // Expecte use is: // #ifdef SOME_DEFINE // string str = DUCKDB_QUOTE_DEFINE(SOME_DEFINE) // ...do something with str // #endif SOME_DEFINE #define DUCKDB_QUOTE_DEFINE_IMPL(x) #x #define DUCKDB_QUOTE_DEFINE(x) DUCKDB_QUOTE_DEFINE_IMPL(x) #endif /** * String Utility Functions * Note that these are not the most efficient implementations (i.e., they copy * memory) and therefore they should only be used for debug messages and other * such things. 
*/ class StringUtil { public: static string GenerateRandomName(idx_t length = 16); static uint8_t GetHexValue(char c) { if (c >= '0' && c <= '9') { return UnsafeNumericCast(c - '0'); } if (c >= 'a' && c <= 'f') { return UnsafeNumericCast(c - 'a' + 10); } if (c >= 'A' && c <= 'F') { return UnsafeNumericCast(c - 'A' + 10); } throw InvalidInputException("Invalid input for hex digit: %s", string(1, c)); } static uint8_t GetBinaryValue(char c) { if (c >= '0' && c <= '1') { return UnsafeNumericCast(c - '0'); } throw InvalidInputException("Invalid input for binary digit: %s", string(1, c)); } static bool CharacterIsSpace(char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; } static bool CharacterIsNewline(char c) { return c == '\n' || c == '\r'; } static bool CharacterIsDigit(char c) { return c >= '0' && c <= '9'; } static bool CharacterIsHex(char c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } static char CharacterToUpper(char c) { if (c >= 'a' && c <= 'z') { return UnsafeNumericCast(c - ('a' - 'A')); } return c; } static char CharacterToLower(char c) { if (c >= 'A' && c <= 'Z') { return UnsafeNumericCast(c + ('a' - 'A')); } return c; } static bool CharacterIsAlpha(char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); } static bool CharacterIsAlphaNumeric(char c) { return CharacterIsAlpha(c) || CharacterIsDigit(c); } static bool CharacterIsOperator(char c) { if (c == '_') { return false; } if (c >= '!' 
&& c <= '/') { return true; } if (c >= ':' && c <= '@') { return true; } if (c >= '[' && c <= '`') { return true; } if (c >= '{' && c <= '~') { return true; } return false; } template static vector ConvertStrings(const vector &strings) { vector result; for (auto &string : strings) { result.emplace_back(string); } return result; } static vector ConvertToSQLIdentifiers(const vector &strings) { return ConvertStrings(strings); } static vector ConvertToSQLStrings(const vector &strings) { return ConvertStrings(strings); } //! Returns true if the needle string exists in the haystack DUCKDB_API static bool Contains(const string &haystack, const string &needle); DUCKDB_API static bool Contains(const string &haystack, const char &needle_char); //! Returns the position of needle string within the haystack DUCKDB_API static optional_idx Find(const string &haystack, const string &needle); //! Returns true if the target string starts with the given prefix DUCKDB_API static bool StartsWith(string str, string prefix); //! Returns true if the target string ends with the given suffix. DUCKDB_API static bool EndsWith(const string &str, const string &suffix); //! Repeat a string multiple times DUCKDB_API static string Repeat(const string &str, const idx_t n); //! Split the input string based on newline char DUCKDB_API static vector Split(const string &str, char delimiter); //! Split the input string, ignore delimiters within parentheses. Note: leading/trailing spaces are NOT stripped DUCKDB_API static vector SplitWithParentheses(const string &str, char delimiter = ',', char par_open = '(', char par_close = ')'); //! Split the input string allong a quote. Note that any escaping is NOT supported. DUCKDB_API static vector SplitWithQuote(const string &str, char delimiter = ',', char quote = '"'); //! Join multiple strings into one string. 
Components are concatenated by the given separator DUCKDB_API static string Join(const vector &input, const string &separator); DUCKDB_API static string Join(const set &input, const string &separator); //! Encode special URL characters in a string DUCKDB_API static string URLEncode(const string &str, bool encode_slash = true); DUCKDB_API static idx_t URLEncodeSize(const char *input, idx_t input_size, bool encode_slash = true); DUCKDB_API static void URLEncodeBuffer(const char *input, idx_t input_size, char *output, bool encode_slash = true); //! Decode URL escape sequences (e.g. %20) in a string DUCKDB_API static string URLDecode(const string &str, bool plus_to_space = false); DUCKDB_API static idx_t URLDecodeSize(const char *input, idx_t input_size, bool plus_to_space = false); DUCKDB_API static void URLDecodeBuffer(const char *input, idx_t input_size, char *output, bool plus_to_space = false); //! BOM skipping (https://en.wikipedia.org/wiki/Byte_order_mark) DUCKDB_API static void SkipBOM(const char *buffer_ptr, const idx_t &buffer_size, idx_t &buffer_pos); DUCKDB_API static idx_t ToUnsigned(const string &str); template static string ToString(const vector &input, const string &separator) { vector input_list; for (auto &i : input) { input_list.push_back(i.ToString()); } return StringUtil::Join(input_list, separator); } //! Join multiple items of container with given size, transformed to string //! using function, into one string using the given separator template static string Join(const C &input, S count, const string &separator, FUNC f) { // The result std::string result; // If the input isn't empty, append the first element. We do this so we // don't need to introduce an if into the loop. if (count > 0) { result += f(input[0]); } // Append the remaining input components, after the first for (size_t i = 1; i < count; i++) { result += separator + f(input[i]); } return result; } //! 
Return a string that formats the give number of bytes DUCKDB_API static string BytesToHumanReadableString(idx_t bytes, idx_t multiplier = 1024); //! Convert a string to UPPERCASE DUCKDB_API static string Upper(const string &str); //! Convert a string to lowercase DUCKDB_API static string Lower(const string &str); //! Convert a string to Title Case DUCKDB_API static string Title(const string &str); DUCKDB_API static bool IsLower(const string &str); DUCKDB_API static bool IsUpper(const string &str); //! Case insensitive hash DUCKDB_API static uint64_t CIHash(const string &str); //! Case insensitive equals DUCKDB_API static bool CIEquals(const string &l1, const string &l2); //! Case insensitive equals (null-terminated strings) DUCKDB_API static bool CIEquals(const char *l1, idx_t l1_size, const char *l2, idx_t l2_size); //! Case insensitive compare DUCKDB_API static bool CILessThan(const string &l1, const string &l2); //! Case insensitive find, returns DConstants::INVALID_INDEX if not found DUCKDB_API static idx_t CIFind(vector &vec, const string &str); //! Format a string using printf semantics template static string Format(const string fmt_str, ARGS... params) { return Exception::ConstructMessage(fmt_str, params...); } //! Split the input string into a vector of strings based on the split string DUCKDB_API static vector Split(const string &input, const string &split); //! Remove the whitespace char in the left end of the string DUCKDB_API static void LTrim(string &str); //! Remove the whitespace char in the right end of the string DUCKDB_API static void RTrim(string &str); //! Remove the all chars from chars_to_trim char in the right end of the string DUCKDB_API static void RTrim(string &str, const string &chars_to_trim); //! Remove the whitespace char in the left and right end of the string DUCKDB_API static void Trim(string &str); DUCKDB_API static string Replace(string source, const string &from, const string &to); //! 
Get the levenshtein distance from two strings //! The not_equal_penalty is the penalty given when two characters in a string are not equal //! The regular levenshtein distance has a not equal penalty of 1, which means changing a character is as expensive //! as adding or removing one For similarity searches we often want to give extra weight to changing a character For //! example: with an equal penalty of 1, "pg_am" is closer to "depdelay" than "depdelay_minutes" //! with an equal penalty of 3, "depdelay_minutes" is closer to "depdelay" than to "pg_am" DUCKDB_API static idx_t LevenshteinDistance(const string &s1, const string &s2, idx_t not_equal_penalty = 1); //! Returns the similarity score between two strings (edit distance metric - lower is more similar) DUCKDB_API static idx_t SimilarityScore(const string &s1, const string &s2); //! Returns a normalized similarity rating between 0.0 - 1.0 (higher is more similar) DUCKDB_API static double SimilarityRating(const string &s1, const string &s2); //! Get the top-n strings (sorted by the given score distance) from a set of scores. //! The scores should be normalized between 0.0 and 1.0, where 1.0 is the highest score //! At least one entry is returned (if there is one). //! Strings are only returned if they have a score higher than the threshold. DUCKDB_API static vector TopNStrings(vector> scores, idx_t n = 5, double threshold = 0.5); //! DEPRECATED: old TopNStrings method that uses the levenshtein distance metric instead of the normalized 0.0 - 1.0 //! rating DUCKDB_API static vector TopNStrings(const vector> &scores, idx_t n = 5, idx_t threshold = 5); //! Computes the levenshtein distance of each string in strings, and compares it to target, then returns TopNStrings //! with the given params. DUCKDB_API static vector TopNLevenshtein(const vector &strings, const string &target, idx_t n = 5, idx_t threshold = 5); //! 
Computes the jaro winkler distance of each string in strings, and compares it to target, then returns //! TopNStrings with the given params. DUCKDB_API static vector TopNJaroWinkler(const vector &strings, const string &target, idx_t n = 5, double threshold = 0.5); DUCKDB_API static string CandidatesMessage(const vector &candidates, const string &candidate = "Candidate bindings"); //! Generate an error message in the form of "{message_prefix}: nearest_string, nearest_string2, ... //! Equivalent to calling TopNLevenshtein followed by CandidatesMessage DUCKDB_API static string CandidatesErrorMessage(const vector &strings, const string &target, const string &message_prefix, idx_t n = 5); //! Returns true if two null-terminated strings are equal or point to the same address. //! Returns false if only one of the strings is nullptr static bool Equals(const char *s1, const char *s2) { if (s1 == s2) { return true; } if (s1 == nullptr || s2 == nullptr) { return false; } return strcmp(s1, s2) == 0; } //! JSON method that parses a { string: value } JSON blob //! NOTE: this method is not efficient //! NOTE: this method is used in Exception construction - as such it does NOT throw on invalid JSON, instead an //! empty map is returned //! Parses complex (i.e., nested) Json maps, it also parses invalid JSONs, as a pure string. DUCKDB_API static unique_ptr ParseJSONMap(const string &json, bool ignore_errors = false); //! JSON method that constructs a { string: value } JSON map //! This is the inverse of ParseJSONMap //! NOTE: this method is not efficient DUCKDB_API static string ExceptionToJSONMap(ExceptionType type, const string &message, const unordered_map &map); //! Transforms an unordered map to a JSON string DUCKDB_API static string ToJSONMap(const unordered_map &map); //! 
Transforms an complex JSON to a JSON string DUCKDB_API static string ToComplexJSONMap(const ComplexJSON &complex_json); DUCKDB_API static string ValidateJSON(const char *data, const idx_t &len); DUCKDB_API static string GetFileName(const string &file_path); DUCKDB_API static string GetFileExtension(const string &file_name); DUCKDB_API static string GetFileStem(const string &file_name); DUCKDB_API static string GetFilePath(const string &file_path); struct EnumStringLiteral { uint32_t number; const char *string; }; DUCKDB_API static uint32_t StringToEnum(const EnumStringLiteral enum_list[], idx_t enum_count, const char *enum_name, const char *str_value); DUCKDB_API static const char *EnumToString(const EnumStringLiteral enum_list[], idx_t enum_count, const char *enum_name, uint32_t enum_value); DUCKDB_API static const uint8_t ASCII_TO_LOWER_MAP[]; DUCKDB_API static const uint8_t ASCII_TO_UPPER_MAP[]; }; } // namespace duckdb #include namespace duckdb { struct date_t; // NOLINT struct dtime_t; // NOLINT struct dtime_ns_t; // NOLINT struct dtime_tz_t; // NOLINT //! Type used to represent a TIMESTAMP. timestamp_t holds the microseconds since 1970-01-01. struct timestamp_t { // NOLINT // NOTE: The unit of value is microseconds for timestamp_t, but it can be // different for subclasses (e.g. it's nanos for timestamp_ns, etc). 
int64_t value; timestamp_t() = default; explicit inline constexpr timestamp_t(int64_t micros) : value(micros) { } inline timestamp_t &operator=(int64_t micros) { value = micros; return *this; } // explicit conversion explicit inline operator int64_t() const { return value; } // comparison operators inline bool operator==(const timestamp_t &rhs) const { return value == rhs.value; }; inline bool operator!=(const timestamp_t &rhs) const { return value != rhs.value; }; inline bool operator<=(const timestamp_t &rhs) const { return value <= rhs.value; }; inline bool operator<(const timestamp_t &rhs) const { return value < rhs.value; }; inline bool operator>(const timestamp_t &rhs) const { return value > rhs.value; }; inline bool operator>=(const timestamp_t &rhs) const { return value >= rhs.value; }; // arithmetic operators timestamp_t operator+(const double &value) const; int64_t operator-(const timestamp_t &other) const; // in-place operators timestamp_t &operator+=(const int64_t &delta); timestamp_t &operator-=(const int64_t &delta); // special values static constexpr timestamp_t infinity() { // NOLINT return timestamp_t(NumericLimits::Maximum()); } // NOLINT static constexpr timestamp_t ninfinity() { // NOLINT return timestamp_t(-NumericLimits::Maximum()); } // NOLINT static constexpr inline timestamp_t epoch() { // NOLINT return timestamp_t(0); } // NOLINT }; //! Type used to represent TIMESTAMP_S. timestamp_sec_t holds the seconds since 1970-01-01. struct timestamp_sec_t : public timestamp_t { // NOLINT timestamp_sec_t() = default; explicit inline constexpr timestamp_sec_t(int64_t seconds) : timestamp_t(seconds) { } }; //! Type used to represent TIMESTAMP_MS. timestamp_ms_t holds the milliseconds since 1970-01-01. struct timestamp_ms_t : public timestamp_t { // NOLINT timestamp_ms_t() = default; explicit inline constexpr timestamp_ms_t(int64_t millis) : timestamp_t(millis) { } }; //! Type used to represent TIMESTAMP_NS. 
timestamp_ns_t holds the nanoseconds since 1970-01-01. struct timestamp_ns_t : public timestamp_t { // NOLINT timestamp_ns_t() = default; explicit inline constexpr timestamp_ns_t(int64_t nanos) : timestamp_t(nanos) { } }; //! Type used to represent TIMESTAMPTZ. timestamp_tz_t holds the microseconds since 1970-01-01 (UTC). //! It is physically the same as timestamp_t, both hold microseconds since epoch. struct timestamp_tz_t : public timestamp_t { // NOLINT timestamp_tz_t() = default; explicit inline constexpr timestamp_tz_t(int64_t micros) : timestamp_t(micros) { } explicit inline constexpr timestamp_tz_t(timestamp_t ts) : timestamp_t(ts) { } }; enum class TimestampCastResult : uint8_t { SUCCESS, ERROR_INCORRECT_FORMAT, ERROR_NON_UTC_TIMEZONE, ERROR_RANGE, STRICT_UTC }; struct TimestampComponents { int32_t year; int32_t month; int32_t day; int32_t hour; int32_t minute; int32_t second; int32_t microsecond; }; //! The static Timestamp class holds helper functions for the timestamp types. class Timestamp { public: // min timestamp is 290308-12-22 (BC) constexpr static const int32_t MIN_YEAR = -290308; constexpr static const int32_t MIN_MONTH = 12; constexpr static const int32_t MIN_DAY = 22; public: //! Convert a string in the format "YYYY-MM-DD hh:mm:ss[.f][-+TH[:tm]]" to a timestamp object DUCKDB_API static timestamp_t FromString(const string &str, bool use_offset); //! Convert a string where the offset can also be a time zone string: / [A_Za-z0-9/_]+/ //! If has_offset is true, then the result is an instant that was offset from UTC //! If the tz is not empty, the result is still an instant, but the parts can be extracted and applied to the TZ DUCKDB_API static TimestampCastResult TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &result, const bool use_offset, bool &has_offset, string_t &tz, optional_ptr nanos = nullptr); //! Strict Timestamp does not accept offsets. 
DUCKDB_API static TimestampCastResult TryConvertTimestamp(const char *str, idx_t len, timestamp_t &result, const bool use_offset, optional_ptr nanos = nullptr, bool strict = false); DUCKDB_API static TimestampCastResult TryConvertTimestamp(const char *str, idx_t len, timestamp_ns_t &result); DUCKDB_API static timestamp_t FromCString(const char *str, idx_t len, bool use_offset = false, optional_ptr nanos = nullptr); //! Convert a date object to a string in the format "YYYY-MM-DD hh:mm:ss" DUCKDB_API static string ToString(timestamp_t timestamp); DUCKDB_API static date_t GetDate(timestamp_t timestamp); DUCKDB_API static dtime_t GetTime(timestamp_t timestamp); DUCKDB_API static dtime_ns_t GetTimeNs(timestamp_ns_t timestamp); //! Create a Timestamp object from a specified (date, time) combination DUCKDB_API static timestamp_t FromDatetime(date_t date, dtime_t time); DUCKDB_API static bool TryFromDatetime(date_t date, dtime_t time, timestamp_t &result); DUCKDB_API static bool TryFromDatetime(date_t date, dtime_tz_t timetz, timestamp_t &result); //! Scale up to ns DUCKDB_API static bool TryFromTimestampNanos(timestamp_t ts, int32_t nanos, timestamp_ns_t &result); //! Is the character a valid part of a time zone name? static inline bool CharacterIsTimeZone(char c) { return StringUtil::CharacterIsAlpha(c) || StringUtil::CharacterIsDigit(c) || c == '_' || c == '/' || c == '+' || c == '-'; } //! True, if the timestamp is finite, else false. static inline bool IsFinite(timestamp_t timestamp) { return timestamp != timestamp_t::infinity() && timestamp != timestamp_t::ninfinity(); } //! Extract the date and time from a given timestamp object DUCKDB_API static void Convert(timestamp_t date, date_t &out_date, dtime_t &out_time); //! Extract the date and time from a given timestamp object DUCKDB_API static void Convert(timestamp_ns_t date, date_t &out_date, dtime_t &out_time, int32_t &out_nanos); //! Returns current timestamp DUCKDB_API static timestamp_t GetCurrentTimestamp(); //! 
Convert the epoch (in sec) to a timestamp DUCKDB_API static timestamp_t FromEpochSecondsPossiblyInfinite(int64_t s); DUCKDB_API static timestamp_t FromEpochSeconds(int64_t s); //! Convert the epoch (in ms) to a timestamp DUCKDB_API static timestamp_t FromEpochMsPossiblyInfinite(int64_t ms); DUCKDB_API static timestamp_t FromEpochMs(int64_t ms); //! Convert the epoch (in microseconds) to a timestamp DUCKDB_API static timestamp_t FromEpochMicroSeconds(int64_t micros); //! Convert the epoch (in nanoseconds) to a timestamp DUCKDB_API static timestamp_t FromEpochNanoSecondsPossiblyInfinite(int64_t nanos); DUCKDB_API static timestamp_t FromEpochNanoSeconds(int64_t nanos); //! Construct ns timestamps from various epoch units DUCKDB_API static timestamp_ns_t TimestampNsFromEpochMicros(int64_t micros); DUCKDB_API static timestamp_ns_t TimestampNsFromEpochMillis(int64_t millis); //! Try convert a timestamp to epoch (in nanoseconds) DUCKDB_API static bool TryGetEpochNanoSeconds(timestamp_t timestamp, int64_t &result); //! Convert the epoch (in seconds) to a timestamp DUCKDB_API static int64_t GetEpochSeconds(timestamp_t timestamp); //! Convert the epoch (in ms) to a timestamp DUCKDB_API static int64_t GetEpochMs(timestamp_t timestamp); //! Convert a timestamp to epoch (in microseconds) DUCKDB_API static int64_t GetEpochMicroSeconds(timestamp_t timestamp); //! Convert a timestamp to epoch (in nanoseconds) DUCKDB_API static int64_t GetEpochNanoSeconds(timestamp_t timestamp); DUCKDB_API static int64_t GetEpochNanoSeconds(timestamp_ns_t timestamp); //! Convert a timestamp to a rounded epoch at a given resolution. DUCKDB_API static int64_t GetEpochRounded(timestamp_t timestamp, const int64_t power_of_ten); //! Convert a timestamp to a Julian Day DUCKDB_API static double GetJulianDay(timestamp_t timestamp); //! 
Decompose a timestamp into its components DUCKDB_API static TimestampComponents GetComponents(timestamp_t timestamp); DUCKDB_API static time_t ToTimeT(timestamp_t); DUCKDB_API static timestamp_t FromTimeT(time_t); DUCKDB_API static bool TryParseUTCOffset(const char *str, idx_t &pos, idx_t len, int &hh, int &mm, int &ss); DUCKDB_API static string FormatError(const string &str); DUCKDB_API static string FormatError(string_t str); DUCKDB_API static string UnsupportedTimezoneError(const string &str); DUCKDB_API static string UnsupportedTimezoneError(string_t str); DUCKDB_API static string RangeError(const string &str); DUCKDB_API static string RangeError(string_t str); }; } // namespace duckdb namespace std { //! Timestamp template <> struct hash { std::size_t operator()(const duckdb::timestamp_t &k) const { using std::hash; return hash()((int64_t)k); } }; template <> struct hash { std::size_t operator()(const duckdb::timestamp_ms_t &k) const { using std::hash; return hash()((int64_t)k); } }; template <> struct hash { std::size_t operator()(const duckdb::timestamp_ns_t &k) const { using std::hash; return hash()((int64_t)k); } }; template <> struct hash { std::size_t operator()(const duckdb::timestamp_sec_t &k) const { using std::hash; return hash()((int64_t)k); } }; template <> struct hash { std::size_t operator()(const duckdb::timestamp_tz_t &k) const { using std::hash; return hash()((int64_t)k); } }; } // namespace std //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/date.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/string_type.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/hash.hpp 
// // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/datetime.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { //! Type used to represent time (microseconds) struct dtime_t { // NOLINT int64_t micros; dtime_t() = default; explicit inline constexpr dtime_t(int64_t micros_p) : micros(micros_p) { } inline dtime_t &operator=(int64_t micros_p) { micros = micros_p; return *this; } // explicit conversion explicit inline operator int64_t() const { return micros; } explicit inline operator double() const { return static_cast(micros); } // comparison operators inline bool operator==(const dtime_t &rhs) const { return micros == rhs.micros; }; inline bool operator!=(const dtime_t &rhs) const { return micros != rhs.micros; }; inline bool operator<=(const dtime_t &rhs) const { return micros <= rhs.micros; }; inline bool operator<(const dtime_t &rhs) const { return micros < rhs.micros; }; inline bool operator>(const dtime_t &rhs) const { return micros > rhs.micros; }; inline bool operator>=(const dtime_t &rhs) const { return micros >= rhs.micros; }; // arithmetic operators inline dtime_t operator+(const int64_t µs) const { return dtime_t(this->micros + micros); }; inline dtime_t operator+(const double µs) const { return dtime_t(this->micros + int64_t(micros)); }; inline dtime_t operator-(const int64_t µs) const { return dtime_t(this->micros - micros); }; inline dtime_t operator*(const idx_t &copies) const { return dtime_t(this->micros * UnsafeNumericCast(copies)); }; inline dtime_t operator/(const idx_t &copies) const { return dtime_t(this->micros / UnsafeNumericCast(copies)); }; inline int64_t operator-(const dtime_t &other) const { return this->micros - other.micros; }; // in-place operators inline dtime_t &operator+=(const int64_t µs) { this->micros += micros; 
return *this; }; inline dtime_t &operator-=(const int64_t µs) { this->micros -= micros; return *this; }; inline dtime_t &operator+=(const dtime_t &other) { this->micros += other.micros; return *this; }; // special values static inline dtime_t allballs() { // NOLINT return dtime_t(0); } // NOLINT }; //! Type used to represent TIME_NS. dtime_ns_t holds the nanoseconds since midnight. struct dtime_ns_t : public dtime_t { // NOLINT dtime_ns_t() = default; explicit inline constexpr dtime_ns_t(const int64_t nanos) : dtime_t(nanos) { } inline dtime_t time() const { // NOLINT return dtime_t(micros / 1000); } }; struct dtime_tz_t { // NOLINT static constexpr const int TIME_BITS = 40; static constexpr const int OFFSET_BITS = 24; static constexpr const uint64_t OFFSET_MASK = ~uint64_t(0) >> TIME_BITS; static constexpr const int32_t MAX_OFFSET = 16 * 60 * 60 - 1; // ±15:59:59 static constexpr const int32_t MIN_OFFSET = -MAX_OFFSET; static constexpr const uint64_t OFFSET_MICROS = 1000000; uint64_t bits; // Offsets are reverse ordered e.g., 13:00:00+01 < 12:00:00+00 < 11:00:00-01 // Because we encode them as the low order bits, // they are also biased into an unsigned integer: (-16, 16) => (32, 0) static inline uint64_t encode_offset(int32_t offset) { // NOLINT return uint64_t(MAX_OFFSET - offset); } static inline int32_t decode_offset(uint64_t bits) { // NOLINT return MAX_OFFSET - int32_t(bits & OFFSET_MASK); } static inline uint64_t encode_micros(int64_t micros) { // NOLINT return encode_micros(UnsafeNumericCast(micros)); } static inline uint64_t encode_micros(uint64_t micros) { // NOLINT return micros << OFFSET_BITS; } static inline int64_t decode_micros(uint64_t bits) { // NOLINT return int64_t(bits >> OFFSET_BITS); } dtime_tz_t() = default; inline dtime_tz_t(dtime_t t, int32_t offset) : bits(encode_micros(t.micros) | encode_offset(offset)) { } explicit inline dtime_tz_t(uint64_t bits_p) : bits(bits_p) { } inline dtime_t time() const { // NOLINT return 
dtime_t(decode_micros(bits)); } inline int32_t offset() const { // NOLINT return decode_offset(bits); } // Times are compared after adjusting to offset +00:00:00, e.g., 13:01:00+01 > 12:00:00+00 // Because we encode them as the high order bits, // they are biased by the maximum offset: (0, 24) => (0, 56) inline uint64_t sort_key() const { // NOLINT return bits + encode_micros((bits & OFFSET_MASK) * OFFSET_MICROS); } // comparison operators inline bool operator==(const dtime_tz_t &rhs) const { return bits == rhs.bits; }; inline bool operator!=(const dtime_tz_t &rhs) const { return bits != rhs.bits; }; inline bool operator<=(const dtime_tz_t &rhs) const { return sort_key() <= rhs.sort_key(); }; inline bool operator<(const dtime_tz_t &rhs) const { return sort_key() < rhs.sort_key(); }; inline bool operator>(const dtime_tz_t &rhs) const { return sort_key() > rhs.sort_key(); }; inline bool operator>=(const dtime_tz_t &rhs) const { return sort_key() >= rhs.sort_key(); }; }; } // namespace duckdb namespace std { //! Time template <> struct hash { std::size_t operator()(const duckdb::dtime_t &k) const { using std::hash; return hash()((int64_t)k); } }; template <> struct hash { std::size_t operator()(const duckdb::dtime_tz_t &k) const { using std::hash; return hash()(k.bits); } }; } // namespace std namespace duckdb { struct string_t; struct interval_t; // NOLINT //! Combine two hashes by XORing them inline hash_t CombineHash(hash_t left, hash_t right) { return left ^ right; } #ifdef DUCKDB_HASH_ZERO template hash_t Hash(T value) { return 0; } DUCKDB_API hash_t Hash(const char *val, size_t size); DUCKDB_API hash_t Hash(uint8_t *val, size_t size); #else //! Efficient hash function that maximizes the avalanche effect and minimizes bias //! 
See: https://nullprogram.com/blog/2018/07/31/ inline hash_t MurmurHash64(uint64_t x) { x ^= x >> 32; x *= 0xd6e8feb86659fd93U; x ^= x >> 32; x *= 0xd6e8feb86659fd93U; x ^= x >> 32; return x; } inline hash_t MurmurHash32(uint32_t x) { return MurmurHash64(x); } template hash_t Hash(T value) { return MurmurHash32(static_cast(value)); } template <> DUCKDB_API inline hash_t Hash(uint64_t val) { return MurmurHash64(val); } template <> DUCKDB_API inline hash_t Hash(int64_t val) { return MurmurHash64(static_cast(val)); } template <> DUCKDB_API hash_t Hash(hugeint_t val); template <> DUCKDB_API hash_t Hash(uhugeint_t val); template <> DUCKDB_API hash_t Hash(float val); template <> DUCKDB_API hash_t Hash(double val); template <> DUCKDB_API hash_t Hash(const char *val); template <> DUCKDB_API hash_t Hash(char *val); template <> DUCKDB_API hash_t Hash(string_t val); template <> DUCKDB_API hash_t Hash(interval_t val); template <> DUCKDB_API hash_t Hash(dtime_tz_t val); DUCKDB_API hash_t Hash(const char *val, size_t size); DUCKDB_API hash_t Hash(uint8_t *val, size_t size); #endif } // namespace duckdb #include #include namespace duckdb { struct string_t { friend struct StringComparisonOperators; public: static constexpr idx_t PREFIX_BYTES = 4 * sizeof(char); static constexpr idx_t INLINE_BYTES = 12 * sizeof(char); static constexpr idx_t HEADER_SIZE = sizeof(uint32_t) + PREFIX_BYTES; static constexpr idx_t MAX_STRING_SIZE = NumericLimits::Maximum(); #ifndef DUCKDB_DEBUG_NO_INLINE static constexpr idx_t PREFIX_LENGTH = PREFIX_BYTES; static constexpr idx_t INLINE_LENGTH = INLINE_BYTES; #else static constexpr idx_t PREFIX_LENGTH = 0; static constexpr idx_t INLINE_LENGTH = 0; #endif string_t() = default; explicit string_t(uint32_t len) { value.inlined.length = len; } string_t(const char *data, uint32_t len) { value.inlined.length = len; D_ASSERT(data || GetSize() == 0); if (IsInlined()) { // zero initialize the prefix first // this makes sure that strings with length smaller than 4 
still have an equal prefix memset(value.inlined.inlined, 0, INLINE_BYTES); if (GetSize() == 0) { return; } // small string: inlined memcpy(value.inlined.inlined, data, GetSize()); } else { // large string: store pointer #ifndef DUCKDB_DEBUG_NO_INLINE memcpy(value.pointer.prefix, data, PREFIX_LENGTH); #else memset(value.pointer.prefix, 0, PREFIX_BYTES); #endif value.pointer.ptr = (char *)data; // NOLINT } } string_t(const char *data) // NOLINT: Allow implicit conversion from `const char*` : string_t(data, UnsafeNumericCast(strlen(data))) { } string_t(const string &value) // NOLINT: Allow implicit conversion from `const char*` : string_t(value.c_str(), UnsafeNumericCast(value.size())) { } bool IsInlined() const { return GetSize() <= INLINE_LENGTH; } const char *GetData() const { return IsInlined() ? const_char_ptr_cast(value.inlined.inlined) : value.pointer.ptr; } const char *GetDataUnsafe() const { return GetData(); } char *GetDataWriteable() const { return IsInlined() ? (char *)value.inlined.inlined : value.pointer.ptr; // NOLINT } const char *GetPrefix() const { return value.inlined.inlined; } char *GetPrefixWriteable() { return value.inlined.inlined; } idx_t GetSize() const { return value.inlined.length; } void SetSizeAndFinalize(uint32_t size, idx_t allocated_size) { value.inlined.length = size; if (allocated_size > INLINE_LENGTH && IsInlined()) { //! 
Data was written to the 'value.pointer.ptr', has to be copied to the inlined bytes D_ASSERT(value.pointer.ptr); auto ptr = value.pointer.ptr; memcpy(GetDataWriteable(), ptr, size); } Finalize(); VerifyCharacters(); } bool Empty() const { return value.inlined.length == 0; } string GetString() const { return string(GetData(), GetSize()); } explicit operator string() const { return GetString(); } char *GetPointer() const { D_ASSERT(!IsInlined()); return value.pointer.ptr; } void SetPointer(char *new_ptr) { D_ASSERT(!IsInlined()); value.pointer.ptr = new_ptr; } void Finalize() { // set trailing NULL byte if (GetSize() <= INLINE_LENGTH) { // fill prefix with zeros if the length is smaller than the prefix length memset(value.inlined.inlined + GetSize(), 0, INLINE_BYTES - GetSize()); } else { // copy the data into the prefix #ifndef DUCKDB_DEBUG_NO_INLINE auto dataptr = GetData(); memcpy(value.pointer.prefix, dataptr, PREFIX_LENGTH); #else memset(value.pointer.prefix, 0, PREFIX_BYTES); #endif } } void Verify() const; void VerifyUTF8() const; void VerifyCharacters() const; void VerifyNull() const; struct StringComparisonOperators { static inline bool Equals(const string_t &a, const string_t &b) { #ifdef DUCKDB_DEBUG_NO_INLINE if (a.GetSize() != b.GetSize()) { return false; } return (memcmp(a.GetData(), b.GetData(), a.GetSize()) == 0); #endif uint64_t a_bulk_comp = Load(const_data_ptr_cast(&a)); uint64_t b_bulk_comp = Load(const_data_ptr_cast(&b)); if (a_bulk_comp != b_bulk_comp) { // Either length or prefix are different -> not equal return false; } // they have the same length and same prefix! 
a_bulk_comp = Load(const_data_ptr_cast(&a) + 8u); b_bulk_comp = Load(const_data_ptr_cast(&b) + 8u); if (a_bulk_comp == b_bulk_comp) { // either they are both inlined (so compare equal) or point to the same string (so compare equal) return true; } if (!a.IsInlined()) { // 'long' strings of the same length -> compare pointed value if (memcmp(a.value.pointer.ptr, b.value.pointer.ptr, a.GetSize()) == 0) { return true; } } // either they are short string of same length but different content // or they point to string with different content // either way, they can't represent the same underlying string return false; } // compare up to shared length. if still the same, compare lengths static bool GreaterThan(const string_t &left, const string_t &right) { const uint32_t left_length = UnsafeNumericCast(left.GetSize()); const uint32_t right_length = UnsafeNumericCast(right.GetSize()); const uint32_t min_length = std::min(left_length, right_length); #ifndef DUCKDB_DEBUG_NO_INLINE uint32_t a_prefix = Load(const_data_ptr_cast(left.GetPrefix())); uint32_t b_prefix = Load(const_data_ptr_cast(right.GetPrefix())); // Utility to move 0xa1b2c3d4 into 0xd4c3b2a1, basically inverting the order byte-a-byte auto byte_swap = [](uint32_t v) -> uint32_t { uint32_t t1 = (v >> 16u) | (v << 16u); uint32_t t2 = t1 & 0x00ff00ff; uint32_t t3 = t1 & 0xff00ff00; return (t2 << 8u) | (t3 >> 8u); }; // Check on prefix ----- // We dont' need to mask since: // if the prefix is greater(after bswap), it will stay greater regardless of the extra bytes // if the prefix is smaller(after bswap), it will stay smaller regardless of the extra bytes // if the prefix is equal, the extra bytes are guaranteed to be /0 for the shorter one if (a_prefix != b_prefix) { return byte_swap(a_prefix) > byte_swap(b_prefix); } #endif auto memcmp_res = memcmp(left.GetData(), right.GetData(), min_length); return memcmp_res > 0 || (memcmp_res == 0 && left_length > right_length); } }; bool operator==(const string_t &r) const { 
return StringComparisonOperators::Equals(*this, r); } bool operator!=(const string_t &r) const { return !(*this == r); } bool operator>(const string_t &r) const { return StringComparisonOperators::GreaterThan(*this, r); } bool operator<(const string_t &r) const { return r > *this; } private: union { struct { uint32_t length; char prefix[4]; char *ptr; } pointer; struct { uint32_t length; char inlined[12]; } inlined; } value; }; } // namespace duckdb namespace std { template <> struct hash { size_t operator()(const duckdb::string_t &val) const { return Hash(val); } }; } // namespace std #include namespace duckdb { struct timestamp_t; // NOLINT: primitive case //! Type used to represent dates (days since 1970-01-01) struct date_t { // NOLINT int32_t days; date_t() = default; explicit inline date_t(int32_t days_p) : days(days_p) { } // explicit conversion explicit inline operator int32_t() const { return days; } // comparison operators inline bool operator==(const date_t &rhs) const { return days == rhs.days; }; inline bool operator!=(const date_t &rhs) const { return days != rhs.days; }; inline bool operator<=(const date_t &rhs) const { return days <= rhs.days; }; inline bool operator<(const date_t &rhs) const { return days < rhs.days; }; inline bool operator>(const date_t &rhs) const { return days > rhs.days; }; inline bool operator>=(const date_t &rhs) const { return days >= rhs.days; }; // arithmetic operators inline date_t operator+(const int32_t &days) const { return date_t(this->days + days); }; inline date_t operator-(const int32_t &days) const { return date_t(this->days - days); }; // in-place operators inline date_t &operator+=(const int32_t &days) { this->days += days; return *this; }; inline date_t &operator-=(const int32_t &days) { this->days -= days; return *this; }; // special values static inline date_t infinity() { // NOLINT return date_t(NumericLimits::Maximum()); } // NOLINT static inline date_t ninfinity() { // NOLINT return 
date_t(-NumericLimits::Maximum()); } // NOLINT static inline date_t epoch() { // NOLINT return date_t(0); } // NOLINT }; enum class DateCastResult : uint8_t { SUCCESS, ERROR_INCORRECT_FORMAT, ERROR_RANGE }; struct DateSpecial { const char *str; // The full string const idx_t abbr; // The abbreviation length }; //! The Date class is a static class that holds helper functions for the Date type. class Date { public: static const DateSpecial PINF; // NOLINT static const DateSpecial NINF; // NOLINT static const DateSpecial EPOCH; // NOLINT static const string_t MONTH_NAMES[12]; static const string_t MONTH_NAMES_ABBREVIATED[12]; static const string_t DAY_NAMES[7]; static const string_t DAY_NAMES_ABBREVIATED[7]; static const int32_t NORMAL_DAYS[13]; static const int32_t CUMULATIVE_DAYS[13]; static const int32_t LEAP_DAYS[13]; static const int32_t CUMULATIVE_LEAP_DAYS[13]; static const int32_t CUMULATIVE_YEAR_DAYS[401]; static const int8_t MONTH_PER_DAY_OF_YEAR[365]; static const int8_t LEAP_MONTH_PER_DAY_OF_YEAR[366]; // min date is 5877642-06-25 (BC) (-2^31+2) constexpr static const int32_t DATE_MIN_YEAR = -5877641; constexpr static const int32_t DATE_MIN_MONTH = 6; constexpr static const int32_t DATE_MIN_DAY = 25; // max date is 5881580-07-10 (2^31-2) constexpr static const int32_t DATE_MAX_YEAR = 5881580; constexpr static const int32_t DATE_MAX_MONTH = 7; constexpr static const int32_t DATE_MAX_DAY = 10; constexpr static const int32_t EPOCH_YEAR = 1970; constexpr static const int32_t YEAR_INTERVAL = 400; constexpr static const int32_t DAYS_PER_YEAR_INTERVAL = 146097; public: //! Convert a string in the format "YYYY-MM-DD" to a date object DUCKDB_API static date_t FromString(const string &str, bool strict = false); //! Convert a string in the format "YYYY-MM-DD" to a date object DUCKDB_API static date_t FromCString(const char *str, idx_t len, bool strict = false); //! 
Convert a date object to a string in the format "YYYY-MM-DD" DUCKDB_API static string ToString(date_t date); //! Try to convert the string as a give "special" date (e.g, PINF, ...) //! Returns true if it was successful and updates the scan pos. DUCKDB_API static bool TryConvertDateSpecial(const char *buf, idx_t len, idx_t &pos, const DateSpecial &special); //! Try to convert text in a buffer to a date; returns true if parsing was successful //! If the date was a "special" value, the special flag will be set. DUCKDB_API static DateCastResult TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool &special, bool strict = false); //! Create a string "YYYY-MM-DD" from a specified (year, month, day) //! combination DUCKDB_API static string Format(int32_t year, int32_t month, int32_t day); //! Extract the year, month and day from a given date object DUCKDB_API static void Convert(date_t date, int32_t &out_year, int32_t &out_month, int32_t &out_day); //! Create a Date object from a specified (year, month, day) combination DUCKDB_API static date_t FromDate(int32_t year, int32_t month, int32_t day); DUCKDB_API static bool TryFromDate(int32_t year, int32_t month, int32_t day, date_t &result); //! Returns true if (year) is a leap year, and false otherwise DUCKDB_API static bool IsLeapYear(int32_t year); //! Returns true if the specified (year, month, day) combination is a valid //! date DUCKDB_API static bool IsValid(int32_t year, int32_t month, int32_t day); //! Returns true if the specified date is finite static inline bool IsFinite(date_t date) { return date != date_t::infinity() && date != date_t::ninfinity(); } //! The max number of days in a month of a given year DUCKDB_API static int32_t MonthDays(int32_t year, int32_t month); //! Extract the epoch from the date (seconds since 1970-01-01) DUCKDB_API static int64_t Epoch(date_t date); //! 
Extract the epoch from the date (nanoseconds since 1970-01-01) DUCKDB_API static int64_t EpochNanoseconds(date_t date); //! Extract the epoch from the date (microseconds since 1970-01-01) DUCKDB_API static int64_t EpochMicroseconds(date_t date); //! Extract the epoch from the date (milliseconds since 1970-01-01) DUCKDB_API static int64_t EpochMilliseconds(date_t date); //! Convert the epoch (seconds since 1970-01-01) to a date_t DUCKDB_API static date_t EpochToDate(int64_t epoch); //! Extract the number of days since epoch (days since 1970-01-01) DUCKDB_API static int32_t EpochDays(date_t date); //! Convert the epoch number of days to a date_t DUCKDB_API static date_t EpochDaysToDate(int32_t epoch); //! Extract year of a date entry DUCKDB_API static int32_t ExtractYear(date_t date); //! Extract month of a date entry DUCKDB_API static int32_t ExtractMonth(date_t date); //! Extract day of a date entry DUCKDB_API static int32_t ExtractDay(date_t date); //! Extract the day of the week (1-7) DUCKDB_API static int32_t ExtractISODayOfTheWeek(date_t date); //! Extract the day of the year DUCKDB_API static int32_t ExtractDayOfTheYear(date_t date); //! Extract the day of the year DUCKDB_API static int64_t ExtractJulianDay(date_t date); //! Extract the ISO week number //! ISO weeks start on Monday and the first week of a year //! contains January 4 of that year. //! In the ISO week-numbering system, it is possible for early-January dates //! to be part of the 52nd or 53rd week of the previous year. DUCKDB_API static void ExtractISOYearWeek(date_t date, int32_t &year, int32_t &week); DUCKDB_API static int32_t ExtractISOWeekNumber(date_t date); DUCKDB_API static int32_t ExtractISOYearNumber(date_t date); //! Extract the week number as Python handles it. //! Either Monday or Sunday is the first day of the week, //! and any date before the first Monday/Sunday returns week 0 //! 
This is a bit more consistent because week numbers in a year are always incrementing DUCKDB_API static int32_t ExtractWeekNumberRegular(date_t date, bool monday_first = true); //! Returns the date of the monday of the current week. DUCKDB_API static date_t GetMondayOfCurrentWeek(date_t date); //! Helper function to parse two digits from a string (e.g. "30" -> 30, "03" -> 3, "3" -> 3) DUCKDB_API static bool ParseDoubleDigit(const char *buf, idx_t len, idx_t &pos, int32_t &result); DUCKDB_API static string FormatError(const string &str); DUCKDB_API static string FormatError(string_t str); DUCKDB_API static string RangeError(const string &str); DUCKDB_API static string RangeError(string_t str); private: static void ExtractYearOffset(int32_t &n, int32_t &year, int32_t &year_offset); }; } // namespace duckdb namespace std { //! Date template <> struct hash { std::size_t operator()(const duckdb::date_t &k) const { using std::hash; return hash()((int32_t)k); } }; } // namespace std //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/interval.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct dtime_t; // NOLINT: literal casing struct date_t; // NOLINT: literal casing struct dtime_tz_t; // NOLINT: literal casing struct timestamp_t; // NOLINT: literal casing struct TimestampComponents; class Serializer; class Deserializer; struct interval_t { // NOLINT int32_t months; int32_t days; int64_t micros; inline void Normalize(int64_t &months, int64_t &days, int64_t µs) const; // Normalize to interval bounds. 
inline static void Borrow(const int64_t msf, int64_t &lsf, int32_t &f, const int64_t scale); inline interval_t Normalize() const; inline bool operator==(const interval_t &right) const { // Quick equality check const auto &left = *this; if (left.months == right.months && left.days == right.days && left.micros == right.micros) { return true; } int64_t lmonths, ldays, lmicros; int64_t rmonths, rdays, rmicros; left.Normalize(lmonths, ldays, lmicros); right.Normalize(rmonths, rdays, rmicros); return lmonths == rmonths && ldays == rdays && lmicros == rmicros; } inline bool operator!=(const interval_t &right) const { return !(*this == right); } inline bool operator>(const interval_t &right) const { const auto &left = *this; int64_t lmonths, ldays, lmicros; int64_t rmonths, rdays, rmicros; left.Normalize(lmonths, ldays, lmicros); right.Normalize(rmonths, rdays, rmicros); if (lmonths > rmonths) { return true; } else if (lmonths < rmonths) { return false; } if (ldays > rdays) { return true; } else if (ldays < rdays) { return false; } return lmicros > rmicros; } inline bool operator<(const interval_t &right) const { return right > *this; } inline bool operator<=(const interval_t &right) const { return !(*this > right); } inline bool operator>=(const interval_t &right) const { return !(*this < right); } // Serialization void Serialize(Serializer &serializer) const; static interval_t Deserialize(Deserializer &source); }; //! The Interval class is a static class that holds helper functions for the Interval //! type. class Interval { public: static constexpr const int32_t MONTHS_PER_MILLENIUM = 12000; static constexpr const int32_t MONTHS_PER_CENTURY = 1200; static constexpr const int32_t MONTHS_PER_DECADE = 120; static constexpr const int32_t MONTHS_PER_YEAR = 12; static constexpr const int32_t MONTHS_PER_QUARTER = 3; static constexpr const int32_t DAYS_PER_WEEK = 7; //! 
only used for interval comparison/ordering purposes, in which case a month counts as 30 days static constexpr const int64_t DAYS_PER_MONTH = 30; static constexpr const int64_t DAYS_PER_YEAR = 365; static constexpr const int64_t MSECS_PER_SEC = 1000; static constexpr const int32_t SECS_PER_MINUTE = 60; static constexpr const int32_t MINS_PER_HOUR = 60; static constexpr const int32_t HOURS_PER_DAY = 24; static constexpr const int32_t SECS_PER_HOUR = SECS_PER_MINUTE * MINS_PER_HOUR; static constexpr const int32_t SECS_PER_DAY = SECS_PER_HOUR * HOURS_PER_DAY; static constexpr const int32_t SECS_PER_WEEK = SECS_PER_DAY * DAYS_PER_WEEK; static constexpr const int64_t MICROS_PER_MSEC = 1000; static constexpr const int64_t MICROS_PER_SEC = MICROS_PER_MSEC * MSECS_PER_SEC; static constexpr const int64_t MICROS_PER_MINUTE = MICROS_PER_SEC * SECS_PER_MINUTE; static constexpr const int64_t MICROS_PER_HOUR = MICROS_PER_MINUTE * MINS_PER_HOUR; static constexpr const int64_t MICROS_PER_DAY = MICROS_PER_HOUR * HOURS_PER_DAY; static constexpr const int64_t MICROS_PER_WEEK = MICROS_PER_DAY * DAYS_PER_WEEK; static constexpr const int64_t MICROS_PER_MONTH = MICROS_PER_DAY * DAYS_PER_MONTH; static constexpr const int64_t NANOS_PER_MICRO = 1000; static constexpr const int64_t NANOS_PER_MSEC = NANOS_PER_MICRO * MICROS_PER_MSEC; static constexpr const int64_t NANOS_PER_SEC = NANOS_PER_MSEC * MSECS_PER_SEC; static constexpr const int64_t NANOS_PER_MINUTE = NANOS_PER_SEC * SECS_PER_MINUTE; static constexpr const int64_t NANOS_PER_HOUR = NANOS_PER_MINUTE * MINS_PER_HOUR; static constexpr const int64_t NANOS_PER_DAY = NANOS_PER_HOUR * HOURS_PER_DAY; static constexpr const int64_t NANOS_PER_WEEK = NANOS_PER_DAY * DAYS_PER_WEEK; public: //! Convert a string to an interval object static bool FromString(const string &str, interval_t &result); //! Convert a string to an interval object static bool FromCString(const char *str, idx_t len, interval_t &result, string *error_message, bool strict); //! 
Convert an interval object to a string static string ToString(const interval_t &val); //! Convert milliseconds to a normalised interval DUCKDB_API static interval_t FromMicro(int64_t micros); //! Get Interval in milliseconds static int64_t GetMilli(const interval_t &val); //! Get Interval in microseconds static bool TryGetMicro(const interval_t &val, int64_t µs); static int64_t GetMicro(const interval_t &val); //! Get Interval in Nanoseconds static int64_t GetNanoseconds(const interval_t &val); //! Returns the age between two timestamps (including months) static interval_t GetAge(timestamp_t timestamp_1, timestamp_t timestamp_2); //! Returns the age between two timestamp components static interval_t GetAge(TimestampComponents ts1, TimestampComponents ts2, bool is_negative); //! Returns the exact difference between two timestamps (days and seconds) static interval_t GetDifference(timestamp_t timestamp_1, timestamp_t timestamp_2); //! Returns the inverted interval static interval_t Invert(interval_t interval); //! Add an interval to a date static date_t Add(date_t left, interval_t right); //! Add an interval to a timestamp static timestamp_t Add(timestamp_t left, interval_t right); //! Add an interval to a time. In case the time overflows or underflows, modify the date by the overflow. //! For example if we go from 23:00 to 02:00, we add a day to the date static dtime_t Add(dtime_t left, interval_t right, date_t &date); static dtime_tz_t Add(dtime_tz_t left, interval_t right, date_t &date); //! 
Comparison operators inline static bool Equals(const interval_t &left, const interval_t &right) { return left == right; } inline static bool GreaterThan(const interval_t &left, const interval_t &right) { return left > right; } }; void interval_t::Normalize(int64_t &months, int64_t &days, int64_t µs) const { auto &input = *this; // Carry left micros = input.micros; int64_t carry_days = micros / Interval::MICROS_PER_DAY; micros -= carry_days * Interval::MICROS_PER_DAY; days = input.days; days += carry_days; int64_t carry_months = days / Interval::DAYS_PER_MONTH; days -= carry_months * Interval::DAYS_PER_MONTH; months = input.months; months += carry_months; } void interval_t::Borrow(const int64_t msf, int64_t &lsf, int32_t &f, const int64_t scale) { if (msf > NumericLimits::Maximum()) { f = NumericLimits::Maximum(); lsf += (msf - f) * scale; } else if (msf < NumericLimits::Minimum()) { f = NumericLimits::Minimum(); lsf += (msf - f) * scale; } else { f = UnsafeNumericCast(msf); } } interval_t interval_t::Normalize() const { interval_t result; int64_t mm; int64_t dd; Normalize(mm, dd, result.micros); // Borrow right on overflow Borrow(mm, dd, result.months, Interval::DAYS_PER_MONTH); Borrow(dd, result.micros, result.days, Interval::MICROS_PER_DAY); return result; } } // namespace duckdb namespace std { template <> struct hash { size_t operator()(const duckdb::interval_t &val) const { int64_t months, days, micros; val.Normalize(months, days, micros); using std::hash; return hash {}(duckdb::UnsafeNumericCast(days)) ^ hash {}(duckdb::UnsafeNumericCast(months)) ^ hash {}(micros); } }; } // namespace std //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/insertion_order_preserving_map.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/case_insensitive_map.hpp // // 
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/unordered_set.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::unordered_set; } //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/map.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::map; using std::multimap; } // namespace duckdb namespace duckdb { struct CaseInsensitiveStringHashFunction { uint64_t operator()(const string &str) const { return StringUtil::CIHash(str); } }; struct CaseInsensitiveStringEquality { bool operator()(const string &a, const string &b) const { return StringUtil::CIEquals(a, b); } }; template using case_insensitive_map_t = unordered_map; using case_insensitive_set_t = unordered_set; struct CaseInsensitiveStringCompare { bool operator()(const string &s1, const string &s2) const { return StringUtil::CILessThan(s1, s2); } }; template using case_insensitive_tree_t = map; } // namespace duckdb namespace duckdb { template class InsertionOrderPreservingMap { public: typedef vector> VECTOR_TYPE; // NOLINT: matching name of std typedef string key_type; // NOLINT: matching name of std public: InsertionOrderPreservingMap() { } private: VECTOR_TYPE map; case_insensitive_map_t map_idx; public: vector Keys() const { vector keys; keys.resize(this->size()); for (auto &kv : map_idx) { keys[kv.second] = kv.first; } return keys; } typename VECTOR_TYPE::iterator begin() { // NOLINT: match stl API return map.begin(); } typename VECTOR_TYPE::iterator end() { // NOLINT: match stl API return map.end(); } typename VECTOR_TYPE::const_iterator begin() const { // NOLINT: match stl API return map.begin(); } typename VECTOR_TYPE::const_iterator end() const { // 
NOLINT: match stl API return map.end(); } typename VECTOR_TYPE::reverse_iterator rbegin() { // NOLINT: match stl API return map.rbegin(); } typename VECTOR_TYPE::reverse_iterator rend() { // NOLINT: match stl API return map.rend(); } typename VECTOR_TYPE::iterator find(const string &key) { // NOLINT: match stl API auto entry = map_idx.find(key); if (entry == map_idx.end()) { return map.end(); } return map.begin() + static_cast(entry->second); } typename VECTOR_TYPE::const_iterator find(const string &key) const { // NOLINT: match stl API auto entry = map_idx.find(key); if (entry == map_idx.end()) { return map.end(); } return map.begin() + static_cast(entry->second); } idx_t size() const { // NOLINT: match stl API return map_idx.size(); } bool empty() const { // NOLINT: match stl API return map_idx.empty(); } void resize(idx_t nz) { // NOLINT: match stl API map.resize(nz); } void insert(const string &key, V &&value) { // NOLINT: match stl API if (contains(key)) { return; } map.emplace_back(key, std::move(value)); map_idx[key] = map.size() - 1; } void insert(const string &key, const V &value) { // NOLINT: match stl API if (contains(key)) { return; } map.emplace_back(key, value); map_idx[key] = map.size() - 1; } void insert(pair &&value) { // NOLINT: match stl API auto &key = value.first; if (contains(key)) { return; } map_idx[key] = map.size(); map.push_back(std::move(value)); } void erase(typename VECTOR_TYPE::iterator it) { // NOLINT: match stl API auto key = it->first; auto idx = map_idx[key]; map.erase(it); map_idx.erase(key); for (auto &kv : map_idx) { if (kv.second > idx) { kv.second--; } } } bool contains(const string &key) const { // NOLINT: match stl API return map_idx.find(key) != map_idx.end(); } const V &at(const string &key) const { // NOLINT: match stl API return map[map_idx.at(key)].second; } V &operator[](const string &key) { if (!contains(key)) { auto v = V(); insert(key, std::move(v)); } return map[map_idx[key]].second; } bool operator==(const 
InsertionOrderPreservingMap &other) const { return map == other.map && map_idx == other.map_idx; } bool operator!=(const InsertionOrderPreservingMap &other) const { return !(*this == other); } }; } // namespace duckdb namespace duckdb { class String; class CastFunctionSet; struct GetCastFunctionInput; struct ExtraValueInfo; //! The Value object holds a single arbitrary value of any type that can be //! stored in the database. class Value { friend struct StringValue; friend struct StructValue; friend struct ListValue; friend struct UnionValue; friend struct ArrayValue; friend struct MapValue; public: //! Create an empty NULL value of the specified type DUCKDB_API explicit Value(LogicalType type = LogicalType::SQLNULL); //! Create an INTEGER value DUCKDB_API Value(int32_t val); // NOLINT: Allow implicit conversion from `int32_t` //! Create a BOOLEAN value explicit DUCKDB_API Value(bool val); //! Create a BIGINT value DUCKDB_API Value(int64_t val); // NOLINT: Allow implicit conversion from `int64_t` //! Create a FLOAT value DUCKDB_API Value(float val); // NOLINT: Allow implicit conversion from `float` //! Create a DOUBLE value DUCKDB_API Value(double val); // NOLINT: Allow implicit conversion from `double` //! Create a VARCHAR value DUCKDB_API Value(const char *val); // NOLINT: Allow implicit conversion from `const char *` //! Create a NULL value DUCKDB_API Value(std::nullptr_t val); // NOLINT: Allow implicit conversion from `nullptr_t` //! Create a VARCHAR value DUCKDB_API Value(string_t val); // NOLINT: Allow implicit conversion from `string_t` //! Create a VARCHAR value DUCKDB_API Value(string val); // NOLINT: Allow implicit conversion from `string` //! Create a VARCHAR value DUCKDB_API Value(String val); // NOLINT: Allow implicit conversion from `string` //! Copy constructor DUCKDB_API Value(const Value &other); //! Move constructor DUCKDB_API Value(Value &&other) noexcept; //! 
Destructor DUCKDB_API ~Value(); // copy assignment DUCKDB_API Value &operator=(const Value &other); // move assignment DUCKDB_API Value &operator=(Value &&other) noexcept; inline LogicalType &GetTypeMutable() { return type_; } inline const LogicalType &type() const { // NOLINT return type_; } inline bool IsNull() const { return is_null; } //! Create the lowest possible value of a given type (numeric only) DUCKDB_API static Value MinimumValue(const LogicalType &type); //! Create the highest possible value of a given type (numeric only) DUCKDB_API static Value MaximumValue(const LogicalType &type); //! Create the negative infinite value of a given type (numeric only) DUCKDB_API static Value NegativeInfinity(const LogicalType &type); //! Create the positive infinite value of a given type (numeric only) DUCKDB_API static Value Infinity(const LogicalType &type); //! Create a Numeric value of the specified type with the specified value DUCKDB_API static Value Numeric(const LogicalType &type, int64_t value); DUCKDB_API static Value Numeric(const LogicalType &type, hugeint_t value); DUCKDB_API static Value Numeric(const LogicalType &type, uhugeint_t value); //! Create a boolean Value from a specified value DUCKDB_API static Value BOOLEAN(bool value); //! Create a tinyint Value from a specified value DUCKDB_API static Value TINYINT(int8_t value); //! Create a smallint Value from a specified value DUCKDB_API static Value SMALLINT(int16_t value); //! Create an integer Value from a specified value DUCKDB_API static Value INTEGER(int32_t value); //! Create a bigint Value from a specified value DUCKDB_API static Value BIGINT(int64_t value); //! Create an unsigned tinyint Value from a specified value DUCKDB_API static Value UTINYINT(uint8_t value); //! Create an unsigned smallint Value from a specified value DUCKDB_API static Value USMALLINT(uint16_t value); //! Create an unsigned integer Value from a specified value DUCKDB_API static Value UINTEGER(uint32_t value); //! 
Create an unsigned bigint Value from a specified value DUCKDB_API static Value UBIGINT(uint64_t value); //! Create a hugeint Value from a specified value DUCKDB_API static Value HUGEINT(hugeint_t value); //! Create a uhugeint Value from a specified value DUCKDB_API static Value UHUGEINT(uhugeint_t value); //! Create a uuid Value from a specified value DUCKDB_API static Value UUID(const string &value); //! Create a uuid Value from a specified value DUCKDB_API static Value UUID(hugeint_t value); //! Create a hash Value from a specified value DUCKDB_API static Value HASH(hash_t value); //! Create a pointer Value from a specified value DUCKDB_API static Value POINTER(uintptr_t value); //! Create a date Value from a specified date DUCKDB_API static Value DATE(date_t date); //! Create a date Value from a specified date DUCKDB_API static Value DATE(int32_t year, int32_t month, int32_t day); //! Create a time Value from a specified time DUCKDB_API static Value TIME(dtime_t time); DUCKDB_API static Value TIME_NS(dtime_ns_t time); DUCKDB_API static Value TIMETZ(dtime_tz_t time); //! Create a time Value from a specified time DUCKDB_API static Value TIME(int32_t hour, int32_t min, int32_t sec, int32_t micros); //! Create a timestamp Value from a specified date/time combination. DUCKDB_API static Value TIMESTAMP(date_t date, dtime_t time); //! Create a timestamp Value from a specified value. DUCKDB_API static Value TIMESTAMP(timestamp_t timestamp); //! Create a timestamp_s Value from a specified value. DUCKDB_API static Value TIMESTAMPSEC(timestamp_sec_t timestamp); //! Create a timestamp_ms Value from a specified value. DUCKDB_API static Value TIMESTAMPMS(timestamp_ms_t timestamp); //! Create a timestamp_ns Value from a specified value. DUCKDB_API static Value TIMESTAMPNS(timestamp_ns_t timestamp); //! Create a timestamp_tz Value from a specified value. DUCKDB_API static Value TIMESTAMPTZ(timestamp_tz_t timestamp); //! 
Create a timestamp Value from a specified timestamp in separate values DUCKDB_API static Value TIMESTAMP(int32_t year, int32_t month, int32_t day, int32_t hour, int32_t min, int32_t sec, int32_t micros); DUCKDB_API static Value INTERVAL(int32_t months, int32_t days, int64_t micros); DUCKDB_API static Value INTERVAL(interval_t interval); // Create a enum Value from a specified uint value DUCKDB_API static Value ENUM(uint64_t value, const LogicalType &original_type); // Decimal values DUCKDB_API static Value DECIMAL(int16_t value, uint8_t width, uint8_t scale); DUCKDB_API static Value DECIMAL(int32_t value, uint8_t width, uint8_t scale); DUCKDB_API static Value DECIMAL(int64_t value, uint8_t width, uint8_t scale); DUCKDB_API static Value DECIMAL(hugeint_t value, uint8_t width, uint8_t scale); //! Create a float Value from a specified value DUCKDB_API static Value FLOAT(float value); //! Create a double Value from a specified value DUCKDB_API static Value DOUBLE(double value); //! Create a struct value with given list of entries DUCKDB_API static Value STRUCT(child_list_t values); DUCKDB_API static Value STRUCT(const LogicalType &type, vector struct_values); //! Create a variant value with given list of internal variant data (keys/children/values/data) DUCKDB_API static Value VARIANT(vector children); //! Create a list value with the given entries DUCKDB_API static Value LIST(const LogicalType &child_type, vector values); //! Create a list value with the given entries //! The type of the first value determines the list type. The list cannot be empty. DUCKDB_API static Value LIST(vector values); // Create an array value with the given entries DUCKDB_API static Value ARRAY(const LogicalType &type, vector values); //! Create a map value with the given entries DUCKDB_API static Value MAP(const LogicalType &child_type, vector values); //! 
Create a map value with the given entries DUCKDB_API static Value MAP(const LogicalType &key_type, const LogicalType &value_type, vector keys, vector values); //! Create a map value from a set of key-value pairs DUCKDB_API static Value MAP(const InsertionOrderPreservingMap &kv_pairs); //! Create a union value from a selected value and a tag from a set of alternatives. DUCKDB_API static Value UNION(child_list_t members, uint8_t tag, Value value); //! Create a blob Value from a data pointer and a length: no bytes are interpreted DUCKDB_API static Value BLOB(const_data_ptr_t data, idx_t len); static Value BLOB_RAW(const string &data) { // NOLINT return Value::BLOB(const_data_ptr_cast(data.c_str()), data.size()); } //! Creates a blob by casting a specified string to a blob (i.e. interpreting \x characters) DUCKDB_API static Value BLOB(const string &data); //! Creates a bitstring by casting a specified string to a bitstring DUCKDB_API static Value BIT(const_data_ptr_t data, idx_t len); DUCKDB_API static Value BIT(const string &data); DUCKDB_API static Value BIGNUM(const_data_ptr_t data, idx_t len); DUCKDB_API static Value BIGNUM(const string &data); //! Creates an aggregate state DUCKDB_API static Value AGGREGATE_STATE(const LogicalType &type, const_data_ptr_t data, idx_t len); // NOLINT template T GetValue() const; template static Value CreateValue(T value) { static_assert(AlwaysFalse::VALUE, "No specialization exists for this type"); return Value(nullptr); } // Returns the internal value. Unlike GetValue(), this method does not perform casting, and assumes T matches the // type of the value. Only use this if you know what you are doing. template T GetValueUnsafe() const; //! Return a copy of this value Value Copy() const { return Value(*this); } //! Hashes the Value DUCKDB_API hash_t Hash() const; //! Convert this value to a string DUCKDB_API string ToString() const; //! 
Convert this value to a SQL-parseable string DUCKDB_API string ToSQLString() const; DUCKDB_API uintptr_t GetPointer() const; //! Cast this value to another type, throws exception if its not possible DUCKDB_API Value CastAs(CastFunctionSet &set, GetCastFunctionInput &get_input, const LogicalType &target_type, bool strict = false) const; DUCKDB_API Value CastAs(ClientContext &context, const LogicalType &target_type, bool strict = false) const; DUCKDB_API Value DefaultCastAs(const LogicalType &target_type, bool strict = false) const; //! Tries to cast this value to another type, and stores the result in "new_value" DUCKDB_API bool TryCastAs(CastFunctionSet &set, GetCastFunctionInput &get_input, const LogicalType &target_type, Value &new_value, string *error_message, bool strict = false) const; DUCKDB_API bool TryCastAs(ClientContext &context, const LogicalType &target_type, Value &new_value, string *error_message, bool strict = false) const; DUCKDB_API bool DefaultTryCastAs(const LogicalType &target_type, Value &new_value, string *error_message, bool strict = false) const; //! Tries to cast this value to another type, and stores the result in THIS value again DUCKDB_API bool TryCastAs(CastFunctionSet &set, GetCastFunctionInput &get_input, const LogicalType &target_type, bool strict = false); DUCKDB_API bool TryCastAs(ClientContext &context, const LogicalType &target_type, bool strict = false); DUCKDB_API bool DefaultTryCastAs(const LogicalType &target_type, bool strict = false); DUCKDB_API void Reinterpret(LogicalType new_type); //! Serializes a Value to a stand-alone binary blob DUCKDB_API void Serialize(Serializer &serializer) const; //! 
Deserializes a Value from a blob DUCKDB_API static Value Deserialize(Deserializer &deserializer); //===--------------------------------------------------------------------===// // Comparison Operators //===--------------------------------------------------------------------===// DUCKDB_API bool operator==(const Value &rhs) const; DUCKDB_API bool operator!=(const Value &rhs) const; DUCKDB_API bool operator<(const Value &rhs) const; DUCKDB_API bool operator>(const Value &rhs) const; DUCKDB_API bool operator<=(const Value &rhs) const; DUCKDB_API bool operator>=(const Value &rhs) const; DUCKDB_API bool operator==(const int64_t &rhs) const; DUCKDB_API bool operator!=(const int64_t &rhs) const; DUCKDB_API bool operator<(const int64_t &rhs) const; DUCKDB_API bool operator>(const int64_t &rhs) const; DUCKDB_API bool operator<=(const int64_t &rhs) const; DUCKDB_API bool operator>=(const int64_t &rhs) const; DUCKDB_API static bool FloatIsFinite(float value); DUCKDB_API static bool DoubleIsFinite(double value); template static bool IsNan(T value) { throw InternalException("Unimplemented template type for Value::IsNan"); } template static bool IsFinite(T value) { return true; } DUCKDB_API static bool StringIsValid(const char *str, idx_t length); static bool StringIsValid(const string &str) { return StringIsValid(str.c_str(), str.size()); } //! Returns true if the values are (approximately) equivalent. Note this is NOT the SQL equivalence. For this //! function, NULL values are equivalent and floating point values that are close are equivalent. DUCKDB_API static bool ValuesAreEqual(CastFunctionSet &set, GetCastFunctionInput &get_input, const Value &result_value, const Value &value); DUCKDB_API static bool ValuesAreEqual(ClientContext &context, const Value &result_value, const Value &value); DUCKDB_API static bool DefaultValuesAreEqual(const Value &result_value, const Value &value); //! 
Returns true if the values are not distinct from each other, following SQL semantics for NOT DISTINCT FROM. DUCKDB_API static bool NotDistinctFrom(const Value &lvalue, const Value &rvalue); friend std::ostream &operator<<(std::ostream &out, const Value &val) { out << val.ToString(); return out; } DUCKDB_API void Print() const; private: void SerializeInternal(Serializer &serializer, bool serialize_type) const; static void SerializeChildren(Serializer &serializer, const vector &children, const LogicalType &parent_type); private: //! The logical of the value LogicalType type_; // NOLINT //! Whether or not the value is NULL bool is_null; //! The value of the object, if it is of a constant size Type union Val { bool boolean; int8_t tinyint; int16_t smallint; int32_t integer; int64_t bigint; uint8_t utinyint; uint16_t usmallint; uint32_t uinteger; uint64_t ubigint; hugeint_t hugeint; uhugeint_t uhugeint; float float_; // NOLINT double double_; // NOLINT uintptr_t pointer; uint64_t hash; date_t date; dtime_t time; dtime_ns_t time_ns; dtime_tz_t timetz; timestamp_t timestamp; timestamp_sec_t timestamp_s; timestamp_ms_t timestamp_ms; timestamp_ns_t timestamp_ns; timestamp_tz_t timestamp_tz; interval_t interval; } value_; // NOLINT shared_ptr value_info_; // NOLINT private: template T GetValueInternal() const; }; //===--------------------------------------------------------------------===// // Type-specific getters //===--------------------------------------------------------------------===// // Note that these are equivalent to calling GetValueUnsafe, meaning no cast will be performed // instead, an assertion will be triggered if the value is not of the correct type struct BooleanValue { DUCKDB_API static bool Get(const Value &value); }; struct TinyIntValue { DUCKDB_API static int8_t Get(const Value &value); }; struct SmallIntValue { DUCKDB_API static int16_t Get(const Value &value); }; struct IntegerValue { DUCKDB_API static int32_t Get(const Value &value); }; struct 
BigIntValue { DUCKDB_API static int64_t Get(const Value &value); }; struct HugeIntValue { DUCKDB_API static hugeint_t Get(const Value &value); }; struct UTinyIntValue { DUCKDB_API static uint8_t Get(const Value &value); }; struct USmallIntValue { DUCKDB_API static uint16_t Get(const Value &value); }; struct UIntegerValue { DUCKDB_API static uint32_t Get(const Value &value); }; struct UBigIntValue { DUCKDB_API static uint64_t Get(const Value &value); }; struct UhugeIntValue { DUCKDB_API static uhugeint_t Get(const Value &value); }; struct FloatValue { DUCKDB_API static float Get(const Value &value); }; struct DoubleValue { DUCKDB_API static double Get(const Value &value); }; struct StringValue { DUCKDB_API static const string &Get(const Value &value); }; struct DateValue { DUCKDB_API static date_t Get(const Value &value); }; struct TimeValue { DUCKDB_API static dtime_t Get(const Value &value); }; struct TimestampValue { DUCKDB_API static timestamp_t Get(const Value &value); }; struct TimestampSValue { DUCKDB_API static timestamp_sec_t Get(const Value &value); }; struct TimestampMSValue { DUCKDB_API static timestamp_ms_t Get(const Value &value); }; struct TimestampNSValue { DUCKDB_API static timestamp_ns_t Get(const Value &value); }; struct TimestampTZValue { DUCKDB_API static timestamp_tz_t Get(const Value &value); }; struct IntervalValue { DUCKDB_API static interval_t Get(const Value &value); }; struct StructValue { DUCKDB_API static const vector &GetChildren(const Value &value); }; struct MapValue { DUCKDB_API static const vector &GetChildren(const Value &value); }; struct ListValue { DUCKDB_API static const vector &GetChildren(const Value &value); }; struct ArrayValue { DUCKDB_API static const vector &GetChildren(const Value &value); }; struct UnionValue { DUCKDB_API static const Value &GetValue(const Value &value); DUCKDB_API static uint8_t GetTag(const Value &value); DUCKDB_API static const LogicalType &GetType(const Value &value); }; //! 
Return the internal integral value for any type that is stored as an integral value internally //! This can be used on values of type integer, uinteger, but also date, timestamp, decimal, etc struct IntegralValue { static hugeint_t Get(const Value &value); }; template <> Value DUCKDB_API Value::CreateValue(bool value); template <> Value DUCKDB_API Value::CreateValue(uint8_t value); template <> Value DUCKDB_API Value::CreateValue(uint16_t value); template <> Value DUCKDB_API Value::CreateValue(uint32_t value); template <> Value DUCKDB_API Value::CreateValue(uint64_t value); template <> Value DUCKDB_API Value::CreateValue(int8_t value); template <> Value DUCKDB_API Value::CreateValue(int16_t value); template <> Value DUCKDB_API Value::CreateValue(int32_t value); template <> Value DUCKDB_API Value::CreateValue(int64_t value); template <> Value DUCKDB_API Value::CreateValue(hugeint_t value); template <> Value DUCKDB_API Value::CreateValue(uhugeint_t value); template <> Value DUCKDB_API Value::CreateValue(date_t value); template <> Value DUCKDB_API Value::CreateValue(dtime_t value); template <> Value DUCKDB_API Value::CreateValue(dtime_ns_t value); template <> Value DUCKDB_API Value::CreateValue(dtime_tz_t value); template <> Value DUCKDB_API Value::CreateValue(timestamp_t value); template <> Value DUCKDB_API Value::CreateValue(timestamp_sec_t value); template <> Value DUCKDB_API Value::CreateValue(timestamp_ms_t value); template <> Value DUCKDB_API Value::CreateValue(timestamp_ns_t value); template <> Value DUCKDB_API Value::CreateValue(timestamp_tz_t value); template <> Value DUCKDB_API Value::CreateValue(const char *value); template <> Value DUCKDB_API Value::CreateValue(string value); template <> Value DUCKDB_API Value::CreateValue(string_t value); template <> Value DUCKDB_API Value::CreateValue(float value); template <> Value DUCKDB_API Value::CreateValue(double value); template <> Value DUCKDB_API Value::CreateValue(interval_t value); template <> Value DUCKDB_API 
Value::CreateValue(Value value); template <> DUCKDB_API bool Value::GetValue() const; template <> DUCKDB_API int8_t Value::GetValue() const; template <> DUCKDB_API int16_t Value::GetValue() const; template <> DUCKDB_API int32_t Value::GetValue() const; template <> DUCKDB_API int64_t Value::GetValue() const; template <> DUCKDB_API uint8_t Value::GetValue() const; template <> DUCKDB_API uint16_t Value::GetValue() const; template <> DUCKDB_API uint32_t Value::GetValue() const; template <> DUCKDB_API uint64_t Value::GetValue() const; template <> DUCKDB_API hugeint_t Value::GetValue() const; template <> DUCKDB_API uhugeint_t Value::GetValue() const; template <> DUCKDB_API string Value::GetValue() const; template <> DUCKDB_API float Value::GetValue() const; template <> DUCKDB_API double Value::GetValue() const; template <> DUCKDB_API date_t Value::GetValue() const; template <> DUCKDB_API dtime_t Value::GetValue() const; template <> DUCKDB_API dtime_ns_t Value::GetValue() const; template <> DUCKDB_API dtime_tz_t Value::GetValue() const; template <> DUCKDB_API timestamp_t Value::GetValue() const; template <> DUCKDB_API timestamp_sec_t Value::GetValue() const; template <> DUCKDB_API timestamp_ms_t Value::GetValue() const; template <> DUCKDB_API timestamp_ns_t Value::GetValue() const; template <> DUCKDB_API timestamp_tz_t Value::GetValue() const; template <> DUCKDB_API interval_t Value::GetValue() const; template <> DUCKDB_API Value Value::GetValue() const; template <> DUCKDB_API bool Value::GetValueUnsafe() const; template <> DUCKDB_API int8_t Value::GetValueUnsafe() const; template <> DUCKDB_API int16_t Value::GetValueUnsafe() const; template <> DUCKDB_API int32_t Value::GetValueUnsafe() const; template <> DUCKDB_API int64_t Value::GetValueUnsafe() const; template <> DUCKDB_API hugeint_t Value::GetValueUnsafe() const; template <> DUCKDB_API uhugeint_t Value::GetValueUnsafe() const; template <> DUCKDB_API uint8_t Value::GetValueUnsafe() const; template <> DUCKDB_API 
uint16_t Value::GetValueUnsafe() const; template <> DUCKDB_API uint32_t Value::GetValueUnsafe() const; template <> DUCKDB_API uint64_t Value::GetValueUnsafe() const; template <> DUCKDB_API string Value::GetValueUnsafe() const; template <> DUCKDB_API string_t Value::GetValueUnsafe() const; template <> DUCKDB_API float Value::GetValueUnsafe() const; template <> DUCKDB_API double Value::GetValueUnsafe() const; template <> DUCKDB_API date_t Value::GetValueUnsafe() const; template <> DUCKDB_API dtime_t Value::GetValueUnsafe() const; template <> DUCKDB_API dtime_ns_t Value::GetValueUnsafe() const; template <> DUCKDB_API dtime_tz_t Value::GetValueUnsafe() const; template <> DUCKDB_API timestamp_t Value::GetValueUnsafe() const; template <> DUCKDB_API timestamp_sec_t Value::GetValueUnsafe() const; template <> DUCKDB_API timestamp_ms_t Value::GetValueUnsafe() const; template <> DUCKDB_API timestamp_ns_t Value::GetValueUnsafe() const; template <> DUCKDB_API timestamp_tz_t Value::GetValueUnsafe() const; template <> DUCKDB_API interval_t Value::GetValueUnsafe() const; template <> DUCKDB_API bool Value::IsNan(float input); template <> DUCKDB_API bool Value::IsNan(double input); template <> DUCKDB_API bool Value::IsFinite(float input); template <> DUCKDB_API bool Value::IsFinite(double input); template <> DUCKDB_API bool Value::IsFinite(date_t input); template <> DUCKDB_API bool Value::IsFinite(timestamp_t input); template <> DUCKDB_API bool Value::IsFinite(timestamp_sec_t input); template <> DUCKDB_API bool Value::IsFinite(timestamp_ms_t input); template <> DUCKDB_API bool Value::IsFinite(timestamp_ns_t input); template <> DUCKDB_API bool Value::IsFinite(timestamp_tz_t input); } // namespace duckdb #include namespace duckdb { class Catalog; class CatalogSet; class ClientContext; class Deserializer; class SchemaCatalogEntry; class Serializer; class Value; struct AlterInfo; struct CatalogTransaction; struct CreateInfo; //! 
Abstract base class of an entry in the catalog class CatalogEntry { public: CatalogEntry(CatalogType type, Catalog &catalog, string name); CatalogEntry(CatalogType type, string name, idx_t oid); virtual ~CatalogEntry(); //! The oid of the entry idx_t oid; //! The type of this catalog entry CatalogType type; //! Reference to the catalog set this entry is stored in optional_ptr set; //! The name of the entry string name; //! Whether or not the object is deleted bool deleted; //! Whether or not the object is temporary and should not be added to the WAL bool temporary; //! Whether or not the entry is an internal entry (cannot be deleted, not dumped, etc) bool internal; //! Timestamp at which the catalog entry was created atomic timestamp; //! (optional) comment on this entry Value comment; //! (optional) extra data associated with this entry InsertionOrderPreservingMap tags; private: //! Child entry unique_ptr child; //! Parent entry (the node that dependents_map this node) optional_ptr parent; public: virtual unique_ptr AlterEntry(ClientContext &context, AlterInfo &info); virtual unique_ptr AlterEntry(CatalogTransaction transaction, AlterInfo &info); virtual void UndoAlter(ClientContext &context, AlterInfo &info); virtual void Rollback(CatalogEntry &prev_entry); virtual void OnDrop(); virtual unique_ptr Copy(ClientContext &context) const; virtual unique_ptr GetInfo() const; //! Sets the CatalogEntry as the new root entry (i.e. the newest entry) // this is called on a rollback to an AlterEntry virtual void SetAsRoot(); //! 
Convert the catalog entry to a SQL string that can be used to re-construct the catalog entry virtual string ToSQL() const; virtual Catalog &ParentCatalog(); virtual const Catalog &ParentCatalog() const; virtual SchemaCatalogEntry &ParentSchema(); virtual const SchemaCatalogEntry &ParentSchema() const; virtual void Verify(Catalog &catalog); void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); public: void SetChild(unique_ptr child); unique_ptr TakeChild(); bool HasChild() const; bool HasParent() const; CatalogEntry &Child(); CatalogEntry &Parent(); const CatalogEntry &Parent() const; public: template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; class InCatalogEntry : public CatalogEntry { public: InCatalogEntry(CatalogType type, Catalog &catalog, string name); ~InCatalogEntry() override; //! The catalog the entry belongs to Catalog &catalog; public: Catalog &ParentCatalog() override { return catalog; } const Catalog &ParentCatalog() const override { return catalog; } void Verify(Catalog &catalog) override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_transaction.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Catalog; class ClientContext; class DatabaseInstance; class Transaction; struct CatalogTransaction { CatalogTransaction(Catalog &catalog, ClientContext &context); CatalogTransaction(DatabaseInstance &db, transaction_t transaction_id_p, transaction_t start_time_p); optional_ptr db; optional_ptr context; optional_ptr transaction; transaction_t transaction_id; transaction_t start_time; bool HasContext() const { return context; } ClientContext &GetContext(); static CatalogTransaction 
GetSystemCatalogTransaction(ClientContext &context); static CatalogTransaction GetSystemTransaction(DatabaseInstance &db); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/catalog_lookup_behavior.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! Enum used for indicating lookup behavior of specific catalog types // STANDARD means the catalog lookups are performed in a regular manner (i.e. according to the users' search path) // LOWER_PRIORITY means the catalog lookups are de-prioritized and we do lookups in other catalogs first // NEVER_LOOKUP means we never do lookups for this specific type in this catalog enum class CatalogLookupBehavior : uint8_t { STANDARD = 0, LOWER_PRIORITY = 1, NEVER_LOOKUP = 2 }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/on_entry_not_found.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class OnEntryNotFound : uint8_t { THROW_EXCEPTION = 0, RETURN_NULL = 1 }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/error_data.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class ParsedExpression; class TableRef; class ErrorData { public: //! Not initialized, default constructor DUCKDB_API ErrorData(); //! From std::exception DUCKDB_API ErrorData(const std::exception &ex); // NOLINT: allow implicit construction from exception //! From a raw string and exception type DUCKDB_API ErrorData(ExceptionType type, const string &raw_message); //! From a raw string DUCKDB_API explicit ErrorData(const string &raw_message); public: //! 
Throw the error [[noreturn]] DUCKDB_API void Throw(const string &prepended_message = "") const; //! Get the internal exception type of the error. DUCKDB_API const ExceptionType &Type() const; //! Used in clients like C-API, creates the final message and returns a reference to it DUCKDB_API const string &Message() const { return final_message; } DUCKDB_API const string &RawMessage() const { return raw_message; } DUCKDB_API void Merge(const ErrorData &other); DUCKDB_API bool operator==(const ErrorData &other) const; //! Returns true, if this error data contains an exception, else false. inline bool HasError() const { return initialized; } const unordered_map &ExtraInfo() const { return extra_info; } DUCKDB_API void FinalizeError(); DUCKDB_API void AddErrorLocation(const string &query); DUCKDB_API void ConvertErrorToJSON(); DUCKDB_API void AddQueryLocation(optional_idx query_location); DUCKDB_API void AddQueryLocation(QueryErrorContext error_context); DUCKDB_API void AddQueryLocation(const ParsedExpression &ref); DUCKDB_API void AddQueryLocation(const TableRef &ref); private: //! Whether this ErrorData contains an exception or not bool initialized; //! The ExceptionType of the preserved exception ExceptionType type; //! The message the exception was constructed with (does not contain the Exception Type) string raw_message; //! The final message (stored in the preserved error for compatibility reasons with C-API) string final_message; //! 
Extra exception info unordered_map extra_info; private: DUCKDB_API static string SanitizeErrorMessage(string error); DUCKDB_API string ConstructFinalMessage() const; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/mutex.hpp // // //===----------------------------------------------------------------------===// #ifdef __MVS__ #include #endif #include namespace duckdb { using std::lock_guard; using std::mutex; using std::unique_lock; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/reference_map.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Expression; template struct ReferenceHashFunction { uint64_t operator()(const reference &ref) const { return std::hash()((void *)&ref.get()); } }; template struct ReferenceEquality { bool operator()(const reference &a, const reference &b) const { return &a.get() == &b.get(); } }; template using reference_map_t = unordered_map, TGT, ReferenceHashFunction, ReferenceEquality>; template using reference_set_t = unordered_set, ReferenceHashFunction, ReferenceEquality>; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/entry_lookup_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class BoundAtClause; struct EntryLookupInfo { public: EntryLookupInfo(CatalogType catalog_type, const string &name, QueryErrorContext error_context = QueryErrorContext()); EntryLookupInfo(CatalogType catalog_type, const string &name, optional_ptr at_clause, QueryErrorContext error_context); EntryLookupInfo(const EntryLookupInfo &parent, const string &name); EntryLookupInfo(const EntryLookupInfo &parent, optional_ptr at_clause); public: CatalogType GetCatalogType() const; const 
string &GetEntryName() const; const QueryErrorContext &GetErrorContext() const; const optional_ptr GetAtClause() const; static EntryLookupInfo SchemaLookup(const EntryLookupInfo &parent, const string &schema_name); private: CatalogType catalog_type; const string &name; optional_ptr at_clause; QueryErrorContext error_context; }; //! Return value of Catalog::LookupEntry struct CatalogEntryLookup { optional_ptr schema; optional_ptr entry; ErrorData error; DUCKDB_API bool Found() const { return entry; } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/string.hpp // // //===----------------------------------------------------------------------===// #include #include namespace duckdb { class String { /* String either can own its data (with automatic cleanup) or hold a non-owning reference. * Move only semantics, no copying allowed. * Small Strings (≤7 bytes) are stored inline to avoid heap allocation. * 31-bit length limit with overflow protection. To create an owning String, use the constructors. To create a non-owning String, use the Reference methods. */ public: // Constructors (create an owning String) String() : len(0), buf {0} { } String(const std::string &str) { // NOLINT allowimplicit conversion AssignOwning(str.data(), SafeStrLen(str)); } String(const char *str, const uint32_t size) { AssignOwning(str, size); } String(const char *str) // NOLINT allowimplicit conversion : String(str, str ? SafeStrLen(str) : 0) { } public: // Copying is not allowed, use move semantics instead or explicitly create a new String instance. 
String(const String &other) = delete; String &operator=(const String &other) = delete; // Move Constructor String(String &&other) noexcept { TransferOwnership(other); } // Move Assignment String &operator=(String &&other) noexcept { if (this != &other) { Destroy(); TransferOwnership(other); } return *this; } // Destructor ~String() { Destroy(); } public: // Operators bool operator==(const String &other) const { if (this == &other) { return true; // points to the same instance } if (size() != other.size()) { return false; } return memcmp(data(), other.data(), size()) == 0; } bool operator==(const std::string &other) const { if (SafeStrLen(other) != size()) { return false; } return memcmp(data(), other.data(), size()) == 0; } bool operator==(const char *other) const { if (!other || SafeStrLen(other) != size()) { return false; } if (this->data() == other) { return true; // points to the same instance } return memcmp(data(), other, size()) == 0; } bool operator>(const String &other) const { const auto this_size = size(); const auto other_size = other.size(); const auto min_size = MinValue(this_size, other_size); auto memcmp_res = memcmp(data(), other.data(), min_size); return memcmp_res > 0 || (memcmp_res == 0 && this_size > other_size); } bool operator!=(const String &other) const { return !(*this == other); } bool operator<(const String &other) const { return other > *this; } bool operator>=(const String &other) const { return !(*this < other); } bool operator<=(const String &other) const { return !(*this > other); } char operator[](const idx_t pos) const { D_ASSERT(pos < size()); if (IsInlined()) { return buf[pos]; } return ptr[pos]; } public: // STL-like interface // NOLINTBEGIN - mimic std::string interface uint32_t size() const { return len & ~NON_OWNING_BIT; } bool empty() const { return len == 0; } const char *data() const { return IsInlined() ? 
buf : ptr; } const char *begin() const { return data(); } const char *end() const { return data() + size(); } const char *c_str() const { return data(); } // NOLINTEND // Helper methods bool IsOwning() const { return (len & NON_OWNING_BIT) == 0; } bool IsInlined() const { return len <= INLINE_MAX; } static bool CanBeInlined(uint32_t size) { return size <= INLINE_MAX; } // Creates a new String instance with its own copy of the data static String Copy(const char *data, uint32_t size) { if (data == nullptr) { return String(); // Return an empty String } String result; result.AssignOwning(data, size); return result; } static String Copy(const String &other) { return Copy(other.data(), other.size()); } static String Copy(const char *data) { return Copy(data, data ? SafeStrLen(data) : 0); } static String Copy(const std::string &str) { return Copy(str.data(), SafeStrLen(str)); } String Copy() const { return String::Copy(data(), size()); } // Creates a new String instance that references the data without owning it // If the size is small enough, it will inline the data; which WILL be owning static String Reference(const char *data, uint32_t size) { if (data == nullptr) { return String(); // Return an empty String } String result; // If we reference, and we can inline it, we make owning anyway if (size <= INLINE_MAX) { result.AssignOwning(data, size); } else { result.ptr = const_cast(data); // NOLINT allow const cast result.len = size | NON_OWNING_BIT; // Set the non-owning bit } return result; } static String Reference(const String &other) { return Reference(other.data(), other.size()); } static String Reference(const char *data) { return Reference(data, data ? 
SafeStrLen(data) : 0); } static String Reference(const std::string &str) { return Reference(str.data(), SafeStrLen(str)); } String Reference() const { return String::Reference(data(), size()); } std::string ToStdString() const { if (IsInlined()) { return std::string(buf, size()); } return std::string(ptr, size()); } static uint32_t SafeStrLen(const char *data) { if (!data) { return 0; } const auto len = strlen(data); D_ASSERT(len < NumericLimits::Maximum()); return static_cast(len); } static uint32_t SafeStrLen(const std::string &data) { const auto len = data.size(); D_ASSERT(len < NumericLimits::Maximum()); return static_cast(len); } public: static char CharacterToLower(char c) { if (c >= 'A' && c <= 'Z') { return UnsafeNumericCast(c + ('a' - 'A')); } return c; } String Lower() const { const auto str_data = data(); const auto str_size = size(); std::string lowercase_str; lowercase_str.reserve(str_size); for (idx_t i = 0; i < str_size; ++i) { lowercase_str.push_back(CharacterToLower(str_data[i])); } return String(lowercase_str); } private: static constexpr auto INLINE_CAP = sizeof(char *); static constexpr auto INLINE_MAX = INLINE_CAP - 1; static constexpr auto NON_OWNING_BIT = 1UL << (sizeof(uint32_t) * 8 - 1); static constexpr auto LENGTH_MAX = (1UL << (sizeof(uint32_t) * 8 - 1)) - 1; void AssignOwning(const char *new_data, uint32_t new_size) { len = new_data ? 
new_size : 0; if (len == 0) { buf[len] = '\0'; // Null-terminate the inline buffer return; } if (len <= INLINE_MAX) { memcpy(buf, new_data, len); buf[len] = '\0'; // Null-terminate the inline buffer return; } auto new_ptr = new char[len + 1]; // +1 for null-termination memcpy(new_ptr, new_data, len); new_ptr[len] = '\0'; ptr = new_ptr; } void Destroy() { if (IsOwning() && !IsInlined()) { delete[] ptr; } } // Releases the ownership of the String, without deleting the data, e.g., when transferring ownership void ReleaseOwning() { // Set the non-owning bit if (IsOwning() && !IsInlined()) { len |= NON_OWNING_BIT; } } void TransferOwnership(String &other) { len = other.len; if (IsInlined()) { AssignOwning(other.data(), other.size()); } else { ptr = other.ptr; len = other.len; } other.ReleaseOwning(); } private: uint32_t len; // The first bit indicates ownership (0 = owning, 1 = non-owning) union { // If length is less than or equal to INLINE_MAX, then it is inlined here const char *ptr; char buf[INLINE_CAP]; }; }; inline bool operator==(const std::string &lhs, const String &rhs) { return rhs == lhs; } inline bool operator==(const char *lhs, const String &rhs) { return rhs == lhs; } } // namespace duckdb #include namespace duckdb { struct AttachOptions; struct CreateSchemaInfo; struct DropInfo; struct BoundCreateTableInfo; struct AlterTableInfo; struct CreateTableFunctionInfo; struct CreateCopyFunctionInfo; struct CreatePragmaFunctionInfo; struct CreateFunctionInfo; struct CreateViewInfo; struct CreateSequenceInfo; struct CreateCollationInfo; struct CreateIndexInfo; struct CreateTypeInfo; struct CreateTableInfo; struct DatabaseSize; struct MetadataBlockInfo; class AttachedDatabase; class ClientContext; class QueryContext; class Transaction; class AggregateFunctionCatalogEntry; class CollateCatalogEntry; class SchemaCatalogEntry; class TableCatalogEntry; class ViewCatalogEntry; class SequenceCatalogEntry; class TableFunctionCatalogEntry; class CopyFunctionCatalogEntry; 
class PragmaFunctionCatalogEntry; class CatalogSet; class DatabaseInstance; class DependencyManager; struct CatalogLookup; struct CatalogEntryLookup; struct SimilarCatalogEntry; class Binder; class LogicalOperator; class LogicalMergeInto; class PhysicalOperator; class PhysicalPlanGenerator; class LogicalCreateIndex; class LogicalCreateTable; class LogicalInsert; class LogicalDelete; class LogicalUpdate; class CreateStatement; class CatalogEntryRetriever; //! The Catalog object represents the catalog of the database. class Catalog { public: explicit Catalog(AttachedDatabase &db); virtual ~Catalog(); public: //! Get the SystemCatalog from the ClientContext DUCKDB_API static Catalog &GetSystemCatalog(ClientContext &context); //! Get the SystemCatalog from the DatabaseInstance DUCKDB_API static Catalog &GetSystemCatalog(DatabaseInstance &db); //! Get the specified Catalog from the ClientContext DUCKDB_API static Catalog &GetCatalog(ClientContext &context, const string &catalog_name); //! Get the specified Catalog from the ClientContext DUCKDB_API static Catalog &GetCatalog(CatalogEntryRetriever &retriever, const string &catalog_name); //! Get the specified Catalog from the DatabaseInstance DUCKDB_API static Catalog &GetCatalog(DatabaseInstance &db, const string &catalog_name); //! Gets the specified Catalog from the database if it exists DUCKDB_API static optional_ptr GetCatalogEntry(ClientContext &context, const string &catalog_name); //! Gets the specified Catalog from the database if it exists DUCKDB_API static optional_ptr GetCatalogEntry(CatalogEntryRetriever &retriever, const string &catalog_name); //! 
Get the specific Catalog from the AttachedDatabase DUCKDB_API static Catalog &GetCatalog(AttachedDatabase &db); DUCKDB_API AttachedDatabase &GetAttached(); DUCKDB_API const AttachedDatabase &GetAttached() const; DUCKDB_API DatabaseInstance &GetDatabase(); virtual bool IsDuckCatalog() { return false; } virtual void Initialize(bool load_builtin) = 0; virtual void Initialize(optional_ptr context, bool load_builtin); virtual void FinalizeLoad(optional_ptr context); bool IsSystemCatalog() const; bool IsTemporaryCatalog() const; //! Returns a version number that uniquely characterizes the current catalog snapshot. //! If there are transaction-local changes, the version returned is >= TRANSACTION_START, o.w. it is a simple number //! starting at 0 that is incremented at each commit that has had catalog changes. //! If the catalog does not support versioning, no index is returned. DUCKDB_API virtual optional_idx GetCatalogVersion(ClientContext &context) { return {}; // don't return anything by default } //! Returns the catalog name - based on how the catalog was attached DUCKDB_API const string &GetName() const; DUCKDB_API idx_t GetOid(); DUCKDB_API virtual string GetCatalogType() = 0; DUCKDB_API CatalogTransaction GetCatalogTransaction(ClientContext &context); //! Creates a schema in the catalog. DUCKDB_API virtual optional_ptr CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) = 0; DUCKDB_API optional_ptr CreateSchema(ClientContext &context, CreateSchemaInfo &info); //! Creates a table in the catalog. DUCKDB_API optional_ptr CreateTable(CatalogTransaction transaction, BoundCreateTableInfo &info); DUCKDB_API optional_ptr CreateTable(ClientContext &context, BoundCreateTableInfo &info); //! Creates a table in the catalog. DUCKDB_API optional_ptr CreateTable(ClientContext &context, unique_ptr info); //! 
Create a table function in the catalog DUCKDB_API optional_ptr CreateTableFunction(CatalogTransaction transaction, CreateTableFunctionInfo &info); DUCKDB_API optional_ptr CreateTableFunction(ClientContext &context, CreateTableFunctionInfo &info); // Kept for backwards compatibility DUCKDB_API optional_ptr CreateTableFunction(ClientContext &context, optional_ptr info); //! Create a copy function in the catalog DUCKDB_API optional_ptr CreateCopyFunction(CatalogTransaction transaction, CreateCopyFunctionInfo &info); DUCKDB_API optional_ptr CreateCopyFunction(ClientContext &context, CreateCopyFunctionInfo &info); //! Create a pragma function in the catalog DUCKDB_API optional_ptr CreatePragmaFunction(CatalogTransaction transaction, CreatePragmaFunctionInfo &info); DUCKDB_API optional_ptr CreatePragmaFunction(ClientContext &context, CreatePragmaFunctionInfo &info); //! Create a scalar or aggregate function in the catalog DUCKDB_API optional_ptr CreateFunction(CatalogTransaction transaction, CreateFunctionInfo &info); DUCKDB_API optional_ptr CreateFunction(ClientContext &context, CreateFunctionInfo &info); //! Creates a table in the catalog. DUCKDB_API optional_ptr CreateView(CatalogTransaction transaction, CreateViewInfo &info); DUCKDB_API optional_ptr CreateView(ClientContext &context, CreateViewInfo &info); //! Creates a sequence in the catalog. DUCKDB_API optional_ptr CreateSequence(CatalogTransaction transaction, CreateSequenceInfo &info); DUCKDB_API optional_ptr CreateSequence(ClientContext &context, CreateSequenceInfo &info); //! Creates a Enum in the catalog. DUCKDB_API optional_ptr CreateType(CatalogTransaction transaction, CreateTypeInfo &info); DUCKDB_API optional_ptr CreateType(ClientContext &context, CreateTypeInfo &info); //! Creates a collation in the catalog DUCKDB_API optional_ptr CreateCollation(CatalogTransaction transaction, CreateCollationInfo &info); DUCKDB_API optional_ptr CreateCollation(ClientContext &context, CreateCollationInfo &info); //! 
Creates an index in the catalog DUCKDB_API optional_ptr CreateIndex(CatalogTransaction transaction, CreateIndexInfo &info); DUCKDB_API optional_ptr CreateIndex(ClientContext &context, CreateIndexInfo &info); //! Creates a table in the catalog. DUCKDB_API optional_ptr CreateTable(CatalogTransaction transaction, SchemaCatalogEntry &schema, BoundCreateTableInfo &info); //! Create a table function in the catalog DUCKDB_API optional_ptr CreateTableFunction(CatalogTransaction transaction, SchemaCatalogEntry &schema, CreateTableFunctionInfo &info); //! Create a copy function in the catalog DUCKDB_API optional_ptr CreateCopyFunction(CatalogTransaction transaction, SchemaCatalogEntry &schema, CreateCopyFunctionInfo &info); //! Create a pragma function in the catalog DUCKDB_API optional_ptr CreatePragmaFunction(CatalogTransaction transaction, SchemaCatalogEntry &schema, CreatePragmaFunctionInfo &info); //! Create a scalar or aggregate function in the catalog DUCKDB_API optional_ptr CreateFunction(CatalogTransaction transaction, SchemaCatalogEntry &schema, CreateFunctionInfo &info); //! Creates a view in the catalog DUCKDB_API optional_ptr CreateView(CatalogTransaction transaction, SchemaCatalogEntry &schema, CreateViewInfo &info); //! Creates a table in the catalog. DUCKDB_API optional_ptr CreateSequence(CatalogTransaction transaction, SchemaCatalogEntry &schema, CreateSequenceInfo &info); //! Creates a enum in the catalog. DUCKDB_API optional_ptr CreateType(CatalogTransaction transaction, SchemaCatalogEntry &schema, CreateTypeInfo &info); //! Creates a collation in the catalog DUCKDB_API optional_ptr CreateCollation(CatalogTransaction transaction, SchemaCatalogEntry &schema, CreateCollationInfo &info); //! Drops an entry from the catalog DUCKDB_API void DropEntry(ClientContext &context, DropInfo &info); DUCKDB_API virtual optional_ptr LookupSchema(CatalogTransaction transaction, const EntryLookupInfo &schema_lookup, OnEntryNotFound if_not_found) = 0; //! 
//! Returns the schema object with the specified name, or throws an exception if it does not exist
	DUCKDB_API SchemaCatalogEntry &GetSchema(ClientContext &context, const EntryLookupInfo &schema_lookup);
	DUCKDB_API optional_ptr<SchemaCatalogEntry> GetSchema(ClientContext &context, const EntryLookupInfo &schema_lookup,
	                                                      OnEntryNotFound if_not_found);
	//! Overloadable method for giving warnings on ambiguous naming id.tab due to a database and schema with name id
	DUCKDB_API virtual bool CheckAmbiguousCatalogOrSchema(ClientContext &context, const string &schema);
	DUCKDB_API SchemaCatalogEntry &GetSchema(ClientContext &context, const string &schema);
	DUCKDB_API SchemaCatalogEntry &GetSchema(CatalogTransaction transaction, const string &schema);
	DUCKDB_API SchemaCatalogEntry &GetSchema(CatalogTransaction transaction, const EntryLookupInfo &schema_lookup);
	DUCKDB_API static SchemaCatalogEntry &GetSchema(ClientContext &context, const string &catalog_name,
	                                                const EntryLookupInfo &schema_lookup);
	DUCKDB_API optional_ptr<SchemaCatalogEntry> GetSchema(ClientContext &context, const string &schema,
	                                                      OnEntryNotFound if_not_found);
	DUCKDB_API optional_ptr<SchemaCatalogEntry> GetSchema(CatalogTransaction transaction, const string &schema,
	                                                      OnEntryNotFound if_not_found);
	DUCKDB_API static optional_ptr<SchemaCatalogEntry> GetSchema(ClientContext &context, const string &catalog_name,
	                                                             const EntryLookupInfo &schema_lookup,
	                                                             OnEntryNotFound if_not_found);
	DUCKDB_API static SchemaCatalogEntry &GetSchema(ClientContext &context, const string &catalog_name,
	                                                const string &schema);
	DUCKDB_API static optional_ptr<SchemaCatalogEntry> GetSchema(ClientContext &context, const string &catalog_name,
	                                                             const string &schema, OnEntryNotFound if_not_found);
	DUCKDB_API static optional_ptr<SchemaCatalogEntry> GetSchema(CatalogEntryRetriever &retriever,
	                                                             const string &catalog_name,
	                                                             const EntryLookupInfo &schema_lookup,
	                                                             OnEntryNotFound if_not_found);
	//! Scans all the schemas in the system one-by-one, invoking the callback for each entry
	DUCKDB_API virtual void ScanSchemas(ClientContext &context,
	                                    std::function<void(SchemaCatalogEntry &)> callback) = 0;
	//!
Gets the "schema.name" entry of the specified type, if entry does not exist behavior depends on OnEntryNotFound DUCKDB_API optional_ptr GetEntry(ClientContext &context, const string &schema, const EntryLookupInfo &lookup_info, OnEntryNotFound if_not_found); DUCKDB_API optional_ptr GetEntry(ClientContext &context, CatalogType catalog_type, const string &schema, const string &name, OnEntryNotFound if_not_found); DUCKDB_API optional_ptr GetEntry(CatalogEntryRetriever &retriever, const string &schema, const EntryLookupInfo &lookup_info, OnEntryNotFound if_not_found); DUCKDB_API CatalogEntry &GetEntry(ClientContext &context, const string &schema, const EntryLookupInfo &lookup_info); //! Gets the "catalog.schema.name" entry of the specified type, if entry does not exist behavior depends on //! OnEntryNotFound DUCKDB_API static optional_ptr GetEntry(ClientContext &context, const string &catalog, const string &schema, const EntryLookupInfo &lookup_info, OnEntryNotFound if_not_found); DUCKDB_API static optional_ptr GetEntry(CatalogEntryRetriever &retriever, const string &catalog, const string &schema, const EntryLookupInfo &lookup_info, OnEntryNotFound if_not_found); DUCKDB_API static CatalogEntry &GetEntry(ClientContext &context, const string &catalog, const string &schema, const EntryLookupInfo &lookup_info); template optional_ptr GetEntry(ClientContext &context, const string &schema_name, const string &name, OnEntryNotFound if_not_found, QueryErrorContext error_context = QueryErrorContext()) { EntryLookupInfo lookup_info(T::Type, name, error_context); auto entry = GetEntry(context, schema_name, lookup_info, if_not_found); if (!entry) { return nullptr; } if (entry->type != T::Type) { throw CatalogException(error_context, "%s is not an %s", name, T::Name); } return &entry->template Cast(); } template T &GetEntry(ClientContext &context, const string &schema_name, const string &name, QueryErrorContext error_context = QueryErrorContext()) { auto entry = GetEntry(context, 
schema_name, name, OnEntryNotFound::THROW_EXCEPTION, error_context); return *entry; } static CatalogEntry &GetEntry(ClientContext &context, CatalogType catalog_type, const string &catalog_name, const string &schema_name, const string &name); CatalogEntry &GetEntry(ClientContext &context, CatalogType catalog_type, const string &schema_name, const string &name); //! Append a scalar or aggregate function to the catalog DUCKDB_API optional_ptr AddFunction(ClientContext &context, CreateFunctionInfo &info); //! Alter an existing entry in the catalog. DUCKDB_API void Alter(CatalogTransaction transaction, AlterInfo &info); DUCKDB_API void Alter(ClientContext &context, AlterInfo &info); virtual PhysicalOperator &PlanCreateTableAs(ClientContext &context, PhysicalPlanGenerator &planner, LogicalCreateTable &op, PhysicalOperator &plan) = 0; virtual PhysicalOperator &PlanInsert(ClientContext &context, PhysicalPlanGenerator &planner, LogicalInsert &op, optional_ptr plan) = 0; virtual PhysicalOperator &PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op, PhysicalOperator &plan) = 0; virtual PhysicalOperator &PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op); virtual PhysicalOperator &PlanUpdate(ClientContext &context, PhysicalPlanGenerator &planner, LogicalUpdate &op, PhysicalOperator &plan) = 0; virtual PhysicalOperator &PlanUpdate(ClientContext &context, PhysicalPlanGenerator &planner, LogicalUpdate &op); virtual PhysicalOperator &PlanMergeInto(ClientContext &context, PhysicalPlanGenerator &planner, LogicalMergeInto &op, PhysicalOperator &plan); virtual unique_ptr BindCreateIndex(Binder &binder, CreateStatement &stmt, TableCatalogEntry &table, unique_ptr plan); virtual unique_ptr BindAlterAddIndex(Binder &binder, TableCatalogEntry &table_entry, unique_ptr plan, unique_ptr create_info, unique_ptr alter_info); virtual DatabaseSize GetDatabaseSize(ClientContext &context) = 0; virtual vector 
GetMetadataInfo(ClientContext &context); virtual bool InMemory() = 0; virtual string GetDBPath() = 0; virtual bool SupportsTimeTravel() const { return false; } virtual bool IsEncrypted() const { return false; } virtual string GetEncryptionCipher() const { return string(); } //! Whether or not this catalog should search a specific type with the standard priority DUCKDB_API virtual CatalogLookupBehavior CatalogTypeLookupRule(CatalogType type) const { return CatalogLookupBehavior::STANDARD; } //! Returns the default schema of the catalog virtual string GetDefaultSchema() const; //! The default table is used for `SELECT * FROM ;` //! FIXME: these should be virtual methods DUCKDB_API bool HasDefaultTable() const; DUCKDB_API void SetDefaultTable(const string &schema, const string &name); DUCKDB_API string GetDefaultTable() const; DUCKDB_API string GetDefaultTableSchema() const; //! Returns the dependency manager of this catalog - if the catalog has anye virtual optional_ptr GetDependencyManager(); //! 
Whether attaching a catalog with the given path and attach options would be considered a conflict virtual bool HasConflictingAttachOptions(const string &path, const AttachOptions &options); public: template static optional_ptr GetEntry(ClientContext &context, const string &catalog_name, const string &schema_name, const string &name, OnEntryNotFound if_not_found, QueryErrorContext error_context = QueryErrorContext()) { EntryLookupInfo lookup_info(T::Type, name, error_context); auto entry = GetEntry(context, catalog_name, schema_name, lookup_info, if_not_found); if (!entry) { return nullptr; } if (entry->type != T::Type) { throw CatalogException(error_context, "%s is not an %s", name, T::Name); } return &entry->template Cast(); } template static T &GetEntry(ClientContext &context, const string &catalog_name, const string &schema_name, const string &name, QueryErrorContext error_context = QueryErrorContext()) { auto entry = GetEntry(context, catalog_name, schema_name, name, OnEntryNotFound::THROW_EXCEPTION, error_context); return *entry; } DUCKDB_API vector> GetSchemas(ClientContext &context); DUCKDB_API static vector> GetSchemas(ClientContext &context, const string &catalog_name); DUCKDB_API static vector> GetSchemas(CatalogEntryRetriever &retriever, const string &catalog_name); DUCKDB_API static vector> GetAllSchemas(ClientContext &context); static vector> GetAllEntries(ClientContext &context, CatalogType catalog_type); virtual void Verify(); static CatalogException UnrecognizedConfigurationError(ClientContext &context, const string &name); //! Autoload the extension required for `configuration_name` or throw a CatalogException static String AutoloadExtensionByConfigName(ClientContext &context, const String &configuration_name); //! 
Autoload the extension required for `function_name` or throw a CatalogException static bool AutoLoadExtensionByCatalogEntry(DatabaseInstance &db, CatalogType type, const string &entry_name); DUCKDB_API static bool TryAutoLoad(ClientContext &context, const string &extension_name) noexcept; //! Called when the catalog is detached DUCKDB_API virtual void OnDetach(ClientContext &context); protected: //! Reference to the database AttachedDatabase &db; //! (optionally) a default table to query for `SELECT * FROM ;` string default_table; string default_table_schema; public: //! Lookup an entry using TryLookupEntry, throws if entry not found and if_not_found == THROW_EXCEPTION CatalogEntryLookup LookupEntry(CatalogEntryRetriever &retriever, const string &schema, const EntryLookupInfo &lookup_info, OnEntryNotFound if_not_found); private: //! Lookup an entry in the schema, returning a lookup with the entry and schema if they exist CatalogEntryLookup TryLookupEntryInternal(CatalogTransaction transaction, const string &schema, const EntryLookupInfo &lookup_info); //! Calls LookupEntryInternal on the schema, trying other schemas if the schema is invalid. Sets //! CatalogEntryLookup->error depending on if_not_found when no entry is found CatalogEntryLookup TryLookupEntry(CatalogEntryRetriever &retriever, const string &schema, const EntryLookupInfo &lookup_info, OnEntryNotFound if_not_found); static CatalogEntryLookup TryLookupEntry(CatalogEntryRetriever &retriever, const vector &lookups, const EntryLookupInfo &lookup_info, OnEntryNotFound if_not_found, bool allow_default_table_lookup); static CatalogEntryLookup TryLookupEntry(CatalogEntryRetriever &retriever, const string &catalog, const string &schema, const EntryLookupInfo &lookup_info, OnEntryNotFound if_not_found); //! 
Looks for a Catalog with a DefaultTable that matches the lookup static CatalogEntryLookup TryLookupDefaultTable(CatalogEntryRetriever &retriever, const EntryLookupInfo &lookup_info, bool allow_ignore_at_clause = false); //! Return an exception with did-you-mean suggestion. static CatalogException CreateMissingEntryException(CatalogEntryRetriever &retriever, const EntryLookupInfo &lookup_info, const reference_set_t &schemas); //! Return the close entry name, the distance and the belonging schema. static vector SimilarEntriesInSchemas(ClientContext &context, const EntryLookupInfo &lookup_info, const reference_set_t &schemas); virtual void DropSchema(ClientContext &context, DropInfo &info) = 0; public: template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; } // namespace duckdb #include namespace duckdb { class WriteStream { public: // Writes a set amount of data from the specified buffer into the stream and moves the stream forward accordingly virtual void WriteData(const_data_ptr_t buffer, idx_t write_size) = 0; // Writes a type into the stream and moves the stream forward sizeof(T) bytes // The type must be a standard layout type template void Write(T element) { static_assert(std::is_standard_layout(), "Write element must be a standard layout data type"); WriteData(const_data_ptr_cast(&element), sizeof(T)); } virtual ~WriteStream() { } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/file_system.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/file_compression_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class 
FileCompressionType : uint8_t { AUTO_DETECT = 0, UNCOMPRESSED = 1, GZIP = 2, ZSTD = 3 }; FileCompressionType FileCompressionTypeFromString(const string &input); string CompressionExtensionFromType(const FileCompressionType type); bool IsFileCompressed(string path, FileCompressionType type); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/file_buffer.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/debug_initialize.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class DebugInitialize : uint8_t { NO_INITIALIZE = 0, DEBUG_ZERO_INITIALIZE = 1, DEBUG_ONE_INITIALIZE = 2 }; } // namespace duckdb namespace duckdb { class Allocator; class BlockManager; class QueryContext; struct FileHandle; enum class FileBufferType : uint8_t { BLOCK = 1, MANAGED_BUFFER = 2, TINY_BUFFER = 3, EXTERNAL_FILE = 4 }; static constexpr idx_t FILE_BUFFER_TYPE_COUNT = 4; //! The FileBuffer represents a buffer that can be read or written to a Direct IO FileHandle. class FileBuffer { public: //! Allocates a buffer of the specified size, with room for additional header bytes //! (typically 8 bytes). On return, this->AllocSize() >= this->size >= user_size. //! Our allocation size will always be page-aligned, which is necessary to support //! DIRECT_IO FileBuffer(Allocator &allocator, FileBufferType type, uint64_t user_size, idx_t block_header_size); FileBuffer(Allocator &allocator, FileBufferType type, BlockManager &block_manager); FileBuffer(FileBuffer &source, FileBufferType type, idx_t block_header_size); virtual ~FileBuffer(); Allocator &allocator; //! The buffer that users can write to data_ptr_t buffer; //! The user-facing size of the buffer. //! 
This is equivalent to internal_size - block_header_size. uint64_t size; public: //! Read into the FileBuffer from the location. void Read(QueryContext context, FileHandle &handle, uint64_t location); //! Write the FileBuffer to the location. void Write(QueryContext context, FileHandle &handle, const uint64_t location); void Clear(); FileBufferType GetBufferType() const { return type; } // Same rules as the constructor. We add room for a header, in addition to // the requested user bytes. We then sector-align the result. void Resize(uint64_t user_size, BlockManager &block_manager); void Resize(BlockManager &block_manager); idx_t GetHeaderSize() const { return internal_size - size; } uint64_t AllocSize() const { return internal_size; } uint64_t Size() const { return size; } data_ptr_t InternalBuffer() { return internal_buffer; } struct MemoryRequirement { idx_t alloc_size; idx_t header_size; }; MemoryRequirement CalculateMemory(uint64_t user_size, uint64_t block_header_size) const; void Initialize(DebugInitialize info); protected: //! The type of the buffer. FileBufferType type; //! The pointer to the internal buffer that will be read from or written to. //! This includes the buffer header. data_ptr_t internal_buffer; //! The aligned size as passed to the constructor. //! This is the size that is read from or written to disk. 
uint64_t internal_size; void ReallocBuffer(idx_t new_size); void Init(); private: void ResizeInternal(uint64_t user_size, uint64_t block_header_size); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/file_glob_options.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class FileGlobOptions : uint8_t { DISALLOW_EMPTY = 0, ALLOW_EMPTY = 1, FALLBACK_GLOB = 2 }; struct FileGlobInput { FileGlobInput(FileGlobOptions options) // NOLINT: allow implicit conversion from FileGlobOptions : behavior(options) { } FileGlobInput(FileGlobOptions options, string extension_p) : behavior(options), extension(std::move(extension_p)) { } FileGlobOptions behavior; string extension; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/file_open_flags.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class FileLockType : uint8_t { NO_LOCK = 0, READ_LOCK = 1, WRITE_LOCK = 2 }; class FileOpenFlags { public: static constexpr idx_t FILE_FLAGS_READ = idx_t(1 << 0); static constexpr idx_t FILE_FLAGS_WRITE = idx_t(1 << 1); static constexpr idx_t FILE_FLAGS_DIRECT_IO = idx_t(1 << 2); static constexpr idx_t FILE_FLAGS_FILE_CREATE = idx_t(1 << 3); static constexpr idx_t FILE_FLAGS_FILE_CREATE_NEW = idx_t(1 << 4); static constexpr idx_t FILE_FLAGS_APPEND = idx_t(1 << 5); static constexpr idx_t FILE_FLAGS_PRIVATE = idx_t(1 << 6); static constexpr idx_t FILE_FLAGS_NULL_IF_NOT_EXISTS = idx_t(1 << 7); static constexpr idx_t FILE_FLAGS_PARALLEL_ACCESS = idx_t(1 << 8); static constexpr idx_t FILE_FLAGS_EXCLUSIVE_CREATE = idx_t(1 << 9); static constexpr idx_t FILE_FLAGS_NULL_IF_EXISTS = idx_t(1 << 10); static constexpr idx_t FILE_FLAGS_MULTI_CLIENT_ACCESS = idx_t(1 << 11); static constexpr idx_t 
FILE_FLAGS_DISABLE_LOGGING = idx_t(1 << 12); public: FileOpenFlags() = default; constexpr FileOpenFlags(idx_t flags) : flags(flags) { // NOLINT: allow implicit conversion } constexpr FileOpenFlags(FileLockType lock) : lock(lock) { // NOLINT: allow implicit conversion } constexpr FileOpenFlags(FileCompressionType compression) // NOLINT: allow implicit conversion : compression(compression) { } constexpr FileOpenFlags(idx_t flags, FileLockType lock, FileCompressionType compression) : flags(flags), lock(lock), compression(compression) { } static constexpr FileLockType MergeLock(FileLockType a, FileLockType b) { return a == FileLockType::NO_LOCK ? b : a; } static constexpr FileCompressionType MergeCompression(FileCompressionType a, FileCompressionType b) { return a == FileCompressionType::UNCOMPRESSED ? b : a; } inline constexpr FileOpenFlags operator|(FileOpenFlags b) const { return FileOpenFlags(flags | b.flags, MergeLock(lock, b.lock), MergeCompression(compression, b.compression)); } inline FileOpenFlags &operator|=(FileOpenFlags b) { flags |= b.flags; lock = MergeLock(lock, b.lock); compression = MergeCompression(compression, b.compression); return *this; } FileLockType Lock() { return lock; } FileCompressionType Compression() { return compression; } void SetCompression(FileCompressionType new_compression) { compression = new_compression; } void Verify(); inline bool OpenForReading() const { return flags & FILE_FLAGS_READ; } inline bool OpenForWriting() const { return flags & FILE_FLAGS_WRITE; } inline bool DirectIO() const { return flags & FILE_FLAGS_DIRECT_IO; } inline bool CreateFileIfNotExists() const { return flags & FILE_FLAGS_FILE_CREATE; } inline bool OverwriteExistingFile() const { return flags & FILE_FLAGS_FILE_CREATE_NEW; } inline bool OpenForAppending() const { return flags & FILE_FLAGS_APPEND; } inline bool CreatePrivateFile() const { return flags & FILE_FLAGS_PRIVATE; } inline bool ReturnNullIfNotExists() const { return flags & 
FILE_FLAGS_NULL_IF_NOT_EXISTS; } inline bool RequireParallelAccess() const { return flags & FILE_FLAGS_PARALLEL_ACCESS; } inline bool ExclusiveCreate() const { return flags & FILE_FLAGS_EXCLUSIVE_CREATE; } inline bool ReturnNullIfExists() const { return flags & FILE_FLAGS_NULL_IF_EXISTS; } inline bool MultiClientAccess() const { return flags & FILE_FLAGS_MULTI_CLIENT_ACCESS; } inline bool DisableLogging() const { return flags & FILE_FLAGS_DISABLE_LOGGING; } inline idx_t GetFlagsInternal() const { return flags; } private: idx_t flags = 0; FileLockType lock = FileLockType::NO_LOCK; FileCompressionType compression = FileCompressionType::UNCOMPRESSED; }; class FileFlags { public: //! Open file with read access static constexpr FileOpenFlags FILE_FLAGS_READ = FileOpenFlags(FileOpenFlags::FILE_FLAGS_READ); //! Open file with write access static constexpr FileOpenFlags FILE_FLAGS_WRITE = FileOpenFlags(FileOpenFlags::FILE_FLAGS_WRITE); //! Use direct IO when reading/writing to the file static constexpr FileOpenFlags FILE_FLAGS_DIRECT_IO = FileOpenFlags(FileOpenFlags::FILE_FLAGS_DIRECT_IO); //! Create file if not exists, can only be used together with WRITE static constexpr FileOpenFlags FILE_FLAGS_FILE_CREATE = FileOpenFlags(FileOpenFlags::FILE_FLAGS_FILE_CREATE); //! Always create a new file. If a file exists, the file is truncated. Cannot be used together with CREATE. static constexpr FileOpenFlags FILE_FLAGS_FILE_CREATE_NEW = FileOpenFlags(FileOpenFlags::FILE_FLAGS_FILE_CREATE_NEW); //! Open file in append mode static constexpr FileOpenFlags FILE_FLAGS_APPEND = FileOpenFlags(FileOpenFlags::FILE_FLAGS_APPEND); //! Open file with restrictive permissions (600 on linux/mac) can only be used when creating, throws if file exists static constexpr FileOpenFlags FILE_FLAGS_PRIVATE = FileOpenFlags(FileOpenFlags::FILE_FLAGS_PRIVATE); //! 
Return NULL if the file does not exist instead of throwing an error static constexpr FileOpenFlags FILE_FLAGS_NULL_IF_NOT_EXISTS = FileOpenFlags(FileOpenFlags::FILE_FLAGS_NULL_IF_NOT_EXISTS); //! Multiple threads may perform reads and writes in parallel static constexpr FileOpenFlags FILE_FLAGS_PARALLEL_ACCESS = FileOpenFlags(FileOpenFlags::FILE_FLAGS_PARALLEL_ACCESS); //! Ensure that this call creates the file, throw is file exists static constexpr FileOpenFlags FILE_FLAGS_EXCLUSIVE_CREATE = FileOpenFlags(FileOpenFlags::FILE_FLAGS_EXCLUSIVE_CREATE); //! Return NULL if the file exist instead of throwing an error static constexpr FileOpenFlags FILE_FLAGS_NULL_IF_EXISTS = FileOpenFlags(FileOpenFlags::FILE_FLAGS_NULL_IF_EXISTS); //! Multiple clients may access the file at the same time static constexpr FileOpenFlags FILE_FLAGS_MULTI_CLIENT_ACCESS = FileOpenFlags(FileOpenFlags::FILE_FLAGS_MULTI_CLIENT_ACCESS); //! Disables logging to avoid infinite loops when using FileHandle-backed log storage static constexpr FileOpenFlags FILE_FLAGS_DISABLE_LOGGING = FileOpenFlags(FileOpenFlags::FILE_FLAGS_DISABLE_LOGGING); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/open_file_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct ExtendedOpenFileInfo { unordered_map options; }; struct OpenFileInfo { OpenFileInfo() = default; OpenFileInfo(string path_p) // NOLINT: allow implicit conversion from string : path(std::move(path_p)) { } string path; shared_ptr extended_info; public: bool operator<(const OpenFileInfo &rhs) const { return path < rhs.path; } }; } // namespace duckdb #include #undef CreateDirectory #undef MoveFile #undef RemoveDirectory namespace duckdb { class AttachedDatabase; class DatabaseInstance; class FileOpener; class FileSystem; class Logger; class ClientContext; class QueryContext; enum class FileType { //! 
Regular file FILE_TYPE_REGULAR, //! Directory FILE_TYPE_DIR, //! FIFO named pipe FILE_TYPE_FIFO, //! Socket FILE_TYPE_SOCKET, //! Symbolic link FILE_TYPE_LINK, //! Block device FILE_TYPE_BLOCKDEV, //! Character device FILE_TYPE_CHARDEV, //! Unknown or invalid file handle FILE_TYPE_INVALID, }; struct FileHandle { public: DUCKDB_API FileHandle(FileSystem &file_system, string path, FileOpenFlags flags); FileHandle(const FileHandle &) = delete; DUCKDB_API virtual ~FileHandle(); // Read at [nr_bytes] bytes into [buffer], and return the bytes actually read. // File offset will be changed, which advances for number of bytes read. DUCKDB_API int64_t Read(void *buffer, idx_t nr_bytes); DUCKDB_API int64_t Read(QueryContext context, void *buffer, idx_t nr_bytes); DUCKDB_API int64_t Write(void *buffer, idx_t nr_bytes); // Read at [nr_bytes] bytes into [buffer]. // File offset will not be changed. DUCKDB_API void Read(void *buffer, idx_t nr_bytes, idx_t location); DUCKDB_API void Read(QueryContext context, void *buffer, idx_t nr_bytes, idx_t location); DUCKDB_API void Write(QueryContext context, void *buffer, idx_t nr_bytes, idx_t location); DUCKDB_API void Seek(idx_t location); DUCKDB_API void Reset(); DUCKDB_API idx_t SeekPosition(); DUCKDB_API void Sync(); DUCKDB_API void Truncate(int64_t new_size); DUCKDB_API string ReadLine(); DUCKDB_API string ReadLine(QueryContext context); DUCKDB_API bool Trim(idx_t offset_bytes, idx_t length_bytes); DUCKDB_API virtual idx_t GetProgress(); DUCKDB_API virtual FileCompressionType GetFileCompressionType(); DUCKDB_API bool CanSeek(); DUCKDB_API bool IsPipe(); DUCKDB_API bool OnDiskFile(); DUCKDB_API idx_t GetFileSize(); DUCKDB_API FileType GetType(); DUCKDB_API void TryAddLogger(FileOpener &opener); //! Closes the file handle. 
DUCKDB_API virtual void Close() = 0; string GetPath() const { return path; } FileOpenFlags GetFlags() const { return flags; } template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } public: FileSystem &file_system; string path; FileOpenFlags flags; shared_ptr logger; }; class FileSystem { public: DUCKDB_API virtual ~FileSystem(); public: DUCKDB_API static FileSystem &GetFileSystem(ClientContext &context); DUCKDB_API static FileSystem &GetFileSystem(DatabaseInstance &db); DUCKDB_API static FileSystem &Get(AttachedDatabase &db); DUCKDB_API virtual unique_ptr OpenFile(const string &path, FileOpenFlags flags, optional_ptr opener = nullptr); DUCKDB_API unique_ptr OpenFile(const OpenFileInfo &path, FileOpenFlags flags, optional_ptr opener = nullptr); //! Read exactly nr_bytes from the specified location in the file. Fails if nr_bytes could not be read. This is //! equivalent to calling SetFilePointer(location) followed by calling Read(). DUCKDB_API virtual void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location); //! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be written. This is //! equivalent to calling SetFilePointer(location) followed by calling Write(). DUCKDB_API virtual void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location); //! Read nr_bytes from the specified file into the buffer, moving the file pointer forward by nr_bytes. Returns the //! amount of bytes read. DUCKDB_API virtual int64_t Read(FileHandle &handle, void *buffer, int64_t nr_bytes); //! Write nr_bytes from the buffer into the file, moving the file pointer forward by nr_bytes. DUCKDB_API virtual int64_t Write(FileHandle &handle, void *buffer, int64_t nr_bytes); //! Excise a range of the file. The OS can drop pages from the page-cache, and the file-system is free to deallocate //! 
this range (sparse file support). Reads to the range will succeed but will return undefined data. DUCKDB_API virtual bool Trim(FileHandle &handle, idx_t offset_bytes, idx_t length_bytes); //! Returns the file size of a file handle, returns -1 on error DUCKDB_API virtual int64_t GetFileSize(FileHandle &handle); //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error DUCKDB_API virtual timestamp_t GetLastModifiedTime(FileHandle &handle); //! Returns a tag that uniquely identifies the version of the file, //! used for checking cache invalidation for CachingFileSystem httpfs files DUCKDB_API virtual string GetVersionTag(FileHandle &handle); //! Returns the file type of the attached handle DUCKDB_API virtual FileType GetFileType(FileHandle &handle); //! Truncate a file to a maximum size of new_size, new_size should be smaller than or equal to the current size of //! the file DUCKDB_API virtual void Truncate(FileHandle &handle, int64_t new_size); //! Check if a directory exists DUCKDB_API virtual bool DirectoryExists(const string &directory, optional_ptr opener = nullptr); //! Create a directory if it does not exist DUCKDB_API virtual void CreateDirectory(const string &directory, optional_ptr opener = nullptr); //! Helper function that uses DirectoryExists and CreateDirectory to ensure all directories in path are created DUCKDB_API virtual void CreateDirectoriesRecursive(const string &path, optional_ptr opener = nullptr); //! Recursively remove a directory and all files in it DUCKDB_API virtual void RemoveDirectory(const string &directory, optional_ptr opener = nullptr); //! List files in a directory, invoking the callback method for each one with (filename, is_dir) DUCKDB_API virtual bool ListFiles(const string &directory, const std::function &callback, FileOpener *opener = nullptr); DUCKDB_API bool ListFiles(const string &directory, const std::function &callback, optional_ptr opener = nullptr); //! 
Move a file from source path to the target, StorageManager relies on this being an atomic action for ACID //! properties DUCKDB_API virtual void MoveFile(const string &source, const string &target, optional_ptr opener = nullptr); //! Check if a file exists DUCKDB_API virtual bool FileExists(const string &filename, optional_ptr opener = nullptr); //! Check if path is pipe DUCKDB_API virtual bool IsPipe(const string &filename, optional_ptr opener = nullptr); //! Remove a file from disk DUCKDB_API virtual void RemoveFile(const string &filename, optional_ptr opener = nullptr); //! Remvoe a file from disk if it exists - if it does not exist, return false DUCKDB_API virtual bool TryRemoveFile(const string &filename, optional_ptr opener = nullptr); //! Sync a file handle to disk DUCKDB_API virtual void FileSync(FileHandle &handle); //! Sets the working directory DUCKDB_API static void SetWorkingDirectory(const string &path); //! Gets the working directory DUCKDB_API static string GetWorkingDirectory(); //! Gets the users home directory DUCKDB_API static string GetHomeDirectory(optional_ptr opener); //! Gets the users home directory DUCKDB_API virtual string GetHomeDirectory(); //! Expands a given path, including e.g. expanding the home directory of the user DUCKDB_API static string ExpandPath(const string &path, optional_ptr opener); //! Expands a given path, including e.g. expanding the home directory of the user DUCKDB_API virtual string ExpandPath(const string &path); //! Returns the system-available memory in bytes. Returns DConstants::INVALID_INDEX if the system function fails. DUCKDB_API static optional_idx GetAvailableMemory(); //! Returns the space available on the disk. Returns DConstants::INVALID_INDEX if the information was not available. DUCKDB_API static optional_idx GetAvailableDiskSpace(const string &path); //! Path separator for path DUCKDB_API virtual string PathSeparator(const string &path); //! 
Checks if path is starts with separator (i.e., '/' on UNIX '\\' on Windows) DUCKDB_API bool IsPathAbsolute(const string &path); //! Normalize an absolute path - the goal of normalizing is converting "\test.db" and "C:/test.db" into "C:\test.db" //! so that the database system cache can correctly DUCKDB_API string NormalizeAbsolutePath(const string &path); //! Join two paths together DUCKDB_API string JoinPath(const string &a, const string &path); //! Convert separators in a path to the local separators (e.g. convert "/" into \\ on windows) DUCKDB_API string ConvertSeparators(const string &path); //! Extract the base name of a file (e.g. if the input is lib/example.dll the base name is 'example') DUCKDB_API string ExtractBaseName(const string &path); //! Extract the extension of a file (e.g. if the input is lib/example.dll the extension is 'dll') DUCKDB_API string ExtractExtension(const string &path); //! Extract the name of a file (e.g if the input is lib/example.dll the name is 'example.dll') DUCKDB_API string ExtractName(const string &path); //! Returns the value of an environment variable - or the empty string if it is not set DUCKDB_API static string GetEnvVariable(const string &name); //! Whether there is a glob in the string DUCKDB_API static bool HasGlob(const string &str); //! Runs a glob on the file system, returning a list of matching files DUCKDB_API virtual vector Glob(const string &path, FileOpener *opener = nullptr); DUCKDB_API vector GlobFiles(const string &path, ClientContext &context, const FileGlobInput &input = FileGlobOptions::DISALLOW_EMPTY); //! registers a sub-file system to handle certain file name prefixes, e.g. http:// etc. DUCKDB_API virtual void RegisterSubSystem(unique_ptr sub_fs); DUCKDB_API virtual void RegisterSubSystem(FileCompressionType compression_type, unique_ptr fs); //! 
Unregister a sub-filesystem by name DUCKDB_API virtual void UnregisterSubSystem(const string &name); // !Extract a sub-filesystem by name, with ownership transfered, return nullptr if not registered or the subsystem // has been disabled. DUCKDB_API virtual unique_ptr ExtractSubSystem(const string &name); //! List registered sub-filesystems, including builtin ones DUCKDB_API virtual vector ListSubSystems(); //! Whether or not a sub-system can handle a specific file path DUCKDB_API virtual bool CanHandleFile(const string &fpath); //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location DUCKDB_API virtual void Seek(FileHandle &handle, idx_t location); //! Reset a file to the beginning (equivalent to Seek(handle, 0) for simple files) DUCKDB_API virtual void Reset(FileHandle &handle); DUCKDB_API virtual idx_t SeekPosition(FileHandle &handle); //! If FS was manually set by the user DUCKDB_API virtual bool IsManuallySet(); //! Whether or not we can seek into the file DUCKDB_API virtual bool CanSeek(); //! Whether or not the FS handles plain files on disk. This is relevant for certain optimizations, as random reads //! in a file on-disk are much cheaper than e.g. random reads in a file over the network DUCKDB_API virtual bool OnDiskFile(FileHandle &handle); DUCKDB_API virtual unique_ptr OpenCompressedFile(QueryContext context, unique_ptr handle, bool write); //! Create a LocalFileSystem. DUCKDB_API static unique_ptr CreateLocal(); //! Return the name of the filesytem. Used for forming diagnosis messages. DUCKDB_API virtual std::string GetName() const = 0; //! 
Whether or not a file is remote or local, based only on file path DUCKDB_API static bool IsRemoteFile(const string &path); DUCKDB_API static bool IsRemoteFile(const string &path, string &extension); DUCKDB_API virtual void SetDisabledFileSystems(const vector &names); DUCKDB_API virtual bool SubSystemIsDisabled(const string &name); DUCKDB_API static bool IsDirectory(const OpenFileInfo &info); protected: DUCKDB_API virtual unique_ptr OpenFileExtended(const OpenFileInfo &path, FileOpenFlags flags, optional_ptr opener); DUCKDB_API virtual bool SupportsOpenFileExtended() const; DUCKDB_API virtual bool ListFilesExtended(const string &directory, const std::function &callback, optional_ptr opener); DUCKDB_API virtual bool SupportsListFilesExtended() const; public: template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; } // namespace duckdb namespace duckdb { #define FILE_BUFFER_SIZE 4096 class BufferedFileWriter : public WriteStream { public: static constexpr FileOpenFlags DEFAULT_OPEN_FLAGS = FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE; //! Serializes to a buffer allocated by the serializer, will expand when //! writing past the initial threshold DUCKDB_API BufferedFileWriter(FileSystem &fs, const string &path, FileOpenFlags open_flags = DEFAULT_OPEN_FLAGS); FileSystem &fs; string path; unsafe_unique_array data; idx_t offset; idx_t total_written; unique_ptr handle; public: DUCKDB_API void WriteData(const_data_ptr_t buffer, idx_t write_size) override; //! Flush all changes to the file and then close the file DUCKDB_API void Close(); //! Flush all changes and fsync the file to disk DUCKDB_API void Sync(); //! Flush the buffer to the file (without sync) DUCKDB_API void Flush(); //! Returns the current size of the file DUCKDB_API idx_t GetFileSize(); //! 
Truncate the size to a previous size (given that size <= GetFileSize()) DUCKDB_API void Truncate(idx_t size); DUCKDB_API idx_t GetTotalWritten() const; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/udf_function.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/scalar_function.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/vector_operations/binary_executor.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/vector.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/bitset.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::bitset; } //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/vector_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class VectorType : uint8_t { FLAT_VECTOR, // Flat vectors represent a standard uncompressed vector FSST_VECTOR, // Contains string data compressed with FSST CONSTANT_VECTOR, // Constant vector represents a single constant DICTIONARY_VECTOR, // Dictionary vector represents a selection vector on top of another vector SEQUENCE_VECTOR // Sequence vector represents a sequence with a start point and an increment }; string 
VectorTypeToString(VectorType type); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/selection_vector.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/allocator.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Allocator; class AttachedDatabase; class ClientContext; class DatabaseInstance; class ExecutionContext; class ThreadContext; struct AllocatorDebugInfo; enum class AllocatorFreeType { REQUIRES_FREE, DOES_NOT_REQUIRE_FREE }; struct PrivateAllocatorData { PrivateAllocatorData(); virtual ~PrivateAllocatorData(); AllocatorFreeType free_type = AllocatorFreeType::REQUIRES_FREE; unique_ptr debug_info; template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; typedef data_ptr_t (*allocate_function_ptr_t)(PrivateAllocatorData *private_data, idx_t size); typedef void (*free_function_ptr_t)(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t size); typedef data_ptr_t (*reallocate_function_ptr_t)(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t old_size, idx_t size); class AllocatedData { public: DUCKDB_API AllocatedData(); DUCKDB_API AllocatedData(Allocator &allocator, data_ptr_t pointer, idx_t allocated_size); DUCKDB_API ~AllocatedData(); // disable copy constructors AllocatedData(const AllocatedData &other) = delete; AllocatedData &operator=(const AllocatedData &) = delete; //! 
enable move constructors DUCKDB_API AllocatedData(AllocatedData &&other) noexcept; DUCKDB_API AllocatedData &operator=(AllocatedData &&) noexcept; data_ptr_t get() { // NOLINT: matching std style return pointer; } operator bool() const { // NOLINT: missing explicit return pointer != nullptr; } const_data_ptr_t get() const { // NOLINT: matching std style return pointer; } idx_t GetSize() const { return allocated_size; } bool IsSet() { return pointer; } optional_ptr GetAllocator() const { return allocator; } void Reset(); private: optional_ptr allocator; data_ptr_t pointer; idx_t allocated_size; }; class Allocator { // 281TB ought to be enough for anybody static constexpr const idx_t MAXIMUM_ALLOC_SIZE = 281474976710656ULL; public: DUCKDB_API Allocator(); DUCKDB_API Allocator(allocate_function_ptr_t allocate_function_p, free_function_ptr_t free_function_p, reallocate_function_ptr_t reallocate_function_p, unique_ptr private_data); Allocator &operator=(Allocator &&allocator) noexcept = delete; DUCKDB_API ~Allocator(); DUCKDB_API data_ptr_t AllocateData(idx_t size); DUCKDB_API void FreeData(data_ptr_t pointer, idx_t size); DUCKDB_API data_ptr_t ReallocateData(data_ptr_t pointer, idx_t old_size, idx_t new_size); AllocatedData Allocate(idx_t size) { return AllocatedData(*this, AllocateData(size), size); } static data_ptr_t DefaultAllocate(PrivateAllocatorData *private_data, idx_t size); static void DefaultFree(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t size); static data_ptr_t DefaultReallocate(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t old_size, idx_t size); static Allocator &Get(ClientContext &context); static Allocator &Get(DatabaseInstance &db); static Allocator &Get(AttachedDatabase &db); PrivateAllocatorData *GetPrivateData() { return private_data.get(); } DUCKDB_API static Allocator &DefaultAllocator(); DUCKDB_API static shared_ptr &DefaultAllocatorReference(); static bool SupportsFlush(); static optional_idx DecayDelay(); 
static void ThreadFlush(bool allocator_background_threads, idx_t threshold, idx_t thread_count); static void ThreadIdle(); static void FlushAll(); static void SetBackgroundThreads(bool enable); private: allocate_function_ptr_t allocate_function; free_function_ptr_t free_function; reallocate_function_ptr_t reallocate_function; unique_ptr private_data; }; template T *AllocateArray(idx_t size) { return (T *)Allocator::DefaultAllocator().AllocateData(size * sizeof(T)); } template void DeleteArray(T *ptr, idx_t size) { Allocator::DefaultAllocator().FreeData(data_ptr_cast(ptr), size * sizeof(T)); } template T *AllocateObject(ARGS &&... args) { auto data = Allocator::DefaultAllocator().AllocateData(sizeof(T)); return new (data) T(std::forward(args)...); } template void DestroyObject(T *ptr) { ptr->~T(); Allocator::DefaultAllocator().FreeData(data_ptr_cast(ptr), sizeof(T)); } //! The BufferAllocator is a wrapper around the global allocator class that sends any allocations made through the //! buffer manager. This makes the buffer manager aware of the memory usage, allowing it to potentially free //! other blocks to make space in memory. //! Note that there is a cost to doing so (several atomic operations will be performed on allocation/free). //! As such this class should be used primarily for larger allocations. struct BufferAllocator { DUCKDB_API static Allocator &Get(ClientContext &context); DUCKDB_API static Allocator &Get(DatabaseInstance &db); DUCKDB_API static Allocator &Get(AttachedDatabase &db); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/vector_size.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! The default standard vector size #define DEFAULT_STANDARD_VECTOR_SIZE 2048U //! 
The vector size used in the execution engine #ifndef STANDARD_VECTOR_SIZE #define STANDARD_VECTOR_SIZE DEFAULT_STANDARD_VECTOR_SIZE #endif #if (STANDARD_VECTOR_SIZE & (STANDARD_VECTOR_SIZE - 1) != 0) #error The vector size must be a power of two #endif } // namespace duckdb namespace duckdb { class VectorBuffer; struct SelectionData { DUCKDB_API explicit SelectionData(idx_t count); AllocatedData owned_data; }; struct SelectionVector { SelectionVector() : sel_vector(nullptr) { } explicit SelectionVector(sel_t *sel) { Initialize(sel); } explicit SelectionVector(idx_t count) { Initialize(count); } SelectionVector(idx_t start, idx_t count) { Initialize(MaxValue(count, STANDARD_VECTOR_SIZE)); for (idx_t i = 0; i < count; i++) { set_index(i, start + i); } } SelectionVector(const SelectionVector &sel_vector) { Initialize(sel_vector); } explicit SelectionVector(buffer_ptr data) { Initialize(std::move(data)); } SelectionVector &operator=(SelectionVector &&other) noexcept { sel_vector = other.sel_vector; other.sel_vector = nullptr; selection_data = std::move(other.selection_data); return *this; } public: static idx_t Inverted(const SelectionVector &src, SelectionVector &dst, idx_t source_size, idx_t count) { idx_t src_idx = 0; idx_t dst_idx = 0; for (idx_t i = 0; i < count; i++) { if (src_idx < source_size && src.get_index(src_idx) == i) { src_idx++; // This index is selected by 'src', skip it in 'dst' continue; } // This index does not exist in 'src', add it to the selection of 'dst' dst.set_index(dst_idx++, i); } return dst_idx; } void Initialize(sel_t *sel) { selection_data.reset(); sel_vector = sel; } void Initialize(idx_t count = STANDARD_VECTOR_SIZE) { selection_data = make_shared_ptr(count); sel_vector = reinterpret_cast(selection_data->owned_data.get()); } void Initialize(buffer_ptr data) { selection_data = std::move(data); sel_vector = reinterpret_cast(selection_data->owned_data.get()); } void Initialize(const SelectionVector &other) { selection_data = 
other.selection_data; sel_vector = other.sel_vector; } inline void set_index(idx_t idx, idx_t loc) { // NOLINT: allow casing for legacy reasons sel_vector[idx] = UnsafeNumericCast(loc); } inline void swap(idx_t i, idx_t j) { // NOLINT: allow casing for legacy reasons sel_t tmp = sel_vector[i]; sel_vector[i] = sel_vector[j]; sel_vector[j] = tmp; } inline idx_t get_index(idx_t idx) const { // NOLINT: allow casing for legacy reasons return sel_vector ? get_index_unsafe(idx) : idx; } inline idx_t get_index_unsafe(idx_t idx) const { // NOLINT: allow casing for legacy reasons return sel_vector[idx]; } sel_t *data() { // NOLINT: allow casing for legacy reasons return sel_vector; } const sel_t *data() const { // NOLINT: allow casing for legacy reasons return sel_vector; } buffer_ptr sel_data() { // NOLINT: allow casing for legacy reasons return selection_data; } buffer_ptr Slice(const SelectionVector &sel, idx_t count) const; string ToString(idx_t count = 0) const; void Print(idx_t count = 0) const; inline const sel_t &operator[](idx_t index) const { return sel_vector[index]; } inline sel_t &operator[](idx_t index) { return sel_vector[index]; } inline bool IsSet() const { return sel_vector; } void Verify(idx_t count, idx_t vector_size) const; void Sort(idx_t count); private: sel_t *sel_vector; buffer_ptr selection_data; }; class OptionalSelection { public: explicit OptionalSelection(SelectionVector *sel_p) { Initialize(sel_p); } void Initialize(SelectionVector *sel_p) { sel = sel_p; if (sel) { vec.Initialize(sel->data()); sel = &vec; } } inline operator SelectionVector *() { // NOLINT: allow implicit conversion to SelectionVector return sel; } inline void Append(idx_t &count, const idx_t idx) { if (sel) { sel->set_index(count, idx); } ++count; } inline void Advance(idx_t completed) { if (sel) { sel->Initialize(sel->data() + completed); } } private: SelectionVector *sel; SelectionVector vec; }; } // namespace duckdb 
//===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/validity_mask.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/to_string.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { using std::to_string; } namespace duckdb { struct SelectionVector; struct ValidityMask; template struct TemplatedValidityData { static constexpr const idx_t BITS_PER_VALUE = sizeof(V) * 8; static constexpr const V MAX_ENTRY = V(~V(0)); public: inline explicit TemplatedValidityData(idx_t count) { auto entry_count = EntryCount(count); owned_data = make_unsafe_uniq_array_uninitialized(entry_count); for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { owned_data[entry_idx] = MAX_ENTRY; } } inline TemplatedValidityData(const V *validity_mask, idx_t count) { D_ASSERT(validity_mask); auto entry_count = EntryCount(count); owned_data = make_unsafe_uniq_array_uninitialized(entry_count); for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { owned_data[entry_idx] = validity_mask[entry_idx]; } } unsafe_unique_array owned_data; public: static inline idx_t EntryCount(idx_t count) { return (count + (BITS_PER_VALUE - 1)) / BITS_PER_VALUE; } }; using validity_t = uint64_t; struct ValidityData : TemplatedValidityData { public: DUCKDB_API explicit ValidityData(idx_t count); DUCKDB_API ValidityData(const ValidityMask &original, idx_t count); }; //! 
Type used for validity masks template struct TemplatedValidityMask { using ValidityBuffer = TemplatedValidityData; public: static constexpr const idx_t BITS_PER_VALUE = ValidityBuffer::BITS_PER_VALUE; static constexpr const idx_t STANDARD_ENTRY_COUNT = (STANDARD_VECTOR_SIZE + (BITS_PER_VALUE - 1)) / BITS_PER_VALUE; static constexpr const idx_t STANDARD_MASK_SIZE = STANDARD_ENTRY_COUNT * sizeof(V); public: inline TemplatedValidityMask() : validity_mask(nullptr), capacity(STANDARD_VECTOR_SIZE) { } inline explicit TemplatedValidityMask(idx_t target_count) : validity_mask(nullptr), capacity(target_count) { } inline explicit TemplatedValidityMask(V *ptr, idx_t capacity) : validity_mask(ptr), capacity(capacity) { } inline TemplatedValidityMask(const TemplatedValidityMask &original, idx_t count) { Copy(original, count); } static inline idx_t ValidityMaskSize(idx_t count = STANDARD_VECTOR_SIZE) { return ValidityBuffer::EntryCount(count) * sizeof(V); } inline bool AllValid() const { return !validity_mask; } inline bool CheckAllValid(idx_t count) const { return CountValid(count) == count; } inline bool CheckAllInvalid(idx_t count) const { return CountValid(count) == 0; } inline bool CheckAllValid(idx_t to, idx_t from) const { if (AllValid()) { return true; } for (idx_t i = from; i < to; i++) { if (!RowIsValid(i)) { return false; } } return true; } idx_t CountValid(const idx_t count) const { if (AllValid() || count == 0) { return count; } idx_t valid = 0; const auto entry_count = EntryCount(count); for (idx_t entry_idx = 0; entry_idx < entry_count;) { auto entry = GetValidityEntry(entry_idx++); // Handle ragged end (if not exactly multiple of BITS_PER_VALUE) if (entry_idx == entry_count && count % BITS_PER_VALUE != 0) { const auto shift = BITS_PER_VALUE - (count % BITS_PER_VALUE); const auto mask = ValidityBuffer::MAX_ENTRY >> shift; entry &= mask; } else if (AllValid(entry)) { // Handle all set valid += BITS_PER_VALUE; continue; } // Count partial entry (Kernighan's 
algorithm) while (entry) { entry &= (entry - 1); ++valid; } } return valid; } inline V *GetData() const { return validity_mask; } inline void Reset(idx_t target_count_p = STANDARD_VECTOR_SIZE) { validity_mask = nullptr; validity_data.reset(); capacity = target_count_p; } static inline idx_t EntryCount(idx_t count) { return ValidityBuffer::EntryCount(count); } inline V GetValidityEntry(idx_t entry_idx) const { if (!validity_mask) { return ValidityBuffer::MAX_ENTRY; } return GetValidityEntryUnsafe(entry_idx); } inline V &GetValidityEntryUnsafe(idx_t entry_idx) const { return validity_mask[entry_idx]; } static inline bool AllValid(V entry) { // Check if all the tuples that are covered by this entry (usually 64) are valid return entry == ValidityBuffer::MAX_ENTRY; } static inline bool NoneValid(V entry) { return entry == 0; } static inline bool RowIsValid(const V &entry, const idx_t &idx_in_entry) { return entry & (V(1) << V(idx_in_entry)); } static inline void GetEntryIndex(idx_t row_idx, idx_t &entry_idx, idx_t &idx_in_entry) { entry_idx = row_idx / BITS_PER_VALUE; idx_in_entry = row_idx % BITS_PER_VALUE; } //! Get an entry that has first-n bits set as valid and rest set as invalid static inline V EntryWithValidBits(idx_t n) { if (n == 0) { return V(0); } return ValidityBuffer::MAX_ENTRY >> (BITS_PER_VALUE - n); } static inline idx_t SizeInBytes(idx_t n) { return (n + BITS_PER_VALUE - 1) / BITS_PER_VALUE; } //! RowIsValidUnsafe should only be used if AllValid() is false: it achieves the same as RowIsValid but skips a //! not-null check inline bool RowIsValidUnsafe(idx_t row_idx) const { D_ASSERT(validity_mask); idx_t entry_idx, idx_in_entry; GetEntryIndex(row_idx, entry_idx, idx_in_entry); auto entry = GetValidityEntryUnsafe(entry_idx); return RowIsValid(entry, idx_in_entry); } //! Returns true if a row is valid (i.e. 
not null), false otherwise inline bool RowIsValid(idx_t row_idx) const { #ifdef DEBUG if (row_idx >= capacity) { throw InternalException("ValidityMask::RowIsValid - row_idx %d is out-of-range for mask with capacity %llu", row_idx, capacity); } #endif if (!validity_mask) { return true; } return RowIsValidUnsafe(row_idx); } //! Same as SetValid, but skips a null check on validity_mask inline void SetValidUnsafe(idx_t row_idx) { D_ASSERT(validity_mask); idx_t entry_idx, idx_in_entry; GetEntryIndex(row_idx, entry_idx, idx_in_entry); validity_mask[entry_idx] |= (V(1) << V(idx_in_entry)); } //! Marks the entry at the specified row index as valid (i.e. not-null) inline void SetValid(idx_t row_idx) { #ifdef DEBUG if (row_idx >= capacity) { throw InternalException("ValidityMask::SetValid - row_idx %d is out-of-range for mask with capacity %llu", row_idx, capacity); } #endif if (!validity_mask) { // if AllValid() we don't need to do anything // the row is already valid return; } SetValidUnsafe(row_idx); } //! Marks the bit at the specified entry as invalid (i.e. null) inline void SetInvalidUnsafe(idx_t entry_idx, idx_t idx_in_entry) { D_ASSERT(validity_mask); validity_mask[entry_idx] &= ~(V(1) << V(idx_in_entry)); } //! Marks the bit at the specified row index as invalid (i.e. null) inline void SetInvalidUnsafe(idx_t row_idx) { idx_t entry_idx, idx_in_entry; GetEntryIndex(row_idx, entry_idx, idx_in_entry); SetInvalidUnsafe(entry_idx, idx_in_entry); } //! Marks the entry at the specified row index as invalid (i.e. null) inline void SetInvalid(idx_t row_idx) { #ifdef DEBUG if (row_idx >= capacity) { throw InternalException("ValidityMask::SetInvalid - row_idx %d is out-of-range for mask with capacity %llu", row_idx, capacity); } #endif if (!validity_mask) { Initialize(capacity); } SetInvalidUnsafe(row_idx); } //! 
Mark the entry at the specified index as either valid or invalid (non-null or null) inline void Set(idx_t row_idx, bool valid) { if (valid) { SetValid(row_idx); } else { SetInvalid(row_idx); } } //! Ensure the validity mask is writable, allocating space if it is not initialized inline void EnsureWritable() { if (!validity_mask) { Initialize(); } } //! Marks a range of entries in the validity mask as invalid (null) //! This is useful for initialising large masks in parallel. inline void SetRangeInvalid(const idx_t count, const idx_t begin_entry, const idx_t end_entry) { EnsureWritable(); if (count == 0) { return; } const auto last_entry_index = ValidityBuffer::EntryCount(count) - 1; for (idx_t i = begin_entry; i < MinValue(last_entry_index, end_entry); i++) { validity_mask[i] = 0; } if (end_entry <= last_entry_index) { return; } const auto last_entry_bits = count % BITS_PER_VALUE; validity_mask[last_entry_index] = (last_entry_bits == 0) ? 0 : static_cast(ValidityBuffer::MAX_ENTRY << (last_entry_bits)); } //! Marks exactly "count" bits in the validity mask as invalid (null) inline void SetAllInvalid(idx_t count) { SetRangeInvalid(count, 0, EntryCount(count)); } //! Marks exactly "count" bits in the validity mask as valid (not null) inline void SetAllValid(idx_t count) { EnsureWritable(); if (count == 0) { return; } auto last_entry_index = ValidityBuffer::EntryCount(count) - 1; for (idx_t i = 0; i < last_entry_index; i++) { validity_mask[i] = ValidityBuffer::MAX_ENTRY; } auto last_entry_bits = count % BITS_PER_VALUE; validity_mask[last_entry_index] |= (last_entry_bits == 0) ? 
ValidityBuffer::MAX_ENTRY : ~static_cast(ValidityBuffer::MAX_ENTRY << (last_entry_bits)); } inline bool IsMaskSet() const { if (validity_mask) { return true; } return false; } public: inline void Initialize(validity_t *validity, idx_t new_capacity) { validity_data.reset(); validity_mask = validity; capacity = new_capacity; } inline void Initialize(const TemplatedValidityMask &other) { validity_mask = other.validity_mask; validity_data = other.validity_data; capacity = other.capacity; } inline void Initialize(idx_t count) { capacity = count; validity_data = make_buffer(count); validity_mask = validity_data->owned_data.get(); } inline void Initialize() { Initialize(capacity); } inline void Copy(const TemplatedValidityMask &other, idx_t count) { capacity = count; if (other.AllValid()) { validity_data = nullptr; validity_mask = nullptr; } else { validity_data = make_buffer(other.validity_mask, count); validity_mask = validity_data->owned_data.get(); } } protected: V *validity_mask; buffer_ptr validity_data; idx_t capacity; }; struct ValidityMask : public TemplatedValidityMask { public: inline ValidityMask() : TemplatedValidityMask(nullptr, STANDARD_VECTOR_SIZE) { } inline explicit ValidityMask(idx_t capacity) : TemplatedValidityMask(capacity) { } inline explicit ValidityMask(validity_t *ptr, idx_t capacity) : TemplatedValidityMask(ptr, capacity) { } inline ValidityMask(const ValidityMask &original, idx_t count) : TemplatedValidityMask(original, count) { } public: DUCKDB_API void Resize(idx_t new_size); DUCKDB_API idx_t Capacity() const; DUCKDB_API void SliceInPlace(const ValidityMask &other, idx_t target_offset, idx_t source_offset, idx_t count); DUCKDB_API void Slice(const ValidityMask &other, idx_t source_offset, idx_t count); DUCKDB_API void CopySel(const ValidityMask &other, const SelectionVector &sel, idx_t source_offset, idx_t target_offset, idx_t count); DUCKDB_API void Combine(const ValidityMask &other, idx_t count); DUCKDB_API string ToString(idx_t count) 
const; DUCKDB_API string ToString() const; DUCKDB_API static bool IsAligned(idx_t count); void Write(WriteStream &writer, idx_t count); void Read(ReadStream &reader, idx_t count); }; //===--------------------------------------------------------------------===// // ValidityArray //===--------------------------------------------------------------------===// struct ValidityArray { inline ValidityArray() { } inline bool AllValid() const { return !validity_mask; } inline void Initialize(idx_t count, bool initial = true) { capacity = count; validity_data = make_unsafe_uniq_array(count); validity_mask = validity_data.get(); memset(validity_mask, initial, sizeof(bool) * count); } inline void InitializeEmpty(idx_t count) { capacity = count; } idx_t Capacity() const { return capacity; } //! RowIsValidUnsafe should only be used if AllValid() is false: it achieves the same as RowIsValid but skips a //! not-null check inline bool RowIsValidUnsafe(idx_t row_idx) const { D_ASSERT(validity_mask); return validity_mask[row_idx]; } //! Returns true if a row is valid (i.e. not null), false otherwise inline bool RowIsValid(idx_t row_idx) const { #ifdef DEBUG if (row_idx >= capacity) { throw InternalException("ValidityData::RowIsValid - row_idx %d is out-of-range for mask with capacity %llu", row_idx, capacity); } #endif if (!validity_mask) { return true; } return RowIsValidUnsafe(row_idx); } //! Same as SetValid, but skips a null check on validity_mask inline void SetValidUnsafe(idx_t row_idx) { D_ASSERT(validity_mask); validity_mask[row_idx] = true; } //! Marks the entry at the specified row index as valid (i.e. 
not-null) inline void SetValid(idx_t row_idx) { #ifdef DEBUG if (row_idx >= capacity) { throw InternalException("ValidityData::SetValid - row_idx %d is out-of-range for mask with capacity %llu", row_idx, capacity); } #endif if (!validity_mask) { // if AllValid() we don't need to do anything // the row is already valid return; } SetValidUnsafe(row_idx); } inline void Pack(ValidityMask &mask, const idx_t count) const { if (AllValid()) { mask.Reset(count); return; } mask.Initialize(count); const auto entire_entries = count / ValidityMask::BITS_PER_VALUE; const auto ragged = count % ValidityMask::BITS_PER_VALUE; auto bits = mask.GetData(); idx_t row_idx = 0; for (idx_t i = 0; i < entire_entries; ++i) { validity_t entry = 0; for (idx_t j = 0; j < ValidityMask::BITS_PER_VALUE; ++j) { if (RowIsValidUnsafe(row_idx++)) { entry |= validity_t(1) << j; } } *bits++ = entry; } if (ragged) { validity_t entry = 0; for (idx_t j = 0; j < ragged; ++j) { if (RowIsValidUnsafe(row_idx++)) { entry |= validity_t(1) << j; } } *bits++ = entry; } } private: bool *validity_mask = nullptr; unsafe_unique_array validity_data; idx_t capacity = 0; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/vector_buffer.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/string_heap.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/arena_allocator.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct ArenaChunk { ArenaChunk(Allocator &allocator, idx_t size); ~ArenaChunk(); AllocatedData data; idx_t current_position; idx_t maximum_size; 
unsafe_unique_ptr next; ArenaChunk *prev; }; class ArenaAllocator { public: static constexpr const idx_t ARENA_ALLOCATOR_INITIAL_CAPACITY = 2048; static constexpr const idx_t ARENA_ALLOCATOR_MAX_CAPACITY = 1ULL << 24ULL; // 16MB public: DUCKDB_API explicit ArenaAllocator(Allocator &allocator, idx_t initial_capacity = ARENA_ALLOCATOR_INITIAL_CAPACITY); DUCKDB_API ~ArenaAllocator(); data_ptr_t Allocate(idx_t len) { D_ASSERT(!head || head->current_position <= head->maximum_size); if (!head || head->current_position + len > head->maximum_size) { AllocateNewBlock(len); } D_ASSERT(head->current_position + len <= head->maximum_size); auto result = head->data.get() + head->current_position; head->current_position += len; return result; } DUCKDB_API data_ptr_t Reallocate(data_ptr_t pointer, idx_t old_size, idx_t size); DUCKDB_API data_ptr_t AllocateAligned(idx_t size); DUCKDB_API data_ptr_t ReallocateAligned(data_ptr_t pointer, idx_t old_size, idx_t size); //! Increment the internal cursor (if required) so the next allocation is guaranteed to be aligned to 8 bytes DUCKDB_API void AlignNext(); //! This shrinks the LAST allocation that was made using the allocator //! Note that we can ONLY safely call this method if Allocate has been called previously with a size >= shrink_size DUCKDB_API void ShrinkHead(idx_t shrink_size) const { D_ASSERT(head && head->current_position >= shrink_size); head->current_position -= shrink_size; } //! Resets the current head and destroys all previous arena chunks DUCKDB_API void Reset(); DUCKDB_API void Destroy(); DUCKDB_API void Move(ArenaAllocator &allocator); DUCKDB_API ArenaChunk *GetHead(); DUCKDB_API ArenaChunk *GetTail(); DUCKDB_API bool IsEmpty() const; //! Get the total *used* size (not cached) DUCKDB_API idx_t SizeInBytes() const; //! Get the currently allocated size in bytes (cached, read from "allocated_size") DUCKDB_API idx_t AllocationSize() const; //! 
Returns an "Allocator" wrapper for this arena allocator Allocator &GetAllocator() { return arena_allocator; } template T *Make(ARGS &&... args) { auto mem = AllocateAligned(sizeof(T)); return new (mem) T(std::forward(args)...); } String MakeString(const char *data, const size_t len) { data_ptr_t mem = nullptr; D_ASSERT(len < NumericLimits::Maximum()); const auto size = static_cast(len); if (!String::CanBeInlined(size)) { // If the string can't be inlined, we allocate it on the arena allocator mem = AllocateAligned(sizeof(char) * size + 1); // +1 for null terminator memcpy(mem, data, size); mem[size] = '\0'; } return String::Reference(mem ? reinterpret_cast(mem) : data, size); } String MakeString(const std::string &data) { return MakeString(data.c_str(), data.size()); } private: void AllocateNewBlock(idx_t min_size); private: //! Internal allocator that is used by the arena allocator Allocator &allocator; idx_t initial_capacity; unsafe_unique_ptr head; ArenaChunk *tail; //! An allocator wrapper using this arena allocator Allocator arena_allocator; //! The total allocated size idx_t allocated_size = 0; }; } // namespace duckdb namespace duckdb { //! A string heap is the owner of a set of strings, strings can be inserted into //! it On every insert, a pointer to the inserted string is returned The //! returned pointer will remain valid until the StringHeap is destroyed class StringHeap { public: DUCKDB_API explicit StringHeap(Allocator &allocator = Allocator::DefaultAllocator()); DUCKDB_API void Destroy(); DUCKDB_API void Move(StringHeap &other); //! Add a string to the string heap, returns a pointer to the string DUCKDB_API string_t AddString(const char *data, idx_t len); //! Add a string to the string heap, returns a pointer to the string DUCKDB_API string_t AddString(const char *data); //! Add a string to the string heap, returns a pointer to the string DUCKDB_API string_t AddString(const string &data); //! 
Add a string to the string heap, returns a pointer to the string DUCKDB_API string_t AddString(const string_t &data); //! Add a blob to the string heap; blobs can be non-valid UTF8 DUCKDB_API string_t AddBlob(const string_t &data); //! Add a blob to the string heap; blobs can be non-valid UTF8 DUCKDB_API string_t AddBlob(const char *data, idx_t len); //! Allocates space for an empty string of size "len" on the heap DUCKDB_API string_t EmptyString(idx_t len); //! Size of strings DUCKDB_API idx_t SizeInBytes() const; //! Total allocation size (cached) DUCKDB_API idx_t AllocationSize() const; DUCKDB_API ArenaAllocator &GetAllocator() { return allocator; } private: ArenaAllocator allocator; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/buffer/buffer_handle.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/storage_info.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/encryption_state.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class EncryptionTypes { public: enum CipherType : uint8_t { INVALID = 0, GCM = 1, CTR = 2, CBC = 3 }; enum KeyDerivationFunction : uint8_t { DEFAULT = 0, SHA256 = 1, PBKDF2 = 2 }; enum Mode { ENCRYPT, DECRYPT }; static string CipherToString(CipherType cipher_p); static CipherType StringToCipher(const string &encryption_cipher_p); static string KDFToString(KeyDerivationFunction kdf_p); static KeyDerivationFunction StringToKDF(const string &key_derivation_function_p); }; class EncryptionState { public: DUCKDB_API explicit EncryptionState(EncryptionTypes::CipherType cipher_p, idx_t key_len); 
DUCKDB_API virtual ~EncryptionState(); public: DUCKDB_API virtual void InitializeEncryption(const_data_ptr_t iv, idx_t iv_len, const_data_ptr_t key, idx_t key_len, const_data_ptr_t aad = nullptr, idx_t aad_len = 0); DUCKDB_API virtual void InitializeDecryption(const_data_ptr_t iv, idx_t iv_len, const_data_ptr_t key, idx_t key_len, const_data_ptr_t aad = nullptr, idx_t aad_len = 0); DUCKDB_API virtual size_t Process(const_data_ptr_t in, idx_t in_len, data_ptr_t out, idx_t out_len); DUCKDB_API virtual size_t Finalize(data_ptr_t out, idx_t out_len, data_ptr_t tag, idx_t tag_len); DUCKDB_API virtual void GenerateRandomData(data_ptr_t data, idx_t len); protected: EncryptionTypes::CipherType cipher; idx_t key_len; }; class EncryptionUtil { public: DUCKDB_API explicit EncryptionUtil() {}; public: virtual shared_ptr CreateEncryptionState(EncryptionTypes::CipherType cipher_p, idx_t key_len = 0) const { return make_shared_ptr(cipher_p, key_len); } virtual ~EncryptionUtil() { } //! Whether the EncryptionUtil supports encryption (some may only support decryption) DUCKDB_API virtual bool SupportsEncryption() { return true; } }; } // namespace duckdb namespace duckdb { struct FileHandle; class QueryContext; //! The standard row group size #define DEFAULT_ROW_GROUP_SIZE 122880ULL //! The definition of an invalid block #define INVALID_BLOCK (-1) //! The maximum block id is 2^62 #define MAXIMUM_BLOCK 4611686018427388000LL //! The default block allocation size. #define DEFAULT_BLOCK_ALLOC_SIZE 262144ULL //! The default block header size. #define DEFAULT_BLOCK_HEADER_STORAGE_SIZE 8ULL //! The default block header size. #define DEFAULT_ENCRYPTION_BLOCK_HEADER_SIZE 40ULL //! The configurable block allocation size. #ifndef DUCKDB_BLOCK_HEADER_STORAGE_SIZE #define DUCKDB_BLOCK_HEADER_STORAGE_SIZE DEFAULT_BLOCK_HEADER_STORAGE_SIZE #define DEFAULT_ENCRYPTED_BUFFER_HEADER_SIZE 32ULL #endif using block_id_t = int64_t; struct Storage { //! 
The size of a hard disk sector, only really needed for Direct IO constexpr static idx_t SECTOR_SIZE = 4096U; //! The size of the headers. This should be small and written more or less atomically by the hard disk. We default //! to the page size, which is 4KB. (1 << 12) constexpr static idx_t FILE_HEADER_SIZE = 4096U; //! The maximum row group size constexpr static const idx_t MAX_ROW_GROUP_SIZE = 1ULL << 30ULL; //! The minimum block allocation size. This is the minimum size we test in our nightly tests. constexpr static idx_t MIN_BLOCK_ALLOC_SIZE = 16384ULL; //! The maximum block allocation size. This is the maximum size currently supported by duckdb. constexpr static idx_t MAX_BLOCK_ALLOC_SIZE = 262144ULL; //! The default block header size for blocks written to storage. constexpr static idx_t DEFAULT_BLOCK_HEADER_SIZE = sizeof(idx_t); //! The default block header size for blocks written to storage. constexpr static idx_t MAX_BLOCK_HEADER_SIZE = 128ULL; //! Block header size for encrypted blocks (64 bytes) constexpr static idx_t ENCRYPTED_BLOCK_HEADER_SIZE = 64ULL; //! The default block size. constexpr static idx_t DEFAULT_BLOCK_SIZE = DEFAULT_BLOCK_ALLOC_SIZE - DEFAULT_BLOCK_HEADER_SIZE; //! Ensures that a user-provided block allocation size matches all requirements. static void VerifyBlockAllocSize(const idx_t block_alloc_size); static void VerifyBlockHeaderSize(const idx_t block_header_size); }; //! The version number default, lower and upper bounds of the database storage format extern const uint64_t VERSION_NUMBER; extern const uint64_t VERSION_NUMBER_LOWER; extern const uint64_t VERSION_NUMBER_UPPER; string GetDuckDBVersions(const idx_t version_number); optional_idx GetStorageVersion(const char *version_string); string GetStorageVersionName(const idx_t serialization_version, const bool add_suffix); optional_idx GetSerializationVersion(const char *version_string); vector GetSerializationCandidates(); //! The MainHeader is the first header in the storage file. 
//! It is written only once for a database file. class MainHeader { public: static constexpr idx_t MAX_VERSION_SIZE = 32; static constexpr idx_t MAGIC_BYTE_SIZE = 4; static constexpr idx_t MAGIC_BYTE_OFFSET = Storage::DEFAULT_BLOCK_HEADER_SIZE; static constexpr idx_t FLAG_COUNT = 4; //! Indicates whether database is encrypted or not. static constexpr uint64_t ENCRYPTED_DATABASE_FLAG = 1; //! The encryption key length. static constexpr uint64_t DEFAULT_ENCRYPTION_KEY_LENGTH = 32; //! The magic bytes in front of the file should be "DUCK". static const char MAGIC_BYTES[]; //! The canary should be "DUCKKEY". static const char CANARY[]; //! The (storage) version of the database. uint64_t version_number; //! The set of flags used by the database. uint64_t flags[FLAG_COUNT]; //! The length of the unique database identifier. static constexpr idx_t DB_IDENTIFIER_LEN = 16; //! Optional metadata for encryption, if encryption flag is set. static constexpr idx_t ENCRYPTION_METADATA_LEN = 8; //! The canary is a known plaintext for detecting wrong keys early. static constexpr idx_t CANARY_BYTE_SIZE = 8; //! 
Nonce, IV (nonce + counter) and tag length static constexpr uint64_t AES_NONCE_LEN = 16; static constexpr uint64_t AES_IV_LEN = 16; static constexpr uint64_t AES_TAG_LEN = 16; static void CheckMagicBytes(QueryContext context, FileHandle &handle); string LibraryGitDesc() { return string(char_ptr_cast(library_git_desc), 0, MAX_VERSION_SIZE); } string LibraryGitHash() { return string(char_ptr_cast(library_git_hash), 0, MAX_VERSION_SIZE); } bool IsEncrypted() const { return flags[0] & MainHeader::ENCRYPTED_DATABASE_FLAG; } void SetEncrypted() { flags[0] |= MainHeader::ENCRYPTED_DATABASE_FLAG; } void SetEncryptionMetadata(data_ptr_t source) { memset(encryption_metadata, 0, ENCRYPTION_METADATA_LEN); memcpy(encryption_metadata, source, ENCRYPTION_METADATA_LEN); } EncryptionTypes::CipherType GetEncryptionCipher() { return static_cast(encryption_metadata[2]); } void SetDBIdentifier(data_ptr_t source) { memset(db_identifier, 0, DB_IDENTIFIER_LEN); memcpy(db_identifier, source, DB_IDENTIFIER_LEN); } void SetEncryptedCanary(data_ptr_t source) { memset(encrypted_canary, 0, CANARY_BYTE_SIZE); memcpy(encrypted_canary, source, CANARY_BYTE_SIZE); } data_ptr_t GetDBIdentifier() { return db_identifier; } static bool CompareDBIdentifiers(const data_ptr_t db_identifier_1, const data_ptr_t db_identifier_2) { for (idx_t i = 0; i < DB_IDENTIFIER_LEN; i++) { if (db_identifier_1[i] != db_identifier_2[i]) { return false; } } return true; } data_ptr_t GetEncryptedCanary() { return encrypted_canary; } void Write(WriteStream &ser); static MainHeader Read(ReadStream &source); private: data_t library_git_desc[MAX_VERSION_SIZE]; data_t library_git_hash[MAX_VERSION_SIZE]; data_t encryption_metadata[ENCRYPTION_METADATA_LEN]; //! The unique database identifier and optional encryption salt. data_t db_identifier[DB_IDENTIFIER_LEN]; data_t encrypted_canary[CANARY_BYTE_SIZE]; }; //! The DatabaseHeader contains information about the current state of the database. Every storage file has two //! 
DatabaseHeaders. On startup, the DatabaseHeader with the highest iteration count is used as the active header. When //! a checkpoint is performed, the active DatabaseHeader is switched by increasing the iteration count of the //! DatabaseHeader. struct DatabaseHeader { //! The iteration count, increases by 1 every time the storage is checkpointed. uint64_t iteration; //! A pointer to the initial meta block idx_t meta_block; //! A pointer to the block containing the free list idx_t free_list; //! The number of blocks that is in the file as of this database header. If the file is larger than BLOCK_SIZE * //! block_count any blocks appearing AFTER block_count are implicitly part of the free_list. uint64_t block_count; //! The allocation size of blocks in this database file. Defaults to default_block_alloc_size (DBConfig). idx_t block_alloc_size; //! The vector size of the database file idx_t vector_size; //! The serialization compatibility version idx_t serialization_compatibility; void Write(WriteStream &ser); static DatabaseHeader Read(const MainHeader &header, ReadStream &source); }; //! Detect mismatching constant values when compiling #if (DEFAULT_ROW_GROUP_SIZE % STANDARD_VECTOR_SIZE != 0) #error The row group size must be a multiple of the vector size #endif #if (DEFAULT_ROW_GROUP_SIZE < STANDARD_VECTOR_SIZE) #error Row groups must be able to hold at least one vector #endif #if (DEFAULT_BLOCK_ALLOC_SIZE & (DEFAULT_BLOCK_ALLOC_SIZE - 1) != 0) #error The default block allocation size must be a power of two #endif } // namespace duckdb namespace duckdb { class BlockHandle; class FileBuffer; class BufferHandle { public: DUCKDB_API BufferHandle(); DUCKDB_API explicit BufferHandle(shared_ptr handle, optional_ptr node); DUCKDB_API ~BufferHandle(); // disable copy constructors BufferHandle(const BufferHandle &other) = delete; BufferHandle &operator=(const BufferHandle &) = delete; //! 
enable move constructors DUCKDB_API BufferHandle(BufferHandle &&other) noexcept; DUCKDB_API BufferHandle &operator=(BufferHandle &&) noexcept; public: //! Returns whether or not the BufferHandle is valid. DUCKDB_API bool IsValid() const; //! Returns a pointer to the buffer data. Handle must be valid. inline data_ptr_t Ptr() const { D_ASSERT(IsValid()); return node->buffer; } //! Returns a pointer to the buffer data. Handle must be valid. inline data_ptr_t Ptr() { D_ASSERT(IsValid()); return node->buffer; } //! Gets the underlying file buffer. Handle must be valid. DUCKDB_API FileBuffer &GetFileBuffer(); //! Destroys the buffer handle DUCKDB_API void Destroy(); const shared_ptr &GetBlockHandle() const { return handle; } private: //! The block handle shared_ptr handle; //! The managed buffer node optional_ptr node; }; } // namespace duckdb namespace duckdb { class BufferHandle; class VectorBuffer; class Vector; enum class VectorBufferType : uint8_t { STANDARD_BUFFER, // standard buffer, holds a single array of data DICTIONARY_BUFFER, // dictionary buffer, holds a selection vector VECTOR_CHILD_BUFFER, // vector child buffer: holds another vector STRING_BUFFER, // string buffer, holds a string heap FSST_BUFFER, // fsst compressed string buffer, holds a string heap, fsst symbol table and a string count STRUCT_BUFFER, // struct buffer, holds a ordered mapping from name to child vector LIST_BUFFER, // list buffer, holds a single flatvector child MANAGED_BUFFER, // managed buffer, holds a buffer managed by the buffermanager OPAQUE_BUFFER, // opaque buffer, can be created for example by the parquet reader ARRAY_BUFFER // array buffer, holds a single flatvector child }; enum class VectorAuxiliaryDataType : uint8_t { ARROW_AUXILIARY // Holds Arrow Chunks that this vector depends on }; struct VectorAuxiliaryData { explicit VectorAuxiliaryData(VectorAuxiliaryDataType type_p) : type(type_p) { }; VectorAuxiliaryDataType type; virtual ~VectorAuxiliaryData() { } public: template 
TARGET &Cast() { if (type != TARGET::TYPE) { throw InternalException("Failed to cast vector auxiliary data to type - type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (type != TARGET::TYPE) { throw InternalException("Failed to cast vector auxiliary data to type - type mismatch"); } return reinterpret_cast(*this); } }; //! The VectorBuffer is a class used by the vector to hold its data class VectorBuffer { public: explicit VectorBuffer(VectorBufferType type) : buffer_type(type) { } explicit VectorBuffer(idx_t data_size) : buffer_type(VectorBufferType::STANDARD_BUFFER) { if (data_size > 0) { data = Allocator::DefaultAllocator().Allocate(data_size); } } explicit VectorBuffer(AllocatedData &&data_p) : buffer_type(VectorBufferType::STANDARD_BUFFER), data(std::move(data_p)) { } virtual ~VectorBuffer() { } VectorBuffer() { } public: data_ptr_t GetData() { return data.get(); } void SetData(AllocatedData &&new_data) { data = std::move(new_data); } VectorAuxiliaryData *GetAuxiliaryData() { return aux_data.get(); } void SetAuxiliaryData(unique_ptr aux_data_p) { aux_data = std::move(aux_data_p); } void MoveAuxiliaryData(VectorBuffer &source_buffer) { SetAuxiliaryData(std::move(source_buffer.aux_data)); } virtual optional_ptr GetAllocator() const { return data.GetAllocator(); } static buffer_ptr CreateStandardVector(PhysicalType type, idx_t capacity = STANDARD_VECTOR_SIZE); static buffer_ptr CreateConstantVector(PhysicalType type); static buffer_ptr CreateConstantVector(const LogicalType &logical_type); static buffer_ptr CreateStandardVector(const LogicalType &logical_type, idx_t capacity = STANDARD_VECTOR_SIZE); inline VectorBufferType GetBufferType() const { return buffer_type; } inline VectorAuxiliaryDataType GetAuxiliaryDataType() const { return aux_data->type; } protected: VectorBufferType buffer_type; unique_ptr aux_data; AllocatedData data; public: template TARGET &Cast() { DynamicCastCheck(this); return 
reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; //! The DictionaryBuffer holds a selection vector class DictionaryBuffer : public VectorBuffer { public: explicit DictionaryBuffer(const SelectionVector &sel) : VectorBuffer(VectorBufferType::DICTIONARY_BUFFER), sel_vector(sel) { } explicit DictionaryBuffer(buffer_ptr data) : VectorBuffer(VectorBufferType::DICTIONARY_BUFFER), sel_vector(std::move(data)) { } explicit DictionaryBuffer(idx_t count = STANDARD_VECTOR_SIZE) : VectorBuffer(VectorBufferType::DICTIONARY_BUFFER), sel_vector(count) { } public: const SelectionVector &GetSelVector() const { return sel_vector; } SelectionVector &GetSelVector() { return sel_vector; } void SetSelVector(const SelectionVector &vector) { this->sel_vector.Initialize(vector); } void SetDictionarySize(idx_t dict_size) { dictionary_size = dict_size; } optional_idx GetDictionarySize() const { return dictionary_size; } void SetDictionaryId(string id) { dictionary_id = std::move(id); } const string &GetDictionaryId() const { return dictionary_id; } private: SelectionVector sel_vector; optional_idx dictionary_size; //! A unique identifier for the dictionary that can be used to check if two dictionaries are equivalent string dictionary_id; }; class VectorStringBuffer : public VectorBuffer { public: VectorStringBuffer(); explicit VectorStringBuffer(Allocator &allocator); explicit VectorStringBuffer(VectorBufferType type); public: string_t AddString(const char *data, idx_t len) { return heap.AddString(data, len); } string_t AddString(string_t data) { return heap.AddString(data); } string_t AddBlob(string_t data) { return heap.AddBlob(data.GetData(), data.GetSize()); } string_t EmptyString(idx_t len) { return heap.EmptyString(len); } ArenaAllocator &GetStringAllocator() { return heap.GetAllocator(); } //! Allocate a buffer to store up to "len" bytes for a string //! 
This can be turned into a proper string by using FinalizeBuffer afterwards //! Note that alloc_len only has to be an upper bound, the final string may be smaller data_ptr_t AllocateShrinkableBuffer(idx_t alloc_len) { auto &allocator = heap.GetAllocator(); return allocator.Allocate(alloc_len); } //! Finalize a buffer allocated with AllocateShrinkableBuffer into a string of size str_len //! str_len must be <= alloc_len string_t FinalizeShrinkableBuffer(data_ptr_t buffer, idx_t alloc_len, idx_t str_len) { auto &allocator = heap.GetAllocator(); D_ASSERT(str_len <= alloc_len); D_ASSERT(buffer == allocator.GetHead()->data.get() + allocator.GetHead()->current_position - alloc_len); bool is_not_inlined = str_len > string_t::INLINE_LENGTH; idx_t shrink_count = alloc_len - (str_len * is_not_inlined); allocator.ShrinkHead(shrink_count); return string_t(const_char_ptr_cast(buffer), UnsafeNumericCast(str_len)); } void AddHeapReference(buffer_ptr heap) { references.push_back(std::move(heap)); } private: //! The string heap of this buffer StringHeap heap; //! 
References to additional vector buffers referenced by this string buffer vector> references; }; class VectorFSSTStringBuffer : public VectorStringBuffer { public: VectorFSSTStringBuffer(); public: void AddDecoder(buffer_ptr &duckdb_fsst_decoder_p, const idx_t string_block_limit) { duckdb_fsst_decoder = duckdb_fsst_decoder_p; decompress_buffer.resize(string_block_limit + 1); } void *GetDecoder() { return duckdb_fsst_decoder.get(); } vector &GetDecompressBuffer() { return decompress_buffer; } void SetCount(idx_t count) { total_string_count = count; } idx_t GetCount() { return total_string_count; } private: buffer_ptr duckdb_fsst_decoder; idx_t total_string_count = 0; vector decompress_buffer; }; class VectorStructBuffer : public VectorBuffer { public: VectorStructBuffer(); explicit VectorStructBuffer(const LogicalType &struct_type, idx_t capacity = STANDARD_VECTOR_SIZE); VectorStructBuffer(Vector &other, const SelectionVector &sel, idx_t count); ~VectorStructBuffer() override; public: const vector> &GetChildren() const { return children; } vector> &GetChildren() { return children; } private: //! child vectors used for nested data vector> children; }; class VectorListBuffer : public VectorBuffer { public: explicit VectorListBuffer(unique_ptr vector, idx_t initial_capacity = STANDARD_VECTOR_SIZE); explicit VectorListBuffer(const LogicalType &list_type, idx_t initial_capacity = STANDARD_VECTOR_SIZE); ~VectorListBuffer() override; public: Vector &GetChild() { return *child; } void Reserve(idx_t to_reserve); void Append(const Vector &to_append, idx_t to_append_size, idx_t source_offset = 0); void Append(const Vector &to_append, const SelectionVector &sel, idx_t to_append_size, idx_t source_offset = 0); void PushBack(const Value &insert); idx_t GetSize() { return size; } idx_t GetCapacity() { return capacity; } void SetCapacity(idx_t new_capacity); void SetSize(idx_t new_size); private: //! 
child vectors used for nested data unique_ptr child; idx_t capacity = 0; idx_t size = 0; }; class VectorArrayBuffer : public VectorBuffer { public: explicit VectorArrayBuffer(unique_ptr child_vector, idx_t array_size, idx_t initial_capacity); explicit VectorArrayBuffer(const LogicalType &array, idx_t initial = STANDARD_VECTOR_SIZE); ~VectorArrayBuffer() override; public: Vector &GetChild(); idx_t GetArraySize(); idx_t GetChildSize(); private: unique_ptr child; // The size of each array in this buffer idx_t array_size = 0; // How many arrays are currently stored in this buffer // The child vector has size (array_size * size) idx_t size = 0; }; //! The ManagedVectorBuffer holds a buffer handle class ManagedVectorBuffer : public VectorBuffer { public: explicit ManagedVectorBuffer(BufferHandle handle); ~ManagedVectorBuffer() override; private: BufferHandle handle; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/type_util.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/double_na_equal.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { // special double/float class to deal with dictionary encoding and NaN equality struct double_na_equal { double_na_equal() : val(0) { } explicit double_na_equal(const double val_p) : val(val_p) { } // NOLINTNEXTLINE: allow implicit conversion to double operator double() const { return val; } bool operator==(const double &right) const { if (std::isnan(val) && std::isnan(right)) { return true; } return val == right; } bool operator!=(const double &right) const { return !(*this == right); } double val; }; struct float_na_equal { float_na_equal() : val(0) { } explicit float_na_equal(const float val_p) : val(val_p) { } // 
NOLINTNEXTLINE: allow implicit conversion to float operator float() const { return val; } bool operator==(const float &right) const { if (std::isnan(val) && std::isnan(right)) { return true; } return val == right; } bool operator!=(const float &right) const { return !(*this == right); } float val; }; } // namespace duckdb namespace duckdb { struct bignum_t; //! Returns the PhysicalType for the given type template PhysicalType GetTypeId() { if (std::is_same()) { return PhysicalType::BOOL; } else if (std::is_same()) { return PhysicalType::INT8; } else if (std::is_same()) { return PhysicalType::INT16; } else if (std::is_same()) { return PhysicalType::INT32; } else if (std::is_same()) { return PhysicalType::INT64; } else if (std::is_same()) { return PhysicalType::UINT8; } else if (std::is_same()) { return PhysicalType::UINT16; } else if (std::is_same()) { return PhysicalType::UINT32; } else if (std::is_same()) { return PhysicalType::UINT64; } else if (std::is_same() || std::is_same()) { return PhysicalType::UINT64; } else if (std::is_same()) { return PhysicalType::INT128; } else if (std::is_same()) { return PhysicalType::UINT128; } else if (std::is_same()) { return PhysicalType::INT32; } else if (std::is_same()) { return PhysicalType::INT64; } else if (std::is_same()) { return PhysicalType::INT64; } else if (std::is_same()) { return PhysicalType::INT64; } else if (std::is_same()) { return PhysicalType::INT64; } else if (std::is_same()) { return PhysicalType::INT64; } else if (std::is_same()) { return PhysicalType::INT64; } else if (std::is_same()) { return PhysicalType::INT64; } else if (std::is_same()) { return PhysicalType::INT64; } else if (std::is_same() || std::is_same()) { return PhysicalType::FLOAT; } else if (std::is_same() || std::is_same()) { return PhysicalType::DOUBLE; } else if (std::is_same() || std::is_same() || std::is_same() || std::is_same()) { return PhysicalType::VARCHAR; } else if (std::is_same()) { return PhysicalType::INTERVAL; } else if 
(std::is_same()) { return PhysicalType::LIST; } else if (std::is_pointer() || std::is_same()) { if (sizeof(uintptr_t) == sizeof(uint32_t)) { return PhysicalType::UINT32; } else if (sizeof(uintptr_t) == sizeof(uint64_t)) { return PhysicalType::UINT64; } else { throw InternalException("Unsupported pointer size in GetTypeId"); } } else { throw InternalException("Unsupported type in GetTypeId"); } } template bool StorageTypeCompatible(PhysicalType type) { if (std::is_same()) { return type == PhysicalType::INT8 || type == PhysicalType::BOOL; } if (std::is_same()) { return type == PhysicalType::UINT8 || type == PhysicalType::BOOL; } return type == GetTypeId(); } template bool TypeIsNumber() { return std::is_integral() || std::is_floating_point() || std::is_same() || std::is_same(); } template bool IsValidType() { return GetTypeId() != PhysicalType::INVALID; } template bool IsIntegerType() { return TypeIsIntegral(GetTypeId()); } } // namespace duckdb namespace duckdb { class VectorCache; class VectorStringBuffer; class VectorStructBuffer; class VectorListBuffer; struct SelCache; struct UnifiedVectorFormat { DUCKDB_API UnifiedVectorFormat(); // disable copy constructors UnifiedVectorFormat(const UnifiedVectorFormat &other) = delete; UnifiedVectorFormat &operator=(const UnifiedVectorFormat &) = delete; //! 
enable move constructors DUCKDB_API UnifiedVectorFormat(UnifiedVectorFormat &&other) noexcept; DUCKDB_API UnifiedVectorFormat &operator=(UnifiedVectorFormat &&) noexcept; const SelectionVector *sel; data_ptr_t data; ValidityMask validity; SelectionVector owned_sel; PhysicalType physical_type; template void VerifyVectorType() const { #ifdef DUCKDB_DEBUG_NO_SAFETY D_ASSERT(StorageTypeCompatible(physical_type)); #else if (!StorageTypeCompatible(physical_type)) { throw InternalException("Expected unified vector format of type %s, but found type %s", GetTypeId(), physical_type); } #endif } template static inline const T *GetDataUnsafe(const UnifiedVectorFormat &format) { return reinterpret_cast(format.data); } template static inline const T *GetData(const UnifiedVectorFormat &format) { return format.GetData(); } template inline const T *GetData() const { VerifyVectorType(); return GetDataUnsafe(*this); } template static inline T *GetDataNoConst(UnifiedVectorFormat &format) { format.VerifyVectorType(); return reinterpret_cast(format.data); } }; struct RecursiveUnifiedVectorFormat { UnifiedVectorFormat unified; vector children; LogicalType logical_type; }; struct UnifiedVariantVector { //! The 'keys' list (dictionary) DUCKDB_API static const UnifiedVectorFormat &GetKeys(const RecursiveUnifiedVectorFormat &vec); //! The 'keys' list entry DUCKDB_API static const UnifiedVectorFormat &GetKeysEntry(const RecursiveUnifiedVectorFormat &vec); //! The 'children' list DUCKDB_API static const UnifiedVectorFormat &GetChildren(const RecursiveUnifiedVectorFormat &vec); //! The 'keys_index' inside the 'children' list DUCKDB_API static const UnifiedVectorFormat &GetChildrenKeysIndex(const RecursiveUnifiedVectorFormat &vec); //! The 'values_index' inside the 'children' list DUCKDB_API static const UnifiedVectorFormat &GetChildrenValuesIndex(const RecursiveUnifiedVectorFormat &vec); //! 
The 'values' list DUCKDB_API static const UnifiedVectorFormat &GetValues(const RecursiveUnifiedVectorFormat &vec); //! The 'type_id' inside the 'values' list DUCKDB_API static const UnifiedVectorFormat &GetValuesTypeId(const RecursiveUnifiedVectorFormat &vec); //! The 'byte_offset' inside the 'values' list DUCKDB_API static const UnifiedVectorFormat &GetValuesByteOffset(const RecursiveUnifiedVectorFormat &vec); //! The binary blob 'data' encoding the Variant for the row DUCKDB_API static const UnifiedVectorFormat &GetData(const RecursiveUnifiedVectorFormat &vec); }; //! This is a helper data structure. It contains all fields necessary to resize a vector. struct ResizeInfo { ResizeInfo(Vector &vec, data_ptr_t data, optional_ptr buffer, const idx_t multiplier) : vec(vec), data(data), buffer(buffer), multiplier(multiplier) { } Vector &vec; data_ptr_t data; optional_ptr buffer; idx_t multiplier; }; struct ConsecutiveChildListInfo { ConsecutiveChildListInfo() : is_constant(true), needs_slicing(false), child_list_info(list_entry_t(0, 0)) { } bool is_constant; bool needs_slicing; list_entry_t child_list_info; }; //! Vector of values of a specified PhysicalType. class Vector { friend struct ConstantVector; friend struct DictionaryVector; friend struct FlatVector; friend struct ListVector; friend struct StringVector; friend struct FSSTVector; friend struct StructVector; friend struct UnionVector; friend struct SequenceVector; friend struct ArrayVector; friend class DataChunk; friend class VectorCacheBuffer; public: //! Create a vector that references the other vector DUCKDB_API Vector(Vector &other); //! Create a vector that slices another vector DUCKDB_API explicit Vector(const Vector &other, const SelectionVector &sel, idx_t count); //! Create a vector that slices another vector between a pair of offsets DUCKDB_API explicit Vector(const Vector &other, idx_t offset, idx_t end); //! 
Create a vector of size one holding the passed on value DUCKDB_API explicit Vector(const Value &value); //! Create a vector of size tuple_count (non-standard) DUCKDB_API explicit Vector(LogicalType type, idx_t capacity = STANDARD_VECTOR_SIZE); //! Create an empty standard vector with a type, equivalent to calling Vector(type, true, false) DUCKDB_API explicit Vector(const VectorCache &cache); //! Create a non-owning vector that references the specified data DUCKDB_API Vector(LogicalType type, data_ptr_t dataptr); //! Create an owning vector that holds at most STANDARD_VECTOR_SIZE entries. /*! Create a new vector If create_data is true, the vector will be an owning empty vector. If initialize_to_zero is true, the allocated data will be zero-initialized. */ DUCKDB_API Vector(LogicalType type, bool create_data, bool initialize_to_zero, idx_t capacity = STANDARD_VECTOR_SIZE); // implicit copying of Vectors is not allowed Vector(const Vector &) = delete; // but moving of vectors is allowed DUCKDB_API Vector(Vector &&other) noexcept; public: //! Create a vector that references the specified value. DUCKDB_API void Reference(const Value &value); //! Causes this vector to reference the data held by the other vector. //! The type of the "other" vector should match the type of this vector DUCKDB_API void Reference(const Vector &other); //! Reinterpret the data of the other vector as the type of this vector //! Note that this takes the data of the other vector as-is and places it in this vector //! Without changing the type of this vector DUCKDB_API void Reinterpret(const Vector &other); //! Causes this vector to reference the data held by the other vector, changes the type if required. DUCKDB_API void ReferenceAndSetType(const Vector &other); //! Resets a vector from a vector cache. //! This turns the vector back into an empty FlatVector with STANDARD_VECTOR_SIZE entries. //! The VectorCache is used so this can be done without requiring any allocations. 
DUCKDB_API void ResetFromCache(const VectorCache &cache); //! Creates a reference to a slice of the other vector DUCKDB_API void Slice(const Vector &other, idx_t offset, idx_t end); //! Creates a reference to a slice of the other vector DUCKDB_API void Slice(const Vector &other, const SelectionVector &sel, idx_t count); //! Turns the vector into a dictionary vector with the specified dictionary DUCKDB_API void Slice(const SelectionVector &sel, idx_t count); //! Slice the vector, keeping the result around in a cache or potentially using the cache instead of slicing DUCKDB_API void Slice(const SelectionVector &sel, idx_t count, SelCache &cache); //! Turn this vector into a dictionary vector DUCKDB_API void Dictionary(idx_t dictionary_size, const SelectionVector &sel, idx_t count); //! Creates a reference to a dictionary of the other vector DUCKDB_API void Dictionary(Vector &dict, idx_t dictionary_size, const SelectionVector &sel, idx_t count); //! Creates the data of this vector with the specified type. Any data that //! is currently in the vector is destroyed. DUCKDB_API void Initialize(bool initialize_to_zero = false, idx_t capacity = STANDARD_VECTOR_SIZE); //! Converts this Vector to a printable string representation DUCKDB_API string ToString(idx_t count) const; DUCKDB_API void Print(idx_t count) const; DUCKDB_API string ToString() const; DUCKDB_API void Print() const; //! Flatten the vector, removing any compression and turning it into a FLAT_VECTOR DUCKDB_API void Flatten(idx_t count); DUCKDB_API void Flatten(const SelectionVector &sel, idx_t count); //! Creates a UnifiedVectorFormat of a vector //! The UnifiedVectorFormat allows efficient reading of vectors regardless of their vector type //! It contains (1) a data pointer, (2) a validity mask, and (3) a selection vector //! Access to the individual vector elements can be performed through data_pointer[sel_idx[i]]/validity[sel_idx[i]] //! 
The most common vector types (flat, constant & dictionary) can be converted to the canonical format "for free" //! ToUnifiedFormat was originally called Orrify, as a tribute to Orri Erling who came up with it DUCKDB_API void ToUnifiedFormat(idx_t count, UnifiedVectorFormat &data); //! Recursively calls UnifiedVectorFormat on a vector and its child vectors (for nested types) static void RecursiveToUnifiedFormat(Vector &input, idx_t count, RecursiveUnifiedVectorFormat &data); //! Turn the vector into a sequence vector DUCKDB_API void Sequence(int64_t start, int64_t increment, idx_t count); //! Verify that the Vector is in a consistent, not corrupt state. DEBUG //! FUNCTION ONLY! DUCKDB_API void Verify(idx_t count); //! Asserts that the CheckMapValidity returns MapInvalidReason::VALID DUCKDB_API static void VerifyMap(Vector &map, const SelectionVector &sel, idx_t count); DUCKDB_API static void VerifyUnion(Vector &map, const SelectionVector &sel, idx_t count); DUCKDB_API static void VerifyVariant(Vector &map, const SelectionVector &sel, idx_t count); DUCKDB_API static void Verify(Vector &vector, const SelectionVector &sel, idx_t count); DUCKDB_API void UTFVerify(idx_t count); DUCKDB_API void UTFVerify(const SelectionVector &sel, idx_t count); //! Returns the [index] element of the Vector as a Value. DUCKDB_API Value GetValue(idx_t index) const; //! Sets the [index] element of the Vector to the specified Value. DUCKDB_API void SetValue(idx_t index, const Value &val); inline void SetAuxiliary(buffer_ptr new_buffer) { auxiliary = std::move(new_buffer); }; inline void CopyBuffer(Vector &other) { buffer = other.buffer; data = other.data; } //! Resizes the vector. DUCKDB_API void Resize(idx_t cur_size, idx_t new_size); //! Returns a vector of ResizeInfo containing each (nested) vector to resize. 
DUCKDB_API void FindResizeInfos(vector &resize_infos, const idx_t multiplier); DUCKDB_API void Serialize(Serializer &serializer, idx_t count, bool compressed_serialization = true); DUCKDB_API void Deserialize(Deserializer &deserializer, idx_t count); idx_t GetAllocationSize(idx_t cardinality) const; // Getters inline VectorType GetVectorType() const { return vector_type; } inline const LogicalType &GetType() const { return type; } inline data_ptr_t GetData() const { return data; } inline buffer_ptr GetAuxiliary() { return auxiliary; } inline buffer_ptr GetBuffer() { return buffer; } // Setters DUCKDB_API void SetVectorType(VectorType vector_type); // Transform vector to an equivalent dictionary vector static void DebugTransformToDictionary(Vector &vector, idx_t count); // Transform vector to an equivalent nested vector static void DebugShuffleNestedVector(Vector &vector, idx_t count); private: //! Returns the [index] element of the Vector as a Value. static Value GetValue(const Vector &v, idx_t index); //! Returns the [index] element of the Vector as a Value. static Value GetValueInternal(const Vector &v, idx_t index); protected: //! The vector type specifies how the data of the vector is physically stored (i.e. if it is a single repeated //! constant, if it is compressed) VectorType vector_type; //! The type of the elements stored in the vector (e.g. integer, float) LogicalType type; //! A pointer to the data. data_ptr_t data; //! The validity mask of the vector ValidityMask validity; //! The main buffer holding the data of the vector buffer_ptr buffer; //! The buffer holding auxiliary data of the vector //! e.g. a string vector uses this to store strings buffer_ptr auxiliary; //! The buffer holding precomputed hashes of the data in the vector //! used for caching hashes of string dictionaries buffer_ptr cached_hashes; }; //! 
The DictionaryBuffer holds a selection vector class VectorChildBuffer : public VectorBuffer { public: explicit VectorChildBuffer(Vector vector) : VectorBuffer(VectorBufferType::VECTOR_CHILD_BUFFER), data(std::move(vector)) { } public: Vector data; }; struct ConstantVector { template static void VerifyVectorType(const Vector &vector) { #ifdef DUCKDB_DEBUG_NO_SAFETY D_ASSERT(StorageTypeCompatible(vector.GetType().InternalType())); #else if (!StorageTypeCompatible(vector.GetType().InternalType())) { throw InternalException("Expected vector of type %s, but found vector of type %s", GetTypeId(), vector.GetType().InternalType()); } #endif } static inline const_data_ptr_t GetData(const Vector &vector) { D_ASSERT(vector.GetVectorType() == VectorType::CONSTANT_VECTOR || vector.GetVectorType() == VectorType::FLAT_VECTOR); return vector.data; } static inline data_ptr_t GetData(Vector &vector) { D_ASSERT(vector.GetVectorType() == VectorType::CONSTANT_VECTOR || vector.GetVectorType() == VectorType::FLAT_VECTOR); return vector.data; } template static inline const T *GetDataUnsafe(const Vector &vector) { return reinterpret_cast(GetData(vector)); } template static inline T *GetDataUnsafe(Vector &vector) { return reinterpret_cast(GetData(vector)); } template static inline const T *GetData(const Vector &vector) { VerifyVectorType(vector); return GetDataUnsafe(vector); } template static inline T *GetData(Vector &vector) { VerifyVectorType(vector); return GetDataUnsafe(vector); } static inline bool IsNull(const Vector &vector) { D_ASSERT(vector.GetVectorType() == VectorType::CONSTANT_VECTOR); return !vector.validity.RowIsValid(0); } DUCKDB_API static void SetNull(Vector &vector, bool is_null); static inline ValidityMask &Validity(Vector &vector) { D_ASSERT(vector.GetVectorType() == VectorType::CONSTANT_VECTOR); return vector.validity; } DUCKDB_API static const SelectionVector *ZeroSelectionVector(idx_t count, SelectionVector &owned_sel); DUCKDB_API static const SelectionVector 
*ZeroSelectionVector(); //! Turns "vector" into a constant vector by referencing a value within the source vector DUCKDB_API static void Reference(Vector &vector, Vector &source, idx_t position, idx_t count); static const sel_t ZERO_VECTOR[STANDARD_VECTOR_SIZE]; }; struct DictionaryVector { static void VerifyDictionary(const Vector &vector) { #ifdef DUCKDB_DEBUG_NO_SAFETY D_ASSERT(vector.GetVectorType() == VectorType::DICTIONARY_VECTOR); #else if (vector.GetVectorType() != VectorType::DICTIONARY_VECTOR) { throw InternalException( "Operation requires a dictionary vector but a non-dictionary vector was encountered"); } #endif } static inline const SelectionVector &SelVector(const Vector &vector) { VerifyDictionary(vector); return vector.buffer->Cast().GetSelVector(); } static inline SelectionVector &SelVector(Vector &vector) { VerifyDictionary(vector); return vector.buffer->Cast().GetSelVector(); } static inline const Vector &Child(const Vector &vector) { VerifyDictionary(vector); return vector.auxiliary->Cast().data; } static inline Vector &Child(Vector &vector) { VerifyDictionary(vector); return vector.auxiliary->Cast().data; } static inline optional_idx DictionarySize(const Vector &vector) { VerifyDictionary(vector); return vector.buffer->Cast().GetDictionarySize(); } static inline const string &DictionaryId(const Vector &vector) { VerifyDictionary(vector); return vector.buffer->Cast().GetDictionaryId(); } static inline void SetDictionaryId(Vector &vector, string new_id) { VerifyDictionary(vector); vector.buffer->Cast().SetDictionaryId(std::move(new_id)); } static inline bool CanCacheHashes(const LogicalType &type) { return type.InternalType() == PhysicalType::VARCHAR; } static inline bool CanCacheHashes(const Vector &vector) { return DictionarySize(vector).IsValid() && CanCacheHashes(vector.GetType()); } static const Vector &GetCachedHashes(Vector &input); }; struct FlatVector { static void VerifyFlatVector(const Vector &vector) { #ifdef DUCKDB_DEBUG_NO_SAFETY 
D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR); #else if (vector.GetVectorType() != VectorType::FLAT_VECTOR) { throw InternalException("Operation requires a flat vector but a non-flat vector was encountered"); } #endif } static inline data_ptr_t GetData(Vector &vector) { return ConstantVector::GetData(vector); } template static inline const T *GetData(const Vector &vector) { return ConstantVector::GetData(vector); } template static inline T *GetData(Vector &vector) { return ConstantVector::GetData(vector); } template static inline const T *GetDataUnsafe(const Vector &vector) { return ConstantVector::GetDataUnsafe(vector); } template static inline T *GetDataUnsafe(Vector &vector) { return ConstantVector::GetDataUnsafe(vector); } static inline void SetData(Vector &vector, data_ptr_t data) { D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR); vector.data = data; } template static inline T GetValue(Vector &vector, idx_t idx) { D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR); return FlatVector::GetData(vector)[idx]; } static inline const ValidityMask &Validity(const Vector &vector) { VerifyFlatVector(vector); return vector.validity; } static inline ValidityMask &Validity(Vector &vector) { VerifyFlatVector(vector); return vector.validity; } static inline void SetValidity(Vector &vector, const ValidityMask &new_validity) { VerifyFlatVector(vector); vector.validity.Initialize(new_validity); } DUCKDB_API static void SetNull(Vector &vector, idx_t idx, bool is_null); static inline bool IsNull(const Vector &vector, idx_t idx) { D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR); return !vector.validity.RowIsValid(idx); } DUCKDB_API static const SelectionVector *IncrementalSelectionVector(); }; struct ListVector { static inline list_entry_t *GetData(Vector &v) { if (v.GetVectorType() == VectorType::DICTIONARY_VECTOR) { auto &child = DictionaryVector::Child(v); return GetData(child); } return FlatVector::GetData(v); } //! 
Gets a reference to the underlying child-vector of a list DUCKDB_API static const Vector &GetEntry(const Vector &vector); //! Gets a reference to the underlying child-vector of a list DUCKDB_API static Vector &GetEntry(Vector &vector); //! Gets the total size of the underlying child-vector of a list DUCKDB_API static idx_t GetListSize(const Vector &vector); //! Sets the total size of the underlying child-vector of a list DUCKDB_API static void SetListSize(Vector &vec, idx_t size); //! Gets the total capacity of the underlying child-vector of a list DUCKDB_API static idx_t GetListCapacity(const Vector &vector); //! Sets the total capacity of the underlying child-vector of a list DUCKDB_API static void Reserve(Vector &vec, idx_t required_capacity); DUCKDB_API static void Append(Vector &target, const Vector &source, idx_t source_size, idx_t source_offset = 0); DUCKDB_API static void Append(Vector &target, const Vector &source, const SelectionVector &sel, idx_t source_size, idx_t source_offset = 0); DUCKDB_API static void PushBack(Vector &target, const Value &insert); //! Returns the child_vector of list starting at offset until offset + count, and its length DUCKDB_API static idx_t GetConsecutiveChildList(Vector &list, Vector &result, idx_t offset, idx_t count); //! Returns information to only copy a section of a list child vector DUCKDB_API static ConsecutiveChildListInfo GetConsecutiveChildListInfo(Vector &list, idx_t offset, idx_t count); //! Slice and flatten a child vector to only contain a consecutive subsection of the child entries DUCKDB_API static void GetConsecutiveChildSelVector(Vector &list, SelectionVector &sel, idx_t offset, idx_t count); //! Share the entry of the other list vector DUCKDB_API static void ReferenceEntry(Vector &vector, Vector &other); private: template static T &GetEntryInternal(T &vector); }; struct StringVector { //! 
Add a string to the string heap of the vector (auxiliary data) DUCKDB_API static string_t AddString(Vector &vector, const char *data, idx_t len); //! Add a string or a blob to the string heap of the vector (auxiliary data) //! This function is the same as ::AddString, except the added data does not need to be valid UTF8 DUCKDB_API static string_t AddStringOrBlob(Vector &vector, const char *data, idx_t len); //! Add a string to the string heap of the vector (auxiliary data) DUCKDB_API static string_t AddString(Vector &vector, const char *data); //! Add a string to the string heap of the vector (auxiliary data) DUCKDB_API static string_t AddString(Vector &vector, string_t data); //! Add a string to the string heap of the vector (auxiliary data) DUCKDB_API static string_t AddString(Vector &vector, const string &data); //! Add a string or a blob to the string heap of the vector (auxiliary data) //! This function is the same as ::AddString, except the added data does not need to be valid UTF8 DUCKDB_API static string_t AddStringOrBlob(Vector &vector, string_t data); //! Allocates an empty string of the specified size, and returns a writable pointer that can be used to store the //! result of an operation DUCKDB_API static string_t EmptyString(Vector &vector, idx_t len); //! Returns a reference to the underlying VectorStringBuffer - throws an error if vector is not of type VARCHAR DUCKDB_API static VectorStringBuffer &GetStringBuffer(Vector &vector); //! Adds a reference to a handle that stores strings of this vector DUCKDB_API static void AddHandle(Vector &vector, BufferHandle handle); //! Adds a reference to an unspecified vector buffer that stores strings of this vector DUCKDB_API static void AddBuffer(Vector &vector, buffer_ptr buffer); //! 
Add a reference from this vector to the string heap of the provided vector DUCKDB_API static void AddHeapReference(Vector &vector, Vector &other); }; struct FSSTVector { static inline const ValidityMask &Validity(const Vector &vector) { D_ASSERT(vector.GetVectorType() == VectorType::FSST_VECTOR); return vector.validity; } static inline ValidityMask &Validity(Vector &vector) { D_ASSERT(vector.GetVectorType() == VectorType::FSST_VECTOR); return vector.validity; } static inline void SetValidity(Vector &vector, ValidityMask &new_validity) { D_ASSERT(vector.GetVectorType() == VectorType::FSST_VECTOR); vector.validity.Initialize(new_validity); } static inline const_data_ptr_t GetCompressedData(const Vector &vector) { D_ASSERT(vector.GetVectorType() == VectorType::FSST_VECTOR); return vector.data; } static inline data_ptr_t GetCompressedData(Vector &vector) { D_ASSERT(vector.GetVectorType() == VectorType::FSST_VECTOR); return vector.data; } template static inline const T *GetCompressedData(const Vector &vector) { return (const T *)FSSTVector::GetCompressedData(vector); } template static inline T *GetCompressedData(Vector &vector) { return (T *)FSSTVector::GetCompressedData(vector); } //! Decompresses an FSST_VECTOR into a FLAT_VECTOR. Note: validity is not copied. static void DecompressVector(const Vector &src, Vector &dst, idx_t src_offset, idx_t dst_offset, idx_t copy_count, const SelectionVector *sel); DUCKDB_API static string_t AddCompressedString(Vector &vector, string_t data); DUCKDB_API static string_t AddCompressedString(Vector &vector, const char *data, idx_t len); DUCKDB_API static void RegisterDecoder(Vector &vector, buffer_ptr &duckdb_fsst_decoder, const idx_t string_block_limit); DUCKDB_API static void *GetDecoder(const Vector &vector); DUCKDB_API static vector &GetDecompressBuffer(const Vector &vector); //! 
Setting the string count is required to be able to correctly flatten the vector DUCKDB_API static void SetCount(Vector &vector, idx_t count); DUCKDB_API static idx_t GetCount(Vector &vector); }; enum class MapInvalidReason : uint8_t { VALID, NULL_KEY, DUPLICATE_KEY, NOT_ALIGNED, INVALID_PARAMS }; struct MapVector { DUCKDB_API static const Vector &GetKeys(const Vector &vector); DUCKDB_API static const Vector &GetValues(const Vector &vector); DUCKDB_API static Vector &GetKeys(Vector &vector); DUCKDB_API static Vector &GetValues(Vector &vector); DUCKDB_API static MapInvalidReason CheckMapValidity(Vector &map, idx_t count, const SelectionVector &sel = *FlatVector::IncrementalSelectionVector()); DUCKDB_API static void EvalMapInvalidReason(MapInvalidReason reason); DUCKDB_API static void MapConversionVerify(Vector &vector, idx_t count); }; struct StructVector { DUCKDB_API static const vector> &GetEntries(const Vector &vector); DUCKDB_API static vector> &GetEntries(Vector &vector); }; struct ArrayVector { //! Gets a reference to the underlying child-vector of an array DUCKDB_API static const Vector &GetEntry(const Vector &vector); //! Gets a reference to the underlying child-vector of an array DUCKDB_API static Vector &GetEntry(Vector &vector); //! Gets the total size of the underlying child-vector of an array DUCKDB_API static idx_t GetTotalSize(const Vector &vector); private: template static T &GetEntryInternal(T &vector); }; struct VariantVector { //! Gets a reference to the 'keys' list (dictionary) of a Variant DUCKDB_API static Vector &GetKeys(Vector &vec); DUCKDB_API static Vector &GetKeys(const Vector &vec); //! Gets a reference to the 'children' list of a Variant DUCKDB_API static Vector &GetChildren(Vector &vec); DUCKDB_API static Vector &GetChildren(const Vector &vec); //! 
Gets a reference to the 'keys_index' inside the 'children' list of a Variant DUCKDB_API static Vector &GetChildrenKeysIndex(Vector &vec); DUCKDB_API static Vector &GetChildrenKeysIndex(const Vector &vec); //! Gets a reference to the 'values_index' inside the 'children' list of a Variant DUCKDB_API static Vector &GetChildrenValuesIndex(Vector &vec); DUCKDB_API static Vector &GetChildrenValuesIndex(const Vector &vec); //! Gets a reference to the 'values' list of a Variant DUCKDB_API static Vector &GetValues(Vector &vec); DUCKDB_API static Vector &GetValues(const Vector &vec); //! Gets a reference to the 'type_id' inside the 'values' list of a Variant DUCKDB_API static Vector &GetValuesTypeId(Vector &vec); DUCKDB_API static Vector &GetValuesTypeId(const Vector &vec); //! Gets a reference to the 'byte_offset' inside the 'values' list of a Variant DUCKDB_API static Vector &GetValuesByteOffset(Vector &vec); DUCKDB_API static Vector &GetValuesByteOffset(const Vector &vec); //! Gets a reference to the binary blob 'value', which encodes the data of the row DUCKDB_API static Vector &GetData(Vector &vec); DUCKDB_API static Vector &GetData(const Vector &vec); }; enum class UnionInvalidReason : uint8_t { VALID, TAG_OUT_OF_RANGE, NO_MEMBERS, VALIDITY_OVERLAP, TAG_MISMATCH, NULL_TAG }; struct UnionVector { // Unions are stored as structs, but the first child is always the "tag" // vector, specifying the currently selected member for that row. // The remaining children are the members of the union. // INVARIANTS: // 1. Only one member vector (the one "selected" by the tag) can be // non-NULL in each row. // // 2. The validity of the tag vector always matches the validity of the // union vector itself. // // 3. A valid union cannot have a NULL tag, but the selected member can // be NULL. therefore, there is a difference between a union that "is" // NULL and a union that "holds" a NULL. The latter still has a valid // tag. // // 4. 
For each tag in the tag vector, 0 <= tag < |members| //! Get the tag vector of a union vector DUCKDB_API static const Vector &GetTags(const Vector &v); DUCKDB_API static Vector &GetTags(Vector &v); //! Try to get the tag at the specific flat index of the union vector. Returns false if the tag is NULL. //! This will handle and map the index properly for constant and dictionary vectors internally. DUCKDB_API static bool TryGetTag(const Vector &vector, idx_t index, union_tag_t &tag); //! Get the member vector of a union vector by index DUCKDB_API static const Vector &GetMember(const Vector &vector, idx_t member_index); DUCKDB_API static Vector &GetMember(Vector &vector, idx_t member_index); //! Set every entry in the UnionVector to a specific member. //! This is useful to set the entire vector to a single member, e.g. when "creating" //! a union to return in a function, when you only have one alternative to return. //! if 'keep_tags_for_null' is false, the tags will be set to NULL where the member is NULL. //! (the validity of the tag vector will match the selected member vector) //! otherwise, they are all set to the 'tag'. //! 
This will also handle invalidation of the non-selected members DUCKDB_API static void SetToMember(Vector &vector, union_tag_t tag, Vector &member_vector, idx_t count, bool keep_tags_for_null); DUCKDB_API static UnionInvalidReason CheckUnionValidity(Vector &vector, idx_t count, const SelectionVector &sel = *FlatVector::IncrementalSelectionVector()); }; struct SequenceVector { static void GetSequence(const Vector &vector, int64_t &start, int64_t &increment, int64_t &sequence_count) { D_ASSERT(vector.GetVectorType() == VectorType::SEQUENCE_VECTOR); auto data = reinterpret_cast(vector.buffer->GetData()); start = data[0]; increment = data[1]; sequence_count = data[2]; } static void GetSequence(const Vector &vector, int64_t &start, int64_t &increment) { int64_t sequence_count; GetSequence(vector, start, increment, sequence_count); } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/vector_operations/vector_operations.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/data_chunk.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/arrow/arrow_wrapper.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/arrow/arrow.hpp // // //===----------------------------------------------------------------------===// #ifndef ARROW_FLAG_DICTIONARY_ORDERED #include #ifdef __cplusplus extern "C" { #endif #ifndef ARROW_C_DATA_INTERFACE #define ARROW_C_DATA_INTERFACE #define ARROW_FLAG_DICTIONARY_ORDERED 1 #define ARROW_FLAG_NULLABLE 2 #define 
ARROW_FLAG_MAP_KEYS_SORTED 4 struct ArrowSchema { //! Array type description const char *format; const char *name; const char *metadata; int64_t flags; int64_t n_children; struct ArrowSchema **children; struct ArrowSchema *dictionary; //! Release callback void (*release)(struct ArrowSchema *); //! Opaque producer-specific data void *private_data; //! Initialize all fields void Init() { flags = 0; n_children = 0; release = nullptr; private_data = nullptr; } }; struct ArrowArray { //! Array data description int64_t length; int64_t null_count; int64_t offset; int64_t n_buffers; int64_t n_children; const void **buffers; struct ArrowArray **children; struct ArrowArray *dictionary; //! Release callback void (*release)(struct ArrowArray *); //! Opaque producer-specific data void *private_data; //! Initialize all fields void Init() { length = 0; null_count = 0; offset = 0; n_buffers = 0; n_children = 0; release = nullptr; private_data = nullptr; } }; #endif #ifndef ARROW_C_STREAM_INTERFACE #define ARROW_C_STREAM_INTERFACE // EXPERIMENTAL struct ArrowArrayStream { // Callback to get the stream type // (will be the same for all arrays in the stream). // Return value: 0 if successful, an `errno`-compatible error code otherwise. int (*get_schema)(struct ArrowArrayStream *, struct ArrowSchema *out); // Callback to get the next array // (if no error and the array is released, the stream has ended) // Return value: 0 if successful, an `errno`-compatible error code otherwise. int (*get_next)(struct ArrowArrayStream *, struct ArrowArray *out); // Callback to get optional detailed error information. // This must only be called if the last stream operation failed // with a non-0 return code. The returned pointer is only valid until // the next operation on this stream (including release). // If unavailable, NULL is returned. const char *(*get_last_error)(struct ArrowArrayStream *); // Release callback: release the stream's own resources. 
// Note that arrays returned by `get_next` must be individually released. void (*release)(struct ArrowArrayStream *); // Opaque producer-specific data void *private_data; }; #endif #ifdef __cplusplus } #endif #endif //! Here we have the internal duckdb classes that interact with Arrow's Internal Header (i.e., duckdb/commons/arrow.hpp) namespace duckdb { class ArrowSchemaWrapper { public: ArrowSchema arrow_schema; ArrowSchemaWrapper() { arrow_schema.release = nullptr; } ~ArrowSchemaWrapper(); }; class ArrowArrayWrapper { public: ArrowArray arrow_array; ArrowArrayWrapper() { arrow_array.length = 0; arrow_array.release = nullptr; } ArrowArrayWrapper(ArrowArrayWrapper &&other) noexcept : arrow_array(other.arrow_array) { other.arrow_array.release = nullptr; } ArrowArrayWrapper &operator=(ArrowArrayWrapper &&other) noexcept { if (this != &other) { if (arrow_array.release) { arrow_array.release(&arrow_array); } arrow_array = other.arrow_array; other.arrow_array.release = nullptr; } return *this; } ~ArrowArrayWrapper(); }; class ArrowArrayStreamWrapper { public: ArrowArrayStream arrow_array_stream; int64_t number_of_rows; public: void GetSchema(ArrowSchemaWrapper &schema); virtual shared_ptr GetNextChunk(); const char *GetError(); virtual ~ArrowArrayStreamWrapper(); ArrowArrayStreamWrapper() { arrow_array_stream.release = nullptr; } }; } // namespace duckdb namespace duckdb { class Allocator; class ClientContext; class ExecutionContext; class VectorCache; class Serializer; class Deserializer; //! A Data Chunk represents a set of vectors. /*! The data chunk class is the intermediate representation used by the execution engine of DuckDB. It effectively represents a subset of a relation. It holds a set of vectors that all have the same length. DataChunk is initialized using the DataChunk::Initialize function by providing it with a vector of TypeIds for the Vector members. 
By default, this function will also allocate a chunk of memory in the DataChunk for the vectors and all the vectors will be referencing vectors to the data owned by the chunk. The reason for this behavior is that the underlying vectors can become referencing vectors to other chunks as well (i.e. in the case an operator does not alter the data, such as a Filter operator which only adds a selection vector). In addition to holding the data of the vectors, the DataChunk also owns the selection vector that underlying vectors can point to. */ class DataChunk { public: //! Creates an empty DataChunk DUCKDB_API DataChunk(); DUCKDB_API ~DataChunk(); //! The vectors owned by the DataChunk. vector data; public: inline idx_t size() const { // NOLINT return count; } inline idx_t ColumnCount() const { return data.size(); } inline void SetCardinality(idx_t count_p) { D_ASSERT(count_p <= capacity); this->count = count_p; } inline void SetCardinality(const DataChunk &other) { SetCardinality(other.size()); } inline idx_t GetCapacity() const { return capacity; } inline void SetCapacity(idx_t capacity_p) { this->capacity = capacity_p; } inline void SetCapacity(const DataChunk &other) { SetCapacity(other.capacity); } DUCKDB_API Value GetValue(idx_t col_idx, idx_t index) const; DUCKDB_API void SetValue(idx_t col_idx, idx_t index, const Value &val); idx_t GetAllocationSize() const; //! Returns true if all vectors in the DataChunk are constant DUCKDB_API bool AllConstant() const; //! Set the DataChunk to reference another data chunk DUCKDB_API void Reference(DataChunk &chunk); //! Set the DataChunk to own the data of data chunk, destroying the other chunk in the process DUCKDB_API void Move(DataChunk &chunk); //! Initializes a DataChunk with the given types and without any vector data allocation. DUCKDB_API void InitializeEmpty(const vector &types); //! Initializes a DataChunk with the given types. Then, if the corresponding boolean in the initialize-vector is //! 
true, it initializes the vector for that data type. DUCKDB_API void Initialize(ClientContext &context, const vector &types, idx_t capacity = STANDARD_VECTOR_SIZE); DUCKDB_API void Initialize(Allocator &allocator, const vector &types, idx_t capacity = STANDARD_VECTOR_SIZE); DUCKDB_API void Initialize(ClientContext &context, const vector &types, const vector &initialize, idx_t capacity = STANDARD_VECTOR_SIZE); DUCKDB_API void Initialize(Allocator &allocator, const vector &types, const vector &initialize, idx_t capacity = STANDARD_VECTOR_SIZE); //! Append the other DataChunk to this one. The column count and types of //! the two DataChunks have to match exactly. Throws an exception if there //! is not enough space in the chunk and resize is not allowed. DUCKDB_API void Append(const DataChunk &other, bool resize = false, SelectionVector *sel = nullptr, idx_t count = 0); //! Destroy all data and columns owned by this DataChunk DUCKDB_API void Destroy(); //! Copies the data from this chunk to another chunk. DUCKDB_API void Copy(DataChunk &other, idx_t offset = 0) const; DUCKDB_API void Copy(DataChunk &other, const SelectionVector &sel, const idx_t source_count, const idx_t offset = 0) const; //! Splits the DataChunk in two DUCKDB_API void Split(DataChunk &other, idx_t split_idx); //! Fuses a DataChunk onto the right of this one, and destroys the other. Inverse of Split. DUCKDB_API void Fuse(DataChunk &other); //! Makes this DataChunk reference the specified columns in the other DataChunk DUCKDB_API void ReferenceColumns(DataChunk &other, const vector &column_ids); //! Turn all the vectors from the chunk into flat vectors DUCKDB_API void Flatten(); // FIXME: this is DUCKDB_API, might need conversion back to regular unique ptr? DUCKDB_API unsafe_unique_array ToUnifiedFormat(); DUCKDB_API void Slice(const SelectionVector &sel_vector, idx_t count); //! Slice all Vectors from other.data[i] to data[i + 'col_offset'] //! 
Turning all Vectors into Dictionary Vectors, using 'sel' DUCKDB_API void Slice(const DataChunk &other, const SelectionVector &sel, idx_t count, idx_t col_offset = 0); //! Slice a DataChunk from "offset" to "offset + count" DUCKDB_API void Slice(idx_t offset, idx_t count); //! Resets the DataChunk to its state right after the DataChunk::Initialize //! function was called. This sets the count to 0, the capacity to initial_capacity and resets each member //! Vector to point back to the data owned by this DataChunk. DUCKDB_API void Reset(); DUCKDB_API void Serialize(Serializer &serializer, bool compressed_serialization = true) const; DUCKDB_API void Deserialize(Deserializer &source); //! Hashes the DataChunk to the target vector DUCKDB_API void Hash(Vector &result); //! Hashes specific vectors of the DataChunk to the target vector DUCKDB_API void Hash(vector &column_ids, Vector &result); //! Returns a list of types of the vectors of this data chunk DUCKDB_API vector GetTypes() const; //! Converts this DataChunk to a printable string representation DUCKDB_API string ToString() const; DUCKDB_API void Print() const; DataChunk(const DataChunk &) = delete; //! Verify that the DataChunk is in a consistent, not corrupt state. DEBUG //! FUNCTION ONLY! DUCKDB_API void Verify(); private: //! The amount of tuples stored in the data chunk idx_t count; //! The amount of tuples that can be stored in the data chunk idx_t capacity; //! The initial capacity of this chunk set during ::Initialize, used when resetting idx_t initial_capacity; //! Vector caches, used to store data when ::Initialize is called vector vector_caches; }; } // namespace duckdb #include namespace duckdb { class CastFunctionSet; struct GetCastFunctionInput; // VectorOperations contains a set of operations that operate on sets of // vectors. In general, the operators must all have the same type, otherwise an // exception is thrown. 
Note that the functions underneath use restrict // pointers, hence the data that the vectors point to (and hence the vector // themselves) should not be equal! For example, if you call the function Add(A, // B, A) then ASSERT_RESTRICT will be triggered. Instead call AddInPlace(A, B) // or Add(A, B, C) struct VectorOperations { //===--------------------------------------------------------------------===// // In-Place Operators //===--------------------------------------------------------------------===// //! left += delta static void AddInPlace(Vector &left, int64_t delta, idx_t count); //===--------------------------------------------------------------------===// // NULL Operators //===--------------------------------------------------------------------===// //! result = IS NOT NULL(input) static void IsNotNull(Vector &arg, Vector &result, idx_t count); //! result = IS NULL (input) static void IsNull(Vector &input, Vector &result, idx_t count); // Returns whether or not arg vector has a NULL value static bool HasNull(Vector &input, idx_t count); static bool HasNotNull(Vector &input, idx_t count); //! Count the number of not-NULL values. 
static idx_t CountNotNull(Vector &input, const idx_t count); //===--------------------------------------------------------------------===// // Boolean Operations //===--------------------------------------------------------------------===// // result = left && right static void And(Vector &left, Vector &right, Vector &result, idx_t count); // result = left || right static void Or(Vector &left, Vector &right, Vector &result, idx_t count); // result = NOT(left) static void Not(Vector &left, Vector &result, idx_t count); //===--------------------------------------------------------------------===// // Comparison Operations //===--------------------------------------------------------------------===// // result = left == right static void Equals(Vector &left, Vector &right, Vector &result, idx_t count); // result = left != right static void NotEquals(Vector &left, Vector &right, Vector &result, idx_t count); // result = left > right static void GreaterThan(Vector &left, Vector &right, Vector &result, idx_t count); // result = left >= right static void GreaterThanEquals(Vector &left, Vector &right, Vector &result, idx_t count); // result = left < right static void LessThan(Vector &left, Vector &right, Vector &result, idx_t count); // result = left <= right static void LessThanEquals(Vector &left, Vector &right, Vector &result, idx_t count); // result = A != B with nulls being equal static void DistinctFrom(Vector &left, Vector &right, Vector &result, idx_t count); // result := A == B with nulls being equal static void NotDistinctFrom(Vector &left, Vector &right, Vector &result, idx_t count); // result := A > B with nulls being maximal static void DistinctGreaterThan(Vector &left, Vector &right, Vector &result, idx_t count); // result := A >= B with nulls being maximal static void DistinctGreaterThanEquals(Vector &left, Vector &right, Vector &result, idx_t count); // result := A < B with nulls being maximal static void DistinctLessThan(Vector &left, Vector &right, Vector 
&result, idx_t count); // result := A <= B with nulls being maximal static void DistinctLessThanEquals(Vector &left, Vector &right, Vector &result, idx_t count); //===--------------------------------------------------------------------===// // Select Comparisons //===--------------------------------------------------------------------===// static idx_t Equals(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); static idx_t NotEquals(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); static idx_t GreaterThan(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); static idx_t GreaterThanEquals(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); static idx_t LessThan(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); static idx_t LessThanEquals(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); // true := A != B with nulls being equal static idx_t DistinctFrom(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel); // true := A == B with nulls being equal static idx_t NotDistinctFrom(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel); // true := A > B with nulls being maximal static idx_t DistinctGreaterThan(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); // true := A >= B with nulls being maximal static idx_t 
DistinctGreaterThanEquals(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); // true := A < B with nulls being maximal static idx_t DistinctLessThan(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); // true := A <= B with nulls being maximal static idx_t DistinctLessThanEquals(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); // true := A > B with nulls being minimal static idx_t DistinctGreaterThanNullsFirst(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); // true := A < B with nulls being minimal static idx_t DistinctLessThanNullsFirst(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); //===--------------------------------------------------------------------===// // Nested Comparisons //===--------------------------------------------------------------------===// // true := A != B with nulls being equal static idx_t NestedNotEquals(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); // true := A == B with nulls being equal static idx_t NestedEquals(Vector &left, Vector &right, optional_ptr sel, idx_t count, optional_ptr true_sel, optional_ptr false_sel, optional_ptr null_mask = nullptr); //===--------------------------------------------------------------------===// // Hash functions //===--------------------------------------------------------------------===// // hashes = HASH(input) static void Hash(Vector &input, Vector &hashes, idx_t count); static void Hash(Vector &input, Vector &hashes, const SelectionVector &rsel, idx_t 
count); // hashes ^= HASH(input) static void CombineHash(Vector &hashes, Vector &input, idx_t count); static void CombineHash(Vector &hashes, Vector &input, const SelectionVector &rsel, idx_t count); //===--------------------------------------------------------------------===// // Generate functions //===--------------------------------------------------------------------===// static void GenerateSequence(Vector &result, idx_t count, int64_t start = 0, int64_t increment = 1); static void GenerateSequence(Vector &result, idx_t count, const SelectionVector &sel, int64_t start = 0, int64_t increment = 1); //===--------------------------------------------------------------------===// // Helpers //===--------------------------------------------------------------------===// //! Cast the data from the source type to the target type. Any elements that could not be converted are turned into //! NULLs. If any elements cannot be converted, returns false and fills in the error_message. If no error message is //! provided, an exception is thrown instead. DUCKDB_API static bool TryCast(CastFunctionSet &set, GetCastFunctionInput &input, Vector &source, Vector &result, idx_t count, string *error_message, bool strict = false, const bool nullify_parent = false); DUCKDB_API static bool DefaultTryCast(Vector &source, Vector &result, idx_t count, string *error_message, bool strict = false); DUCKDB_API static bool TryCast(ClientContext &context, Vector &source, Vector &result, idx_t count, string *error_message, bool strict = false, const bool nullify_parent = false); //! Cast the data from the source type to the target type. Throws an exception if the cast fails. 
DUCKDB_API static void Cast(ClientContext &context, Vector &source, Vector &result, idx_t count, bool strict = false); DUCKDB_API static void DefaultCast(Vector &source, Vector &result, idx_t count, bool strict = false); // Copy the data of to the target vector static void Copy(const Vector &source, Vector &target, idx_t source_count, idx_t source_offset, idx_t target_offset); static void Copy(const Vector &source, Vector &target, const SelectionVector &sel, idx_t source_count, idx_t source_offset, idx_t target_offset); static void Copy(const Vector &source, Vector &target, const SelectionVector &sel, idx_t source_count, idx_t source_offset, idx_t target_offset, idx_t copy_count); // Copy the data of to the target location, setting null values to // NullValue. Used to store data without separate NULL mask. static void WriteToStorage(Vector &source, idx_t count, data_ptr_t target); // Reads the data of to the target vector, setting the nullmask // for any NullValue of source. Used to go back from storage to a proper vector static void ReadFromStorage(data_ptr_t source, idx_t count, Vector &result); }; } // namespace duckdb #include namespace duckdb { struct DefaultNullCheckOperator { template static inline bool Operation(LEFT_TYPE left, RIGHT_TYPE right) { return false; } }; struct BinaryStandardOperatorWrapper { template static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) { return OP::template Operation(left, right); } static bool AddsNulls() { return false; } }; struct BinarySingleArgumentOperatorWrapper { template static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) { return OP::template Operation(left, right); } static bool AddsNulls() { return false; } }; struct BinaryLambdaWrapper { template static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) { return fun(left, right); } static bool 
AddsNulls() { return false; } }; struct BinaryLambdaWrapperWithNulls { template static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) { return fun(left, right, mask, idx); } static bool AddsNulls() { return true; } }; struct BinaryExecutor { #ifndef DUCKDB_SMALLER_BINARY template static void ExecuteFlatLoop(const LEFT_TYPE *__restrict ldata, const RIGHT_TYPE *__restrict rdata, RESULT_TYPE *__restrict result_data, idx_t count, ValidityMask &mask, FUNC fun) { if (!LEFT_CONSTANT) { ASSERT_RESTRICT(ldata, ldata + count, result_data, result_data + count); } if (!RIGHT_CONSTANT) { ASSERT_RESTRICT(rdata, rdata + count, result_data, result_data + count); } if (!mask.AllValid()) { idx_t base_idx = 0; auto entry_count = ValidityMask::EntryCount(count); for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { auto validity_entry = mask.GetValidityEntry(entry_idx); idx_t next = MinValue(base_idx + ValidityMask::BITS_PER_VALUE, count); if (ValidityMask::AllValid(validity_entry)) { // all valid: perform operation for (; base_idx < next; base_idx++) { auto lentry = ldata[LEFT_CONSTANT ? 0 : base_idx]; auto rentry = rdata[RIGHT_CONSTANT ? 0 : base_idx]; result_data[base_idx] = OPWRAPPER::template Operation( fun, lentry, rentry, mask, base_idx); } } else if (ValidityMask::NoneValid(validity_entry)) { // nothing valid: skip all base_idx = next; continue; } else { // partially valid: need to check individual elements for validity idx_t start = base_idx; for (; base_idx < next; base_idx++) { if (ValidityMask::RowIsValid(validity_entry, base_idx - start)) { auto lentry = ldata[LEFT_CONSTANT ? 0 : base_idx]; auto rentry = rdata[RIGHT_CONSTANT ? 0 : base_idx]; result_data[base_idx] = OPWRAPPER::template Operation( fun, lentry, rentry, mask, base_idx); } } } } } else { for (idx_t i = 0; i < count; i++) { auto lentry = ldata[LEFT_CONSTANT ? 0 : i]; auto rentry = rdata[RIGHT_CONSTANT ? 
0 : i]; result_data[i] = OPWRAPPER::template Operation( fun, lentry, rentry, mask, i); } } } #endif template static void ExecuteConstant(Vector &left, Vector &right, Vector &result, FUNC fun) { result.SetVectorType(VectorType::CONSTANT_VECTOR); auto ldata = ConstantVector::GetData(left); auto rdata = ConstantVector::GetData(right); auto result_data = ConstantVector::GetData(result); if (ConstantVector::IsNull(left) || ConstantVector::IsNull(right)) { ConstantVector::SetNull(result, true); return; } *result_data = OPWRAPPER::template Operation( fun, *ldata, *rdata, ConstantVector::Validity(result), 0); } #ifndef DUCKDB_SMALLER_BINARY template static void ExecuteFlat(Vector &left, Vector &right, Vector &result, idx_t count, FUNC fun) { auto ldata = FlatVector::GetData(left); auto rdata = FlatVector::GetData(right); if ((LEFT_CONSTANT && ConstantVector::IsNull(left)) || (RIGHT_CONSTANT && ConstantVector::IsNull(right))) { // either left or right is constant NULL: result is constant NULL result.SetVectorType(VectorType::CONSTANT_VECTOR); ConstantVector::SetNull(result, true); return; } result.SetVectorType(VectorType::FLAT_VECTOR); auto result_data = FlatVector::GetData(result); auto &result_validity = FlatVector::Validity(result); if (LEFT_CONSTANT) { if (OPWRAPPER::AddsNulls()) { result_validity.Copy(FlatVector::Validity(right), count); } else { FlatVector::SetValidity(result, FlatVector::Validity(right)); } } else if (RIGHT_CONSTANT) { if (OPWRAPPER::AddsNulls()) { result_validity.Copy(FlatVector::Validity(left), count); } else { FlatVector::SetValidity(result, FlatVector::Validity(left)); } } else { if (OPWRAPPER::AddsNulls()) { result_validity.Copy(FlatVector::Validity(left), count); if (result_validity.AllValid()) { result_validity.Copy(FlatVector::Validity(right), count); } else { result_validity.Combine(FlatVector::Validity(right), count); } } else { FlatVector::SetValidity(result, FlatVector::Validity(left)); 
result_validity.Combine(FlatVector::Validity(right), count); } } ExecuteFlatLoop( ldata, rdata, result_data, count, result_validity, fun); } #endif template static void ExecuteGenericLoop(const LEFT_TYPE *__restrict ldata, const RIGHT_TYPE *__restrict rdata, RESULT_TYPE *__restrict result_data, const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel, idx_t count, ValidityMask &lvalidity, ValidityMask &rvalidity, ValidityMask &result_validity, FUNC fun) { if (!lvalidity.AllValid() || !rvalidity.AllValid()) { for (idx_t i = 0; i < count; i++) { auto lindex = lsel->get_index(i); auto rindex = rsel->get_index(i); if (lvalidity.RowIsValid(lindex) && rvalidity.RowIsValid(rindex)) { auto lentry = ldata[lindex]; auto rentry = rdata[rindex]; result_data[i] = OPWRAPPER::template Operation( fun, lentry, rentry, result_validity, i); } else { result_validity.SetInvalid(i); } } } else { for (idx_t i = 0; i < count; i++) { auto lentry = ldata[lsel->get_index(i)]; auto rentry = rdata[rsel->get_index(i)]; result_data[i] = OPWRAPPER::template Operation( fun, lentry, rentry, result_validity, i); } } } template static void ExecuteGeneric(Vector &left, Vector &right, Vector &result, idx_t count, FUNC fun) { UnifiedVectorFormat ldata, rdata; left.ToUnifiedFormat(count, ldata); right.ToUnifiedFormat(count, rdata); result.SetVectorType(VectorType::FLAT_VECTOR); auto result_data = FlatVector::GetData(result); ExecuteGenericLoop( UnifiedVectorFormat::GetData(ldata), UnifiedVectorFormat::GetData(rdata), result_data, ldata.sel, rdata.sel, count, ldata.validity, rdata.validity, FlatVector::Validity(result), fun); } template static void ExecuteSwitch(Vector &left, Vector &right, Vector &result, idx_t count, FUNC fun) { auto left_vector_type = left.GetVectorType(); auto right_vector_type = right.GetVectorType(); if (left_vector_type == VectorType::CONSTANT_VECTOR && right_vector_type == VectorType::CONSTANT_VECTOR) { ExecuteConstant(left, right, result, fun); #ifndef 
DUCKDB_SMALLER_BINARY } else if (left_vector_type == VectorType::FLAT_VECTOR && right_vector_type == VectorType::CONSTANT_VECTOR) { ExecuteFlat(left, right, result, count, fun); } else if (left_vector_type == VectorType::CONSTANT_VECTOR && right_vector_type == VectorType::FLAT_VECTOR) { ExecuteFlat(left, right, result, count, fun); } else if (left_vector_type == VectorType::FLAT_VECTOR && right_vector_type == VectorType::FLAT_VECTOR) { ExecuteFlat(left, right, result, count, fun); #endif } else { ExecuteGeneric(left, right, result, count, fun); } } public: template > static void Execute(Vector &left, Vector &right, Vector &result, idx_t count, FUNC fun) { ExecuteSwitch(left, right, result, count, fun); } template static void Execute(Vector &left, Vector &right, Vector &result, idx_t count) { ExecuteSwitch(left, right, result, count, false); } template static void ExecuteStandard(Vector &left, Vector &right, Vector &result, idx_t count) { ExecuteSwitch(left, right, result, count, false); } template > static void ExecuteWithNulls(Vector &left, Vector &right, Vector &result, idx_t count, FUNC fun) { ExecuteSwitch(left, right, result, count, fun); } public: template static idx_t SelectConstant(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { auto ldata = ConstantVector::GetData(left); auto rdata = ConstantVector::GetData(right); // both sides are constant, return either 0 or the count // in this case we do not fill in the result selection vector at all if (ConstantVector::IsNull(left) || ConstantVector::IsNull(right) || !OP::Operation(*ldata, *rdata)) { if (false_sel) { for (idx_t i = 0; i < count; i++) { false_sel->set_index(i, sel->get_index(i)); } } return 0; } else { if (true_sel) { for (idx_t i = 0; i < count; i++) { true_sel->set_index(i, sel->get_index(i)); } } return count; } } #ifndef DUCKDB_SMALLER_BINARY template static inline idx_t SelectFlatLoop(const LEFT_TYPE *__restrict ldata, 
const RIGHT_TYPE *__restrict rdata, const SelectionVector *sel, idx_t count, ValidityMask &validity_mask, SelectionVector *true_sel, SelectionVector *false_sel) { idx_t true_count = 0, false_count = 0; idx_t base_idx = 0; auto entry_count = ValidityMask::EntryCount(count); for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { auto validity_entry = validity_mask.GetValidityEntry(entry_idx); idx_t next = MinValue(base_idx + ValidityMask::BITS_PER_VALUE, count); if (ValidityMask::AllValid(validity_entry)) { // all valid: perform operation for (; base_idx < next; base_idx++) { idx_t result_idx = sel->get_index(base_idx); idx_t lidx = LEFT_CONSTANT ? 0 : base_idx; idx_t ridx = RIGHT_CONSTANT ? 0 : base_idx; bool comparison_result = OP::Operation(ldata[lidx], rdata[ridx]); if (HAS_TRUE_SEL) { true_sel->set_index(true_count, result_idx); true_count += comparison_result; } if (HAS_FALSE_SEL) { false_sel->set_index(false_count, result_idx); false_count += !comparison_result; } } } else if (ValidityMask::NoneValid(validity_entry)) { // nothing valid: skip all if (HAS_FALSE_SEL) { for (; base_idx < next; base_idx++) { idx_t result_idx = sel->get_index(base_idx); false_sel->set_index(false_count, result_idx); false_count++; } } base_idx = next; continue; } else { // partially valid: need to check individual elements for validity idx_t start = base_idx; for (; base_idx < next; base_idx++) { idx_t result_idx = sel->get_index(base_idx); idx_t lidx = LEFT_CONSTANT ? 0 : base_idx; idx_t ridx = RIGHT_CONSTANT ? 
0 : base_idx; bool comparison_result = ValidityMask::RowIsValid(validity_entry, base_idx - start) && OP::Operation(ldata[lidx], rdata[ridx]); if (HAS_TRUE_SEL) { true_sel->set_index(true_count, result_idx); true_count += comparison_result; } if (HAS_FALSE_SEL) { false_sel->set_index(false_count, result_idx); false_count += !comparison_result; } } } } if (HAS_TRUE_SEL) { return true_count; } else { return count - false_count; } } template static inline idx_t SelectFlatLoopSwitch(const LEFT_TYPE *__restrict ldata, const RIGHT_TYPE *__restrict rdata, const SelectionVector *sel, idx_t count, ValidityMask &mask, SelectionVector *true_sel, SelectionVector *false_sel) { if (true_sel && false_sel) { return SelectFlatLoop( ldata, rdata, sel, count, mask, true_sel, false_sel); } else if (true_sel) { return SelectFlatLoop( ldata, rdata, sel, count, mask, true_sel, false_sel); } else { D_ASSERT(false_sel); return SelectFlatLoop( ldata, rdata, sel, count, mask, true_sel, false_sel); } } template static idx_t SelectFlat(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { auto ldata = FlatVector::GetData(left); auto rdata = FlatVector::GetData(right); if (LEFT_CONSTANT && ConstantVector::IsNull(left)) { if (false_sel) { for (idx_t i = 0; i < count; i++) { false_sel->set_index(i, sel->get_index(i)); } } return 0; } if (RIGHT_CONSTANT && ConstantVector::IsNull(right)) { if (false_sel) { for (idx_t i = 0; i < count; i++) { false_sel->set_index(i, sel->get_index(i)); } } return 0; } if (LEFT_CONSTANT) { return SelectFlatLoopSwitch( ldata, rdata, sel, count, FlatVector::Validity(right), true_sel, false_sel); } else if (RIGHT_CONSTANT) { return SelectFlatLoopSwitch( ldata, rdata, sel, count, FlatVector::Validity(left), true_sel, false_sel); } else { ValidityMask combined_mask = FlatVector::Validity(left); combined_mask.Combine(FlatVector::Validity(right), count); return SelectFlatLoopSwitch( ldata, rdata, sel, 
count, combined_mask, true_sel, false_sel); } } #endif #ifndef DUCKDB_SMALLER_BINARY template #else template #endif static inline idx_t SelectGenericLoop(const LEFT_TYPE *__restrict ldata, const RIGHT_TYPE *__restrict rdata, const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel, const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lvalidity, ValidityMask &rvalidity, SelectionVector *true_sel, SelectionVector *false_sel) { idx_t true_count = 0, false_count = 0; #ifdef DUCKDB_SMALLER_BINARY const bool HAS_TRUE_SEL = true_sel; const bool HAS_FALSE_SEL = false_sel; const bool NO_NULL = false; #endif for (idx_t i = 0; i < count; i++) { auto result_idx = result_sel->get_index(i); auto lindex = lsel->get_index(i); auto rindex = rsel->get_index(i); if ((NO_NULL || (lvalidity.RowIsValid(lindex) && rvalidity.RowIsValid(rindex))) && OP::Operation(ldata[lindex], rdata[rindex])) { if (HAS_TRUE_SEL) { true_sel->set_index(true_count++, result_idx); } } else { if (HAS_FALSE_SEL) { false_sel->set_index(false_count++, result_idx); } } } if (HAS_TRUE_SEL) { return true_count; } else { return count - false_count; } } #ifndef DUCKDB_SMALLER_BINARY template static inline idx_t SelectGenericLoopSelSwitch(const LEFT_TYPE *__restrict ldata, const RIGHT_TYPE *__restrict rdata, const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel, const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lvalidity, ValidityMask &rvalidity, SelectionVector *true_sel, SelectionVector *false_sel) { if (true_sel && false_sel) { return SelectGenericLoop( ldata, rdata, lsel, rsel, result_sel, count, lvalidity, rvalidity, true_sel, false_sel); } else if (true_sel) { return SelectGenericLoop( ldata, rdata, lsel, rsel, result_sel, count, lvalidity, rvalidity, true_sel, false_sel); } else { D_ASSERT(false_sel); return SelectGenericLoop( ldata, rdata, lsel, rsel, result_sel, count, lvalidity, rvalidity, true_sel, false_sel); } } 
#endif template static inline idx_t SelectGenericLoopSwitch(const LEFT_TYPE *__restrict ldata, const RIGHT_TYPE *__restrict rdata, const SelectionVector *__restrict lsel, const SelectionVector *__restrict rsel, const SelectionVector *__restrict result_sel, idx_t count, ValidityMask &lvalidity, ValidityMask &rvalidity, SelectionVector *true_sel, SelectionVector *false_sel) { #ifndef DUCKDB_SMALLER_BINARY if (!lvalidity.AllValid() || !rvalidity.AllValid()) { return SelectGenericLoopSelSwitch( ldata, rdata, lsel, rsel, result_sel, count, lvalidity, rvalidity, true_sel, false_sel); } else { return SelectGenericLoopSelSwitch( ldata, rdata, lsel, rsel, result_sel, count, lvalidity, rvalidity, true_sel, false_sel); } #else return SelectGenericLoop(ldata, rdata, lsel, rsel, result_sel, count, lvalidity, rvalidity, true_sel, false_sel); #endif } template static idx_t SelectGeneric(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { UnifiedVectorFormat ldata, rdata; left.ToUnifiedFormat(count, ldata); right.ToUnifiedFormat(count, rdata); return SelectGenericLoopSwitch( UnifiedVectorFormat::GetData(ldata), UnifiedVectorFormat::GetData(rdata), ldata.sel, rdata.sel, sel, count, ldata.validity, rdata.validity, true_sel, false_sel); } template static idx_t Select(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { if (!sel) { sel = FlatVector::IncrementalSelectionVector(); } if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() == VectorType::CONSTANT_VECTOR) { return SelectConstant(left, right, sel, count, true_sel, false_sel); #ifndef DUCKDB_SMALLER_BINARY } else if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() == VectorType::FLAT_VECTOR) { return SelectFlat(left, right, sel, count, true_sel, false_sel); } else if (left.GetVectorType() == VectorType::FLAT_VECTOR && 
right.GetVectorType() == VectorType::CONSTANT_VECTOR) { return SelectFlat(left, right, sel, count, true_sel, false_sel); } else if (left.GetVectorType() == VectorType::FLAT_VECTOR && right.GetVectorType() == VectorType::FLAT_VECTOR) { return SelectFlat(left, right, sel, count, true_sel, false_sel); #endif } else { return SelectGeneric(left, right, sel, count, true_sel, false_sel); } } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/vector_operations/ternary_executor.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { template struct TernaryStandardOperatorWrapper { template static inline RESULT_TYPE Operation(FUN fun, A_TYPE a, B_TYPE b, C_TYPE c, ValidityMask &mask, idx_t idx) { return OP::template Operation(a, b, c); } }; struct TernaryLambdaWrapper { template static inline RESULT_TYPE Operation(FUN fun, A_TYPE a, B_TYPE b, C_TYPE c, ValidityMask &mask, idx_t idx) { return fun(a, b, c); } }; struct TernaryLambdaWrapperWithNulls { template static inline RESULT_TYPE Operation(FUN fun, A_TYPE a, B_TYPE b, C_TYPE c, ValidityMask &mask, idx_t idx) { return fun(a, b, c, mask, idx); } }; struct TernaryExecutor { private: template static inline void ExecuteLoop(const A_TYPE *__restrict adata, const B_TYPE *__restrict bdata, const C_TYPE *__restrict cdata, RESULT_TYPE *__restrict result_data, idx_t count, const SelectionVector &asel, const SelectionVector &bsel, const SelectionVector &csel, ValidityMask &avalidity, ValidityMask &bvalidity, ValidityMask &cvalidity, ValidityMask &result_validity, FUN fun) { if (!avalidity.AllValid() || !bvalidity.AllValid() || !cvalidity.AllValid()) { for (idx_t i = 0; i < count; i++) { auto aidx = asel.get_index(i); auto bidx = bsel.get_index(i); auto cidx = csel.get_index(i); if (avalidity.RowIsValid(aidx) && bvalidity.RowIsValid(bidx) && cvalidity.RowIsValid(cidx)) { result_data[i] = 
OPWRAPPER::template Operation( fun, adata[aidx], bdata[bidx], cdata[cidx], result_validity, i); } else { result_validity.SetInvalid(i); } } } else { for (idx_t i = 0; i < count; i++) { auto aidx = asel.get_index(i); auto bidx = bsel.get_index(i); auto cidx = csel.get_index(i); result_data[i] = OPWRAPPER::template Operation( fun, adata[aidx], bdata[bidx], cdata[cidx], result_validity, i); } } } public: template static void ExecuteGeneric(Vector &a, Vector &b, Vector &c, Vector &result, idx_t count, FUN fun) { if (a.GetVectorType() == VectorType::CONSTANT_VECTOR && b.GetVectorType() == VectorType::CONSTANT_VECTOR && c.GetVectorType() == VectorType::CONSTANT_VECTOR) { result.SetVectorType(VectorType::CONSTANT_VECTOR); if (ConstantVector::IsNull(a) || ConstantVector::IsNull(b) || ConstantVector::IsNull(c)) { ConstantVector::SetNull(result, true); } else { auto adata = ConstantVector::GetData(a); auto bdata = ConstantVector::GetData(b); auto cdata = ConstantVector::GetData(c); auto result_data = ConstantVector::GetData(result); auto &result_validity = ConstantVector::Validity(result); result_data[0] = OPWRAPPER::template Operation( fun, adata[0], bdata[0], cdata[0], result_validity, 0); } } else { result.SetVectorType(VectorType::FLAT_VECTOR); UnifiedVectorFormat adata, bdata, cdata; a.ToUnifiedFormat(count, adata); b.ToUnifiedFormat(count, bdata); c.ToUnifiedFormat(count, cdata); ExecuteLoop( UnifiedVectorFormat::GetData(adata), UnifiedVectorFormat::GetData(bdata), UnifiedVectorFormat::GetData(cdata), FlatVector::GetData(result), count, *adata.sel, *bdata.sel, *cdata.sel, adata.validity, bdata.validity, cdata.validity, FlatVector::Validity(result), fun); } } template > static void Execute(Vector &a, Vector &b, Vector &c, Vector &result, idx_t count, FUN fun) { ExecuteGeneric(a, b, c, result, count, fun); } template static void ExecuteStandard(Vector &a, Vector &b, Vector &c, Vector &result, idx_t count) { ExecuteGeneric, bool>(a, b, c, result, count, false); } template 
> static void ExecuteWithNulls(Vector &a, Vector &b, Vector &c, Vector &result, idx_t count, FUN fun) { ExecuteGeneric(a, b, c, result, count, fun); } private: template static inline idx_t SelectLoop(const A_TYPE *__restrict adata, const B_TYPE *__restrict bdata, const C_TYPE *__restrict cdata, const SelectionVector *result_sel, idx_t count, const SelectionVector &asel, const SelectionVector &bsel, const SelectionVector &csel, ValidityMask &avalidity, ValidityMask &bvalidity, ValidityMask &cvalidity, SelectionVector *true_sel, SelectionVector *false_sel) { idx_t true_count = 0, false_count = 0; for (idx_t i = 0; i < count; i++) { auto result_idx = result_sel->get_index(i); auto aidx = asel.get_index(i); auto bidx = bsel.get_index(i); auto cidx = csel.get_index(i); bool comparison_result = (NO_NULL || (avalidity.RowIsValid(aidx) && bvalidity.RowIsValid(bidx) && cvalidity.RowIsValid(cidx))) && OP::Operation(adata[aidx], bdata[bidx], cdata[cidx]); if (HAS_TRUE_SEL) { true_sel->set_index(true_count, result_idx); true_count += comparison_result; } if (HAS_FALSE_SEL) { false_sel->set_index(false_count, result_idx); false_count += !comparison_result; } } if (HAS_TRUE_SEL) { return true_count; } else { return count - false_count; } } template static inline idx_t SelectLoopSelSwitch(UnifiedVectorFormat &adata, UnifiedVectorFormat &bdata, UnifiedVectorFormat &cdata, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { if (true_sel && false_sel) { return SelectLoop( UnifiedVectorFormat::GetData(adata), UnifiedVectorFormat::GetData(bdata), UnifiedVectorFormat::GetData(cdata), sel, count, *adata.sel, *bdata.sel, *cdata.sel, adata.validity, bdata.validity, cdata.validity, true_sel, false_sel); } else if (true_sel) { return SelectLoop( UnifiedVectorFormat::GetData(adata), UnifiedVectorFormat::GetData(bdata), UnifiedVectorFormat::GetData(cdata), sel, count, *adata.sel, *bdata.sel, *cdata.sel, adata.validity, bdata.validity, 
cdata.validity, true_sel, false_sel); } else { D_ASSERT(false_sel); return SelectLoop( UnifiedVectorFormat::GetData(adata), UnifiedVectorFormat::GetData(bdata), UnifiedVectorFormat::GetData(cdata), sel, count, *adata.sel, *bdata.sel, *cdata.sel, adata.validity, bdata.validity, cdata.validity, true_sel, false_sel); } } template static inline idx_t SelectLoopSwitch(UnifiedVectorFormat &adata, UnifiedVectorFormat &bdata, UnifiedVectorFormat &cdata, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { if (!adata.validity.AllValid() || !bdata.validity.AllValid() || !cdata.validity.AllValid()) { return SelectLoopSelSwitch(adata, bdata, cdata, sel, count, true_sel, false_sel); } else { return SelectLoopSelSwitch(adata, bdata, cdata, sel, count, true_sel, false_sel); } } public: template static idx_t Select(Vector &a, Vector &b, Vector &c, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { if (!sel) { sel = FlatVector::IncrementalSelectionVector(); } UnifiedVectorFormat adata, bdata, cdata; a.ToUnifiedFormat(count, adata); b.ToUnifiedFormat(count, bdata); c.ToUnifiedFormat(count, cdata); return SelectLoopSwitch(adata, bdata, cdata, sel, count, true_sel, false_sel); } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/vector_operations/unary_executor.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/function_errors.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! 
Whether or not a function can throw an error or not enum class FunctionErrors : uint8_t { CANNOT_ERROR = 0, CAN_THROW_RUNTIME_ERROR = 1 }; } // namespace duckdb #include namespace duckdb { struct UnaryOperatorWrapper { template static inline RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { return OP::template Operation(input); } }; struct UnaryLambdaWrapper { template static inline RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { auto fun = (FUNC *)dataptr; return (*fun)(input); } }; struct GenericUnaryWrapper { template static inline RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { return OP::template Operation(input, mask, idx, dataptr); } }; struct UnaryLambdaWrapperWithNulls { template static inline RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { auto fun = (FUNC *)dataptr; return (*fun)(input, mask, idx); } }; template struct UnaryStringOperator { template static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { auto vector = reinterpret_cast(dataptr); return OP::template Operation(input, *vector); } }; struct UnaryExecutor { private: template static inline void ExecuteLoop(const INPUT_TYPE *__restrict ldata, RESULT_TYPE *__restrict result_data, idx_t count, const SelectionVector *__restrict sel_vector, ValidityMask &mask, ValidityMask &result_mask, void *dataptr, bool adds_nulls) { #ifdef DEBUG // ldata may point to a compressed dictionary buffer which can be smaller than ldata + count idx_t max_index = 0; for (idx_t i = 0; i < count; i++) { auto idx = sel_vector->get_index(i); max_index = MaxValue(max_index, idx); } ASSERT_RESTRICT(ldata, ldata + max_index, result_data, result_data + count); #endif if (!mask.AllValid()) { for (idx_t i = 0; i < count; i++) { auto idx = sel_vector->get_index(i); if (mask.RowIsValidUnsafe(idx)) { result_data[i] = OPWRAPPER::template 
Operation(ldata[idx], result_mask, i, dataptr); } else { result_mask.SetInvalid(i); } } } else { for (idx_t i = 0; i < count; i++) { auto idx = sel_vector->get_index(i); result_data[i] = OPWRAPPER::template Operation(ldata[idx], result_mask, i, dataptr); } } } #ifndef DUCKDB_SMALLER_BINARY template static inline void ExecuteFlat(const INPUT_TYPE *__restrict ldata, RESULT_TYPE *__restrict result_data, idx_t count, ValidityMask &mask, ValidityMask &result_mask, void *dataptr, bool adds_nulls) { ASSERT_RESTRICT(ldata, ldata + count, result_data, result_data + count); if (!mask.AllValid()) { if (!adds_nulls) { result_mask.Initialize(mask); } else { result_mask.Copy(mask, count); } idx_t base_idx = 0; auto entry_count = ValidityMask::EntryCount(count); for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { auto validity_entry = mask.GetValidityEntry(entry_idx); idx_t next = MinValue(base_idx + ValidityMask::BITS_PER_VALUE, count); if (ValidityMask::AllValid(validity_entry)) { // all valid: perform operation for (; base_idx < next; base_idx++) { result_data[base_idx] = OPWRAPPER::template Operation( ldata[base_idx], result_mask, base_idx, dataptr); } } else if (ValidityMask::NoneValid(validity_entry)) { // nothing valid: skip all base_idx = next; continue; } else { // partially valid: need to check individual elements for validity idx_t start = base_idx; for (; base_idx < next; base_idx++) { if (ValidityMask::RowIsValid(validity_entry, base_idx - start)) { D_ASSERT(mask.RowIsValid(base_idx)); result_data[base_idx] = OPWRAPPER::template Operation( ldata[base_idx], result_mask, base_idx, dataptr); } } } } } else { for (idx_t i = 0; i < count; i++) { result_data[i] = OPWRAPPER::template Operation(ldata[i], result_mask, i, dataptr); } } } #endif template static inline void ExecuteStandard(Vector &input, Vector &result, idx_t count, void *dataptr, bool adds_nulls, FunctionErrors errors = FunctionErrors::CAN_THROW_RUNTIME_ERROR) { switch (input.GetVectorType()) { 
case VectorType::CONSTANT_VECTOR: { result.SetVectorType(VectorType::CONSTANT_VECTOR); auto result_data = ConstantVector::GetData(result); auto ldata = ConstantVector::GetData(input); if (ConstantVector::IsNull(input)) { ConstantVector::SetNull(result, true); } else { ConstantVector::SetNull(result, false); *result_data = OPWRAPPER::template Operation( *ldata, ConstantVector::Validity(result), 0, dataptr); } break; } #ifndef DUCKDB_SMALLER_BINARY case VectorType::FLAT_VECTOR: { result.SetVectorType(VectorType::FLAT_VECTOR); auto result_data = FlatVector::GetData(result); auto ldata = FlatVector::GetData(input); ExecuteFlat(ldata, result_data, count, FlatVector::Validity(input), FlatVector::Validity(result), dataptr, adds_nulls); break; } case VectorType::DICTIONARY_VECTOR: { // dictionary vector - we can run the function ONLY on the dictionary in some cases // we can only do this if the function does not throw errors // we can execute the function on a value that is in the dictionary but that is not referenced // if the function can throw errors - this will result in us (incorrectly) throwing an error if (errors == FunctionErrors::CANNOT_ERROR) { static constexpr idx_t DICTIONARY_THRESHOLD = 2; auto dict_size = DictionaryVector::DictionarySize(input); if (dict_size.IsValid() && dict_size.GetIndex() * DICTIONARY_THRESHOLD <= count) { // we can operate directly on the dictionary if we have a dictionary size // but this only makes sense if the dictionary size is smaller than the count by some factor auto &dictionary_values = DictionaryVector::Child(input); if (dictionary_values.GetVectorType() == VectorType::FLAT_VECTOR) { // execute the function over the dictionary auto result_data = FlatVector::GetData(result); auto ldata = FlatVector::GetData(dictionary_values); ExecuteFlat( ldata, result_data, dict_size.GetIndex(), FlatVector::Validity(dictionary_values), FlatVector::Validity(result), dataptr, adds_nulls); // slice the result with the original offsets auto 
&offsets = DictionaryVector::SelVector(input); result.Dictionary(result, dict_size.GetIndex(), offsets, count); break; } } } DUCKDB_EXPLICIT_FALLTHROUGH; } #endif default: { UnifiedVectorFormat vdata; input.ToUnifiedFormat(count, vdata); result.SetVectorType(VectorType::FLAT_VECTOR); auto result_data = FlatVector::GetData(result); auto ldata = UnifiedVectorFormat::GetData(vdata); ExecuteLoop(ldata, result_data, count, vdata.sel, vdata.validity, FlatVector::Validity(result), dataptr, adds_nulls); break; } } } public: template static void Execute(Vector &input, Vector &result, idx_t count) { ExecuteStandard(input, result, count, nullptr, false); } template > static void Execute(Vector &input, Vector &result, idx_t count, FUNC fun, FunctionErrors errors = FunctionErrors::CAN_THROW_RUNTIME_ERROR) { ExecuteStandard( input, result, count, reinterpret_cast(&fun), false, errors); } template static void GenericExecute(Vector &input, Vector &result, idx_t count, void *dataptr, bool adds_nulls = false) { ExecuteStandard(input, result, count, dataptr, adds_nulls); } template > static void ExecuteWithNulls(Vector &input, Vector &result, idx_t count, FUNC fun) { ExecuteStandard(input, result, count, (void *)&fun, true); } template static void ExecuteString(Vector &input, Vector &result, idx_t count) { UnaryExecutor::GenericExecute>(input, result, count, (void *)&result); } private: // Select logic copied from TernaryExecutor, but with a lambda instead of a static functor template , bool NO_NULL, bool HAS_TRUE_SEL, bool HAS_FALSE_SEL> static inline idx_t SelectLoop(const INPUT_TYPE *__restrict input_data, const SelectionVector *result_sel, const idx_t count, FUNC fun, const SelectionVector &input_sel, ValidityMask &input_validity, SelectionVector *true_sel, SelectionVector *false_sel) { idx_t true_count = 0, false_count = 0; for (idx_t i = 0; i < count; i++) { const auto result_idx = result_sel->get_index(i); const auto idx = input_sel.get_index(i); const bool comparison_result = 
(NO_NULL || input_validity.RowIsValid(idx)) && fun(input_data[idx]); if (HAS_TRUE_SEL) { true_sel->set_index(true_count, result_idx); true_count += comparison_result; } if (HAS_FALSE_SEL) { false_sel->set_index(false_count, result_idx); false_count += !comparison_result; } } if (HAS_TRUE_SEL) { return true_count; } else { return count - false_count; } } template , bool NO_NULL> static inline idx_t SelectLoopSelSwitch(UnifiedVectorFormat &input_data, const SelectionVector *sel, const idx_t count, FUNC fun, SelectionVector *true_sel, SelectionVector *false_sel) { if (true_sel && false_sel) { return SelectLoop( UnifiedVectorFormat::GetData(input_data), sel, count, fun, *input_data.sel, input_data.validity, true_sel, false_sel); } else if (true_sel) { return SelectLoop( UnifiedVectorFormat::GetData(input_data), sel, count, fun, *input_data.sel, input_data.validity, true_sel, false_sel); } else { D_ASSERT(false_sel); return SelectLoop( UnifiedVectorFormat::GetData(input_data), sel, count, fun, *input_data.sel, input_data.validity, true_sel, false_sel); } } template > static inline idx_t SelectLoopSwitch(UnifiedVectorFormat &input_data, const SelectionVector *sel, const idx_t count, FUNC fun, SelectionVector *true_sel, SelectionVector *false_sel) { if (!input_data.validity.AllValid()) { return SelectLoopSelSwitch(input_data, sel, count, fun, true_sel, false_sel); } else { return SelectLoopSelSwitch(input_data, sel, count, fun, true_sel, false_sel); } } public: template > static idx_t Select(Vector &input, const SelectionVector *sel, const idx_t count, FUNC fun, SelectionVector *true_sel, SelectionVector *false_sel) { if (!sel) { sel = FlatVector::IncrementalSelectionVector(); } UnifiedVectorFormat input_data; input.ToUnifiedFormat(count, input_data); return SelectLoopSwitch(input_data, sel, count, fun, true_sel, false_sel); } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // 
duckdb/execution/expression_executor_state.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/function.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/named_parameter_map.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { using named_parameter_type_map_t = case_insensitive_map_t; using named_parameter_map_t = case_insensitive_map_t; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/external_dependencies.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { class DependencyItem { public: virtual ~DependencyItem() {}; public: template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; using dependency_scan_t = std::function item)>; class ExternalDependency { public: explicit ExternalDependency() { } ~ExternalDependency() { } public: void AddDependency(const string &name, shared_ptr item) { objects[name] = std::move(item); } shared_ptr GetDependency(const string &name) const { auto it = objects.find(name); if (it == objects.end()) { return nullptr; } return it->second; } void ScanDependencies(const dependency_scan_t &callback) { for (auto &kv : objects) { callback(kv.first, kv.second); } } private: //! 
The objects encompassed by this dependency case_insensitive_map_t> objects; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/column_definition.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_expression.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/base_expression.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/expression_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Predicate Expression Operation Types //===--------------------------------------------------------------------===// enum class ExpressionType : uint8_t { INVALID = 0, // explicitly cast left as right (right is integer in ValueType enum) OPERATOR_CAST = 12, // logical not operator OPERATOR_NOT = 13, // is null operator OPERATOR_IS_NULL = 14, // is not null operator OPERATOR_IS_NOT_NULL = 15, // unpack operator OPERATOR_UNPACK = 16, // ----------------------------- // Comparison Operators // ----------------------------- // equal operator between left and right COMPARE_EQUAL = 25, // compare initial boundary COMPARE_BOUNDARY_START = COMPARE_EQUAL, // inequal operator between left and right COMPARE_NOTEQUAL = 26, // less than operator between left and right COMPARE_LESSTHAN = 27, // greater than operator between left and right COMPARE_GREATERTHAN = 28, // less than equal operator between left and right 
COMPARE_LESSTHANOREQUALTO = 29, // greater than equal operator between left and right COMPARE_GREATERTHANOREQUALTO = 30, // IN operator [left IN (right1, right2, ...)] COMPARE_IN = 35, // NOT IN operator [left NOT IN (right1, right2, ...)] COMPARE_NOT_IN = 36, // IS DISTINCT FROM operator COMPARE_DISTINCT_FROM = 37, COMPARE_BETWEEN = 38, COMPARE_NOT_BETWEEN = 39, // IS NOT DISTINCT FROM operator COMPARE_NOT_DISTINCT_FROM = 40, // compare final boundary COMPARE_BOUNDARY_END = COMPARE_NOT_DISTINCT_FROM, // ----------------------------- // Conjunction Operators // ----------------------------- CONJUNCTION_AND = 50, CONJUNCTION_OR = 51, // ----------------------------- // Values // ----------------------------- VALUE_CONSTANT = 75, VALUE_PARAMETER = 76, VALUE_TUPLE = 77, VALUE_TUPLE_ADDRESS = 78, VALUE_NULL = 79, VALUE_VECTOR = 80, VALUE_SCALAR = 81, VALUE_DEFAULT = 82, // ----------------------------- // Aggregates // ----------------------------- AGGREGATE = 100, BOUND_AGGREGATE = 101, GROUPING_FUNCTION = 102, // ----------------------------- // Window Functions // ----------------------------- WINDOW_AGGREGATE = 110, WINDOW_RANK = 120, WINDOW_RANK_DENSE = 121, WINDOW_NTILE = 122, WINDOW_PERCENT_RANK = 123, WINDOW_CUME_DIST = 124, WINDOW_ROW_NUMBER = 125, WINDOW_FIRST_VALUE = 130, WINDOW_LAST_VALUE = 131, WINDOW_LEAD = 132, WINDOW_LAG = 133, WINDOW_NTH_VALUE = 134, WINDOW_FILL = 135, // ----------------------------- // Functions // ----------------------------- FUNCTION = 140, BOUND_FUNCTION = 141, // ----------------------------- // Operators // ----------------------------- CASE_EXPR = 150, OPERATOR_NULLIF = 151, OPERATOR_COALESCE = 152, ARRAY_EXTRACT = 153, ARRAY_SLICE = 154, STRUCT_EXTRACT = 155, ARRAY_CONSTRUCTOR = 156, ARROW = 157, OPERATOR_TRY = 158, // ----------------------------- // Subquery IN/EXISTS // ----------------------------- SUBQUERY = 175, // ----------------------------- // Parser // ----------------------------- STAR = 200, TABLE_STAR = 201, 
PLACEHOLDER = 202, COLUMN_REF = 203, FUNCTION_REF = 204, TABLE_REF = 205, LAMBDA_REF = 206, // ----------------------------- // Miscellaneous // ----------------------------- CAST = 225, BOUND_REF = 227, BOUND_COLUMN_REF = 228, BOUND_UNNEST = 229, COLLATE = 230, LAMBDA = 231, POSITIONAL_REFERENCE = 232, BOUND_LAMBDA_REF = 233, BOUND_EXPANDED = 234 }; //===--------------------------------------------------------------------===// // Expression Class //===--------------------------------------------------------------------===// enum class ExpressionClass : uint8_t { INVALID = 0, //===--------------------------------------------------------------------===// // Parsed Expressions //===--------------------------------------------------------------------===// AGGREGATE = 1, CASE = 2, CAST = 3, COLUMN_REF = 4, COMPARISON = 5, CONJUNCTION = 6, CONSTANT = 7, DEFAULT = 8, FUNCTION = 9, OPERATOR = 10, STAR = 11, SUBQUERY = 13, WINDOW = 14, PARAMETER = 15, COLLATE = 16, LAMBDA = 17, POSITIONAL_REFERENCE = 18, BETWEEN = 19, LAMBDA_REF = 20, //===--------------------------------------------------------------------===// // Bound Expressions //===--------------------------------------------------------------------===// BOUND_AGGREGATE = 25, BOUND_CASE = 26, BOUND_CAST = 27, BOUND_COLUMN_REF = 28, BOUND_COMPARISON = 29, BOUND_CONJUNCTION = 30, BOUND_CONSTANT = 31, BOUND_DEFAULT = 32, BOUND_FUNCTION = 33, BOUND_OPERATOR = 34, BOUND_PARAMETER = 35, BOUND_REF = 36, BOUND_SUBQUERY = 37, BOUND_WINDOW = 38, BOUND_BETWEEN = 39, BOUND_UNNEST = 40, BOUND_LAMBDA = 41, BOUND_LAMBDA_REF = 42, //===--------------------------------------------------------------------===// // Miscellaneous //===--------------------------------------------------------------------===// BOUND_EXPRESSION = 50, BOUND_EXPANDED = 51 }; DUCKDB_API string ExpressionTypeToString(ExpressionType type); string ExpressionTypeToOperator(ExpressionType type); // Operator String to ExpressionType (e.g. 
+ => OPERATOR_ADD) ExpressionType OperatorToExpressionType(const string &op); //! Negate a comparison expression, turning e.g. = into !=, or < into >= ExpressionType NegateComparisonExpression(ExpressionType type); //! Flip a comparison expression, turning e.g. < into >, or = into = ExpressionType FlipComparisonExpression(ExpressionType type); DUCKDB_API string ExpressionClassToString(ExpressionClass type); } // namespace duckdb namespace duckdb { //! The BaseExpression class is a base class that can represent any expression //! part of a SQL statement. class BaseExpression { public: //! Create an Expression BaseExpression(ExpressionType type, ExpressionClass expression_class) : type(type), expression_class(expression_class) { } virtual ~BaseExpression() { } //! Returns the class of the expression ExpressionClass GetExpressionClass() const { return expression_class; } //! Returns the type of the expression ExpressionType GetExpressionType() const { return type; } //! Sets the type of the expression unsafely. In general expressions are immutable and should not be changed after //! creation. Only use this if you know what you are doing. void SetExpressionTypeUnsafe(ExpressionType new_type) { type = new_type; } //! Returns the location in the query (if any) optional_idx GetQueryLocation() const { return query_location; } //! Sets the location in the query void SetQueryLocation(optional_idx location) { query_location = location; } //! Returns true if the expression has a non-empty alias bool HasAlias() const { return !alias.empty(); } //! Returns the alias of the expression const string &GetAlias() const { return alias; } //! Sets the alias of the expression void SetAlias(const string &alias_p) { alias = alias_p; } //! Sets the alias of the expression void SetAlias(string &&alias_p) { alias = std::move(alias_p); } //! Clears the alias of the expression void ClearAlias() { alias.clear(); } // TODO: Make the following protected // protected: //! 
Type of the expression ExpressionType type; //! The expression class of the node ExpressionClass expression_class; //! The alias of the expression, string alias; //! The location in the query (if any) optional_idx query_location; public: //! Returns true if this expression is an aggregate or not. /*! Examples: (1) SUM(a) + 1 -- True (2) a + 1 -- False */ virtual bool IsAggregate() const = 0; //! Returns true if the expression has a window function or not virtual bool IsWindow() const = 0; //! Returns true if the query contains a subquery virtual bool HasSubquery() const = 0; //! Returns true if expression does not contain a group ref or col ref or parameter virtual bool IsScalar() const = 0; //! Returns true if the expression has a parameter virtual bool HasParameter() const = 0; //! Get the name of the expression virtual string GetName() const; //! Convert the Expression to a String virtual string ToString() const = 0; //! Print the expression to stdout void Print() const; //! Creates a hash value of this expression. It is important that if two expressions are identical (i.e. //! Expression::Equals() returns true), that their hash value is identical as well. virtual hash_t Hash() const = 0; //! 
Returns true if this expression is equal to another expression virtual bool Equals(const BaseExpression &other) const; static bool Equals(const BaseExpression &left, const BaseExpression &right) { return left.Equals(right); } bool operator==(const BaseExpression &rhs) const { return Equals(rhs); } virtual void Verify() const; public: template TARGET &Cast() { if (expression_class != TARGET::TYPE) { throw InternalException("Failed to cast expression to type - expression type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (expression_class != TARGET::TYPE) { throw InternalException("Failed to cast expression to type - expression type mismatch"); } return reinterpret_cast(*this); } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/qualified_name.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/exception/parser_exception.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class ParserException : public Exception { public: DUCKDB_API explicit ParserException(const string &msg); DUCKDB_API explicit ParserException(const string &msg, const unordered_map &extra_info); template explicit ParserException(const string &msg, ARGS... params) : ParserException(ConstructMessage(msg, params...)) { } template explicit ParserException(optional_idx error_location, const string &msg, ARGS... params) : ParserException(ConstructMessage(msg, params...), Exception::InitializeExtraInfo(error_location)) { } template explicit ParserException(const ParsedExpression &expr, const string &msg, ARGS... 
params) : ParserException(ConstructMessage(msg, params...), Exception::InitializeExtraInfo(expr)) { } static ParserException SyntaxError(const string &query, const string &error_message, optional_idx error_location); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/keyword_helper.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/simplified_token.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! Simplified tokens are a simplified (dense) representation of the lexer //! Used for simple syntax highlighting in the tests enum class SimplifiedTokenType : uint8_t { SIMPLIFIED_TOKEN_IDENTIFIER, SIMPLIFIED_TOKEN_NUMERIC_CONSTANT, SIMPLIFIED_TOKEN_STRING_CONSTANT, SIMPLIFIED_TOKEN_OPERATOR, SIMPLIFIED_TOKEN_KEYWORD, SIMPLIFIED_TOKEN_COMMENT, SIMPLIFIED_TOKEN_ERROR }; struct SimplifiedToken { SimplifiedTokenType type; idx_t start; }; enum class KeywordCategory : uint8_t { KEYWORD_RESERVED, KEYWORD_UNRESERVED, KEYWORD_TYPE_FUNC, KEYWORD_COL_NAME, KEYWORD_NONE }; struct ParserKeyword { string name; KeywordCategory category; }; } // namespace duckdb namespace duckdb { class KeywordHelper { public: //! Returns true if the given text matches a keyword of the parser static bool IsKeyword(const string &text); static KeywordCategory KeywordCategoryType(const string &text); static string EscapeQuotes(const string &text, char quote = '"'); //! Returns true if the given string needs to be quoted when written as an identifier static bool RequiresQuotes(const string &text, bool allow_caps = true); //! Writes a string that is quoted static string WriteQuoted(const string &text, char quote = '\''); //! 
Writes a string that is optionally quoted + escaped so it can be used as an identifier static string WriteOptionallyQuoted(const string &text, char quote = '"', bool allow_caps = true); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/binding_alias.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class StandardEntry; struct BindingAlias { BindingAlias(); explicit BindingAlias(string alias); BindingAlias(string schema, string alias); BindingAlias(string catalog, string schema, string alias); explicit BindingAlias(const StandardEntry &entry); bool IsSet() const; const string &GetAlias() const; const string &GetCatalog() const { return catalog; } const string &GetSchema() const { return schema; } bool Matches(const BindingAlias &other) const; bool operator==(const BindingAlias &other) const; string ToString() const; private: string catalog; string schema; string alias; }; } // namespace duckdb namespace duckdb { struct QualifiedName { string catalog; string schema; string name; //! Parse the (optional) schema and a name from a string in the format of e.g. "schema"."table"; if there is no dot //! 
the schema will be set to INVALID_SCHEMA static QualifiedName Parse(const string &input); static vector ParseComponents(const string &input); string ToString() const; }; struct QualifiedColumnName { QualifiedColumnName(); QualifiedColumnName(string column_p); // NOLINT: allow implicit conversion from string to column name QualifiedColumnName(string table_p, string column_p); QualifiedColumnName(const BindingAlias &alias, string column_p); string catalog; string schema; string table; string column; static QualifiedColumnName Parse(string &input); string ToString() const; void Serialize(Serializer &serializer) const; static QualifiedColumnName Deserialize(Deserializer &deserializer); bool IsQualified() const; bool operator==(const QualifiedColumnName &rhs) const; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/expression_util.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class ParsedExpression; class Expression; class ExpressionUtil { public: //! ListEquals: check if a list of two expressions is equal (order is important) static bool ListEquals(const vector> &a, const vector> &b); static bool ListEquals(const vector> &a, const vector> &b); //! SetEquals: check if two sets of expressions are equal (order is not important) static bool SetEquals(const vector> &a, const vector> &b); static bool SetEquals(const vector> &a, const vector> &b); private: template static bool ExpressionListEquals(const vector> &a, const vector> &b); template static bool ExpressionSetEquals(const vector> &a, const vector> &b); }; } // namespace duckdb namespace duckdb { class Deserializer; class Serializer; //! The ParsedExpression class is a base class that can represent any expression //! part of a SQL statement. /*! The ParsedExpression class is a base class that can represent any expression part of a SQL statement. 
This is, for example, a column reference in a SELECT clause, but also operators, aggregates or filters. The Expression is emitted by the parser and does not contain any information about bindings to the catalog or to the types. ParsedExpressions are transformed into regular Expressions in the Binder. */ class ParsedExpression : public BaseExpression { public: //! Create an Expression ParsedExpression(ExpressionType type, ExpressionClass expression_class) : BaseExpression(type, expression_class) { } public: bool IsAggregate() const override; bool IsWindow() const override; bool HasSubquery() const override; bool IsScalar() const override; bool HasParameter() const override; bool Equals(const BaseExpression &other) const override; hash_t Hash() const override; //! Create a copy of this expression virtual unique_ptr Copy() const = 0; virtual void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); static bool Equals(const unique_ptr &left, const unique_ptr &right); static bool ListEquals(const vector> &left, const vector> &right); protected: //! Copy base Expression properties from another expression to this one, //! 
used in Copy method void CopyProperties(const ParsedExpression &other) { type = other.type; expression_class = other.expression_class; alias = other.alias; query_location = other.query_location; } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/compression_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class StorageManager; enum class CompressionType : uint8_t { COMPRESSION_AUTO = 0, COMPRESSION_UNCOMPRESSED = 1, COMPRESSION_CONSTANT = 2, // internal only COMPRESSION_RLE = 3, COMPRESSION_DICTIONARY = 4, COMPRESSION_PFOR_DELTA = 5, COMPRESSION_BITPACKING = 6, COMPRESSION_FSST = 7, COMPRESSION_CHIMP = 8, COMPRESSION_PATAS = 9, COMPRESSION_ALP = 10, COMPRESSION_ALPRD = 11, COMPRESSION_ZSTD = 12, COMPRESSION_ROARING = 13, COMPRESSION_EMPTY = 14, // internal only COMPRESSION_DICT_FSST = 15, COMPRESSION_COUNT // This has to stay the last entry of the type! }; struct CompressionAvailabilityResult { private: enum class UnavailableReason : uint8_t { AVAILABLE, //! Introduced later, not available to this version NOT_AVAILABLE_YET, //! 
Used to be available, but isnt anymore DEPRECATED }; public: CompressionAvailabilityResult() = default; static CompressionAvailabilityResult Deprecated() { return CompressionAvailabilityResult(UnavailableReason::DEPRECATED); } static CompressionAvailabilityResult NotAvailableYet() { return CompressionAvailabilityResult(UnavailableReason::NOT_AVAILABLE_YET); } public: bool IsAvailable() const { return reason == UnavailableReason::AVAILABLE; } bool IsDeprecated() { D_ASSERT(!IsAvailable()); return reason == UnavailableReason::DEPRECATED; } bool IsNotAvailableYet() { D_ASSERT(!IsAvailable()); return reason == UnavailableReason::NOT_AVAILABLE_YET; } private: explicit CompressionAvailabilityResult(UnavailableReason reason) : reason(reason) { } public: UnavailableReason reason = UnavailableReason::AVAILABLE; }; CompressionAvailabilityResult CompressionTypeIsAvailable(CompressionType compression_type, optional_ptr storage_manager = nullptr); vector ListCompressionTypes(void); CompressionType CompressionTypeFromString(const string &str); string CompressionTypeToString(CompressionType type); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/table_column_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class TableColumnType : uint8_t { STANDARD = 0, GENERATED = 1 }; } // namespace duckdb namespace duckdb { struct RenameColumnInfo; struct RenameTableInfo; class ColumnDefinition; //! A column of a table. class ColumnDefinition { public: DUCKDB_API ColumnDefinition(string name, LogicalType type); DUCKDB_API ColumnDefinition(string name, LogicalType type, unique_ptr expression, TableColumnType category); public: //! default_value const ParsedExpression &DefaultValue() const; bool HasDefaultValue() const; void SetDefaultValue(unique_ptr default_value); //! 
type DUCKDB_API const LogicalType &Type() const; LogicalType &TypeMutable(); void SetType(const LogicalType &type); //! name DUCKDB_API const string &Name() const; void SetName(const string &name); //! comment DUCKDB_API const Value &Comment() const; void SetComment(const Value &comment); //! compression_type const duckdb::CompressionType &CompressionType() const; void SetCompressionType(duckdb::CompressionType compression_type); //! storage_oid const storage_t &StorageOid() const; void SetStorageOid(storage_t storage_oid); LogicalIndex Logical() const; PhysicalIndex Physical() const; //! oid const column_t &Oid() const; void SetOid(column_t oid); //! category const TableColumnType &Category() const; //! Whether this column is a Generated Column bool Generated() const; DUCKDB_API ColumnDefinition Copy() const; DUCKDB_API void Serialize(Serializer &serializer) const; DUCKDB_API static ColumnDefinition Deserialize(Deserializer &deserializer); //===--------------------------------------------------------------------===// // Generated Columns (VIRTUAL) //===--------------------------------------------------------------------===// ParsedExpression &GeneratedExpressionMutable(); const ParsedExpression &GeneratedExpression() const; void SetGeneratedExpression(unique_ptr expression); void ChangeGeneratedExpressionType(const LogicalType &type); void GetListOfDependencies(vector &dependencies) const; string GetName() const; LogicalType GetType() const; private: //! The name of the entry string name; //! The type of the column LogicalType type; //! Compression Type used for this column duckdb::CompressionType compression_type = duckdb::CompressionType::COMPRESSION_AUTO; //! The index of the column in the storage of the table storage_t storage_oid = DConstants::INVALID_INDEX; //! The index of the column in the table idx_t oid = DConstants::INVALID_INDEX; //! The category of the column TableColumnType category = TableColumnType::STANDARD; //! 
The default value of the column (for non-generated columns) //! The generated column expression (for generated columns) unique_ptr expression; //! Comment on this column Value comment; //! Tags on this column unordered_map tags; }; } // namespace duckdb namespace duckdb { class CatalogEntry; class Catalog; class ClientContext; class Expression; class ExpressionExecutor; class Transaction; class AggregateFunction; class AggregateFunctionSet; class CopyFunction; class PragmaFunction; class PragmaFunctionSet; class ScalarFunctionSet; class ScalarFunction; class TableFunctionSet; class TableFunction; class SimpleFunction; struct PragmaInfo; //! The default null handling is NULL in, NULL out enum class FunctionNullHandling : uint8_t { DEFAULT_NULL_HANDLING = 0, SPECIAL_HANDLING = 1 }; //! The stability of the function, used by the optimizer //! CONSISTENT -> this function always returns the same result when given the same input, no variance //! CONSISTENT_WITHIN_QUERY -> this function returns the same result WITHIN the same query/transaction //! but the result might change across queries (e.g. NOW(), CURRENT_TIME) //! VOLATILE -> the result of this function might change per row (e.g. RANDOM()) enum class FunctionStability : uint8_t { CONSISTENT = 0, VOLATILE = 1, CONSISTENT_WITHIN_QUERY = 2 }; //! How to handle collations //! PROPAGATE_COLLATIONS -> this function combines collation from its inputs and emits them again (default) //! PUSH_COMBINABLE_COLLATIONS -> combinable collations are executed for the input arguments //! 
IGNORE_COLLATIONS -> collations are completely ignored by the function enum class FunctionCollationHandling : uint8_t { PROPAGATE_COLLATIONS = 0, PUSH_COMBINABLE_COLLATIONS = 1, IGNORE_COLLATIONS = 2 }; struct FunctionData { DUCKDB_API virtual ~FunctionData(); DUCKDB_API virtual unique_ptr Copy() const = 0; DUCKDB_API virtual bool Equals(const FunctionData &other) const = 0; DUCKDB_API static bool Equals(const FunctionData *left, const FunctionData *right); DUCKDB_API virtual bool SupportStatementCache() const; template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } // FIXME: this function should be removed in the future template TARGET &CastNoConst() const { return const_cast(Cast()); // NOLINT: FIXME } }; struct TableFunctionData : public FunctionData { // used to pass on projections to table functions that support them. NB, can contain COLUMN_IDENTIFIER_ROW_ID vector column_ids; DUCKDB_API ~TableFunctionData() override; DUCKDB_API unique_ptr Copy() const override; DUCKDB_API bool Equals(const FunctionData &other) const override; }; struct FunctionParameters { vector values; named_parameter_map_t named_parameters; }; //! Function is the base class used for any type of function (scalar, aggregate or simple function) class Function { public: DUCKDB_API explicit Function(string name); DUCKDB_API virtual ~Function(); //! The name of the function string name; //! Additional Information to specify function from it's name string extra_info; // Optional catalog name of the function string catalog_name; // Optional schema name of the function string schema_name; public: //! Returns the formatted string name(arg1, arg2, ...) DUCKDB_API static string CallToString(const string &catalog_name, const string &schema_name, const string &name, const vector &arguments, const LogicalType &varargs = LogicalType::INVALID); //! 
Returns the formatted string name(arg1, arg2..) -> return_type DUCKDB_API static string CallToString(const string &catalog_name, const string &schema_name, const string &name, const vector &arguments, const LogicalType &varargs, const LogicalType &return_type); //! Returns the formatted string name(arg1, arg2.., np1=a, np2=b, ...) DUCKDB_API static string CallToString(const string &catalog_name, const string &schema_name, const string &name, const vector &arguments, const named_parameter_type_map_t &named_parameters); //! Used in the bind to erase an argument from a function DUCKDB_API static void EraseArgument(SimpleFunction &bound_function, vector> &arguments, idx_t argument_index); }; class SimpleFunction : public Function { public: DUCKDB_API SimpleFunction(string name, vector arguments, LogicalType varargs = LogicalType(LogicalTypeId::INVALID)); DUCKDB_API ~SimpleFunction() override; //! The set of arguments of the function vector arguments; //! The set of original arguments of the function - only set if Function::EraseArgument is called //! Used for (de)serialization purposes vector original_arguments; //! The type of varargs to support, or LogicalTypeId::INVALID if the function does not accept variable length //! arguments LogicalType varargs; public: DUCKDB_API virtual string ToString() const; DUCKDB_API bool HasVarArgs() const; }; class SimpleNamedParameterFunction : public SimpleFunction { public: DUCKDB_API SimpleNamedParameterFunction(string name, vector arguments, LogicalType varargs = LogicalType(LogicalTypeId::INVALID)); DUCKDB_API ~SimpleNamedParameterFunction() override; //! 
The named parameters of the function named_parameter_type_map_t named_parameters; public: DUCKDB_API string ToString() const override; DUCKDB_API bool HasNamedParameters() const; }; class BaseScalarFunction : public SimpleFunction { public: DUCKDB_API BaseScalarFunction(string name, vector arguments, LogicalType return_type, FunctionStability stability, LogicalType varargs = LogicalType(LogicalTypeId::INVALID), FunctionNullHandling null_handling = FunctionNullHandling::DEFAULT_NULL_HANDLING, FunctionErrors errors = FunctionErrors::CANNOT_ERROR); DUCKDB_API ~BaseScalarFunction() override; //! Return type of the function LogicalType return_type; //! The stability of the function (see FunctionStability enum for more info) FunctionStability stability; //! How this function handles NULL values FunctionNullHandling null_handling; //! Whether or not this function can throw an error FunctionErrors errors; //! Collation handling of the function FunctionCollationHandling collation_handling; static BaseScalarFunction SetReturnsError(BaseScalarFunction &function) { function.errors = FunctionErrors::CAN_THROW_RUNTIME_ERROR; return function; } public: DUCKDB_API hash_t Hash() const; DUCKDB_API string ToString() const override; }; } // namespace duckdb namespace duckdb { class Expression; class BoundFunctionExpression; class ExpressionExecutor; struct ExpressionExecutorState; struct FunctionLocalState; struct ExpressionState { ExpressionState(const Expression &expr, ExpressionExecutorState &root); virtual ~ExpressionState() { } const Expression &expr; ExpressionExecutorState &root; vector> child_states; vector types; DataChunk intermediate_chunk; vector initialize; public: void AddChild(Expression &child_expr); void Finalize(); Allocator &GetAllocator(); bool HasContext(); DUCKDB_API ClientContext &GetContext(); void Verify(ExpressionExecutorState &root); public: template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() 
const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; struct ExecuteFunctionState : public ExpressionState { public: ExecuteFunctionState(const Expression &expr, ExpressionExecutorState &root); ~ExecuteFunctionState() override; public: static optional_ptr GetFunctionState(ExpressionState &state) { return state.Cast().local_state.get(); } bool TryExecuteDictionaryExpression(const BoundFunctionExpression &expr, DataChunk &args, ExpressionState &state, Vector &result); public: unique_ptr local_state; private: //! The column index of the "unary" input column that may be a dictionary vector //! Only valid when the expression is eligible for the dictionary expression optimization //! This is the case when the input is "practically unary", i.e., only one non-const input column optional_idx input_col_idx; //! Storage ID of the input dictionary vector string current_input_dictionary_id; //! Vector holding the expression executed on the entire dictionary unique_ptr output_dictionary; //! ID of the output dictionary_vector string output_dictionary_id; }; struct ExpressionExecutorState { ExpressionExecutorState(); unique_ptr root_state; ExpressionExecutor *executor = nullptr; void Verify(); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/statistics/base_statistics.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/operator/comparison_operators.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/hugeint.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! 
The Hugeint class contains static operations for the INT128 type class Hugeint { public: constexpr static const char *HUGEINT_MINIMUM_STRING = "-170141183460469231731687303715884105728"; //! Convert a hugeint object to a string static string ToString(hugeint_t input); template DUCKDB_API static bool TryCast(hugeint_t input, T &result); template static T Cast(hugeint_t input) { T result = 0; TryCast(input, result); return result; } template static bool TryConvert(T value, hugeint_t &result); template static hugeint_t Convert(T value) { hugeint_t result; if (!TryConvert(value, result)) { // LCOV_EXCL_START throw OutOfRangeException(double(value), GetTypeId(), GetTypeId()); } // LCOV_EXCL_STOP return result; } static bool TryNegate(hugeint_t input, hugeint_t &result); template inline static void NegateInPlace(hugeint_t &input) { if (!TryNegate(input, input)) { throw OutOfRangeException("Negation of HUGEINT is out of range!"); } } template inline static hugeint_t Negate(hugeint_t input) { NegateInPlace(input); return input; } static bool TryMultiply(hugeint_t lhs, hugeint_t rhs, hugeint_t &result); template inline static hugeint_t Multiply(hugeint_t lhs, hugeint_t rhs) { hugeint_t result; if (!TryMultiply(lhs, rhs, result)) { throw OutOfRangeException("Overflow in HUGEINT multiplication: %s * %s", lhs.ToString(), rhs.ToString()); } return result; } static bool TryDivMod(hugeint_t lhs, hugeint_t rhs, hugeint_t &result, hugeint_t &remainder); template inline static hugeint_t Divide(hugeint_t lhs, hugeint_t rhs) { // No division by zero if (rhs == 0) { throw OutOfRangeException("Division of HUGEINT by zero: %s / %s", lhs.ToString(), rhs.ToString()); } // division only has one reason to overflow: MINIMUM / -1 if (lhs == NumericLimits::Minimum() && rhs == -1) { throw OutOfRangeException("Overflow in HUGEINT division: %s / %s", lhs.ToString(), rhs.ToString()); } return Divide(lhs, rhs); } template inline static hugeint_t Modulo(hugeint_t lhs, hugeint_t rhs) { // No division 
by zero if (rhs == 0) { throw OutOfRangeException("Modulo of HUGEINT by zero: %s %% %s", lhs.ToString(), rhs.ToString()); } // division only has one reason to overflow: MINIMUM / -1 if (lhs == NumericLimits::Minimum() && rhs == -1) { throw OutOfRangeException("Overflow in HUGEINT modulo: %s %% %s", lhs.ToString(), rhs.ToString()); } return Modulo(lhs, rhs); } static bool TryAddInPlace(hugeint_t &lhs, hugeint_t rhs); template inline static hugeint_t Add(hugeint_t lhs, hugeint_t rhs) { if (!TryAddInPlace(lhs, rhs)) { throw OutOfRangeException("Overflow in HUGEINT addition: %s + %s", lhs.ToString(), rhs.ToString()); } return lhs; } static bool TrySubtractInPlace(hugeint_t &lhs, hugeint_t rhs); template inline static hugeint_t Subtract(hugeint_t lhs, hugeint_t rhs) { if (!TrySubtractInPlace(lhs, rhs)) { throw OutOfRangeException("Underflow in HUGEINT subtraction: %s - %s", lhs.ToString(), rhs.ToString()); } return lhs; } // DivMod -> returns the result of the division (lhs / rhs), and fills up the remainder static hugeint_t DivMod(hugeint_t lhs, hugeint_t rhs, hugeint_t &remainder); // DivMod but lhs MUST be positive, and rhs is a uint64_t static hugeint_t DivModPositive(hugeint_t lhs, uint64_t rhs, uint64_t &remainder); static int Sign(hugeint_t n); static hugeint_t Abs(hugeint_t n); // comparison operators static bool Equals(const hugeint_t &lhs, const hugeint_t &rhs) { bool lower_equals = lhs.lower == rhs.lower; bool upper_equals = lhs.upper == rhs.upper; return lower_equals && upper_equals; } static bool NotEquals(const hugeint_t &lhs, const hugeint_t &rhs) { return !Equals(lhs, rhs); } static bool GreaterThan(const hugeint_t &lhs, const hugeint_t &rhs) { bool upper_bigger = lhs.upper > rhs.upper; bool upper_equal = lhs.upper == rhs.upper; bool lower_bigger = lhs.lower > rhs.lower; return upper_bigger || (upper_equal && lower_bigger); } static bool GreaterThanEquals(const hugeint_t &lhs, const hugeint_t &rhs) { bool upper_bigger = lhs.upper > rhs.upper; bool 
upper_equal = lhs.upper == rhs.upper; bool lower_bigger_equals = lhs.lower >= rhs.lower; return upper_bigger || (upper_equal && lower_bigger_equals); } static bool LessThan(const hugeint_t &lhs, const hugeint_t &rhs) { bool upper_smaller = lhs.upper < rhs.upper; bool upper_equal = lhs.upper == rhs.upper; bool lower_smaller = lhs.lower < rhs.lower; return upper_smaller || (upper_equal && lower_smaller); } static bool LessThanEquals(const hugeint_t &lhs, const hugeint_t &rhs) { bool upper_smaller = lhs.upper < rhs.upper; bool upper_equal = lhs.upper == rhs.upper; bool lower_smaller_equals = lhs.lower <= rhs.lower; return upper_smaller || (upper_equal && lower_smaller_equals); } static constexpr uint8_t CACHED_POWERS_OF_TEN = 39; static const hugeint_t POWERS_OF_TEN[CACHED_POWERS_OF_TEN]; }; template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, int8_t &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, int16_t &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, int32_t &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, int64_t &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, uint8_t &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, uint16_t &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, uint32_t &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, uint64_t &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, hugeint_t &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, uhugeint_t &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, float &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, double &result); template <> DUCKDB_API bool Hugeint::TryCast(hugeint_t input, long double &result); template <> bool Hugeint::TryConvert(int8_t value, hugeint_t &result); template <> bool Hugeint::TryConvert(int16_t value, hugeint_t &result); template 
<> bool Hugeint::TryConvert(int32_t value, hugeint_t &result); template <> bool Hugeint::TryConvert(int64_t value, hugeint_t &result); template <> bool Hugeint::TryConvert(uint8_t value, hugeint_t &result); template <> bool Hugeint::TryConvert(uint16_t value, hugeint_t &result); template <> bool Hugeint::TryConvert(uint32_t value, hugeint_t &result); template <> bool Hugeint::TryConvert(uint64_t value, hugeint_t &result); template <> bool Hugeint::TryConvert(float value, hugeint_t &result); template <> bool Hugeint::TryConvert(double value, hugeint_t &result); template <> bool Hugeint::TryConvert(long double value, hugeint_t &result); template <> bool Hugeint::TryConvert(const char *value, hugeint_t &result); } // namespace duckdb #include namespace duckdb { //===--------------------------------------------------------------------===// // Comparison Operations //===--------------------------------------------------------------------===// struct Equals { template static inline bool Operation(const T &left, const T &right) { return left == right; } }; struct NotEquals { template static inline bool Operation(const T &left, const T &right) { return !Equals::Operation(left, right); } }; struct GreaterThan { template static inline bool Operation(const T &left, const T &right) { return left > right; } }; struct GreaterThanEquals { template static inline bool Operation(const T &left, const T &right) { return !GreaterThan::Operation(right, left); } }; struct LessThan { template static inline bool Operation(const T &left, const T &right) { return GreaterThan::Operation(right, left); } }; struct LessThanEquals { template static inline bool Operation(const T &left, const T &right) { return !GreaterThan::Operation(left, right); } }; template <> DUCKDB_API bool Equals::Operation(const float &left, const float &right); template <> DUCKDB_API bool Equals::Operation(const double &left, const double &right); template <> DUCKDB_API bool GreaterThan::Operation(const float &left, const 
float &right); template <> DUCKDB_API bool GreaterThan::Operation(const double &left, const double &right); template <> DUCKDB_API bool GreaterThanEquals::Operation(const float &left, const float &right); template <> DUCKDB_API bool GreaterThanEquals::Operation(const double &left, const double &right); // Distinct semantics are from Postgres record sorting. NULL = NULL and not-NULL < NULL // Deferring to the non-distinct operations removes the need for further specialisation. // TODO: To reverse the semantics, swap left_null and right_null for comparisons struct DistinctFrom { template static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) { if (left_null || right_null) { return left_null != right_null; } return NotEquals::Operation(left, right); } }; struct NotDistinctFrom { template static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) { return !DistinctFrom::Operation(left, right, left_null, right_null); } }; struct DistinctGreaterThan { template static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) { if (left_null || right_null) { return !right_null; } return GreaterThan::Operation(left, right); } }; struct DistinctGreaterThanNullsFirst { template static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) { return DistinctGreaterThan::Operation(left, right, right_null, left_null); } }; struct DistinctGreaterThanEquals { template static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) { return !DistinctGreaterThan::Operation(right, left, right_null, left_null); } }; struct DistinctLessThan { template static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) { return DistinctGreaterThan::Operation(right, left, right_null, left_null); } }; struct DistinctLessThanNullsFirst { template static inline bool Operation(const T &left, const T &right, 
bool left_null, bool right_null) { return DistinctGreaterThan::Operation(right, left, left_null, right_null); } }; struct DistinctLessThanEquals { template static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) { return !DistinctGreaterThan::Operation(left, right, left_null, right_null); } }; //===--------------------------------------------------------------------===// // Comparison Operator Wrappers (so (Not)DistinctFrom have the same API) //===--------------------------------------------------------------------===// template struct ComparisonOperationWrapper { static constexpr const bool COMPARE_NULL = false; template static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) { if (right_null || left_null) { return false; } return OP::template Operation(left, right); } }; template <> struct ComparisonOperationWrapper { static constexpr const bool COMPARE_NULL = true; template static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) { return DistinctFrom::template Operation(left, right, left_null, right_null); } }; template <> struct ComparisonOperationWrapper { static constexpr const bool COMPARE_NULL = true; template static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) { return NotDistinctFrom::template Operation(left, right, left_null, right_null); } }; //===--------------------------------------------------------------------===// // Specialized Boolean Comparison Operators //===--------------------------------------------------------------------===// template <> inline bool GreaterThan::Operation(const bool &left, const bool &right) { return !right && left; } //===--------------------------------------------------------------------===// // Specialized String Comparison Operations //===--------------------------------------------------------------------===// template <> inline bool Equals::Operation(const string_t 
&left, const string_t &right) { return left == right; } template <> inline bool GreaterThan::Operation(const string_t &left, const string_t &right) { return left > right; } //===--------------------------------------------------------------------===// // Specialized Interval Comparison Operators //===--------------------------------------------------------------------===// template <> inline bool Equals::Operation(const interval_t &left, const interval_t &right) { return Interval::Equals(left, right); } template <> inline bool GreaterThan::Operation(const interval_t &left, const interval_t &right) { return Interval::GreaterThan(left, right); } } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/statistics/numeric_stats.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/filter_propagate_result.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class FilterPropagateResult : uint8_t { NO_PRUNING_POSSIBLE = 0, FILTER_ALWAYS_TRUE = 1, FILTER_ALWAYS_FALSE = 2, FILTER_TRUE_OR_NULL = 3, FILTER_FALSE_OR_NULL = 4 }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/statistics/numeric_stats_union.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct NumericValueUnion { union Val { bool boolean; int8_t tinyint; int16_t smallint; int32_t integer; int64_t bigint; uint8_t utinyint; uint16_t usmallint; uint32_t uinteger; uint64_t ubigint; hugeint_t hugeint; uhugeint_t uhugeint; float float_; // NOLINT double double_; // NOLINT } value_; // NOLINT template T &GetReferenceUnsafe(); }; template <> DUCKDB_API inline bool 
&NumericValueUnion::GetReferenceUnsafe() { return value_.boolean; } template <> DUCKDB_API inline int8_t &NumericValueUnion::GetReferenceUnsafe() { return value_.tinyint; } template <> DUCKDB_API inline int16_t &NumericValueUnion::GetReferenceUnsafe() { return value_.smallint; } template <> DUCKDB_API inline int32_t &NumericValueUnion::GetReferenceUnsafe() { return value_.integer; } template <> DUCKDB_API inline int64_t &NumericValueUnion::GetReferenceUnsafe() { return value_.bigint; } template <> DUCKDB_API inline hugeint_t &NumericValueUnion::GetReferenceUnsafe() { return value_.hugeint; } template <> DUCKDB_API inline uhugeint_t &NumericValueUnion::GetReferenceUnsafe() { return value_.uhugeint; } template <> DUCKDB_API inline uint8_t &NumericValueUnion::GetReferenceUnsafe() { return value_.utinyint; } template <> DUCKDB_API inline uint16_t &NumericValueUnion::GetReferenceUnsafe() { return value_.usmallint; } template <> DUCKDB_API inline uint32_t &NumericValueUnion::GetReferenceUnsafe() { return value_.uinteger; } template <> DUCKDB_API inline uint64_t &NumericValueUnion::GetReferenceUnsafe() { return value_.ubigint; } template <> DUCKDB_API inline float &NumericValueUnion::GetReferenceUnsafe() { return value_.float_; } template <> DUCKDB_API inline double &NumericValueUnion::GetReferenceUnsafe() { return value_.double_; } } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/array_ptr.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { template class array_ptr_iterator { // NOLINT: match std naming style public: array_ptr_iterator(DATA_TYPE *ptr, idx_t index, idx_t size) : ptr(ptr), index(index), size(size) { } public: array_ptr_iterator &operator++() { index++; if (index > size) { index = size; } return *this; } bool operator!=(const array_ptr_iterator &other) const { return ptr != other.ptr || index != other.index || size != 
other.size; } DATA_TYPE &operator*() const { if (DUCKDB_UNLIKELY(index >= size)) { throw InternalException("array_ptr iterator dereferenced while iterator is out of range"); } return ptr[index]; } private: DATA_TYPE *ptr; idx_t index; idx_t size; }; //! array_ptr is a non-owning (optionally) bounds-checked pointer to an array template class array_ptr { // NOLINT: match std naming style public: using iterator_type = array_ptr_iterator; private: static inline void AssertNotNull(const bool null) { #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY) return; #else if (DUCKDB_UNLIKELY(null)) { throw duckdb::InternalException("Attempted to construct an array_ptr from a NULL pointer"); } #endif } static inline void AssertIndexInBounds(idx_t index, idx_t size) { #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY) return; #else if (DUCKDB_UNLIKELY(index >= size)) { throw InternalException("Attempted to access index %ld within array_ptr of size %ld", index, size); } #endif } public: array_ptr(DATA_TYPE *ptr_p, idx_t count) : ptr(ptr_p), count(count) { if (MemorySafety::ENABLED) { AssertNotNull(!ptr); } } explicit array_ptr(DATA_TYPE &ref) : ptr(&ref), count(1) { } const DATA_TYPE &operator[](idx_t idx) const { if (MemorySafety::ENABLED) { AssertIndexInBounds(idx, count); } return ptr[idx]; } DATA_TYPE &operator[](idx_t idx) { if (MemorySafety::ENABLED) { AssertIndexInBounds(idx, count); } return ptr[idx]; } idx_t size() const { // NOLINT: match std naming style return count; } array_ptr_iterator begin() { // NOLINT: match std naming style return array_ptr_iterator(ptr, 0, count); } array_ptr_iterator begin() const { // NOLINT: match std naming style return array_ptr_iterator(ptr, 0, count); } array_ptr_iterator cbegin() { // NOLINT: match std naming style return array_ptr_iterator(ptr, 0, count); } array_ptr_iterator end() { // NOLINT: match std naming style return array_ptr_iterator(ptr, count, count); } array_ptr_iterator end() const { // NOLINT: 
match std naming style return array_ptr_iterator(ptr, count, count); } array_ptr_iterator cend() { // NOLINT: match std naming style return array_ptr_iterator(ptr, count, count); } private: DATA_TYPE *ptr; idx_t count; }; template using unsafe_array_ptr = array_ptr; } // namespace duckdb namespace duckdb { class BaseStatistics; struct SelectionVector; class Vector; struct NumericStatsData { //! Whether or not the value has a max value bool has_min; //! Whether or not the segment has a min value bool has_max; //! The minimum value of the segment NumericValueUnion min; //! The maximum value of the segment NumericValueUnion max; }; struct NumericStats { //! Unknown statistics - i.e. "has_min" is false, "has_max" is false DUCKDB_API static BaseStatistics CreateUnknown(LogicalType type); //! Empty statistics - i.e. "min = MaxValue, max = MinValue" DUCKDB_API static BaseStatistics CreateEmpty(LogicalType type); //! Returns true if the stats has a constant value DUCKDB_API static bool IsConstant(const BaseStatistics &stats); //! Returns true if the stats has both a min and max value defined DUCKDB_API static bool HasMinMax(const BaseStatistics &stats); //! Returns true if the stats has a min value defined DUCKDB_API static bool HasMin(const BaseStatistics &stats); //! Returns true if the stats has a max value defined DUCKDB_API static bool HasMax(const BaseStatistics &stats); //! Returns the min value - throws an exception if there is no min value DUCKDB_API static Value Min(const BaseStatistics &stats); //! Returns the max value - throws an exception if there is no max value DUCKDB_API static Value Max(const BaseStatistics &stats); //! Sets the min value of the statistics DUCKDB_API static void SetMin(BaseStatistics &stats, const Value &val); //! 
Sets the max value of the statistics DUCKDB_API static void SetMax(BaseStatistics &stats, const Value &val); template static void SetMax(BaseStatistics &stats, T val) { auto &nstats = GetDataUnsafe(stats); nstats.has_max = true; nstats.max.GetReferenceUnsafe() = val; } template static void SetMin(BaseStatistics &stats, T val) { auto &nstats = GetDataUnsafe(stats); nstats.has_min = true; nstats.min.GetReferenceUnsafe() = val; } //! Check whether or not a given comparison with a constant could possibly be satisfied by rows given the statistics DUCKDB_API static FilterPropagateResult CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type, array_ptr constants); DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other_p); DUCKDB_API static void Serialize(const BaseStatistics &stats, Serializer &serializer); DUCKDB_API static void Deserialize(Deserializer &deserializer, BaseStatistics &stats); DUCKDB_API static string ToString(const BaseStatistics &stats); template static inline void UpdateValue(T new_value, T &min, T &max) { min = LessThan::Operation(new_value, min) ? new_value : min; max = GreaterThan::Operation(new_value, max) ? 
new_value : max; } template static inline void Update(NumericStatsData &nstats, T new_value) { UpdateValue(new_value, nstats.min.GetReferenceUnsafe(), nstats.max.GetReferenceUnsafe()); } static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count); template static T GetMin(const BaseStatistics &stats) { return NumericStats::Min(stats).GetValueUnsafe(); } template static T GetMax(const BaseStatistics &stats) { return NumericStats::Max(stats).GetValueUnsafe(); } template static T GetMinUnsafe(const BaseStatistics &stats); template static T GetMaxUnsafe(const BaseStatistics &stats); private: static NumericStatsData &GetDataUnsafe(BaseStatistics &stats); static const NumericStatsData &GetDataUnsafe(const BaseStatistics &stats); static Value MinOrNull(const BaseStatistics &stats); static Value MaxOrNull(const BaseStatistics &stats); template static void TemplatedVerify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/statistics/string_stats.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class BaseStatistics; struct SelectionVector; class Vector; struct StringStatsData { constexpr static uint32_t MAX_STRING_MINMAX_SIZE = 8; //! The minimum value of the segment, potentially truncated data_t min[MAX_STRING_MINMAX_SIZE]; //! The maximum value of the segment, potentially truncated data_t max[MAX_STRING_MINMAX_SIZE]; //! Whether or not the column can contain unicode characters bool has_unicode; //! Whether or not the maximum string length is known bool has_max_string_length; //! The maximum string length in bytes uint32_t max_string_length; }; struct StringStats { //! Unknown statistics - i.e. 
"has_unicode" is true, "max_string_length" is unknown, "min" is \0, max is \xFF DUCKDB_API static BaseStatistics CreateUnknown(LogicalType type); //! Empty statistics - i.e. "has_unicode" is false, "max_string_length" is 0, "min" is \xFF, max is \x00 DUCKDB_API static BaseStatistics CreateEmpty(LogicalType type); //! Whether or not the statistics have a maximum string length defined DUCKDB_API static bool HasMaxStringLength(const BaseStatistics &stats); //! Returns the maximum string length, or throws an exception if !HasMaxStringLength() DUCKDB_API static uint32_t MaxStringLength(const BaseStatistics &stats); //! Whether or not the strings can contain unicode DUCKDB_API static bool CanContainUnicode(const BaseStatistics &stats); //! Returns the min value (up to a length of StringStatsData::MAX_STRING_MINMAX_SIZE) DUCKDB_API static string Min(const BaseStatistics &stats); //! Returns the max value (up to a length of StringStatsData::MAX_STRING_MINMAX_SIZE) DUCKDB_API static string Max(const BaseStatistics &stats); //! Resets the max string length so HasMaxStringLength() is false DUCKDB_API static void ResetMaxStringLength(BaseStatistics &stats); //! Sets the max string length DUCKDB_API static void SetMaxStringLength(BaseStatistics &stats, uint32_t length); //! 
FIXME: make this part of Set on statistics DUCKDB_API static void SetContainsUnicode(BaseStatistics &stats); DUCKDB_API static void Serialize(const BaseStatistics &stats, Serializer &serializer); DUCKDB_API static void Deserialize(Deserializer &deserializer, BaseStatistics &base); DUCKDB_API static string ToString(const BaseStatistics &stats); DUCKDB_API static FilterPropagateResult CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type, array_ptr constants); DUCKDB_API static FilterPropagateResult CheckZonemap(const_data_ptr_t min_data, idx_t min_len, const_data_ptr_t max_data, idx_t max_len, ExpressionType comparison_type, const string &value); DUCKDB_API static void Update(BaseStatistics &stats, const string_t &value); DUCKDB_API static void SetMin(BaseStatistics &stats, const string_t &value); DUCKDB_API static void SetMax(BaseStatistics &stats, const string_t &value); DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other); DUCKDB_API static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count); private: static StringStatsData &GetDataUnsafe(BaseStatistics &stats); static const StringStatsData &GetDataUnsafe(const BaseStatistics &stats); }; } // namespace duckdb namespace duckdb { struct SelectionVector; class Serializer; class Deserializer; class Vector; struct UnifiedVectorFormat; enum class StatsInfo : uint8_t { CAN_HAVE_NULL_VALUES = 0, CANNOT_HAVE_NULL_VALUES = 1, CAN_HAVE_VALID_VALUES = 2, CANNOT_HAVE_VALID_VALUES = 3, CAN_HAVE_NULL_AND_VALID_VALUES = 4 }; enum class StatisticsType : uint8_t { NUMERIC_STATS, STRING_STATS, LIST_STATS, STRUCT_STATS, BASE_STATS, ARRAY_STATS }; class BaseStatistics { friend struct NumericStats; friend struct StringStats; friend struct StructStats; friend struct ListStats; friend struct ArrayStats; public: DUCKDB_API ~BaseStatistics(); // disable copy constructors BaseStatistics(const BaseStatistics &other) = delete; BaseStatistics 
&operator=(const BaseStatistics &) = delete; //! enable move constructors DUCKDB_API BaseStatistics(BaseStatistics &&other) noexcept; DUCKDB_API BaseStatistics &operator=(BaseStatistics &&) noexcept; public: //! Creates a set of statistics for data that is unknown, i.e. "has_null" is true, "has_no_null" is true, etc //! This can be used in case nothing is known about the data - or can be used as a baseline when only a few things //! are known static BaseStatistics CreateUnknown(LogicalType type); //! Creates statistics for an empty database, i.e. "has_null" is false, "has_no_null" is false, etc //! This is used when incrementally constructing statistics by constantly adding new values static BaseStatistics CreateEmpty(LogicalType type); DUCKDB_API StatisticsType GetStatsType() const; DUCKDB_API static StatisticsType GetStatsType(const LogicalType &type); DUCKDB_API bool CanHaveNull() const; DUCKDB_API bool CanHaveNoNull() const; void SetDistinctCount(idx_t distinct_count); bool IsConstant() const; const LogicalType &GetType() const { return type; } void Set(StatsInfo info); void CombineValidity(BaseStatistics &left, BaseStatistics &right); void CopyValidity(BaseStatistics &stats); //! Set that the CURRENT level can have null values //! Note that this is not correct for nested types unless this information is propagated in a different manner //! Use Set(StatsInfo::CAN_HAVE_NULL_VALUES) in the general case inline void SetHasNullFast() { has_null = true; } //! Set that the CURRENT level can have valid values //! Note that this is not correct for nested types unless this information is propagated in a different manner //! 
Use Set(StatsInfo::CAN_HAVE_VALID_VALUES) in the general case inline void SetHasNoNullFast() { has_no_null = true; } void SetHasNull(); void SetHasNoNull(); void Merge(const BaseStatistics &other); void Copy(const BaseStatistics &other); BaseStatistics Copy() const; unique_ptr ToUnique() const; void CopyBase(const BaseStatistics &orig); void Serialize(Serializer &serializer) const; static BaseStatistics Deserialize(Deserializer &deserializer); //! Verify that a vector does not violate the statistics void Verify(Vector &vector, const SelectionVector &sel, idx_t count, bool ignore_has_null = false) const; void Verify(Vector &vector, idx_t count) const; string ToString() const; idx_t GetDistinctCount(); static BaseStatistics FromConstant(const Value &input); template void UpdateNumericStats(T new_value) { D_ASSERT(GetStatsType() == StatisticsType::NUMERIC_STATS); NumericStats::Update(stats_union.numeric_data, new_value); } private: BaseStatistics(); explicit BaseStatistics(LogicalType type); static void Construct(BaseStatistics &stats, LogicalType type); void InitializeUnknown(); void InitializeEmpty(); static BaseStatistics CreateUnknownType(LogicalType type); static BaseStatistics CreateEmptyType(LogicalType type); static BaseStatistics FromConstantType(const Value &input); private: //! The type of the logical segment LogicalType type; //! Whether or not the segment can contain NULL values bool has_null; //! Whether or not the segment can contain values that are not null bool has_no_null; //! estimate that one may have even if distinct_stats==nullptr idx_t distinct_count; //! Numeric and String stats union { //! Numeric stats data, for numeric stats NumericStatsData numeric_data; //! String stats data, for string stats StringStatsData string_data; } stats_union; //! 
Child stats (for LIST and STRUCT) unsafe_unique_array child_stats; }; template <> inline void BaseStatistics::UpdateNumericStats(interval_t new_value) { } template <> inline void BaseStatistics::UpdateNumericStats(list_entry_t new_value) { } } // namespace duckdb namespace duckdb { struct FunctionLocalState { DUCKDB_API virtual ~FunctionLocalState(); template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; struct ScalarFunctionInfo { DUCKDB_API virtual ~ScalarFunctionInfo(); template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; class Binder; class BoundFunctionExpression; class ScalarFunctionCatalogEntry; struct StatementProperties; struct FunctionStatisticsInput { FunctionStatisticsInput(BoundFunctionExpression &expr_p, optional_ptr bind_data_p, vector &child_stats_p, unique_ptr *expr_ptr_p) : expr(expr_p), bind_data(bind_data_p), child_stats(child_stats_p), expr_ptr(expr_ptr_p) { } BoundFunctionExpression &expr; optional_ptr bind_data; vector &child_stats; unique_ptr *expr_ptr; }; struct FunctionModifiedDatabasesInput { FunctionModifiedDatabasesInput(optional_ptr bind_data_p, StatementProperties &properties) : bind_data(bind_data_p), properties(properties) { } optional_ptr bind_data; StatementProperties &properties; }; struct FunctionBindExpressionInput { FunctionBindExpressionInput(ClientContext &context_p, optional_ptr bind_data_p, vector> &children_p) : context(context_p), bind_data(bind_data_p), children(children_p) { } ClientContext &context; optional_ptr bind_data; vector> &children; }; struct ScalarFunctionBindInput { explicit ScalarFunctionBindInput(Binder &binder) : binder(binder) { } Binder &binder; }; //! The scalar function type typedef std::function scalar_function_t; //! 
The type to bind the scalar function and to create the function data typedef unique_ptr (*bind_scalar_function_t)(ClientContext &context, ScalarFunction &bound_function, vector> &arguments); typedef unique_ptr (*bind_scalar_function_extended_t)(ScalarFunctionBindInput &bind_input, ScalarFunction &bound_function, vector> &arguments); //! The type to initialize a thread local state for the scalar function typedef unique_ptr (*init_local_state_t)(ExpressionState &state, const BoundFunctionExpression &expr, FunctionData *bind_data); //! The type to propagate statistics for this scalar function typedef unique_ptr (*function_statistics_t)(ClientContext &context, FunctionStatisticsInput &input); //! The type to bind lambda-specific parameter types typedef LogicalType (*bind_lambda_function_t)(ClientContext &context, const vector &function_child_types, idx_t parameter_idx); //! The type to bind lambda-specific parameter types typedef void (*get_modified_databases_t)(ClientContext &context, FunctionModifiedDatabasesInput &input); typedef void (*function_serialize_t)(Serializer &serializer, const optional_ptr bind_data, const ScalarFunction &function); typedef unique_ptr (*function_deserialize_t)(Deserializer &deserializer, ScalarFunction &function); //! 
The type to bind lambda-specific parameter types typedef unique_ptr (*function_bind_expression_t)(FunctionBindExpressionInput &input); class ScalarFunction : public BaseScalarFunction { // NOLINT: work-around bug in clang-tidy public: DUCKDB_API ScalarFunction(string name, vector arguments, LogicalType return_type, scalar_function_t function, bind_scalar_function_t bind = nullptr, bind_scalar_function_extended_t bind_extended = nullptr, function_statistics_t statistics = nullptr, init_local_state_t init_local_state = nullptr, LogicalType varargs = LogicalType(LogicalTypeId::INVALID), FunctionStability stability = FunctionStability::CONSISTENT, FunctionNullHandling null_handling = FunctionNullHandling::DEFAULT_NULL_HANDLING, bind_lambda_function_t bind_lambda = nullptr); DUCKDB_API ScalarFunction(vector arguments, LogicalType return_type, scalar_function_t function, bind_scalar_function_t bind = nullptr, bind_scalar_function_extended_t bind_extended = nullptr, function_statistics_t statistics = nullptr, init_local_state_t init_local_state = nullptr, LogicalType varargs = LogicalType(LogicalTypeId::INVALID), FunctionStability stability = FunctionStability::CONSISTENT, FunctionNullHandling null_handling = FunctionNullHandling::DEFAULT_NULL_HANDLING, bind_lambda_function_t bind_lambda = nullptr); //! The main scalar function to execute scalar_function_t function; //! The bind function (if any) bind_scalar_function_t bind; //! The bind function that receives extra input to perform more complex binding operations (if any) bind_scalar_function_extended_t bind_extended = nullptr; //! Init thread local state for the function (if any) init_local_state_t init_local_state; //! The statistics propagation function (if any) function_statistics_t statistics; //! The lambda bind function (if any) bind_lambda_function_t bind_lambda; //! Function to bind the result function expression directly (if any) function_bind_expression_t bind_expression; //! 
Gets the modified databases (if any) get_modified_databases_t get_modified_databases; function_serialize_t serialize; function_deserialize_t deserialize; //! Additional function info, passed to the bind shared_ptr function_info; DUCKDB_API bool operator==(const ScalarFunction &rhs) const; DUCKDB_API bool operator!=(const ScalarFunction &rhs) const; DUCKDB_API bool Equal(const ScalarFunction &rhs) const; public: DUCKDB_API static void NopFunction(DataChunk &input, ExpressionState &state, Vector &result); template static void UnaryFunction(DataChunk &input, ExpressionState &state, Vector &result) { D_ASSERT(input.ColumnCount() >= 1); UnaryExecutor::Execute(input.data[0], result, input.size()); } template static void BinaryFunction(DataChunk &input, ExpressionState &state, Vector &result) { D_ASSERT(input.ColumnCount() == 2); BinaryExecutor::ExecuteStandard(input.data[0], input.data[1], result, input.size()); } template static void TernaryFunction(DataChunk &input, ExpressionState &state, Vector &result) { D_ASSERT(input.ColumnCount() == 3); TernaryExecutor::ExecuteStandard(input.data[0], input.data[1], input.data[2], result, input.size()); } public: template static scalar_function_t GetScalarUnaryFunction(const LogicalType &type) { scalar_function_t function; switch (type.id()) { case LogicalTypeId::TINYINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::SMALLINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::INTEGER: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::BIGINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::UTINYINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::USMALLINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::UINTEGER: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::UBIGINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::HUGEINT: function = 
&ScalarFunction::UnaryFunction; break; case LogicalTypeId::UHUGEINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::FLOAT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::DOUBLE: function = &ScalarFunction::UnaryFunction; break; default: throw InternalException("Unimplemented type for GetScalarUnaryFunction"); } return function; } template static scalar_function_t GetScalarUnaryFunctionFixedReturn(const LogicalType &type) { scalar_function_t function; switch (type.id()) { case LogicalTypeId::TINYINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::SMALLINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::INTEGER: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::BIGINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::UTINYINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::USMALLINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::UINTEGER: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::UBIGINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::HUGEINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::UHUGEINT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::FLOAT: function = &ScalarFunction::UnaryFunction; break; case LogicalTypeId::DOUBLE: function = &ScalarFunction::UnaryFunction; break; default: throw InternalException("Unimplemented type for GetScalarUnaryFunctionFixedReturn"); } return function; } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/aggregate_function.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/array.hpp // // 
//===----------------------------------------------------------------------===// #include namespace duckdb { using std::array; } //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/vector_operations/aggregate_executor.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/function/aggregate_state.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/statistics/node_statistics.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class NodeStatistics { public: NodeStatistics() : has_estimated_cardinality(false), has_max_cardinality(false) { } explicit NodeStatistics(idx_t estimated_cardinality) : has_estimated_cardinality(true), estimated_cardinality(estimated_cardinality), has_max_cardinality(false) { } NodeStatistics(idx_t estimated_cardinality, idx_t max_cardinality) : has_estimated_cardinality(true), estimated_cardinality(estimated_cardinality), has_max_cardinality(true), max_cardinality(max_cardinality) { } //! Whether or not the node has an estimated cardinality specified bool has_estimated_cardinality; //! The estimated cardinality at the specified node idx_t estimated_cardinality; //! Whether or not the node has a maximum cardinality specified bool has_max_cardinality; //! The max possible cardinality at the specified node idx_t max_cardinality; }; } // namespace duckdb namespace duckdb { enum class AggregateType : uint8_t { NON_DISTINCT = 1, DISTINCT = 2 }; //! Whether or not the input order influences the result of the aggregate enum class AggregateOrderDependent : uint8_t { ORDER_DEPENDENT = 1, NOT_ORDER_DEPENDENT = 2 }; //! 
Whether or not the input distinctness influences the result of the aggregate enum class AggregateDistinctDependent : uint8_t { DISTINCT_DEPENDENT = 1, NOT_DISTINCT_DEPENDENT = 2 }; //! Whether or not the combiner needs to preserve the source enum class AggregateCombineType : uint8_t { PRESERVE_INPUT = 1, ALLOW_DESTRUCTIVE = 2 }; class BoundAggregateExpression; struct AggregateInputData { AggregateInputData(optional_ptr bind_data_p, ArenaAllocator &allocator_p, AggregateCombineType combine_type_p = AggregateCombineType::PRESERVE_INPUT) : bind_data(bind_data_p), allocator(allocator_p), combine_type(combine_type_p) { } optional_ptr bind_data; ArenaAllocator &allocator; AggregateCombineType combine_type; }; struct AggregateUnaryInput { AggregateUnaryInput(AggregateInputData &input_p, ValidityMask &input_mask_p) : input(input_p), input_mask(input_mask_p), input_idx(0) { } AggregateInputData &input; ValidityMask &input_mask; idx_t input_idx; inline bool RowIsValid() { return input_mask.RowIsValid(input_idx); } }; struct AggregateBinaryInput { AggregateBinaryInput(AggregateInputData &input_p, ValidityMask &left_mask_p, ValidityMask &right_mask_p) : input(input_p), left_mask(left_mask_p), right_mask(right_mask_p) { } AggregateInputData &input; ValidityMask &left_mask; ValidityMask &right_mask; idx_t lidx; idx_t ridx; }; struct AggregateFinalizeData { AggregateFinalizeData(Vector &result_p, AggregateInputData &input_p) : result(result_p), input(input_p), result_idx(0) { } Vector &result; AggregateInputData &input; idx_t result_idx; inline void ReturnNull() { switch (result.GetVectorType()) { case VectorType::FLAT_VECTOR: FlatVector::SetNull(result, result_idx, true); break; case VectorType::CONSTANT_VECTOR: ConstantVector::SetNull(result, true); break; default: throw InternalException("Invalid result vector type for aggregate"); } } inline string_t ReturnString(string_t value) { return StringVector::AddStringOrBlob(result, value); } }; struct AggregateStatisticsInput { 
AggregateStatisticsInput(optional_ptr bind_data_p, vector &child_stats_p, optional_ptr node_stats_p) : bind_data(bind_data_p), child_stats(child_stats_p), node_stats(node_stats_p) { } optional_ptr bind_data; vector &child_stats; optional_ptr node_stats; }; } // namespace duckdb namespace duckdb { // structs struct AggregateInputData; // The bounds of a window frame struct FrameBounds { FrameBounds() : start(0), end(0) {}; FrameBounds(idx_t start, idx_t end) : start(start), end(end) {}; idx_t start = 0; idx_t end = 0; }; // A set of window subframes for windowed EXCLUDE using SubFrames = vector; class AggregateExecutor { private: #ifndef DUCKDB_SMALLER_BINARY template static inline void NullaryFlatLoop(STATE_TYPE **__restrict states, AggregateInputData &aggr_input_data, idx_t count) { for (idx_t i = 0; i < count; i++) { OP::template Operation(*states[i], aggr_input_data, i); } } #endif template static inline void NullaryScatterLoop(STATE_TYPE *__restrict const *__restrict const states, AggregateInputData &aggr_input_data, const SelectionVector &ssel, const idx_t count) { if (ssel.IsSet()) { for (idx_t i = 0; i < count; i++) { auto sidx = ssel.get_index_unsafe(i); OP::template Operation(*states[sidx], aggr_input_data, sidx); } } else { for (idx_t i = 0; i < count; i++) { OP::template Operation(*states[i], aggr_input_data, i); } } } #ifndef DUCKDB_SMALLER_BINARY template static inline void UnaryFlatLoop(const INPUT_TYPE *__restrict idata, AggregateInputData &aggr_input_data, STATE_TYPE **__restrict states, ValidityMask &mask, idx_t count) { if (OP::IgnoreNull() && !mask.AllValid()) { AggregateUnaryInput input(aggr_input_data, mask); auto &base_idx = input.input_idx; base_idx = 0; auto entry_count = ValidityMask::EntryCount(count); for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { auto validity_entry = mask.GetValidityEntry(entry_idx); idx_t next = MinValue(base_idx + ValidityMask::BITS_PER_VALUE, count); if (ValidityMask::AllValid(validity_entry)) { // 
all valid: perform operation for (; base_idx < next; base_idx++) { OP::template Operation(*states[base_idx], idata[base_idx], input); } } else if (ValidityMask::NoneValid(validity_entry)) { // nothing valid: skip all base_idx = next; continue; } else { // partially valid: need to check individual elements for validity idx_t start = base_idx; for (; base_idx < next; base_idx++) { if (ValidityMask::RowIsValid(validity_entry, base_idx - start)) { OP::template Operation(*states[base_idx], idata[base_idx], input); } } } } } else { AggregateUnaryInput input(aggr_input_data, mask); auto &i = input.input_idx; for (i = 0; i < count; i++) { OP::template Operation(*states[i], idata[i], input); } } } #endif #ifndef DUCKDB_SMALLER_BINARY template #else template #endif static inline void UnaryScatterLoop(const INPUT_TYPE *__restrict idata, AggregateInputData &aggr_input_data, STATE_TYPE **__restrict states, const SelectionVector &isel, const SelectionVector &ssel, ValidityMask &mask, idx_t count) { #ifdef DUCKDB_SMALLER_BINARY const auto HAS_ISEL = isel.IsSet(); const auto HAS_SSEL = ssel.IsSet(); #endif if (OP::IgnoreNull() && !mask.AllValid()) { // potential NULL values and NULL values are ignored AggregateUnaryInput input(aggr_input_data, mask); for (idx_t i = 0; i < count; i++) { input.input_idx = HAS_ISEL ? isel.get_index_unsafe(i) : i; auto sidx = HAS_SSEL ? ssel.get_index_unsafe(i) : i; if (mask.RowIsValidUnsafe(input.input_idx)) { OP::template Operation(*states[sidx], idata[input.input_idx], input); } } } else { // quick path: no NULL values or NULL values are not ignored AggregateUnaryInput input(aggr_input_data, mask); for (idx_t i = 0; i < count; i++) { input.input_idx = HAS_ISEL ? isel.get_index_unsafe(i) : i; auto sidx = HAS_SSEL ? 
ssel.get_index_unsafe(i) : i; OP::template Operation(*states[sidx], idata[input.input_idx], input); } } } #ifndef DUCKDB_SMALLER_BINARY template static inline void UnaryFlatUpdateLoop(const INPUT_TYPE *__restrict idata, AggregateInputData &aggr_input_data, STATE_TYPE *__restrict state, idx_t count, ValidityMask &mask) { AggregateUnaryInput input(aggr_input_data, mask); auto &base_idx = input.input_idx; base_idx = 0; auto entry_count = ValidityMask::EntryCount(count); for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) { auto validity_entry = mask.GetValidityEntry(entry_idx); idx_t next = MinValue(base_idx + ValidityMask::BITS_PER_VALUE, count); if (!OP::IgnoreNull() || ValidityMask::AllValid(validity_entry)) { // all valid: perform operation for (; base_idx < next; base_idx++) { OP::template Operation(*state, idata[base_idx], input); } } else if (ValidityMask::NoneValid(validity_entry)) { // nothing valid: skip all base_idx = next; continue; } else { // partially valid: need to check individual elements for validity idx_t start = base_idx; for (; base_idx < next; base_idx++) { if (ValidityMask::RowIsValid(validity_entry, base_idx - start)) { OP::template Operation(*state, idata[base_idx], input); } } } } } #endif template static inline void UnaryUpdateLoop(const INPUT_TYPE *__restrict idata, AggregateInputData &aggr_input_data, STATE_TYPE *__restrict state, idx_t count, ValidityMask &mask, const SelectionVector &__restrict sel_vector) { AggregateUnaryInput input(aggr_input_data, mask); if (OP::IgnoreNull() && !mask.AllValid()) { // potential NULL values and NULL values are ignored for (idx_t i = 0; i < count; i++) { input.input_idx = sel_vector.get_index(i); if (mask.RowIsValid(input.input_idx)) { OP::template Operation(*state, idata[input.input_idx], input); } } } else { // quick path: no NULL values or NULL values are not ignored for (idx_t i = 0; i < count; i++) { input.input_idx = sel_vector.get_index(i); OP::template Operation(*state, 
idata[input.input_idx], input); } } } template static inline void BinaryScatterLoop(const A_TYPE *__restrict adata, AggregateInputData &aggr_input_data, const B_TYPE *__restrict bdata, STATE_TYPE **__restrict states, idx_t count, const SelectionVector &asel, const SelectionVector &bsel, const SelectionVector &ssel, ValidityMask &avalidity, ValidityMask &bvalidity) { AggregateBinaryInput input(aggr_input_data, avalidity, bvalidity); if (OP::IgnoreNull() && (!avalidity.AllValid() || !bvalidity.AllValid())) { // potential NULL values and NULL values are ignored for (idx_t i = 0; i < count; i++) { input.lidx = asel.get_index(i); input.ridx = bsel.get_index(i); auto sidx = ssel.get_index(i); if (avalidity.RowIsValid(input.lidx) && bvalidity.RowIsValid(input.ridx)) { OP::template Operation(*states[sidx], adata[input.lidx], bdata[input.ridx], input); } } } else { // quick path: no NULL values or NULL values are not ignored for (idx_t i = 0; i < count; i++) { input.lidx = asel.get_index(i); input.ridx = bsel.get_index(i); auto sidx = ssel.get_index(i); OP::template Operation(*states[sidx], adata[input.lidx], bdata[input.ridx], input); } } } template static inline void BinaryUpdateLoop(const A_TYPE *__restrict adata, AggregateInputData &aggr_input_data, const B_TYPE *__restrict bdata, STATE_TYPE *__restrict state, idx_t count, const SelectionVector &asel, const SelectionVector &bsel, ValidityMask &avalidity, ValidityMask &bvalidity) { AggregateBinaryInput input(aggr_input_data, avalidity, bvalidity); if (OP::IgnoreNull() && (!avalidity.AllValid() || !bvalidity.AllValid())) { // potential NULL values and NULL values are ignored for (idx_t i = 0; i < count; i++) { input.lidx = asel.get_index(i); input.ridx = bsel.get_index(i); if (avalidity.RowIsValid(input.lidx) && bvalidity.RowIsValid(input.ridx)) { OP::template Operation(*state, adata[input.lidx], bdata[input.ridx], input); } } } else { // quick path: no NULL values or NULL values are not ignored for (idx_t i = 0; i < 
count; i++) { input.lidx = asel.get_index(i); input.ridx = bsel.get_index(i); OP::template Operation(*state, adata[input.lidx], bdata[input.ridx], input); } } } public: template static void NullaryScatter(Vector &states, AggregateInputData &aggr_input_data, idx_t count) { if (states.GetVectorType() == VectorType::CONSTANT_VECTOR) { auto sdata = ConstantVector::GetData(states); OP::template ConstantOperation(**sdata, aggr_input_data, count); #ifndef DUCKDB_SMALLER_BINARY } else if (states.GetVectorType() == VectorType::FLAT_VECTOR) { auto sdata = FlatVector::GetData(states); NullaryFlatLoop(sdata, aggr_input_data, count); #endif } else { UnifiedVectorFormat sdata; states.ToUnifiedFormat(count, sdata); NullaryScatterLoop((STATE_TYPE **)sdata.data, aggr_input_data, *sdata.sel, count); } } template static void NullaryUpdate(data_ptr_t state, AggregateInputData &aggr_input_data, idx_t count) { OP::template ConstantOperation(*reinterpret_cast(state), aggr_input_data, count); } template static void UnaryScatter(Vector &input, Vector &states, AggregateInputData &aggr_input_data, idx_t count) { if (input.GetVectorType() == VectorType::CONSTANT_VECTOR && states.GetVectorType() == VectorType::CONSTANT_VECTOR) { if (OP::IgnoreNull() && ConstantVector::IsNull(input)) { // constant NULL input in function that ignores NULL values return; } // regular constant: get first state auto idata = ConstantVector::GetData(input); auto sdata = ConstantVector::GetData(states); AggregateUnaryInput input_data(aggr_input_data, ConstantVector::Validity(input)); OP::template ConstantOperation(**sdata, *idata, input_data, count); #ifndef DUCKDB_SMALLER_BINARY } else if (input.GetVectorType() == VectorType::FLAT_VECTOR && states.GetVectorType() == VectorType::FLAT_VECTOR) { auto idata = FlatVector::GetData(input); auto sdata = FlatVector::GetData(states); UnaryFlatLoop(idata, aggr_input_data, sdata, FlatVector::Validity(input), count); #endif } else { UnifiedVectorFormat idata, sdata; 
input.ToUnifiedFormat(count, idata); states.ToUnifiedFormat(count, sdata); #ifdef DUCKDB_SMALLER_BINARY UnaryScatterLoop(UnifiedVectorFormat::GetData(idata), aggr_input_data, (STATE_TYPE **)sdata.data, *idata.sel, *sdata.sel, idata.validity, count); #else if (idata.sel->IsSet()) { if (sdata.sel->IsSet()) { UnaryScatterLoop( UnifiedVectorFormat::GetData(idata), aggr_input_data, (STATE_TYPE **)sdata.data, *idata.sel, *sdata.sel, idata.validity, count); } else { UnaryScatterLoop( UnifiedVectorFormat::GetData(idata), aggr_input_data, (STATE_TYPE **)sdata.data, *idata.sel, *sdata.sel, idata.validity, count); } } else { if (sdata.sel->IsSet()) { UnaryScatterLoop( UnifiedVectorFormat::GetData(idata), aggr_input_data, (STATE_TYPE **)sdata.data, *idata.sel, *sdata.sel, idata.validity, count); } else { UnaryScatterLoop( UnifiedVectorFormat::GetData(idata), aggr_input_data, (STATE_TYPE **)sdata.data, *idata.sel, *sdata.sel, idata.validity, count); } } #endif } } template static void UnaryUpdate(Vector &input, AggregateInputData &aggr_input_data, data_ptr_t state, idx_t count) { switch (input.GetVectorType()) { case VectorType::CONSTANT_VECTOR: { if (OP::IgnoreNull() && ConstantVector::IsNull(input)) { return; } auto idata = ConstantVector::GetData(input); AggregateUnaryInput input_data(aggr_input_data, ConstantVector::Validity(input)); OP::template ConstantOperation(*reinterpret_cast(state), *idata, input_data, count); break; } #ifndef DUCKDB_SMALLER_BINARY case VectorType::FLAT_VECTOR: { auto idata = FlatVector::GetData(input); UnaryFlatUpdateLoop(idata, aggr_input_data, (STATE_TYPE *)state, count, FlatVector::Validity(input)); break; } #endif default: { UnifiedVectorFormat idata; input.ToUnifiedFormat(count, idata); UnaryUpdateLoop(UnifiedVectorFormat::GetData(idata), aggr_input_data, (STATE_TYPE *)state, count, idata.validity, *idata.sel); break; } } } template static void BinaryScatter(AggregateInputData &aggr_input_data, Vector &a, Vector &b, Vector &states, idx_t count) 
{ UnifiedVectorFormat adata, bdata, sdata; a.ToUnifiedFormat(count, adata); b.ToUnifiedFormat(count, bdata); states.ToUnifiedFormat(count, sdata); BinaryScatterLoop( UnifiedVectorFormat::GetData(adata), aggr_input_data, UnifiedVectorFormat::GetData(bdata), (STATE_TYPE **)sdata.data, count, *adata.sel, *bdata.sel, *sdata.sel, adata.validity, bdata.validity); } template static void BinaryUpdate(AggregateInputData &aggr_input_data, Vector &a, Vector &b, data_ptr_t state, idx_t count) { UnifiedVectorFormat adata, bdata; a.ToUnifiedFormat(count, adata); b.ToUnifiedFormat(count, bdata); BinaryUpdateLoop( UnifiedVectorFormat::GetData(adata), aggr_input_data, UnifiedVectorFormat::GetData(bdata), (STATE_TYPE *)state, count, *adata.sel, *bdata.sel, adata.validity, bdata.validity); } template static void Combine(Vector &source, Vector &target, AggregateInputData &aggr_input_data, idx_t count) { D_ASSERT(source.GetType().id() == LogicalTypeId::POINTER && target.GetType().id() == LogicalTypeId::POINTER); auto sdata = FlatVector::GetData(source); auto tdata = FlatVector::GetData(target); for (idx_t i = 0; i < count; i++) { OP::template Combine(*sdata[i], *tdata[i], aggr_input_data); } } template static void Finalize(Vector &states, AggregateInputData &aggr_input_data, Vector &result, idx_t count, idx_t offset) { if (states.GetVectorType() == VectorType::CONSTANT_VECTOR) { result.SetVectorType(VectorType::CONSTANT_VECTOR); auto sdata = ConstantVector::GetData(states); auto rdata = ConstantVector::GetData(result); AggregateFinalizeData finalize_data(result, aggr_input_data); OP::template Finalize(**sdata, *rdata, finalize_data); } else { D_ASSERT(states.GetVectorType() == VectorType::FLAT_VECTOR); result.SetVectorType(VectorType::FLAT_VECTOR); auto sdata = FlatVector::GetData(states); auto rdata = FlatVector::GetData(result); AggregateFinalizeData finalize_data(result, aggr_input_data); for (idx_t i = 0; i < count; i++) { finalize_data.result_idx = i + offset; OP::template 
Finalize(*sdata[i], rdata[finalize_data.result_idx], finalize_data); } } } template static void VoidFinalize(Vector &states, AggregateInputData &aggr_input_data, Vector &result, idx_t count, idx_t offset) { if (states.GetVectorType() == VectorType::CONSTANT_VECTOR) { result.SetVectorType(VectorType::CONSTANT_VECTOR); auto sdata = ConstantVector::GetData(states); AggregateFinalizeData finalize_data(result, aggr_input_data); OP::template Finalize(**sdata, finalize_data); } else { D_ASSERT(states.GetVectorType() == VectorType::FLAT_VECTOR); result.SetVectorType(VectorType::FLAT_VECTOR); auto sdata = FlatVector::GetData(states); AggregateFinalizeData finalize_data(result, aggr_input_data); for (idx_t i = 0; i < count; i++) { finalize_data.result_idx = i + offset; OP::template Finalize(*sdata[i], finalize_data); } } } template static void IntersectFrames(const SubFrames &lefts, const SubFrames &rights, OP &op) { const auto cover_start = MinValue(rights[0].start, lefts[0].start); const auto cover_end = MaxValue(rights.back().end, lefts.back().end); const FrameBounds last(cover_end, cover_end); // Subframe indices idx_t l = 0; idx_t r = 0; for (auto i = cover_start; i < cover_end;) { uint8_t overlap = 0; // Are we in the previous frame? auto left = &last; if (l < lefts.size()) { left = &lefts[l]; overlap |= uint8_t(left->start <= i && i < left->end) << 0; } // Are we in the current frame? 
auto right = &last; if (r < rights.size()) { right = &rights[r]; overlap |= uint8_t(right->start <= i && i < right->end) << 1; } auto limit = i; switch (overlap) { case 0x00: // i ∉ F U P limit = MinValue(right->start, left->start); op.Neither(i, limit); break; case 0x01: // i ∈ P \ F limit = MinValue(left->end, right->start); op.Left(i, limit); break; case 0x02: // i ∈ F \ P limit = MinValue(right->end, left->start); op.Right(i, limit); break; case 0x03: default: D_ASSERT(overlap == 0x03); // i ∈ F ∩ P limit = MinValue(right->end, left->end); op.Both(i, limit); break; } // Advance the subframe indices i = limit; l += (i == left->end); r += (i == right->end); } } template static void Destroy(Vector &states, AggregateInputData &aggr_input_data, idx_t count) { auto sdata = FlatVector::GetData(states); for (idx_t i = 0; i < count; i++) { OP::template Destroy(*sdata[i], aggr_input_data); } } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/bound_result_modifier.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/group_by_node.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { using GroupingSet = set; class GroupByNode { public: //! The total set of all group expressions vector> group_expressions; //! 
The different grouping sets as they map to the group expressions vector grouping_sets; public: GroupByNode Copy() { GroupByNode node; node.group_expressions.reserve(group_expressions.size()); for (auto &expr : group_expressions) { node.group_expressions.push_back(expr->Copy()); } node.grouping_sets = grouping_sets; return node; } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/result_modifier.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/order_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class OrderType : uint8_t { INVALID = 0, ORDER_DEFAULT = 1, ASCENDING = 2, DESCENDING = 3 }; enum class OrderByNullType : uint8_t { INVALID = 0, ORDER_DEFAULT = 1, NULLS_FIRST = 2, NULLS_LAST = 3 }; enum class DefaultOrderByNullType : uint8_t { INVALID = 0, NULLS_FIRST = 2, NULLS_LAST = 3, NULLS_FIRST_ON_ASC_LAST_ON_DESC = 4, NULLS_LAST_ON_ASC_FIRST_ON_DESC = 5 }; } // namespace duckdb namespace duckdb { class Deserializer; class Serializer; enum class ResultModifierType : uint8_t { LIMIT_MODIFIER = 1, ORDER_MODIFIER = 2, DISTINCT_MODIFIER = 3, LIMIT_PERCENT_MODIFIER = 4 }; const char *ToString(ResultModifierType value); ResultModifierType ResultModifierFromString(const char *value); //! A ResultModifier class ResultModifier { public: explicit ResultModifier(ResultModifierType type) : type(type) { } virtual ~ResultModifier() { } ResultModifierType type; public: //! Returns true if the two result modifiers are equivalent virtual bool Equals(const ResultModifier &other) const; //! 
Create a copy of this ResultModifier virtual unique_ptr Copy() const = 0; virtual void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); public: template TARGET &Cast() { if (type != TARGET::TYPE) { throw InternalException("Failed to cast result modifier to type - result modifier type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (type != TARGET::TYPE) { throw InternalException("Failed to cast result modifier to type - result modifier type mismatch"); } return reinterpret_cast(*this); } }; //! Single node in ORDER BY statement struct OrderByNode { OrderByNode(OrderType type, OrderByNullType null_order, unique_ptr expression) : type(type), null_order(null_order), expression(std::move(expression)) { } //! Sort order, ASC or DESC OrderType type; //! The NULL sort order, NULLS_FIRST or NULLS_LAST OrderByNullType null_order; //! Expression to order by unique_ptr expression; public: string ToString() const; void Serialize(Serializer &serializer) const; static OrderByNode Deserialize(Deserializer &deserializer); }; class LimitModifier : public ResultModifier { public: static constexpr const ResultModifierType TYPE = ResultModifierType::LIMIT_MODIFIER; public: LimitModifier() : ResultModifier(ResultModifierType::LIMIT_MODIFIER) { } //! LIMIT count unique_ptr limit; //! OFFSET unique_ptr offset; public: bool Equals(const ResultModifier &other) const override; unique_ptr Copy() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); }; class OrderModifier : public ResultModifier { public: static constexpr const ResultModifierType TYPE = ResultModifierType::ORDER_MODIFIER; public: OrderModifier() : ResultModifier(ResultModifierType::ORDER_MODIFIER) { } //! 
List of order nodes vector orders; public: bool Equals(const ResultModifier &other) const override; unique_ptr Copy() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); static bool Equals(const unique_ptr &left, const unique_ptr &right); }; class DistinctModifier : public ResultModifier { public: static constexpr const ResultModifierType TYPE = ResultModifierType::DISTINCT_MODIFIER; public: DistinctModifier() : ResultModifier(ResultModifierType::DISTINCT_MODIFIER) { } //! list of distinct on targets (if any) vector> distinct_on_targets; public: bool Equals(const ResultModifier &other) const override; unique_ptr Copy() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); }; class LimitPercentModifier : public ResultModifier { public: static constexpr const ResultModifierType TYPE = ResultModifierType::LIMIT_PERCENT_MODIFIER; public: LimitPercentModifier() : ResultModifier(ResultModifierType::LIMIT_PERCENT_MODIFIER) { } //! LIMIT % unique_ptr limit; //! 
OFFSET unique_ptr offset; public: bool Equals(const ResultModifier &other) const override; unique_ptr Copy() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/bound_statement.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class LogicalOperator; struct LogicalType; struct BoundStatement { unique_ptr plan; vector types; vector names; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/expression.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class BaseStatistics; class ClientContext; //! The Expression class represents a bound Expression with a return type class Expression : public BaseExpression { public: Expression(ExpressionType type, ExpressionClass expression_class, LogicalType return_type); ~Expression() override; //! The return type of the expression LogicalType return_type; //! 
Expression statistics (if any) - ONLY USED FOR VERIFICATION unique_ptr verification_stats; public: bool IsAggregate() const override; bool IsWindow() const override; bool HasSubquery() const override; bool IsScalar() const override; bool HasParameter() const override; virtual bool IsVolatile() const; virtual bool IsConsistent() const; virtual bool PropagatesNullValues() const; virtual bool IsFoldable() const; virtual bool CanThrow() const; hash_t Hash() const override; bool Equals(const BaseExpression &other) const override { if (!BaseExpression::Equals(other)) { return false; } return return_type == reinterpret_cast(other).return_type; } static bool Equals(const Expression &left, const Expression &right) { return left.Equals(right); } static bool Equals(const unique_ptr &left, const unique_ptr &right); static bool ListEquals(const vector> &left, const vector> &right); //! Create a copy of this expression virtual unique_ptr Copy() const = 0; virtual void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); protected: //! Copy base Expression properties from another expression to this one, //! used in Copy method void CopyProperties(const Expression &other) { type = other.type; expression_class = other.expression_class; alias = other.alias; return_type = other.return_type; query_location = other.query_location; } }; } // namespace duckdb namespace duckdb { //! 
A ResultModifier class BoundResultModifier { public: explicit BoundResultModifier(ResultModifierType type); virtual ~BoundResultModifier(); ResultModifierType type; public: template TARGET &Cast() { if (type != TARGET::TYPE) { throw InternalException("Failed to cast result modifier to type - result modifier type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (type != TARGET::TYPE) { throw InternalException("Failed to cast result modifier to type - result modifier type mismatch"); } return reinterpret_cast(*this); } }; struct BoundOrderByNode { public: static constexpr const ResultModifierType TYPE = ResultModifierType::ORDER_MODIFIER; public: BoundOrderByNode(OrderType type, OrderByNullType null_order, unique_ptr expression); BoundOrderByNode(OrderType type, OrderByNullType null_order, unique_ptr expression, unique_ptr stats); OrderType type; OrderByNullType null_order; unique_ptr expression; unique_ptr stats; public: BoundOrderByNode Copy() const; bool Equals(const BoundOrderByNode &other) const; string GetOrderModifier() const; string ToString() const; void Serialize(Serializer &serializer) const; static BoundOrderByNode Deserialize(Deserializer &deserializer); }; enum class LimitNodeType : uint8_t { UNSET = 0, CONSTANT_VALUE = 1, CONSTANT_PERCENTAGE = 2, EXPRESSION_VALUE = 3, EXPRESSION_PERCENTAGE = 4 }; struct BoundLimitNode { public: BoundLimitNode(); BoundLimitNode(LimitNodeType type, idx_t constant_integer, double constant_percentage, unique_ptr expression); public: static BoundLimitNode ConstantValue(int64_t value); static BoundLimitNode ConstantPercentage(double percentage); static BoundLimitNode ExpressionValue(unique_ptr expression); static BoundLimitNode ExpressionPercentage(unique_ptr expression); LimitNodeType Type() const { return type; } //! Returns the constant value, only valid if Type() == CONSTANT_VALUE idx_t GetConstantValue() const; //! 
Returns the constant percentage, only valid if Type() == CONSTANT_PERCENTAGE double GetConstantPercentage() const; //! Returns the constant percentage, only valid if Type() == EXPRESSION_VALUE const Expression &GetValueExpression() const; //! Returns the constant percentage, only valid if Type() == EXPRESSION_PERCENTAGE const Expression &GetPercentageExpression() const; //! Returns a pointer to the expression - should only be used for limit-agnostic optimizations. //! Prefer using the methods above in other scenarios. unique_ptr &GetExpression() { return expression; } void Serialize(Serializer &serializer) const; static BoundLimitNode Deserialize(Deserializer &deserializer); private: LimitNodeType type = LimitNodeType::UNSET; //! Integer value, if value is a constant non-percentage idx_t constant_integer = 0; //! Percentage value, if value is a constant percentage double constant_percentage = -1; //! Expression in case node is not constant unique_ptr expression; private: explicit BoundLimitNode(int64_t constant_value); explicit BoundLimitNode(double percentage_value); explicit BoundLimitNode(unique_ptr expression, bool is_percentage); }; class BoundLimitModifier : public BoundResultModifier { public: static constexpr const ResultModifierType TYPE = ResultModifierType::LIMIT_MODIFIER; public: BoundLimitModifier(); //! LIMIT BoundLimitNode limit_val; //! OFFSET BoundLimitNode offset_val; }; class BoundOrderModifier : public BoundResultModifier { public: static constexpr const ResultModifierType TYPE = ResultModifierType::ORDER_MODIFIER; public: BoundOrderModifier(); //! List of order nodes vector orders; unique_ptr Copy() const; static bool Equals(const BoundOrderModifier &left, const BoundOrderModifier &right); static bool Equals(const unique_ptr &left, const unique_ptr &right); void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); //! Remove unneeded/duplicate order elements. //! 
Returns true of orders is not empty. static bool Simplify(vector &orders, const vector> &groups, optional_ptr> grouping_sets); bool Simplify(const vector> &groups, optional_ptr> grouping_sets); }; enum class DistinctType : uint8_t { DISTINCT = 0, DISTINCT_ON = 1 }; class BoundDistinctModifier : public BoundResultModifier { public: static constexpr const ResultModifierType TYPE = ResultModifierType::DISTINCT_MODIFIER; public: BoundDistinctModifier(); //! Whether or not this is a DISTINCT or DISTINCT ON DistinctType distinct_type; //! list of distinct on targets vector> target_distincts; }; } // namespace duckdb namespace duckdb { class BufferManager; class InterruptState; //! A half-open range of frame boundary values _relative to the current row_ //! This is why they are signed values. struct FrameDelta { FrameDelta() : begin(0), end(0) {}; FrameDelta(int64_t begin, int64_t end) : begin(begin), end(end) {}; int64_t begin = 0; int64_t end = 0; }; //! The half-open ranges of frame boundary values relative to the current row using FrameStats = array; //! The partition data for custom window functions //! Note that if the inputs is nullptr then the column count is 0, //! but the row count will still be valid class ColumnDataCollection; struct WindowPartitionInput { WindowPartitionInput(ExecutionContext &context, const ColumnDataCollection *inputs, const idx_t count, const vector &column_ids, const vector &all_valid, const ValidityMask &filter_mask, const FrameStats &stats, InterruptState &interrupt_state) : context(context), inputs(inputs), count(count), column_ids(column_ids), all_valid(all_valid), filter_mask(filter_mask), stats(stats), interrupt_state(interrupt_state) { } ExecutionContext &context; const ColumnDataCollection *inputs; const idx_t count; const vector column_ids; const vector &all_valid; const ValidityMask &filter_mask; const FrameStats stats; InterruptState &interrupt_state; }; //! 
The type used for sizing hashed aggregate function states typedef idx_t (*aggregate_size_t)(const AggregateFunction &function); //! The type used for initializing hashed aggregate function states typedef void (*aggregate_initialize_t)(const AggregateFunction &function, data_ptr_t state); //! The type used for updating hashed aggregate functions typedef void (*aggregate_update_t)(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, Vector &state, idx_t count); //! The type used for combining hashed aggregate states typedef void (*aggregate_combine_t)(Vector &state, Vector &combined, AggregateInputData &aggr_input_data, idx_t count); //! The type used for finalizing hashed aggregate function payloads typedef void (*aggregate_finalize_t)(Vector &state, AggregateInputData &aggr_input_data, Vector &result, idx_t count, idx_t offset); //! The type used for propagating statistics in aggregate functions (optional) typedef unique_ptr (*aggregate_statistics_t)(ClientContext &context, BoundAggregateExpression &expr, AggregateStatisticsInput &input); //! Binds the scalar function and creates the function data typedef unique_ptr (*bind_aggregate_function_t)(ClientContext &context, AggregateFunction &function, vector> &arguments); //! The type used for the aggregate destructor method. NOTE: this method is used in destructors and MAY NOT throw. typedef void (*aggregate_destructor_t)(Vector &state, AggregateInputData &aggr_input_data, idx_t count); //! The type used for updating simple (non-grouped) aggregate functions typedef void (*aggregate_simple_update_t)(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, data_ptr_t state, idx_t count); //! The type used for computing complex/custom windowed aggregate functions (optional) typedef void (*aggregate_window_t)(AggregateInputData &aggr_input_data, const WindowPartitionInput &partition, const_data_ptr_t g_state, data_ptr_t l_state, const SubFrames &subframes, Vector &result, idx_t rid); //! 
The type used for initializing shared complex/custom windowed aggregate state (optional) typedef void (*aggregate_wininit_t)(AggregateInputData &aggr_input_data, const WindowPartitionInput &partition, data_ptr_t g_state); typedef void (*aggregate_serialize_t)(Serializer &serializer, const optional_ptr bind_data, const AggregateFunction &function); typedef unique_ptr (*aggregate_deserialize_t)(Deserializer &deserializer, AggregateFunction &function); struct AggregateFunctionInfo { DUCKDB_API virtual ~AggregateFunctionInfo(); template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; enum class AggregateDestructorType { STANDARD, // legacy destructors allow non-trivial destructors in aggregate states // these might not be trivial to off-load to disk LEGACY }; class AggregateFunction : public BaseScalarFunction { // NOLINT: work-around bug in clang-tidy public: AggregateFunction(const string &name, const vector &arguments, const LogicalType &return_type, aggregate_size_t state_size, aggregate_initialize_t initialize, aggregate_update_t update, aggregate_combine_t combine, aggregate_finalize_t finalize, FunctionNullHandling null_handling = FunctionNullHandling::DEFAULT_NULL_HANDLING, aggregate_simple_update_t simple_update = nullptr, bind_aggregate_function_t bind = nullptr, aggregate_destructor_t destructor = nullptr, aggregate_statistics_t statistics = nullptr, aggregate_window_t window = nullptr, aggregate_serialize_t serialize = nullptr, aggregate_deserialize_t deserialize = nullptr) : BaseScalarFunction(name, arguments, return_type, FunctionStability::CONSISTENT, LogicalType(LogicalTypeId::INVALID), null_handling), state_size(state_size), initialize(initialize), update(update), combine(combine), finalize(finalize), simple_update(simple_update), window(window), bind(bind), destructor(destructor), statistics(statistics), serialize(serialize), 
deserialize(deserialize), order_dependent(AggregateOrderDependent::ORDER_DEPENDENT), distinct_dependent(AggregateDistinctDependent::DISTINCT_DEPENDENT) { } AggregateFunction(const string &name, const vector &arguments, const LogicalType &return_type, aggregate_size_t state_size, aggregate_initialize_t initialize, aggregate_update_t update, aggregate_combine_t combine, aggregate_finalize_t finalize, aggregate_simple_update_t simple_update = nullptr, bind_aggregate_function_t bind = nullptr, aggregate_destructor_t destructor = nullptr, aggregate_statistics_t statistics = nullptr, aggregate_window_t window = nullptr, aggregate_serialize_t serialize = nullptr, aggregate_deserialize_t deserialize = nullptr) : BaseScalarFunction(name, arguments, return_type, FunctionStability::CONSISTENT, LogicalType(LogicalTypeId::INVALID)), state_size(state_size), initialize(initialize), update(update), combine(combine), finalize(finalize), simple_update(simple_update), window(window), bind(bind), destructor(destructor), statistics(statistics), serialize(serialize), deserialize(deserialize), order_dependent(AggregateOrderDependent::ORDER_DEPENDENT), distinct_dependent(AggregateDistinctDependent::DISTINCT_DEPENDENT) { } AggregateFunction(const vector &arguments, const LogicalType &return_type, aggregate_size_t state_size, aggregate_initialize_t initialize, aggregate_update_t update, aggregate_combine_t combine, aggregate_finalize_t finalize, FunctionNullHandling null_handling = FunctionNullHandling::DEFAULT_NULL_HANDLING, aggregate_simple_update_t simple_update = nullptr, bind_aggregate_function_t bind = nullptr, aggregate_destructor_t destructor = nullptr, aggregate_statistics_t statistics = nullptr, aggregate_window_t window = nullptr, aggregate_serialize_t serialize = nullptr, aggregate_deserialize_t deserialize = nullptr) : AggregateFunction(string(), arguments, return_type, state_size, initialize, update, combine, finalize, null_handling, simple_update, bind, destructor, 
statistics, window, serialize, deserialize) { } AggregateFunction(const vector &arguments, const LogicalType &return_type, aggregate_size_t state_size, aggregate_initialize_t initialize, aggregate_update_t update, aggregate_combine_t combine, aggregate_finalize_t finalize, aggregate_simple_update_t simple_update = nullptr, bind_aggregate_function_t bind = nullptr, aggregate_destructor_t destructor = nullptr, aggregate_statistics_t statistics = nullptr, aggregate_window_t window = nullptr, aggregate_serialize_t serialize = nullptr, aggregate_deserialize_t deserialize = nullptr) : AggregateFunction(string(), arguments, return_type, state_size, initialize, update, combine, finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, simple_update, bind, destructor, statistics, window, serialize, deserialize) { } // Window constructor AggregateFunction(const vector &arguments, const LogicalType &return_type, aggregate_size_t state_size, aggregate_initialize_t initialize, aggregate_wininit_t window_init, aggregate_window_t window, bind_aggregate_function_t bind = nullptr, aggregate_destructor_t destructor = nullptr, aggregate_statistics_t statistics = nullptr, aggregate_serialize_t serialize = nullptr, aggregate_deserialize_t deserialize = nullptr) : BaseScalarFunction(name, arguments, return_type, FunctionStability::CONSISTENT, LogicalType(LogicalTypeId::INVALID)), state_size(state_size), initialize(initialize), update(nullptr), combine(nullptr), finalize(nullptr), simple_update(nullptr), window(window), window_init(window_init), bind(bind), destructor(destructor), statistics(statistics), serialize(serialize), deserialize(deserialize), order_dependent(AggregateOrderDependent::ORDER_DEPENDENT), distinct_dependent(AggregateDistinctDependent::DISTINCT_DEPENDENT) { } //! The hashed aggregate state sizing function aggregate_size_t state_size; //! The hashed aggregate state initialization function aggregate_initialize_t initialize; //! 
The hashed aggregate update state function (may be null, if window is set) aggregate_update_t update; //! The hashed aggregate combine states function (may be null, if window is set) aggregate_combine_t combine; //! The hashed aggregate finalization function (may be null, if window is set) aggregate_finalize_t finalize; //! The simple aggregate update function (may be null) aggregate_simple_update_t simple_update; //! The windowed aggregate custom function (may be null) aggregate_window_t window; //! The windowed aggregate custom initialization function (may be null) aggregate_wininit_t window_init = nullptr; //! The bind function (may be null) bind_aggregate_function_t bind; //! The destructor method (may be null) aggregate_destructor_t destructor; //! The statistics propagation function (may be null) aggregate_statistics_t statistics; aggregate_serialize_t serialize; aggregate_deserialize_t deserialize; //! Whether or not the aggregate is order dependent AggregateOrderDependent order_dependent; //! Whether or not the aggregate is affect by distinct modifiers AggregateDistinctDependent distinct_dependent; //! 
Additional function info, passed to the bind shared_ptr function_info; bool operator==(const AggregateFunction &rhs) const { return state_size == rhs.state_size && initialize == rhs.initialize && update == rhs.update && combine == rhs.combine && finalize == rhs.finalize && window == rhs.window; } bool operator!=(const AggregateFunction &rhs) const { return !(*this == rhs); } bool CanAggregate() const { return update || combine || finalize; } bool CanWindow() const { return window; } public: template static AggregateFunction NullaryAggregate(LogicalType return_type) { return AggregateFunction( {}, return_type, AggregateFunction::StateSize, AggregateFunction::StateInitialize, AggregateFunction::NullaryScatterUpdate, AggregateFunction::StateCombine, AggregateFunction::StateFinalize, AggregateFunction::NullaryUpdate); } template static AggregateFunction UnaryAggregate(const LogicalType &input_type, LogicalType return_type, FunctionNullHandling null_handling = FunctionNullHandling::DEFAULT_NULL_HANDLING) { return AggregateFunction({input_type}, return_type, AggregateFunction::StateSize, AggregateFunction::StateInitialize, AggregateFunction::UnaryScatterUpdate, AggregateFunction::StateCombine, AggregateFunction::StateFinalize, null_handling, AggregateFunction::UnaryUpdate); } template static AggregateFunction UnaryAggregateDestructor(LogicalType input_type, LogicalType return_type) { auto aggregate = UnaryAggregate(input_type, return_type); aggregate.destructor = AggregateFunction::StateDestroy; return aggregate; } template static AggregateFunction BinaryAggregate(const LogicalType &a_type, const LogicalType &b_type, LogicalType return_type) { return AggregateFunction({a_type, b_type}, return_type, AggregateFunction::StateSize, AggregateFunction::StateInitialize, AggregateFunction::BinaryScatterUpdate, AggregateFunction::StateCombine, AggregateFunction::StateFinalize, AggregateFunction::BinaryUpdate); } public: template static idx_t StateSize(const AggregateFunction &) { 
return sizeof(STATE); } template static void StateInitialize(const AggregateFunction &, data_ptr_t state) { // FIXME: we should remove the "destructor_type" option in the future #if !defined(__GNUC__) || (__GNUC__ >= 5) static_assert(std::is_trivially_move_constructible::value || destructor_type == AggregateDestructorType::LEGACY, "Aggregate state must be trivially move constructible"); #endif OP::Initialize(*reinterpret_cast(state)); } template static void NullaryScatterUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, Vector &states, idx_t count) { D_ASSERT(input_count == 0); AggregateExecutor::NullaryScatter(states, aggr_input_data, count); } template static void NullaryUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, data_ptr_t state, idx_t count) { D_ASSERT(input_count == 0); AggregateExecutor::NullaryUpdate(state, aggr_input_data, count); } template static void UnaryScatterUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, Vector &states, idx_t count) { D_ASSERT(input_count == 1); AggregateExecutor::UnaryScatter(inputs[0], states, aggr_input_data, count); } template static void UnaryUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, data_ptr_t state, idx_t count) { D_ASSERT(input_count == 1); AggregateExecutor::UnaryUpdate(inputs[0], aggr_input_data, state, count); } template static void BinaryScatterUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, Vector &states, idx_t count) { D_ASSERT(input_count == 2); AggregateExecutor::BinaryScatter(aggr_input_data, inputs[0], inputs[1], states, count); } template static void BinaryUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, data_ptr_t state, idx_t count) { D_ASSERT(input_count == 2); AggregateExecutor::BinaryUpdate(aggr_input_data, inputs[0], inputs[1], state, count); } template static void StateCombine(Vector &source, Vector &target, 
AggregateInputData &aggr_input_data, idx_t count) { AggregateExecutor::Combine(source, target, aggr_input_data, count); } template static void StateFinalize(Vector &states, AggregateInputData &aggr_input_data, Vector &result, idx_t count, idx_t offset) { AggregateExecutor::Finalize(states, aggr_input_data, result, count, offset); } template static void StateVoidFinalize(Vector &states, AggregateInputData &aggr_input_data, Vector &result, idx_t count, idx_t offset) { AggregateExecutor::VoidFinalize(states, aggr_input_data, result, count, offset); } template static void StateDestroy(Vector &states, AggregateInputData &aggr_input_data, idx_t count) { AggregateExecutor::Destroy(states, aggr_input_data, count); } }; } // namespace duckdb namespace duckdb { // NOLINTBEGIN struct UDFWrapper { public: template inline static scalar_function_t CreateScalarFunction(const string &name, TR (*udf_func)(ARGS...)) { const std::size_t num_template_argc = sizeof...(ARGS); switch (num_template_argc) { case 1: return CreateUnaryFunction(name, udf_func); case 2: return CreateBinaryFunction(name, udf_func); case 3: return CreateTernaryFunction(name, udf_func); default: // LCOV_EXCL_START throw std::runtime_error("UDF function only supported until ternary!"); } // LCOV_EXCL_STOP } template inline static scalar_function_t CreateScalarFunction(const string &name, const vector &args, const LogicalType &ret_type, TR (*udf_func)(ARGS...)) { if (!TypesMatch(ret_type)) { // LCOV_EXCL_START throw std::runtime_error("Return type doesn't match with the first template type."); } // LCOV_EXCL_STOP const std::size_t num_template_types = sizeof...(ARGS); if (num_template_types != args.size()) { // LCOV_EXCL_START throw std::runtime_error( "The number of templated types should be the same quantity of the LogicalType arguments."); } // LCOV_EXCL_STOP switch (num_template_types) { case 1: return CreateUnaryFunction(name, args, ret_type, udf_func); case 2: return CreateBinaryFunction(name, args, ret_type, 
udf_func); case 3: return CreateTernaryFunction(name, args, ret_type, udf_func); default: // LCOV_EXCL_START throw std::runtime_error("UDF function only supported until ternary!"); } // LCOV_EXCL_STOP } template inline static void RegisterFunction(const string &name, scalar_function_t udf_function, ClientContext &context, LogicalType varargs = LogicalType(LogicalTypeId::INVALID)) { vector arguments; GetArgumentTypesRecursive(arguments); LogicalType ret_type = GetArgumentType(); RegisterFunction(name, arguments, ret_type, std::move(udf_function), context, std::move(varargs)); } static void RegisterFunction(string name, vector args, LogicalType ret_type, scalar_function_t udf_function, ClientContext &context, LogicalType varargs = LogicalType(LogicalTypeId::INVALID)); //--------------------------------- Aggregate UDFs ------------------------------------// template inline static AggregateFunction CreateAggregateFunction(const string &name) { return CreateUnaryAggregateFunction(name); } template inline static AggregateFunction CreateAggregateFunction(const string &name) { return CreateBinaryAggregateFunction(name); } template inline static AggregateFunction CreateAggregateFunction(const string &name, const LogicalType &ret_type, const LogicalType &input_type) { if (!TypesMatch(ret_type)) { // LCOV_EXCL_START throw std::runtime_error("The return argument don't match!"); } // LCOV_EXCL_STOP if (!TypesMatch(input_type)) { // LCOV_EXCL_START throw std::runtime_error("The input argument don't match!"); } // LCOV_EXCL_STOP return CreateUnaryAggregateFunction(name, ret_type, input_type); } template inline static AggregateFunction CreateAggregateFunction(const string &name, const LogicalType &ret_type, const LogicalType &input_type_a, const LogicalType &input_type_b) { if (!TypesMatch(ret_type)) { // LCOV_EXCL_START throw std::runtime_error("The return argument don't match!"); } if (!TypesMatch(input_type_a)) { throw std::runtime_error("The first input argument don't 
// NOTE(review): interior of DuckDB's UDFWrapper helper class (class header is above this chunk).
// NOTE(review): this amalgamation extract has lost every template parameter list — e.g. `template inline static`
// should read `template <typename TR, typename TA>`, `std::is_same()` should be `std::is_same<T, bool>()`,
// and `vector &arguments` should be `vector<LogicalType> &arguments`. Restore from upstream duckdb.hpp
// (v1.4.3, source id d1dc88f950) before attempting to compile; as-is this does not parse.
// NOTE(review): the first statement below is the tail of a CreateBinaryAggregateFunction overload whose
// head precedes this chunk; grammar nit in its message ("argument don't match") exists upstream too.
match!"); } if (!TypesMatch(input_type_b)) { throw std::runtime_error("The second input argument don't match!"); } // LCOV_EXCL_STOP return CreateBinaryAggregateFunction(name, ret_type, input_type_a, input_type_b); } //! A generic CreateAggregateFunction ---------------------------------------------------------------------------// inline static AggregateFunction CreateAggregateFunction(const string &name, const vector &arguments, const LogicalType &return_type, aggregate_size_t state_size, aggregate_initialize_t initialize, aggregate_update_t update, aggregate_combine_t combine, aggregate_finalize_t finalize, aggregate_simple_update_t simple_update = nullptr, bind_aggregate_function_t bind = nullptr, aggregate_destructor_t destructor = nullptr) { AggregateFunction aggr_function(name, arguments, return_type, state_size, initialize, update, combine, finalize, simple_update, bind, destructor); aggr_function.null_handling = FunctionNullHandling::SPECIAL_HANDLING; return aggr_function; } static void RegisterAggrFunction(AggregateFunction aggr_function, ClientContext &context, LogicalType varargs = LogicalType(LogicalTypeId::INVALID)); private: //-------------------------------- Templated functions --------------------------------// struct UnaryUDFExecutor { template static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) { typedef RESULT_TYPE (*unary_function_t)(INPUT_TYPE); auto udf = (unary_function_t)dataptr; return udf(input); } }; template inline static scalar_function_t CreateUnaryFunction(const string &name, TR (*udf_func)(TA)) { scalar_function_t udf_function = [=](DataChunk &input, ExpressionState &state, Vector &result) -> void { UnaryExecutor::GenericExecute(input.data[0], result, input.size(), (void *)udf_func); }; return udf_function; } template inline static scalar_function_t CreateBinaryFunction(const string &name, TR (*udf_func)(TA, TB)) { scalar_function_t udf_function = [=](DataChunk &input, ExpressionState &state, 
// NOTE(review): the Create{Unary,Binary,Ternary}Function wrappers capture the raw C function pointer by value
// and forward vector slots input.data[0..2] into the corresponding Executor; the variadic `(ARGS...)` overloads
// below them exist only to give a readable error for an arity mismatch (they are excluded from coverage).
Vector &result) -> void { BinaryExecutor::Execute(input.data[0], input.data[1], result, input.size(), udf_func); }; return udf_function; } template inline static scalar_function_t CreateTernaryFunction(const string &name, TR (*udf_func)(TA, TB, TC)) { scalar_function_t udf_function = [=](DataChunk &input, ExpressionState &state, Vector &result) -> void { TernaryExecutor::Execute(input.data[0], input.data[1], input.data[2], result, input.size(), udf_func); }; return udf_function; } template inline static scalar_function_t CreateUnaryFunction(const string &name, TR (*udf_func)(ARGS...)) { // LCOV_EXCL_START throw std::runtime_error("Incorrect number of arguments for unary function"); } // LCOV_EXCL_STOP template inline static scalar_function_t CreateBinaryFunction(const string &name, TR (*udf_func)(ARGS...)) { // LCOV_EXCL_START throw std::runtime_error("Incorrect number of arguments for binary function"); } // LCOV_EXCL_STOP template inline static scalar_function_t CreateTernaryFunction(const string &name, TR (*udf_func)(ARGS...)) { // LCOV_EXCL_START throw std::runtime_error("Incorrect number of arguments for ternary function"); } // LCOV_EXCL_STOP template inline static LogicalType GetArgumentType() { if (std::is_same()) { return LogicalType(LogicalTypeId::BOOLEAN); } else if (std::is_same()) { return LogicalType(LogicalTypeId::TINYINT); } else if (std::is_same()) { return LogicalType(LogicalTypeId::SMALLINT); } else if (std::is_same()) { return LogicalType(LogicalTypeId::INTEGER); } else if (std::is_same()) { return LogicalType(LogicalTypeId::BIGINT); } else if (std::is_same()) { return LogicalType(LogicalTypeId::FLOAT); } else if (std::is_same()) { return LogicalType(LogicalTypeId::DOUBLE); } else if (std::is_same()) { return LogicalType(LogicalTypeId::VARCHAR); } else { // LCOV_EXCL_START throw std::runtime_error("Unrecognized type!"); } // LCOV_EXCL_STOP } template inline static void GetArgumentTypesRecursive(vector &arguments) { 
// NOTE(review): GetArgumentTypesRecursive peels one template argument per call (variadic recursion with a
// single-type base case below); the stripped `template <typename TA, typename TB, typename... TR>` heads must
// be restored for the recursion to make sense.
arguments.push_back(GetArgumentType()); GetArgumentTypesRecursive(arguments); } template inline static void GetArgumentTypesRecursive(vector &arguments) { arguments.push_back(GetArgumentType()); } private: //-------------------------------- Argumented functions --------------------------------// template inline static scalar_function_t CreateUnaryFunction(const string &name, const vector &args, const LogicalType &ret_type, TR (*udf_func)(ARGS...)) { // LCOV_EXCL_START throw std::runtime_error("Incorrect number of arguments for unary function"); } // LCOV_EXCL_STOP template inline static scalar_function_t CreateUnaryFunction(const string &name, const vector &args, const LogicalType &ret_type, TR (*udf_func)(TA)) { if (args.size() != 1) { // LCOV_EXCL_START throw std::runtime_error("The number of LogicalType arguments (\"args\") should be 1!"); } if (!TypesMatch(args[0])) { throw std::runtime_error("The first arguments don't match!"); } // LCOV_EXCL_STOP scalar_function_t udf_function = [=](DataChunk &input, ExpressionState &state, Vector &result) -> void { UnaryExecutor::GenericExecute(input.data[0], result, input.size(), (void *)udf_func); }; return udf_function; } template inline static scalar_function_t CreateBinaryFunction(const string &name, const vector &args, const LogicalType &ret_type, TR (*udf_func)(ARGS...)) { // LCOV_EXCL_START throw std::runtime_error("Incorrect number of arguments for binary function"); } // LCOV_EXCL_STOP template inline static scalar_function_t CreateBinaryFunction(const string &name, const vector &args, const LogicalType &ret_type, TR (*udf_func)(TA, TB)) { if (args.size() != 2) { // LCOV_EXCL_START throw std::runtime_error("The number of LogicalType arguments (\"args\") should be 2!"); } if (!TypesMatch(args[0])) { throw std::runtime_error("The first arguments don't match!"); } if (!TypesMatch(args[1])) { throw std::runtime_error("The second arguments don't match!"); } // LCOV_EXCL_STOP scalar_function_t udf_function = 
// NOTE(review): upstream bug carried into this copy — in the ternary overload below, the args[2] check
// reports "The second arguments don't match!" where it should say "third" (copy-paste from the args[1] check).
// Cannot be fixed here without diverging from the upstream amalgamation; fix belongs in duckdb/main/udf.hpp.
[=](DataChunk &input, ExpressionState &state, Vector &result) { BinaryExecutor::Execute(input.data[0], input.data[1], result, input.size(), udf_func); }; return udf_function; } template inline static scalar_function_t CreateTernaryFunction(const string &name, const vector &args, const LogicalType &ret_type, TR (*udf_func)(ARGS...)) { // LCOV_EXCL_START throw std::runtime_error("Incorrect number of arguments for ternary function"); } // LCOV_EXCL_STOP template inline static scalar_function_t CreateTernaryFunction(const string &name, const vector &args, const LogicalType &ret_type, TR (*udf_func)(TA, TB, TC)) { if (args.size() != 3) { // LCOV_EXCL_START throw std::runtime_error("The number of LogicalType arguments (\"args\") should be 3!"); } if (!TypesMatch(args[0])) { throw std::runtime_error("The first arguments don't match!"); } if (!TypesMatch(args[1])) { throw std::runtime_error("The second arguments don't match!"); } if (!TypesMatch(args[2])) { throw std::runtime_error("The second arguments don't match!"); } // LCOV_EXCL_STOP scalar_function_t udf_function = [=](DataChunk &input, ExpressionState &state, Vector &result) -> void { TernaryExecutor::Execute(input.data[0], input.data[1], input.data[2], result, input.size(), udf_func); }; return udf_function; } template inline static bool TypesMatch(const LogicalType &sql_type) { switch (sql_type.id()) { case LogicalTypeId::BOOLEAN: return std::is_same(); case LogicalTypeId::TINYINT: return std::is_same(); case LogicalTypeId::SMALLINT: return std::is_same(); case LogicalTypeId::INTEGER: return std::is_same(); case LogicalTypeId::BIGINT: return std::is_same(); case LogicalTypeId::DATE: return std::is_same(); case LogicalTypeId::TIME: return std::is_same(); case LogicalTypeId::TIME_TZ: return std::is_same(); case LogicalTypeId::TIMESTAMP: case LogicalTypeId::TIMESTAMP_MS: case LogicalTypeId::TIMESTAMP_NS: case LogicalTypeId::TIMESTAMP_SEC: case LogicalTypeId::TIMESTAMP_TZ: return std::is_same(); case 
// NOTE(review): TypesMatch<T> maps a runtime LogicalTypeId onto a compile-time C++ type check; the stripped
// `std::is_same<T, date_t>()` etc. argument lists must be restored from upstream. The aggregate factory
// helpers that follow delegate to AggregateFunction::UnaryAggregate / BinaryAggregate and only set the name.
LogicalTypeId::FLOAT: return std::is_same(); case LogicalTypeId::DOUBLE: return std::is_same(); case LogicalTypeId::VARCHAR: case LogicalTypeId::CHAR: case LogicalTypeId::BLOB: return std::is_same(); default: // LCOV_EXCL_START throw std::runtime_error("Type is not supported!"); } // LCOV_EXCL_STOP } private: //-------------------------------- Aggregate functions --------------------------------// template inline static AggregateFunction CreateUnaryAggregateFunction(const string &name) { LogicalType return_type = GetArgumentType(); LogicalType input_type = GetArgumentType(); return CreateUnaryAggregateFunction(name, return_type, input_type); } template inline static AggregateFunction CreateUnaryAggregateFunction(const string &name, const LogicalType &ret_type, const LogicalType &input_type) { AggregateFunction aggr_function = AggregateFunction::UnaryAggregate(input_type, ret_type); aggr_function.name = name; return aggr_function; } template inline static AggregateFunction CreateBinaryAggregateFunction(const string &name) { LogicalType return_type = GetArgumentType(); LogicalType input_type_a = GetArgumentType(); LogicalType input_type_b = GetArgumentType(); return CreateBinaryAggregateFunction(name, return_type, input_type_a, input_type_b); } template inline static AggregateFunction CreateBinaryAggregateFunction(const string &name, const LogicalType &ret_type, const LogicalType &input_type_a, const LogicalType &input_type_b) { AggregateFunction aggr_function = AggregateFunction::BinaryAggregate(input_type_a, input_type_b, ret_type); aggr_function.name = name; return aggr_function; } }; // end UDFWrapper // NOLINTEND } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/materialized_query_result.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // 
// NOTE(review): inlined headers column_data_collection.hpp / _iterators.hpp / _scan_states.hpp.
// Same extraction damage as above: container template arguments are gone (`unordered_map handles` was
// `unordered_map<idx_t, BufferHandle>` — TODO confirm value type against upstream; `vector types` was
// `vector<LogicalType>`; `shared_ptr scan_chunk` was `shared_ptr<DataChunk>`). Restore before compiling.
duckdb/common/types/column/column_data_collection.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/column/column_data_collection_iterators.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/types/column/column_data_scan_states.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class ColumnDataAllocatorType : uint8_t { //! Use a buffer manager to allocate large chunks of memory that vectors then use BUFFER_MANAGER_ALLOCATOR, //! Use an in-memory allocator, allocating data for every chunk //! This causes the column data collection to allocate blocks that are not tied to a buffer manager IN_MEMORY_ALLOCATOR, //! Use a buffer manager to allocate vectors, but use a StringHeap for strings HYBRID }; enum class ColumnDataScanProperties : uint8_t { INVALID, //! Allow zero copy scans - this introduces a dependency on the resulting vector on the scan state of the column //! data collection, which means vectors might not be valid anymore after the next chunk is scanned. ALLOW_ZERO_COPY, //! Disallow zero-copy scans, always copying data into the target vector //! 
// NOTE(review): plain-old-state structs used by append/scan paths; ColumnDataParallelScanState pairs a scan
// state with a mutex so multiple worker threads can share one scan cursor.
As a result, data scanned will be valid even after the column data collection is destroyed DISALLOW_ZERO_COPY }; struct ChunkManagementState { unordered_map handles; ColumnDataScanProperties properties = ColumnDataScanProperties::INVALID; }; struct ColumnDataAppendState { ChunkManagementState current_chunk_state; vector vector_data; }; struct ColumnDataScanState { ChunkManagementState current_chunk_state; idx_t segment_index; idx_t chunk_index; idx_t current_row_index; idx_t next_row_index; ColumnDataScanProperties properties; vector column_ids; }; struct ColumnDataParallelScanState { ColumnDataScanState scan_state; mutex lock; }; struct ColumnDataLocalScanState { ChunkManagementState current_chunk_state; idx_t current_segment_index = DConstants::INVALID_INDEX; idx_t current_row_index; }; class ColumnDataRow { public: ColumnDataRow(DataChunk &chunk, idx_t row_index, idx_t base_index); DataChunk &chunk; idx_t row_index; idx_t base_index; public: Value GetValue(idx_t column_index) const; idx_t RowIndex() const; }; } // namespace duckdb namespace duckdb { class ColumnDataCollection; class ColumnDataChunkIterationHelper { public: DUCKDB_API ColumnDataChunkIterationHelper(const ColumnDataCollection &collection, vector column_ids); private: const ColumnDataCollection &collection; vector column_ids; private: class ColumnDataChunkIterator; class ColumnDataChunkIterator { public: DUCKDB_API explicit ColumnDataChunkIterator(const ColumnDataCollection *collection_p, vector column_ids); const ColumnDataCollection *collection; ColumnDataScanState scan_state; shared_ptr scan_chunk; idx_t row_index; public: DUCKDB_API void Next(); DUCKDB_API ColumnDataChunkIterator &operator++(); DUCKDB_API bool operator!=(const ColumnDataChunkIterator &other) const; DUCKDB_API DataChunk &operator*() const; }; public: ColumnDataChunkIterator begin() { // NOLINT: match stl API return ColumnDataChunkIterator(&collection, column_ids); } ColumnDataChunkIterator end() { // NOLINT: match stl API return 
// NOTE(review): end() iterators are signalled by a null collection pointer, matched by operator!= above.
ColumnDataChunkIterator(nullptr, vector()); } }; class ColumnDataRowIterationHelper { public: DUCKDB_API explicit ColumnDataRowIterationHelper(const ColumnDataCollection &collection); private: const ColumnDataCollection &collection; private: class ColumnDataRowIterator; class ColumnDataRowIterator { public: DUCKDB_API explicit ColumnDataRowIterator(const ColumnDataCollection *collection_p); const ColumnDataCollection *collection; ColumnDataScanState scan_state; shared_ptr scan_chunk; ColumnDataRow current_row; public: void Next(); DUCKDB_API ColumnDataRowIterator &operator++(); DUCKDB_API bool operator!=(const ColumnDataRowIterator &other) const; DUCKDB_API const ColumnDataRow &operator*() const; }; public: DUCKDB_API ColumnDataRowIterator begin(); // NOLINT: match stl API DUCKDB_API ColumnDataRowIterator end(); // NOLINT: match stl API }; } // namespace duckdb namespace duckdb { class BufferManager; class BlockHandle; class ClientContext; struct ColumnDataCopyFunction; class ColumnDataAllocator; class ColumnDataCollection; class ColumnDataCollectionSegment; class ColumnDataRowCollection; //! The ColumnDataCollection represents a set of (buffer-managed) data stored in columnar format //! It is efficient to read and scan class ColumnDataCollection { public: //! Constructs an in-memory column data collection from an allocator DUCKDB_API ColumnDataCollection(Allocator &allocator, vector types); //! Constructs an empty (but valid) in-memory column data collection from an allocator DUCKDB_API explicit ColumnDataCollection(Allocator &allocator); //! Constructs a buffer-managed column data collection DUCKDB_API ColumnDataCollection(BufferManager &buffer_manager, vector types); //! Constructs either an in-memory or a buffer-managed column data collection DUCKDB_API ColumnDataCollection(ClientContext &context, vector types, ColumnDataAllocatorType type = ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR); //! 
Creates a column data collection that inherits the blocks to write to. This allows blocks to be shared //! between multiple column data collections and prevents wasting space. //! Note that after one CDC inherits blocks from another, the other //! cannot be written to anymore (i.e. we take ownership of the half-written blocks). DUCKDB_API ColumnDataCollection(ColumnDataCollection &parent); DUCKDB_API ColumnDataCollection(shared_ptr allocator, vector types); DUCKDB_API ~ColumnDataCollection(); public: //! The types of columns in the ColumnDataCollection vector &Types() { return types; } const vector &Types() const { return types; } //! The amount of rows in the ColumnDataCollection const idx_t &Count() const { return count; } //! The amount of columns in the ColumnDataCollection idx_t ColumnCount() const { return types.size(); } //! The size (in bytes) of this ColumnDataCollection idx_t SizeInBytes() const; //! The allocation size (in bytes) of this ColumnDataCollection - this property is cached idx_t AllocationSize() const; //! Sets the partition index of this ColumnDataCollection void SetPartitionIndex(idx_t index); //! Get the allocator DUCKDB_API Allocator &GetAllocator() const; //! Initializes an Append state - useful for optimizing many appends made to the same column data collection DUCKDB_API void InitializeAppend(ColumnDataAppendState &state); //! Append a DataChunk to this ColumnDataCollection using the specified append state DUCKDB_API void Append(ColumnDataAppendState &state, DataChunk &new_chunk); //! Initializes a chunk with the correct types that can be used to call Scan DUCKDB_API void InitializeScanChunk(DataChunk &chunk) const; //! Initializes a chunk with the correct types for a given scan state DUCKDB_API void InitializeScanChunk(ColumnDataScanState &state, DataChunk &chunk) const; //! 
Initializes a Scan state for scanning all columns DUCKDB_API void InitializeScan(ColumnDataScanState &state, ColumnDataScanProperties properties = ColumnDataScanProperties::ALLOW_ZERO_COPY) const; //! Initializes a Scan state for scanning a subset of the columns DUCKDB_API void InitializeScan(ColumnDataScanState &state, vector column_ids, ColumnDataScanProperties properties = ColumnDataScanProperties::ALLOW_ZERO_COPY) const; //! Initialize a parallel scan over the column data collection over all columns DUCKDB_API void InitializeScan(ColumnDataParallelScanState &state, ColumnDataScanProperties properties = ColumnDataScanProperties::ALLOW_ZERO_COPY) const; //! Initialize a parallel scan over the column data collection over a subset of the columns DUCKDB_API void InitializeScan(ColumnDataParallelScanState &state, vector column_ids, ColumnDataScanProperties properties = ColumnDataScanProperties::ALLOW_ZERO_COPY) const; //! Scans a DataChunk from the ColumnDataCollection DUCKDB_API bool Scan(ColumnDataScanState &state, DataChunk &result) const; //! Scans a DataChunk from the ColumnDataCollection DUCKDB_API bool Scan(ColumnDataParallelScanState &state, ColumnDataLocalScanState &lstate, DataChunk &result) const; //! Append a DataChunk directly to this ColumnDataCollection - calls InitializeAppend and Append internally DUCKDB_API void Append(DataChunk &new_chunk); //! Appends the other ColumnDataCollection to this, destroying the other data collection DUCKDB_API void Combine(ColumnDataCollection &other); DUCKDB_API void Verify(); DUCKDB_API string ToString() const; DUCKDB_API void Print() const; DUCKDB_API void Reset(); //! Returns the number of data chunks present in the ColumnDataCollection DUCKDB_API idx_t ChunkCount() const; //! Fetch an individual chunk from the ColumnDataCollection DUCKDB_API void FetchChunk(idx_t chunk_idx, DataChunk &result) const; //! Constructs a class that can be iterated over to fetch individual chunks //! 
Iterating over this is syntactic sugar over just calling Scan DUCKDB_API ColumnDataChunkIterationHelper Chunks() const; //! Constructs a class that can be iterated over to fetch individual chunks //! Only the column indexes specified in the column_ids list are scanned DUCKDB_API ColumnDataChunkIterationHelper Chunks(vector column_ids) const; //! Constructs a class that can be iterated over to fetch individual rows //! Note that row iteration is slow, and the `.Chunks()` method should be used instead DUCKDB_API ColumnDataRowIterationHelper Rows() const; //! Returns a materialized set of all of the rows in the column data collection //! Note that usage of this is slow - avoid using this unless the amount of rows is small, or if you do not care //! about performance DUCKDB_API ColumnDataRowCollection GetRows() const; //! Compare two column data collections to another. If they are equal according to result equality rules, //! return true. That means null values are equal, and approx equality is used for floating point values. //! If they are not equal, return false and fill in the error message. static bool ResultEquals(const ColumnDataCollection &left, const ColumnDataCollection &right, string &error_message, bool ordered = false); //! Obtains the next scan index to scan from bool NextScanIndex(ColumnDataScanState &state, idx_t &chunk_index, idx_t &segment_index, idx_t &row_index) const; //! Obtains the previous scan index to scan from bool PrevScanIndex(ColumnDataScanState &state, idx_t &chunk_index, idx_t &segment_index, idx_t &row_index) const; //! Scans at the indices (obtained from NextScanIndex) void ScanAtIndex(ColumnDataParallelScanState &state, ColumnDataLocalScanState &lstate, DataChunk &result, idx_t chunk_index, idx_t segment_index, idx_t row_index) const; //! Seeks to the chunk _containing_ the row. Returns false if it is past the end. //! Note that the returned chunk will likely not be aligned to the given row //! 
but the scan state will provide the actual range bool Seek(idx_t row_idx, ColumnDataScanState &state, DataChunk &result) const; //! Initialize the column data collection void Initialize(vector types); //! Get references to the string heaps in this ColumnDataCollection vector> GetHeapReferences(); //! Get the allocator type of this ColumnDataCollection ColumnDataAllocatorType GetAllocatorType() const; //! Get a vector of the segments in this ColumnDataCollection const vector> &GetSegments() const; void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); private: //! Creates a new segment within the ColumnDataCollection void CreateSegment(); static ColumnDataCopyFunction GetCopyFunction(const LogicalType &type); private: //! The Column Data Allocator buffer_ptr allocator; //! The types of the stored entries vector types; //! The number of entries stored in the column data collection idx_t count; //! The data segments of the column data collection vector> segments; //! The set of copy functions vector copy_functions; //! When the column data collection is marked as finished - new tuples can no longer be appended to it bool finished_append; //! Partition index (optional, if partitioned) optional_idx partition_index; }; //! 
// NOTE(review): ColumnDataRowCollection forwards the usual STL container surface (begin/end/size/empty,
// operator[]) onto its private `rows` vector; the `vector>` spellings below are further evidence of the
// stripped nested template arguments (`vector<unique_ptr<DataChunk>>` upstream — TODO confirm).
The ColumnDataRowCollection represents a set of materialized rows, as obtained from the ColumnDataCollection class ColumnDataRowCollection { public: DUCKDB_API explicit ColumnDataRowCollection(const ColumnDataCollection &collection); public: DUCKDB_API Value GetValue(idx_t column, idx_t index) const; public: // container API bool empty() const { // NOLINT: match stl API return rows.empty(); // NOLINT } idx_t size() const { // NOLINT: match stl API return rows.size(); } DUCKDB_API ColumnDataRow &operator[](idx_t i); DUCKDB_API const ColumnDataRow &operator[](idx_t i) const; vector::iterator begin() { // NOLINT: match stl API return rows.begin(); } vector::iterator end() { // NOLINT: match stl API return rows.end(); } vector::const_iterator cbegin() const { // NOLINT: match stl API return rows.cbegin(); } vector::const_iterator cend() const { // NOLINT: match stl API return rows.cend(); } vector::const_iterator begin() const { // NOLINT: match stl API return rows.begin(); } vector::const_iterator end() const { // NOLINT: match stl API return rows.end(); } private: vector rows; vector> chunks; ColumnDataScanState scan_state; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/query_result.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/statement_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Statement Types //===--------------------------------------------------------------------===// enum class StatementType : uint8_t { INVALID_STATEMENT, // invalid statement type SELECT_STATEMENT, // select statement type INSERT_STATEMENT, // insert statement type UPDATE_STATEMENT, // update 
// NOTE(review): continuation of the StatementType enum (its opening is on the previous line), followed by
// StatementReturnType and StatementProperties from duckdb/common/enums/statement_type.hpp. The
// `unordered_map read_databases` / `modified_databases` members have lost their key/value template
// arguments in extraction (upstream maps database name -> CatalogIdentity — TODO confirm).
statement type CREATE_STATEMENT, // create statement type DELETE_STATEMENT, // delete statement type PREPARE_STATEMENT, // prepare statement type EXECUTE_STATEMENT, // execute statement type ALTER_STATEMENT, // alter statement type TRANSACTION_STATEMENT, // transaction statement type, COPY_STATEMENT, // copy type ANALYZE_STATEMENT, // analyze type VARIABLE_SET_STATEMENT, // variable set statement type CREATE_FUNC_STATEMENT, // create func statement type EXPLAIN_STATEMENT, // explain statement type DROP_STATEMENT, // DROP statement type EXPORT_STATEMENT, // EXPORT statement type PRAGMA_STATEMENT, // PRAGMA statement type VACUUM_STATEMENT, // VACUUM statement type CALL_STATEMENT, // CALL statement type SET_STATEMENT, // SET statement type LOAD_STATEMENT, // LOAD statement type RELATION_STATEMENT, EXTENSION_STATEMENT, LOGICAL_PLAN_STATEMENT, ATTACH_STATEMENT, DETACH_STATEMENT, MULTI_STATEMENT, COPY_DATABASE_STATEMENT, UPDATE_EXTENSIONS_STATEMENT, MERGE_INTO_STATEMENT }; DUCKDB_API string StatementTypeToString(StatementType type); enum class StatementReturnType : uint8_t { QUERY_RESULT, // the statement returns a query result (e.g. for display to the user) CHANGED_ROWS, // the statement returns a single row containing the number of changed rows (e.g. an insert stmt) NOTHING // the statement returns nothing }; string StatementReturnTypeToString(StatementReturnType type); class Catalog; class ClientContext; //! 
// NOTE(review): CatalogIdentity's operator== compares both catalog oid and version, so a catalog version bump
// invalidates equality — this is what drives rebind detection via read_databases/modified_databases.
A struct containing various properties of a SQL statement struct StatementProperties { StatementProperties() : requires_valid_transaction(true), allow_stream_result(false), bound_all_parameters(true), return_type(StatementReturnType::QUERY_RESULT), parameter_count(0), always_require_rebind(false) { } struct CatalogIdentity { idx_t catalog_oid; optional_idx catalog_version; bool operator==(const CatalogIdentity &rhs) const { return catalog_oid == rhs.catalog_oid && catalog_version == rhs.catalog_version; } bool operator!=(const CatalogIdentity &rhs) const { return !operator==(rhs); } }; //! The set of databases this statement will read from unordered_map read_databases; //! The set of databases this statement will modify unordered_map modified_databases; //! Whether or not the statement requires a valid transaction. Almost all statements require this, with the //! exception of ROLLBACK bool requires_valid_transaction; //! Whether or not the result can be streamed to the client bool allow_stream_result; //! Whether or not all parameters have successfully had their types determined bool bound_all_parameters; //! What type of data the statement returns StatementReturnType return_type; //! The number of prepared statement parameters idx_t parameter_count; //! 
Whether or not the statement ALWAYS requires a rebind bool always_require_rebind; bool IsReadOnly() { return modified_databases.empty(); } void RegisterDBRead(Catalog &catalog, ClientContext &context); void RegisterDBModify(Catalog &catalog, ClientContext &context); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/client_properties.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/arrow_format_version.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class ArrowOffsetSize : uint8_t { REGULAR, LARGE }; enum class ArrowFormatVersion : uint8_t { //! Base Version V1_0 = 10, //! Added 256-bit Decimal type. V1_1 = 11, //! Added MonthDayNano interval type. V1_2 = 12, //! Added Run-End Encoded Layout. V1_3 = 13, //! Added Variable-size Binary View Layout and the associated BinaryView and Utf8View types. //! Added ListView Layout and the associated ListView and LargeListView types. Added Variadic buffers. V1_4 = 14, //! Expanded Decimal type bit widths to allow 32-bit and 64-bit types. V1_5 = 15 }; } // namespace duckdb namespace duckdb { //! 
// NOTE(review): ClientProperties plus the BaseQueryResult / QueryResult / MaterializedQueryResult hierarchy
// from duckdb/main/query_result.hpp and materialized_query_result.hpp. Template argument lists are stripped
// throughout (`optional_ptr client_context` was `optional_ptr<ClientContext>`, `vector types` was
// `vector<LogicalType>`, `unique_ptr next` was `unique_ptr<QueryResult>`, `template TARGET &Cast()` was
// `template <class TARGET>`); restore from upstream before compiling.
A set of properties from the client context that can be used to interpret the query result struct ClientProperties { ClientProperties(string time_zone_p, const ArrowOffsetSize arrow_offset_size_p, const bool arrow_use_list_view_p, const bool produce_arrow_string_view_p, const bool lossless_conversion, const ArrowFormatVersion arrow_output_version, const optional_ptr client_context) : time_zone(std::move(time_zone_p)), arrow_offset_size(arrow_offset_size_p), arrow_use_list_view(arrow_use_list_view_p), produce_arrow_string_view(produce_arrow_string_view_p), arrow_lossless_conversion(lossless_conversion), arrow_output_version(arrow_output_version), client_context(client_context) { } ClientProperties() {}; string time_zone = "UTC"; ArrowOffsetSize arrow_offset_size = ArrowOffsetSize::REGULAR; bool arrow_use_list_view = false; bool produce_arrow_string_view = false; bool arrow_lossless_conversion = false; ArrowFormatVersion arrow_output_version = ArrowFormatVersion::V1_0; optional_ptr client_context; }; } // namespace duckdb namespace duckdb { struct BoxRendererConfig; enum class QueryResultType : uint8_t { MATERIALIZED_RESULT, STREAM_RESULT, PENDING_RESULT, ARROW_RESULT }; class BaseQueryResult { public: //! Creates a successful query result with the specified names and types DUCKDB_API BaseQueryResult(QueryResultType type, StatementType statement_type, StatementProperties properties, vector types, vector names); //! Creates an unsuccessful query result with error condition DUCKDB_API BaseQueryResult(QueryResultType type, ErrorData error); DUCKDB_API virtual ~BaseQueryResult(); //! The type of the result (MATERIALIZED or STREAMING) QueryResultType type; //! The type of the statement that created this result StatementType statement_type; //! Properties of the statement StatementProperties properties; //! The SQL types of the result vector types; //! 
// NOTE(review): Cast<TARGET>() checks the runtime `type` tag against TARGET::TYPE before the
// reinterpret_cast, so a mismatched downcast throws InternalException instead of invoking UB.
The names of the result vector names; public: [[noreturn]] DUCKDB_API void ThrowError(const string &prepended_message = "") const; DUCKDB_API void SetError(ErrorData error); DUCKDB_API bool HasError() const; DUCKDB_API const ExceptionType &GetErrorType() const; DUCKDB_API const std::string &GetError(); DUCKDB_API ErrorData &GetErrorObject(); DUCKDB_API idx_t ColumnCount(); protected: //! Whether or not execution was successful bool success; //! The error (in case execution was not successful) ErrorData error; }; //! The QueryResult object holds the result of a query. It can either be a MaterializedQueryResult, in which case the //! result contains the entire result set, or a StreamQueryResult in which case the Fetch method can be called to //! incrementally fetch data from the database. class QueryResult : public BaseQueryResult { public: //! Creates a successful query result with the specified names and types DUCKDB_API QueryResult(QueryResultType type, StatementType statement_type, StatementProperties properties, vector types, vector names, ClientProperties client_properties); //! Creates an unsuccessful query result with error condition DUCKDB_API QueryResult(QueryResultType type, ErrorData error); DUCKDB_API ~QueryResult() override; //! Properties from the client context ClientProperties client_properties; //! The next result (if any) unique_ptr next; public: template TARGET &Cast() { if (type != TARGET::TYPE) { throw InternalException("Failed to cast query result to type - query result type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (type != TARGET::TYPE) { throw InternalException("Failed to cast query result to type - query result type mismatch"); } return reinterpret_cast(*this); } public: //! Deduplicate column names for interop with external libraries static void DeduplicateColumns(vector &names); public: //! 
// NOTE(review): TryFetch returns the inherited `success` member on the non-throwing path rather than
// literal `true` — i.e. a result already flagged as errored reports false even when Fetch() did not throw.
// NOTE(review): QueryResultRow's constructor takes `row_idx` but initializes `row(0)`, ignoring the
// parameter — present upstream as well; presumably intentional since iteration always starts at row 0,
// but worth confirming against duckdb/main/query_result.hpp.
Returns the name of the column for the given index DUCKDB_API const string &ColumnName(idx_t index) const; //! Fetches a DataChunk of normalized (flat) vectors from the query result. //! Returns nullptr if there are no more results to fetch. DUCKDB_API virtual unique_ptr Fetch(); //! Fetches a DataChunk from the query result. The vectors are not normalized and hence any vector types can be //! returned. DUCKDB_API virtual unique_ptr FetchRaw() = 0; //! Converts the QueryResult to a string DUCKDB_API virtual string ToString() = 0; //! Converts the QueryResult to a box-rendered string DUCKDB_API virtual string ToBox(ClientContext &context, const BoxRendererConfig &config); //! Prints the QueryResult to the console DUCKDB_API void Print(); //! Returns true if the two results are identical; false otherwise. Note that this method is destructive; it calls //! Fetch() until both results are exhausted. The data in the results will be lost. DUCKDB_API bool Equals(QueryResult &other); bool TryFetch(unique_ptr &result, ErrorData &error) { try { result = Fetch(); return success; } catch (std::exception &ex) { error = ErrorData(ex); return false; } catch (...) { error = ErrorData("Unknown error in Fetch"); return false; } } private: class QueryResultIterator; class QueryResultRow { public: explicit QueryResultRow(QueryResultIterator &iterator_p, idx_t row_idx) : iterator(iterator_p), row(0) { } QueryResultIterator &iterator; idx_t row; bool IsNull(idx_t col_idx) const { return iterator.chunk->GetValue(col_idx, row).IsNull(); } template T GetValue(idx_t col_idx) const { return iterator.chunk->GetValue(col_idx, row).GetValue(); } }; //! The row-based query result iterator. 
// NOTE(review): the iterator wraps Fetch() — construction pulls the first chunk, Next() advances within the
// chunk and re-Fetches on exhaustion; a null `result` + reset chunk marks the end() sentinel state compared
// by operator!=. Iteration is destructive, consistent with the Equals()/Fetch() contract documented above.
Invoking the class QueryResultIterator { public: explicit QueryResultIterator(optional_ptr result_p) : current_row(*this, 0), result(result_p), base_row(0) { if (result) { chunk = shared_ptr(result->Fetch().release()); if (!chunk) { result = nullptr; } } } QueryResultRow current_row; shared_ptr chunk; optional_ptr result; idx_t base_row; public: void Next() { if (!chunk) { return; } current_row.row++; if (current_row.row >= chunk->size()) { base_row += chunk->size(); chunk = shared_ptr(result->Fetch().release()); current_row.row = 0; if (!chunk || chunk->size() == 0) { // exhausted all rows base_row = 0; result = nullptr; chunk.reset(); } } } QueryResultIterator &operator++() { Next(); return *this; } bool operator!=(const QueryResultIterator &other) const { return result != other.result || base_row != other.base_row || current_row.row != other.current_row.row; } const QueryResultRow &operator*() const { return current_row; } }; public: QueryResultIterator begin() { // NOLINT: match stl API return QueryResultIterator(this); } QueryResultIterator end() { // NOLINT: match stl API return QueryResultIterator(nullptr); } protected: DUCKDB_API string HeaderToString(); private: QueryResult(const QueryResult &) = delete; }; } // namespace duckdb namespace duckdb { class ClientContext; class MaterializedQueryResult : public QueryResult { public: static constexpr const QueryResultType TYPE = QueryResultType::MATERIALIZED_RESULT; public: friend class ClientContext; //! Creates a successful query result with the specified names and types DUCKDB_API MaterializedQueryResult(StatementType statement_type, StatementProperties properties, vector names, unique_ptr collection, ClientProperties client_properties); //! Creates an unsuccessful query result with error condition DUCKDB_API explicit MaterializedQueryResult(ErrorData error); public: //! Fetches a DataChunk from the query result. //! This will consume the result (i.e. 
the result can only be scanned once with this function) DUCKDB_API unique_ptr Fetch() override; DUCKDB_API unique_ptr FetchRaw() override; //! Converts the QueryResult to a string DUCKDB_API string ToString() override; DUCKDB_API string ToBox(ClientContext &context, const BoxRendererConfig &config) override; //! Gets the (index) value of the (column index) column. //! Note: this is very slow. Scanning over the underlying collection is much faster. DUCKDB_API Value GetValue(idx_t column, idx_t index); template T GetValue(idx_t column, idx_t index) { auto value = GetValue(column, index); return (T)value.GetValue(); } DUCKDB_API idx_t RowCount() const; //! Returns a reference to the underlying column data collection ColumnDataCollection &Collection(); //! Takes ownership of the collection, 'collection' is null after this operation unique_ptr TakeCollection(); private: unique_ptr collection; //! Row collection, only created if GetValue is called unique_ptr row_collection; //! Scan state for Fetch calls ColumnDataScanState scan_state; bool scan_initialized; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/pending_query_result.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/pending_execution_result.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class PendingExecutionResult : uint8_t { RESULT_READY, RESULT_NOT_READY, EXECUTION_ERROR, BLOCKED, NO_TASKS_AVAILABLE, EXECUTION_FINISHED }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/executor.hpp // // //===----------------------------------------------------------------------===// 
//===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/task_error_manager.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class TaskErrorManager { public: TaskErrorManager() : has_error(false) { } void PushError(ErrorData error) { lock_guard elock(error_lock); this->exceptions.push_back(std::move(error)); has_error = true; } ErrorData GetError() { lock_guard elock(error_lock); D_ASSERT(!exceptions.empty()); // FIXME: Should we try to get the biggest priority error? // In case the first exception is a StandardException but a regular Exception or a FatalException occurred // Maybe we should throw the more critical exception instead, as that changes behavior. auto &entry = exceptions[0]; return entry; } bool HasError() { return has_error; } void ThrowException() { lock_guard elock(error_lock); D_ASSERT(!exceptions.empty()); auto &entry = exceptions[0]; entry.Throw(); } void Reset() { lock_guard elock(error_lock); exceptions.clear(); has_error = false; } private: mutex error_lock; //! Exceptions that occurred during the execution of the current query vector exceptions; //! 
Lock-free error flag atomic has_error; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/progress_data.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct ProgressData { double done = 0.0; double total = 0.0; bool invalid = false; public: double ProgressDone() const { // ProgressDone requires a valid state D_ASSERT(IsValid()); return done / total; } void Add(const ProgressData &other) { // Add is unchecked, propagating invalid done += other.done; total += other.total; invalid = invalid || other.invalid; } void Normalize(const double target = 1.0) { // Normalize checks only `target`, propagating invalid D_ASSERT(target > 0.0); if (IsValid()) { if (total > 0.0) { done /= total; } total = 1.0; done *= target; total *= target; } else { SetInvalid(); } } void SetInvalid() { invalid = true; done = 0.0; total = 1.0; } bool IsValid() const { return (!invalid) && (done >= 0.0) && (done <= total) && (total >= 0.0); } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parallel/pipeline.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/physical_operator.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { template class ArenaLinkedList { public: static_assert(std::is_trivially_destructible::value, "T must be trivially destructible"); public: explicit ArenaLinkedList(ArenaAllocator &arena) : arena(arena) { } ArenaLinkedList(const ArenaLinkedList &) = delete; ArenaLinkedList &operator=(const ArenaLinkedList &) = delete; ArenaLinkedList(ArenaLinkedList &&other) noexcept : head(other.head), tail(other.tail) { other.head = nullptr; 
other.tail = nullptr; } ArenaLinkedList &operator=(ArenaLinkedList &&other) noexcept { if (this != &other) { head = other.head; tail = other.tail; other.head = nullptr; other.tail = nullptr; } return *this; } public: bool empty() const { return head == nullptr; } idx_t size() const { return _size; } void push_back(const T &value) { auto node = arena.Make(value); auto ptr = head ? &tail->next : &head; *ptr = node; tail = node; _size++; } template void emplace_back(ARGS &&... args) { auto node = arena.Make(std::forward(args)...); auto ptr = head ? &tail->next : &head; *ptr = node; tail = node; _size++; } //! FIXME: eventually remove this. T &operator[](const idx_t index) { idx_t i = 0; for (auto &elem : *this) { if (i == index) { return elem; } i++; } throw InternalException("index out of bounds in ArenaLinkedList"); } //! FIXME: eventually remove this. const T &operator[](const idx_t index) const { idx_t i = 0; for (const auto &elem : *this) { if (i == index) { return elem; } i++; } throw InternalException("index out of bounds in ArenaLinkedList"); } struct Iterator; struct ConstIterator; Iterator begin(); Iterator end(); ConstIterator begin() const; ConstIterator end() const; private: struct Node { explicit Node(const T &value_p) : next(nullptr), value(value_p) { } Node *next; T value; }; ArenaAllocator &arena; Node *head = nullptr; Node *tail = nullptr; idx_t _size = 0; }; template struct ArenaLinkedList::Iterator { Node *node; explicit Iterator(Node *node_p) : node(node_p) { } T &operator*() { return node->value; } Iterator &operator++() { node = node->next; return *this; } bool operator!=(const Iterator &other) const { return node != other.node; } }; template struct ArenaLinkedList::ConstIterator { const Node *node; explicit ConstIterator(const Node *node_p) : node(node_p) { } const T &operator*() const { return node->value; } ConstIterator &operator++() { node = node->next; return *this; } bool operator!=(const ConstIterator &other) const { return node != 
other.node; } }; template typename ArenaLinkedList::Iterator ArenaLinkedList::begin() { return Iterator(head); } template typename ArenaLinkedList::Iterator ArenaLinkedList::end() { return Iterator(nullptr); } template typename ArenaLinkedList::ConstIterator ArenaLinkedList::begin() const { return ConstIterator(head); } template typename ArenaLinkedList::ConstIterator ArenaLinkedList::end() const { return ConstIterator(nullptr); } } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/explain_format.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class ExplainFormat : uint8_t { DEFAULT, TEXT, JSON, HTML, GRAPHVIZ, YAML }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/operator_result_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! The OperatorResultType is used to indicate how data should flow around a regular (i.e. non-sink and non-source) //! physical operator //! There are four possible results: //! NEED_MORE_INPUT means the operator is done with the current input and can consume more input if available //! If there is more input the operator will be called with more input, otherwise the operator will not be called again. //! HAVE_MORE_OUTPUT means the operator is not finished yet with the current input. //! The operator will be called again with the same input. //! FINISHED means the operator has finished the entire pipeline and no more processing is necessary. //! The operator will not be called again, and neither will any other operators in this pipeline. //! BLOCKED means the operator does not want to be called right now. e.g. because its currently doing async I/O. The //! operator has set the interrupt state and the caller is expected to handle it. 
Note that intermediate operators //! should currently not emit this state. enum class OperatorResultType : uint8_t { NEED_MORE_INPUT, HAVE_MORE_OUTPUT, FINISHED, BLOCKED }; //! OperatorFinalizeResultType is used to indicate whether operators have finished flushing their cached results. //! FINISHED means the operator has flushed all cached data. //! HAVE_MORE_OUTPUT means the operator contains more results. enum class OperatorFinalizeResultType : uint8_t { HAVE_MORE_OUTPUT, FINISHED }; //! OperatorFinalResultType is used for the final call enum class OperatorFinalResultType : uint8_t { FINISHED, BLOCKED }; //! SourceResultType is used to indicate the result of data being pulled out of a source. //! There are three possible results: //! HAVE_MORE_OUTPUT means the source has more output, this flag should only be set when data is returned, empty results //! should only occur for the FINISHED and BLOCKED flags //! FINISHED means the source is exhausted //! BLOCKED means the source is currently blocked, e.g. by some async I/O enum class SourceResultType : uint8_t { HAVE_MORE_OUTPUT, FINISHED, BLOCKED }; //! The SinkResultType is used to indicate the result of data flowing into a sink //! There are three possible results: //! NEED_MORE_INPUT means the sink needs more input //! FINISHED means the sink is finished executing, and more input will not change the result any further //! BLOCKED means the sink is currently blocked, e.g. by some async I/O. enum class SinkResultType : uint8_t { NEED_MORE_INPUT, FINISHED, BLOCKED }; // todo comment enum class SinkCombineResultType : uint8_t { FINISHED, BLOCKED }; //! The SinkFinalizeType is used to indicate the result of a Finalize call on a sink //! There are two possible results: //! READY means the sink is ready for further processing //! NO_OUTPUT_POSSIBLE means the sink will never provide output, and any pipelines involving the sink can be skipped //! BLOCKED means the finalize call to the sink is currently blocked, e.g. 
by some async I/O. enum class SinkFinalizeType : uint8_t { READY, NO_OUTPUT_POSSIBLE, BLOCKED }; //! The SinkNextBatchType is used to indicate the result of a NextBatch call on a sink //! There are two possible results: //! READY means the sink is ready for further processing //! BLOCKED means the NextBatch call to the sink is currently blocked, e.g. by some async I/O. enum class SinkNextBatchType : uint8_t { READY, BLOCKED }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/order_preservation_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Order Preservation Type //===--------------------------------------------------------------------===// enum class OrderPreservationType : uint8_t { NO_ORDER, // the operator makes no guarantees on order preservation (i.e. it might re-order the entire input) INSERTION_ORDER, // the operator maintains the order of the child operators FIXED_ORDER // the operator outputs rows in a fixed order that must be maintained (e.g. 
ORDER BY) }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/physical_operator_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Physical Operator Types //===--------------------------------------------------------------------===// enum class PhysicalOperatorType : uint8_t { INVALID, ORDER_BY, LIMIT, STREAMING_LIMIT, LIMIT_PERCENT, TOP_N, WINDOW, UNNEST, UNGROUPED_AGGREGATE, HASH_GROUP_BY, PERFECT_HASH_GROUP_BY, PARTITIONED_AGGREGATE, FILTER, PROJECTION, COPY_TO_FILE, BATCH_COPY_TO_FILE, RESERVOIR_SAMPLE, STREAMING_SAMPLE, STREAMING_WINDOW, PIVOT, COPY_DATABASE, // ----------------------------- // Scans // ----------------------------- TABLE_SCAN, DUMMY_SCAN, COLUMN_DATA_SCAN, CHUNK_SCAN, RECURSIVE_CTE_SCAN, RECURSIVE_RECURRING_CTE_SCAN, CTE_SCAN, DELIM_SCAN, EXPRESSION_SCAN, POSITIONAL_SCAN, // ----------------------------- // Joins // ----------------------------- BLOCKWISE_NL_JOIN, NESTED_LOOP_JOIN, HASH_JOIN, CROSS_PRODUCT, PIECEWISE_MERGE_JOIN, IE_JOIN, LEFT_DELIM_JOIN, RIGHT_DELIM_JOIN, POSITIONAL_JOIN, ASOF_JOIN, // ----------------------------- // SetOps // ----------------------------- UNION, RECURSIVE_CTE, RECURSIVE_KEY_CTE, CTE, // ----------------------------- // Updates // ----------------------------- INSERT, BATCH_INSERT, DELETE_OPERATOR, UPDATE, MERGE_INTO, // ----------------------------- // Schema // ----------------------------- CREATE_TABLE, CREATE_TABLE_AS, BATCH_CREATE_TABLE_AS, CREATE_INDEX, ALTER, CREATE_SEQUENCE, CREATE_VIEW, CREATE_SCHEMA, CREATE_MACRO, DROP, PRAGMA, TRANSACTION, CREATE_TYPE, ATTACH, DETACH, // ----------------------------- // Helpers // ----------------------------- EXPLAIN, EXPLAIN_ANALYZE, EMPTY_RESULT, EXECUTE, PREPARE, VACUUM, EXPORT, SET, SET_VARIABLE, LOAD, INOUT_FUNCTION, 
RESULT_COLLECTOR, RESET, EXTENSION, VERIFY_VECTOR, UPDATE_EXTENSIONS, // ----------------------------- // Secret // ----------------------------- CREATE_SECRET, }; string PhysicalOperatorToString(PhysicalOperatorType type); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/execution_context.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class ClientContext; class ThreadContext; class Pipeline; class ExecutionContext { public: ExecutionContext(ClientContext &client_p, ThreadContext &thread_p, optional_ptr pipeline_p) : client(client_p), thread(thread_p), pipeline(pipeline_p) { } //! The client-global context; caution needs to be taken when used in parallel situations ClientContext &client; //! The thread-local context for this execution ThreadContext &thread; //! Reference to the pipeline for this execution, can be used for example by operators determine caching strategy optional_ptr pipeline; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/partition_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class PartitionInfo { NONE, REQUIRES_BATCH_INDEX }; struct ColumnPartitionData { explicit ColumnPartitionData(Value partition_val) : min_val(partition_val), max_val(std::move(partition_val)) { } Value min_val; Value max_val; }; struct SourcePartitionInfo { //! The current batch index //! This is only set in case RequiresBatchIndex() is true, and the source has support for it (SupportsBatchIndex()) //! Otherwise this is left on INVALID_INDEX //! The batch index is a globally unique, increasing index that should be used to maintain insertion order //! //! in conjunction with parallelism optional_idx batch_index; //! 
The minimum batch index that any thread is currently actively reading optional_idx min_batch_index; //! Column partition data vector partition_data; }; struct OperatorPartitionInfo { OperatorPartitionInfo() = default; explicit OperatorPartitionInfo(bool batch_index) : batch_index(batch_index) { } explicit OperatorPartitionInfo(vector partition_columns_p) : partition_columns(std::move(partition_columns_p)) { } bool batch_index = false; vector partition_columns; static OperatorPartitionInfo NoPartitionInfo() { return OperatorPartitionInfo(false); } static OperatorPartitionInfo BatchIndex() { return OperatorPartitionInfo(true); } static OperatorPartitionInfo PartitionColumns(vector columns) { return OperatorPartitionInfo(std::move(columns)); } bool RequiresPartitionColumns() const { return !partition_columns.empty(); } bool RequiresBatchIndex() const { return batch_index; } bool AnyRequired() const { return RequiresPartitionColumns() || RequiresBatchIndex(); } }; struct OperatorPartitionData { explicit OperatorPartitionData(idx_t batch_index) : batch_index(batch_index) { } idx_t batch_index; vector partition_data; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/execution/physical_operator_states.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/optimizer/join_order/join_node.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/optimizer/join_order/join_relation.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //! Set of relations, used in the join graph. 
struct JoinRelationSet { JoinRelationSet(unsafe_unique_array relations, idx_t count) : relations(std::move(relations)), count(count) { } string ToString() const; unsafe_unique_array relations; idx_t count; static bool IsSubset(JoinRelationSet &super, JoinRelationSet &sub); }; //! The JoinRelationTree is a structure holding all the created JoinRelationSet objects and allowing fast lookup on to //! them class JoinRelationSetManager { public: //! Contains a node with a JoinRelationSet and child relations // FIXME: this structure is inefficient, could use a bitmap for lookup instead (todo: profile) struct JoinRelationTreeNode { unique_ptr relation; unordered_map> children; }; public: //! Create or get a JoinRelationSet from a single node with the given index JoinRelationSet &GetJoinRelation(idx_t index); //! Create or get a JoinRelationSet from a set of relation bindings JoinRelationSet &GetJoinRelation(const unordered_set &bindings); //! Create or get a JoinRelationSet from a (sorted, duplicate-free!) list of relations JoinRelationSet &GetJoinRelation(unsafe_unique_array relations, idx_t count); //! Union two sets of relations together and create a new relation set JoinRelationSet &Union(JoinRelationSet &left, JoinRelationSet &right); // //! Create the set difference of left \ right (i.e. 
all elements in left that are not in right) // JoinRelationSet *Difference(JoinRelationSet *left, JoinRelationSet *right); string ToString() const; void Print(); private: JoinRelationTreeNode root; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/optimizer/join_order/query_graph.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/optimizer/join_order/relation_manager.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/optimizer/join_order/cardinality_estimator.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/column_binding_map.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/column_binding.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { class Serializer; class Deserializer; struct ColumnBinding { idx_t table_index; // This index is local to a Binding, and has no meaning outside of the context of the Binding that created it idx_t column_index; ColumnBinding() : table_index(DConstants::INVALID_INDEX), column_index(DConstants::INVALID_INDEX) { } ColumnBinding(idx_t table, idx_t column) : table_index(table), column_index(column) { } string ToString() const { return "#[" + to_string(table_index) + "." 
+ to_string(column_index) + "]"; } bool operator==(const ColumnBinding &rhs) const { return table_index == rhs.table_index && column_index == rhs.column_index; } bool operator!=(const ColumnBinding &rhs) const { return !(*this == rhs); } void Serialize(Serializer &serializer) const; static ColumnBinding Deserialize(Deserializer &deserializer); }; } // namespace duckdb namespace duckdb { struct ColumnBindingHashFunction { uint64_t operator()(const ColumnBinding &a) const { return CombineHash(Hash(a.table_index), Hash(a.column_index)); } }; struct ColumnBindingEquality { bool operator()(const ColumnBinding &a, const ColumnBinding &b) const { return a == b; } }; template using column_binding_map_t = unordered_map; using column_binding_set_t = unordered_set; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/optimizer/join_order/statistics_extractor.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/filter/conjunction_filter.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/table_filter.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/column_index.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct ColumnIndex { ColumnIndex() : index(DConstants::INVALID_INDEX) { } explicit ColumnIndex(idx_t index) : index(index) { } ColumnIndex(idx_t index, vector child_indexes_p) : index(index), child_indexes(std::move(child_indexes_p)) { } inline bool operator==(const ColumnIndex &rhs) const { 
return index == rhs.index; } inline bool operator!=(const ColumnIndex &rhs) const { return index != rhs.index; } inline bool operator<(const ColumnIndex &rhs) const { return index < rhs.index; } idx_t GetPrimaryIndex() const { return index; } LogicalIndex ToLogical() const { return LogicalIndex(index); } bool HasChildren() const { return !child_indexes.empty(); } idx_t ChildIndexCount() const { return child_indexes.size(); } const ColumnIndex &GetChildIndex(idx_t idx) const { return child_indexes[idx]; } ColumnIndex &GetChildIndex(idx_t idx) { return child_indexes[idx]; } const vector &GetChildIndexes() const { return child_indexes; } vector &GetChildIndexesMutable() { return child_indexes; } void AddChildIndex(ColumnIndex new_index) { this->child_indexes.push_back(std::move(new_index)); } bool IsRowIdColumn() const { return index == COLUMN_IDENTIFIER_ROW_ID; } bool IsEmptyColumn() const { return index == COLUMN_IDENTIFIER_EMPTY; } bool IsVirtualColumn() const { return index >= VIRTUAL_COLUMN_START; } void Serialize(Serializer &serializer) const; static ColumnIndex Deserialize(Deserializer &deserializer); private: idx_t index; vector child_indexes; }; } // namespace duckdb namespace duckdb { class BaseStatistics; class Expression; class PhysicalOperator; class PhysicalTableScan; enum class TableFilterType : uint8_t { CONSTANT_COMPARISON = 0, // constant comparison (e.g. 
=C, >C, >=C, Copy() const = 0; virtual bool Equals(const TableFilter &other) const { return filter_type == other.filter_type; } virtual unique_ptr ToExpression(const Expression &column) const = 0; virtual void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); public: template TARGET &Cast() { if (filter_type != TARGET::TYPE) { throw InternalException("Failed to cast to type - table filter type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (filter_type != TARGET::TYPE) { throw InternalException("Failed to cast to type - table filter type mismatch"); } return reinterpret_cast(*this); } }; //! The filters in here are non-composite (only need a single column to be evaluated) //! Conditions like `A = 2 OR B = 4` are not pushed into a TableFilterSet. class TableFilterSet { public: map> filters; public: void PushFilter(const ColumnIndex &col_idx, unique_ptr filter); bool Equals(TableFilterSet &other) { if (filters.size() != other.filters.size()) { return false; } for (auto &entry : filters) { auto other_entry = other.filters.find(entry.first); if (other_entry == other.filters.end()) { return false; } if (!entry.second->Equals(*other_entry->second)) { return false; } } return true; } static bool Equals(TableFilterSet *left, TableFilterSet *right) { if (left == right) { return true; } if (!left || !right) { return false; } return left->Equals(*right); } unique_ptr Copy() const { auto copy = make_uniq(); for (auto &it : filters) { copy->filters.emplace(it.first, it.second->Copy()); } return copy; } void Serialize(Serializer &serializer) const; static TableFilterSet Deserialize(Deserializer &deserializer); }; class DynamicTableFilterSet { public: void ClearFilters(const PhysicalOperator &op); void PushFilter(const PhysicalOperator &op, idx_t column_index, unique_ptr filter); bool HasFilters() const; unique_ptr GetFinalTableFilters(const PhysicalTableScan &scan, optional_ptr 
existing_filters) const; private: mutable mutex lock; reference_map_t> filters; }; } // namespace duckdb namespace duckdb { class ConjunctionFilter : public TableFilter { public: explicit ConjunctionFilter(TableFilterType filter_type_p) : TableFilter(filter_type_p) { } ~ConjunctionFilter() override { } //! The filters of this conjunction vector> child_filters; public: bool Equals(const TableFilter &other) const override { return TableFilter::Equals(other); } }; class ConjunctionOrFilter : public ConjunctionFilter { public: static constexpr const TableFilterType TYPE = TableFilterType::CONJUNCTION_OR; public: ConjunctionOrFilter(); FilterPropagateResult CheckStatistics(BaseStatistics &stats) const override; string ToString(const string &column_name) const override; bool Equals(const TableFilter &other) const override; unique_ptr Copy() const override; unique_ptr ToExpression(const Expression &column) const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); }; class ConjunctionAndFilter : public ConjunctionFilter { public: static constexpr const TableFilterType TYPE = TableFilterType::CONJUNCTION_AND; public: ConjunctionAndFilter(); public: FilterPropagateResult CheckStatistics(BaseStatistics &stats) const override; string ToString(const string &column_name) const override; bool Equals(const TableFilter &other) const override; unique_ptr Copy() const override; unique_ptr ToExpression(const Expression &column) const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/logical_operator.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // 
duckdb/common/enums/logical_operator_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Logical Operator Types //===--------------------------------------------------------------------===// enum class LogicalOperatorType : uint8_t { LOGICAL_INVALID = 0, LOGICAL_PROJECTION = 1, LOGICAL_FILTER = 2, LOGICAL_AGGREGATE_AND_GROUP_BY = 3, LOGICAL_WINDOW = 4, LOGICAL_UNNEST = 5, LOGICAL_LIMIT = 6, LOGICAL_ORDER_BY = 7, LOGICAL_TOP_N = 8, LOGICAL_COPY_TO_FILE = 10, LOGICAL_DISTINCT = 11, LOGICAL_SAMPLE = 12, LOGICAL_PIVOT = 14, LOGICAL_COPY_DATABASE = 15, // ----------------------------- // Data sources // ----------------------------- LOGICAL_GET = 25, LOGICAL_CHUNK_GET = 26, LOGICAL_DELIM_GET = 27, LOGICAL_EXPRESSION_GET = 28, LOGICAL_DUMMY_SCAN = 29, LOGICAL_EMPTY_RESULT = 30, LOGICAL_CTE_REF = 31, // ----------------------------- // Joins // ----------------------------- LOGICAL_JOIN = 50, LOGICAL_DELIM_JOIN = 51, LOGICAL_COMPARISON_JOIN = 52, LOGICAL_ANY_JOIN = 53, LOGICAL_CROSS_PRODUCT = 54, LOGICAL_POSITIONAL_JOIN = 55, LOGICAL_ASOF_JOIN = 56, LOGICAL_DEPENDENT_JOIN = 57, // ----------------------------- // SetOps // ----------------------------- LOGICAL_UNION = 75, LOGICAL_EXCEPT = 76, LOGICAL_INTERSECT = 77, LOGICAL_RECURSIVE_CTE = 78, LOGICAL_MATERIALIZED_CTE = 79, // ----------------------------- // Updates // ----------------------------- LOGICAL_INSERT = 100, LOGICAL_DELETE = 101, LOGICAL_UPDATE = 102, LOGICAL_MERGE_INTO = 103, // ----------------------------- // Schema // ----------------------------- LOGICAL_ALTER = 125, LOGICAL_CREATE_TABLE = 126, LOGICAL_CREATE_INDEX = 127, LOGICAL_CREATE_SEQUENCE = 128, LOGICAL_CREATE_VIEW = 129, LOGICAL_CREATE_SCHEMA = 130, LOGICAL_CREATE_MACRO = 131, LOGICAL_DROP = 132, LOGICAL_PRAGMA = 133, LOGICAL_TRANSACTION = 134, LOGICAL_CREATE_TYPE = 135, LOGICAL_ATTACH = 136, 
LOGICAL_DETACH = 137, // ----------------------------- // Explain // ----------------------------- LOGICAL_EXPLAIN = 150, // ----------------------------- // Helpers // ----------------------------- LOGICAL_PREPARE = 175, LOGICAL_EXECUTE = 176, LOGICAL_EXPORT = 177, LOGICAL_VACUUM = 178, LOGICAL_SET = 179, LOGICAL_LOAD = 180, LOGICAL_RESET = 181, LOGICAL_UPDATE_EXTENSIONS = 182, // ----------------------------- // Secrets // ----------------------------- LOGICAL_CREATE_SECRET = 190, LOGICAL_EXTENSION_OPERATOR = 255 }; DUCKDB_API string LogicalOperatorToString(LogicalOperatorType type); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/logical_operator_visitor.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/bound_tokens.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Query Node //===--------------------------------------------------------------------===// class BoundQueryNode; class BoundSelectNode; class BoundSetOperationNode; class BoundRecursiveCTENode; class BoundCTENode; //===--------------------------------------------------------------------===// // Expressions //===--------------------------------------------------------------------===// class Expression; class BoundAggregateExpression; class BoundBetweenExpression; class BoundCaseExpression; class BoundCastExpression; class BoundColumnRefExpression; class BoundComparisonExpression; class BoundConjunctionExpression; class BoundConstantExpression; class BoundDefaultExpression; class BoundFunctionExpression; class BoundLambdaRefExpression; class BoundOperatorExpression; class BoundParameterExpression; class BoundReferenceExpression; 
class BoundSubqueryExpression; class BoundUnnestExpression; class BoundWindowExpression; //===--------------------------------------------------------------------===// // TableRefs //===--------------------------------------------------------------------===// class BoundTableRef; class BoundBaseTableRef; class BoundJoinRef; class BoundSubqueryRef; class BoundTableFunction; class BoundEmptyTableRef; class BoundExpressionListRef; class BoundColumnDataRef; class BoundCTERef; class BoundPivotRef; class BoundMergeIntoAction; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/logical_tokens.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class LogicalOperator; class LogicalAggregate; class LogicalAnyJoin; class LogicalColumnDataGet; class LogicalComparisonJoin; class LogicalCopyDatabase; class LogicalCopyToFile; class LogicalCreate; class LogicalCreateTable; class LogicalCreateIndex; class LogicalCreateTable; class LogicalCreateSecret; class LogicalCrossProduct; class LogicalCTERef; class LogicalDelete; class LogicalDelimGet; class LogicalDistinct; class LogicalDummyScan; class LogicalEmptyResult; class LogicalExecute; class LogicalExplain; class LogicalExport; class LogicalExpressionGet; class LogicalFilter; class LogicalGet; class LogicalInsert; class LogicalJoin; class LogicalLimit; class LogicalMergeInto; class LogicalOrder; class LogicalPivot; class LogicalPositionalJoin; class LogicalPragma; class LogicalPrepare; class LogicalProjection; class LogicalRecursiveCTE; class LogicalMaterializedCTE; class LogicalSetOperation; class LogicalSample; class LogicalSimple; class LogicalVacuum; class LogicalSet; class LogicalReset; class LogicalTopN; class LogicalUnnest; class LogicalUpdate; class LogicalWindow; } // namespace duckdb #include namespace duckdb { //! 
The LogicalOperatorVisitor is an abstract base class that implements the //! Visitor pattern on LogicalOperator. class LogicalOperatorVisitor { public: virtual ~LogicalOperatorVisitor() { } virtual void VisitOperator(LogicalOperator &op); virtual void VisitExpression(unique_ptr *expression); static void EnumerateExpressions(LogicalOperator &op, const std::function *child)> &callback); protected: //! Automatically calls the Visit method for LogicalOperator children of the current operator. Can be overloaded to //! change this behavior. void VisitOperatorChildren(LogicalOperator &op); //! Automatically calls the Visit method for Expression children of the current operator. Can be overloaded to //! change this behavior. void VisitOperatorExpressions(LogicalOperator &op); //! Alternatives for VisitOperatorChildren for operators that have a projection map void VisitOperatorWithProjectionMapChildren(LogicalOperator &op); void VisitChildOfOperatorWithProjectionMap(LogicalOperator &child, vector &projection_map); // The VisitExpressionChildren method is called at the end of every call to VisitExpression to recursively visit all // expressions in an expression tree. It can be overloaded to prevent automatically visiting the entire tree. 
virtual void VisitExpressionChildren(Expression &expression); virtual unique_ptr VisitReplace(BoundAggregateExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundBetweenExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundCaseExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundCastExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundColumnRefExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundComparisonExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundConjunctionExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundConstantExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundDefaultExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundFunctionExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundOperatorExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundReferenceExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundSubqueryExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundParameterExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundWindowExpression &expr, unique_ptr *expr_ptr); virtual unique_ptr VisitReplace(BoundUnnestExpression &expr, unique_ptr *expr_ptr); }; } // namespace duckdb #include #include namespace duckdb { //! LogicalOperator is the base class of the logical operators present in the //! logical query tree class LogicalOperator { public: explicit LogicalOperator(LogicalOperatorType type); LogicalOperator(LogicalOperatorType type, vector> expressions); virtual ~LogicalOperator(); //! The type of the logical operator LogicalOperatorType type; //! The set of children of the operator vector> children; //! The set of expressions contained within the operator, if any vector> expressions; //! 
The types returned by this logical operator. Set by calling LogicalOperator::ResolveTypes. vector types; //! Estimated Cardinality idx_t estimated_cardinality; bool has_estimated_cardinality; public: virtual vector GetColumnBindings(); static string ColumnBindingsToString(const vector &bindings); void PrintColumnBindings(); static vector GenerateColumnBindings(idx_t table_idx, idx_t column_count); static vector MapTypes(const vector &types, const vector &projection_map); static vector MapBindings(const vector &types, const vector &projection_map); //! Resolve the types of the logical operator and its children void ResolveOperatorTypes(); virtual string GetName() const; virtual InsertionOrderPreservingMap ParamsToString() const; virtual string ToString(ExplainFormat format = ExplainFormat::DEFAULT) const; DUCKDB_API void Print(); //! Debug method: verify that the integrity of expressions & child nodes are maintained virtual void Verify(ClientContext &context); void AddChild(unique_ptr child); virtual idx_t EstimateCardinality(ClientContext &context); void SetEstimatedCardinality(idx_t _estimated_cardinality); void SetParamsEstimatedCardinality(InsertionOrderPreservingMap &result) const; virtual void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); virtual unique_ptr Copy(ClientContext &context) const; virtual bool RequireOptimizer() const { return true; } //! Allows LogicalOperators to opt out of serialization virtual bool SupportSerialization() const { return true; }; virtual bool HasProjectionMap() const { return false; } //! Returns the set of table indexes of this operator virtual vector GetTableIndex() const; protected: //! 
Resolve types for this specific operator virtual void ResolveTypes() = 0; public: template TARGET &Cast() { if (TARGET::TYPE != LogicalOperatorType::LOGICAL_INVALID && type != TARGET::TYPE) { throw InternalException("Failed to cast logical operator to type - logical operator type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (TARGET::TYPE != LogicalOperatorType::LOGICAL_INVALID && type != TARGET::TYPE) { throw InternalException("Failed to cast logical operator to type - logical operator type mismatch"); } return reinterpret_cast(*this); } }; } // namespace duckdb namespace duckdb { class CardinalityEstimator; struct DistinctCount { idx_t distinct_count; bool from_hll; }; struct ExpressionBinding { bool found_expression = false; ColumnBinding child_binding; bool expression_is_constant = false; }; struct RelationStats { // column_id -> estimated distinct count for column vector column_distinct_count; idx_t cardinality; double filter_strength = 1; bool stats_initialized = false; // for debug, column names and tables vector column_names; string table_name; RelationStats() : cardinality(1), filter_strength(1), stats_initialized(false) { } }; class RelationStatisticsHelper { public: static constexpr double DEFAULT_SELECTIVITY = 0.2; public: static idx_t InspectTableFilter(idx_t cardinality, idx_t column_index, TableFilter &filter, BaseStatistics &base_stats); // static idx_t InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter &filter, // BaseStatistics &base_stats); //! Extract Statistics from a LogicalGet. static RelationStats ExtractGetStats(LogicalGet &get, ClientContext &context); static RelationStats ExtractDelimGetStats(LogicalDelimGet &delim_get, ClientContext &context); //! Create the statistics for a projection using the statistics of the operator that sits underneath the //! projection. Then also create statistics for any extra columns the projection creates. 
static RelationStats ExtractDummyScanStats(LogicalDummyScan &dummy_scan, ClientContext &context); static RelationStats ExtractExpressionGetStats(LogicalExpressionGet &expression_get, ClientContext &context); //! All relation extractors for blocking relations static RelationStats ExtractProjectionStats(LogicalProjection &proj, RelationStats &child_stats); static RelationStats ExtractAggregationStats(LogicalAggregate &aggr, RelationStats &child_stats); static RelationStats ExtractWindowStats(LogicalWindow &window, RelationStats &child_stats); static RelationStats ExtractEmptyResultStats(LogicalEmptyResult &empty); //! Called after reordering a query plan with potentially 2+ relations. static RelationStats CombineStatsOfReorderableOperator(vector &bindings, vector relation_stats); //! Called after reordering a query plan with potentially 2+ relations. static RelationStats CombineStatsOfNonReorderableOperator(LogicalOperator &op, vector child_stats); static void CopyRelationStats(RelationStats &to, const RelationStats &from); private: static idx_t GetDistinctCount(LogicalGet &get, ClientContext &context, idx_t column_id); }; } // namespace duckdb namespace duckdb { class FilterInfo; struct DenomInfo { DenomInfo(JoinRelationSet &numerator_relations, double filter_strength, double denominator) : numerator_relations(numerator_relations), filter_strength(filter_strength), denominator(denominator) { } JoinRelationSet &numerator_relations; double filter_strength; double denominator; }; struct RelationsToTDom { //! column binding sets that are equivalent in a join plan. //! if you have A.x = B.y and B.y = C.z, then one set is {A.x, B.y, C.z}. column_binding_set_t equivalent_relations; //! the estimated total domains of the equivalent relations determined using HLL idx_t tdom_hll; //! 
the estimated total domains of each relation without using HLL idx_t tdom_no_hll; bool has_tdom_hll; vector> filters; vector column_names; explicit RelationsToTDom(const column_binding_set_t &column_binding_set) : equivalent_relations(column_binding_set), tdom_hll(0), tdom_no_hll(NumericLimits::Maximum()), has_tdom_hll(false) {}; }; class FilterInfoWithTotalDomains { public: FilterInfoWithTotalDomains(optional_ptr filter_info, RelationsToTDom &relation2tdom) : filter_info(filter_info), tdom_hll(relation2tdom.tdom_hll), tdom_no_hll(relation2tdom.tdom_no_hll), has_tdom_hll(relation2tdom.has_tdom_hll) { } optional_ptr filter_info; //! the estimated total domains of the equivalent relations determined using HLL idx_t tdom_hll; //! the estimated total domains of each relation without using HLL idx_t tdom_no_hll; bool has_tdom_hll; }; struct Subgraph2Denominator { optional_ptr relations; optional_ptr numerator_relations; double denom; Subgraph2Denominator() : relations(nullptr), numerator_relations(nullptr), denom(1) {}; }; class CardinalityHelper { public: CardinalityHelper() { } explicit CardinalityHelper(double cardinality_before_filters) : cardinality_before_filters(cardinality_before_filters) {}; public: // must be a double. Otherwise we can lose significance between different join orders. // our cardinality estimator severely underestimates cardinalities for 3+ joins. However, // if one join order has an estimate of 0.8, and another has an estimate of 0.6, rounding // them means there is no estimated difference, when in reality there could be a very large // difference. 
double cardinality_before_filters; vector table_names_joined; vector column_names; }; class CardinalityEstimator { public: static constexpr double DEFAULT_SEMI_ANTI_SELECTIVITY = 5; explicit CardinalityEstimator() {}; private: vector relations_to_tdoms; unordered_map relation_set_2_cardinality; JoinRelationSetManager set_manager; vector relation_stats; public: void RemoveEmptyTotalDomains(); void UpdateTotalDomains(optional_ptr set, RelationStats &stats); void InitEquivalentRelations(const vector> &filter_infos); void InitCardinalityEstimatorProps(optional_ptr set, RelationStats &stats); //! cost model needs estimated cardinalities to the fraction since the formula captures //! distinct count selectivities and multiplicities. Hence the template template T EstimateCardinalityWithSet(JoinRelationSet &new_set); //! used for debugging. void AddRelationNamesToTdoms(vector &stats); void PrintRelationToTdomInfo(); private: double GetNumerator(JoinRelationSet &set); DenomInfo GetDenominator(JoinRelationSet &set); bool SingleColumnFilter(FilterInfo &filter_info); vector DetermineMatchingEquivalentSets(optional_ptr filter_info); //! Given a filter, add the column bindings to the matching equivalent set at the index //! given in matching equivalent sets. //! If there are multiple equivalence sets, they are merged. 
void AddToEquivalenceSets(optional_ptr filter_info, vector matching_equivalent_sets); double CalculateUpdatedDenom(Subgraph2Denominator left, Subgraph2Denominator right, FilterInfoWithTotalDomains &filter); JoinRelationSet &UpdateNumeratorRelations(Subgraph2Denominator left, Subgraph2Denominator right, FilterInfoWithTotalDomains &filter); void AddRelationTdom(FilterInfo &filter_info); bool EmptyFilter(FilterInfo &filter_info); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/expression_map.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Expression; template struct ExpressionHashFunction { uint64_t operator()(const reference &expr) const { return (uint64_t)expr.get().Hash(); } }; template struct ExpressionEquality { bool operator()(const reference &a, const reference &b) const { return a.get().Equals(b.get()); } }; template using expression_map_t = unordered_map, T, ExpressionHashFunction, ExpressionEquality>; using expression_set_t = unordered_set, ExpressionHashFunction, ExpressionEquality>; template using parsed_expression_map_t = unordered_map, T, ExpressionHashFunction, ExpressionEquality>; using parsed_expression_set_t = unordered_set, ExpressionHashFunction, ExpressionEquality>; } // namespace duckdb namespace duckdb { class JoinOrderOptimizer; class FilterInfo; //! 
Represents a single relation and any metadata accompanying that relation struct SingleJoinRelation { LogicalOperator &op; optional_ptr parent; RelationStats stats; SingleJoinRelation(LogicalOperator &op, optional_ptr parent) : op(op), parent(parent) { } SingleJoinRelation(LogicalOperator &op, optional_ptr parent, RelationStats stats) : op(op), parent(parent), stats(std::move(stats)) { } }; class RelationManager { public: explicit RelationManager(ClientContext &context) : context(context) { } idx_t NumRelations(); bool ExtractJoinRelations(JoinOrderOptimizer &optimizer, LogicalOperator &input_op, vector> &filter_operators, optional_ptr parent = nullptr); //! for each join filter in the logical plan op, extract the relations that are referred to on //! both sides of the join filter, along with the tables & indexes. vector> ExtractEdges(LogicalOperator &op, vector> &filter_operators, JoinRelationSetManager &set_manager); //! Extract the set of relations referred to inside an expression bool ExtractBindings(Expression &expression, unordered_set &bindings); void AddRelation(LogicalOperator &op, optional_ptr parent, const RelationStats &stats); //! Add an unnest relation which can come from a logical unnest or a logical get which has an unnest function void AddRelationWithChildren(JoinOrderOptimizer &optimizer, LogicalOperator &op, LogicalOperator &input_op, optional_ptr parent, RelationStats &child_stats, optional_ptr limit_op, vector> &datasource_filters); void AddAggregateOrWindowRelation(LogicalOperator &op, optional_ptr parent, const RelationStats &stats, LogicalOperatorType op_type); vector> GetRelations(); const vector GetRelationStats(); //! A mapping of base table index -> index into relations array (relation number) unordered_map relation_mapping; bool CrossProductWithRelationAllowed(idx_t relation_id); void PrintRelationStats(); private: ClientContext &context; //! 
Set of all relations considered in the join optimizer vector> relations; unordered_set no_cross_product_relations; }; } // namespace duckdb #include namespace duckdb { class FilterInfo; struct NeighborInfo { explicit NeighborInfo(optional_ptr neighbor) : neighbor(neighbor) { } optional_ptr neighbor; vector> filters; }; //! The QueryGraph contains edges between relations and allows edges to be created/queried class QueryGraphEdges { public: //! Contains a node with info about neighboring relations and child edge infos struct QueryEdge { vector> neighbors; unordered_map> children; }; public: string ToString() const; void Print(); //! Returns a connection if there is an edge that connects these two sets, or nullptr otherwise const vector> GetConnections(JoinRelationSet &node, JoinRelationSet &other) const; //! Enumerate the neighbors of a specific node that do not belong to any of the exclusion_set. Note that if a //! neighbor has multiple nodes, this function will return the lowest entry in that set. const vector GetNeighbors(JoinRelationSet &node, unordered_set &exclusion_set) const; //! Enumerate all neighbors of a given JoinRelationSet node void EnumerateNeighbors(JoinRelationSet &node, const std::function &callback) const; //! Create an edge in the edge_set void CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr info); private: //! Get the QueryEdge of a specific node optional_ptr GetQueryEdge(JoinRelationSet &left); void EnumerateNeighborsDFS(JoinRelationSet &node, reference info, idx_t index, const std::function &callback) const; QueryEdge root; }; } // namespace duckdb namespace duckdb { struct NeighborInfo; class DPJoinNode { public: //! Represents a node in the join plan JoinRelationSet &set; //! information on how left and right are connected optional_ptr info; bool is_leaf; //! left and right plans JoinRelationSet &left_set; JoinRelationSet &right_set; //! The cost of the join node. The cost is stored here so that the cost of //! 
a join node stays in sync with how the join node is constructed. Storing the cost in an unordered_set //! in the cost model is error prone. If the plan enumerator join node is updated and not the cost model //! the whole Join Order Optimizer can start exhibiting undesired behavior. double cost; //! used only to populate logical operators with estimated cardinalities after the best join plan has been found. idx_t cardinality; //! Create an intermediate node in the join tree. base_cardinality = estimated_props.cardinality DPJoinNode(JoinRelationSet &set, optional_ptr info, JoinRelationSet &left, JoinRelationSet &right, double cost); //! Create a leaf node in the join tree //! set cost to 0 for leaf nodes //! cost will be the cost to *produce* an intermediate table explicit DPJoinNode(JoinRelationSet &set); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parallel/interrupt.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parallel/task.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class ClientContext; class Executor; class Task; class DatabaseInstance; struct ProducerToken; enum class TaskExecutionMode : uint8_t { PROCESS_ALL, PROCESS_PARTIAL }; enum class TaskExecutionResult : uint8_t { TASK_FINISHED, TASK_NOT_FINISHED, TASK_ERROR, TASK_BLOCKED }; //! Generic parallel task class Task : public enable_shared_from_this { public: virtual ~Task() { } public: //! Execute the task in the specified execution mode //! If mode is PROCESS_ALL, Execute should always finish processing and return TASK_FINISHED //! If mode is PROCESS_PARTIAL, Execute can return TASK_NOT_FINISHED, in which case Execute will be called again //! In case of an error, TASK_ERROR is returned //! 
In case the task has interrupted, BLOCKED is returned. virtual TaskExecutionResult Execute(TaskExecutionMode mode) = 0; //! Descheduling a task ensures the task is not executed, but remains available for rescheduling as long as //! required, generally until some code in an operator calls the InterruptState::Callback() method of a state of the //! InterruptMode::TASK mode. virtual void Deschedule() { throw InternalException("Cannot deschedule task of base Task class"); }; //! Ensures a task is rescheduled to the correct queue virtual void Reschedule() { throw InternalException("Cannot reschedule task of base Task class"); } virtual bool TaskBlockedOnResult() const { return false; } virtual string TaskType() const { return "UnnamedTask"; } public: optional_ptr token; }; } // namespace duckdb #include namespace duckdb { //! InterruptMode specifies how operators should block/unblock, note that this will happen transparently to the //! operator, as the operator only needs to return a BLOCKED result and call the callback using the InterruptState. //! NO_INTERRUPTS: No blocking mode is specified, an error will be thrown when the operator blocks. Should only be used //! when manually calling operators of which is known they will never block. //! TASK: A weak pointer to a task is provided. On the callback, this task will be signalled. If the Task has //! been deleted, this callback becomes a NOP. This is the preferred way to await blocked pipelines. //! BLOCKING: The caller has blocked awaiting some synchronization primitive to wait for the callback. enum class InterruptMode : uint8_t { NO_INTERRUPTS, TASK, BLOCKING }; //! Synchronization primitive used to await a callback in InterruptMode::BLOCKING. struct InterruptDoneSignalState { //! Called by the callback to signal the interrupt is over void Signal(); //! Await the callback signalling the interrupt is over void Await(); protected: mutex lock; std::condition_variable cv; bool done = false; }; //! 
State required to make the callback after some asynchronous operation within an operator source / sink. class InterruptState { public: //! Default interrupt state will be set to InterruptMode::NO_INTERRUPTS and throw an error on use of Callback() InterruptState(); //! Register the task to be interrupted and set mode to InterruptMode::TASK, the preferred way to handle interrupts explicit InterruptState(weak_ptr task); //! Register signal state and set mode to InterruptMode::BLOCKING, used for code paths without Task. explicit InterruptState(weak_ptr done_signal); //! Perform the callback to indicate the Interrupt is over DUCKDB_API void Callback() const; protected: //! Current interrupt mode InterruptMode mode; //! Task ptr for InterruptMode::TASK weak_ptr current_task; //! Signal state for InterruptMode::BLOCKING weak_ptr signal_state; }; class StateWithBlockableTasks { public: unique_lock Lock() { return unique_lock(lock); } void PreventBlocking(const unique_lock &guard) { VerifyLock(guard); can_block = false; } //! Add a task to 'blocked_tasks' before returning SourceResultType::BLOCKED (must hold the lock) bool BlockTask(const unique_lock &guard, const InterruptState &interrupt_state) { VerifyLock(guard); if (can_block) { blocked_tasks.push_back(interrupt_state); return true; } return false; } //! Unblock all tasks (must hold the lock) bool UnblockTasks(const unique_lock &guard) { VerifyLock(guard); if (blocked_tasks.empty()) { return false; } for (auto &entry : blocked_tasks) { entry.Callback(); } blocked_tasks.clear(); return true; } SinkResultType BlockSink(const unique_lock &guard, const InterruptState &interrupt_state) { return BlockTask(guard, interrupt_state) ? SinkResultType::BLOCKED : SinkResultType::FINISHED; } SourceResultType BlockSource(const unique_lock &guard, const InterruptState &interrupt_state) { return BlockTask(guard, interrupt_state) ? 
SourceResultType::BLOCKED : SourceResultType::FINISHED; } void VerifyLock(const unique_lock &guard) const { #ifdef DEBUG D_ASSERT(guard.mutex() && RefersToSameObject(*guard.mutex(), lock)); #endif } private: //! Whether we can block tasks atomic can_block {true}; //! Global lock, acquired by calling Lock() mutable mutex lock; //! Tasks that are currently blocked mutable vector blocked_tasks; }; } // namespace duckdb namespace duckdb { class Event; class Executor; class PhysicalOperator; class Pipeline; class PipelineBuildState; class MetaPipeline; class InterruptState; // LCOV_EXCL_START class OperatorState { public: virtual ~OperatorState() { } virtual void Finalize(const PhysicalOperator &op, ExecutionContext &context) { } template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; class GlobalOperatorState { public: virtual ~GlobalOperatorState() { } template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; class GlobalSinkState : public StateWithBlockableTasks { public: GlobalSinkState() : state(SinkFinalizeType::READY) { } virtual ~GlobalSinkState() { } SinkFinalizeType state; template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } virtual idx_t MaxThreads(idx_t source_max_threads) { return source_max_threads; } }; class LocalSinkState { public: virtual ~LocalSinkState() { } //! 
Source partition info SourcePartitionInfo partition_info; template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; class GlobalSourceState : public StateWithBlockableTasks { public: virtual ~GlobalSourceState() { } virtual idx_t MaxThreads() { return 1; } template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; class LocalSourceState { public: virtual ~LocalSourceState() { } template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } }; struct OperatorSinkInput { GlobalSinkState &global_state; LocalSinkState &local_state; InterruptState &interrupt_state; }; struct OperatorSourceInput { GlobalSourceState &global_state; LocalSourceState &local_state; InterruptState &interrupt_state; }; struct OperatorSinkCombineInput { GlobalSinkState &global_state; LocalSinkState &local_state; InterruptState &interrupt_state; }; struct OperatorSinkFinalizeInput { GlobalSinkState &global_state; InterruptState &interrupt_state; }; struct OperatorFinalizeInput { GlobalOperatorState &global_state; InterruptState &interrupt_state; }; struct OperatorSinkNextBatchInput { GlobalSinkState &global_state; LocalSinkState &local_state; InterruptState &interrupt_state; }; // LCOV_EXCL_STOP } // namespace duckdb namespace duckdb { class Event; class Executor; class PhysicalOperator; class Pipeline; class PipelineBuildState; class MetaPipeline; class PhysicalPlan; //! PhysicalOperator is the base class of the physical operators present in the execution plan. 
// NOTE(review): template parameter/argument lists ("<...>") were stripped from this
// chunk by text extraction; restore them from the upstream DuckDB headers.
class PhysicalOperator {
public:
	static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::INVALID;

public:
	PhysicalOperator(PhysicalPlan &physical_plan, PhysicalOperatorType type, vector types,
	                 idx_t estimated_cardinality);
	virtual ~PhysicalOperator() {
	}

	//! Deleted copy constructors.
	PhysicalOperator(const PhysicalOperator &other) = delete;
	PhysicalOperator &operator=(const PhysicalOperator &) = delete;

	//! The child operators.
	ArenaLinkedList> children;
	//! The physical operator type.
	PhysicalOperatorType type;
	//! The return types.
	vector types;
	//! The estimated cardinality.
	idx_t estimated_cardinality;
	//! The global sink state.
	unique_ptr sink_state;
	//! The global operator state.
	unique_ptr op_state;
	//! Lock for (re)setting any of the operator states.
	mutex lock;

public:
	virtual string GetName() const;
	virtual InsertionOrderPreservingMap ParamsToString() const {
		return InsertionOrderPreservingMap();
	}
	static void SetEstimatedCardinality(InsertionOrderPreservingMap &result, idx_t estimated_cardinality);
	virtual string ToString(ExplainFormat format = ExplainFormat::DEFAULT) const;
	void Print() const;
	virtual vector> GetChildren() const;

	//! Return a vector of the types that will be returned by this operator
	const vector &GetTypes() const {
		return types;
	}

	virtual bool Equals(const PhysicalOperator &other) const {
		return false;
	}

	//! Functions to help decide how to set up pipeline dependencies
	idx_t EstimatedThreadCount() const;
	bool CanSaturateThreads(ClientContext &context) const;

	virtual void Verify();

public: // Operator interface
	virtual unique_ptr GetOperatorState(ExecutionContext &context) const;
	virtual unique_ptr GetGlobalOperatorState(ClientContext &context) const;
	virtual OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
	                                   GlobalOperatorState &gstate, OperatorState &state) const;
	virtual OperatorFinalizeResultType FinalExecute(ExecutionContext &context, DataChunk &chunk,
	                                                GlobalOperatorState &gstate, OperatorState &state) const;
	virtual OperatorFinalResultType OperatorFinalize(Pipeline &pipeline, Event &event, ClientContext &context,
	                                                 OperatorFinalizeInput &input) const;

	virtual bool ParallelOperator() const {
		return false;
	}
	virtual bool RequiresFinalExecute() const {
		return false;
	}
	virtual bool RequiresOperatorFinalize() const {
		return false;
	}

	//! The influence the operator has on order (insertion order means no influence)
	virtual OrderPreservationType OperatorOrder() const {
		return OrderPreservationType::INSERTION_ORDER;
	}

public: // Source interface
	virtual unique_ptr GetLocalSourceState(ExecutionContext &context, GlobalSourceState &gstate) const;
	virtual unique_ptr GetGlobalSourceState(ClientContext &context) const;
	virtual SourceResultType GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const;
	virtual OperatorPartitionData GetPartitionData(ExecutionContext &context, DataChunk &chunk,
	                                               GlobalSourceState &gstate, LocalSourceState &lstate,
	                                               const OperatorPartitionInfo &partition_info) const;

	virtual bool IsSource() const {
		return false;
	}
	virtual bool ParallelSource() const {
		return false;
	}
	//! Default: partitioning is supported only if no specific partitioning is required.
	virtual bool SupportsPartitioning(const OperatorPartitionInfo &partition_info) const {
		if (partition_info.AnyRequired()) {
			return false;
		}
		return true;
	}

	//! The type of order emitted by the operator (as a source)
	virtual OrderPreservationType SourceOrder() const {
		return OrderPreservationType::INSERTION_ORDER;
	}

	//! Returns the current progress percentage, or a negative value if progress bars are not supported
	virtual ProgressData GetProgress(ClientContext &context, GlobalSourceState &gstate) const;
	//! Returns the sink's progress; the default implementation passes the source progress through unchanged
	virtual ProgressData GetSinkProgress(ClientContext &context, GlobalSinkState &gstate,
	                                     const ProgressData source_progress) const {
		return source_progress;
	}
	virtual InsertionOrderPreservingMap ExtraSourceParams(GlobalSourceState &gstate,
	                                                      LocalSourceState &lstate) const {
		return InsertionOrderPreservingMap();
	}

public: // Sink interface
	//! The sink method is called constantly with new input, as long as new input is available. Note that this method
	//! CAN be called in parallel, proper locking is needed when accessing data inside the GlobalSinkState.
	virtual SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const;
	//! The combine is called when a single thread has completed execution of its part of the pipeline, it is the final
	//! time that a specific LocalSinkState is accessible. This method can be called in parallel while other Sink() or
	//! Combine() calls are active on the same GlobalSinkState.
	virtual SinkCombineResultType Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const;
	//! (optional) function that will be called before Finalize
	//! For now, its only use is to communicate memory usage in multi-join pipelines through TemporaryMemoryManager
	virtual void PrepareFinalize(ClientContext &context, GlobalSinkState &sink_state) const;
	//! The finalize is called when ALL threads are finished execution. It is called only once per pipeline, and is
	//! entirely single threaded.
	//! If Finalize returns SinkResultType::Finished, the sink is marked as finished
	virtual SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
	                                  OperatorSinkFinalizeInput &input) const;
	//! For sinks with RequiresBatchIndex set to true, when a new batch starts being processed this method is called
	//! This allows flushing of the current batch (e.g. to disk)
	virtual SinkNextBatchType NextBatch(ExecutionContext &context, OperatorSinkNextBatchInput &input) const;

	virtual unique_ptr GetLocalSinkState(ExecutionContext &context) const;
	virtual unique_ptr GetGlobalSinkState(ClientContext &context) const;

	//! The maximum amount of memory the operator should use per thread.
	static idx_t GetMaxThreadMemory(ClientContext &context);
	//! Whether operator caching is allowed in the current execution context
	static bool OperatorCachingAllowed(ExecutionContext &context);

	virtual bool IsSink() const {
		return false;
	}
	virtual bool ParallelSink() const {
		return false;
	}
	virtual OperatorPartitionInfo RequiredPartitionInfo() const {
		return OperatorPartitionInfo::NoPartitionInfo();
	}
	//! Whether or not the sink operator depends on the order of the input chunks
	//! If this is set to true, we cannot do things like caching intermediate vectors
	virtual bool SinkOrderDependent() const {
		return false;
	}

public: // Pipeline construction
	virtual vector> GetSources() const;
	bool AllSourcesSupportBatchIndex() const;

	// NOTE(review): "&current" was mis-encoded as "¤t" in this extraction; restored here.
	virtual void BuildPipelines(Pipeline &current, MetaPipeline &meta_pipeline);

public:
	//! Checked cast: throws if the TARGET operator type does not match this operator's type.
	template TARGET &Cast() {
		if (TARGET::TYPE != PhysicalOperatorType::INVALID && type != TARGET::TYPE) {
			throw InternalException("Failed to cast physical operator to type - physical operator type mismatch");
		}
		return reinterpret_cast(*this);
	}

	template const TARGET &Cast() const {
		if (TARGET::TYPE != PhysicalOperatorType::INVALID && type != TARGET::TYPE) {
			throw InternalException("Failed to cast physical operator to type - physical operator type mismatch");
		}
		return reinterpret_cast(*this);
	}
};

//! Contains state for the CachingPhysicalOperator
class CachingOperatorState : public OperatorState {
public:
	~CachingOperatorState() override {
	}

	void Finalize(const PhysicalOperator &op, ExecutionContext &context) override {
	}

	//! Buffered chunk awaiting emission (see CachingPhysicalOperator::Execute).
	unique_ptr cached_chunk;
	bool initialized = false;
	//! Whether or not the chunk can be cached
	bool can_cache_chunk = false;
};

//! Base class that caches output from child Operator class. Note that Operators inheriting from this class should also
//! inherit their state class from the CachingOperatorState.
class CachingPhysicalOperator : public PhysicalOperator {
public:
	static constexpr const idx_t CACHE_THRESHOLD = 64;

	CachingPhysicalOperator(PhysicalPlan &physical_plan, PhysicalOperatorType type, vector types,
	                        idx_t estimated_cardinality);

	bool caching_supported;

public:
	//! This Execute will prevent small chunks from entering the pipeline, buffering them until a bigger chunk is
	//! created.
	OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
	                           GlobalOperatorState &gstate, OperatorState &state) const final;
	//! FinalExecute is used here to send out the remainder of the chunk (< STANDARD_VECTOR_SIZE) that we still had
	//! cached.
	OperatorFinalizeResultType FinalExecute(ExecutionContext &context, DataChunk &chunk, GlobalOperatorState &gstate,
	                                        OperatorState &state) const final;

	bool RequiresFinalExecute() const final {
		return caching_supported;
	}

protected:
	//! Child classes need to implement the ExecuteInternal method instead of the Execute
	virtual OperatorResultType ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
	                                           GlobalOperatorState &gstate, OperatorState &state) const = 0;

private:
	bool CanCacheType(const LogicalType &type);
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/function/table_function.hpp
//
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/common/table_column.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! A (name, type) pair describing a single table column.
struct TableColumn {
	TableColumn() = default;
	TableColumn(string name_p, LogicalType type_p) : name(std::move(name_p)), type(std::move(type_p)) {
	}

	string name;
	LogicalType type;

	void Serialize(Serializer &serializer) const;
	static TableColumn Deserialize(Deserializer &deserializer);
};

using virtual_column_map_t = unordered_map;

} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/function/partition_stats.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! How a table is partitioned by a given set of columns
enum class TablePartitionInfo : uint8_t {
	NOT_PARTITIONED,         // the table is not partitioned by the given set of columns
	SINGLE_VALUE_PARTITIONS, // each partition has exactly one unique value (e.g.
// bounds = [1,1][2,2][3,3])  -- continuation of the SINGLE_VALUE_PARTITIONS comment above
	OVERLAPPING_PARTITIONS, // the partitions overlap **only** at the boundaries (e.g. bounds = [1,2][2,3][3,4]
	DISJOINT_PARTITIONS     // the partitions are disjoint (e.g. bounds = [1,2][3,4][5,6])
};

//! Whether a partition row count is exact or an estimate.
enum class CountType { COUNT_EXACT, COUNT_APPROXIMATE };

// NOTE(review): template parameter/argument lists ("<...>") were stripped from this
// chunk by text extraction; restore them from the upstream DuckDB headers.

struct PartitionStatistics {
	PartitionStatistics();

	//! The row id start
	idx_t row_start;
	//! The amount of rows in the partition
	idx_t count;
	//! Whether or not the count is exact or approximate
	CountType count_type;
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/common/exception/binder_exception.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

//! Exception raised during binding; the templated constructors attach extra error
//! info (location/expression context) for the various source constructs.
class BinderException : public Exception {
public:
	DUCKDB_API explicit BinderException(const string &msg, const unordered_map &extra_info);
	DUCKDB_API explicit BinderException(const string &msg);

	template explicit BinderException(const string &msg, ARGS... params)
	    : BinderException(ConstructMessage(msg, params...)) {
	}
	template explicit BinderException(const TableRef &ref, const string &msg, ARGS... params)
	    : BinderException(ConstructMessage(msg, params...), Exception::InitializeExtraInfo(ref)) {
	}
	template explicit BinderException(const ParsedExpression &expr, const string &msg, ARGS... params)
	    : BinderException(ConstructMessage(msg, params...), Exception::InitializeExtraInfo(expr)) {
	}
	template explicit BinderException(const Expression &expr, const string &msg, ARGS... params)
	    : BinderException(ConstructMessage(msg, params...), Exception::InitializeExtraInfo(expr)) {
	}
	template explicit BinderException(QueryErrorContext error_context, const string &msg, ARGS... params)
	    : BinderException(ConstructMessage(msg, params...), Exception::InitializeExtraInfo(error_context)) {
	}
	template explicit BinderException(optional_idx error_location, const string &msg, ARGS... params)
	    : BinderException(ConstructMessage(msg, params...), Exception::InitializeExtraInfo(error_location)) {
	}

	static BinderException ColumnNotFound(const string &name, const vector &similar_bindings,
	                                      QueryErrorContext context = QueryErrorContext());
	static BinderException NoMatchingFunction(const string &catalog_name, const string &schema_name,
	                                          const string &name, const vector &arguments,
	                                          const vector &candidates);
	static BinderException Unsupported(ParsedExpression &expr, const string &message);
};

} // namespace duckdb

// NOTE(review): the header name of this #include was lost in extraction.
#include

namespace duckdb {
class BaseStatistics;
class LogicalDependencyList;
class LogicalGet;
class TableFunction;
class TableFilterSet;
class TableFunctionRef;
class TableCatalogEntry;
class SampleOptions;
struct MultiFileReader;
struct OperatorPartitionData;
struct OperatorPartitionInfo;

//! Opaque, function-specific info attached to a TableFunction.
struct TableFunctionInfo {
	DUCKDB_API virtual ~TableFunctionInfo();

	template TARGET &Cast() {
		DynamicCastCheck(this);
		return reinterpret_cast(*this);
	}
	template const TARGET &Cast() const {
		DynamicCastCheck(this);
		return reinterpret_cast(*this);
	}
};

//! Global (shared) execution state of a table function.
struct GlobalTableFunctionState {
public:
	// value returned from MaxThreads when as many threads as possible should be used
	constexpr static const int64_t MAX_THREADS = 999999999;

public:
	DUCKDB_API virtual ~GlobalTableFunctionState();

	virtual idx_t MaxThreads() const {
		return 1;
	}

	template TARGET &Cast() {
		DynamicCastCheck(this);
		return reinterpret_cast(*this);
	}
	template const TARGET &Cast() const {
		DynamicCastCheck(this);
		return reinterpret_cast(*this);
	}
};

//! Per-thread execution state of a table function.
struct LocalTableFunctionState {
	DUCKDB_API virtual ~LocalTableFunctionState();

	template TARGET &Cast() {
		DynamicCastCheck(this);
		return reinterpret_cast(*this);
	}
	template const TARGET &Cast() const {
		DynamicCastCheck(this);
		return reinterpret_cast(*this);
	}
};

//! Arguments handed to a table function's bind callback.
struct TableFunctionBindInput {
	TableFunctionBindInput(vector &inputs, named_parameter_map_t &named_parameters,
	                       vector &input_table_types, vector &input_table_names,
	                       optional_ptr info, optional_ptr binder, TableFunction &table_function,
	                       const TableFunctionRef &ref)
	    : inputs(inputs), named_parameters(named_parameters), input_table_types(input_table_types),
	      input_table_names(input_table_names), info(info), binder(binder), table_function(table_function),
	      ref(ref) {
	}

	vector &inputs;
	named_parameter_map_t &named_parameters;
	vector &input_table_types;
	vector &input_table_names;
	optional_ptr info;
	optional_ptr binder;
	TableFunction &table_function;
	const TableFunctionRef &ref;
};

//! Arguments handed to a table function's init callbacks. The two constructors accept
//! either plain column ids or full column indexes; each derives the other representation.
struct TableFunctionInitInput {
	TableFunctionInitInput(optional_ptr bind_data_p, vector column_ids_p,
	                       const vector &projection_ids_p, optional_ptr filters_p,
	                       optional_ptr sample_options_p = nullptr, optional_ptr op_p = nullptr)
	    : bind_data(bind_data_p), column_ids(std::move(column_ids_p)), projection_ids(projection_ids_p),
	      filters(filters_p), sample_options(sample_options_p), op(op_p) {
		// derive column_indexes from the plain column ids
		for (auto &col_id : column_ids) {
			column_indexes.emplace_back(col_id);
		}
	}

	TableFunctionInitInput(optional_ptr bind_data_p, vector column_indexes_p,
	                       const vector &projection_ids_p, optional_ptr filters_p,
	                       optional_ptr sample_options_p = nullptr, optional_ptr op_p = nullptr)
	    : bind_data(bind_data_p), column_indexes(std::move(column_indexes_p)), projection_ids(projection_ids_p),
	      filters(filters_p), sample_options(sample_options_p), op(op_p) {
		// derive the plain column ids from the column indexes
		for (auto &col_id : column_indexes) {
			column_ids.emplace_back(col_id.GetPrimaryIndex());
		}
	}

	optional_ptr bind_data;
	vector column_ids;
	vector column_indexes;
	const vector projection_ids;
	optional_ptr filters;
	optional_ptr sample_options;
	optional_ptr op;

	//! True when scanned filter columns can be dropped before leaving the scan.
	bool CanRemoveFilterColumns() const {
		if (projection_ids.empty()) {
			// No filter columns to remove.
			return false;
		}
		if (projection_ids.size() == column_ids.size()) {
			// Filter column is used in remainder of plan, so we cannot remove it.
			return false;
		}
		// Fewer columns need to be projected out than that we scan.
		return true;
	}
};

//! Arguments handed to the main table function callback.
struct TableFunctionInput {
public:
	TableFunctionInput(optional_ptr bind_data_p, optional_ptr local_state_p, optional_ptr global_state_p)
	    : bind_data(bind_data_p), local_state(local_state_p), global_state(global_state_p) {
	}

public:
	optional_ptr bind_data;
	optional_ptr local_state;
	optional_ptr global_state;
};

struct TableFunctionPartitionInput {
	TableFunctionPartitionInput(optional_ptr bind_data_p, const vector &partition_ids)
	    : bind_data(bind_data_p), partition_ids(partition_ids) {
	}

	optional_ptr bind_data;
	const vector &partition_ids;
};

//! Input for the pre-execution to_string callback.
struct TableFunctionToStringInput {
	TableFunctionToStringInput(const TableFunction &table_function_p, optional_ptr bind_data_p)
	    : table_function(table_function_p), bind_data(bind_data_p) {
	}

	const TableFunction &table_function;
	optional_ptr bind_data;
};

//! Input for the post-execution dynamic_to_string callback.
struct TableFunctionDynamicToStringInput {
	TableFunctionDynamicToStringInput(const TableFunction &table_function_p, optional_ptr bind_data_p,
	                                  optional_ptr local_state_p, optional_ptr global_state_p)
	    : table_function(table_function_p), bind_data(bind_data_p), local_state(local_state_p),
	      global_state(global_state_p) {
	}

	const TableFunction &table_function;
	optional_ptr bind_data;
	optional_ptr local_state;
	optional_ptr global_state;
};

struct TableFunctionGetPartitionInput {
public:
	TableFunctionGetPartitionInput(optional_ptr bind_data_p, optional_ptr local_state_p,
	                               optional_ptr global_state_p, const OperatorPartitionInfo &partition_info_p)
	    : bind_data(bind_data_p), local_state(local_state_p), global_state(global_state_p),
	      partition_info(partition_info_p) {
	}

public:
	optional_ptr bind_data;
	optional_ptr local_state;
	optional_ptr global_state;
	const OperatorPartitionInfo &partition_info;
};

struct GetPartitionStatsInput {
	GetPartitionStatsInput(const TableFunction &table_function_p, optional_ptr bind_data_p)
	    : table_function(table_function_p), bind_data(bind_data_p) {
	}

	const TableFunction &table_function;
	optional_ptr bind_data;
};

enum class ScanType : uint8_t {
	TABLE,
	PARQUET,
	EXTERNAL
};

// NOTE(review): template parameter/argument lists ("<...>") were stripped from this
// chunk by text extraction; restore them from the upstream DuckDB headers.

//! Extra bind information returned by a table function's get_bind_info callback:
//! the scan type, an optional table entry, and a string-keyed option map.
struct BindInfo {
public:
	explicit BindInfo(ScanType type_p) : type(type_p) {};
	explicit BindInfo(TableCatalogEntry &table) : type(ScanType::TABLE), table(&table) {};

	unordered_map options;
	ScanType type;
	optional_ptr table;

	//! Insert an option; throws InternalException if the key already exists.
	void InsertOption(const string &name, Value value) { // NOLINT: work-around bug in clang-tidy
		if (options.find(name) != options.end()) {
			throw InternalException("This option already exists");
		}
		options.emplace(name, std::move(value));
	}
	//! Fetch a scalar option; throws InternalException if the key is missing.
	template T GetOption(const string &name) {
		if (options.find(name) == options.end()) {
			throw InternalException("This option does not exist");
		}
		return options[name].GetValue();
	}
	//! Fetch a LIST-typed option as a vector; throws if missing or not a LIST.
	template vector GetOptionList(const string &name) {
		if (options.find(name) == options.end()) {
			throw InternalException("This option does not exist");
		}
		auto option = options[name];
		if (option.type().id() != LogicalTypeId::LIST) {
			throw InternalException("This option is not a list");
		}
		vector result;
		auto list_children = ListValue::GetChildren(option);
		for (auto &child : list_children) {
			result.emplace_back(child.GetValue());
		}
		return result;
	}
};

// Function-pointer types for the optional callbacks a TableFunction can provide.
typedef unique_ptr (*table_function_bind_t)(ClientContext &context, TableFunctionBindInput &input,
                                            vector &return_types, vector &names);
typedef unique_ptr (*table_function_bind_replace_t)(ClientContext &context, TableFunctionBindInput &input);
typedef unique_ptr (*table_function_bind_operator_t)(ClientContext &context, TableFunctionBindInput &input,
                                                     idx_t bind_index, vector &return_names);
typedef unique_ptr (*table_function_init_global_t)(ClientContext &context, TableFunctionInitInput &input);
typedef unique_ptr (*table_function_init_local_t)(ExecutionContext &context, TableFunctionInitInput &input,
                                                  GlobalTableFunctionState *global_state);
typedef unique_ptr (*table_statistics_t)(ClientContext &context, const FunctionData *bind_data,
                                         column_t column_index);
typedef void (*table_function_t)(ClientContext &context, TableFunctionInput &data, DataChunk &output);
typedef OperatorResultType (*table_in_out_function_t)(ExecutionContext &context, TableFunctionInput &data,
                                                      DataChunk &input, DataChunk &output);
typedef OperatorFinalizeResultType (*table_in_out_function_final_t)(ExecutionContext &context,
                                                                    TableFunctionInput &data, DataChunk &output);
typedef OperatorPartitionData (*table_function_get_partition_data_t)(ClientContext &context,
                                                                     TableFunctionGetPartitionInput &input);
typedef BindInfo (*table_function_get_bind_info_t)(const optional_ptr bind_data);
typedef unique_ptr (*table_function_get_multi_file_reader_t)(const TableFunction &);
typedef bool (*table_function_supports_pushdown_type_t)(const FunctionData &bind_data, idx_t col_idx);
typedef double (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data,
                                            const GlobalTableFunctionState *global_state);
typedef void (*table_function_dependency_t)(LogicalDependencyList &dependencies, const FunctionData *bind_data);
typedef unique_ptr (*table_function_cardinality_t)(ClientContext &context, const FunctionData *bind_data);
typedef void (*table_function_pushdown_complex_filter_t)(ClientContext &context, LogicalGet &get,
                                                         FunctionData *bind_data, vector> &filters);
typedef bool (*table_function_pushdown_expression_t)(ClientContext &context, const LogicalGet &get, Expression &expr);
typedef InsertionOrderPreservingMap (*table_function_to_string_t)(TableFunctionToStringInput &input);
typedef InsertionOrderPreservingMap (*table_function_dynamic_to_string_t)(
    TableFunctionDynamicToStringInput &input);
typedef void (*table_function_serialize_t)(Serializer &serializer, const optional_ptr bind_data,
                                           const TableFunction &function);
typedef unique_ptr (*table_function_deserialize_t)(Deserializer &deserializer, TableFunction &function);
typedef void (*table_function_type_pushdown_t)(ClientContext &context, optional_ptr bind_data,
                                               const unordered_map &new_column_types);
typedef TablePartitionInfo (*table_function_get_partition_info_t)(ClientContext &context,
                                                                  TableFunctionPartitionInput &input);
typedef vector (*table_function_get_partition_stats_t)(ClientContext &context, GetPartitionStatsInput &input);
typedef virtual_column_map_t (*table_function_get_virtual_columns_t)(ClientContext &context,
                                                                     optional_ptr bind_data);
typedef vector (*table_function_get_row_id_columns)(ClientContext &context, optional_ptr bind_data);

//! When to call init_global to initialize the table function
enum class TableFunctionInitialization { INITIALIZE_ON_EXECUTE, INITIALIZE_ON_SCHEDULE };

class TableFunction : public SimpleNamedParameterFunction { // NOLINT: work-around bug in clang-tidy
public:
	DUCKDB_API TableFunction(string name, vector arguments, table_function_t function,
	                         table_function_bind_t bind = nullptr, table_function_init_global_t init_global = nullptr,
	                         table_function_init_local_t init_local = nullptr);
	DUCKDB_API TableFunction(const vector &arguments, table_function_t function,
	                         table_function_bind_t bind = nullptr, table_function_init_global_t init_global = nullptr,
	                         table_function_init_local_t init_local = nullptr);
	DUCKDB_API TableFunction();

	//! Bind function
	//! This function is used for determining the return type of a table producing function and returning bind data
	//! The returned FunctionData object should be constant and should not be changed during execution.
	table_function_bind_t bind;
	//! (Optional) Bind replace function
	//! This function is called before the regular bind function. It allows returning a TableRef that will be used
	//! to generate a logical plan that replaces the LogicalGet of a regularly bound TableFunction. The BindReplace can
	//! also return a nullptr to indicate a regular bind needs to be performed instead.
	table_function_bind_replace_t bind_replace;
	//! (Optional) Bind operator function
	//! This function is called before the regular bind function - similar to bind_replace - but allows returning a
	//! custom LogicalOperator instead.
	table_function_bind_operator_t bind_operator;
	//!
	// NOTE(review): template parameter/argument lists ("<...>") were stripped from this
	// chunk by text extraction; restore them from the upstream DuckDB headers.

	//! (Optional) global init function
	//! Initialize the global operator state of the function.
	//! The global operator state is used to keep track of the progress in the table function and is shared between
	//! all threads working on the table function.
	table_function_init_global_t init_global;
	//! (Optional) local init function
	//! Initialize the local operator state of the function.
	//! The local operator state is used to keep track of the progress in the table function and is thread-local.
	table_function_init_local_t init_local;
	//! The main function
	table_function_t function;
	//! The table in-out function (if this is an in-out function)
	table_in_out_function_t in_out_function;
	//! The table in-out final function (if this is an in-out function)
	table_in_out_function_final_t in_out_function_final;
	//! (Optional) statistics function
	//! Returns the statistics of a specified column
	table_statistics_t statistics;
	//! (Optional) dependency function
	//! Sets up which catalog entries this table function depend on
	table_function_dependency_t dependency;
	//! (Optional) cardinality function
	//! Returns the expected cardinality of this scan
	table_function_cardinality_t cardinality;
	//! (Optional) pushdown a set of arbitrary filter expressions, rather than only simple comparisons with a constant
	//! Any functions remaining in the expression list will be pushed as a regular filter after the scan
	table_function_pushdown_complex_filter_t pushdown_complex_filter;
	//! (Optional) whether or not this table function supports pushing down an expression into a TableFilter
	table_function_pushdown_expression_t pushdown_expression;
	//! (Optional) function for rendering the operator to a string in explain/profiling output (invoked pre-execution)
	table_function_to_string_t to_string;
	//! (Optional) function for rendering the operator to a string in profiling output (invoked post-execution)
	table_function_dynamic_to_string_t dynamic_to_string;
	//! (Optional) return how much of the table we have scanned up to this point (% of the data)
	table_function_progress_t table_scan_progress;
	//! (Optional) returns the partition info of the current scan operator
	table_function_get_partition_data_t get_partition_data;
	//! (Optional) returns extra bind info
	table_function_get_bind_info_t get_bind_info;
	//! (Optional) pushes down type information to scanner, returns true if pushdown was successful
	table_function_type_pushdown_t type_pushdown;
	//! (Optional) allows injecting a custom MultiFileReader implementation
	table_function_get_multi_file_reader_t get_multi_file_reader;
	//! (Optional) If this scanner supports filter pushdown, but not to all data types
	table_function_supports_pushdown_type_t supports_pushdown_type;
	//! Get partition info of the table
	table_function_get_partition_info_t get_partition_info;
	//! (Optional) get a list of all the partition stats of the table
	table_function_get_partition_stats_t get_partition_stats;
	//! (Optional) returns a list of virtual columns emitted by the table function
	table_function_get_virtual_columns_t get_virtual_columns;
	//! (Optional) returns a list of row id columns
	table_function_get_row_id_columns get_row_id_columns;

	table_function_serialize_t serialize;
	table_function_deserialize_t deserialize;
	bool verify_serialization = true;

	//! Whether or not the table function supports projection pushdown. If not supported a projection will be added
	//! that filters out unused columns.
	bool projection_pushdown;
	//! Whether or not the table function supports filter pushdown. If not supported a filter will be added
	//! that applies the table filter directly.
	bool filter_pushdown;
	//! Whether or not the table function can immediately prune out filter columns that are unused in the remainder of
	//! the query plan, e.g., "SELECT i FROM tbl WHERE j = 42;" - j does not need to leave the table function at all
	bool filter_prune;
	//! Whether or not the table function supports sampling pushdown. If not supported a sample will be taken after the
	//! table function.
	bool sampling_pushdown;
	//! Whether or not the table function supports late materialization
	bool late_materialization;
	//! Additional function info, passed to the bind
	shared_ptr function_info;

	//! When to call init_global
	//! By default init_global is called when the pipeline is ready for execution
	//! If this is set to `INITIALIZE_ON_SCHEDULE` the table function is initialized when the query is scheduled
	TableFunctionInitialization global_initialization = TableFunctionInitialization::INITIALIZE_ON_EXECUTE;

	DUCKDB_API bool Equal(const TableFunction &rhs) const;
	DUCKDB_API bool operator==(const TableFunction &rhs) const;
	DUCKDB_API bool operator!=(const TableFunction &rhs) const;
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/parallel/task_scheduler.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {

struct ConcurrentQueue;
struct QueueProducerToken;
class ClientContext;
class DatabaseInstance;
class TaskScheduler;
struct SchedulerThread;

//! Handle identifying one producer of tasks in the scheduler's queue.
struct ProducerToken {
	ProducerToken(TaskScheduler &scheduler, unique_ptr token);
	~ProducerToken();

	TaskScheduler &scheduler;
	unique_ptr token;
	mutex producer_lock;
};

//! The TaskScheduler is responsible for managing tasks and threads
class TaskScheduler {
	// timeout for semaphore wait, default 5ms
	constexpr static int64_t TASK_TIMEOUT_USECS = 5000;

public:
	explicit TaskScheduler(DatabaseInstance &db);
	~TaskScheduler();

	DUCKDB_API static TaskScheduler &GetScheduler(ClientContext &context);
	DUCKDB_API static TaskScheduler &GetScheduler(DatabaseInstance &db);

	unique_ptr CreateProducer();
	//! Schedule a task to be executed by the task scheduler
	void ScheduleTask(ProducerToken &producer, shared_ptr task);
	void ScheduleTasks(ProducerToken &producer, vector> &tasks);
	//!
	// NOTE(review): template parameter/argument lists ("<...>") were stripped from this
	// chunk by text extraction; restore them from the upstream DuckDB headers.

	//! Fetches a task from a specific producer, returns true if successful or false if no tasks were available
	bool GetTaskFromProducer(ProducerToken &token, shared_ptr &task);
	//! Run tasks forever until "marker" is set to false, "marker" must remain valid until the thread is joined
	void ExecuteForever(atomic *marker);
	//! Run tasks until `marker` is set to false, `max_tasks` have been completed, or until there are no more tasks
	//! available. Returns the number of tasks that were completed.
	idx_t ExecuteTasks(atomic *marker, idx_t max_tasks);
	//! Run tasks until `max_tasks` have been completed, or until there are no more tasks available
	void ExecuteTasks(idx_t max_tasks);

	//! Sets the amount of background threads to be used for execution, based on the number of total threads
	//! and the number of external threads. External threads, e.g. the main thread, will also be used for execution.
	//! Launches `total_threads - external_threads` background worker threads.
	void SetThreads(idx_t total_threads, idx_t external_threads);

	void RelaunchThreads();

	//! Returns the number of threads
	DUCKDB_API int32_t NumberOfThreads();

	idx_t GetNumberOfTasks() const;
	idx_t GetProducerCount() const;
	idx_t GetTaskCountForProducer(ProducerToken &token) const;

	//! Send signals to n threads, signalling for them to wake up and attempt to execute a task
	void Signal(idx_t n);

	//! Yield to other threads
	static void YieldThread();

	//! Set the allocator flush threshold
	void SetAllocatorFlushTreshold(idx_t threshold);
	//! Sets the allocator background thread
	void SetAllocatorBackgroundThreads(bool enable);

	//! Get the number of the CPU on which the calling thread is currently executing.
	//! Fallback to calling thread id if CPU number is not available.
	//! Result do not need to be exact 'return 0' is a valid fallback strategy
	static idx_t GetEstimatedCPUId();

private:
	void RelaunchThreadsInternal(int32_t n);

private:
	DatabaseInstance &db;
	//! The task queue
	unique_ptr queue;
	//! Lock for modifying the thread count
	mutex thread_lock;
	//! The active background threads of the task scheduler
	vector> threads;
	//! Markers used by the various threads, if the markers are set to "false" the thread execution is stopped
	vector>> markers;
	//! The threshold after which to flush the allocator after completing a task
	atomic allocator_flush_threshold;
	//! Whether allocator background threads are enabled
	atomic allocator_background_threads;
	//! Requested thread count (set by the 'threads' setting)
	atomic requested_thread_count;
	//! The amount of threads currently running
	atomic current_thread_count;
};

} // namespace duckdb

//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/parallel/executor_task.hpp
//
//
//===----------------------------------------------------------------------===//

namespace duckdb {
class Event;
class PhysicalOperator;
class ThreadContext;

//! Execute a task within an executor, including exception handling
//! This should be used within queries
class ExecutorTask : public Task {
public:
	ExecutorTask(Executor &executor, shared_ptr event);
	ExecutorTask(ClientContext &context, shared_ptr event, const PhysicalOperator &op);
	~ExecutorTask() override;

public:
	void Deschedule() override;
	void Reschedule() override;

public:
	Executor &executor;
	shared_ptr event;
	unique_ptr thread_context;
	optional_ptr op;

private:
	ClientContext &context;

public:
	virtual TaskExecutionResult ExecuteTask(TaskExecutionMode mode) = 0;
	TaskExecutionResult Execute(TaskExecutionMode mode) override;
};

} // namespace duckdb

namespace duckdb {
class Executor;
class MetaPipeline;
class PipelineExecutor;
class Pipeline;

//! Task that drives one pipeline via a PipelineExecutor.
class PipelineTask : public ExecutorTask {
	static constexpr const idx_t PARTIAL_CHUNK_COUNT = 50;

public:
	explicit PipelineTask(Pipeline &pipeline_p, shared_ptr event_p);

	Pipeline &pipeline;
	unique_ptr pipeline_executor;

	string TaskType() const override {
		return "PipelineTask";
	}

public:
	const PipelineExecutor &GetPipelineExecutor() const;
	bool TaskBlockedOnResult() const override;

public:
	TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;
};

// NOTE(review): this class continues beyond the end of this chunk.
class PipelineBuildState {
public:
	//! How much to increment batch indexes when multiple pipelines share the same source
	constexpr static idx_t BATCH_INCREMENT = 10000000000000;

public:
	//! Duplicate eliminated join scan dependencies
	reference_map_t> delim_join_dependencies;
	//!
Materialized CTE scan dependencies reference_map_t> cte_dependencies; public: void SetPipelineSource(Pipeline &pipeline, PhysicalOperator &op); void SetPipelineSink(Pipeline &pipeline, optional_ptr op, idx_t sink_pipeline_count); void SetPipelineOperators(Pipeline &pipeline, vector> operators); void AddPipelineOperator(Pipeline &pipeline, PhysicalOperator &op); shared_ptr CreateChildPipeline(Executor &executor, Pipeline &pipeline, PhysicalOperator &op); optional_ptr GetPipelineSource(Pipeline &pipeline); optional_ptr GetPipelineSink(Pipeline &pipeline); vector> GetPipelineOperators(Pipeline &pipeline); }; //! The Pipeline class represents an execution pipeline starting at a class Pipeline : public enable_shared_from_this { friend class Executor; friend class PipelineExecutor; friend class PipelineEvent; friend class PipelineFinishEvent; friend class PipelineBuildState; friend class MetaPipeline; public: explicit Pipeline(Executor &execution_context); Executor &executor; public: ClientContext &GetClientContext(); void AddDependency(shared_ptr &pipeline); void Ready(); void Reset(); void ResetSink(); void ResetSource(bool force); void ClearSource(); void Schedule(shared_ptr &event); void PrepareFinalize(); string ToString() const; void Print() const; void PrintDependencies() const; //! Returns query progress bool GetProgress(ProgressData &progress_data); //! Returns a list of all operators (including source and sink) involved in this pipeline vector> GetOperators(); vector> GetOperators() const; const vector> &GetIntermediateOperators() const; optional_ptr GetSink() { return sink; } optional_ptr GetSource() { return source; } //! Returns whether any of the operators in the pipeline care about preserving order bool IsOrderDependent() const; //! Registers a new batch index for a pipeline executor - returns the current minimum batch index idx_t RegisterNewBatchIndex(); //! 
Updates the batch index of a pipeline (and returns the new minimum batch index) idx_t UpdateBatchIndex(idx_t old_index, idx_t new_index); private: //! Whether or not the pipeline has been readied bool ready; //! Whether or not the pipeline has been initialized atomic initialized; //! The source of this pipeline optional_ptr source; //! The chain of intermediate operators vector> operators; //! The sink (i.e. destination) for data; this is e.g. a hash table to-be-built optional_ptr sink; //! The global source state unique_ptr source_state; //! The parent pipelines (i.e. pipelines that are dependent on this pipeline to finish) vector> parents; //! The dependencies of this pipeline vector> dependencies; //! The base batch index of this pipeline idx_t base_batch_index = 0; //! Lock for accessing the set of batch indexes mutex batch_lock; //! The set of batch indexes that are currently being processed //! Despite batch indexes being unique - this is a multiset //! The reason is that when we start a new pipeline we insert the current minimum batch index as a placeholder //! 
Which leads to duplicate entries in the set of active batch indexes multiset batch_indexes; private: void ScheduleSequentialTask(shared_ptr &event); bool LaunchScanTasks(shared_ptr &event, idx_t max_threads); bool ScheduleParallel(shared_ptr &event); }; } // namespace duckdb #include namespace duckdb { class ClientContext; class DataChunk; class PhysicalOperator; class PipelineExecutor; class OperatorState; class QueryProfiler; class ThreadContext; class Task; struct PipelineEventStack; struct ProducerToken; struct ScheduleEventData; class Executor { friend class Pipeline; friend class PipelineTask; friend class PipelineBuildState; public: static constexpr idx_t WAIT_TIME = 20; public: explicit Executor(ClientContext &context); ~Executor(); ClientContext &context; public: static Executor &Get(ClientContext &context); void Initialize(PhysicalOperator &physical_plan); void CancelTasks(); PendingExecutionResult ExecuteTask(bool dry_run = false); void WaitForTask(); void SignalTaskRescheduled(lock_guard &); void Reset(); vector GetTypes(); //! Push a new error void PushError(ErrorData exception); ErrorData GetError(); //! True if an error has been thrown bool HasError(); //! Throw the exception that was pushed using PushError. //! Should only be called if HasError returns true void ThrowException(); //! Work on tasks for this specific executor, until there are no tasks remaining void WorkOnTasks(); //! Flush a thread context into the client context void Flush(ThreadContext &context); //! Reschedules a task that was blocked void RescheduleTask(shared_ptr &task); //! Add the task to be rescheduled void AddToBeRescheduled(shared_ptr &task); //! 
Returns the progress of the pipelines idx_t GetPipelinesProgress(ProgressData &progress); void CompletePipeline() { completed_pipelines++; } ProducerToken &GetToken() { return *producer; } void AddEvent(shared_ptr event); void AddRecursiveCTE(PhysicalOperator &rec_cte); void ReschedulePipelines(const vector> &pipelines, vector> &events); //! Whether or not the root of the pipeline is a result collector object bool HasResultCollector(); //! Whether or not the root of the pipeline is a streaming result collector object bool HasStreamingResultCollector(); //! Returns the query result - can only be used if `HasResultCollector` returns true unique_ptr GetResult(); //! Returns true if all pipelines have been completed bool ExecutionIsFinished(); void RegisterTask() { executor_tasks++; } void UnregisterTask() { executor_tasks--; } idx_t GetTotalPipelines() const { return total_pipelines; } idx_t GetCompletedPipelines() const { return completed_pipelines.load(); } private: //! Check if the streaming query result is waiting to be fetched from, must hold the 'executor_lock' bool ResultCollectorIsBlocked(); void InitializeInternal(PhysicalOperator &physical_plan); void ScheduleEvents(const vector> &meta_pipelines); void ScheduleEventsInternal(ScheduleEventData &event_data); static void VerifyScheduledEvents(const ScheduleEventData &event_data); static void VerifyScheduledEventsInternal(const idx_t i, const vector> &vertices, vector &visited, vector &recursion_stack); void SchedulePipeline(const shared_ptr &pipeline, ScheduleEventData &event_data); bool NextExecutor(); shared_ptr CreateChildPipeline(Pipeline ¤t, PhysicalOperator &op); void VerifyPipeline(Pipeline &pipeline); void VerifyPipelines(); private: optional_ptr physical_plan; mutex executor_lock; //! All pipelines of the query plan vector> pipelines; //! The root pipelines of the query vector> root_pipelines; //! The recursive CTE's in this query plan vector> recursive_ctes; //! 
The pipeline executor for the root pipeline unique_ptr root_executor; //! The current root pipeline index idx_t root_pipeline_idx; //! The producer of this query unique_ptr producer; //! List of events vector> events; //! The query profiler shared_ptr profiler; //! Task error manager TaskErrorManager error_manager; //! The amount of completed pipelines of the query atomic completed_pipelines; //! The total amount of pipelines in the query idx_t total_pipelines; //! Whether or not execution is cancelled bool cancelled; //! The last pending execution result (if any) PendingExecutionResult execution_result; //! The current task in process (if any) shared_ptr task; //! Task that have been descheduled unordered_map> to_be_rescheduled_tasks; //! The semaphore to signal task rescheduling std::condition_variable task_reschedule; //! Currently alive executor tasks atomic executor_tasks; //! Total time blocked while waiting on tasks. In ticks. One tick corresponds to WAIT_TIME. atomic blocked_thread_time; }; } // namespace duckdb namespace duckdb { class ClientContext; class ClientContextLock; class PreparedStatementData; class PendingQueryResult : public BaseQueryResult { friend class ClientContext; public: static constexpr const QueryResultType TYPE = QueryResultType::PENDING_RESULT; public: DUCKDB_API PendingQueryResult(shared_ptr context, PreparedStatementData &statement, vector types, bool allow_stream_result); DUCKDB_API explicit PendingQueryResult(ErrorData error_message); DUCKDB_API ~PendingQueryResult() override; DUCKDB_API bool AllowStreamResult() const; PendingQueryResult(const PendingQueryResult &) = delete; PendingQueryResult &operator=(const PendingQueryResult &) = delete; public: //! Executes a single task within the query, returning whether or not the query is ready. //! If this returns RESULT_READY, the Execute function can be called to obtain a pointer to the result. //! If this returns RESULT_NOT_READY, the ExecuteTask function should be called again. //! 
If this returns EXECUTION_ERROR, an error occurred during execution. //! If this returns NO_TASKS_AVAILABLE, this means currently no meaningful work can be done by the current executor, //! but tasks may become available in the future. //! The error message can be obtained by calling GetError() on the PendingQueryResult. DUCKDB_API PendingExecutionResult ExecuteTask(); DUCKDB_API PendingExecutionResult CheckPulse(); //! Halt execution of the thread until a Task is ready to be executed (use with caution) void WaitForTask(); //! Returns the result of the query as an actual query result. //! This returns (mostly) instantly if ExecuteTask has been called until RESULT_READY was returned. DUCKDB_API unique_ptr Execute(); DUCKDB_API void Close(); //! Function to determine whether execution is considered finished DUCKDB_API static bool IsResultReady(PendingExecutionResult result); DUCKDB_API static bool IsExecutionFinished(PendingExecutionResult result); private: shared_ptr context; bool allow_stream_result; private: void CheckExecutableInternal(ClientContextLock &lock); PendingExecutionResult ExecuteTaskInternal(ClientContextLock &lock); unique_ptr ExecuteInternal(ClientContextLock &lock); unique_ptr LockContext(); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/prepared_statement.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/planner/expression/bound_parameter_data.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { struct BoundParameterData { public: BoundParameterData() { } explicit BoundParameterData(Value val) : value(std::move(val)), return_type(GetDefaultType(value.type())) { } BoundParameterData(Value val, LogicalType type_p) : value(std::move(val)), 
return_type(std::move(type_p)) { } private: Value value; public: LogicalType return_type; public: void SetValue(Value val) { value = std::move(val); } const Value &GetValue() const { return value; } void Serialize(Serializer &serializer) const; static shared_ptr Deserialize(Deserializer &deserializer); private: LogicalType GetDefaultType(const LogicalType &type) { if (value.type().id() == LogicalTypeId::VARCHAR && StringType::GetCollation(type).empty()) { return LogicalTypeId::STRING_LITERAL; } return value.type(); } }; } // namespace duckdb namespace duckdb { class ClientContext; class PreparedStatementData; //! A prepared statement class PreparedStatement { public: //! Create a successfully prepared prepared statement object with the given name DUCKDB_API PreparedStatement(shared_ptr context, shared_ptr data, string query, case_insensitive_map_t named_param_map); //! Create a prepared statement that was not successfully prepared DUCKDB_API explicit PreparedStatement(ErrorData error); DUCKDB_API ~PreparedStatement(); public: //! The client context this prepared statement belongs to shared_ptr context; //! The prepared statement data shared_ptr data; //! The query that is being prepared string query; //! Whether or not the statement was successfully prepared bool success; //! The error message (if success = false) ErrorData error; //! The parameter mapping case_insensitive_map_t named_param_map; public: //! Returns the stored error message DUCKDB_API const string &GetError(); //! Returns the stored error object DUCKDB_API ErrorData &GetErrorObject(); //! Returns whether or not an error occurred DUCKDB_API bool HasError() const; //! Returns the number of columns in the result DUCKDB_API idx_t ColumnCount(); //! Returns the statement type of the underlying prepared statement object DUCKDB_API StatementType GetStatementType(); //! Returns the underlying statement properties DUCKDB_API StatementProperties GetStatementProperties(); //! 
Returns the result SQL types of the prepared statement DUCKDB_API const vector &GetTypes(); //! Returns the result names of the prepared statement DUCKDB_API const vector &GetNames(); //! Returns the map of parameter index to the expected type of parameter DUCKDB_API case_insensitive_map_t GetExpectedParameterTypes() const; //! Create a pending query result of the prepared statement with the given set of arguments template unique_ptr PendingQuery(ARGS... args) { vector values; return PendingQueryRecursive(values, args...); } //! Create a pending query result of the prepared statement with the given set of arguments DUCKDB_API unique_ptr PendingQuery(vector &values, bool allow_stream_result = true); //! Create a pending query result of the prepared statement with the given set named arguments DUCKDB_API unique_ptr PendingQuery(case_insensitive_map_t &named_values, bool allow_stream_result = true); //! Execute the prepared statement with the given set of values DUCKDB_API unique_ptr Execute(vector &values, bool allow_stream_result = true); //! Execute the prepared statement with the given set of named+unnamed values DUCKDB_API unique_ptr Execute(case_insensitive_map_t &named_values, bool allow_stream_result = true); //! Execute the prepared statement with the given set of arguments template unique_ptr Execute(ARGS... 
args) { vector values; return ExecuteRecursive(values, args...); } template static string ExcessValuesException(const case_insensitive_map_t ¶meters, const case_insensitive_map_t &values) { // Too many values set excess_set; for (auto &pair : values) { auto &name = pair.first; if (!parameters.count(name)) { excess_set.insert(name); } } vector excess_values; for (auto &val : excess_set) { excess_values.push_back(val); } return StringUtil::Format("Parameter argument/count mismatch, identifiers of the excess parameters: %s", StringUtil::Join(excess_values, ", ")); } template static string MissingValuesException(const case_insensitive_map_t ¶meters, const case_insensitive_map_t &values) { // Missing values set missing_set; for (auto &pair : parameters) { auto &name = pair.first; if (!values.count(name)) { missing_set.insert(name); } } vector missing_values; for (auto &val : missing_set) { missing_values.push_back(val); } return StringUtil::Format("Values were not provided for the following prepared statement parameters: %s", StringUtil::Join(missing_values, ", ")); } template static void VerifyParameters(const case_insensitive_map_t &provided, const case_insensitive_map_t &expected) { if (expected.size() == provided.size()) { // Same amount of identifiers, if for (auto &pair : expected) { auto &identifier = pair.first; if (!provided.count(identifier)) { throw InvalidInputException(MissingValuesException(expected, provided)); } } return; } // Mismatch in expected and provided parameters/values if (expected.size() > provided.size()) { throw InvalidInputException(MissingValuesException(expected, provided)); } else { D_ASSERT(provided.size() > expected.size()); throw InvalidInputException(ExcessValuesException(expected, provided)); } } private: unique_ptr PendingQueryRecursive(vector &values) { return PendingQuery(values); } template unique_ptr PendingQueryRecursive(vector &values, T value, ARGS... 
args) { values.push_back(Value::CreateValue(value)); return PendingQueryRecursive(values, args...); } unique_ptr ExecuteRecursive(vector &values) { return Execute(values); } template unique_ptr ExecuteRecursive(vector &values, T value, ARGS... args) { values.push_back(Value::CreateValue(value)); return ExecuteRecursive(values, args...); } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/relation.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/join_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Join Types //===--------------------------------------------------------------------===// enum class JoinType : uint8_t { INVALID = 0, // invalid join type LEFT = 1, // left RIGHT = 2, // right INNER = 3, // inner OUTER = 4, // outer SEMI = 5, // LEFT SEMI join returns left side row ONLY if it has a join partner, no duplicates. ANTI = 6, // LEFT ANTI join returns left side row ONLY if it has NO join partner, no duplicates MARK = 7, // MARK join returns marker indicating whether or not there is a join partner (true), there is no join // partner (false) SINGLE = 8, // SINGLE join is like LEFT OUTER JOIN, BUT returns at most one join partner per entry on the LEFT side // (and NULL if no partner is found) RIGHT_SEMI = 9, // RIGHT SEMI join is created by the optimizer when the children of a semi join need to be switched // so that the build side can be the smaller table RIGHT_ANTI = 10 // RIGHT ANTI join is created by the optimizer when the children of an anti join need to be // switched so that the build side can be the smaller table }; //! 
True if join is left or full outer join bool IsLeftOuterJoin(JoinType type); //! True if join is rght or full outer join bool IsRightOuterJoin(JoinType type); //! Whether the build side is propagated out of the join bool PropagatesBuildSide(JoinType type); //! Whether the JoinType has an inverse bool HasInverseJoinType(JoinType type); //! Gets the inverse JoinType, e.g., LEFT -> RIGHT JoinType InverseJoinType(JoinType type); // **DEPRECATED**: Use EnumUtil directly instead. string JoinTypeToString(JoinType type); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/relation_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Catalog Types //===--------------------------------------------------------------------===// enum class RelationType : uint8_t { INVALID_RELATION, TABLE_RELATION, PROJECTION_RELATION, FILTER_RELATION, EXPLAIN_RELATION, CROSS_PRODUCT_RELATION, JOIN_RELATION, AGGREGATE_RELATION, SET_OPERATION_RELATION, DISTINCT_RELATION, LIMIT_RELATION, ORDER_RELATION, CREATE_VIEW_RELATION, CREATE_TABLE_RELATION, INSERT_RELATION, VALUE_LIST_RELATION, MATERIALIZED_RELATION, DELETE_RELATION, UPDATE_RELATION, WRITE_CSV_RELATION, WRITE_PARQUET_RELATION, READ_CSV_RELATION, SUBQUERY_RELATION, TABLE_FUNCTION_RELATION, VIEW_RELATION, QUERY_RELATION, DELIM_JOIN_RELATION, DELIM_GET_RELATION }; string RelationTypeToString(RelationType type); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/joinref_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { //===--------------------------------------------------------------------===// // Join Reference Types 
//===--------------------------------------------------------------------===// enum class JoinRefType : uint8_t { REGULAR, // Explicit conditions NATURAL, // Implied conditions CROSS, // No condition POSITIONAL, // Positional condition ASOF, // AsOf conditions DEPENDENT, // Dependent join conditions }; const char *ToString(JoinRefType value); } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/client_context.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/schema_catalog_entry.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_set.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/default/default_generator.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class ClientContext; class DefaultGenerator { public: explicit DefaultGenerator(Catalog &catalog); virtual ~DefaultGenerator(); Catalog &catalog; atomic created_all_entries; public: //! Creates a default entry with the specified name, or returns nullptr if no such entry can be generated virtual unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name); virtual unique_ptr CreateDefaultEntry(CatalogTransaction transaction, const string &entry_name); //! 
Get a list of all default entries in the generator virtual vector GetDefaultEntries() = 0; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/transaction/transaction.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/standard_entry.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/dependency_list.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/catalog_entry_map.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class CatalogEntry; struct CatalogEntryHashFunction { uint64_t operator()(const reference &a) const { std::hash hash_func; return hash_func((void *)&a.get()); } }; struct CatalogEntryEquality { bool operator()(const reference &a, const reference &b) const { return RefersToSameObject(a, b); } }; using catalog_entry_set_t = unordered_set, CatalogEntryHashFunction, CatalogEntryEquality>; template using catalog_entry_map_t = unordered_map, T, CatalogEntryHashFunction, CatalogEntryEquality>; using catalog_entry_vector_t = vector>; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/dependency.hpp // // 
//===----------------------------------------------------------------------===// namespace duckdb { class CatalogEntry; struct DependencyFlags { public: DependencyFlags() : value(0) { } DependencyFlags(const DependencyFlags &other) : value(other.value) { } virtual ~DependencyFlags() = default; DependencyFlags &operator=(const DependencyFlags &other) { value = other.value; return *this; } bool operator==(const DependencyFlags &other) const { return other.value == value; } bool operator!=(const DependencyFlags &other) const { return !(*this == other); } public: virtual string ToString() const = 0; protected: template bool IsSet() const { static const uint8_t FLAG = (1 << BIT); return (value & FLAG) == FLAG; } template void Set() { static const uint8_t FLAG = (1 << BIT); value |= FLAG; } void Merge(uint8_t other) { value |= other; } uint8_t Value() { return value; } private: uint8_t value; }; struct DependencySubjectFlags : public DependencyFlags { private: static constexpr uint8_t OWNERSHIP = 0; public: DependencySubjectFlags &Apply(DependencySubjectFlags other) { Merge(other.Value()); return *this; } public: bool IsOwnership() const { return IsSet(); } public: DependencySubjectFlags &SetOwnership() { Set(); return *this; } public: string ToString() const override { string result; if (IsOwnership()) { result += "OWNS"; } return result; } }; struct DependencyDependentFlags : public DependencyFlags { private: static constexpr uint8_t BLOCKING = 0; static constexpr uint8_t OWNED_BY = 1; public: DependencyDependentFlags &Apply(DependencyDependentFlags other) { Merge(other.Value()); return *this; } public: bool IsBlocking() const { return IsSet(); } bool IsOwnedBy() const { return IsSet(); } public: DependencyDependentFlags &SetBlocking() { Set(); return *this; } DependencyDependentFlags &SetOwnedBy() { Set(); return *this; } public: string ToString() const override { string result; if (IsBlocking()) { result += "REGULAR"; } else { result += "AUTOMATIC"; } result += " | 
"; if (IsOwnedBy()) { result += "OWNED BY"; } return result; } }; struct CatalogEntryInfo { public: CatalogType type; string schema; string name; public: bool operator==(const CatalogEntryInfo &other) const { if (other.type != type) { return false; } if (!StringUtil::CIEquals(other.schema, schema)) { return false; } if (!StringUtil::CIEquals(other.name, name)) { return false; } return true; } public: void Serialize(Serializer &serializer) const; static CatalogEntryInfo Deserialize(Deserializer &deserializer); }; struct Dependency { Dependency(CatalogEntry &entry, // NOLINT: Allow implicit conversion from `CatalogEntry` DependencyDependentFlags flags = DependencyDependentFlags().SetBlocking()) : entry(entry), flags(std::move(flags)) { } //! The catalog entry this depends on reference entry; //! The type of dependency DependencyDependentFlags flags; }; struct DependencyHashFunction { uint64_t operator()(const Dependency &a) const { std::hash hash_func; return hash_func((void *)&a.entry.get()); } }; struct DependencyEquality { bool operator()(const Dependency &a, const Dependency &b) const { return RefersToSameObject(a.entry, b.entry); } }; using dependency_set_t = unordered_set; } // namespace duckdb namespace duckdb { class Catalog; class CatalogEntry; struct CreateInfo; class SchemaCatalogEntry; struct CatalogTransaction; class LogicalDependencyList; //! A minimal representation of a CreateInfo / CatalogEntry //! 
enough to look up the entry inside SchemaCatalogEntry::GetEntry struct LogicalDependency { public: CatalogEntryInfo entry; string catalog; public: explicit LogicalDependency(CatalogEntry &entry); LogicalDependency(); LogicalDependency(optional_ptr catalog, CatalogEntryInfo entry, string catalog_str); bool operator==(const LogicalDependency &other) const; public: void Serialize(Serializer &serializer) const; static LogicalDependency Deserialize(Deserializer &deserializer); }; struct LogicalDependencyHashFunction { uint64_t operator()(const LogicalDependency &a) const; }; struct LogicalDependencyEquality { bool operator()(const LogicalDependency &a, const LogicalDependency &b) const; }; //! The LogicalDependencyList containing LogicalDependency objects, not looked up in the catalog yet class LogicalDependencyList { using create_info_set_t = unordered_set; public: DUCKDB_API void AddDependency(CatalogEntry &entry); DUCKDB_API void AddDependency(const LogicalDependency &entry); DUCKDB_API bool Contains(CatalogEntry &entry); public: DUCKDB_API void VerifyDependencies(Catalog &catalog, const string &name); void Serialize(Serializer &serializer) const; static LogicalDependencyList Deserialize(Deserializer &deserializer); bool operator==(const LogicalDependencyList &other) const; const create_info_set_t &Set() const; private: create_info_set_t set; }; } // namespace duckdb namespace duckdb { class SchemaCatalogEntry; //! A StandardEntry is a catalog entry that is a member of a schema class StandardEntry : public InCatalogEntry { public: StandardEntry(CatalogType type, SchemaCatalogEntry &schema, Catalog &catalog, string name) : InCatalogEntry(type, catalog, std::move(name)), schema(schema) { } ~StandardEntry() override { } //! The schema the entry belongs to SchemaCatalogEntry &schema; //! 
The dependencies of the entry, can be empty LogicalDependencyList dependencies; public: SchemaCatalogEntry &ParentSchema() override { return schema; } const SchemaCatalogEntry &ParentSchema() const override { return schema; } }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/create_sequence_info.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/create_info.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/parse_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class CatalogType : uint8_t; enum class ParseInfoType : uint8_t { ALTER_INFO, ATTACH_INFO, COPY_INFO, CREATE_INFO, CREATE_SECRET_INFO, DETACH_INFO, DROP_INFO, BOUND_EXPORT_DATA, LOAD_INFO, PRAGMA_INFO, SHOW_SELECT_INFO, TRANSACTION_INFO, VACUUM_INFO, COMMENT_ON_INFO, COMMENT_ON_COLUMN_INFO, COPY_DATABASE_INFO, UPDATE_EXTENSIONS_INFO }; struct ParseInfo { explicit ParseInfo(ParseInfoType info_type) : info_type(info_type) { } virtual ~ParseInfo() { } ParseInfoType info_type; public: template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } virtual void Serialize(Serializer &serializer) const; static unique_ptr Deserialize(Deserializer &deserializer); static string QualifierToString(const string &catalog, const string &schema, const string &name); static string TypeToString(CatalogType type); }; } // namespace duckdb //------------------------------------------------------------------------- // This file 
is automatically generated by scripts/generate_enum_util.py // Do not edit this file manually, your changes will be overwritten // If you want to exclude an enum from serialization, add it to the blacklist in the script // // Note: The generated code will only work properly if the enum is a top level item in the duckdb namespace // If the enum is nested in a class, or in another namespace, the generated code will not compile. // You should move the enum to the duckdb namespace, manually write a specialization or add it to the blacklist //------------------------------------------------------------------------- #include namespace duckdb { struct EnumUtil { // String -> Enum template static T FromString(const char *value) = delete; template static T FromString(const string &value) { return FromString(value.c_str()); } // Enum -> String template static const char *ToChars(T value) = delete; template static string ToString(T value) { return string(ToChars(value)); } }; enum class ARTConflictType : uint8_t; enum class ARTHandlingResult : uint8_t; enum class ARTScanHandling : uint8_t; enum class AccessMode : uint8_t; enum class AggregateCombineType : uint8_t; enum class AggregateDistinctDependent : uint8_t; enum class AggregateHandling : uint8_t; enum class AggregateOrderDependent : uint8_t; enum class AggregateType : uint8_t; enum class AlterForeignKeyType : uint8_t; enum class AlterScalarFunctionType : uint8_t; enum class AlterTableFunctionType : uint8_t; enum class AlterTableType : uint8_t; enum class AlterType : uint8_t; enum class AlterViewType : uint8_t; enum class AppenderType : uint8_t; enum class ArrowArrayPhysicalType : uint8_t; enum class ArrowDateTimeType : uint8_t; enum class ArrowFormatVersion : uint8_t; enum class ArrowOffsetSize : uint8_t; enum class ArrowTypeInfoType : uint8_t; enum class ArrowVariableSizeType : uint8_t; enum class BinderType : uint8_t; enum class BindingMode : uint8_t; enum class BitpackingMode : uint8_t; enum class 
BlockIteratorStateType : int8_t; enum class BlockState : uint8_t; enum class BufferedIndexReplay : uint8_t; enum class CAPIResultSetType : uint8_t; enum class CSVState : uint8_t; enum class CTEMaterialize : uint8_t; enum class CatalogLookupBehavior : uint8_t; enum class CatalogType : uint8_t; enum class CheckpointAbort : uint8_t; enum class ChunkInfoType : uint8_t; enum class ColumnDataAllocatorType : uint8_t; enum class ColumnDataScanProperties : uint8_t; enum class ColumnSegmentType : uint8_t; enum class CompressedMaterializationDirection : uint8_t; enum class CompressionType : uint8_t; enum class CompressionValidity : uint8_t; enum class ConflictManagerMode : uint8_t; enum class ConstraintType : uint8_t; enum class CopyFunctionReturnType : uint8_t; enum class CopyOverwriteMode : uint8_t; enum class CopyToType : uint8_t; enum class DataFileType : uint8_t; enum class DateCastResult : uint8_t; enum class DatePartSpecifier : uint8_t; enum class DebugInitialize : uint8_t; enum class DebugVectorVerification : uint8_t; enum class DecimalBitWidth : uint8_t; enum class DefaultOrderByNullType : uint8_t; enum class DependencyEntryType : uint8_t; enum class DeprecatedIndexType : uint8_t; enum class DestroyBufferUpon : uint8_t; enum class DistinctType : uint8_t; enum class ErrorType : uint16_t; enum class ExceptionFormatValueType : uint8_t; enum class ExceptionType : uint8_t; enum class ExplainFormat : uint8_t; enum class ExplainOutputType : uint8_t; enum class ExplainType : uint8_t; enum class ExponentType : uint8_t; enum class ExpressionClass : uint8_t; enum class ExpressionType : uint8_t; enum class ExtensionABIType : uint8_t; enum class ExtensionInstallMode : uint8_t; enum class ExtensionLoadResult : uint8_t; enum class ExtensionUpdateResultTag : uint8_t; enum class ExtraDropInfoType : uint8_t; enum class ExtraTypeInfoType : uint8_t; enum class FileBufferType : uint8_t; enum class FileCompressionType : uint8_t; enum class FileExpandResult : uint8_t; enum class 
FileGlobOptions : uint8_t; enum class FileLockType : uint8_t; enum class FileNameSegmentType : uint8_t; enum class FilterPropagateResult : uint8_t; enum class ForeignKeyType : uint8_t; enum class FunctionCollationHandling : uint8_t; enum class FunctionErrors : uint8_t; enum class FunctionNullHandling : uint8_t; enum class FunctionStability : uint8_t; enum class GateStatus : uint8_t; enum class HLLStorageType : uint8_t; enum class HTTPStatusCode : uint16_t; enum class IndexAppendMode : uint8_t; enum class IndexBindState : uint8_t; enum class IndexConstraintType : uint8_t; enum class InsertColumnOrder : uint8_t; enum class InterruptMode : uint8_t; enum class JoinRefType : uint8_t; enum class JoinType : uint8_t; enum class KeywordCategory : uint8_t; enum class LambdaSyntax : uint8_t; enum class LambdaSyntaxType : uint8_t; enum class LimitNodeType : uint8_t; enum class LoadType : uint8_t; enum class LogContextScope : uint8_t; enum class LogLevel : uint8_t; enum class LogMode : uint8_t; enum class LoggingTargetTable : uint8_t; enum class LogicalOperatorType : uint8_t; enum class LogicalTypeId : uint8_t; enum class LookupResultType : uint8_t; enum class MacroType : uint8_t; enum class MapInvalidReason : uint8_t; enum class MemoryTag : uint8_t; enum class MergeActionCondition : uint8_t; enum class MergeActionType : uint8_t; enum class MetaPipelineType : uint8_t; enum class MetricsType : uint8_t; enum class MultiFileColumnMappingMode : uint8_t; enum class MultiFileFileState : uint8_t; enum class NType : uint8_t; enum class NewLineIdentifier : uint8_t; enum class OnConflictAction : uint8_t; enum class OnCreateConflict : uint8_t; enum class OnEntryNotFound : uint8_t; enum class OperatorFinalResultType : uint8_t; enum class OperatorFinalizeResultType : uint8_t; enum class OperatorResultType : uint8_t; enum class OptimizerType : uint32_t; enum class OrderByNullType : uint8_t; enum class OrderPreservationType : uint8_t; enum class OrderType : uint8_t; enum class OrdinalityType 
: uint8_t; enum class OutputStream : uint8_t; enum class ParseInfoType : uint8_t; enum class ParserExtensionResultType : uint8_t; enum class PartitionSortStage : uint8_t; enum class PartitionedColumnDataType : uint8_t; enum class PartitionedTupleDataType : uint8_t; enum class PendingExecutionResult : uint8_t; enum class PhysicalOperatorType : uint8_t; enum class PhysicalType : uint8_t; enum class PragmaType : uint8_t; enum class PreparedParamType : uint8_t; enum class PreparedStatementMode : uint8_t; enum class PreserveOrderType : uint8_t; enum class ProfilerPrintFormat : uint8_t; enum class ProfilingCoverage : uint8_t; enum class QuantileSerializationType : uint8_t; enum class QueryNodeType : uint8_t; enum class QueryResultType : uint8_t; enum class RelationType : uint8_t; enum class RenderMode : uint8_t; enum class RequestType : uint8_t; enum class ResultModifierType : uint8_t; enum class SampleMethod : uint8_t; enum class SampleType : uint8_t; enum class SamplingState : uint8_t; enum class ScanType : uint8_t; enum class SecretDisplayType : uint8_t; enum class SecretPersistType : uint8_t; enum class SecretSerializationType : uint8_t; enum class SequenceInfo : uint8_t; enum class SetOperationType : uint8_t; enum class SetScope : uint8_t; enum class SetType : uint8_t; enum class SettingScope : uint8_t; enum class ShowType : uint8_t; enum class SimplifiedTokenType : uint8_t; enum class SinkCombineResultType : uint8_t; enum class SinkFinalizeType : uint8_t; enum class SinkNextBatchType : uint8_t; enum class SinkResultType : uint8_t; enum class SortKeyType : uint8_t; enum class SourceResultType : uint8_t; enum class StarExpressionType : uint8_t; enum class StatementReturnType : uint8_t; enum class StatementType : uint8_t; enum class StatisticsType : uint8_t; enum class StatsInfo : uint8_t; enum class StrTimeSpecifier : uint8_t; enum class StreamExecutionResult : uint8_t; enum class SubqueryType : uint8_t; enum class TableColumnType : uint8_t; enum class 
TableFilterType : uint8_t; enum class TablePartitionInfo : uint8_t; enum class TableReferenceType : uint8_t; enum class TableScanType : uint8_t; enum class TaskExecutionMode : uint8_t; enum class TaskExecutionResult : uint8_t; enum class TemporaryBufferSize : uint64_t; enum class TemporaryCompressionLevel : int; enum class ThreadPinMode : uint8_t; enum class TimestampCastResult : uint8_t; enum class TransactionModifierType : uint8_t; enum class TransactionType : uint8_t; enum class TupleDataNestednessType : uint8_t; enum class TupleDataPinProperties : uint8_t; enum class TupleDataValidityType : uint8_t; enum class UndoFlags : uint32_t; enum class UnionInvalidReason : uint8_t; enum class VariantChildLookupMode : uint8_t; enum class VariantLogicalType : uint8_t; enum class VectorAuxiliaryDataType : uint8_t; enum class VectorBufferType : uint8_t; enum class VectorType : uint8_t; enum class VerificationType : uint8_t; enum class VerifyExistenceType : uint8_t; enum class WALType : uint8_t; enum class WindowAggregationMode : uint32_t; enum class WindowBoundary : uint8_t; enum class WindowExcludeMode : uint8_t; enum class WindowMergeSortStage : uint8_t; template<> const char* EnumUtil::ToChars(ARTConflictType value); template<> const char* EnumUtil::ToChars(ARTHandlingResult value); template<> const char* EnumUtil::ToChars(ARTScanHandling value); template<> const char* EnumUtil::ToChars(AccessMode value); template<> const char* EnumUtil::ToChars(AggregateCombineType value); template<> const char* EnumUtil::ToChars(AggregateDistinctDependent value); template<> const char* EnumUtil::ToChars(AggregateHandling value); template<> const char* EnumUtil::ToChars(AggregateOrderDependent value); template<> const char* EnumUtil::ToChars(AggregateType value); template<> const char* EnumUtil::ToChars(AlterForeignKeyType value); template<> const char* EnumUtil::ToChars(AlterScalarFunctionType value); template<> const char* EnumUtil::ToChars(AlterTableFunctionType value); template<> 
const char* EnumUtil::ToChars(AlterTableType value); template<> const char* EnumUtil::ToChars(AlterType value); template<> const char* EnumUtil::ToChars(AlterViewType value); template<> const char* EnumUtil::ToChars(AppenderType value); template<> const char* EnumUtil::ToChars(ArrowArrayPhysicalType value); template<> const char* EnumUtil::ToChars(ArrowDateTimeType value); template<> const char* EnumUtil::ToChars(ArrowFormatVersion value); template<> const char* EnumUtil::ToChars(ArrowOffsetSize value); template<> const char* EnumUtil::ToChars(ArrowTypeInfoType value); template<> const char* EnumUtil::ToChars(ArrowVariableSizeType value); template<> const char* EnumUtil::ToChars(BinderType value); template<> const char* EnumUtil::ToChars(BindingMode value); template<> const char* EnumUtil::ToChars(BitpackingMode value); template<> const char* EnumUtil::ToChars(BlockIteratorStateType value); template<> const char* EnumUtil::ToChars(BlockState value); template<> const char* EnumUtil::ToChars(BufferedIndexReplay value); template<> const char* EnumUtil::ToChars(CAPIResultSetType value); template<> const char* EnumUtil::ToChars(CSVState value); template<> const char* EnumUtil::ToChars(CTEMaterialize value); template<> const char* EnumUtil::ToChars(CatalogLookupBehavior value); template<> const char* EnumUtil::ToChars(CatalogType value); template<> const char* EnumUtil::ToChars(CheckpointAbort value); template<> const char* EnumUtil::ToChars(ChunkInfoType value); template<> const char* EnumUtil::ToChars(ColumnDataAllocatorType value); template<> const char* EnumUtil::ToChars(ColumnDataScanProperties value); template<> const char* EnumUtil::ToChars(ColumnSegmentType value); template<> const char* EnumUtil::ToChars(CompressedMaterializationDirection value); template<> const char* EnumUtil::ToChars(CompressionType value); template<> const char* EnumUtil::ToChars(CompressionValidity value); template<> const char* EnumUtil::ToChars(ConflictManagerMode value); template<> const 
char* EnumUtil::ToChars(ConstraintType value); template<> const char* EnumUtil::ToChars(CopyFunctionReturnType value); template<> const char* EnumUtil::ToChars(CopyOverwriteMode value); template<> const char* EnumUtil::ToChars(CopyToType value); template<> const char* EnumUtil::ToChars(DataFileType value); template<> const char* EnumUtil::ToChars(DateCastResult value); template<> const char* EnumUtil::ToChars(DatePartSpecifier value); template<> const char* EnumUtil::ToChars(DebugInitialize value); template<> const char* EnumUtil::ToChars(DebugVectorVerification value); template<> const char* EnumUtil::ToChars(DecimalBitWidth value); template<> const char* EnumUtil::ToChars(DefaultOrderByNullType value); template<> const char* EnumUtil::ToChars(DependencyEntryType value); template<> const char* EnumUtil::ToChars(DeprecatedIndexType value); template<> const char* EnumUtil::ToChars(DestroyBufferUpon value); template<> const char* EnumUtil::ToChars(DistinctType value); template<> const char* EnumUtil::ToChars(ErrorType value); template<> const char* EnumUtil::ToChars(ExceptionFormatValueType value); template<> const char* EnumUtil::ToChars(ExceptionType value); template<> const char* EnumUtil::ToChars(ExplainFormat value); template<> const char* EnumUtil::ToChars(ExplainOutputType value); template<> const char* EnumUtil::ToChars(ExplainType value); template<> const char* EnumUtil::ToChars(ExponentType value); template<> const char* EnumUtil::ToChars(ExpressionClass value); template<> const char* EnumUtil::ToChars(ExpressionType value); template<> const char* EnumUtil::ToChars(ExtensionABIType value); template<> const char* EnumUtil::ToChars(ExtensionInstallMode value); template<> const char* EnumUtil::ToChars(ExtensionLoadResult value); template<> const char* EnumUtil::ToChars(ExtensionUpdateResultTag value); template<> const char* EnumUtil::ToChars(ExtraDropInfoType value); template<> const char* EnumUtil::ToChars(ExtraTypeInfoType value); template<> const char* 
EnumUtil::ToChars(FileBufferType value); template<> const char* EnumUtil::ToChars(FileCompressionType value); template<> const char* EnumUtil::ToChars(FileExpandResult value); template<> const char* EnumUtil::ToChars(FileGlobOptions value); template<> const char* EnumUtil::ToChars(FileLockType value); template<> const char* EnumUtil::ToChars(FileNameSegmentType value); template<> const char* EnumUtil::ToChars(FilterPropagateResult value); template<> const char* EnumUtil::ToChars(ForeignKeyType value); template<> const char* EnumUtil::ToChars(FunctionCollationHandling value); template<> const char* EnumUtil::ToChars(FunctionErrors value); template<> const char* EnumUtil::ToChars(FunctionNullHandling value); template<> const char* EnumUtil::ToChars(FunctionStability value); template<> const char* EnumUtil::ToChars(GateStatus value); template<> const char* EnumUtil::ToChars(HLLStorageType value); template<> const char* EnumUtil::ToChars(HTTPStatusCode value); template<> const char* EnumUtil::ToChars(IndexAppendMode value); template<> const char* EnumUtil::ToChars(IndexBindState value); template<> const char* EnumUtil::ToChars(IndexConstraintType value); template<> const char* EnumUtil::ToChars(InsertColumnOrder value); template<> const char* EnumUtil::ToChars(InterruptMode value); template<> const char* EnumUtil::ToChars(JoinRefType value); template<> const char* EnumUtil::ToChars(JoinType value); template<> const char* EnumUtil::ToChars(KeywordCategory value); template<> const char* EnumUtil::ToChars(LambdaSyntax value); template<> const char* EnumUtil::ToChars(LambdaSyntaxType value); template<> const char* EnumUtil::ToChars(LimitNodeType value); template<> const char* EnumUtil::ToChars(LoadType value); template<> const char* EnumUtil::ToChars(LogContextScope value); template<> const char* EnumUtil::ToChars(LogLevel value); template<> const char* EnumUtil::ToChars(LogMode value); template<> const char* EnumUtil::ToChars(LoggingTargetTable value); template<> const 
char* EnumUtil::ToChars(LogicalOperatorType value); template<> const char* EnumUtil::ToChars(LogicalTypeId value); template<> const char* EnumUtil::ToChars(LookupResultType value); template<> const char* EnumUtil::ToChars(MacroType value); template<> const char* EnumUtil::ToChars(MapInvalidReason value); template<> const char* EnumUtil::ToChars(MemoryTag value); template<> const char* EnumUtil::ToChars(MergeActionCondition value); template<> const char* EnumUtil::ToChars(MergeActionType value); template<> const char* EnumUtil::ToChars(MetaPipelineType value); template<> const char* EnumUtil::ToChars(MetricsType value); template<> const char* EnumUtil::ToChars(MultiFileColumnMappingMode value); template<> const char* EnumUtil::ToChars(MultiFileFileState value); template<> const char* EnumUtil::ToChars(NType value); template<> const char* EnumUtil::ToChars(NewLineIdentifier value); template<> const char* EnumUtil::ToChars(OnConflictAction value); template<> const char* EnumUtil::ToChars(OnCreateConflict value); template<> const char* EnumUtil::ToChars(OnEntryNotFound value); template<> const char* EnumUtil::ToChars(OperatorFinalResultType value); template<> const char* EnumUtil::ToChars(OperatorFinalizeResultType value); template<> const char* EnumUtil::ToChars(OperatorResultType value); template<> const char* EnumUtil::ToChars(OptimizerType value); template<> const char* EnumUtil::ToChars(OrderByNullType value); template<> const char* EnumUtil::ToChars(OrderPreservationType value); template<> const char* EnumUtil::ToChars(OrderType value); template<> const char* EnumUtil::ToChars(OrdinalityType value); template<> const char* EnumUtil::ToChars(OutputStream value); template<> const char* EnumUtil::ToChars(ParseInfoType value); template<> const char* EnumUtil::ToChars(ParserExtensionResultType value); template<> const char* EnumUtil::ToChars(PartitionSortStage value); template<> const char* EnumUtil::ToChars(PartitionedColumnDataType value); template<> const char* 
EnumUtil::ToChars(PartitionedTupleDataType value); template<> const char* EnumUtil::ToChars(PendingExecutionResult value); template<> const char* EnumUtil::ToChars(PhysicalOperatorType value); template<> const char* EnumUtil::ToChars(PhysicalType value); template<> const char* EnumUtil::ToChars(PragmaType value); template<> const char* EnumUtil::ToChars(PreparedParamType value); template<> const char* EnumUtil::ToChars(PreparedStatementMode value); template<> const char* EnumUtil::ToChars(PreserveOrderType value); template<> const char* EnumUtil::ToChars(ProfilerPrintFormat value); template<> const char* EnumUtil::ToChars(ProfilingCoverage value); template<> const char* EnumUtil::ToChars(QuantileSerializationType value); template<> const char* EnumUtil::ToChars(QueryNodeType value); template<> const char* EnumUtil::ToChars(QueryResultType value); template<> const char* EnumUtil::ToChars(RelationType value); template<> const char* EnumUtil::ToChars(RenderMode value); template<> const char* EnumUtil::ToChars(RequestType value); template<> const char* EnumUtil::ToChars(ResultModifierType value); template<> const char* EnumUtil::ToChars(SampleMethod value); template<> const char* EnumUtil::ToChars(SampleType value); template<> const char* EnumUtil::ToChars(SamplingState value); template<> const char* EnumUtil::ToChars(ScanType value); template<> const char* EnumUtil::ToChars(SecretDisplayType value); template<> const char* EnumUtil::ToChars(SecretPersistType value); template<> const char* EnumUtil::ToChars(SecretSerializationType value); template<> const char* EnumUtil::ToChars(SequenceInfo value); template<> const char* EnumUtil::ToChars(SetOperationType value); template<> const char* EnumUtil::ToChars(SetScope value); template<> const char* EnumUtil::ToChars(SetType value); template<> const char* EnumUtil::ToChars(SettingScope value); template<> const char* EnumUtil::ToChars(ShowType value); template<> const char* EnumUtil::ToChars(SimplifiedTokenType value); 
template<> const char* EnumUtil::ToChars(SinkCombineResultType value); template<> const char* EnumUtil::ToChars(SinkFinalizeType value); template<> const char* EnumUtil::ToChars(SinkNextBatchType value); template<> const char* EnumUtil::ToChars(SinkResultType value); template<> const char* EnumUtil::ToChars(SortKeyType value); template<> const char* EnumUtil::ToChars(SourceResultType value); template<> const char* EnumUtil::ToChars(StarExpressionType value); template<> const char* EnumUtil::ToChars(StatementReturnType value); template<> const char* EnumUtil::ToChars(StatementType value); template<> const char* EnumUtil::ToChars(StatisticsType value); template<> const char* EnumUtil::ToChars(StatsInfo value); template<> const char* EnumUtil::ToChars(StrTimeSpecifier value); template<> const char* EnumUtil::ToChars(StreamExecutionResult value); template<> const char* EnumUtil::ToChars(SubqueryType value); template<> const char* EnumUtil::ToChars(TableColumnType value); template<> const char* EnumUtil::ToChars(TableFilterType value); template<> const char* EnumUtil::ToChars(TablePartitionInfo value); template<> const char* EnumUtil::ToChars(TableReferenceType value); template<> const char* EnumUtil::ToChars(TableScanType value); template<> const char* EnumUtil::ToChars(TaskExecutionMode value); template<> const char* EnumUtil::ToChars(TaskExecutionResult value); template<> const char* EnumUtil::ToChars(TemporaryBufferSize value); template<> const char* EnumUtil::ToChars(TemporaryCompressionLevel value); template<> const char* EnumUtil::ToChars(ThreadPinMode value); template<> const char* EnumUtil::ToChars(TimestampCastResult value); template<> const char* EnumUtil::ToChars(TransactionModifierType value); template<> const char* EnumUtil::ToChars(TransactionType value); template<> const char* EnumUtil::ToChars(TupleDataNestednessType value); template<> const char* EnumUtil::ToChars(TupleDataPinProperties value); template<> const char* 
EnumUtil::ToChars(TupleDataValidityType value); template<> const char* EnumUtil::ToChars(UndoFlags value); template<> const char* EnumUtil::ToChars(UnionInvalidReason value); template<> const char* EnumUtil::ToChars(VariantChildLookupMode value); template<> const char* EnumUtil::ToChars(VariantLogicalType value); template<> const char* EnumUtil::ToChars(VectorAuxiliaryDataType value); template<> const char* EnumUtil::ToChars(VectorBufferType value); template<> const char* EnumUtil::ToChars(VectorType value); template<> const char* EnumUtil::ToChars(VerificationType value); template<> const char* EnumUtil::ToChars(VerifyExistenceType value); template<> const char* EnumUtil::ToChars(WALType value); template<> const char* EnumUtil::ToChars(WindowAggregationMode value); template<> const char* EnumUtil::ToChars(WindowBoundary value); template<> const char* EnumUtil::ToChars(WindowExcludeMode value); template<> const char* EnumUtil::ToChars(WindowMergeSortStage value); template<> ARTConflictType EnumUtil::FromString(const char *value); template<> ARTHandlingResult EnumUtil::FromString(const char *value); template<> ARTScanHandling EnumUtil::FromString(const char *value); template<> AccessMode EnumUtil::FromString(const char *value); template<> AggregateCombineType EnumUtil::FromString(const char *value); template<> AggregateDistinctDependent EnumUtil::FromString(const char *value); template<> AggregateHandling EnumUtil::FromString(const char *value); template<> AggregateOrderDependent EnumUtil::FromString(const char *value); template<> AggregateType EnumUtil::FromString(const char *value); template<> AlterForeignKeyType EnumUtil::FromString(const char *value); template<> AlterScalarFunctionType EnumUtil::FromString(const char *value); template<> AlterTableFunctionType EnumUtil::FromString(const char *value); template<> AlterTableType EnumUtil::FromString(const char *value); template<> AlterType EnumUtil::FromString(const char *value); template<> AlterViewType 
EnumUtil::FromString(const char *value); template<> AppenderType EnumUtil::FromString(const char *value); template<> ArrowArrayPhysicalType EnumUtil::FromString(const char *value); template<> ArrowDateTimeType EnumUtil::FromString(const char *value); template<> ArrowFormatVersion EnumUtil::FromString(const char *value); template<> ArrowOffsetSize EnumUtil::FromString(const char *value); template<> ArrowTypeInfoType EnumUtil::FromString(const char *value); template<> ArrowVariableSizeType EnumUtil::FromString(const char *value); template<> BinderType EnumUtil::FromString(const char *value); template<> BindingMode EnumUtil::FromString(const char *value); template<> BitpackingMode EnumUtil::FromString(const char *value); template<> BlockIteratorStateType EnumUtil::FromString(const char *value); template<> BlockState EnumUtil::FromString(const char *value); template<> BufferedIndexReplay EnumUtil::FromString(const char *value); template<> CAPIResultSetType EnumUtil::FromString(const char *value); template<> CSVState EnumUtil::FromString(const char *value); template<> CTEMaterialize EnumUtil::FromString(const char *value); template<> CatalogLookupBehavior EnumUtil::FromString(const char *value); template<> CatalogType EnumUtil::FromString(const char *value); template<> CheckpointAbort EnumUtil::FromString(const char *value); template<> ChunkInfoType EnumUtil::FromString(const char *value); template<> ColumnDataAllocatorType EnumUtil::FromString(const char *value); template<> ColumnDataScanProperties EnumUtil::FromString(const char *value); template<> ColumnSegmentType EnumUtil::FromString(const char *value); template<> CompressedMaterializationDirection EnumUtil::FromString(const char *value); template<> CompressionType EnumUtil::FromString(const char *value); template<> CompressionValidity EnumUtil::FromString(const char *value); template<> ConflictManagerMode EnumUtil::FromString(const char *value); template<> ConstraintType EnumUtil::FromString(const char *value); 
template<> CopyFunctionReturnType EnumUtil::FromString(const char *value); template<> CopyOverwriteMode EnumUtil::FromString(const char *value); template<> CopyToType EnumUtil::FromString(const char *value); template<> DataFileType EnumUtil::FromString(const char *value); template<> DateCastResult EnumUtil::FromString(const char *value); template<> DatePartSpecifier EnumUtil::FromString(const char *value); template<> DebugInitialize EnumUtil::FromString(const char *value); template<> DebugVectorVerification EnumUtil::FromString(const char *value); template<> DecimalBitWidth EnumUtil::FromString(const char *value); template<> DefaultOrderByNullType EnumUtil::FromString(const char *value); template<> DependencyEntryType EnumUtil::FromString(const char *value); template<> DeprecatedIndexType EnumUtil::FromString(const char *value); template<> DestroyBufferUpon EnumUtil::FromString(const char *value); template<> DistinctType EnumUtil::FromString(const char *value); template<> ErrorType EnumUtil::FromString(const char *value); template<> ExceptionFormatValueType EnumUtil::FromString(const char *value); template<> ExceptionType EnumUtil::FromString(const char *value); template<> ExplainFormat EnumUtil::FromString(const char *value); template<> ExplainOutputType EnumUtil::FromString(const char *value); template<> ExplainType EnumUtil::FromString(const char *value); template<> ExponentType EnumUtil::FromString(const char *value); template<> ExpressionClass EnumUtil::FromString(const char *value); template<> ExpressionType EnumUtil::FromString(const char *value); template<> ExtensionABIType EnumUtil::FromString(const char *value); template<> ExtensionInstallMode EnumUtil::FromString(const char *value); template<> ExtensionLoadResult EnumUtil::FromString(const char *value); template<> ExtensionUpdateResultTag EnumUtil::FromString(const char *value); template<> ExtraDropInfoType EnumUtil::FromString(const char *value); template<> ExtraTypeInfoType EnumUtil::FromString(const 
char *value); template<> FileBufferType EnumUtil::FromString(const char *value); template<> FileCompressionType EnumUtil::FromString(const char *value); template<> FileExpandResult EnumUtil::FromString(const char *value); template<> FileGlobOptions EnumUtil::FromString(const char *value); template<> FileLockType EnumUtil::FromString(const char *value); template<> FileNameSegmentType EnumUtil::FromString(const char *value); template<> FilterPropagateResult EnumUtil::FromString(const char *value); template<> ForeignKeyType EnumUtil::FromString(const char *value); template<> FunctionCollationHandling EnumUtil::FromString(const char *value); template<> FunctionErrors EnumUtil::FromString(const char *value); template<> FunctionNullHandling EnumUtil::FromString(const char *value); template<> FunctionStability EnumUtil::FromString(const char *value); template<> GateStatus EnumUtil::FromString(const char *value); template<> HLLStorageType EnumUtil::FromString(const char *value); template<> HTTPStatusCode EnumUtil::FromString(const char *value); template<> IndexAppendMode EnumUtil::FromString(const char *value); template<> IndexBindState EnumUtil::FromString(const char *value); template<> IndexConstraintType EnumUtil::FromString(const char *value); template<> InsertColumnOrder EnumUtil::FromString(const char *value); template<> InterruptMode EnumUtil::FromString(const char *value); template<> JoinRefType EnumUtil::FromString(const char *value); template<> JoinType EnumUtil::FromString(const char *value); template<> KeywordCategory EnumUtil::FromString(const char *value); template<> LambdaSyntax EnumUtil::FromString(const char *value); template<> LambdaSyntaxType EnumUtil::FromString(const char *value); template<> LimitNodeType EnumUtil::FromString(const char *value); template<> LoadType EnumUtil::FromString(const char *value); template<> LogContextScope EnumUtil::FromString(const char *value); template<> LogLevel EnumUtil::FromString(const char *value); template<> LogMode 
EnumUtil::FromString(const char *value); template<> LoggingTargetTable EnumUtil::FromString(const char *value); template<> LogicalOperatorType EnumUtil::FromString(const char *value); template<> LogicalTypeId EnumUtil::FromString(const char *value); template<> LookupResultType EnumUtil::FromString(const char *value); template<> MacroType EnumUtil::FromString(const char *value); template<> MapInvalidReason EnumUtil::FromString(const char *value); template<> MemoryTag EnumUtil::FromString(const char *value); template<> MergeActionCondition EnumUtil::FromString(const char *value); template<> MergeActionType EnumUtil::FromString(const char *value); template<> MetaPipelineType EnumUtil::FromString(const char *value); template<> MetricsType EnumUtil::FromString(const char *value); template<> MultiFileColumnMappingMode EnumUtil::FromString(const char *value); template<> MultiFileFileState EnumUtil::FromString(const char *value); template<> NType EnumUtil::FromString(const char *value); template<> NewLineIdentifier EnumUtil::FromString(const char *value); template<> OnConflictAction EnumUtil::FromString(const char *value); template<> OnCreateConflict EnumUtil::FromString(const char *value); template<> OnEntryNotFound EnumUtil::FromString(const char *value); template<> OperatorFinalResultType EnumUtil::FromString(const char *value); template<> OperatorFinalizeResultType EnumUtil::FromString(const char *value); template<> OperatorResultType EnumUtil::FromString(const char *value); template<> OptimizerType EnumUtil::FromString(const char *value); template<> OrderByNullType EnumUtil::FromString(const char *value); template<> OrderPreservationType EnumUtil::FromString(const char *value); template<> OrderType EnumUtil::FromString(const char *value); template<> OrdinalityType EnumUtil::FromString(const char *value); template<> OutputStream EnumUtil::FromString(const char *value); template<> ParseInfoType EnumUtil::FromString(const char *value); template<> 
ParserExtensionResultType EnumUtil::FromString(const char *value); template<> PartitionSortStage EnumUtil::FromString(const char *value); template<> PartitionedColumnDataType EnumUtil::FromString(const char *value); template<> PartitionedTupleDataType EnumUtil::FromString(const char *value); template<> PendingExecutionResult EnumUtil::FromString(const char *value); template<> PhysicalOperatorType EnumUtil::FromString(const char *value); template<> PhysicalType EnumUtil::FromString(const char *value); template<> PragmaType EnumUtil::FromString(const char *value); template<> PreparedParamType EnumUtil::FromString(const char *value); template<> PreparedStatementMode EnumUtil::FromString(const char *value); template<> PreserveOrderType EnumUtil::FromString(const char *value); template<> ProfilerPrintFormat EnumUtil::FromString(const char *value); template<> ProfilingCoverage EnumUtil::FromString(const char *value); template<> QuantileSerializationType EnumUtil::FromString(const char *value); template<> QueryNodeType EnumUtil::FromString(const char *value); template<> QueryResultType EnumUtil::FromString(const char *value); template<> RelationType EnumUtil::FromString(const char *value); template<> RenderMode EnumUtil::FromString(const char *value); template<> RequestType EnumUtil::FromString(const char *value); template<> ResultModifierType EnumUtil::FromString(const char *value); template<> SampleMethod EnumUtil::FromString(const char *value); template<> SampleType EnumUtil::FromString(const char *value); template<> SamplingState EnumUtil::FromString(const char *value); template<> ScanType EnumUtil::FromString(const char *value); template<> SecretDisplayType EnumUtil::FromString(const char *value); template<> SecretPersistType EnumUtil::FromString(const char *value); template<> SecretSerializationType EnumUtil::FromString(const char *value); template<> SequenceInfo EnumUtil::FromString(const char *value); template<> SetOperationType EnumUtil::FromString(const char 
*value); template<> SetScope EnumUtil::FromString(const char *value); template<> SetType EnumUtil::FromString(const char *value); template<> SettingScope EnumUtil::FromString(const char *value); template<> ShowType EnumUtil::FromString(const char *value); template<> SimplifiedTokenType EnumUtil::FromString(const char *value); template<> SinkCombineResultType EnumUtil::FromString(const char *value); template<> SinkFinalizeType EnumUtil::FromString(const char *value); template<> SinkNextBatchType EnumUtil::FromString(const char *value); template<> SinkResultType EnumUtil::FromString(const char *value); template<> SortKeyType EnumUtil::FromString(const char *value); template<> SourceResultType EnumUtil::FromString(const char *value); template<> StarExpressionType EnumUtil::FromString(const char *value); template<> StatementReturnType EnumUtil::FromString(const char *value); template<> StatementType EnumUtil::FromString(const char *value); template<> StatisticsType EnumUtil::FromString(const char *value); template<> StatsInfo EnumUtil::FromString(const char *value); template<> StrTimeSpecifier EnumUtil::FromString(const char *value); template<> StreamExecutionResult EnumUtil::FromString(const char *value); template<> SubqueryType EnumUtil::FromString(const char *value); template<> TableColumnType EnumUtil::FromString(const char *value); template<> TableFilterType EnumUtil::FromString(const char *value); template<> TablePartitionInfo EnumUtil::FromString(const char *value); template<> TableReferenceType EnumUtil::FromString(const char *value); template<> TableScanType EnumUtil::FromString(const char *value); template<> TaskExecutionMode EnumUtil::FromString(const char *value); template<> TaskExecutionResult EnumUtil::FromString(const char *value); template<> TemporaryBufferSize EnumUtil::FromString(const char *value); template<> TemporaryCompressionLevel EnumUtil::FromString(const char *value); template<> ThreadPinMode EnumUtil::FromString(const char *value); template<> 
TimestampCastResult EnumUtil::FromString(const char *value); template<> TransactionModifierType EnumUtil::FromString(const char *value); template<> TransactionType EnumUtil::FromString(const char *value); template<> TupleDataNestednessType EnumUtil::FromString(const char *value); template<> TupleDataPinProperties EnumUtil::FromString(const char *value); template<> TupleDataValidityType EnumUtil::FromString(const char *value); template<> UndoFlags EnumUtil::FromString(const char *value); template<> UnionInvalidReason EnumUtil::FromString(const char *value); template<> VariantChildLookupMode EnumUtil::FromString(const char *value); template<> VariantLogicalType EnumUtil::FromString(const char *value); template<> VectorAuxiliaryDataType EnumUtil::FromString(const char *value); template<> VectorBufferType EnumUtil::FromString(const char *value); template<> VectorType EnumUtil::FromString(const char *value); template<> VerificationType EnumUtil::FromString(const char *value); template<> VerifyExistenceType EnumUtil::FromString(const char *value); template<> WALType EnumUtil::FromString(const char *value); template<> WindowAggregationMode EnumUtil::FromString(const char *value); template<> WindowBoundary EnumUtil::FromString(const char *value); template<> WindowExcludeMode EnumUtil::FromString(const char *value); template<> WindowMergeSortStage EnumUtil::FromString(const char *value); } //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/on_create_conflict.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class OnCreateConflict : uint8_t { // Standard: throw error ERROR_ON_CONFLICT, // CREATE IF NOT EXISTS, silently do nothing on conflict IGNORE_ON_CONFLICT, // CREATE OR REPLACE REPLACE_ON_CONFLICT, // Update on conflict - only support for functions. Add a function overload if the function already exists. 
ALTER_ON_CONFLICT }; } // namespace duckdb namespace duckdb { struct AlterInfo; struct CreateInfo : public ParseInfo { public: static constexpr const ParseInfoType TYPE = ParseInfoType::CREATE_INFO; public: explicit CreateInfo(CatalogType type, string schema = DEFAULT_SCHEMA, string catalog_p = INVALID_CATALOG) : ParseInfo(TYPE), type(type), catalog(std::move(catalog_p)), schema(std::move(schema)), on_conflict(OnCreateConflict::ERROR_ON_CONFLICT), temporary(false), internal(false) { } ~CreateInfo() override { } //! The to-be-created catalog type CatalogType type; //! The catalog name of the entry string catalog; //! The schema name of the entry string schema; //! What to do on create conflict OnCreateConflict on_conflict; //! Whether or not the entry is temporary bool temporary; //! Whether or not the entry is an internal entry bool internal; //! The SQL string of the CREATE statement string sql; //! The inherent dependencies of the created entry LogicalDependencyList dependencies; //! User provided comment Value comment; //! Key-value tags with additional metadata InsertionOrderPreservingMap tags; public: void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); virtual unique_ptr Copy() const = 0; DUCKDB_API void CopyProperties(CreateInfo &other) const; //! Generates an alter statement from the create statement - used for OnCreateConflict::ALTER_ON_CONFLICT DUCKDB_API virtual unique_ptr GetAlterInfo() const; //! 
Returns a string like "CREATE (OR REPLACE) (TEMPORARY) (IF NOT EXISTS) " for TABLE/VIEW/TYPE/MACRO DUCKDB_API string GetCreatePrefix(const string &entry) const; virtual string ToString() const { throw NotImplementedException("ToString not supported for this type of CreateInfo: '%s'", EnumUtil::ToString(info_type)); } }; } // namespace duckdb namespace duckdb { enum class SequenceInfo : uint8_t { // Sequence start SEQ_START, // Sequence increment SEQ_INC, // Sequence minimum value SEQ_MIN, // Sequence maximum value SEQ_MAX, // Sequence cycle option SEQ_CYCLE, // Sequence owner table SEQ_OWN }; struct CreateSequenceInfo : public CreateInfo { CreateSequenceInfo(); //! Sequence name to create string name; //! Usage count of the sequence uint64_t usage_count; //! The increment value int64_t increment; //! The minimum value of the sequence int64_t min_value; //! The maximum value of the sequence int64_t max_value; //! The start value of the sequence int64_t start_value; //! Whether or not the sequence cycles bool cycle; public: unique_ptr Copy() const override; public: DUCKDB_API void Serialize(Serializer &serializer) const override; DUCKDB_API static unique_ptr Deserialize(Deserializer &deserializer); string ToString() const override; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/alter_table_info.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/parsed_data/alter_info.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class AlterType : uint8_t { INVALID = 0, ALTER_TABLE = 1, ALTER_VIEW = 2, ALTER_SEQUENCE = 3, CHANGE_OWNERSHIP = 4, ALTER_SCALAR_FUNCTION = 5, ALTER_TABLE_FUNCTION = 6, SET_COMMENT = 7, SET_COLUMN_COMMENT = 8 }; struct AlterEntryData { 
AlterEntryData() { } AlterEntryData(string catalog_p, string schema_p, string name_p, OnEntryNotFound if_not_found) : catalog(std::move(catalog_p)), schema(std::move(schema_p)), name(std::move(name_p)), if_not_found(if_not_found) { } string catalog; string schema; string name; OnEntryNotFound if_not_found; }; struct AlterInfo : public ParseInfo { public: static constexpr const ParseInfoType TYPE = ParseInfoType::ALTER_INFO; public: AlterInfo(AlterType type, string catalog, string schema, string name, OnEntryNotFound if_not_found); ~AlterInfo() override; AlterType type; //! if exists OnEntryNotFound if_not_found; //! Catalog name to alter string catalog; //! Schema name to alter string schema; //! Entry name to alter string name; //! Allow altering internal entries bool allow_internal; public: virtual CatalogType GetCatalogType() const = 0; virtual unique_ptr Copy() const = 0; virtual string ToString() const = 0; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); virtual string GetColumnName() const { return ""; }; AlterEntryData GetAlterEntryData() const; bool IsAddPrimaryKey() const; protected: explicit AlterInfo(AlterType type); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/parser/constraint.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class Serializer; class Deserializer; //===--------------------------------------------------------------------===// // Constraint Types //===--------------------------------------------------------------------===// enum class ConstraintType : uint8_t { INVALID = 0, // invalid constraint type NOT_NULL = 1, // NOT NULL constraint CHECK = 2, // CHECK constraint UNIQUE = 3, // UNIQUE constraint FOREIGN_KEY = 4, // FOREIGN KEY constraint }; enum class ForeignKeyType : uint8_t { FK_TYPE_PRIMARY_KEY_TABLE = 0, // main table 
FK_TYPE_FOREIGN_KEY_TABLE = 1, // referencing table FK_TYPE_SELF_REFERENCE_TABLE = 2 // self refrencing table }; struct ForeignKeyInfo { ForeignKeyType type; string schema; //! if type is FK_TYPE_FOREIGN_KEY_TABLE, means main key table, if type is FK_TYPE_PRIMARY_KEY_TABLE, means foreign //! key table string table; //! The set of main key table's column's index vector pk_keys; //! The set of foreign key table's column's index vector fk_keys; bool IsDeleteConstraint() const { return type == ForeignKeyType::FK_TYPE_PRIMARY_KEY_TABLE || type == ForeignKeyType::FK_TYPE_SELF_REFERENCE_TABLE; } bool IsAppendConstraint() const { return type == ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE || type == ForeignKeyType::FK_TYPE_SELF_REFERENCE_TABLE; } }; //! Constraint is the base class of any type of table constraint. class Constraint { public: DUCKDB_API explicit Constraint(ConstraintType type); DUCKDB_API virtual ~Constraint(); ConstraintType type; public: DUCKDB_API virtual string ToString() const = 0; DUCKDB_API void Print() const; DUCKDB_API virtual unique_ptr Copy() const = 0; DUCKDB_API virtual void Serialize(Serializer &serializer) const; DUCKDB_API static unique_ptr Deserialize(Deserializer &deserializer); public: template TARGET &Cast() { if (type != TARGET::TYPE) { throw InternalException("Failed to cast constraint to type - constraint type mismatch"); } return reinterpret_cast(*this); } template const TARGET &Cast() const { if (type != TARGET::TYPE) { throw InternalException("Failed to cast constraint to type - constraint type mismatch"); } return reinterpret_cast(*this); } }; } // namespace duckdb namespace duckdb { enum class AlterForeignKeyType : uint8_t { AFT_ADD = 0, AFT_DELETE = 1 }; //===--------------------------------------------------------------------===// // Change Ownership //===--------------------------------------------------------------------===// struct ChangeOwnershipInfo : public AlterInfo { ChangeOwnershipInfo(CatalogType entry_catalog_type, 
string entry_catalog, string entry_schema, string entry_name, string owner_schema, string owner_name, OnEntryNotFound if_not_found); // Catalog type refers to the entry type, since this struct is usually built from an // ALTER . OWNED BY . statement // here it is only possible to know the type of who is to be owned CatalogType entry_catalog_type; string owner_schema; string owner_name; public: CatalogType GetCatalogType() const override; unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); explicit ChangeOwnershipInfo(); }; //===--------------------------------------------------------------------===// // Set Comment //===--------------------------------------------------------------------===// struct SetCommentInfo : public AlterInfo { SetCommentInfo(CatalogType entry_catalog_type, string entry_catalog, string entry_schema, string entry_name, Value new_comment_value_p, OnEntryNotFound if_not_found); CatalogType entry_catalog_type; Value comment_value; public: CatalogType GetCatalogType() const override; unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); explicit SetCommentInfo(); }; //===--------------------------------------------------------------------===// // Alter Table //===--------------------------------------------------------------------===// enum class AlterTableType : uint8_t { INVALID = 0, RENAME_COLUMN = 1, RENAME_TABLE = 2, ADD_COLUMN = 3, REMOVE_COLUMN = 4, ALTER_COLUMN_TYPE = 5, SET_DEFAULT = 6, FOREIGN_KEY_CONSTRAINT = 7, SET_NOT_NULL = 8, DROP_NOT_NULL = 9, SET_COLUMN_COMMENT = 10, ADD_CONSTRAINT = 11, SET_PARTITIONED_BY = 12, SET_SORTED_BY = 13, ADD_FIELD = 14, REMOVE_FIELD = 15, RENAME_FIELD = 16 }; struct AlterTableInfo : public AlterInfo { AlterTableInfo(AlterTableType type, 
AlterEntryData data); ~AlterTableInfo() override; AlterTableType alter_table_type; public: CatalogType GetCatalogType() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); protected: explicit AlterTableInfo(AlterTableType type); }; //===--------------------------------------------------------------------===// // RenameColumnInfo //===--------------------------------------------------------------------===// struct RenameColumnInfo : public AlterTableInfo { RenameColumnInfo(AlterEntryData data, string old_name_p, string new_name_p); ~RenameColumnInfo() override; //! Column old name string old_name; //! Column new name string new_name; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: RenameColumnInfo(); }; //===--------------------------------------------------------------------===// // RenameFieldInfo //===--------------------------------------------------------------------===// struct RenameFieldInfo : public AlterTableInfo { RenameFieldInfo(AlterEntryData data, vector column_path, string new_name_p); ~RenameFieldInfo() override; //! Path to source field. vector column_path; //! New name of the column (field). string new_name; public: unique_ptr Copy() const override; string ToString() const override; string GetColumnName() const override { return column_path[0]; } void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: RenameFieldInfo(); }; //===--------------------------------------------------------------------===// // RenameTableInfo //===--------------------------------------------------------------------===// struct RenameTableInfo : public AlterTableInfo { RenameTableInfo(AlterEntryData data, string new_name); ~RenameTableInfo() override; //! 
Relation new name string new_table_name; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: RenameTableInfo(); }; //===--------------------------------------------------------------------===// // AddColumnInfo //===--------------------------------------------------------------------===// struct AddColumnInfo : public AlterTableInfo { AddColumnInfo(AlterEntryData data, ColumnDefinition new_column, bool if_column_not_exists); ~AddColumnInfo() override; //! New column ColumnDefinition new_column; //! Whether or not an error should be thrown if the column exist bool if_column_not_exists; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: explicit AddColumnInfo(ColumnDefinition new_column); }; //===--------------------------------------------------------------------===// // AddFieldInfo //===--------------------------------------------------------------------===// struct AddFieldInfo : public AlterTableInfo { AddFieldInfo(AlterEntryData data, vector column_path, ColumnDefinition new_field, bool if_field_not_exists); ~AddFieldInfo() override; //! Path to source field. vector column_path; //! New field to add. ColumnDefinition new_field; //! Whether or not an error should be thrown if the field does not exist. 
bool if_field_not_exists; public: unique_ptr Copy() const override; string ToString() const override; string GetColumnName() const override { return column_path[0]; } void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: explicit AddFieldInfo(ColumnDefinition new_column); }; //===--------------------------------------------------------------------===// // RemoveColumnInfo //===--------------------------------------------------------------------===// struct RemoveColumnInfo : public AlterTableInfo { RemoveColumnInfo(AlterEntryData data, string removed_column, bool if_column_exists, bool cascade); ~RemoveColumnInfo() override; //! The column to remove string removed_column; //! Whether or not an error should be thrown if the column does not exist bool if_column_exists; //! Whether or not the column should be removed if a dependency conflict arises (used by GENERATED columns) bool cascade; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); string GetColumnName() const override { return removed_column; } private: RemoveColumnInfo(); }; //===--------------------------------------------------------------------===// // RemoveFieldInfo //===--------------------------------------------------------------------===// struct RemoveFieldInfo : public AlterTableInfo { RemoveFieldInfo(AlterEntryData data, vector column_path, bool if_column_exists, bool cascade); ~RemoveFieldInfo() override; //! Path to source field. vector column_path; //! Whether or not an error should be thrown if the column does not exist. bool if_column_exists; //! Whether or not the column should be removed if a dependency conflict arises (used by GENERATED columns). 
bool cascade; public: unique_ptr Copy() const override; string ToString() const override; string GetColumnName() const override { return column_path[0]; } void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: RemoveFieldInfo(); }; //===--------------------------------------------------------------------===// // ChangeColumnTypeInfo //===--------------------------------------------------------------------===// struct ChangeColumnTypeInfo : public AlterTableInfo { ChangeColumnTypeInfo(AlterEntryData data, string column_name, LogicalType target_type, unique_ptr expression); ~ChangeColumnTypeInfo() override; //! The column name to alter string column_name; //! The target type of the column LogicalType target_type; //! The expression used for data conversion unique_ptr expression; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); string GetColumnName() const override { return column_name; }; private: ChangeColumnTypeInfo(); }; //===--------------------------------------------------------------------===// // SetDefaultInfo //===--------------------------------------------------------------------===// struct SetDefaultInfo : public AlterTableInfo { SetDefaultInfo(AlterEntryData data, string column_name, unique_ptr new_default); ~SetDefaultInfo() override; //! The column name to alter string column_name; //! 
The expression used for data conversion unique_ptr expression; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: SetDefaultInfo(); }; //===--------------------------------------------------------------------===// // AlterForeignKeyInfo //===--------------------------------------------------------------------===// struct AlterForeignKeyInfo : public AlterTableInfo { AlterForeignKeyInfo(AlterEntryData data, string fk_table, vector pk_columns, vector fk_columns, vector pk_keys, vector fk_keys, AlterForeignKeyType type); ~AlterForeignKeyInfo() override; string fk_table; vector pk_columns; vector fk_columns; vector pk_keys; vector fk_keys; AlterForeignKeyType type; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: AlterForeignKeyInfo(); }; //===--------------------------------------------------------------------===// // SetNotNullInfo //===--------------------------------------------------------------------===// struct SetNotNullInfo : public AlterTableInfo { SetNotNullInfo(AlterEntryData data, string column_name); ~SetNotNullInfo() override; //! The column name to alter string column_name; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: SetNotNullInfo(); }; //===--------------------------------------------------------------------===// // DropNotNullInfo //===--------------------------------------------------------------------===// struct DropNotNullInfo : public AlterTableInfo { DropNotNullInfo(AlterEntryData data, string column_name); ~DropNotNullInfo() override; //! 
The column name to alter string column_name; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: DropNotNullInfo(); }; //===--------------------------------------------------------------------===// // Alter View //===--------------------------------------------------------------------===// enum class AlterViewType : uint8_t { INVALID = 0, RENAME_VIEW = 1 }; struct AlterViewInfo : public AlterInfo { AlterViewInfo(AlterViewType type, AlterEntryData data); ~AlterViewInfo() override; AlterViewType alter_view_type; public: CatalogType GetCatalogType() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); protected: explicit AlterViewInfo(AlterViewType type); }; //===--------------------------------------------------------------------===// // RenameViewInfo //===--------------------------------------------------------------------===// struct RenameViewInfo : public AlterViewInfo { RenameViewInfo(AlterEntryData data, string new_name); ~RenameViewInfo() override; //! Relation new name string new_view_name; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: RenameViewInfo(); }; //===--------------------------------------------------------------------===// // AddConstraintInfo //===--------------------------------------------------------------------===// struct AddConstraintInfo : public AlterTableInfo { AddConstraintInfo(AlterEntryData data, unique_ptr constraint); ~AddConstraintInfo() override; //! The constraint to add. 
unique_ptr constraint; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: AddConstraintInfo(); }; //===--------------------------------------------------------------------===// // SetPartitionedByInfo //===--------------------------------------------------------------------===// struct SetPartitionedByInfo : public AlterTableInfo { SetPartitionedByInfo(AlterEntryData data, vector> partition_keys); ~SetPartitionedByInfo() override; //! The partition keys vector> partition_keys; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: SetPartitionedByInfo(); }; //===--------------------------------------------------------------------===// // SetSortedByInfo //===--------------------------------------------------------------------===// struct SetSortedByInfo : public AlterTableInfo { SetSortedByInfo(AlterEntryData data, vector orders); ~SetSortedByInfo() override; //! The sort keys vector orders; public: unique_ptr Copy() const override; string ToString() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); private: SetSortedByInfo(); }; } // namespace duckdb namespace duckdb { class DuckTransaction; class SequenceCatalogEntry; struct SequenceValue { SequenceCatalogEntry *entry; uint64_t usage_count; int64_t counter; }; struct SequenceData { explicit SequenceData(CreateSequenceInfo &info); //! The amount of times the sequence has been used uint64_t usage_count; //! The sequence counter int64_t counter; //! The most recently returned value int64_t last_value; //! The increment value int64_t increment; //! The minimum value of the sequence int64_t start_value; //! 
The minimum value of the sequence int64_t min_value; //! The maximum value of the sequence int64_t max_value; //! Whether or not the sequence cycles bool cycle; }; //! A sequence catalog entry class SequenceCatalogEntry : public StandardEntry { public: static constexpr const CatalogType Type = CatalogType::SEQUENCE_ENTRY; static constexpr const char *Name = "sequence"; public: //! Create a real TableCatalogEntry and initialize storage for it SequenceCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateSequenceInfo &info); public: unique_ptr Copy(ClientContext &context) const override; unique_ptr GetInfo() const override; SequenceData GetData() const; int64_t CurrentValue(); int64_t NextValue(DuckTransaction &transaction); void ReplayValue(uint64_t usage_count, int64_t counter); string ToSQL() const override; private: //! Lock for getting a value on the sequence mutable mutex lock; //! Sequence data SequenceData data; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/transaction/transaction_data.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class DuckTransaction; class Transaction; struct TransactionData { TransactionData(DuckTransaction &transaction_p); // NOLINT: allow implicit conversion TransactionData(transaction_t transaction_id_p, transaction_t start_time_p); optional_ptr transaction; transaction_t transaction_id; transaction_t start_time; }; } // namespace duckdb namespace duckdb { class SequenceCatalogEntry; class SchemaCatalogEntry; class AttachedDatabase; class ColumnData; class ClientContext; class CatalogEntry; class DataTable; class DatabaseInstance; class LocalStorage; class MetaTransaction; class TransactionManager; class WriteAheadLog; class ChunkVectorInfo; struct DeleteInfo; struct UpdateInfo; //! The transaction object holds information about a currently running or past //! 
transaction class Transaction { public: DUCKDB_API Transaction(TransactionManager &manager, ClientContext &context); DUCKDB_API virtual ~Transaction(); TransactionManager &manager; weak_ptr context; //! The current active query for the transaction. Set to MAXIMUM_QUERY_ID if //! no query is active. atomic active_query; public: DUCKDB_API static Transaction &Get(ClientContext &context, AttachedDatabase &db); DUCKDB_API static Transaction &Get(ClientContext &context, Catalog &catalog); //! Returns the transaction for the given context if it has already been started DUCKDB_API static optional_ptr TryGet(ClientContext &context, AttachedDatabase &db); //! Whether or not the transaction has made any modifications to the database so far DUCKDB_API bool IsReadOnly(); //! Promotes the transaction to a read-write transaction DUCKDB_API virtual void SetReadWrite(); virtual bool IsDuckTransaction() const { return false; } public: template TARGET &Cast() { DynamicCastCheck(this); return reinterpret_cast(*this); } template const TARGET &Cast() const { DynamicCastCheck(this); return reinterpret_cast(*this); } private: bool is_read_only; }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/catalog/similar_catalog_entry.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { class SchemaCatalogEntry; //! Return value of SimilarEntryInSchemas struct SimilarCatalogEntry { //! The entry name. Empty if absent string name; //! The similarity score of the given name (between 0.0 and 1.0, higher is better) double score = 0.0; //! The schema of the entry. 
optional_ptr schema; bool Found() const { return !name.empty(); } DUCKDB_API string GetQualifiedName(bool qualify_catalog, bool qualify_schema) const; }; } // namespace duckdb #include #include namespace duckdb { struct AlterInfo; class ClientContext; class LogicalDependencyList; class DuckCatalog; class TableCatalogEntry; class SequenceCatalogEntry; class CatalogEntryMap { public: CatalogEntryMap() { } public: void AddEntry(unique_ptr entry); void UpdateEntry(unique_ptr entry); void DropEntry(CatalogEntry &entry); case_insensitive_tree_t> &Entries(); optional_ptr GetEntry(const string &name); private: //! Mapping of string to catalog entry case_insensitive_tree_t> entries; }; //! The Catalog Set stores (key, value) map of a set of CatalogEntries class CatalogSet { public: struct EntryLookup { enum class FailureReason { SUCCESS, DELETED, NOT_PRESENT, INVISIBLE }; optional_ptr result; FailureReason reason; }; public: DUCKDB_API explicit CatalogSet(Catalog &catalog, unique_ptr defaults = nullptr); ~CatalogSet(); //! Create an entry in the catalog set. Returns whether or not it was //! successful. DUCKDB_API bool CreateEntry(CatalogTransaction transaction, const string &name, unique_ptr value, const LogicalDependencyList &dependencies); DUCKDB_API bool CreateEntry(ClientContext &context, const string &name, unique_ptr value, const LogicalDependencyList &dependencies); DUCKDB_API bool AlterEntry(CatalogTransaction transaction, const string &name, AlterInfo &alter_info); DUCKDB_API bool DropEntry(CatalogTransaction transaction, const string &name, bool cascade, bool allow_drop_internal = false); DUCKDB_API bool DropEntry(ClientContext &context, const string &name, bool cascade, bool allow_drop_internal = false); //! Verify that the entry referenced by the dependency is still alive DUCKDB_API void VerifyExistenceOfDependency(transaction_t commit_id, CatalogEntry &entry); //! 
Verify we can still drop the entry while committing DUCKDB_API void CommitDrop(transaction_t commit_id, transaction_t start_time, CatalogEntry &entry); DUCKDB_API DuckCatalog &GetCatalog(); bool AlterOwnership(CatalogTransaction transaction, ChangeOwnershipInfo &info); void CleanupEntry(CatalogEntry &catalog_entry); //! Returns the entry with the specified name DUCKDB_API EntryLookup GetEntryDetailed(CatalogTransaction transaction, const string &name); DUCKDB_API optional_ptr GetEntry(CatalogTransaction transaction, const string &name); DUCKDB_API optional_ptr GetEntry(ClientContext &context, const string &name); //! Gets the entry that is most similar to the given name (i.e. smallest levenshtein distance), or empty string if //! none is found. The returned pair consists of the entry name and the distance (smaller means closer). SimilarCatalogEntry SimilarEntry(CatalogTransaction transaction, const string &name); //! Rollback to be the currently valid entry for a certain catalog //! entry void Undo(CatalogEntry &entry); //! Scan the catalog set, invoking the callback method for every committed entry DUCKDB_API void Scan(const std::function &callback); //! 
Scan the catalog set, invoking the callback method for every entry DUCKDB_API void ScanWithPrefix(CatalogTransaction transaction, const std::function &callback, const string &prefix); DUCKDB_API void Scan(CatalogTransaction transaction, const std::function &callback); DUCKDB_API void ScanWithReturn(CatalogTransaction transaction, const std::function &callback); DUCKDB_API void Scan(ClientContext &context, const std::function &callback); DUCKDB_API void ScanWithReturn(ClientContext &context, const std::function &callback); template vector> GetEntries(CatalogTransaction transaction) { vector> result; Scan(transaction, [&](CatalogEntry &entry) { result.push_back(entry.Cast()); }); return result; } DUCKDB_API bool CreatedByOtherActiveTransaction(CatalogTransaction transaction, transaction_t timestamp); DUCKDB_API bool CommittedAfterStarting(CatalogTransaction transaction, transaction_t timestamp); DUCKDB_API bool HasConflict(CatalogTransaction transaction, transaction_t timestamp); DUCKDB_API bool UseTimestamp(CatalogTransaction transaction, transaction_t timestamp); static bool IsCommitted(transaction_t timestamp); static void UpdateTimestamp(CatalogEntry &entry, transaction_t timestamp); mutex &GetCatalogLock() { return catalog_lock; } void Verify(Catalog &catalog); //! Override the default generator - this should not be used after the catalog set has been used void SetDefaultGenerator(unique_ptr defaults); private: bool DropDependencies(CatalogTransaction transaction, const string &name, bool cascade, bool allow_drop_internal = false); //! Given a root entry, gets the entry valid for this transaction, 'visible' is used to indicate whether the entry //! is actually visible to the transaction CatalogEntry &GetEntryForTransaction(CatalogTransaction transaction, CatalogEntry ¤t, bool &visible); //! 
Given a root entry, gets the entry valid for this transaction CatalogEntry &GetEntryForTransaction(CatalogTransaction transaction, CatalogEntry ¤t); CatalogEntry &GetCommittedEntry(CatalogEntry ¤t); optional_ptr GetEntryInternal(CatalogTransaction transaction, const string &name); optional_ptr CreateCommittedEntry(unique_ptr entry); //! Create all default entries void CreateDefaultEntries(CatalogTransaction transaction, unique_lock &lock); //! Attempt to create a default entry with the specified name. Returns the entry if successful, nullptr otherwise. optional_ptr CreateDefaultEntry(CatalogTransaction transaction, const string &name, unique_lock &lock); bool DropEntryInternal(CatalogTransaction transaction, const string &name, bool allow_drop_internal = false); bool CreateEntryInternal(CatalogTransaction transaction, const string &name, unique_ptr value, unique_lock &read_lock, bool should_be_empty = true); void CheckCatalogEntryInvariants(CatalogEntry &value, const string &name); //! Verify that the previous entry in the chain is dropped. bool VerifyVacancy(CatalogTransaction transaction, CatalogEntry &entry); //! Start the catalog entry chain with a dummy node bool StartChain(CatalogTransaction transaction, const string &name, unique_lock &read_lock); bool RenameEntryInternal(CatalogTransaction transaction, CatalogEntry &old, const string &new_name, AlterInfo &alter_info, unique_lock &read_lock); private: DuckCatalog &catalog; //! The catalog lock is used to make changes to the data mutex catalog_lock; CatalogEntryMap map; //! 
The generator used to generate default internal entries unique_ptr defaults; }; } // namespace duckdb namespace duckdb { class ClientContext; class StandardEntry; class TableCatalogEntry; class TableFunctionCatalogEntry; class SequenceCatalogEntry; enum class OnCreateConflict : uint8_t; struct AlterTableInfo; struct CreateIndexInfo; struct CreateFunctionInfo; struct CreateCollationInfo; struct CreateViewInfo; struct BoundCreateTableInfo; struct CreatePragmaFunctionInfo; struct CreateSequenceInfo; struct CreateSchemaInfo; struct CreateTableFunctionInfo; struct CreateCopyFunctionInfo; struct CreateTypeInfo; struct DropInfo; //! A schema in the catalog class SchemaCatalogEntry : public InCatalogEntry { public: static constexpr const CatalogType Type = CatalogType::SCHEMA_ENTRY; static constexpr const char *Name = "schema"; public: SchemaCatalogEntry(Catalog &catalog, CreateSchemaInfo &info); public: unique_ptr GetInfo() const override; //! Scan the specified catalog set, invoking the callback method for every entry virtual void Scan(ClientContext &context, CatalogType type, const std::function &callback) = 0; //! Scan the specified catalog set, invoking the callback method for every committed entry virtual void Scan(CatalogType type, const std::function &callback) = 0; string ToSQL() const override; //! Creates an index with the given name in the schema virtual optional_ptr CreateIndex(CatalogTransaction transaction, CreateIndexInfo &info, TableCatalogEntry &table) = 0; optional_ptr CreateIndex(ClientContext &context, CreateIndexInfo &info, TableCatalogEntry &table); //! Create a scalar or aggregate function within the given schema virtual optional_ptr CreateFunction(CatalogTransaction transaction, CreateFunctionInfo &info) = 0; //! Creates a table with the given name in the schema virtual optional_ptr CreateTable(CatalogTransaction transaction, BoundCreateTableInfo &info) = 0; //! 
Creates a view with the given name in the schema virtual optional_ptr CreateView(CatalogTransaction transaction, CreateViewInfo &info) = 0; //! Creates a sequence with the given name in the schema virtual optional_ptr CreateSequence(CatalogTransaction transaction, CreateSequenceInfo &info) = 0; //! Create a table function within the given schema virtual optional_ptr CreateTableFunction(CatalogTransaction transaction, CreateTableFunctionInfo &info) = 0; //! Create a copy function within the given schema virtual optional_ptr CreateCopyFunction(CatalogTransaction transaction, CreateCopyFunctionInfo &info) = 0; //! Create a pragma function within the given schema virtual optional_ptr CreatePragmaFunction(CatalogTransaction transaction, CreatePragmaFunctionInfo &info) = 0; //! Create a collation within the given schema virtual optional_ptr CreateCollation(CatalogTransaction transaction, CreateCollationInfo &info) = 0; //! Create a enum within the given schema virtual optional_ptr CreateType(CatalogTransaction transaction, CreateTypeInfo &info) = 0; //! Lookup an entry in the schema DUCKDB_API virtual optional_ptr LookupEntry(CatalogTransaction transaction, const EntryLookupInfo &lookup_info) = 0; DUCKDB_API virtual CatalogSet::EntryLookup LookupEntryDetailed(CatalogTransaction transaction, const EntryLookupInfo &lookup_info); DUCKDB_API virtual SimilarCatalogEntry GetSimilarEntry(CatalogTransaction transaction, const EntryLookupInfo &lookup_info); DUCKDB_API optional_ptr GetEntry(CatalogTransaction transaction, CatalogType type, const string &name); //! Drops an entry from the schema virtual void DropEntry(ClientContext &context, DropInfo &info) = 0; //! 
Alters a catalog entry virtual void Alter(CatalogTransaction transaction, AlterInfo &info) = 0; CatalogTransaction GetCatalogTransaction(ClientContext &context); }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/deque.hpp // // //===----------------------------------------------------------------------===// #include namespace duckdb { using std::deque; } //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/prepared_statement_mode.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class PreparedStatementMode : uint8_t { PREPARE_ONLY, PREPARE_AND_EXECUTE, }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/main/client_config.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/enums/output_type.hpp // // //===----------------------------------------------------------------------===// namespace duckdb { enum class ExplainOutputType : uint8_t { ALL = 0, OPTIMIZED_ONLY = 1, PHYSICAL_ONLY = 2 }; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB // // duckdb/common/progress_bar/progress_bar.hpp // // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // // DuckDB // // duckdb.h // // //===----------------------------------------------------------------------===// // // !!!!!!! // WARNING: this file is autogenerated by scripts/generate_c_api.py, manual changes will be overwritten // !!!!!!! 
#ifndef DUCKDB_C_API #ifdef _WIN32 #ifdef DUCKDB_STATIC_BUILD #define DUCKDB_C_API #else #if defined(DUCKDB_BUILD_LIBRARY) && !defined(DUCKDB_BUILD_LOADABLE_EXTENSION) #define DUCKDB_C_API __declspec(dllexport) #else #define DUCKDB_C_API __declspec(dllimport) #endif #endif #else #define DUCKDB_C_API #endif #endif //! duplicate of duckdb/main/winapi.hpp #ifndef DUCKDB_EXTENSION_API #ifdef _WIN32 #ifdef DUCKDB_STATIC_BUILD #define DUCKDB_EXTENSION_API #else #define DUCKDB_EXTENSION_API __declspec(dllexport) #endif #else #define DUCKDB_EXTENSION_API __attribute__((visibility("default"))) #endif #endif #include #include #include #ifdef __cplusplus extern "C" { #endif //===--------------------------------------------------------------------===// // Enums //===--------------------------------------------------------------------===// //! WARNING: The numbers of these enums should not be changed, as changing the numbers breaks ABI compatibility. //! Always add enums at the END of the enum //! An enum over DuckDB's internal types. 
// Logical type ids of the C API. The numeric values are ABI - out-of-order
// values (e.g. UHUGEINT = 32, ARRAY = 33) are types that were appended later.
typedef enum DUCKDB_TYPE {
	DUCKDB_TYPE_INVALID = 0,
	// bool
	DUCKDB_TYPE_BOOLEAN = 1,
	// int8_t
	DUCKDB_TYPE_TINYINT = 2,
	// int16_t
	DUCKDB_TYPE_SMALLINT = 3,
	// int32_t
	DUCKDB_TYPE_INTEGER = 4,
	// int64_t
	DUCKDB_TYPE_BIGINT = 5,
	// uint8_t
	DUCKDB_TYPE_UTINYINT = 6,
	// uint16_t
	DUCKDB_TYPE_USMALLINT = 7,
	// uint32_t
	DUCKDB_TYPE_UINTEGER = 8,
	// uint64_t
	DUCKDB_TYPE_UBIGINT = 9,
	// float
	DUCKDB_TYPE_FLOAT = 10,
	// double
	DUCKDB_TYPE_DOUBLE = 11,
	// duckdb_timestamp (microseconds)
	DUCKDB_TYPE_TIMESTAMP = 12,
	// duckdb_date
	DUCKDB_TYPE_DATE = 13,
	// duckdb_time
	DUCKDB_TYPE_TIME = 14,
	// duckdb_interval
	DUCKDB_TYPE_INTERVAL = 15,
	// duckdb_hugeint
	DUCKDB_TYPE_HUGEINT = 16,
	// duckdb_uhugeint
	DUCKDB_TYPE_UHUGEINT = 32,
	// const char*
	DUCKDB_TYPE_VARCHAR = 17,
	// duckdb_blob
	DUCKDB_TYPE_BLOB = 18,
	// duckdb_decimal
	DUCKDB_TYPE_DECIMAL = 19,
	// duckdb_timestamp_s (seconds)
	DUCKDB_TYPE_TIMESTAMP_S = 20,
	// duckdb_timestamp_ms (milliseconds)
	DUCKDB_TYPE_TIMESTAMP_MS = 21,
	// duckdb_timestamp_ns (nanoseconds)
	DUCKDB_TYPE_TIMESTAMP_NS = 22,
	// enum type, only useful as logical type
	DUCKDB_TYPE_ENUM = 23,
	// list type, only useful as logical type
	DUCKDB_TYPE_LIST = 24,
	// struct type, only useful as logical type
	DUCKDB_TYPE_STRUCT = 25,
	// map type, only useful as logical type
	DUCKDB_TYPE_MAP = 26,
	// duckdb_array, only useful as logical type
	DUCKDB_TYPE_ARRAY = 33,
	// duckdb_hugeint
	DUCKDB_TYPE_UUID = 27,
	// union type, only useful as logical type
	DUCKDB_TYPE_UNION = 28,
	// duckdb_bit
	DUCKDB_TYPE_BIT = 29,
	// duckdb_time_tz
	DUCKDB_TYPE_TIME_TZ = 30,
	// duckdb_timestamp (microseconds)
	DUCKDB_TYPE_TIMESTAMP_TZ = 31,
	// enum type, only useful as logical type
	DUCKDB_TYPE_ANY = 34,
	// duckdb_bignum
	DUCKDB_TYPE_BIGNUM = 35,
	// enum type, only useful as logical type
	DUCKDB_TYPE_SQLNULL = 36,
	// enum type, only useful as logical type
	DUCKDB_TYPE_STRING_LITERAL = 37,
	// enum type, only useful as logical type
	DUCKDB_TYPE_INTEGER_LITERAL = 38,
	// duckdb_time_ns (nanoseconds)
	DUCKDB_TYPE_TIME_NS = 39,
} duckdb_type;
//! An enum over the returned state of different functions.
typedef enum duckdb_state { DuckDBSuccess = 0, DuckDBError = 1 } duckdb_state;
//! An enum over the pending state of a pending query result.
typedef enum duckdb_pending_state {
	DUCKDB_PENDING_RESULT_READY = 0,
	DUCKDB_PENDING_RESULT_NOT_READY = 1,
	DUCKDB_PENDING_ERROR = 2,
	DUCKDB_PENDING_NO_TASKS_AVAILABLE = 3
} duckdb_pending_state;
//! An enum over DuckDB's different result types.
typedef enum duckdb_result_type {
	DUCKDB_RESULT_TYPE_INVALID = 0,
	DUCKDB_RESULT_TYPE_CHANGED_ROWS = 1,
	DUCKDB_RESULT_TYPE_NOTHING = 2,
	DUCKDB_RESULT_TYPE_QUERY_RESULT = 3,
} duckdb_result_type;
//! An enum over DuckDB's different statement types.
typedef enum duckdb_statement_type {
	DUCKDB_STATEMENT_TYPE_INVALID = 0,
	DUCKDB_STATEMENT_TYPE_SELECT = 1,
	DUCKDB_STATEMENT_TYPE_INSERT = 2,
	DUCKDB_STATEMENT_TYPE_UPDATE = 3,
	DUCKDB_STATEMENT_TYPE_EXPLAIN = 4,
	DUCKDB_STATEMENT_TYPE_DELETE = 5,
	DUCKDB_STATEMENT_TYPE_PREPARE = 6,
	DUCKDB_STATEMENT_TYPE_CREATE = 7,
	DUCKDB_STATEMENT_TYPE_EXECUTE = 8,
	DUCKDB_STATEMENT_TYPE_ALTER = 9,
	DUCKDB_STATEMENT_TYPE_TRANSACTION = 10,
	DUCKDB_STATEMENT_TYPE_COPY = 11,
	DUCKDB_STATEMENT_TYPE_ANALYZE = 12,
	DUCKDB_STATEMENT_TYPE_VARIABLE_SET = 13,
	DUCKDB_STATEMENT_TYPE_CREATE_FUNC = 14,
	DUCKDB_STATEMENT_TYPE_DROP = 15,
	DUCKDB_STATEMENT_TYPE_EXPORT = 16,
	DUCKDB_STATEMENT_TYPE_PRAGMA = 17,
	DUCKDB_STATEMENT_TYPE_VACUUM = 18,
	DUCKDB_STATEMENT_TYPE_CALL = 19,
	DUCKDB_STATEMENT_TYPE_SET = 20,
	DUCKDB_STATEMENT_TYPE_LOAD = 21,
	DUCKDB_STATEMENT_TYPE_RELATION = 22,
	DUCKDB_STATEMENT_TYPE_EXTENSION = 23,
	DUCKDB_STATEMENT_TYPE_LOGICAL_PLAN = 24,
	DUCKDB_STATEMENT_TYPE_ATTACH = 25,
	DUCKDB_STATEMENT_TYPE_DETACH = 26,
	DUCKDB_STATEMENT_TYPE_MULTI = 27,
} duckdb_statement_type;
//! An enum over DuckDB's different error types.
// Error categories of the C API; values are ABI and mirror DuckDB's internal
// exception types. Note the last member deviates from the DUCKDB_ERROR_ prefix.
typedef enum duckdb_error_type {
	DUCKDB_ERROR_INVALID = 0,
	DUCKDB_ERROR_OUT_OF_RANGE = 1,
	DUCKDB_ERROR_CONVERSION = 2,
	DUCKDB_ERROR_UNKNOWN_TYPE = 3,
	DUCKDB_ERROR_DECIMAL = 4,
	DUCKDB_ERROR_MISMATCH_TYPE = 5,
	DUCKDB_ERROR_DIVIDE_BY_ZERO = 6,
	DUCKDB_ERROR_OBJECT_SIZE = 7,
	DUCKDB_ERROR_INVALID_TYPE = 8,
	DUCKDB_ERROR_SERIALIZATION = 9,
	DUCKDB_ERROR_TRANSACTION = 10,
	DUCKDB_ERROR_NOT_IMPLEMENTED = 11,
	DUCKDB_ERROR_EXPRESSION = 12,
	DUCKDB_ERROR_CATALOG = 13,
	DUCKDB_ERROR_PARSER = 14,
	DUCKDB_ERROR_PLANNER = 15,
	DUCKDB_ERROR_SCHEDULER = 16,
	DUCKDB_ERROR_EXECUTOR = 17,
	DUCKDB_ERROR_CONSTRAINT = 18,
	DUCKDB_ERROR_INDEX = 19,
	DUCKDB_ERROR_STAT = 20,
	DUCKDB_ERROR_CONNECTION = 21,
	DUCKDB_ERROR_SYNTAX = 22,
	DUCKDB_ERROR_SETTINGS = 23,
	DUCKDB_ERROR_BINDER = 24,
	DUCKDB_ERROR_NETWORK = 25,
	DUCKDB_ERROR_OPTIMIZER = 26,
	DUCKDB_ERROR_NULL_POINTER = 27,
	DUCKDB_ERROR_IO = 28,
	DUCKDB_ERROR_INTERRUPT = 29,
	DUCKDB_ERROR_FATAL = 30,
	DUCKDB_ERROR_INTERNAL = 31,
	DUCKDB_ERROR_INVALID_INPUT = 32,
	DUCKDB_ERROR_OUT_OF_MEMORY = 33,
	DUCKDB_ERROR_PERMISSION = 34,
	DUCKDB_ERROR_PARAMETER_NOT_RESOLVED = 35,
	DUCKDB_ERROR_PARAMETER_NOT_ALLOWED = 36,
	DUCKDB_ERROR_DEPENDENCY = 37,
	DUCKDB_ERROR_HTTP = 38,
	DUCKDB_ERROR_MISSING_EXTENSION = 39,
	DUCKDB_ERROR_AUTOLOAD = 40,
	DUCKDB_ERROR_SEQUENCE = 41,
	DUCKDB_INVALID_CONFIGURATION = 42
} duckdb_error_type;
//! An enum over DuckDB's different cast modes.
typedef enum duckdb_cast_mode { DUCKDB_CAST_NORMAL = 0, DUCKDB_CAST_TRY = 1 } duckdb_cast_mode;

//===--------------------------------------------------------------------===//
// General type definitions
//===--------------------------------------------------------------------===//

//! DuckDB's index type.
typedef uint64_t idx_t;
//! Type definition for the data pointers of selection vectors.
typedef uint32_t sel_t;

//! The callback to destroy data, e.g.,
//! bind data (if any), init data (if any), extra data for replacement scans (if any), etc.
typedef void (*duckdb_delete_callback_t)(void *data);

//! The callback to copy data, e.g., bind data (if any).
typedef void *(*duckdb_copy_callback_t)(void *data);

//! Used for threading, contains a task state.
//! Must be destroyed with `duckdb_destroy_task_state`.
typedef void *duckdb_task_state;

//===--------------------------------------------------------------------===//
// Types (no explicit freeing)
//===--------------------------------------------------------------------===//

//! DATE is stored as days since 1970-01-01.
//! Use the `duckdb_from_date` and `duckdb_to_date` functions to extract individual information.
typedef struct {
	int32_t days;
} duckdb_date;
typedef struct {
	int32_t year;
	int8_t month;
	int8_t day;
} duckdb_date_struct;

//! TIME is stored as microseconds since 00:00:00.
//! Use the `duckdb_from_time` and `duckdb_to_time` functions to extract individual information.
typedef struct {
	int64_t micros;
} duckdb_time;
typedef struct {
	int8_t hour;
	int8_t min;
	int8_t sec;
	int32_t micros;
} duckdb_time_struct;

//! TIME_NS is stored as nanoseconds since 00:00:00.
typedef struct {
	int64_t nanos;
} duckdb_time_ns;

//! TIME_TZ is stored as 40 bits for the int64_t microseconds, and 24 bits for the int32_t offset.
//! Use the `duckdb_from_time_tz` function to extract individual information.
typedef struct {
	uint64_t bits;
} duckdb_time_tz;
typedef struct {
	duckdb_time_struct time;
	int32_t offset;
} duckdb_time_tz_struct;

//! TIMESTAMP is stored as microseconds since 1970-01-01.
//! Use the `duckdb_from_timestamp` and `duckdb_to_timestamp` functions to extract individual information.
typedef struct {
	int64_t micros;
} duckdb_timestamp;
typedef struct {
	duckdb_date_struct date;
	duckdb_time_struct time;
} duckdb_timestamp_struct;

//! TIMESTAMP_S is stored as seconds since 1970-01-01.
typedef struct {
	int64_t seconds;
} duckdb_timestamp_s;

//! TIMESTAMP_MS is stored as milliseconds since 1970-01-01.
typedef struct {
	int64_t millis;
} duckdb_timestamp_ms;

//! TIMESTAMP_NS is stored as nanoseconds since 1970-01-01.
typedef struct { int64_t nanos; } duckdb_timestamp_ns; //! INTERVAL is stored in months, days, and micros. typedef struct { int32_t months; int32_t days; int64_t micros; } duckdb_interval; //! HUGEINT is composed of a lower and upper component. //! Its value is upper * 2^64 + lower. //! For simplified usage, use `duckdb_hugeint_to_double` and `duckdb_double_to_hugeint`. typedef struct { uint64_t lower; int64_t upper; } duckdb_hugeint; //! UHUGEINT is composed of a lower and upper component. //! Its value is upper * 2^64 + lower. //! For simplified usage, use `duckdb_uhugeint_to_double` and `duckdb_double_to_uhugeint`. typedef struct { uint64_t lower; uint64_t upper; } duckdb_uhugeint; //! DECIMAL is composed of a width and a scale. //! Their value is stored in a HUGEINT. typedef struct { uint8_t width; uint8_t scale; duckdb_hugeint value; } duckdb_decimal; //! A type holding information about the query execution progress. typedef struct { double percentage; uint64_t rows_processed; uint64_t total_rows_to_process; } duckdb_query_progress_type; //! The internal representation of a VARCHAR (string_t). If the VARCHAR does not //! exceed 12 characters, then we inline it. Otherwise, we inline a four-byte prefix for faster //! string comparisons and store a pointer to the remaining characters. This is a non- //! owning structure, i.e., it does not have to be freed. typedef struct { union { struct { uint32_t length; char prefix[4]; char *ptr; } pointer; struct { uint32_t length; char inlined[12]; } inlined; } value; } duckdb_string_t; //! DuckDB's LISTs are composed of a 'parent' vector holding metadata of each list, //! and a child vector holding the entries of the lists. //! The `duckdb_list_entry` struct contains the internal representation of a LIST metadata entry. //! A metadata entry contains the length of the list, and its offset in the child vector. typedef struct { uint64_t offset; uint64_t length; } duckdb_list_entry; //! 
A column consists of a pointer to its internal data. Don't operate on this type directly. //! Instead, use functions such as `duckdb_column_data`, `duckdb_nullmask_data`, //! `duckdb_column_type`, and `duckdb_column_name`. typedef struct { // Deprecated, use `duckdb_column_data`. void *deprecated_data; // Deprecated, use `duckdb_nullmask_data`. bool *deprecated_nullmask; // Deprecated, use `duckdb_column_type`. duckdb_type deprecated_type; // Deprecated, use `duckdb_column_name`. char *deprecated_name; void *internal_data; } duckdb_column; //! 1. A standalone vector that must be destroyed, or //! 2. A vector to a column in a data chunk that lives as long as the data chunk lives. typedef struct _duckdb_vector { void *internal_ptr; } * duckdb_vector; //! A selection vector is a vector of indices, which usually refer to values in a vector. //! Can be used to slice vectors, changing their length and the order of their entries. //! Standalone selection vectors must be destroyed. typedef struct _duckdb_selection_vector { void *internal_ptr; } * duckdb_selection_vector; //===--------------------------------------------------------------------===// // Types (explicit freeing/destroying) //===--------------------------------------------------------------------===// //! Strings are composed of a `char` pointer and a size. //! You must free `string.data` with `duckdb_free`. typedef struct { char *data; idx_t size; } duckdb_string; //! BLOBs are composed of a byte pointer and a size. //! You must free `blob.data` with `duckdb_free`. typedef struct { void *data; idx_t size; } duckdb_blob; //! BITs are composed of a byte pointer and a size. //! BIT byte data has 0 to 7 bits of padding. //! The first byte contains the number of padding bits. //! The padding bits of the second byte are set to 1, starting from the MSB. //! You must free `data` with `duckdb_free`. typedef struct { uint8_t *data; idx_t size; } duckdb_bit; //! 
BIGNUMs are composed of a byte pointer, a size, and an `is_negative` bool. //! The absolute value of the number is stored in `data` in little endian format. //! You must free `data` with `duckdb_free`. typedef struct { uint8_t *data; idx_t size; bool is_negative; } duckdb_bignum; //! A query result consists of a pointer to its internal data. //! Must be freed with 'duckdb_destroy_result'. typedef struct { // Deprecated, use `duckdb_column_count`. idx_t deprecated_column_count; // Deprecated, use `duckdb_row_count`. idx_t deprecated_row_count; // Deprecated, use `duckdb_rows_changed`. idx_t deprecated_rows_changed; // Deprecated, use `duckdb_column_*`-family of functions. duckdb_column *deprecated_columns; // Deprecated, use `duckdb_result_error`. char *deprecated_error_message; void *internal_data; } duckdb_result; //! A database instance cache object. Must be destroyed with `duckdb_destroy_instance_cache`. typedef struct _duckdb_instance_cache { void *internal_ptr; } * duckdb_instance_cache; //! A database object. Must be closed with `duckdb_close`. typedef struct _duckdb_database { void *internal_ptr; } * duckdb_database; //! A connection to a duckdb database. Must be closed with `duckdb_disconnect`. typedef struct _duckdb_connection { void *internal_ptr; } * duckdb_connection; //! A client context of a duckdb connection. Must be destroyed with `duckdb_destroy_context`. typedef struct _duckdb_client_context { void *internal_ptr; } * duckdb_client_context; //! A prepared statement is a parameterized query that allows you to bind parameters to it. //! Must be destroyed with `duckdb_destroy_prepare`. typedef struct _duckdb_prepared_statement { void *internal_ptr; } * duckdb_prepared_statement; //! Extracted statements. Must be destroyed with `duckdb_destroy_extracted`. typedef struct _duckdb_extracted_statements { void *internal_ptr; } * duckdb_extracted_statements; //! 
The pending result represents an intermediate structure for a query that is not yet fully executed. //! Must be destroyed with `duckdb_destroy_pending`. typedef struct _duckdb_pending_result { void *internal_ptr; } * duckdb_pending_result; //! The appender enables fast data loading into DuckDB. //! Must be destroyed with `duckdb_appender_destroy`. typedef struct _duckdb_appender { void *internal_ptr; } * duckdb_appender; //! The table description allows querying information about the table. //! Must be destroyed with `duckdb_table_description_destroy`. typedef struct _duckdb_table_description { void *internal_ptr; } * duckdb_table_description; //! The configuration can be used to provide start-up options for a database. //! Must be destroyed with `duckdb_destroy_config`. typedef struct _duckdb_config { void *internal_ptr; } * duckdb_config; //! A logical type. //! Must be destroyed with `duckdb_destroy_logical_type`. typedef struct _duckdb_logical_type { void *internal_ptr; } * duckdb_logical_type; //! Holds extra information to register a custom logical type. //! Reserved for future use. typedef struct _duckdb_create_type_info { void *internal_ptr; } * duckdb_create_type_info; //! Contains a data chunk of a duckdb_result. //! Must be destroyed with `duckdb_destroy_data_chunk`. typedef struct _duckdb_data_chunk { void *internal_ptr; } * duckdb_data_chunk; //! A value of a logical type. //! Must be destroyed with `duckdb_destroy_value`. typedef struct _duckdb_value { void *internal_ptr; } * duckdb_value; //! Holds a recursive tree containing profiling metrics. //! The tree matches the query plan, and has a top-level node. typedef struct _duckdb_profiling_info { void *internal_ptr; } * duckdb_profiling_info; //! Holds error data. //! Must be destroyed with `duckdb_destroy_error_data`. typedef struct _duckdb_error_data { void *internal_ptr; } * duckdb_error_data; //! Holds a bound expression. //! Must be destroyed with `duckdb_destroy_expression`. 
typedef struct _duckdb_expression { void *internal_ptr; } * duckdb_expression; //===--------------------------------------------------------------------===// // C API extension information //===--------------------------------------------------------------------===// //! Holds the state of the C API extension initialization process. typedef struct _duckdb_extension_info { void *internal_ptr; } * duckdb_extension_info; //===--------------------------------------------------------------------===// // Function types //===--------------------------------------------------------------------===// //! Additional function info. //! When setting this info, it is necessary to pass a destroy-callback function. typedef struct _duckdb_function_info { void *internal_ptr; } * duckdb_function_info; //! The bind info of a function. //! When setting this info, it is necessary to pass a destroy-callback function. typedef struct _duckdb_bind_info { void *internal_ptr; } * duckdb_bind_info; //===--------------------------------------------------------------------===// // Scalar function types //===--------------------------------------------------------------------===// //! A scalar function. Must be destroyed with `duckdb_destroy_scalar_function`. typedef struct _duckdb_scalar_function { void *internal_ptr; } * duckdb_scalar_function; //! A scalar function set. Must be destroyed with `duckdb_destroy_scalar_function_set`. typedef struct _duckdb_scalar_function_set { void *internal_ptr; } * duckdb_scalar_function_set; //! The bind function callback of the scalar function. typedef void (*duckdb_scalar_function_bind_t)(duckdb_bind_info info); //! The function to execute the scalar function on an input chunk. 
typedef void (*duckdb_scalar_function_t)(duckdb_function_info info, duckdb_data_chunk input, duckdb_vector output); //===--------------------------------------------------------------------===// // Aggregate function types //===--------------------------------------------------------------------===// //! An aggregate function. Must be destroyed with `duckdb_destroy_aggregate_function`. typedef struct _duckdb_aggregate_function { void *internal_ptr; } * duckdb_aggregate_function; //! An aggregate function set. Must be destroyed with `duckdb_destroy_aggregate_function_set`. typedef struct _duckdb_aggregate_function_set { void *internal_ptr; } * duckdb_aggregate_function_set; //! The state of an aggregate function. typedef struct _duckdb_aggregate_state { void *internal_ptr; } * duckdb_aggregate_state; //! A function to return the aggregate state's size. typedef idx_t (*duckdb_aggregate_state_size)(duckdb_function_info info); //! A function to initialize an aggregate state. typedef void (*duckdb_aggregate_init_t)(duckdb_function_info info, duckdb_aggregate_state state); //! An optional function to destroy an aggregate state. typedef void (*duckdb_aggregate_destroy_t)(duckdb_aggregate_state *states, idx_t count); //! A function to update a set of aggregate states with new values. typedef void (*duckdb_aggregate_update_t)(duckdb_function_info info, duckdb_data_chunk input, duckdb_aggregate_state *states); //! A function to combine aggregate states. typedef void (*duckdb_aggregate_combine_t)(duckdb_function_info info, duckdb_aggregate_state *source, duckdb_aggregate_state *target, idx_t count); //! A function to finalize aggregate states into a result vector.
typedef void (*duckdb_aggregate_finalize_t)(duckdb_function_info info, duckdb_aggregate_state *source, duckdb_vector result, idx_t count, idx_t offset); //===--------------------------------------------------------------------===// // Table function types //===--------------------------------------------------------------------===// //! A table function. Must be destroyed with `duckdb_destroy_table_function`. typedef struct _duckdb_table_function { void *internal_ptr; } * duckdb_table_function; //! Additional function initialization info. //! When setting this info, it is necessary to pass a destroy-callback function. typedef struct _duckdb_init_info { void *internal_ptr; } * duckdb_init_info; //! The bind function of the table function. typedef void (*duckdb_table_function_bind_t)(duckdb_bind_info info); //! The possibly thread-local initialization function of the table function. typedef void (*duckdb_table_function_init_t)(duckdb_init_info info); //! The function to generate an output chunk during table function execution. typedef void (*duckdb_table_function_t)(duckdb_function_info info, duckdb_data_chunk output); //===--------------------------------------------------------------------===// // Cast types //===--------------------------------------------------------------------===// //! A cast function. Must be destroyed with `duckdb_destroy_cast_function`. typedef struct _duckdb_cast_function { void *internal_ptr; } * duckdb_cast_function; //! The function to cast from an input vector to an output vector. typedef bool (*duckdb_cast_function_t)(duckdb_function_info info, idx_t count, duckdb_vector input, duckdb_vector output); //===--------------------------------------------------------------------===// // Replacement scan types //===--------------------------------------------------------------------===// //! Additional replacement scan info. When setting this info, it is necessary to pass a destroy-callback function. 
typedef struct _duckdb_replacement_scan_info { void *internal_ptr; } * duckdb_replacement_scan_info; //! A replacement scan function. typedef void (*duckdb_replacement_callback_t)(duckdb_replacement_scan_info info, const char *table_name, void *data); //===--------------------------------------------------------------------===// // Arrow-related types //===--------------------------------------------------------------------===// //! Forward declare Arrow structs //! It is important to notice that these structs are not defined by DuckDB but are actually Arrow external objects. //! They're defined by the C Data Interface Arrow spec: https://arrow.apache.org/docs/format/CDataInterface.html struct ArrowArray; struct ArrowSchema; //! Holds an arrow query result. Must be destroyed with `duckdb_destroy_arrow`. typedef struct _duckdb_arrow { void *internal_ptr; } * duckdb_arrow; //! Holds an arrow array stream. Must be destroyed with `duckdb_destroy_arrow_stream`. typedef struct _duckdb_arrow_stream { void *internal_ptr; } * duckdb_arrow_stream; //! Holds an arrow schema. Remember to release the respective ArrowSchema object. typedef struct _duckdb_arrow_schema { void *internal_ptr; } * duckdb_arrow_schema; //! Holds an arrow converted schema (i.e., duckdb::ArrowTableSchema). //! In practice, this object holds the information necessary to do proper conversion between Arrow Types and DuckDB //! Types. Check duckdb/function/table/arrow/arrow_duck_schema.hpp for more details! Must be destroyed with //! `duckdb_destroy_arrow_converted_schema` typedef struct _duckdb_arrow_converted_schema { void *internal_ptr; } * duckdb_arrow_converted_schema; //! Holds an arrow array. Remember to release the respective ArrowArray object. typedef struct _duckdb_arrow_array { void *internal_ptr; } * duckdb_arrow_array; //! The arrow options used when transforming the DuckDB schema and datachunks into Arrow schema and arrays. //!
Used in `duckdb_to_arrow_schema` and `duckdb_data_chunk_to_arrow` typedef struct _duckdb_arrow_options { void *internal_ptr; } * duckdb_arrow_options; //===--------------------------------------------------------------------===// // DuckDB extension access //===--------------------------------------------------------------------===// //! Passed to C API extension as a parameter to the entrypoint. struct duckdb_extension_access { //! Indicate that an error has occurred. void (*set_error)(duckdb_extension_info info, const char *error); //! Fetch the database on which to register the extension. duckdb_database *(*get_database)(duckdb_extension_info info); //! Fetch the API struct pointer. const void *(*get_api)(duckdb_extension_info info, const char *version); }; #ifndef DUCKDB_API_EXCLUDE_FUNCTIONS //===--------------------------------------------------------------------===// // Functions //===--------------------------------------------------------------------===// //===--------------------------------------------------------------------===// // Open Connect //===--------------------------------------------------------------------===// /*! Creates a new database instance cache. The instance cache is necessary if a client/program (re)opens multiple databases to the same file within the same process. Must be destroyed with 'duckdb_destroy_instance_cache'. * @return The database instance cache. */ DUCKDB_C_API duckdb_instance_cache duckdb_create_instance_cache(); /*! Creates a new database instance in the instance cache, or retrieves an existing database instance. Must be closed with 'duckdb_close'. * @param instance_cache The instance cache in which to create the database, or from which to take the database. * @param path Path to the database file on disk. Both `nullptr` and `:memory:` open or retrieve an in-memory database. * @param out_database The resulting cached database. * @param config (Optional) configuration used to create the database. 
* @param out_error If set and the function returns `DuckDBError`, this contains the error message. Note that the error message must be freed using `duckdb_free`. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_get_or_create_from_cache(duckdb_instance_cache instance_cache, const char *path, duckdb_database *out_database, duckdb_config config, char **out_error); /*! Destroys an existing database instance cache and de-allocates its memory. * @param instance_cache The instance cache to destroy. */ DUCKDB_C_API void duckdb_destroy_instance_cache(duckdb_instance_cache *instance_cache); /*! Creates a new database or opens an existing database file stored at the given path. If no path is given a new in-memory database is created instead. The database must be closed with 'duckdb_close'. * @param path Path to the database file on disk. Both `nullptr` and `:memory:` open an in-memory database. * @param out_database The result database object. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_open(const char *path, duckdb_database *out_database); /*! Extended version of duckdb_open. Creates a new database or opens an existing database file stored at the given path. The database must be closed with 'duckdb_close'. * @param path Path to the database file on disk. Both `nullptr` and `:memory:` open an in-memory database. * @param out_database The result database object. * @param config (Optional) configuration used to start up the database. * @param out_error If set and the function returns `DuckDBError`, this contains the error message. Note that the error message must be freed using `duckdb_free`. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_open_ext(const char *path, duckdb_database *out_database, duckdb_config config, char **out_error); /*! 
Closes the specified database and de-allocates all memory allocated for that database. This should be called after you are done with any database allocated through `duckdb_open` or `duckdb_open_ext`. Note that failing to call `duckdb_close` (in case of e.g. a program crash) will not cause data corruption. Still, it is recommended to always correctly close a database object after you are done with it. * @param database The database object to shut down. */ DUCKDB_C_API void duckdb_close(duckdb_database *database); /*! Opens a connection to a database. Connections are required to query the database, and store transactional state associated with the connection. The instantiated connection should be closed using 'duckdb_disconnect'. * @param database The database file to connect to. * @param out_connection The result connection object. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_connect(duckdb_database database, duckdb_connection *out_connection); /*! Interrupt running query * @param connection The connection to interrupt */ DUCKDB_C_API void duckdb_interrupt(duckdb_connection connection); /*! Get progress of the running query * @param connection The working connection * @return -1 if no progress or a percentage of the progress */ DUCKDB_C_API duckdb_query_progress_type duckdb_query_progress(duckdb_connection connection); /*! Closes the specified connection and de-allocates all memory allocated for that connection. * @param connection The connection to close. */ DUCKDB_C_API void duckdb_disconnect(duckdb_connection *connection); /*! Retrieves the client context of the connection. * @param connection The connection. * @param out_context The client context of the connection. Must be destroyed with `duckdb_destroy_client_context`. */ DUCKDB_C_API void duckdb_connection_get_client_context(duckdb_connection connection, duckdb_client_context *out_context); /*! Retrieves the arrow options of the connection. 
* @param connection The connection. * @param out_arrow_options The arrow options of the connection. Must be destroyed with `duckdb_destroy_arrow_options`. */ DUCKDB_C_API void duckdb_connection_get_arrow_options(duckdb_connection connection, duckdb_arrow_options *out_arrow_options); /*! Returns the connection id of the client context. * @param context The client context. * @return The connection id of the client context. */ DUCKDB_C_API idx_t duckdb_client_context_get_connection_id(duckdb_client_context context); /*! Destroys the client context and deallocates its memory. * @param context The client context to destroy. */ DUCKDB_C_API void duckdb_destroy_client_context(duckdb_client_context *context); /*! Destroys the arrow options and deallocates its memory. * @param arrow_options The arrow options to destroy. */ DUCKDB_C_API void duckdb_destroy_arrow_options(duckdb_arrow_options *arrow_options); /*! Returns the version of the linked DuckDB, with a version postfix for dev versions Usually used for developing C extensions that must return this for a compatibility check. */ DUCKDB_C_API const char *duckdb_library_version(); /*! Get the list of (fully qualified) table names of the query. * @param connection The connection for which to get the table names. * @param query The query for which to get the table names. * @param qualified Returns fully qualified table names (catalog.schema.table), if set to true, else only the (not escaped) table names. * @return A duckdb_value of type VARCHAR[] containing the (fully qualified) table names of the query. Must be destroyed with duckdb_destroy_value. */ DUCKDB_C_API duckdb_value duckdb_get_table_names(duckdb_connection connection, const char *query, bool qualified); //===--------------------------------------------------------------------===// // Configuration //===--------------------------------------------------------------------===// /*! Initializes an empty configuration object that can be used to provide start-up options for the DuckDB instance through `duckdb_open_ext`.
The duckdb_config must be destroyed using 'duckdb_destroy_config' This will always succeed unless there is a malloc failure. Note that `duckdb_destroy_config` should always be called on the resulting config, even if the function returns `DuckDBError`. * @param out_config The result configuration object. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_create_config(duckdb_config *out_config); /*! This returns the total amount of configuration options available for usage with `duckdb_get_config_flag`. This should not be called in a loop as it internally loops over all the options. * @return The amount of config options available. */ DUCKDB_C_API size_t duckdb_config_count(); /*! Obtains a human-readable name and description of a specific configuration option. This can be used to e.g. display configuration options. This will succeed unless `index` is out of range (i.e. `>= duckdb_config_count`). The result name or description MUST NOT be freed. * @param index The index of the configuration option (between 0 and `duckdb_config_count`) * @param out_name A name of the configuration flag. * @param out_description A description of the configuration flag. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_get_config_flag(size_t index, const char **out_name, const char **out_description); /*! Sets the specified option for the specified configuration. The configuration option is indicated by name. To obtain a list of config options, see `duckdb_get_config_flag`. In the source code, configuration options are defined in `config.cpp`. This can fail if either the name is invalid, or if the value provided for the option is invalid. * @param config The configuration object to set the option on. * @param name The name of the configuration flag to set. * @param option The value to set the configuration flag to. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. 
*/ DUCKDB_C_API duckdb_state duckdb_set_config(duckdb_config config, const char *name, const char *option); /*! Destroys the specified configuration object and de-allocates all memory allocated for the object. * @param config The configuration object to destroy. */ DUCKDB_C_API void duckdb_destroy_config(duckdb_config *config); //===--------------------------------------------------------------------===// // Error Data //===--------------------------------------------------------------------===// // Functions that can throw DuckDB errors must return duckdb_error_data. // Please use this interface for all new functions, as it deprecates all previous error handling approaches. /*! Creates duckdb_error_data. Must be destroyed with `duckdb_destroy_error_data`. * @param type The error type. * @param message The error message. * @return The error data. */ DUCKDB_C_API duckdb_error_data duckdb_create_error_data(duckdb_error_type type, const char *message); /*! Destroys the error data and deallocates its memory. * @param error_data The error data to destroy. */ DUCKDB_C_API void duckdb_destroy_error_data(duckdb_error_data *error_data); /*! Returns the duckdb_error_type of the error data. * @param error_data The error data. * @return The error type. */ DUCKDB_C_API duckdb_error_type duckdb_error_data_error_type(duckdb_error_data error_data); /*! Returns the error message of the error data. Must not be freed. * @param error_data The error data. * @return The error message. */ DUCKDB_C_API const char *duckdb_error_data_message(duckdb_error_data error_data); /*! Returns whether the error data contains an error or not. * @param error_data The error data. * @return True, if the error data contains an exception, else false. */ DUCKDB_C_API bool duckdb_error_data_has_error(duckdb_error_data error_data); //===--------------------------------------------------------------------===// // Query Execution //===--------------------------------------------------------------------===// /*! 
Executes a SQL query within a connection and stores the full (materialized) result in the out_result pointer. If the query fails to execute, DuckDBError is returned and the error message can be retrieved by calling `duckdb_result_error`. Note that after running `duckdb_query`, `duckdb_destroy_result` must be called on the result object even if the query fails, otherwise the error stored within the result will not be freed correctly. * @param connection The connection to perform the query in. * @param query The SQL query to run. * @param out_result The query result. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_query(duckdb_connection connection, const char *query, duckdb_result *out_result); /*! Closes the result and de-allocates all memory allocated for that result. * @param result The result to destroy. */ DUCKDB_C_API void duckdb_destroy_result(duckdb_result *result); /*! Returns the column name of the specified column. The result should not need to be freed; the column names will automatically be destroyed when the result is destroyed. Returns `NULL` if the column is out of range. * @param result The result object to fetch the column name from. * @param col The column index. * @return The column name of the specified column. */ DUCKDB_C_API const char *duckdb_column_name(duckdb_result *result, idx_t col); /*! Returns the column type of the specified column. Returns `DUCKDB_TYPE_INVALID` if the column is out of range. * @param result The result object to fetch the column type from. * @param col The column index. * @return The column type of the specified column. */ DUCKDB_C_API duckdb_type duckdb_column_type(duckdb_result *result, idx_t col); /*! Returns the statement type of the statement that was executed * @param result The result object to fetch the statement type from. 
* @return duckdb_statement_type value or DUCKDB_STATEMENT_TYPE_INVALID */ DUCKDB_C_API duckdb_statement_type duckdb_result_statement_type(duckdb_result result); /*! Returns the logical column type of the specified column. The return type of this call should be destroyed with `duckdb_destroy_logical_type`. Returns `NULL` if the column is out of range. * @param result The result object to fetch the column type from. * @param col The column index. * @return The logical column type of the specified column. */ DUCKDB_C_API duckdb_logical_type duckdb_column_logical_type(duckdb_result *result, idx_t col); /*! Returns the arrow options associated with the given result. These options are definitions of how the arrow arrays/schema should be produced. * @param result The result object to fetch arrow options from. * @return The arrow options associated with the given result. This must be destroyed with `duckdb_destroy_arrow_options`. */ DUCKDB_C_API duckdb_arrow_options duckdb_result_get_arrow_options(duckdb_result *result); /*! Returns the number of columns present in the result object. * @param result The result object. * @return The number of columns present in the result object. */ DUCKDB_C_API idx_t duckdb_column_count(duckdb_result *result); #ifndef DUCKDB_API_NO_DEPRECATED /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. Returns the number of rows present in the result object. * @param result The result object. * @return The number of rows present in the result object. */ DUCKDB_C_API idx_t duckdb_row_count(duckdb_result *result); #endif /*! Returns the number of rows changed by the query stored in the result. This is relevant only for INSERT/UPDATE/DELETE queries. For other queries the rows_changed will be 0. * @param result The result object. * @return The number of rows changed. */ DUCKDB_C_API idx_t duckdb_rows_changed(duckdb_result *result); #ifndef DUCKDB_API_NO_DEPRECATED /*!
**DEPRECATED**: Prefer using `duckdb_result_get_chunk` instead. Returns the data of a specific column of a result in columnar format. The function returns a dense array which contains the result data. The exact type stored in the array depends on the corresponding duckdb_type (as provided by `duckdb_column_type`). For the exact type by which the data should be accessed, see the comments in [the types section](types) or the `DUCKDB_TYPE` enum. For example, for a column of type `DUCKDB_TYPE_INTEGER`, rows can be accessed in the following manner: ```c int32_t *data = (int32_t *) duckdb_column_data(&result, 0); printf("Data for row %d: %d\n", row, data[row]); ``` * @param result The result object to fetch the column data from. * @param col The column index. * @return The column data of the specified column. */ DUCKDB_C_API void *duckdb_column_data(duckdb_result *result, idx_t col); /*! **DEPRECATED**: Prefer using `duckdb_result_get_chunk` instead. Returns the nullmask of a specific column of a result in columnar format. The nullmask indicates for every row whether or not the corresponding row is `NULL`. If a row is `NULL`, the values present in the array provided by `duckdb_column_data` are undefined. ```c int32_t *data = (int32_t *) duckdb_column_data(&result, 0); bool *nullmask = duckdb_nullmask_data(&result, 0); if (nullmask[row]) { printf("Data for row %d: NULL\n", row); } else { printf("Data for row %d: %d\n", row, data[row]); } ``` * @param result The result object to fetch the nullmask from. * @param col The column index. * @return The nullmask of the specified column. */ DUCKDB_C_API bool *duckdb_nullmask_data(duckdb_result *result, idx_t col); #endif /*! Returns the error message contained within the result. The error is only set if `duckdb_query` returns `DuckDBError`. The result of this function must not be freed. It will be cleaned up when `duckdb_destroy_result` is called. * @param result The result object to fetch the error from. 
* @return The error of the result. */ DUCKDB_C_API const char *duckdb_result_error(duckdb_result *result); /*! Returns the result error type contained within the result. The error is only set if `duckdb_query` returns `DuckDBError`. * @param result The result object to fetch the error from. * @return The error type of the result. */ DUCKDB_C_API duckdb_error_type duckdb_result_error_type(duckdb_result *result); //===--------------------------------------------------------------------===// // Result Functions //===--------------------------------------------------------------------===// #ifndef DUCKDB_API_NO_DEPRECATED /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. Fetches a data chunk from the duckdb_result. This function should be called repeatedly until the result is exhausted. The result must be destroyed with `duckdb_destroy_data_chunk`. This function supersedes all `duckdb_value` functions, as well as the `duckdb_column_data` and `duckdb_nullmask_data` functions. It results in significantly better performance, and should be preferred in newer code-bases. If this function is used, none of the other result functions can be used and vice versa (i.e. this function cannot be mixed with the legacy result functions). Use `duckdb_result_chunk_count` to figure out how many chunks there are in the result. * @param result The result object to fetch the data chunk from. * @param chunk_index The chunk index to fetch from. * @return The resulting data chunk. Returns `NULL` if the chunk index is out of bounds. */ DUCKDB_C_API duckdb_data_chunk duckdb_result_get_chunk(duckdb_result result, idx_t chunk_index); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. Checks if the type of the internal result is StreamQueryResult. * @param result The result object to check. 
* @return Whether or not the result object is of the type StreamQueryResult */ DUCKDB_C_API bool duckdb_result_is_streaming(duckdb_result result); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. Returns the number of data chunks present in the result. * @param result The result object * @return Number of data chunks present in the result. */ DUCKDB_C_API idx_t duckdb_result_chunk_count(duckdb_result result); #endif /*! Returns the return_type of the given result, or DUCKDB_RESULT_TYPE_INVALID on error * @param result The result object * @return The return_type */ DUCKDB_C_API duckdb_result_type duckdb_result_return_type(duckdb_result result); //===--------------------------------------------------------------------===// // Safe Fetch Functions //===--------------------------------------------------------------------===// // These functions will perform conversions if necessary. // On failure (e.g. if conversion cannot be performed or if the value is NULL) a default value is returned. // Note that these functions are slow since they perform bounds checking and conversion // For fast access of values prefer using `duckdb_result_get_chunk` #ifndef DUCKDB_API_NO_DEPRECATED /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The boolean value at the specified location, or false if the value cannot be converted. */ DUCKDB_C_API bool duckdb_value_boolean(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The int8_t value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API int8_t duckdb_value_int8(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The int16_t value at the specified location, or 0 if the value cannot be converted.
*/ DUCKDB_C_API int16_t duckdb_value_int16(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The int32_t value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API int32_t duckdb_value_int32(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The int64_t value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API int64_t duckdb_value_int64(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The duckdb_hugeint value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API duckdb_hugeint duckdb_value_hugeint(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The duckdb_uhugeint value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API duckdb_uhugeint duckdb_value_uhugeint(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The duckdb_decimal value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API duckdb_decimal duckdb_value_decimal(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The uint8_t value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API uint8_t duckdb_value_uint8(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The uint16_t value at the specified location, or 0 if the value cannot be converted. 
*/ DUCKDB_C_API uint16_t duckdb_value_uint16(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The uint32_t value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API uint32_t duckdb_value_uint32(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The uint64_t value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API uint64_t duckdb_value_uint64(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The float value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API float duckdb_value_float(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The double value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API double duckdb_value_double(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The duckdb_date value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API duckdb_date duckdb_value_date(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The duckdb_time value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API duckdb_time duckdb_value_time(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The duckdb_timestamp value at the specified location, or 0 if the value cannot be converted. 
*/ DUCKDB_C_API duckdb_timestamp duckdb_value_timestamp(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The duckdb_interval value at the specified location, or 0 if the value cannot be converted. */ DUCKDB_C_API duckdb_interval duckdb_value_interval(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATED**: Use duckdb_value_string instead. This function does not work correctly if the string contains null bytes. * @return The text value at the specified location as a null-terminated string, or nullptr if the value cannot be converted. The result must be freed with `duckdb_free`. */ DUCKDB_C_API char *duckdb_value_varchar(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. No support for nested types, and for other complex types. The resulting field "string.data" must be freed with `duckdb_free.` * @return The string value at the specified location. Attempts to cast the result value to string. */ DUCKDB_C_API duckdb_string duckdb_value_string(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATED**: Use duckdb_value_string_internal instead. This function does not work correctly if the string contains null bytes. * @return The char* value at the specified location. ONLY works on VARCHAR columns and does not auto-cast. If the column is NOT a VARCHAR column this function will return NULL. The result must NOT be freed. */ DUCKDB_C_API char *duckdb_value_varchar_internal(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATED**: Use duckdb_value_string_internal instead. This function does not work correctly if the string contains null bytes. * @return The char* value at the specified location. ONLY works on VARCHAR columns and does not auto-cast. If the column is NOT a VARCHAR column this function will return NULL. The result must NOT be freed. 
*/ DUCKDB_C_API duckdb_string duckdb_value_string_internal(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return The duckdb_blob value at the specified location. Returns a blob with blob.data set to nullptr if the value cannot be converted. The resulting field "blob.data" must be freed with `duckdb_free.` */ DUCKDB_C_API duckdb_blob duckdb_value_blob(duckdb_result *result, idx_t col, idx_t row); /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. * @return Returns true if the value at the specified index is NULL, and false otherwise. */ DUCKDB_C_API bool duckdb_value_is_null(duckdb_result *result, idx_t col, idx_t row); #endif //===--------------------------------------------------------------------===// // Helpers //===--------------------------------------------------------------------===// /*! Allocate `size` bytes of memory using the duckdb internal malloc function. Any memory allocated in this manner should be freed using `duckdb_free`. * @param size The number of bytes to allocate. * @return A pointer to the allocated memory region. */ DUCKDB_C_API void *duckdb_malloc(size_t size); /*! Free a value returned from `duckdb_malloc`, `duckdb_value_varchar`, `duckdb_value_blob`, or `duckdb_value_string`. * @param ptr The memory region to de-allocate. */ DUCKDB_C_API void duckdb_free(void *ptr); /*! The internal vector size used by DuckDB. This is the amount of tuples that will fit into a data chunk created by `duckdb_create_data_chunk`. * @return The vector size. */ DUCKDB_C_API idx_t duckdb_vector_size(); /*! Whether or not the duckdb_string_t value is inlined. This means that the data of the string does not have a separate allocation. */ DUCKDB_C_API bool duckdb_string_is_inlined(duckdb_string_t string); /*! Get the string length of a string_t * @param string The string to get the length of. * @return The length. 
*/ DUCKDB_C_API uint32_t duckdb_string_t_length(duckdb_string_t string); /*! Get a pointer to the string data of a string_t * @param string The string to get the pointer to. * @return The pointer. */ DUCKDB_C_API const char *duckdb_string_t_data(duckdb_string_t *string); //===--------------------------------------------------------------------===// // Date Time Timestamp Helpers //===--------------------------------------------------------------------===// /*! Decompose a `duckdb_date` object into year, month and date (stored as `duckdb_date_struct`). * @param date The date object, as obtained from a `DUCKDB_TYPE_DATE` column. * @return The `duckdb_date_struct` with the decomposed elements. */ DUCKDB_C_API duckdb_date_struct duckdb_from_date(duckdb_date date); /*! Re-compose a `duckdb_date` from year, month and date (`duckdb_date_struct`). * @param date The year, month and date stored in a `duckdb_date_struct`. * @return The `duckdb_date` element. */ DUCKDB_C_API duckdb_date duckdb_to_date(duckdb_date_struct date); /*! Test a `duckdb_date` to see if it is a finite value. * @param date The date object, as obtained from a `DUCKDB_TYPE_DATE` column. * @return True if the date is finite, false if it is ±infinity. */ DUCKDB_C_API bool duckdb_is_finite_date(duckdb_date date); /*! Decompose a `duckdb_time` object into hour, minute, second and microsecond (stored as `duckdb_time_struct`). * @param time The time object, as obtained from a `DUCKDB_TYPE_TIME` column. * @return The `duckdb_time_struct` with the decomposed elements. */ DUCKDB_C_API duckdb_time_struct duckdb_from_time(duckdb_time time); /*! Create a `duckdb_time_tz` object from micros and a timezone offset. * @param micros The microsecond component of the time. * @param offset The timezone offset component of the time. * @return The `duckdb_time_tz` element. */ DUCKDB_C_API duckdb_time_tz duckdb_create_time_tz(int64_t micros, int32_t offset); /*! Decompose a TIME_TZ objects into micros and a timezone offset. 
Use `duckdb_from_time` to further decompose the micros into hour, minute, second and microsecond.

* @param micros The time object, as obtained from a `DUCKDB_TYPE_TIME_TZ` column.
* @return The `duckdb_time_tz_struct` with the decomposed elements.
*/
DUCKDB_C_API duckdb_time_tz_struct duckdb_from_time_tz(duckdb_time_tz micros);

/*!
Re-compose a `duckdb_time` from hour, minute, second and microsecond (`duckdb_time_struct`).

* @param time The hour, minute, second and microsecond in a `duckdb_time_struct`.
* @return The `duckdb_time` element.
*/
DUCKDB_C_API duckdb_time duckdb_to_time(duckdb_time_struct time);

/*!
Decompose a `duckdb_timestamp` object into a `duckdb_timestamp_struct`.

* @param ts The ts object, as obtained from a `DUCKDB_TYPE_TIMESTAMP` column.
* @return The `duckdb_timestamp_struct` with the decomposed elements.
*/
DUCKDB_C_API duckdb_timestamp_struct duckdb_from_timestamp(duckdb_timestamp ts);

/*!
Re-compose a `duckdb_timestamp` from a duckdb_timestamp_struct.

* @param ts The de-composed elements in a `duckdb_timestamp_struct`.
* @return The `duckdb_timestamp` element.
*/
DUCKDB_C_API duckdb_timestamp duckdb_to_timestamp(duckdb_timestamp_struct ts);

/*!
Test a `duckdb_timestamp` to see if it is a finite value.

* @param ts The duckdb_timestamp object, as obtained from a `DUCKDB_TYPE_TIMESTAMP` column.
* @return True if the timestamp is finite, false if it is ±infinity.
*/
DUCKDB_C_API bool duckdb_is_finite_timestamp(duckdb_timestamp ts);

/*!
Test a `duckdb_timestamp_s` to see if it is a finite value.

* @param ts The duckdb_timestamp_s object, as obtained from a `DUCKDB_TYPE_TIMESTAMP_S` column.
* @return True if the timestamp is finite, false if it is ±infinity.
*/
DUCKDB_C_API bool duckdb_is_finite_timestamp_s(duckdb_timestamp_s ts);

/*!
Test a `duckdb_timestamp_ms` to see if it is a finite value.

* @param ts The duckdb_timestamp_ms object, as obtained from a `DUCKDB_TYPE_TIMESTAMP_MS` column.
* @return True if the timestamp is finite, false if it is ±infinity.
*/ DUCKDB_C_API bool duckdb_is_finite_timestamp_ms(duckdb_timestamp_ms ts); /*! Test a `duckdb_timestamp_ns` to see if it is a finite value. * @param ts The duckdb_timestamp_ns object, as obtained from a `DUCKDB_TYPE_TIMESTAMP_NS` column. * @return True if the timestamp is finite, false if it is ±infinity. */ DUCKDB_C_API bool duckdb_is_finite_timestamp_ns(duckdb_timestamp_ns ts); //===--------------------------------------------------------------------===// // Hugeint Helpers //===--------------------------------------------------------------------===// /*! Converts a duckdb_hugeint object (as obtained from a `DUCKDB_TYPE_HUGEINT` column) into a double. * @param val The hugeint value. * @return The converted `double` element. */ DUCKDB_C_API double duckdb_hugeint_to_double(duckdb_hugeint val); /*! Converts a double value to a duckdb_hugeint object. If the conversion fails because the double value is too big the result will be 0. * @param val The double value. * @return The converted `duckdb_hugeint` element. */ DUCKDB_C_API duckdb_hugeint duckdb_double_to_hugeint(double val); //===--------------------------------------------------------------------===// // Unsigned Hugeint Helpers //===--------------------------------------------------------------------===// /*! Converts a duckdb_uhugeint object (as obtained from a `DUCKDB_TYPE_UHUGEINT` column) into a double. * @param val The uhugeint value. * @return The converted `double` element. */ DUCKDB_C_API double duckdb_uhugeint_to_double(duckdb_uhugeint val); /*! Converts a double value to a duckdb_uhugeint object. If the conversion fails because the double value is too big the result will be 0. * @param val The double value. * @return The converted `duckdb_uhugeint` element. 
*/
DUCKDB_C_API duckdb_uhugeint duckdb_double_to_uhugeint(double val);

//===--------------------------------------------------------------------===//
// Decimal Helpers
//===--------------------------------------------------------------------===//

/*!
Converts a double value to a duckdb_decimal object. If the conversion fails because the double value is too big,
or the width/scale are invalid, the result will be 0.

* @param val The double value.
* @param width The width of the target decimal type.
* @param scale The scale of the target decimal type.
* @return The converted `duckdb_decimal` element.
*/
DUCKDB_C_API duckdb_decimal duckdb_double_to_decimal(double val, uint8_t width, uint8_t scale);

/*!
Converts a duckdb_decimal object (as obtained from a `DUCKDB_TYPE_DECIMAL` column) into a double.

* @param val The decimal value.
* @return The converted `double` element.
*/
DUCKDB_C_API double duckdb_decimal_to_double(duckdb_decimal val);

//===--------------------------------------------------------------------===//
// Prepared Statements
//===--------------------------------------------------------------------===//

// A prepared statement is a parameterized query that allows you to bind parameters to it.
// * This is useful to easily supply parameters to functions and avoid SQL injection attacks.
// * This is useful to speed up queries that you will execute several times with different parameters.
// Because the query will only be parsed, bound, optimized and planned once during the prepare stage,
// rather than once per execution.
// For example:
// SELECT * FROM tbl WHERE id=?
// Or a query with multiple parameters:
// SELECT * FROM tbl WHERE id=$1 OR name=$2

/*!
Create a prepared statement object from a query.

Note that after calling `duckdb_prepare`, the prepared statement should always be destroyed using
`duckdb_destroy_prepare`, even if the prepare fails.

If the prepare fails, `duckdb_prepare_error` can be called to obtain the reason why the prepare failed.
* @param connection The connection object * @param query The SQL query to prepare * @param out_prepared_statement The resulting prepared statement object * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_prepare(duckdb_connection connection, const char *query, duckdb_prepared_statement *out_prepared_statement); /*! Closes the prepared statement and de-allocates all memory allocated for the statement. * @param prepared_statement The prepared statement to destroy. */ DUCKDB_C_API void duckdb_destroy_prepare(duckdb_prepared_statement *prepared_statement); /*! Returns the error message associated with the given prepared statement. If the prepared statement has no error message, this returns `nullptr` instead. The error message should not be freed. It will be de-allocated when `duckdb_destroy_prepare` is called. * @param prepared_statement The prepared statement to obtain the error from. * @return The error message, or `nullptr` if there is none. */ DUCKDB_C_API const char *duckdb_prepare_error(duckdb_prepared_statement prepared_statement); /*! Returns the number of parameters that can be provided to the given prepared statement. Returns 0 if the query was not successfully prepared. * @param prepared_statement The prepared statement to obtain the number of parameters for. */ DUCKDB_C_API idx_t duckdb_nparams(duckdb_prepared_statement prepared_statement); /*! Returns the name used to identify the parameter The returned string should be freed using `duckdb_free`. Returns NULL if the index is out of range for the provided prepared statement. * @param prepared_statement The prepared statement for which to get the parameter name from. */ DUCKDB_C_API const char *duckdb_parameter_name(duckdb_prepared_statement prepared_statement, idx_t index); /*! Returns the parameter type for the parameter at the given index. 
Returns `DUCKDB_TYPE_INVALID` if the parameter index is out of range or the statement was not successfully prepared.

* @param prepared_statement The prepared statement.
* @param param_idx The parameter index.
* @return The parameter type
*/
DUCKDB_C_API duckdb_type duckdb_param_type(duckdb_prepared_statement prepared_statement, idx_t param_idx);

/*!
Returns the logical type for the parameter at the given index.

Returns `nullptr` if the parameter index is out of range or the statement was not successfully prepared.

The return type of this call should be destroyed with `duckdb_destroy_logical_type`.

* @param prepared_statement The prepared statement.
* @param param_idx The parameter index.
* @return The logical type of the parameter
*/
DUCKDB_C_API duckdb_logical_type duckdb_param_logical_type(duckdb_prepared_statement prepared_statement, idx_t param_idx);

/*!
Clears the parameters bound to the prepared statement.
*/
DUCKDB_C_API duckdb_state duckdb_clear_bindings(duckdb_prepared_statement prepared_statement);

/*!
Returns the statement type of the statement to be executed.

* @param statement The prepared statement.
* @return duckdb_statement_type value or DUCKDB_STATEMENT_TYPE_INVALID
*/
DUCKDB_C_API duckdb_statement_type duckdb_prepared_statement_type(duckdb_prepared_statement statement);

/*!
Returns the number of columns present in the result of the prepared statement.
If any of the column types are invalid, the result will be 1.

* @param prepared_statement The prepared statement.
* @return The number of columns present in the result of the prepared statement.
*/
DUCKDB_C_API idx_t duckdb_prepared_statement_column_count(duckdb_prepared_statement prepared_statement);

/*!
Returns the name of the specified column of the result of the prepared_statement.
The returned string should be freed using `duckdb_free`.
Returns `nullptr` if the column is out of range.

* @param prepared_statement The prepared statement.
* @param col_idx The column index.
* @return The column name of the specified column. */ DUCKDB_C_API const char *duckdb_prepared_statement_column_name(duckdb_prepared_statement prepared_statement, idx_t col_idx); /*! Returns the column type of the specified column of the result of the prepared_statement. Returns `DUCKDB_TYPE_INVALID` if the column is out of range. The return type of this call should be destroyed with `duckdb_destroy_logical_type`. * @param prepared_statement The prepared statement to fetch the column type from. * @param col_idx The column index. * @return The logical type of the specified column. */ DUCKDB_C_API duckdb_logical_type duckdb_prepared_statement_column_logical_type(duckdb_prepared_statement prepared_statement, idx_t col_idx); /*! Returns the column type of the specified column of the result of the prepared_statement. Returns `DUCKDB_TYPE_INVALID` if the column is out of range. * @param prepared_statement The prepared statement to fetch the column type from. * @param col_idx The column index. * @return The type of the specified column. */ DUCKDB_C_API duckdb_type duckdb_prepared_statement_column_type(duckdb_prepared_statement prepared_statement, idx_t col_idx); //===--------------------------------------------------------------------===// // Bind Values to Prepared Statements //===--------------------------------------------------------------------===// /*! Binds a value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_value(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_value val); /*! Retrieve the index of the parameter for the prepared statement, identified by name */ DUCKDB_C_API duckdb_state duckdb_bind_parameter_index(duckdb_prepared_statement prepared_statement, idx_t *param_idx_out, const char *name); /*! Binds a bool value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_boolean(duckdb_prepared_statement prepared_statement, idx_t param_idx, bool val); /*! 
Binds an int8_t value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_int8(duckdb_prepared_statement prepared_statement, idx_t param_idx, int8_t val); /*! Binds an int16_t value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_int16(duckdb_prepared_statement prepared_statement, idx_t param_idx, int16_t val); /*! Binds an int32_t value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_int32(duckdb_prepared_statement prepared_statement, idx_t param_idx, int32_t val); /*! Binds an int64_t value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_int64(duckdb_prepared_statement prepared_statement, idx_t param_idx, int64_t val); /*! Binds a duckdb_hugeint value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_hugeint(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_hugeint val); /*! Binds a duckdb_uhugeint value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_uhugeint(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_uhugeint val); /*! Binds a duckdb_decimal value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_decimal(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_decimal val); /*! Binds a uint8_t value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_uint8(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint8_t val); /*! Binds a uint16_t value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_uint16(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint16_t val); /*! Binds a uint32_t value to the prepared statement at the specified index. 
*/ DUCKDB_C_API duckdb_state duckdb_bind_uint32(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint32_t val); /*! Binds a uint64_t value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_uint64(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint64_t val); /*! Binds a float value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_float(duckdb_prepared_statement prepared_statement, idx_t param_idx, float val); /*! Binds a double value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_double(duckdb_prepared_statement prepared_statement, idx_t param_idx, double val); /*! Binds a duckdb_date value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_date(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_date val); /*! Binds a duckdb_time value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_time(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_time val); /*! Binds a duckdb_timestamp value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_timestamp(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_timestamp val); /*! Binds a duckdb_timestamp value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_timestamp_tz(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_timestamp val); /*! Binds a duckdb_interval value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_interval(duckdb_prepared_statement prepared_statement, idx_t param_idx, duckdb_interval val); /*! Binds a null-terminated varchar value to the prepared statement at the specified index. 
*/ DUCKDB_C_API duckdb_state duckdb_bind_varchar(duckdb_prepared_statement prepared_statement, idx_t param_idx, const char *val); /*! Binds a varchar value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_varchar_length(duckdb_prepared_statement prepared_statement, idx_t param_idx, const char *val, idx_t length); /*! Binds a blob value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_blob(duckdb_prepared_statement prepared_statement, idx_t param_idx, const void *data, idx_t length); /*! Binds a NULL value to the prepared statement at the specified index. */ DUCKDB_C_API duckdb_state duckdb_bind_null(duckdb_prepared_statement prepared_statement, idx_t param_idx); //===--------------------------------------------------------------------===// // Execute Prepared Statements //===--------------------------------------------------------------------===// /*! Executes the prepared statement with the given bound parameters, and returns a materialized query result. This method can be called multiple times for each prepared statement, and the parameters can be modified between calls to this function. Note that the result must be freed with `duckdb_destroy_result`. * @param prepared_statement The prepared statement to execute. * @param out_result The query result. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_execute_prepared(duckdb_prepared_statement prepared_statement, duckdb_result *out_result); #ifndef DUCKDB_API_NO_DEPRECATED /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. Executes the prepared statement with the given bound parameters, and returns an optionally-streaming query result. 
To determine if the resulting query was in fact streamed, use `duckdb_result_is_streaming` This method can be called multiple times for each prepared statement, and the parameters can be modified between calls to this function. Note that the result must be freed with `duckdb_destroy_result`. * @param prepared_statement The prepared statement to execute. * @param out_result The query result. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_execute_prepared_streaming(duckdb_prepared_statement prepared_statement, duckdb_result *out_result); #endif //===--------------------------------------------------------------------===// // Extract Statements //===--------------------------------------------------------------------===// // A query string can be extracted into multiple SQL statements. Each statement can be prepared and executed separately. /*! Extract all statements from a query. Note that after calling `duckdb_extract_statements`, the extracted statements should always be destroyed using `duckdb_destroy_extracted`, even if no statements were extracted. If the extract fails, `duckdb_extract_statements_error` can be called to obtain the reason why the extract failed. * @param connection The connection object * @param query The SQL query to extract * @param out_extracted_statements The resulting extracted statements object * @return The number of extracted statements or 0 on failure. */ DUCKDB_C_API idx_t duckdb_extract_statements(duckdb_connection connection, const char *query, duckdb_extracted_statements *out_extracted_statements); /*! Prepare an extracted statement. Note that after calling `duckdb_prepare_extracted_statement`, the prepared statement should always be destroyed using `duckdb_destroy_prepare`, even if the prepare fails. If the prepare fails, `duckdb_prepare_error` can be called to obtain the reason why the prepare failed. 
* @param connection The connection object * @param extracted_statements The extracted statements object * @param index The index of the extracted statement to prepare * @param out_prepared_statement The resulting prepared statement object * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_prepare_extracted_statement(duckdb_connection connection, duckdb_extracted_statements extracted_statements, idx_t index, duckdb_prepared_statement *out_prepared_statement); /*! Returns the error message contained within the extracted statements. The result of this function must not be freed. It will be cleaned up when `duckdb_destroy_extracted` is called. * @param extracted_statements The extracted statements to fetch the error from. * @return The error of the extracted statements. */ DUCKDB_C_API const char *duckdb_extract_statements_error(duckdb_extracted_statements extracted_statements); /*! De-allocates all memory allocated for the extracted statements. * @param extracted_statements The extracted statements to destroy. */ DUCKDB_C_API void duckdb_destroy_extracted(duckdb_extracted_statements *extracted_statements); //===--------------------------------------------------------------------===// // Pending Result Interface //===--------------------------------------------------------------------===// /*! Executes the prepared statement with the given bound parameters, and returns a pending result. The pending result represents an intermediate structure for a query that is not yet fully executed. The pending result can be used to incrementally execute a query, returning control to the client between tasks. Note that after calling `duckdb_pending_prepared`, the pending result should always be destroyed using `duckdb_destroy_pending`, even if this function returns DuckDBError. * @param prepared_statement The prepared statement to execute. * @param out_result The pending query result. 
* @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_pending_prepared(duckdb_prepared_statement prepared_statement, duckdb_pending_result *out_result); #ifndef DUCKDB_API_NO_DEPRECATED /*! **DEPRECATION NOTICE**: This method is scheduled for removal in a future release. Executes the prepared statement with the given bound parameters, and returns a pending result. This pending result will create a streaming duckdb_result when executed. The pending result represents an intermediate structure for a query that is not yet fully executed. Note that after calling `duckdb_pending_prepared_streaming`, the pending result should always be destroyed using `duckdb_destroy_pending`, even if this function returns DuckDBError. * @param prepared_statement The prepared statement to execute. * @param out_result The pending query result. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_pending_prepared_streaming(duckdb_prepared_statement prepared_statement, duckdb_pending_result *out_result); #endif /*! Closes the pending result and de-allocates all memory allocated for the result. * @param pending_result The pending result to destroy. */ DUCKDB_C_API void duckdb_destroy_pending(duckdb_pending_result *pending_result); /*! Returns the error message contained within the pending result. The result of this function must not be freed. It will be cleaned up when `duckdb_destroy_pending` is called. * @param pending_result The pending result to fetch the error from. * @return The error of the pending result. */ DUCKDB_C_API const char *duckdb_pending_error(duckdb_pending_result pending_result); /*! Executes a single task within the query, returning whether or not the query is ready. If this returns DUCKDB_PENDING_RESULT_READY, the duckdb_execute_pending function can be called to obtain the result. 
If this returns DUCKDB_PENDING_RESULT_NOT_READY, the duckdb_pending_execute_task function should be called again. If this returns DUCKDB_PENDING_ERROR, an error occurred during execution. The error message can be obtained by calling duckdb_pending_error on the pending_result. * @param pending_result The pending result to execute a task within. * @return The state of the pending result after the execution. */ DUCKDB_C_API duckdb_pending_state duckdb_pending_execute_task(duckdb_pending_result pending_result); /*! If this returns DUCKDB_PENDING_RESULT_READY, the duckdb_execute_pending function can be called to obtain the result. If this returns DUCKDB_PENDING_RESULT_NOT_READY, the duckdb_pending_execute_check_state function should be called again. If this returns DUCKDB_PENDING_ERROR, an error occurred during execution. The error message can be obtained by calling duckdb_pending_error on the pending_result. * @param pending_result The pending result. * @return The state of the pending result. */ DUCKDB_C_API duckdb_pending_state duckdb_pending_execute_check_state(duckdb_pending_result pending_result); /*! Fully execute a pending query result, returning the final query result. If duckdb_pending_execute_task has been called until DUCKDB_PENDING_RESULT_READY was returned, this will return fast. Otherwise, all remaining tasks must be executed first. Note that the result must be freed with `duckdb_destroy_result`. * @param pending_result The pending result to execute. * @param out_result The result object. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ DUCKDB_C_API duckdb_state duckdb_execute_pending(duckdb_pending_result pending_result, duckdb_result *out_result); /*! Returns whether a duckdb_pending_state is finished executing. For example if `pending_state` is DUCKDB_PENDING_RESULT_READY, this function will return true. * @param pending_state The pending state on which to decide whether to finish execution. 
* @return Boolean indicating pending execution should be considered finished. */ DUCKDB_C_API bool duckdb_pending_execution_is_finished(duckdb_pending_state pending_state); //===--------------------------------------------------------------------===// // Value Interface //===--------------------------------------------------------------------===// /*! Destroys the value and de-allocates all memory allocated for that type. * @param value The value to destroy. */ DUCKDB_C_API void duckdb_destroy_value(duckdb_value *value); /*! Creates a value from a null-terminated string * @param text The null-terminated string * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_varchar(const char *text); /*! Creates a value from a string * @param text The text * @param length The length of the text * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_varchar_length(const char *text, idx_t length); /*! Creates a value from a boolean * @param input The boolean value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_bool(bool input); /*! Creates a value from an int8_t (a tinyint) * @param input The tinyint value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_int8(int8_t input); /*! Creates a value from a uint8_t (a utinyint) * @param input The utinyint value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_uint8(uint8_t input); /*! Creates a value from an int16_t (a smallint) * @param input The smallint value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_int16(int16_t input); /*! Creates a value from a uint16_t (a usmallint) * @param input The usmallint value * @return The value. 
This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_uint16(uint16_t input); /*! Creates a value from an int32_t (an integer) * @param input The integer value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_int32(int32_t input); /*! Creates a value from a uint32_t (a uinteger) * @param input The uinteger value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_uint32(uint32_t input); /*! Creates a value from a uint64_t (a ubigint) * @param input The ubigint value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_uint64(uint64_t input); /*! Creates a value from an int64 * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_int64(int64_t val); /*! Creates a value from a hugeint * @param input The hugeint value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_hugeint(duckdb_hugeint input); /*! Creates a value from a uhugeint * @param input The uhugeint value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_uhugeint(duckdb_uhugeint input); /*! Creates a BIGNUM value from a duckdb_bignum * @param input The duckdb_bignum value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_bignum(duckdb_bignum input); /*! Creates a DECIMAL value from a duckdb_decimal * @param input The duckdb_decimal value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_decimal(duckdb_decimal input); /*! Creates a value from a float * @param input The float value * @return The value. This must be destroyed with `duckdb_destroy_value`. 
*/ DUCKDB_C_API duckdb_value duckdb_create_float(float input); /*! Creates a value from a double * @param input The double value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_double(double input); /*! Creates a value from a date * @param input The date value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_date(duckdb_date input); /*! Creates a value from a time * @param input The time value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_time(duckdb_time input); /*! Creates a value from a time_ns * @param input The time value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_time_ns(duckdb_time_ns input); /*! Creates a value from a time_tz. Not to be confused with `duckdb_create_time_tz`, which creates a duckdb_time_tz_t. * @param value The time_tz value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_time_tz_value(duckdb_time_tz value); /*! Creates a TIMESTAMP value from a duckdb_timestamp * @param input The duckdb_timestamp value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_timestamp(duckdb_timestamp input); /*! Creates a TIMESTAMP_TZ value from a duckdb_timestamp * @param input The duckdb_timestamp value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_timestamp_tz(duckdb_timestamp input); /*! Creates a TIMESTAMP_S value from a duckdb_timestamp_s * @param input The duckdb_timestamp_s value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_timestamp_s(duckdb_timestamp_s input); /*! 
Creates a TIMESTAMP_MS value from a duckdb_timestamp_ms * @param input The duckdb_timestamp_ms value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_timestamp_ms(duckdb_timestamp_ms input); /*! Creates a TIMESTAMP_NS value from a duckdb_timestamp_ns * @param input The duckdb_timestamp_ns value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_timestamp_ns(duckdb_timestamp_ns input); /*! Creates a value from an interval * @param input The interval value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_interval(duckdb_interval input); /*! Creates a value from a blob * @param data The blob data * @param length The length of the blob data * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_blob(const uint8_t *data, idx_t length); /*! Creates a BIT value from a duckdb_bit * @param input The duckdb_bit value * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_bit(duckdb_bit input); /*! Creates a UUID value from a uhugeint * @param input The duckdb_uhugeint containing the UUID * @return The value. This must be destroyed with `duckdb_destroy_value`. */ DUCKDB_C_API duckdb_value duckdb_create_uuid(duckdb_uhugeint input); /*! Returns the boolean value of the given value. * @param val A duckdb_value containing a boolean * @return A boolean, or false if the value cannot be converted */ DUCKDB_C_API bool duckdb_get_bool(duckdb_value val); /*! Returns the int8_t value of the given value. * @param val A duckdb_value containing a tinyint * @return A int8_t, or MinValue if the value cannot be converted */ DUCKDB_C_API int8_t duckdb_get_int8(duckdb_value val); /*! Returns the uint8_t value of the given value. 
* @param val A duckdb_value containing a utinyint * @return A uint8_t, or MinValue if the value cannot be converted */ DUCKDB_C_API uint8_t duckdb_get_uint8(duckdb_value val); /*! Returns the int16_t value of the given value. * @param val A duckdb_value containing a smallint * @return A int16_t, or MinValue if the value cannot be converted */ DUCKDB_C_API int16_t duckdb_get_int16(duckdb_value val); /*! Returns the uint16_t value of the given value. * @param val A duckdb_value containing a usmallint * @return A uint16_t, or MinValue if the value cannot be converted */ DUCKDB_C_API uint16_t duckdb_get_uint16(duckdb_value val); /*! Returns the int32_t value of the given value. * @param val A duckdb_value containing an integer * @return A int32_t, or MinValue if the value cannot be converted */ DUCKDB_C_API int32_t duckdb_get_int32(duckdb_value val); /*! Returns the uint32_t value of the given value. * @param val A duckdb_value containing a uinteger * @return A uint32_t, or MinValue if the value cannot be converted */ DUCKDB_C_API uint32_t duckdb_get_uint32(duckdb_value val); /*! Returns the int64_t value of the given value. * @param val A duckdb_value containing a bigint * @return A int64_t, or MinValue if the value cannot be converted */ DUCKDB_C_API int64_t duckdb_get_int64(duckdb_value val); /*! Returns the uint64_t value of the given value. * @param val A duckdb_value containing a ubigint * @return A uint64_t, or MinValue if the value cannot be converted */ DUCKDB_C_API uint64_t duckdb_get_uint64(duckdb_value val); /*! Returns the hugeint value of the given value. * @param val A duckdb_value containing a hugeint * @return A duckdb_hugeint, or MinValue if the value cannot be converted */ DUCKDB_C_API duckdb_hugeint duckdb_get_hugeint(duckdb_value val); /*! Returns the uhugeint value of the given value. 
* @param val A duckdb_value containing a uhugeint * @return A duckdb_uhugeint, or MinValue if the value cannot be converted */ DUCKDB_C_API duckdb_uhugeint duckdb_get_uhugeint(duckdb_value val); /*! Returns the duckdb_bignum value of the given value. The `data` field must be destroyed with `duckdb_free`. * @param val A duckdb_value containing a BIGNUM * @return A duckdb_bignum. The `data` field must be destroyed with `duckdb_free`. */ DUCKDB_C_API duckdb_bignum duckdb_get_bignum(duckdb_value val); /*! Returns the duckdb_decimal value of the given value. * @param val A duckdb_value containing a DECIMAL * @return A duckdb_decimal, or MinValue if the value cannot be converted */ DUCKDB_C_API duckdb_decimal duckdb_get_decimal(duckdb_value val); /*! Returns the float value of the given value. * @param val A duckdb_value containing a float * @return A float, or NAN if the value cannot be converted */ DUCKDB_C_API float duckdb_get_float(duckdb_value val); /*! Returns the double value of the given value. * @param val A duckdb_value containing a double * @return A double, or NAN if the value cannot be converted */ DUCKDB_C_API double duckdb_get_double(duckdb_value val); /*! Returns the date value of the given value. * @param val A duckdb_value containing a date * @return A duckdb_date, or MinValue if the value cannot be converted */ DUCKDB_C_API duckdb_date duckdb_get_date(duckdb_value val); /*! Returns the time value of the given value. * @param val A duckdb_value containing a time * @return A duckdb_time, or MinValue